vfs_cache.c revision 116182
1/* 2 * Copyright (c) 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Poul-Henning Kamp of the FreeBSD Project. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 37 */ 38 39#include <sys/cdefs.h> 40__FBSDID("$FreeBSD: head/sys/kern/vfs_cache.c 116182 2003-06-11 00:56:59Z obrien $"); 41 42#include <sys/param.h> 43#include <sys/systm.h> 44#include <sys/kernel.h> 45#include <sys/lock.h> 46#include <sys/mutex.h> 47#include <sys/sysctl.h> 48#include <sys/mount.h> 49#include <sys/vnode.h> 50#include <sys/namei.h> 51#include <sys/malloc.h> 52#include <sys/syscallsubr.h> 53#include <sys/sysproto.h> 54#include <sys/proc.h> 55#include <sys/filedesc.h> 56#include <sys/fnv_hash.h> 57 58/* 59 * This structure describes the elements in the cache of recent 60 * names looked up by namei. 61 */ 62 63struct namecache { 64 LIST_ENTRY(namecache) nc_hash; /* hash chain */ 65 LIST_ENTRY(namecache) nc_src; /* source vnode list */ 66 TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ 67 struct vnode *nc_dvp; /* vnode of parent of name */ 68 struct vnode *nc_vp; /* vnode the name refers to */ 69 u_char nc_flag; /* flag bits */ 70 u_char nc_nlen; /* length of name */ 71 char nc_name[0]; /* segment name */ 72}; 73 74/* 75 * Name caching works as follows: 76 * 77 * Names found by directory scans are retained in a cache 78 * for future reference. It is managed LRU, so frequently 79 * used names will hang around. Cache is indexed by hash value 80 * obtained from (vp, name) where vp refers to the directory 81 * containing name. 82 * 83 * If it is a "negative" entry, (i.e. for a name that is known NOT to 84 * exist) the vnode pointer will be NULL. 85 * 86 * Upon reaching the last segment of a path, if the reference 87 * is for DELETE, or NOCACHE is set (rewrite), and the 88 * name is located in the cache, it will be dropped. 89 */ 90 91/* 92 * Structures associated with name cacheing. 93 */ 94#define NCHHASH(hash) \ 95 (&nchashtbl[(hash) & nchash]) 96static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ 97static TAILQ_HEAD(, namecache) ncneg; /* Hash Table */ 98static u_long nchash; /* size of hash table */ 99SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, ""); 100static u_long ncnegfactor = 16; /* ratio of negative entries */ 101SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, ""); 102static u_long numneg; /* number of cache entries allocated */ 103SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, ""); 104static u_long numcache; /* number of cache entries allocated */ 105SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, ""); 106static u_long numcachehv; /* number of cache entries with vnodes held */ 107SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, ""); 108#if 0 109static u_long numcachepl; /* number of cache purge for leaf entries */ 110SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, ""); 111#endif 112struct nchstats nchstats; /* cache effectiveness statistics */ 113 114static int doingcache = 1; /* 1 => enable the cache */ 115SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, ""); 116 117/* Export size information to userland */ 118SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), ""); 119SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), ""); 120 121/* 122 * The new name cache statistics 123 */ 124SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics"); 125#define STATNODE(mode, name, var) \ 126 SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, ""); 127STATNODE(CTLFLAG_RD, numneg, &numneg); 128STATNODE(CTLFLAG_RD, numcache, &numcache); 129static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls); 130static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits); 131static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits); 132static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks); 133static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss); 134static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap); 135static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps); 136static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits); 137static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps); 138static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits); 139 140SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats, 141 sizeof(nchstats), "LU", "VFS cache effectiveness statistics"); 142 143 144 145static void cache_zap(struct namecache *ncp); 146 147static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); 148 149/* 150 * Flags in namecache.nc_flag 151 */ 152#define NCF_WHITE 1 153 154/* 155 * Grab an atomic snapshot of the name cache hash chain lengths 156 */ 157SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats"); 158 159static int 160sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS) 161{ 162 int error; 163 struct nchashhead *ncpp; 164 struct namecache *ncp; 165 int n_nchash; 166 int count; 167 168 n_nchash = nchash + 1; /* nchash is max index, not count */ 169 if (!req->oldptr) 170 return SYSCTL_OUT(req, 0, n_nchash * sizeof(int)); 171 172 /* Scan hash tables for applicable entries */ 173 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 174 count = 0; 175 LIST_FOREACH(ncp, ncpp, nc_hash) { 176 count++; 177 } 178 error = SYSCTL_OUT(req, &count, sizeof(count)); 179 if (error) 180 return (error); 181 } 182 return (0); 183} 184SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD, 185 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths"); 186 187static int 188sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS) 189{ 190 int error; 191 struct nchashhead *ncpp; 192 struct namecache *ncp; 193 int n_nchash; 194 int count, maxlength, used, pct; 195 196 if (!req->oldptr) 197 return SYSCTL_OUT(req, 0, 4 * sizeof(int)); 198 199 n_nchash = nchash + 1; /* nchash is max index, not count */ 200 used = 0; 201 maxlength = 0; 202 203 /* Scan hash tables for applicable entries */ 204 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 205 count = 0; 206 LIST_FOREACH(ncp, ncpp, nc_hash) { 207 count++; 208 } 209 if (count) 210 used++; 211 if (maxlength < count) 212 maxlength = count; 213 } 214 n_nchash = nchash + 1; 215 pct = (used * 100 * 100) / n_nchash; 216 error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash)); 217 if (error) 218 return (error); 219 error = SYSCTL_OUT(req, &used, sizeof(used)); 220 if (error) 221 return (error); 222 error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength)); 223 if (error) 224 return (error); 225 error = SYSCTL_OUT(req, &pct, sizeof(pct)); 226 if (error) 227 return (error); 228 return (0); 229} 230SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD, 231 0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths"); 232 233/* 234 * cache_zap(): 235 * 236 * Removes a namecache entry from cache, whether it contains an actual 237 * pointer to a vnode or if it is just a negative cache entry. 238 */ 239static void 240cache_zap(ncp) 241 struct namecache *ncp; 242{ 243 LIST_REMOVE(ncp, nc_hash); 244 LIST_REMOVE(ncp, nc_src); 245 if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) { 246 vdrop(ncp->nc_dvp); 247 numcachehv--; 248 } 249 if (ncp->nc_vp) { 250 TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); 251 } else { 252 TAILQ_REMOVE(&ncneg, ncp, nc_dst); 253 numneg--; 254 } 255 numcache--; 256 free(ncp, M_VFSCACHE); 257} 258 259/* 260 * cache_leaf_test() 261 * 262 * Test whether this (directory) vnode's namei cache entry contains 263 * subdirectories or not. Used to determine whether the directory is 264 * a leaf in the namei cache or not. Note: the directory may still 265 * contain files in the namei cache. 266 * 267 * Returns 0 if the directory is a leaf, -1 if it isn't. 268 */ 269int 270cache_leaf_test(struct vnode *vp) 271{ 272 struct namecache *ncpc; 273 274 for (ncpc = LIST_FIRST(&vp->v_cache_src); 275 ncpc != NULL; 276 ncpc = LIST_NEXT(ncpc, nc_src) 277 ) { 278 if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR) 279 return(-1); 280 } 281 return(0); 282} 283 284/* 285 * Lookup an entry in the cache 286 * 287 * Lookup is called with dvp pointing to the directory to search, 288 * cnp pointing to the name of the entry being sought. If the lookup 289 * succeeds, the vnode is returned in *vpp, and a status of -1 is 290 * returned. If the lookup determines that the name does not exist 291 * (negative cacheing), a status of ENOENT is returned. If the lookup 292 * fails, a status of zero is returned. 293 */ 294 295int 296cache_lookup(dvp, vpp, cnp) 297 struct vnode *dvp; 298 struct vnode **vpp; 299 struct componentname *cnp; 300{ 301 struct namecache *ncp; 302 u_int32_t hash; 303 304 if (!doingcache) { 305 cnp->cn_flags &= ~MAKEENTRY; 306 return (0); 307 } 308 309 numcalls++; 310 311 if (cnp->cn_nameptr[0] == '.') { 312 if (cnp->cn_namelen == 1) { 313 *vpp = dvp; 314 dothits++; 315 return (-1); 316 } 317 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 318 dotdothits++; 319 if (dvp->v_dd->v_id != dvp->v_ddid || 320 (cnp->cn_flags & MAKEENTRY) == 0) { 321 dvp->v_ddid = 0; 322 return (0); 323 } 324 *vpp = dvp->v_dd; 325 return (-1); 326 } 327 } 328 329 hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT); 330 hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 331 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { 332 numchecks++; 333 if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && 334 !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) 335 break; 336 } 337 338 /* We failed to find an entry */ 339 if (ncp == 0) { 340 if ((cnp->cn_flags & MAKEENTRY) == 0) { 341 nummisszap++; 342 } else { 343 nummiss++; 344 } 345 nchstats.ncs_miss++; 346 return (0); 347 } 348 349 /* We don't want to have an entry, so dump it */ 350 if ((cnp->cn_flags & MAKEENTRY) == 0) { 351 numposzaps++; 352 nchstats.ncs_badhits++; 353 cache_zap(ncp); 354 return (0); 355 } 356 357 /* We found a "positive" match, return the vnode */ 358 if (ncp->nc_vp) { 359 numposhits++; 360 nchstats.ncs_goodhits++; 361 *vpp = ncp->nc_vp; 362 return (-1); 363 } 364 365 /* We found a negative match, and want to create it, so purge */ 366 if (cnp->cn_nameiop == CREATE) { 367 numnegzaps++; 368 nchstats.ncs_badhits++; 369 cache_zap(ncp); 370 return (0); 371 } 372 373 numneghits++; 374 /* 375 * We found a "negative" match, so we shift it to the end of 376 * the "negative" cache entries queue to satisfy LRU. Also, 377 * check to see if the entry is a whiteout; indicate this to 378 * the componentname, if so. 379 */ 380 TAILQ_REMOVE(&ncneg, ncp, nc_dst); 381 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 382 nchstats.ncs_neghits++; 383 if (ncp->nc_flag & NCF_WHITE) 384 cnp->cn_flags |= ISWHITEOUT; 385 return (ENOENT); 386} 387 388/* 389 * Add an entry to the cache. 390 */ 391void 392cache_enter(dvp, vp, cnp) 393 struct vnode *dvp; 394 struct vnode *vp; 395 struct componentname *cnp; 396{ 397 struct namecache *ncp; 398 struct nchashhead *ncpp; 399 u_int32_t hash; 400 int len; 401 402 if (!doingcache) 403 return; 404 405 if (cnp->cn_nameptr[0] == '.') { 406 if (cnp->cn_namelen == 1) { 407 return; 408 } 409 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 410 if (vp) { 411 dvp->v_dd = vp; 412 dvp->v_ddid = vp->v_id; 413 } else { 414 dvp->v_dd = dvp; 415 dvp->v_ddid = 0; 416 } 417 return; 418 } 419 } 420 421 ncp = (struct namecache *) 422 malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK); 423 bzero((char *)ncp, sizeof *ncp); 424 numcache++; 425 if (!vp) { 426 numneg++; 427 ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0; 428 } else if (vp->v_type == VDIR) { 429 vp->v_dd = dvp; 430 vp->v_ddid = dvp->v_id; 431 } 432 433 /* 434 * Set the rest of the namecache entry elements, calculate it's 435 * hash key and insert it into the appropriate chain within 436 * the cache entries table. 437 */ 438 ncp->nc_vp = vp; 439 ncp->nc_dvp = dvp; 440 len = ncp->nc_nlen = cnp->cn_namelen; 441 hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT); 442 bcopy(cnp->cn_nameptr, ncp->nc_name, len); 443 hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 444 ncpp = NCHHASH(hash); 445 LIST_INSERT_HEAD(ncpp, ncp, nc_hash); 446 if (LIST_EMPTY(&dvp->v_cache_src)) { 447 vhold(dvp); 448 numcachehv++; 449 } 450 LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); 451 /* 452 * If the entry is "negative", we place it into the 453 * "negative" cache queue, otherwise, we place it into the 454 * destination vnode's cache entries queue. 455 */ 456 if (vp) { 457 TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); 458 } else { 459 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 460 } 461 if (numneg * ncnegfactor > numcache) { 462 ncp = TAILQ_FIRST(&ncneg); 463 cache_zap(ncp); 464 } 465} 466 467/* 468 * Name cache initialization, from vfs_init() when we are booting 469 */ 470static void 471nchinit(void *dummy __unused) 472{ 473 474 TAILQ_INIT(&ncneg); 475 nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash); 476} 477SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL) 478 479 480/* 481 * Invalidate all entries to a particular vnode. 482 * 483 * Remove all entries in the namecache relating to this vnode and 484 * change the v_id. We take the v_id from a global counter, since 485 * it becomes a handy sequence number in crash-dumps that way. 486 * No valid vnode will ever have (v_id == 0). 487 * 488 * XXX: Only time and the size of v_id prevents this from failing: 489 * XXX: In theory we should hunt down all (struct vnode*, v_id) 490 * XXX: soft references and nuke them, at least on the global 491 * XXX: v_id wraparound. The period of resistance can be extended 492 * XXX: by incrementing each vnodes v_id individually instead of 493 * XXX: using the global v_id. 494 */ 495 496void 497cache_purge(vp) 498 struct vnode *vp; 499{ 500 static u_long nextid; 501 502 while (!LIST_EMPTY(&vp->v_cache_src)) 503 cache_zap(LIST_FIRST(&vp->v_cache_src)); 504 while (!TAILQ_EMPTY(&vp->v_cache_dst)) 505 cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); 506 507 do 508 nextid++; 509 while (nextid == vp->v_id || !nextid); 510 vp->v_id = nextid; 511 vp->v_dd = vp; 512 vp->v_ddid = 0; 513} 514 515/* 516 * Flush all entries referencing a particular filesystem. 517 * 518 * Since we need to check it anyway, we will flush all the invalid 519 * entries at the same time. 520 */ 521void 522cache_purgevfs(mp) 523 struct mount *mp; 524{ 525 struct nchashhead *ncpp; 526 struct namecache *ncp, *nnp; 527 528 /* Scan hash tables for applicable entries */ 529 for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { 530 for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) { 531 nnp = LIST_NEXT(ncp, nc_hash); 532 if (ncp->nc_dvp->v_mount == mp) { 533 cache_zap(ncp); 534 } 535 } 536 } 537} 538 539/* 540 * Perform canonical checks and cache lookup and pass on to filesystem 541 * through the vop_cachedlookup only if needed. 542 */ 543 544int 545vfs_cache_lookup(ap) 546 struct vop_lookup_args /* { 547 struct vnode *a_dvp; 548 struct vnode **a_vpp; 549 struct componentname *a_cnp; 550 } */ *ap; 551{ 552 struct vnode *dvp, *vp; 553 int lockparent; 554 int error; 555 struct vnode **vpp = ap->a_vpp; 556 struct componentname *cnp = ap->a_cnp; 557 struct ucred *cred = cnp->cn_cred; 558 int flags = cnp->cn_flags; 559 struct thread *td = cnp->cn_thread; 560 u_long vpid; /* capability number of vnode */ 561 562 *vpp = NULL; 563 dvp = ap->a_dvp; 564 lockparent = flags & LOCKPARENT; 565 566 if (dvp->v_type != VDIR) 567 return (ENOTDIR); 568 569 if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && 570 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 571 return (EROFS); 572 573 error = VOP_ACCESS(dvp, VEXEC, cred, td); 574 575 if (error) 576 return (error); 577 578 error = cache_lookup(dvp, vpp, cnp); 579 580#ifdef LOOKUP_SHARED 581 if (!error) { 582 /* We do this because the rest of the system now expects to get 583 * a shared lock, which is later upgraded if LOCKSHARED is not 584 * set. We have so many cases here because of bugs that yield 585 * inconsistant lock states. This all badly needs to be fixed 586 */ 587 error = VOP_CACHEDLOOKUP(dvp, vpp, cnp); 588 if (!error) { 589 int flock; 590 591 flock = VOP_ISLOCKED(*vpp, td); 592 if (flock != LK_EXCLUSIVE) { 593 if (flock == 0) { 594 if ((flags & ISLASTCN) && 595 (flags & LOCKSHARED)) 596 VOP_LOCK(*vpp, LK_SHARED, td); 597 else 598 VOP_LOCK(*vpp, LK_EXCLUSIVE, td); 599 } 600 } else if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 601 VOP_LOCK(*vpp, LK_DOWNGRADE, td); 602 } 603 return (error); 604 } 605#else 606 if (!error) 607 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 608#endif 609 610 if (error == ENOENT) 611 return (error); 612 613 vp = *vpp; 614 vpid = vp->v_id; 615 cnp->cn_flags &= ~PDIRUNLOCK; 616 if (dvp == vp) { /* lookup on "." */ 617 VREF(vp); 618 error = 0; 619 } else if (flags & ISDOTDOT) { 620 VOP_UNLOCK(dvp, 0, td); 621 cnp->cn_flags |= PDIRUNLOCK; 622#ifdef LOOKUP_SHARED 623 if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 624 error = vget(vp, LK_SHARED, td); 625 else 626 error = vget(vp, LK_EXCLUSIVE, td); 627#else 628 error = vget(vp, LK_EXCLUSIVE, td); 629#endif 630 631 if (!error && lockparent && (flags & ISLASTCN)) { 632 if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0) 633 cnp->cn_flags &= ~PDIRUNLOCK; 634 } 635 } else { 636#ifdef LOOKUP_SHARED 637 if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 638 error = vget(vp, LK_SHARED, td); 639 else 640 error = vget(vp, LK_EXCLUSIVE, td); 641#else 642 error = vget(vp, LK_EXCLUSIVE, td); 643#endif 644 if (!lockparent || error || !(flags & ISLASTCN)) { 645 VOP_UNLOCK(dvp, 0, td); 646 cnp->cn_flags |= PDIRUNLOCK; 647 } 648 } 649 /* 650 * Check that the capability number did not change 651 * while we were waiting for the lock. 652 */ 653 if (!error) { 654 if (vpid == vp->v_id) 655 return (0); 656 vput(vp); 657 if (lockparent && dvp != vp && (flags & ISLASTCN)) { 658 VOP_UNLOCK(dvp, 0, td); 659 cnp->cn_flags |= PDIRUNLOCK; 660 } 661 } 662 if (cnp->cn_flags & PDIRUNLOCK) { 663 error = vn_lock(dvp, LK_EXCLUSIVE, td); 664 if (error) 665 return (error); 666 cnp->cn_flags &= ~PDIRUNLOCK; 667 } 668#ifdef LOOKUP_SHARED 669 error = VOP_CACHEDLOOKUP(dvp, vpp, cnp); 670 671 if (!error) { 672 int flock = 0; 673 674 flock = VOP_ISLOCKED(*vpp, td); 675 if (flock != LK_EXCLUSIVE) { 676 if (flock == 0) { 677 if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 678 VOP_LOCK(*vpp, LK_SHARED, td); 679 else 680 VOP_LOCK(*vpp, LK_EXCLUSIVE, td); 681 } 682 } else if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 683 VOP_LOCK(*vpp, LK_DOWNGRADE, td); 684 } 685 686 return (error); 687#else 688 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 689#endif 690} 691 692 693#ifndef _SYS_SYSPROTO_H_ 694struct __getcwd_args { 695 u_char *buf; 696 u_int buflen; 697}; 698#endif 699 700/* 701 * XXX All of these sysctls would probably be more productive dead. 702 */ 703static int disablecwd; 704SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, 705 "Disable the getcwd syscall"); 706 707/* Various statistics for the getcwd syscall */ 708static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls); 709static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1); 710static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2); 711static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); 712static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); 713static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); 714 715/* Implementation of the getcwd syscall */ 716int 717__getcwd(td, uap) 718 struct thread *td; 719 struct __getcwd_args *uap; 720{ 721 722 return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen)); 723} 724 725int 726kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen) 727{ 728 char *bp, *tmpbuf; 729 int error, i, slash_prefixed; 730 struct filedesc *fdp; 731 struct namecache *ncp; 732 struct vnode *vp; 733 734 numcwdcalls++; 735 if (disablecwd) 736 return (ENODEV); 737 if (buflen < 2) 738 return (EINVAL); 739 if (buflen > MAXPATHLEN) 740 buflen = MAXPATHLEN; 741 error = 0; 742 tmpbuf = bp = malloc(buflen, M_TEMP, M_WAITOK); 743 bp += buflen - 1; 744 *bp = '\0'; 745 fdp = td->td_proc->p_fd; 746 slash_prefixed = 0; 747 FILEDESC_LOCK(fdp); 748 mp_fixme("No vnode locking done!"); 749 for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) { 750 if (vp->v_vflag & VV_ROOT) { 751 if (vp->v_mount == NULL) { /* forced unmount */ 752 FILEDESC_UNLOCK(fdp); 753 free(tmpbuf, M_TEMP); 754 return (EBADF); 755 } 756 vp = vp->v_mount->mnt_vnodecovered; 757 continue; 758 } 759 if (vp->v_dd->v_id != vp->v_ddid) { 760 FILEDESC_UNLOCK(fdp); 761 numcwdfail1++; 762 free(tmpbuf, M_TEMP); 763 return (ENOTDIR); 764 } 765 ncp = TAILQ_FIRST(&vp->v_cache_dst); 766 if (!ncp) { 767 FILEDESC_UNLOCK(fdp); 768 numcwdfail2++; 769 free(tmpbuf, M_TEMP); 770 return (ENOENT); 771 } 772 if (ncp->nc_dvp != vp->v_dd) { 773 FILEDESC_UNLOCK(fdp); 774 numcwdfail3++; 775 free(tmpbuf, M_TEMP); 776 return (EBADF); 777 } 778 for (i = ncp->nc_nlen - 1; i >= 0; i--) { 779 if (bp == tmpbuf) { 780 FILEDESC_UNLOCK(fdp); 781 numcwdfail4++; 782 free(tmpbuf, M_TEMP); 783 return (ENOMEM); 784 } 785 *--bp = ncp->nc_name[i]; 786 } 787 if (bp == tmpbuf) { 788 FILEDESC_UNLOCK(fdp); 789 numcwdfail4++; 790 free(tmpbuf, M_TEMP); 791 return (ENOMEM); 792 } 793 *--bp = '/'; 794 slash_prefixed = 1; 795 vp = vp->v_dd; 796 } 797 FILEDESC_UNLOCK(fdp); 798 if (!slash_prefixed) { 799 if (bp == tmpbuf) { 800 numcwdfail4++; 801 free(tmpbuf, M_TEMP); 802 return (ENOMEM); 803 } 804 *--bp = '/'; 805 } 806 numcwdfound++; 807 if (bufseg == UIO_SYSSPACE) 808 bcopy(bp, buf, strlen(bp) + 1); 809 else 810 error = copyout(bp, buf, strlen(bp) + 1); 811 free(tmpbuf, M_TEMP); 812 return (error); 813} 814 815/* 816 * Thus begins the fullpath magic. 817 */ 818 819#undef STATNODE 820#define STATNODE(name) \ 821 static u_int name; \ 822 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "") 823 824static int disablefullpath; 825SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0, 826 "Disable the vn_fullpath function"); 827 828STATNODE(numfullpathcalls); 829STATNODE(numfullpathfail1); 830STATNODE(numfullpathfail2); 831STATNODE(numfullpathfail3); 832STATNODE(numfullpathfail4); 833STATNODE(numfullpathfound); 834 835/* 836 * Retrieve the full filesystem path that correspond to a vnode from the name 837 * cache (if available) 838 */ 839int 840vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) 841{ 842 char *bp, *buf; 843 int i, slash_prefixed; 844 struct filedesc *fdp; 845 struct namecache *ncp; 846 struct vnode *vp; 847 848 numfullpathcalls++; 849 if (disablefullpath) 850 return (ENODEV); 851 if (vn == NULL) 852 return (EINVAL); 853 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 854 bp = buf + MAXPATHLEN - 1; 855 *bp = '\0'; 856 fdp = td->td_proc->p_fd; 857 slash_prefixed = 0; 858 FILEDESC_LOCK(fdp); 859 for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) { 860 ASSERT_VOP_LOCKED(vp, "vn_fullpath"); 861 if (vp->v_vflag & VV_ROOT) { 862 if (vp->v_mount == NULL) { /* forced unmount */ 863 FILEDESC_UNLOCK(fdp); 864 free(buf, M_TEMP); 865 return (EBADF); 866 } 867 vp = vp->v_mount->mnt_vnodecovered; 868 continue; 869 } 870 if (vp != vn && vp->v_dd->v_id != vp->v_ddid) { 871 FILEDESC_UNLOCK(fdp); 872 numfullpathfail1++; 873 free(buf, M_TEMP); 874 return (ENOTDIR); 875 } 876 ncp = TAILQ_FIRST(&vp->v_cache_dst); 877 if (!ncp) { 878 FILEDESC_UNLOCK(fdp); 879 numfullpathfail2++; 880 free(buf, M_TEMP); 881 return (ENOENT); 882 } 883 if (vp != vn && ncp->nc_dvp != vp->v_dd) { 884 FILEDESC_UNLOCK(fdp); 885 numfullpathfail3++; 886 free(buf, M_TEMP); 887 return (EBADF); 888 } 889 for (i = ncp->nc_nlen - 1; i >= 0; i--) { 890 if (bp == buf) { 891 FILEDESC_UNLOCK(fdp); 892 numfullpathfail4++; 893 free(buf, M_TEMP); 894 return (ENOMEM); 895 } 896 *--bp = ncp->nc_name[i]; 897 } 898 if (bp == buf) { 899 FILEDESC_UNLOCK(fdp); 900 numfullpathfail4++; 901 free(buf, M_TEMP); 902 return (ENOMEM); 903 } 904 *--bp = '/'; 905 slash_prefixed = 1; 906 vp = ncp->nc_dvp; 907 } 908 if (!slash_prefixed) { 909 if (bp == buf) { 910 FILEDESC_UNLOCK(fdp); 911 numfullpathfail4++; 912 free(buf, M_TEMP); 913 return (ENOMEM); 914 } 915 *--bp = '/'; 916 } 917 FILEDESC_UNLOCK(fdp); 918 numfullpathfound++; 919 *retbuf = bp; 920 *freebuf = buf; 921 return (0); 922} 923