vfs_cache.c revision 211616
1139804Simp/*- 222521Sdyson * Copyright (c) 1989, 1993, 1995 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 522521Sdyson * This code is derived from software contributed to Berkeley by 622521Sdyson * Poul-Henning Kamp of the FreeBSD Project. 722521Sdyson * 81541Srgrimes * Redistribution and use in source and binary forms, with or without 91541Srgrimes * modification, are permitted provided that the following conditions 101541Srgrimes * are met: 111541Srgrimes * 1. Redistributions of source code must retain the above copyright 121541Srgrimes * notice, this list of conditions and the following disclaimer. 131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer in the 151541Srgrimes * documentation and/or other materials provided with the distribution. 161541Srgrimes * 4. Neither the name of the University nor the names of its contributors 171541Srgrimes * may be used to endorse or promote products derived from this software 181541Srgrimes * without specific prior written permission. 191541Srgrimes * 201541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 211541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 221541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 231541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 241541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 251541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 261541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 271541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 281541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 291541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 301541Srgrimes * SUCH DAMAGE. 311541Srgrimes * 3223521Sbde * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 331541Srgrimes */ 341541Srgrimes 35116182Sobrien#include <sys/cdefs.h> 36116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/vfs_cache.c 211616 2010-08-22 11:18:57Z rpaulo $"); 37116182Sobrien 38190829Srwatson#include "opt_kdtrace.h" 39190141Skib#include "opt_ktrace.h" 40190141Skib 411541Srgrimes#include <sys/param.h> 42183155Sjhb#include <sys/filedesc.h> 43183155Sjhb#include <sys/fnv_hash.h> 4412820Sphk#include <sys/kernel.h> 4576166Smarkm#include <sys/lock.h> 46183155Sjhb#include <sys/malloc.h> 47183155Sjhb#include <sys/mount.h> 481541Srgrimes#include <sys/namei.h> 49183155Sjhb#include <sys/proc.h> 50187839Sjhb#include <sys/rwlock.h> 51190829Srwatson#include <sys/sdt.h> 52102870Siedowse#include <sys/syscallsubr.h> 53183155Sjhb#include <sys/sysctl.h> 5451906Sphk#include <sys/sysproto.h> 55183155Sjhb#include <sys/systm.h> 56183155Sjhb#include <sys/vnode.h> 57190141Skib#ifdef KTRACE 58190141Skib#include <sys/ktrace.h> 59190141Skib#endif 601541Srgrimes 61116289Sdes#include <vm/uma.h> 62116289Sdes 63190829SrwatsonSDT_PROVIDER_DECLARE(vfs); 64211616SrpauloSDT_PROBE_DEFINE3(vfs, namecache, enter, done, done, "struct vnode *", "char *", 65190829Srwatson "struct vnode *"); 66211616SrpauloSDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, done, "struct vnode *", 67190829Srwatson "char *"); 68211616SrpauloSDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, entry, "struct vnode *"); 69211616SrpauloSDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, hit, "struct vnode *", 70190829Srwatson "struct char *", "struct vnode *"); 71211616SrpauloSDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, miss, "struct vnode *"); 72211616SrpauloSDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, return, "int", 73211616Srpaulo "struct vnode *", "struct char *"); 74211616SrpauloSDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, hit, "struct vnode *", "char *", 75190829Srwatson "struct vnode *"); 76211616SrpauloSDT_PROBE_DEFINE2(vfs, namecache, lookup, hit_negative, hit-negative, 77211616Srpaulo "struct vnode *", "char *"); 78211616SrpauloSDT_PROBE_DEFINE2(vfs, namecache, lookup, miss, miss, "struct vnode *", 79190829Srwatson "char *"); 80211616SrpauloSDT_PROBE_DEFINE1(vfs, namecache, purge, done, done, "struct vnode *"); 81211616SrpauloSDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done, done, "struct vnode *"); 82211616SrpauloSDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, done, "struct mount *"); 83211616SrpauloSDT_PROBE_DEFINE3(vfs, namecache, zap, done, done, "struct vnode *", "char *", 84190829Srwatson "struct vnode *"); 85211616SrpauloSDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, done, "struct vnode *", 86190829Srwatson "char *"); 87190829Srwatson 8851906Sphk/* 8959652Sgreen * This structure describes the elements in the cache of recent 9059652Sgreen * names looked up by namei. 9159652Sgreen */ 9259652Sgreen 9359652Sgreenstruct namecache { 9460938Sjake LIST_ENTRY(namecache) nc_hash; /* hash chain */ 9560938Sjake LIST_ENTRY(namecache) nc_src; /* source vnode list */ 9660938Sjake TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ 9759652Sgreen struct vnode *nc_dvp; /* vnode of parent of name */ 9859652Sgreen struct vnode *nc_vp; /* vnode the name refers to */ 9959652Sgreen u_char nc_flag; /* flag bits */ 10059652Sgreen u_char nc_nlen; /* length of name */ 101190829Srwatson char nc_name[0]; /* segment name + nul */ 10259652Sgreen}; 10359652Sgreen 10459652Sgreen/* 1051541Srgrimes * Name caching works as follows: 1061541Srgrimes * 1071541Srgrimes * Names found by directory scans are retained in a cache 1081541Srgrimes * for future reference. It is managed LRU, so frequently 1091541Srgrimes * used names will hang around. Cache is indexed by hash value 1101541Srgrimes * obtained from (vp, name) where vp refers to the directory 1111541Srgrimes * containing name. 1121541Srgrimes * 11322521Sdyson * If it is a "negative" entry, (i.e. for a name that is known NOT to 11422521Sdyson * exist) the vnode pointer will be NULL. 1156968Sphk * 1161541Srgrimes * Upon reaching the last segment of a path, if the reference 1171541Srgrimes * is for DELETE, or NOCACHE is set (rewrite), and the 1181541Srgrimes * name is located in the cache, it will be dropped. 1191541Srgrimes */ 1201541Srgrimes 1211541Srgrimes/* 1221541Srgrimes * Structures associated with name cacheing. 1231541Srgrimes */ 12474501Speter#define NCHHASH(hash) \ 12574501Speter (&nchashtbl[(hash) & nchash]) 12660938Sjakestatic LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ 12760938Sjakestatic TAILQ_HEAD(, namecache) ncneg; /* Hash Table */ 12823521Sbdestatic u_long nchash; /* size of hash table */ 12962622SjhbSYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, ""); 13025453Sphkstatic u_long ncnegfactor = 16; /* ratio of negative entries */ 13162622SjhbSYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, ""); 13291690Seivindstatic u_long numneg; /* number of cache entries allocated */ 13362622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, ""); 13423521Sbdestatic u_long numcache; /* number of cache entries allocated */ 13562622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, ""); 13675654Stanimurastatic u_long numcachehv; /* number of cache entries with vnodes held */ 13775654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, ""); 13884249Sdillon#if 0 13975654Stanimurastatic u_long numcachepl; /* number of cache purge for leaf entries */ 14075654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, ""); 14184249Sdillon#endif 14222521Sdysonstruct nchstats nchstats; /* cache effectiveness statistics */ 1431541Srgrimes 144187839Sjhbstatic struct rwlock cache_lock; 145187839SjhbRW_SYSINIT(vfscache, &cache_lock, "Name Cache"); 146120792Sjeff 147187839Sjhb#define CACHE_UPGRADE_LOCK() rw_try_upgrade(&cache_lock) 148187839Sjhb#define CACHE_RLOCK() rw_rlock(&cache_lock) 149187839Sjhb#define CACHE_RUNLOCK() rw_runlock(&cache_lock) 150187839Sjhb#define CACHE_WLOCK() rw_wlock(&cache_lock) 151187839Sjhb#define CACHE_WUNLOCK() rw_wunlock(&cache_lock) 152120792Sjeff 153116289Sdes/* 154116289Sdes * UMA zones for the VFS cache. 155116289Sdes * 156116289Sdes * The small cache is used for entries with short names, which are the 157116289Sdes * most common. The large cache is used for entries which are too big to 158116289Sdes * fit in the small cache. 159116289Sdes */ 160116289Sdesstatic uma_zone_t cache_zone_small; 161116289Sdesstatic uma_zone_t cache_zone_large; 162116289Sdes 163190829Srwatson#define CACHE_PATH_CUTOFF 35 164190829Srwatson#define CACHE_ZONE_SMALL (sizeof(struct namecache) + CACHE_PATH_CUTOFF \ 165190829Srwatson + 1) 166190829Srwatson#define CACHE_ZONE_LARGE (sizeof(struct namecache) + NAME_MAX + 1) 167116289Sdes 168116289Sdes#define cache_alloc(len) uma_zalloc(((len) <= CACHE_PATH_CUTOFF) ? \ 169116289Sdes cache_zone_small : cache_zone_large, M_WAITOK) 170116289Sdes#define cache_free(ncp) do { \ 171116289Sdes if (ncp != NULL) \ 172116289Sdes uma_zfree(((ncp)->nc_nlen <= CACHE_PATH_CUTOFF) ? \ 173116289Sdes cache_zone_small : cache_zone_large, (ncp)); \ 174116289Sdes} while (0) 175116289Sdes 17623521Sbdestatic int doingcache = 1; /* 1 => enable the cache */ 17723521SbdeSYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, ""); 17891690Seivind 17991690Seivind/* Export size information to userland */ 180157799SjmgSYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, 0, 181157799Sjmg sizeof(struct namecache), ""); 18223521Sbde 18329788Sphk/* 18429788Sphk * The new name cache statistics 18529788Sphk */ 186141627Sphkstatic SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics"); 18729788Sphk#define STATNODE(mode, name, var) \ 18862622Sjhb SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, ""); 18929788SphkSTATNODE(CTLFLAG_RD, numneg, &numneg); 19029788SphkSTATNODE(CTLFLAG_RD, numcache, &numcache); 19129788Sphkstatic u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls); 19229788Sphkstatic u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits); 19329788Sphkstatic u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits); 19429788Sphkstatic u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks); 19529788Sphkstatic u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss); 19629804Sphkstatic u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap); 19729788Sphkstatic u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps); 19829788Sphkstatic u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits); 19929788Sphkstatic u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps); 20029788Sphkstatic u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits); 201187839Sjhbstatic u_long numupgrades; STATNODE(CTLFLAG_RD, numupgrades, &numupgrades); 20229788Sphk 203187658SjhbSYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD | CTLFLAG_MPSAFE, 204187658Sjhb &nchstats, sizeof(nchstats), "LU", "VFS cache effectiveness statistics"); 20529788Sphk 20668922Srwatson 20768922Srwatson 208140712Sjeffstatic void cache_zap(struct namecache *ncp); 209194601Skibstatic int vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf, 210194601Skib u_int *buflen); 211144318Sdasstatic int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, 212144318Sdas char *buf, char **retbuf, u_int buflen); 2136968Sphk 21469774Sphkstatic MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); 21551906Sphk 21622521Sdyson/* 21725453Sphk * Flags in namecache.nc_flag 21825453Sphk */ 219190533Skan#define NCF_WHITE 0x01 220190533Skan#define NCF_ISDOTDOT 0x02 22175402Speter 222189593Sjhb#ifdef DIAGNOSTIC 22325453Sphk/* 22475402Speter * Grab an atomic snapshot of the name cache hash chain lengths 22575402Speter */ 22675402SpeterSYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats"); 22775402Speter 22875402Speterstatic int 22975402Spetersysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS) 23075402Speter{ 23175402Speter int error; 23275402Speter struct nchashhead *ncpp; 23375402Speter struct namecache *ncp; 23475402Speter int n_nchash; 23575402Speter int count; 23675402Speter 23775402Speter n_nchash = nchash + 1; /* nchash is max index, not count */ 23875402Speter if (!req->oldptr) 23975402Speter return SYSCTL_OUT(req, 0, n_nchash * sizeof(int)); 24075402Speter 24175402Speter /* Scan hash tables for applicable entries */ 24275402Speter for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 243187839Sjhb CACHE_RLOCK(); 24475402Speter count = 0; 24575402Speter LIST_FOREACH(ncp, ncpp, nc_hash) { 24675402Speter count++; 24775402Speter } 248187839Sjhb CACHE_RUNLOCK(); 24998994Salfred error = SYSCTL_OUT(req, &count, sizeof(count)); 25075402Speter if (error) 25175402Speter return (error); 25275402Speter } 25375402Speter return (0); 25475402Speter} 255187658SjhbSYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD| 256187658Sjhb CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", 257187658Sjhb "nchash chain lengths"); 25875402Speter 25975402Speterstatic int 26075402Spetersysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS) 26175402Speter{ 26275402Speter int error; 26375402Speter struct nchashhead *ncpp; 26475402Speter struct namecache *ncp; 26575402Speter int n_nchash; 26675402Speter int count, maxlength, used, pct; 26775402Speter 26875402Speter if (!req->oldptr) 26975402Speter return SYSCTL_OUT(req, 0, 4 * sizeof(int)); 27075402Speter 27175402Speter n_nchash = nchash + 1; /* nchash is max index, not count */ 27275402Speter used = 0; 27375402Speter maxlength = 0; 27475402Speter 27575402Speter /* Scan hash tables for applicable entries */ 27675402Speter for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 27775402Speter count = 0; 278187839Sjhb CACHE_RLOCK(); 27975402Speter LIST_FOREACH(ncp, ncpp, nc_hash) { 28075402Speter count++; 28175402Speter } 282187839Sjhb CACHE_RUNLOCK(); 28375402Speter if (count) 28475402Speter used++; 28575402Speter if (maxlength < count) 28675402Speter maxlength = count; 28775402Speter } 28875402Speter n_nchash = nchash + 1; 28975402Speter pct = (used * 100 * 100) / n_nchash; 29098994Salfred error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash)); 29175402Speter if (error) 29275402Speter return (error); 29398994Salfred error = SYSCTL_OUT(req, &used, sizeof(used)); 29475402Speter if (error) 29575402Speter return (error); 29698994Salfred error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength)); 29775402Speter if (error) 29875402Speter return (error); 29998994Salfred error = SYSCTL_OUT(req, &pct, sizeof(pct)); 30075402Speter if (error) 30175402Speter return (error); 30275402Speter return (0); 30375402Speter} 304187658SjhbSYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD| 305187658Sjhb CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_nchash, "I", 306187658Sjhb "nchash chain lengths"); 307189593Sjhb#endif 30875402Speter 30975402Speter/* 310110952Sarr * cache_zap(): 311110952Sarr * 312110952Sarr * Removes a namecache entry from cache, whether it contains an actual 313110952Sarr * pointer to a vnode or if it is just a negative cache entry. 31422521Sdyson */ 31525453Sphkstatic void 316140712Sjeffcache_zap(ncp) 31725453Sphk struct namecache *ncp; 31825453Sphk{ 319120792Sjeff struct vnode *vp; 320120792Sjeff 321187839Sjhb rw_assert(&cache_lock, RA_WLOCKED); 322147326Sjeff CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp); 323190829Srwatson#ifdef KDTRACE_HOOKS 324190829Srwatson if (ncp->nc_vp != NULL) { 325190829Srwatson SDT_PROBE(vfs, namecache, zap, done, ncp->nc_dvp, 326190829Srwatson ncp->nc_name, ncp->nc_vp, 0, 0); 327190829Srwatson } else { 328190829Srwatson SDT_PROBE(vfs, namecache, zap_negative, done, ncp->nc_dvp, 329190829Srwatson ncp->nc_name, 0, 0, 0); 330190829Srwatson } 331190829Srwatson#endif 332120792Sjeff vp = NULL; 33325453Sphk LIST_REMOVE(ncp, nc_hash); 334190533Skan if (ncp->nc_flag & NCF_ISDOTDOT) { 335190533Skan if (ncp == ncp->nc_dvp->v_cache_dd) 336190533Skan ncp->nc_dvp->v_cache_dd = NULL; 337190533Skan } else { 338190533Skan LIST_REMOVE(ncp, nc_src); 339190533Skan if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) { 340190533Skan vp = ncp->nc_dvp; 341190533Skan numcachehv--; 342190533Skan } 34375654Stanimura } 34425453Sphk if (ncp->nc_vp) { 34525453Sphk TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); 346190533Skan if (ncp == ncp->nc_vp->v_cache_dd) 347190533Skan ncp->nc_vp->v_cache_dd = NULL; 34825453Sphk } else { 34925453Sphk TAILQ_REMOVE(&ncneg, ncp, nc_dst); 35025453Sphk numneg--; 35125453Sphk } 35225453Sphk numcache--; 353116289Sdes cache_free(ncp); 354120792Sjeff if (vp) 355120792Sjeff vdrop(vp); 35622521Sdyson} 3576968Sphk 35822521Sdyson/* 35923521Sbde * Lookup an entry in the cache 3606968Sphk * 3616968Sphk * Lookup is called with dvp pointing to the directory to search, 36222521Sdyson * cnp pointing to the name of the entry being sought. If the lookup 36322521Sdyson * succeeds, the vnode is returned in *vpp, and a status of -1 is 36422521Sdyson * returned. If the lookup determines that the name does not exist 36522521Sdyson * (negative cacheing), a status of ENOENT is returned. If the lookup 366183330Sjhb * fails, a status of zero is returned. If the directory vnode is 367183330Sjhb * recycled out from under us due to a forced unmount, a status of 368190387Sjhb * ENOENT is returned. 369144296Sjeff * 370144296Sjeff * vpp is locked and ref'd on return. If we're looking up DOTDOT, dvp is 371144296Sjeff * unlocked. If we're looking up . an extra ref is taken, but the lock is 372144296Sjeff * not recursively acquired. 3731541Srgrimes */ 3746968Sphk 3751541Srgrimesint 3761541Srgrimescache_lookup(dvp, vpp, cnp) 3771541Srgrimes struct vnode *dvp; 3781541Srgrimes struct vnode **vpp; 3791541Srgrimes struct componentname *cnp; 3801541Srgrimes{ 38151906Sphk struct namecache *ncp; 382209390Sed uint32_t hash; 383187839Sjhb int error, ltype, wlocked; 3841541Srgrimes 3856928Sphk if (!doingcache) { 3866928Sphk cnp->cn_flags &= ~MAKEENTRY; 3871541Srgrimes return (0); 3886928Sphk } 389144296Sjeffretry: 390187839Sjhb CACHE_RLOCK(); 391187839Sjhb wlocked = 0; 39229788Sphk numcalls++; 393187839Sjhb error = 0; 39429788Sphk 395187839Sjhbretry_wlocked: 39625453Sphk if (cnp->cn_nameptr[0] == '.') { 39725453Sphk if (cnp->cn_namelen == 1) { 39825453Sphk *vpp = dvp; 399147326Sjeff CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .", 400147326Sjeff dvp, cnp->cn_nameptr); 40129788Sphk dothits++; 402190829Srwatson SDT_PROBE(vfs, namecache, lookup, hit, dvp, ".", 403190829Srwatson *vpp, 0, 0); 404144296Sjeff goto success; 40525453Sphk } 40625453Sphk if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 40729788Sphk dotdothits++; 408190829Srwatson if (dvp->v_cache_dd == NULL) { 409190829Srwatson SDT_PROBE(vfs, namecache, lookup, miss, dvp, 410190829Srwatson "..", NULL, 0, 0); 411187839Sjhb goto unlock; 412190829Srwatson } 413190533Skan if ((cnp->cn_flags & MAKEENTRY) == 0) { 414190942Skib if (!wlocked && !CACHE_UPGRADE_LOCK()) 415190942Skib goto wlock; 416190533Skan if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT) 417190533Skan cache_zap(dvp->v_cache_dd); 418190533Skan dvp->v_cache_dd = NULL; 419196203Skib CACHE_WUNLOCK(); 420196203Skib return (0); 42125453Sphk } 422190533Skan if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT) 423190533Skan *vpp = dvp->v_cache_dd->nc_vp; 424190533Skan else 425190533Skan *vpp = dvp->v_cache_dd->nc_dvp; 426191081Skan /* Return failure if negative entry was found. */ 427191081Skan if (*vpp == NULL) { 428191082Skan ncp = dvp->v_cache_dd; 429191082Skan goto negative_success; 430191081Skan } 431147326Sjeff CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..", 432147326Sjeff dvp, cnp->cn_nameptr, *vpp); 433190829Srwatson SDT_PROBE(vfs, namecache, lookup, hit, dvp, "..", 434190829Srwatson *vpp, 0, 0); 435144296Sjeff goto success; 43625453Sphk } 4371541Srgrimes } 4386968Sphk 43974501Speter hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT); 440144319Sdas hash = fnv_32_buf(&dvp, sizeof(dvp), hash); 44174501Speter LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { 44229788Sphk numchecks++; 44325453Sphk if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && 44431879Sbde !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) 44522521Sdyson break; 4461541Srgrimes } 4476968Sphk 44822521Sdyson /* We failed to find an entry */ 449187839Sjhb if (ncp == NULL) { 450190829Srwatson SDT_PROBE(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr, 451190829Srwatson NULL, 0, 0); 45229804Sphk if ((cnp->cn_flags & MAKEENTRY) == 0) { 45329804Sphk nummisszap++; 45429804Sphk } else { 45529804Sphk nummiss++; 45629804Sphk } 45722521Sdyson nchstats.ncs_miss++; 458187839Sjhb goto unlock; 45922521Sdyson } 46022521Sdyson 4616968Sphk /* We don't want to have an entry, so dump it */ 4626928Sphk if ((cnp->cn_flags & MAKEENTRY) == 0) { 46329788Sphk numposzaps++; 4641541Srgrimes nchstats.ncs_badhits++; 465187839Sjhb if (!wlocked && !CACHE_UPGRADE_LOCK()) 466187839Sjhb goto wlock; 467140712Sjeff cache_zap(ncp); 468187839Sjhb CACHE_WUNLOCK(); 4696968Sphk return (0); 47023521Sbde } 4716968Sphk 4726968Sphk /* We found a "positive" match, return the vnode */ 473116201Sdes if (ncp->nc_vp) { 47429788Sphk numposhits++; 4751541Srgrimes nchstats.ncs_goodhits++; 4761541Srgrimes *vpp = ncp->nc_vp; 477147326Sjeff CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p", 478147326Sjeff dvp, cnp->cn_nameptr, *vpp, ncp); 479190829Srwatson SDT_PROBE(vfs, namecache, lookup, hit, dvp, ncp->nc_name, 480190829Srwatson *vpp, 0, 0); 481144296Sjeff goto success; 4821541Srgrimes } 4831541Srgrimes 484191082Skannegative_success: 4856968Sphk /* We found a negative match, and want to create it, so purge */ 4866968Sphk if (cnp->cn_nameiop == CREATE) { 48729788Sphk numnegzaps++; 4887013Sphk nchstats.ncs_badhits++; 489187839Sjhb if (!wlocked && !CACHE_UPGRADE_LOCK()) 490187839Sjhb goto wlock; 491140712Sjeff cache_zap(ncp); 492187839Sjhb CACHE_WUNLOCK(); 4936968Sphk return (0); 4946968Sphk } 4956968Sphk 496187839Sjhb if (!wlocked && !CACHE_UPGRADE_LOCK()) 497187839Sjhb goto wlock; 49829788Sphk numneghits++; 49922521Sdyson /* 500110967Sarr * We found a "negative" match, so we shift it to the end of 501110967Sarr * the "negative" cache entries queue to satisfy LRU. Also, 502110967Sarr * check to see if the entry is a whiteout; indicate this to 503110967Sarr * the componentname, if so. 50422521Sdyson */ 50525453Sphk TAILQ_REMOVE(&ncneg, ncp, nc_dst); 50625453Sphk TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 5076968Sphk nchstats.ncs_neghits++; 50825453Sphk if (ncp->nc_flag & NCF_WHITE) 50925453Sphk cnp->cn_flags |= ISWHITEOUT; 510190829Srwatson SDT_PROBE(vfs, namecache, lookup, hit_negative, dvp, ncp->nc_name, 511190829Srwatson 0, 0, 0); 512187839Sjhb CACHE_WUNLOCK(); 5136968Sphk return (ENOENT); 514144296Sjeff 515187839Sjhbwlock: 516187839Sjhb /* 517187839Sjhb * We need to update the cache after our lookup, so upgrade to 518187839Sjhb * a write lock and retry the operation. 519187839Sjhb */ 520187839Sjhb CACHE_RUNLOCK(); 521187839Sjhb CACHE_WLOCK(); 522187839Sjhb numupgrades++; 523187839Sjhb wlocked = 1; 524187839Sjhb goto retry_wlocked; 525187839Sjhb 526144296Sjeffsuccess: 527144296Sjeff /* 528144296Sjeff * On success we return a locked and ref'd vnode as per the lookup 529144296Sjeff * protocol. 530144296Sjeff */ 531144296Sjeff if (dvp == *vpp) { /* lookup on "." */ 532144296Sjeff VREF(*vpp); 533187839Sjhb if (wlocked) 534187839Sjhb CACHE_WUNLOCK(); 535187839Sjhb else 536187839Sjhb CACHE_RUNLOCK(); 537172274Spjd /* 538172274Spjd * When we lookup "." we still can be asked to lock it 539172274Spjd * differently... 540172274Spjd */ 541178046Spjd ltype = cnp->cn_lkflags & LK_TYPE_MASK; 542183330Sjhb if (ltype != VOP_ISLOCKED(*vpp)) { 543183330Sjhb if (ltype == LK_EXCLUSIVE) { 544183330Sjhb vn_lock(*vpp, LK_UPGRADE | LK_RETRY); 545183330Sjhb if ((*vpp)->v_iflag & VI_DOOMED) { 546183330Sjhb /* forced unmount */ 547183330Sjhb vrele(*vpp); 548183330Sjhb *vpp = NULL; 549190387Sjhb return (ENOENT); 550183330Sjhb } 551183330Sjhb } else 552183330Sjhb vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY); 553183330Sjhb } 554144296Sjeff return (-1); 555144296Sjeff } 556170000Spjd ltype = 0; /* silence gcc warning */ 557170000Spjd if (cnp->cn_flags & ISDOTDOT) { 558176559Sattilio ltype = VOP_ISLOCKED(dvp); 559175294Sattilio VOP_UNLOCK(dvp, 0); 560170000Spjd } 561144296Sjeff VI_LOCK(*vpp); 562187839Sjhb if (wlocked) 563187839Sjhb CACHE_WUNLOCK(); 564187839Sjhb else 565187839Sjhb CACHE_RUNLOCK(); 566176559Sattilio error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, cnp->cn_thread); 567190887Skib if (cnp->cn_flags & ISDOTDOT) { 568175202Sattilio vn_lock(dvp, ltype | LK_RETRY); 569190887Skib if (dvp->v_iflag & VI_DOOMED) { 570190887Skib if (error == 0) 571190887Skib vput(*vpp); 572190887Skib *vpp = NULL; 573190887Skib return (ENOENT); 574190887Skib } 575190887Skib } 576145006Sjeff if (error) { 577144296Sjeff *vpp = NULL; 578144296Sjeff goto retry; 579144296Sjeff } 580178046Spjd if ((cnp->cn_flags & ISLASTCN) && 581178046Spjd (cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) { 582178046Spjd ASSERT_VOP_ELOCKED(*vpp, "cache_lookup"); 583178046Spjd } 584144296Sjeff return (-1); 585187839Sjhb 586187839Sjhbunlock: 587187839Sjhb if (wlocked) 588187839Sjhb CACHE_WUNLOCK(); 589187839Sjhb else 590187839Sjhb CACHE_RUNLOCK(); 591187839Sjhb return (0); 5921541Srgrimes} 5931541Srgrimes 5941541Srgrimes/* 5956968Sphk * Add an entry to the cache. 5961541Srgrimes */ 5971549Srgrimesvoid 5981541Srgrimescache_enter(dvp, vp, cnp) 5991541Srgrimes struct vnode *dvp; 6001541Srgrimes struct vnode *vp; 6011541Srgrimes struct componentname *cnp; 6021541Srgrimes{ 603185557Skib struct namecache *ncp, *n2; 60451906Sphk struct nchashhead *ncpp; 605209390Sed uint32_t hash; 606190533Skan int flag; 607120792Sjeff int hold; 608120792Sjeff int zap; 60951906Sphk int len; 6101541Srgrimes 611147326Sjeff CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr); 612147296Sjeff VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp, 613206671Skib ("cache_enter: Adding a doomed vnode")); 614206894Skib VNASSERT(dvp == NULL || (dvp->v_iflag & VI_DOOMED) == 0, dvp, 615206894Skib ("cache_enter: Doomed vnode used as src")); 616147296Sjeff 6171541Srgrimes if (!doingcache) 6181541Srgrimes return; 6196968Sphk 620187460Smckay /* 621187460Smckay * Avoid blowout in namecache entries. 622187460Smckay */ 623187460Smckay if (numcache >= desiredvnodes * 2) 624187460Smckay return; 625187460Smckay 626190533Skan flag = 0; 62725453Sphk if (cnp->cn_nameptr[0] == '.') { 628190533Skan if (cnp->cn_namelen == 1) 62925453Sphk return; 63025453Sphk if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 631187839Sjhb CACHE_WLOCK(); 632190533Skan /* 633190533Skan * If dotdot entry already exists, just retarget it 634190533Skan * to new parent vnode, otherwise continue with new 635190533Skan * namecache entry allocation. 636190533Skan */ 637191218Skan if ((ncp = dvp->v_cache_dd) != NULL && 638191218Skan ncp->nc_flag & NCF_ISDOTDOT) { 639191218Skan KASSERT(ncp->nc_dvp == dvp, 640191218Skan ("wrong isdotdot parent")); 641191218Skan if (ncp->nc_vp != NULL) 642190533Skan TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, 643190533Skan ncp, nc_dst); 644191218Skan else 645191218Skan TAILQ_REMOVE(&ncneg, ncp, nc_dst); 646191218Skan if (vp != NULL) 647190533Skan TAILQ_INSERT_HEAD(&vp->v_cache_dst, 648190533Skan ncp, nc_dst); 649191218Skan else 650191218Skan TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 651191218Skan ncp->nc_vp = vp; 652191218Skan CACHE_WUNLOCK(); 653191218Skan return; 654190533Skan } 655190533Skan dvp->v_cache_dd = NULL; 656190829Srwatson SDT_PROBE(vfs, namecache, enter, done, dvp, "..", vp, 657190829Srwatson 0, 0); 658187839Sjhb CACHE_WUNLOCK(); 659190533Skan flag = NCF_ISDOTDOT; 66025453Sphk } 6616968Sphk } 662116201Sdes 663120792Sjeff hold = 0; 664120792Sjeff zap = 0; 665182061Sjhb 666182061Sjhb /* 667182061Sjhb * Calculate the hash key and setup as much of the new 668182061Sjhb * namecache entry as possible before acquiring the lock. 669182061Sjhb */ 670116289Sdes ncp = cache_alloc(cnp->cn_namelen); 671182061Sjhb ncp->nc_vp = vp; 672182061Sjhb ncp->nc_dvp = dvp; 673190533Skan ncp->nc_flag = flag; 674182061Sjhb len = ncp->nc_nlen = cnp->cn_namelen; 675182061Sjhb hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT); 676190829Srwatson strlcpy(ncp->nc_name, cnp->cn_nameptr, len + 1); 677182061Sjhb hash = fnv_32_buf(&dvp, sizeof(dvp), hash); 678187839Sjhb CACHE_WLOCK(); 679182061Sjhb 680182061Sjhb /* 681186600Skib * See if this vnode or negative entry is already in the cache 682186600Skib * with this name. This can happen with concurrent lookups of 683186600Skib * the same path name. 684182061Sjhb */ 685186600Skib ncpp = NCHHASH(hash); 686186600Skib LIST_FOREACH(n2, ncpp, nc_hash) { 687186600Skib if (n2->nc_dvp == dvp && 688186600Skib n2->nc_nlen == cnp->cn_namelen && 689186600Skib !bcmp(n2->nc_name, cnp->cn_nameptr, n2->nc_nlen)) { 690187839Sjhb CACHE_WUNLOCK(); 691186600Skib cache_free(ncp); 692186600Skib return; 693182061Sjhb } 694185557Skib } 695182061Sjhb 696190945Skan if (flag == NCF_ISDOTDOT) { 697190945Skan /* 698190945Skan * See if we are trying to add .. entry, but some other lookup 699190945Skan * has populated v_cache_dd pointer already. 700190945Skan */ 701190945Skan if (dvp->v_cache_dd != NULL) { 702190945Skan CACHE_WUNLOCK(); 703190945Skan cache_free(ncp); 704190945Skan return; 705190945Skan } 706190945Skan KASSERT(vp == NULL || vp->v_type == VDIR, 707190945Skan ("wrong vnode type %p", vp)); 708190945Skan dvp->v_cache_dd = ncp; 709190533Skan } 710190533Skan 71125453Sphk numcache++; 71228954Sphk if (!vp) { 71325453Sphk numneg++; 714190533Skan if (cnp->cn_flags & ISWHITEOUT) 715190533Skan ncp->nc_flag |= NCF_WHITE; 71629071Sphk } else if (vp->v_type == VDIR) { 717190945Skan if (flag != NCF_ISDOTDOT) { 718190533Skan if ((n2 = vp->v_cache_dd) != NULL && 719190533Skan (n2->nc_flag & NCF_ISDOTDOT) != 0) 720190533Skan cache_zap(n2); 721190533Skan vp->v_cache_dd = ncp; 722190533Skan } 723144319Sdas } else { 724190533Skan vp->v_cache_dd = NULL; 72528954Sphk } 72623521Sbde 72722521Sdyson /* 728182061Sjhb * Insert the new namecache entry into the appropriate chain 729182061Sjhb * within the cache entries table. 73022521Sdyson */ 7316928Sphk LIST_INSERT_HEAD(ncpp, ncp, nc_hash); 732190533Skan if (flag != NCF_ISDOTDOT) { 733190533Skan if (LIST_EMPTY(&dvp->v_cache_src)) { 734190533Skan hold = 1; 735190533Skan numcachehv++; 736190533Skan } 737190533Skan LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); 73875654Stanimura } 739190533Skan 740110967Sarr /* 741110967Sarr * If the entry is "negative", we place it into the 742110967Sarr * "negative" cache queue, otherwise, we place it into the 743110967Sarr * destination vnode's cache entries queue. 744110967Sarr */ 74525453Sphk if (vp) { 74625453Sphk TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); 747190829Srwatson SDT_PROBE(vfs, namecache, enter, done, dvp, ncp->nc_name, vp, 748190829Srwatson 0, 0); 74925453Sphk } else { 75025453Sphk TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 751190829Srwatson SDT_PROBE(vfs, namecache, enter_negative, done, dvp, 752190829Srwatson ncp->nc_name, 0, 0, 0); 75325453Sphk } 75451906Sphk if (numneg * ncnegfactor > numcache) { 75525453Sphk ncp = TAILQ_FIRST(&ncneg); 756120792Sjeff zap = 1; 75725453Sphk } 758120792Sjeff if (hold) 759120792Sjeff vhold(dvp); 760120792Sjeff if (zap) 761140712Sjeff cache_zap(ncp); 762187839Sjhb CACHE_WUNLOCK(); 7631541Srgrimes} 7641541Srgrimes 7651541Srgrimes/* 7661541Srgrimes * Name cache initialization, from vfs_init() when we are booting 7671541Srgrimes */ 76869664Speterstatic void 76969664Speternchinit(void *dummy __unused) 7701541Srgrimes{ 77123521Sbde 77225453Sphk TAILQ_INIT(&ncneg); 773116289Sdes 774116289Sdes cache_zone_small = uma_zcreate("S VFS Cache", CACHE_ZONE_SMALL, NULL, 775116289Sdes NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); 776116289Sdes cache_zone_large = uma_zcreate("L VFS Cache", CACHE_ZONE_LARGE, NULL, 777116289Sdes NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); 778116289Sdes 77969664Speter nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash); 7801541Srgrimes} 781177253SrwatsonSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL); 7821541Srgrimes 78369664Speter 7841541Srgrimes/* 78546011Sphk * Invalidate all entries to a particular vnode. 7861541Srgrimes */ 7871549Srgrimesvoid 7881541Srgrimescache_purge(vp) 7891541Srgrimes struct vnode *vp; 7901541Srgrimes{ 7911541Srgrimes 792147326Sjeff CTR1(KTR_VFS, "cache_purge(%p)", vp); 793190829Srwatson SDT_PROBE(vfs, namecache, purge, done, vp, 0, 0, 0, 0); 794187839Sjhb CACHE_WLOCK(); 795147331Sjeff while (!LIST_EMPTY(&vp->v_cache_src)) 796147331Sjeff cache_zap(LIST_FIRST(&vp->v_cache_src)); 797116201Sdes while (!TAILQ_EMPTY(&vp->v_cache_dst)) 798140712Sjeff cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); 799190533Skan if (vp->v_cache_dd != NULL) { 800190533Skan KASSERT(vp->v_cache_dd->nc_flag & NCF_ISDOTDOT, 801190533Skan ("lost dotdot link")); 802190533Skan cache_zap(vp->v_cache_dd); 803190533Skan } 804190533Skan KASSERT(vp->v_cache_dd == NULL, ("incomplete purge")); 805187839Sjhb CACHE_WUNLOCK(); 8061541Srgrimes} 8071541Srgrimes 8081541Srgrimes/* 809188833Sjhb * Invalidate all negative entries for a particular directory vnode. 810188833Sjhb */ 811188833Sjhbvoid 812188833Sjhbcache_purge_negative(vp) 813188833Sjhb struct vnode *vp; 814188833Sjhb{ 815188833Sjhb struct namecache *cp, *ncp; 816188833Sjhb 817188833Sjhb CTR1(KTR_VFS, "cache_purge_negative(%p)", vp); 818190829Srwatson SDT_PROBE(vfs, namecache, purge_negative, done, vp, 0, 0, 0, 0); 819188833Sjhb CACHE_WLOCK(); 820188833Sjhb LIST_FOREACH_SAFE(cp, &vp->v_cache_src, nc_src, ncp) { 821188833Sjhb if (cp->nc_vp == NULL) 822188833Sjhb cache_zap(cp); 823188833Sjhb } 824188833Sjhb CACHE_WUNLOCK(); 825188833Sjhb} 826188833Sjhb 827188833Sjhb/* 8286968Sphk * Flush all entries referencing a particular filesystem. 8291541Srgrimes */ 8301549Srgrimesvoid 8311541Srgrimescache_purgevfs(mp) 8321541Srgrimes struct mount *mp; 8331541Srgrimes{ 8346968Sphk struct nchashhead *ncpp; 83522521Sdyson struct namecache *ncp, *nnp; 8361541Srgrimes 8376968Sphk /* Scan hash tables for applicable entries */ 838190829Srwatson SDT_PROBE(vfs, namecache, purgevfs, done, mp, 0, 0, 0, 0); 839187839Sjhb CACHE_WLOCK(); 84029071Sphk for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { 841169999Spjd LIST_FOREACH_SAFE(ncp, ncpp, nc_hash, nnp) { 842169999Spjd if (ncp->nc_dvp->v_mount == mp) 843169999Spjd cache_zap(ncp); 8441541Srgrimes } 8451541Srgrimes } 846187839Sjhb CACHE_WUNLOCK(); 8471541Srgrimes} 84828787Sphk 84928787Sphk/* 85028787Sphk * Perform canonical checks and cache lookup and pass on to filesystem 85128787Sphk * through the vop_cachedlookup only if needed. 85228787Sphk */ 85328787Sphk 85428787Sphkint 85528787Sphkvfs_cache_lookup(ap) 85628787Sphk struct vop_lookup_args /* { 85728787Sphk struct vnode *a_dvp; 85828787Sphk struct vnode **a_vpp; 85928787Sphk struct componentname *a_cnp; 86028787Sphk } */ *ap; 86128787Sphk{ 862144296Sjeff struct vnode *dvp; 86328787Sphk int error; 86428787Sphk struct vnode **vpp = ap->a_vpp; 86528787Sphk struct componentname *cnp = ap->a_cnp; 86628787Sphk struct ucred *cred = cnp->cn_cred; 86728787Sphk int flags = cnp->cn_flags; 86883366Sjulian struct thread *td = cnp->cn_thread; 86928787Sphk 87028787Sphk *vpp = NULL; 87165665Sbp dvp = ap->a_dvp; 87228787Sphk 87365665Sbp if (dvp->v_type != VDIR) 874116201Sdes return (ENOTDIR); 87528787Sphk 87665665Sbp if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && 87728787Sphk (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 87828787Sphk return (EROFS); 87928787Sphk 88083366Sjulian error = VOP_ACCESS(dvp, VEXEC, cred, td); 88128787Sphk if (error) 88228787Sphk return (error); 88328787Sphk 88465665Sbp error = cache_lookup(dvp, vpp, cnp); 885144296Sjeff if (error == 0) 886144287Sjeff return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 887183330Sjhb if (error == -1) 888183330Sjhb return (0); 889183330Sjhb return (error); 89028787Sphk} 89151906Sphk 89251906Sphk 89351906Sphk#ifndef _SYS_SYSPROTO_H_ 89451906Sphkstruct __getcwd_args { 89551906Sphk u_char *buf; 89651906Sphk u_int buflen; 89751906Sphk}; 89851906Sphk#endif 89951906Sphk 90091690Seivind/* 90191690Seivind * XXX All of these sysctls would probably be more productive dead. 90291690Seivind */ 90351906Sphkstatic int disablecwd; 90491690SeivindSYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, 90591690Seivind "Disable the getcwd syscall"); 90651906Sphk 907167232Srwatson/* Implementation of the getcwd syscall. */ 90851906Sphkint 90983366Sjulian__getcwd(td, uap) 91083366Sjulian struct thread *td; 91151906Sphk struct __getcwd_args *uap; 91251906Sphk{ 913112430Sphk 914102870Siedowse return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen)); 915102870Siedowse} 916102870Siedowse 917102870Siedowseint 918112430Sphkkern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen) 919102870Siedowse{ 920102870Siedowse char *bp, *tmpbuf; 92151906Sphk struct filedesc *fdp; 922185298Smarcus struct vnode *cdir, *rdir; 923185298Smarcus int error, vfslocked; 92451906Sphk 925112430Sphk if (disablecwd) 92651906Sphk return (ENODEV); 927102870Siedowse if (buflen < 2) 92851906Sphk return (EINVAL); 929102870Siedowse if (buflen > MAXPATHLEN) 930102870Siedowse buflen = MAXPATHLEN; 931144318Sdas 932144318Sdas tmpbuf = malloc(buflen, M_TEMP, M_WAITOK); 933144318Sdas fdp = td->td_proc->p_fd; 934168355Srwatson FILEDESC_SLOCK(fdp); 935185298Smarcus cdir = fdp->fd_cdir; 936185298Smarcus VREF(cdir); 937185298Smarcus rdir = fdp->fd_rdir; 938185298Smarcus VREF(rdir); 939168355Srwatson FILEDESC_SUNLOCK(fdp); 940185298Smarcus error = vn_fullpath1(td, cdir, rdir, tmpbuf, &bp, buflen); 941185298Smarcus vfslocked = VFS_LOCK_GIANT(rdir->v_mount); 942185298Smarcus vrele(rdir); 943185298Smarcus VFS_UNLOCK_GIANT(vfslocked); 944185298Smarcus vfslocked = VFS_LOCK_GIANT(cdir->v_mount); 945185298Smarcus vrele(cdir); 946185298Smarcus VFS_UNLOCK_GIANT(vfslocked); 947144318Sdas 948144318Sdas if (!error) { 949144318Sdas if (bufseg == UIO_SYSSPACE) 950144318Sdas bcopy(bp, buf, strlen(bp) + 1); 951144318Sdas else 952144318Sdas error = copyout(bp, buf, strlen(bp) + 1); 953190141Skib#ifdef KTRACE 954190141Skib if (KTRPOINT(curthread, KTR_NAMEI)) 955190141Skib ktrnamei(bp); 956190141Skib#endif 957144318Sdas } 958102870Siedowse free(tmpbuf, M_TEMP); 95951906Sphk return (error); 96051906Sphk} 96151906Sphk 96259652Sgreen/* 96359652Sgreen * Thus begins the fullpath magic. 96459652Sgreen */ 96559652Sgreen 96659652Sgreen#undef STATNODE 96759652Sgreen#define STATNODE(name) \ 96859652Sgreen static u_int name; \ 96962622Sjhb SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "") 97059652Sgreen 97159652Sgreenstatic int disablefullpath; 97291690SeivindSYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0, 97391690Seivind "Disable the vn_fullpath function"); 97459652Sgreen 975144318Sdas/* These count for kern___getcwd(), too. */ 97659652SgreenSTATNODE(numfullpathcalls); 97759652SgreenSTATNODE(numfullpathfail1); 97859652SgreenSTATNODE(numfullpathfail2); 97959652SgreenSTATNODE(numfullpathfail4); 98059652SgreenSTATNODE(numfullpathfound); 98159652Sgreen 98291690Seivind/* 98391690Seivind * Retrieve the full filesystem path that correspond to a vnode from the name 98491690Seivind * cache (if available) 98591690Seivind */ 98659652Sgreenint 98785287Sdesvn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) 98885287Sdes{ 989144318Sdas char *buf; 99059652Sgreen struct filedesc *fdp; 991185298Smarcus struct vnode *rdir; 992185298Smarcus int error, vfslocked; 99359652Sgreen 99459652Sgreen if (disablefullpath) 99559652Sgreen return (ENODEV); 99685287Sdes if (vn == NULL) 99759652Sgreen return (EINVAL); 998144318Sdas 999111119Simp buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1000144318Sdas fdp = td->td_proc->p_fd; 1001168355Srwatson FILEDESC_SLOCK(fdp); 1002185298Smarcus rdir = fdp->fd_rdir; 1003185298Smarcus VREF(rdir); 1004168355Srwatson FILEDESC_SUNLOCK(fdp); 1005185298Smarcus error = vn_fullpath1(td, vn, rdir, buf, retbuf, MAXPATHLEN); 1006185298Smarcus vfslocked = VFS_LOCK_GIANT(rdir->v_mount); 1007185298Smarcus vrele(rdir); 1008185298Smarcus VFS_UNLOCK_GIANT(vfslocked); 1009144318Sdas 1010144318Sdas if (!error) 1011144318Sdas *freebuf = buf; 1012144318Sdas else 1013144318Sdas free(buf, M_TEMP); 1014144318Sdas return (error); 1015144318Sdas} 1016144318Sdas 1017144318Sdas/* 1018181060Scsjp * This function is similar to vn_fullpath, but it attempts to lookup the 1019181060Scsjp * pathname relative to the global root mount point. This is required for the 1020181060Scsjp * auditing sub-system, as audited pathnames must be absolute, relative to the 1021181060Scsjp * global root mount point. 1022181060Scsjp */ 1023181060Scsjpint 1024181060Scsjpvn_fullpath_global(struct thread *td, struct vnode *vn, 1025181060Scsjp char **retbuf, char **freebuf) 1026181060Scsjp{ 1027181060Scsjp char *buf; 1028181060Scsjp int error; 1029181060Scsjp 1030181060Scsjp if (disablefullpath) 1031181060Scsjp return (ENODEV); 1032181060Scsjp if (vn == NULL) 1033181060Scsjp return (EINVAL); 1034181060Scsjp buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1035181060Scsjp error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, MAXPATHLEN); 1036181060Scsjp if (!error) 1037181060Scsjp *freebuf = buf; 1038181060Scsjp else 1039181060Scsjp free(buf, M_TEMP); 1040181060Scsjp return (error); 1041181060Scsjp} 1042181060Scsjp 1043193174Skibint 1044194601Skibvn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen) 1045193174Skib{ 1046193174Skib int error; 1047193174Skib 1048193174Skib CACHE_RLOCK(); 1049194601Skib error = vn_vptocnp_locked(vp, cred, buf, buflen); 1050193174Skib if (error == 0) { 1051193174Skib /* 1052193174Skib * vn_vptocnp_locked() dropped hold acquired by 1053193174Skib * VOP_VPTOCNP immediately after locking the 1054193174Skib * cache. Since we are going to drop the cache rlock, 1055193174Skib * re-hold the result. 1056193174Skib */ 1057193174Skib vhold(*vp); 1058193174Skib CACHE_RUNLOCK(); 1059193174Skib } 1060193174Skib return (error); 1061193174Skib} 1062193174Skib 1063185956Smarcusstatic int 1064194601Skibvn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf, 1065194601Skib u_int *buflen) 1066185956Smarcus{ 1067185956Smarcus struct vnode *dvp; 1068193174Skib struct namecache *ncp; 1069185956Smarcus int error, vfslocked; 1070185956Smarcus 1071193174Skib TAILQ_FOREACH(ncp, &((*vp)->v_cache_dst), nc_dst) { 1072193174Skib if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) 1073193174Skib break; 1074193174Skib } 1075193174Skib if (ncp != NULL) { 1076193174Skib if (*buflen < ncp->nc_nlen) { 1077193174Skib CACHE_RUNLOCK(); 1078193174Skib numfullpathfail4++; 1079193174Skib error = ENOMEM; 1080193174Skib SDT_PROBE(vfs, namecache, fullpath, return, error, 1081193186Skib vp, NULL, 0, 0); 1082193174Skib return (error); 1083193174Skib } 1084193174Skib *buflen -= ncp->nc_nlen; 1085193174Skib memcpy(buf + *buflen, ncp->nc_name, ncp->nc_nlen); 1086193174Skib SDT_PROBE(vfs, namecache, fullpath, hit, ncp->nc_dvp, 1087193174Skib ncp->nc_name, vp, 0, 0); 1088193174Skib *vp = ncp->nc_dvp; 1089193174Skib return (0); 1090193174Skib } 1091193174Skib SDT_PROBE(vfs, namecache, fullpath, miss, vp, 0, 0, 0, 0); 1092193174Skib 1093185956Smarcus vhold(*vp); 1094187839Sjhb CACHE_RUNLOCK(); 1095185956Smarcus vfslocked = VFS_LOCK_GIANT((*vp)->v_mount); 1096185956Smarcus vn_lock(*vp, LK_SHARED | LK_RETRY); 1097194601Skib error = VOP_VPTOCNP(*vp, &dvp, cred, buf, buflen); 1098185956Smarcus VOP_UNLOCK(*vp, 0); 1099186455Skib vdrop(*vp); 1100185956Smarcus VFS_UNLOCK_GIANT(vfslocked); 1101185956Smarcus if (error) { 1102185956Smarcus numfullpathfail2++; 1103193186Skib SDT_PROBE(vfs, namecache, fullpath, return, error, vp, 1104193174Skib NULL, 0, 0); 1105185956Smarcus return (error); 1106185956Smarcus } 1107193174Skib 1108185956Smarcus *vp = dvp; 1109187839Sjhb CACHE_RLOCK(); 1110185956Smarcus if ((*vp)->v_iflag & VI_DOOMED) { 1111185956Smarcus /* forced unmount */ 1112190697Skan CACHE_RUNLOCK(); 1113185956Smarcus vdrop(*vp); 1114193174Skib error = ENOENT; 1115193186Skib SDT_PROBE(vfs, namecache, fullpath, return, error, vp, 1116193174Skib NULL, 0, 0); 1117193174Skib return (error); 1118185956Smarcus } 1119185956Smarcus vdrop(*vp); 1120185956Smarcus 1121185956Smarcus return (0); 1122185956Smarcus} 1123185956Smarcus 1124181060Scsjp/* 1125144318Sdas * The magic behind kern___getcwd() and vn_fullpath(). 1126144318Sdas */ 1127144318Sdasstatic int 1128144318Sdasvn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, 1129144318Sdas char *buf, char **retbuf, u_int buflen) 1130144318Sdas{ 1131193174Skib int error, slash_prefixed; 1132190829Srwatson#ifdef KDTRACE_HOOKS 1133190829Srwatson struct vnode *startvp = vp; 1134190829Srwatson#endif 1135144318Sdas 1136185956Smarcus buflen--; 1137193174Skib buf[buflen] = '\0'; 1138144318Sdas error = 0; 113959652Sgreen slash_prefixed = 0; 1140144318Sdas 1141190829Srwatson SDT_PROBE(vfs, namecache, fullpath, entry, vp, 0, 0, 0, 0); 1142193174Skib numfullpathcalls++; 1143187839Sjhb CACHE_RLOCK(); 1144144318Sdas if (vp->v_type != VDIR) { 1145194601Skib error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen); 1146193174Skib if (error) 1147190829Srwatson return (error); 1148193518Smarcus if (buflen == 0) { 1149193518Smarcus CACHE_RUNLOCK(); 1150193174Skib return (ENOMEM); 1151193518Smarcus } 1152193174Skib buf[--buflen] = '/'; 1153144318Sdas slash_prefixed = 1; 1154144318Sdas } 1155144318Sdas while (vp != rdir && vp != rootvnode) { 1156101308Sjeff if (vp->v_vflag & VV_ROOT) { 1157155385Sjeff if (vp->v_iflag & VI_DOOMED) { /* forced unmount */ 1158187839Sjhb CACHE_RUNLOCK(); 1159190387Sjhb error = ENOENT; 1160193186Skib SDT_PROBE(vfs, namecache, fullpath, return, 1161193186Skib error, vp, NULL, 0, 0); 1162144318Sdas break; 116359652Sgreen } 116459652Sgreen vp = vp->v_mount->mnt_vnodecovered; 116559652Sgreen continue; 116659652Sgreen } 1167185956Smarcus if (vp->v_type != VDIR) { 1168193174Skib CACHE_RUNLOCK(); 116959652Sgreen numfullpathfail1++; 1170144318Sdas error = ENOTDIR; 1171193186Skib SDT_PROBE(vfs, namecache, fullpath, return, 1172193186Skib error, vp, NULL, 0, 0); 1173144318Sdas break; 117459652Sgreen } 1175194601Skib error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen); 1176193174Skib if (error) 1177193174Skib break; 1178193174Skib if (buflen == 0) { 1179193518Smarcus CACHE_RUNLOCK(); 1180144318Sdas error = ENOMEM; 1181193186Skib SDT_PROBE(vfs, namecache, fullpath, return, error, 1182193186Skib startvp, NULL, 0, 0); 1183144318Sdas break; 118459652Sgreen } 1185193174Skib buf[--buflen] = '/'; 118659652Sgreen slash_prefixed = 1; 1187144318Sdas } 1188193174Skib if (error) 1189144318Sdas return (error); 119059652Sgreen if (!slash_prefixed) { 1191193174Skib if (buflen == 0) { 1192193174Skib CACHE_RUNLOCK(); 1193120792Sjeff numfullpathfail4++; 1194193186Skib SDT_PROBE(vfs, namecache, fullpath, return, ENOMEM, 1195193186Skib startvp, NULL, 0, 0); 119659652Sgreen return (ENOMEM); 1197193174Skib } 1198193174Skib buf[--buflen] = '/'; 119959652Sgreen } 120059652Sgreen numfullpathfound++; 1201187839Sjhb CACHE_RUNLOCK(); 1202144318Sdas 1203193186Skib SDT_PROBE(vfs, namecache, fullpath, return, 0, startvp, buf + buflen, 1204193174Skib 0, 0); 1205193174Skib *retbuf = buf + buflen; 120659652Sgreen return (0); 120759652Sgreen} 1208177782Skib 1209177782Skibint 1210177782Skibvn_commname(struct vnode *vp, char *buf, u_int buflen) 1211177782Skib{ 1212177782Skib struct namecache *ncp; 1213177782Skib int l; 1214177782Skib 1215187839Sjhb CACHE_RLOCK(); 1216190533Skan TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst) 1217190533Skan if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) 1218190533Skan break; 1219190533Skan if (ncp == NULL) { 1220187839Sjhb CACHE_RUNLOCK(); 1221177782Skib return (ENOENT); 1222177782Skib } 1223177782Skib l = min(ncp->nc_nlen, buflen - 1); 1224177782Skib memcpy(buf, ncp->nc_name, l); 1225187839Sjhb CACHE_RUNLOCK(); 1226177782Skib buf[l] = '\0'; 1227177782Skib return (0); 1228177782Skib} 1229