vfs_cache.c revision 92130
11541Srgrimes/* 222521Sdyson * Copyright (c) 1989, 1993, 1995 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 522521Sdyson * This code is derived from software contributed to Berkeley by 622521Sdyson * Poul-Henning Kamp of the FreeBSD Project. 722521Sdyson * 81541Srgrimes * Redistribution and use in source and binary forms, with or without 91541Srgrimes * modification, are permitted provided that the following conditions 101541Srgrimes * are met: 111541Srgrimes * 1. Redistributions of source code must retain the above copyright 121541Srgrimes * notice, this list of conditions and the following disclaimer. 131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer in the 151541Srgrimes * documentation and/or other materials provided with the distribution. 161541Srgrimes * 3. All advertising materials mentioning features or use of this software 171541Srgrimes * must display the following acknowledgement: 181541Srgrimes * This product includes software developed by the University of 191541Srgrimes * California, Berkeley and its contributors. 201541Srgrimes * 4. Neither the name of the University nor the names of its contributors 211541Srgrimes * may be used to endorse or promote products derived from this software 221541Srgrimes * without specific prior written permission. 231541Srgrimes * 241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 271541Srgrimes * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 291541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 301541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 321541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 331541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 341541Srgrimes * SUCH DAMAGE. 351541Srgrimes * 3623521Sbde * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 3750477Speter * $FreeBSD: head/sys/kern/vfs_cache.c 92130 2002-03-12 04:00:11Z jeff $ 381541Srgrimes */ 391541Srgrimes 401541Srgrimes#include <sys/param.h> 411541Srgrimes#include <sys/systm.h> 4212820Sphk#include <sys/kernel.h> 4376166Smarkm#include <sys/lock.h> 4489316Salfred#include <sys/mutex.h> 4512820Sphk#include <sys/sysctl.h> 461541Srgrimes#include <sys/mount.h> 471541Srgrimes#include <sys/vnode.h> 481541Srgrimes#include <sys/namei.h> 491541Srgrimes#include <sys/malloc.h> 5051906Sphk#include <sys/sysproto.h> 5151906Sphk#include <sys/proc.h> 5251906Sphk#include <sys/filedesc.h> 5374384Speter#include <sys/fnv_hash.h> 541541Srgrimes 5551906Sphk/* 5659652Sgreen * This structure describes the elements in the cache of recent 5759652Sgreen * names looked up by namei. 
 */

struct	namecache {
	LIST_ENTRY(namecache) nc_hash;	/* hash chain (bucket in nchashtbl) */
	LIST_ENTRY(namecache) nc_src;	/* source vnode list (nc_dvp's v_cache_src) */
	/*
	 * Destination list linkage: positive entries hang off nc_vp's
	 * v_cache_dst, negative entries (nc_vp == NULL) off the global
	 * ncneg list.
	 */
	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
	struct	vnode *nc_dvp;		/* vnode of parent of name */
	struct	vnode *nc_vp;		/* vnode the name refers to */
	u_char	nc_flag;		/* flag bits (NCF_*) */
	u_char	nc_nlen;		/* length of name */
	/*
	 * The name bytes are stored directly behind the structure; the
	 * entry is allocated with room for exactly nc_nlen bytes and the
	 * name is not NUL-terminated.
	 */
	char	nc_name[0];		/* segment name */
};

/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  Cache is indexed by hash value
 * obtained from (vp, name) where vp refers to the directory
 * containing name.
 *
 * If it is a "negative" entry, (i.e. for a name that is known NOT to
 * exist) the vnode pointer will be NULL.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 */

/*
 * Structures associated with name caching.
901541Srgrimes */ 9174501Speter#define NCHHASH(hash) \ 9274501Speter (&nchashtbl[(hash) & nchash]) 9360938Sjakestatic LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ 9460938Sjakestatic TAILQ_HEAD(, namecache) ncneg; /* Hash Table */ 9523521Sbdestatic u_long nchash; /* size of hash table */ 9662622SjhbSYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, ""); 9725453Sphkstatic u_long ncnegfactor = 16; /* ratio of negative entries */ 9862622SjhbSYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, ""); 9991690Seivindstatic u_long numneg; /* number of cache entries allocated */ 10062622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, ""); 10123521Sbdestatic u_long numcache; /* number of cache entries allocated */ 10262622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, ""); 10375654Stanimurastatic u_long numcachehv; /* number of cache entries with vnodes held */ 10475654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, ""); 10584249Sdillon#if 0 10675654Stanimurastatic u_long numcachepl; /* number of cache purge for leaf entries */ 10775654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, ""); 10884249Sdillon#endif 10922521Sdysonstruct nchstats nchstats; /* cache effectiveness statistics */ 1101541Srgrimes 11123521Sbdestatic int doingcache = 1; /* 1 => enable the cache */ 11223521SbdeSYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, ""); 11391690Seivind 11491690Seivind/* Export size information to userland */ 11525453SphkSYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), ""); 11625453SphkSYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), ""); 11723521Sbde 11829788Sphk/* 11929788Sphk * The new name cache statistics 12029788Sphk */ 12138984SbdeSYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics"); 12229788Sphk#define STATNODE(mode, name, var) \ 
12362622Sjhb SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, ""); 12429788SphkSTATNODE(CTLFLAG_RD, numneg, &numneg); 12529788SphkSTATNODE(CTLFLAG_RD, numcache, &numcache); 12629788Sphkstatic u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls); 12729788Sphkstatic u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits); 12829788Sphkstatic u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits); 12929788Sphkstatic u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks); 13029788Sphkstatic u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss); 13129804Sphkstatic u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap); 13229788Sphkstatic u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps); 13329788Sphkstatic u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits); 13429788Sphkstatic u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps); 13529788Sphkstatic u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits); 13629788Sphk 13768922SrwatsonSYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats, 13868922Srwatson sizeof(nchstats), "LU", "VFS cache effectiveness statistics"); 13929788Sphk 14068922Srwatson 14168922Srwatson 14225453Sphkstatic void cache_zap __P((struct namecache *ncp)); 1436968Sphk 14469774Sphkstatic MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); 14551906Sphk 14622521Sdyson/* 14725453Sphk * Flags in namecache.nc_flag 14825453Sphk */ 14925453Sphk#define NCF_WHITE 1 15075402Speter 15125453Sphk/* 15275402Speter * Grab an atomic snapshot of the name cache hash chain lengths 15375402Speter */ 15475402SpeterSYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats"); 15575402Speter 15675402Speterstatic int 15775402Spetersysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS) 15875402Speter{ 15975402Speter int error; 16075402Speter struct nchashhead *ncpp; 16175402Speter struct namecache *ncp; 16275402Speter int n_nchash; 16375402Speter 
int count; 16475402Speter 16575402Speter n_nchash = nchash + 1; /* nchash is max index, not count */ 16675402Speter if (!req->oldptr) 16775402Speter return SYSCTL_OUT(req, 0, n_nchash * sizeof(int)); 16875402Speter 16975402Speter /* Scan hash tables for applicable entries */ 17075402Speter for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 17175402Speter count = 0; 17275402Speter LIST_FOREACH(ncp, ncpp, nc_hash) { 17375402Speter count++; 17475402Speter } 17575402Speter error = SYSCTL_OUT(req, (caddr_t)&count, sizeof(count)); 17675402Speter if (error) 17775402Speter return (error); 17875402Speter } 17975402Speter return (0); 18075402Speter} 18175402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD, 18275402Speter 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths"); 18375402Speter 18475402Speterstatic int 18575402Spetersysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS) 18675402Speter{ 18775402Speter int error; 18875402Speter struct nchashhead *ncpp; 18975402Speter struct namecache *ncp; 19075402Speter int n_nchash; 19175402Speter int count, maxlength, used, pct; 19275402Speter 19375402Speter if (!req->oldptr) 19475402Speter return SYSCTL_OUT(req, 0, 4 * sizeof(int)); 19575402Speter 19675402Speter n_nchash = nchash + 1; /* nchash is max index, not count */ 19775402Speter used = 0; 19875402Speter maxlength = 0; 19975402Speter 20075402Speter /* Scan hash tables for applicable entries */ 20175402Speter for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 20275402Speter count = 0; 20375402Speter LIST_FOREACH(ncp, ncpp, nc_hash) { 20475402Speter count++; 20575402Speter } 20675402Speter if (count) 20775402Speter used++; 20875402Speter if (maxlength < count) 20975402Speter maxlength = count; 21075402Speter } 21175402Speter n_nchash = nchash + 1; 21275402Speter pct = (used * 100 * 100) / n_nchash; 21375402Speter error = SYSCTL_OUT(req, (caddr_t)&n_nchash, sizeof(n_nchash)); 21475402Speter if (error) 21575402Speter 
return (error); 21675402Speter error = SYSCTL_OUT(req, (caddr_t)&used, sizeof(used)); 21775402Speter if (error) 21875402Speter return (error); 21975402Speter error = SYSCTL_OUT(req, (caddr_t)&maxlength, sizeof(maxlength)); 22075402Speter if (error) 22175402Speter return (error); 22275402Speter error = SYSCTL_OUT(req, (caddr_t)&pct, sizeof(pct)); 22375402Speter if (error) 22475402Speter return (error); 22575402Speter return (0); 22675402Speter} 22775402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD, 22875402Speter 0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths"); 22975402Speter 23075402Speter/* 23122521Sdyson * Delete an entry from its hash list and move it to the front 23222521Sdyson * of the LRU list for immediate reuse. 23322521Sdyson */ 23425453Sphkstatic void 23525453Sphkcache_zap(ncp) 23625453Sphk struct namecache *ncp; 23725453Sphk{ 23825453Sphk LIST_REMOVE(ncp, nc_hash); 23925453Sphk LIST_REMOVE(ncp, nc_src); 24075654Stanimura if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) { 24128954Sphk vdrop(ncp->nc_dvp); 24275654Stanimura numcachehv--; 24375654Stanimura } 24425453Sphk if (ncp->nc_vp) { 24525453Sphk TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); 24625453Sphk } else { 24725453Sphk TAILQ_REMOVE(&ncneg, ncp, nc_dst); 24825453Sphk numneg--; 24925453Sphk } 25025453Sphk numcache--; 25151906Sphk free(ncp, M_VFSCACHE); 25222521Sdyson} 2536968Sphk 25422521Sdyson/* 25584249Sdillon * cache_leaf_test() 25684249Sdillon * 25784249Sdillon * Test whether this (directory) vnode's namei cache entry contains 25884249Sdillon * subdirectories or not. Used to determine whether the directory is 25984249Sdillon * a leaf in the namei cache or not. Note: the directory may still 26084249Sdillon * contain files in the namei cache. 26184249Sdillon * 26284249Sdillon * Returns 0 if the directory is a leaf, -1 if it isn't. 
26384249Sdillon */ 26484249Sdillonint 26584249Sdilloncache_leaf_test(struct vnode *vp) 26684249Sdillon{ 26784249Sdillon struct namecache *ncpc; 26884249Sdillon 26984249Sdillon for (ncpc = LIST_FIRST(&vp->v_cache_src); 27084249Sdillon ncpc != NULL; 27184249Sdillon ncpc = LIST_NEXT(ncpc, nc_src) 27284249Sdillon ) { 27384249Sdillon if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR) 27484249Sdillon return(-1); 27584249Sdillon } 27684249Sdillon return(0); 27784249Sdillon} 27884249Sdillon 27984249Sdillon/* 28023521Sbde * Lookup an entry in the cache 2816968Sphk * 28223521Sbde * We don't do this if the segment name is long, simply so the cache 2836968Sphk * can avoid holding long names (which would either waste space, or 2841541Srgrimes * add greatly to the complexity). 2851541Srgrimes * 2866968Sphk * Lookup is called with dvp pointing to the directory to search, 28722521Sdyson * cnp pointing to the name of the entry being sought. If the lookup 28822521Sdyson * succeeds, the vnode is returned in *vpp, and a status of -1 is 28922521Sdyson * returned. If the lookup determines that the name does not exist 29022521Sdyson * (negative cacheing), a status of ENOENT is returned. If the lookup 29122521Sdyson * fails, a status of zero is returned. 
2921541Srgrimes */ 2936968Sphk 2941541Srgrimesint 2951541Srgrimescache_lookup(dvp, vpp, cnp) 2961541Srgrimes struct vnode *dvp; 2971541Srgrimes struct vnode **vpp; 2981541Srgrimes struct componentname *cnp; 2991541Srgrimes{ 30051906Sphk struct namecache *ncp; 30174384Speter u_int32_t hash; 3021541Srgrimes 3036928Sphk if (!doingcache) { 3046928Sphk cnp->cn_flags &= ~MAKEENTRY; 3051541Srgrimes return (0); 3066928Sphk } 30725453Sphk 30829788Sphk numcalls++; 30929788Sphk 31025453Sphk if (cnp->cn_nameptr[0] == '.') { 31125453Sphk if (cnp->cn_namelen == 1) { 31225453Sphk *vpp = dvp; 31329788Sphk dothits++; 31425453Sphk return (-1); 31525453Sphk } 31625453Sphk if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 31729788Sphk dotdothits++; 31825453Sphk if (dvp->v_dd->v_id != dvp->v_ddid || 31925453Sphk (cnp->cn_flags & MAKEENTRY) == 0) { 32025453Sphk dvp->v_ddid = 0; 32125453Sphk return (0); 32225453Sphk } 32325453Sphk *vpp = dvp->v_dd; 32425453Sphk return (-1); 32525453Sphk } 3261541Srgrimes } 3276968Sphk 32874501Speter hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT); 32974501Speter hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 33074501Speter LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { 33129788Sphk numchecks++; 33225453Sphk if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && 33331879Sbde !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) 33422521Sdyson break; 3351541Srgrimes } 3366968Sphk 33722521Sdyson /* We failed to find an entry */ 33822521Sdyson if (ncp == 0) { 33929804Sphk if ((cnp->cn_flags & MAKEENTRY) == 0) { 34029804Sphk nummisszap++; 34129804Sphk } else { 34229804Sphk nummiss++; 34329804Sphk } 34422521Sdyson nchstats.ncs_miss++; 34522521Sdyson return (0); 34622521Sdyson } 34722521Sdyson 3486968Sphk /* We don't want to have an entry, so dump it */ 3496928Sphk if ((cnp->cn_flags & MAKEENTRY) == 0) { 35029788Sphk numposzaps++; 3511541Srgrimes nchstats.ncs_badhits++; 35225453Sphk cache_zap(ncp); 3536968Sphk return (0); 
35423521Sbde } 3556968Sphk 3566968Sphk /* We found a "positive" match, return the vnode */ 35722521Sdyson if (ncp->nc_vp) { 35829788Sphk numposhits++; 3591541Srgrimes nchstats.ncs_goodhits++; 3601541Srgrimes *vpp = ncp->nc_vp; 3611541Srgrimes return (-1); 3621541Srgrimes } 3631541Srgrimes 3646968Sphk /* We found a negative match, and want to create it, so purge */ 3656968Sphk if (cnp->cn_nameiop == CREATE) { 36629788Sphk numnegzaps++; 3677013Sphk nchstats.ncs_badhits++; 36825453Sphk cache_zap(ncp); 3696968Sphk return (0); 3706968Sphk } 3716968Sphk 37229788Sphk numneghits++; 37322521Sdyson /* 37422521Sdyson * We found a "negative" match, ENOENT notifies client of this match. 37522521Sdyson * The nc_vpid field records whether this is a whiteout. 37622521Sdyson */ 37725453Sphk TAILQ_REMOVE(&ncneg, ncp, nc_dst); 37825453Sphk TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 3796968Sphk nchstats.ncs_neghits++; 38025453Sphk if (ncp->nc_flag & NCF_WHITE) 38125453Sphk cnp->cn_flags |= ISWHITEOUT; 3826968Sphk return (ENOENT); 3831541Srgrimes} 3841541Srgrimes 3851541Srgrimes/* 3866968Sphk * Add an entry to the cache. 
3871541Srgrimes */ 3881549Srgrimesvoid 3891541Srgrimescache_enter(dvp, vp, cnp) 3901541Srgrimes struct vnode *dvp; 3911541Srgrimes struct vnode *vp; 3921541Srgrimes struct componentname *cnp; 3931541Srgrimes{ 39451906Sphk struct namecache *ncp; 39551906Sphk struct nchashhead *ncpp; 39674384Speter u_int32_t hash; 39751906Sphk int len; 3981541Srgrimes 3991541Srgrimes if (!doingcache) 4001541Srgrimes return; 4016968Sphk 40225453Sphk if (cnp->cn_nameptr[0] == '.') { 40325453Sphk if (cnp->cn_namelen == 1) { 40425453Sphk return; 4056928Sphk } 40625453Sphk if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 40725453Sphk if (vp) { 40825453Sphk dvp->v_dd = vp; 40925453Sphk dvp->v_ddid = vp->v_id; 41025453Sphk } else { 41125453Sphk dvp->v_dd = dvp; 41225453Sphk dvp->v_ddid = 0; 41325453Sphk } 41425453Sphk return; 41525453Sphk } 4166968Sphk } 41725453Sphk 41825453Sphk ncp = (struct namecache *) 41951906Sphk malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK); 42025453Sphk bzero((char *)ncp, sizeof *ncp); 42125453Sphk numcache++; 42228954Sphk if (!vp) { 42325453Sphk numneg++; 42428954Sphk ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0; 42529071Sphk } else if (vp->v_type == VDIR) { 42629071Sphk vp->v_dd = dvp; 42729071Sphk vp->v_ddid = dvp->v_id; 42828954Sphk } 42923521Sbde 43022521Sdyson /* 43122521Sdyson * Fill in cache info, if vp is NULL this is a "negative" cache entry. 43222521Sdyson * For negative entries, we have to record whether it is a whiteout. 43322521Sdyson * the whiteout flag is stored in the nc_vpid field which is 43422521Sdyson * otherwise unused. 
43522521Sdyson */ 4361541Srgrimes ncp->nc_vp = vp; 4371541Srgrimes ncp->nc_dvp = dvp; 43851906Sphk len = ncp->nc_nlen = cnp->cn_namelen; 43974501Speter hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT); 44074384Speter bcopy(cnp->cn_nameptr, ncp->nc_name, len); 44174501Speter hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 44274501Speter ncpp = NCHHASH(hash); 4436928Sphk LIST_INSERT_HEAD(ncpp, ncp, nc_hash); 44475654Stanimura if (LIST_EMPTY(&dvp->v_cache_src)) { 44528954Sphk vhold(dvp); 44675654Stanimura numcachehv++; 44775654Stanimura } 44825453Sphk LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); 44925453Sphk if (vp) { 45025453Sphk TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); 45125453Sphk } else { 45225453Sphk TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 45325453Sphk } 45451906Sphk if (numneg * ncnegfactor > numcache) { 45525453Sphk ncp = TAILQ_FIRST(&ncneg); 45625453Sphk cache_zap(ncp); 45725453Sphk } 4581541Srgrimes} 4591541Srgrimes 4601541Srgrimes/* 4611541Srgrimes * Name cache initialization, from vfs_init() when we are booting 4621541Srgrimes */ 46369664Speterstatic void 46469664Speternchinit(void *dummy __unused) 4651541Srgrimes{ 46623521Sbde 46725453Sphk TAILQ_INIT(&ncneg); 46869664Speter nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash); 4691541Srgrimes} 47069664SpeterSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL) 4711541Srgrimes 47269664Speter 4731541Srgrimes/* 47446011Sphk * Invalidate all entries to a particular vnode. 47523521Sbde * 47646011Sphk * Remove all entries in the namecache relating to this vnode and 47746011Sphk * change the v_id. We take the v_id from a global counter, since 47846011Sphk * it becomes a handy sequence number in crash-dumps that way. 47946011Sphk * No valid vnode will ever have (v_id == 0). 
48046011Sphk * 48146011Sphk * XXX: Only time and the size of v_id prevents this from failing: 48246011Sphk * XXX: In theory we should hunt down all (struct vnode*, v_id) 48346011Sphk * XXX: soft references and nuke them, at least on the global 48446011Sphk * XXX: v_id wraparound. The period of resistance can be extended 48546011Sphk * XXX: by incrementing each vnodes v_id individually instead of 48646011Sphk * XXX: using the global v_id. 4871541Srgrimes */ 48846011Sphk 4891549Srgrimesvoid 4901541Srgrimescache_purge(vp) 4911541Srgrimes struct vnode *vp; 4921541Srgrimes{ 49329094Sphk static u_long nextid; 4941541Srgrimes 49525453Sphk while (!LIST_EMPTY(&vp->v_cache_src)) 49625453Sphk cache_zap(LIST_FIRST(&vp->v_cache_src)); 49725453Sphk while (!TAILQ_EMPTY(&vp->v_cache_dst)) 49825453Sphk cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); 49925453Sphk 50046011Sphk do 50146011Sphk nextid++; 50246011Sphk while (nextid == vp->v_id || !nextid); 50329094Sphk vp->v_id = nextid; 50425453Sphk vp->v_dd = vp; 50525453Sphk vp->v_ddid = 0; 5061541Srgrimes} 5071541Srgrimes 5081541Srgrimes/* 5096968Sphk * Flush all entries referencing a particular filesystem. 5101541Srgrimes * 5116968Sphk * Since we need to check it anyway, we will flush all the invalid 51212968Sphk * entries at the same time. 
5131541Srgrimes */ 5141549Srgrimesvoid 5151541Srgrimescache_purgevfs(mp) 5161541Srgrimes struct mount *mp; 5171541Srgrimes{ 5186968Sphk struct nchashhead *ncpp; 51922521Sdyson struct namecache *ncp, *nnp; 5201541Srgrimes 5216968Sphk /* Scan hash tables for applicable entries */ 52229071Sphk for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { 52325453Sphk for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) { 52425453Sphk nnp = LIST_NEXT(ncp, nc_hash); 52525453Sphk if (ncp->nc_dvp->v_mount == mp) { 52625453Sphk cache_zap(ncp); 5276968Sphk } 5281541Srgrimes } 5291541Srgrimes } 5301541Srgrimes} 53128787Sphk 53228787Sphk/* 53328787Sphk * Perform canonical checks and cache lookup and pass on to filesystem 53428787Sphk * through the vop_cachedlookup only if needed. 53528787Sphk */ 53628787Sphk 53728787Sphkint 53828787Sphkvfs_cache_lookup(ap) 53928787Sphk struct vop_lookup_args /* { 54028787Sphk struct vnode *a_dvp; 54128787Sphk struct vnode **a_vpp; 54228787Sphk struct componentname *a_cnp; 54328787Sphk } */ *ap; 54428787Sphk{ 54565665Sbp struct vnode *dvp, *vp; 54665665Sbp int lockparent; 54728787Sphk int error; 54828787Sphk struct vnode **vpp = ap->a_vpp; 54928787Sphk struct componentname *cnp = ap->a_cnp; 55028787Sphk struct ucred *cred = cnp->cn_cred; 55128787Sphk int flags = cnp->cn_flags; 55283366Sjulian struct thread *td = cnp->cn_thread; 55328787Sphk u_long vpid; /* capability number of vnode */ 55428787Sphk 55528787Sphk *vpp = NULL; 55665665Sbp dvp = ap->a_dvp; 55728787Sphk lockparent = flags & LOCKPARENT; 55828787Sphk 55965665Sbp if (dvp->v_type != VDIR) 56028787Sphk return (ENOTDIR); 56128787Sphk 56265665Sbp if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && 56328787Sphk (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 56428787Sphk return (EROFS); 56528787Sphk 56683366Sjulian error = VOP_ACCESS(dvp, VEXEC, cred, td); 56728787Sphk 56828787Sphk if (error) 56928787Sphk return (error); 57028787Sphk 57165665Sbp error = 
cache_lookup(dvp, vpp, cnp); 57228787Sphk 57392130Sjeff#ifdef LOOKUP_SHARED 57492130Sjeff if (!error) { 57592130Sjeff /* We do this because the rest of the system now expects to get 57692130Sjeff * a shared lock, which is later upgraded if LOCKSHARED is not 57792130Sjeff * set. We have so many cases here because of bugs that yield 57892130Sjeff * inconsistant lock states. This all badly needs to be fixed 57992130Sjeff */ 58092130Sjeff error = VOP_CACHEDLOOKUP(dvp, vpp, cnp); 58192130Sjeff if (!error) { 58292130Sjeff int flock; 58392130Sjeff 58492130Sjeff flock = VOP_ISLOCKED(*vpp, td); 58592130Sjeff if (flock != LK_EXCLUSIVE) { 58692130Sjeff if (flock == 0) { 58792130Sjeff if ((flags & ISLASTCN) && 58892130Sjeff (flags & LOCKSHARED)) 58992130Sjeff VOP_LOCK(*vpp, LK_SHARED, td); 59092130Sjeff else 59192130Sjeff VOP_LOCK(*vpp, LK_EXCLUSIVE, td); 59292130Sjeff } 59392130Sjeff } else if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 59492130Sjeff VOP_LOCK(*vpp, LK_DOWNGRADE, td); 59592130Sjeff } 59692130Sjeff return (error); 59792130Sjeff } 59892130Sjeff#else 59928787Sphk if (!error) 60065665Sbp return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 60192130Sjeff#endif 60228787Sphk 60328787Sphk if (error == ENOENT) 60428787Sphk return (error); 60528787Sphk 60665665Sbp vp = *vpp; 60765665Sbp vpid = vp->v_id; 60865973Sbp cnp->cn_flags &= ~PDIRUNLOCK; 60965665Sbp if (dvp == vp) { /* lookup on "." 
*/ 61065665Sbp VREF(vp); 61128787Sphk error = 0; 61228787Sphk } else if (flags & ISDOTDOT) { 61383366Sjulian VOP_UNLOCK(dvp, 0, td); 61465973Sbp cnp->cn_flags |= PDIRUNLOCK; 61592130Sjeff#ifdef LOOKUP_SHARED 61692130Sjeff if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 61792130Sjeff error = vget(vp, LK_SHARED, td); 61892130Sjeff else 61992130Sjeff error = vget(vp, LK_EXCLUSIVE, td); 62092130Sjeff#else 62183366Sjulian error = vget(vp, LK_EXCLUSIVE, td); 62292130Sjeff#endif 62392130Sjeff 62465973Sbp if (!error && lockparent && (flags & ISLASTCN)) { 62583366Sjulian if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0) 62665973Sbp cnp->cn_flags &= ~PDIRUNLOCK; 62765973Sbp } 62828787Sphk } else { 62992130Sjeff#ifdef LOOKUP_SHARED 63092130Sjeff if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 63192130Sjeff error = vget(vp, LK_SHARED, td); 63292130Sjeff else 63392130Sjeff error = vget(vp, LK_EXCLUSIVE, td); 63492130Sjeff#else 63583366Sjulian error = vget(vp, LK_EXCLUSIVE, td); 63692130Sjeff#endif 63765973Sbp if (!lockparent || error || !(flags & ISLASTCN)) { 63883366Sjulian VOP_UNLOCK(dvp, 0, td); 63965973Sbp cnp->cn_flags |= PDIRUNLOCK; 64065973Sbp } 64128787Sphk } 64228787Sphk /* 64328787Sphk * Check that the capability number did not change 64428787Sphk * while we were waiting for the lock. 
64528787Sphk */ 64628787Sphk if (!error) { 64765665Sbp if (vpid == vp->v_id) 64828787Sphk return (0); 64965665Sbp vput(vp); 65065973Sbp if (lockparent && dvp != vp && (flags & ISLASTCN)) { 65183366Sjulian VOP_UNLOCK(dvp, 0, td); 65265973Sbp cnp->cn_flags |= PDIRUNLOCK; 65365973Sbp } 65428787Sphk } 65565973Sbp if (cnp->cn_flags & PDIRUNLOCK) { 65683366Sjulian error = vn_lock(dvp, LK_EXCLUSIVE, td); 65765973Sbp if (error) 65865973Sbp return (error); 65965973Sbp cnp->cn_flags &= ~PDIRUNLOCK; 66065973Sbp } 66192130Sjeff#ifdef LOOKUP_SHARED 66292130Sjeff error = VOP_CACHEDLOOKUP(dvp, vpp, cnp); 66392130Sjeff 66492130Sjeff if (!error) { 66592130Sjeff int flock = 0; 66692130Sjeff 66792130Sjeff flock = VOP_ISLOCKED(*vpp, td); 66892130Sjeff if (flock != LK_EXCLUSIVE) { 66992130Sjeff if (flock == 0) { 67092130Sjeff if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 67192130Sjeff VOP_LOCK(*vpp, LK_SHARED, td); 67292130Sjeff else 67392130Sjeff VOP_LOCK(*vpp, LK_EXCLUSIVE, td); 67492130Sjeff } 67592130Sjeff } else if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 67692130Sjeff VOP_LOCK(*vpp, LK_DOWNGRADE, td); 67792130Sjeff } 67892130Sjeff 67992130Sjeff return (error); 68092130Sjeff#else 68165665Sbp return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 68292130Sjeff#endif 68328787Sphk} 68451906Sphk 68551906Sphk 68651906Sphk#ifndef _SYS_SYSPROTO_H_ 68751906Sphkstruct __getcwd_args { 68851906Sphk u_char *buf; 68951906Sphk u_int buflen; 69051906Sphk}; 69151906Sphk#endif 69251906Sphk 69391690Seivind/* 69491690Seivind * XXX All of these sysctls would probably be more productive dead. 
69591690Seivind */ 69651906Sphkstatic int disablecwd; 69791690SeivindSYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, 69891690Seivind "Disable the getcwd syscall"); 69951906Sphk 70091690Seivind/* Various statistics for the getcwd syscall */ 70151906Sphkstatic u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls); 70251906Sphkstatic u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1); 70351906Sphkstatic u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2); 70451906Sphkstatic u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); 70551906Sphkstatic u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); 70651906Sphkstatic u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); 70791690Seivind 70891690Seivind/* Implementation of the getcwd syscall */ 70951906Sphkint 71083366Sjulian__getcwd(td, uap) 71183366Sjulian struct thread *td; 71251906Sphk struct __getcwd_args *uap; 71351906Sphk{ 71451906Sphk char *bp, *buf; 71551906Sphk int error, i, slash_prefixed; 71651906Sphk struct filedesc *fdp; 71751906Sphk struct namecache *ncp; 71851906Sphk struct vnode *vp; 71951906Sphk 72051906Sphk numcwdcalls++; 72151906Sphk if (disablecwd) 72251906Sphk return (ENODEV); 72351906Sphk if (uap->buflen < 2) 72451906Sphk return (EINVAL); 72551906Sphk if (uap->buflen > MAXPATHLEN) 72651906Sphk uap->buflen = MAXPATHLEN; 72751906Sphk buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK); 72851906Sphk bp += uap->buflen - 1; 72951906Sphk *bp = '\0'; 73083366Sjulian fdp = td->td_proc->p_fd; 73151906Sphk slash_prefixed = 0; 73289306Salfred FILEDESC_LOCK(fdp); 73351906Sphk for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) { 73451906Sphk if (vp->v_flag & VROOT) { 73583000Siedowse if (vp->v_mount == NULL) { /* forced unmount */ 73689306Salfred FILEDESC_UNLOCK(fdp); 73783000Siedowse free(buf, M_TEMP); 73857199Speter return (EBADF); 73983000Siedowse } 74051906Sphk vp = 
vp->v_mount->mnt_vnodecovered; 74151906Sphk continue; 74251906Sphk } 74351906Sphk if (vp->v_dd->v_id != vp->v_ddid) { 74489306Salfred FILEDESC_UNLOCK(fdp); 74551906Sphk numcwdfail1++; 74651906Sphk free(buf, M_TEMP); 74751906Sphk return (ENOTDIR); 74851906Sphk } 74951906Sphk ncp = TAILQ_FIRST(&vp->v_cache_dst); 75051906Sphk if (!ncp) { 75189306Salfred FILEDESC_UNLOCK(fdp); 75251906Sphk numcwdfail2++; 75351906Sphk free(buf, M_TEMP); 75451906Sphk return (ENOENT); 75551906Sphk } 75651906Sphk if (ncp->nc_dvp != vp->v_dd) { 75789306Salfred FILEDESC_UNLOCK(fdp); 75851906Sphk numcwdfail3++; 75951906Sphk free(buf, M_TEMP); 76051906Sphk return (EBADF); 76151906Sphk } 76251906Sphk for (i = ncp->nc_nlen - 1; i >= 0; i--) { 76351906Sphk if (bp == buf) { 76489306Salfred FILEDESC_UNLOCK(fdp); 76551906Sphk numcwdfail4++; 76651906Sphk free(buf, M_TEMP); 76751906Sphk return (ENOMEM); 76851906Sphk } 76951906Sphk *--bp = ncp->nc_name[i]; 77051906Sphk } 77151906Sphk if (bp == buf) { 77289306Salfred FILEDESC_UNLOCK(fdp); 77351906Sphk numcwdfail4++; 77451906Sphk free(buf, M_TEMP); 77551906Sphk return (ENOMEM); 77651906Sphk } 77751906Sphk *--bp = '/'; 77851906Sphk slash_prefixed = 1; 77951906Sphk vp = vp->v_dd; 78051906Sphk } 78189306Salfred FILEDESC_UNLOCK(fdp); 78251906Sphk if (!slash_prefixed) { 78351906Sphk if (bp == buf) { 78451906Sphk numcwdfail4++; 78551906Sphk free(buf, M_TEMP); 78651906Sphk return (ENOMEM); 78751906Sphk } 78851906Sphk *--bp = '/'; 78951906Sphk } 79051906Sphk numcwdfound++; 79151906Sphk error = copyout(bp, uap->buf, strlen(bp) + 1); 79251906Sphk free(buf, M_TEMP); 79351906Sphk return (error); 79451906Sphk} 79551906Sphk 79659652Sgreen/* 79759652Sgreen * Thus begins the fullpath magic. 
79859652Sgreen */ 79959652Sgreen 80059652Sgreen#undef STATNODE 80159652Sgreen#define STATNODE(name) \ 80259652Sgreen static u_int name; \ 80362622Sjhb SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "") 80459652Sgreen 80559652Sgreenstatic int disablefullpath; 80691690SeivindSYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0, 80791690Seivind "Disable the vn_fullpath function"); 80859652Sgreen 80959652SgreenSTATNODE(numfullpathcalls); 81059652SgreenSTATNODE(numfullpathfail1); 81159652SgreenSTATNODE(numfullpathfail2); 81259652SgreenSTATNODE(numfullpathfail3); 81359652SgreenSTATNODE(numfullpathfail4); 81459652SgreenSTATNODE(numfullpathfound); 81559652Sgreen 81691690Seivind/* 81791690Seivind * Retrieve the full filesystem path that correspond to a vnode from the name 81891690Seivind * cache (if available) 81991690Seivind */ 82059652Sgreenint 82185287Sdesvn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) 82285287Sdes{ 82359652Sgreen char *bp, *buf; 82459652Sgreen int i, slash_prefixed; 82559652Sgreen struct filedesc *fdp; 82659652Sgreen struct namecache *ncp; 82785287Sdes struct vnode *vp; 82859652Sgreen 82959652Sgreen numfullpathcalls++; 83059652Sgreen if (disablefullpath) 83159652Sgreen return (ENODEV); 83285287Sdes if (vn == NULL) 83359652Sgreen return (EINVAL); 83459652Sgreen buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 83559652Sgreen bp = buf + MAXPATHLEN - 1; 83659652Sgreen *bp = '\0'; 83785287Sdes fdp = td->td_proc->p_fd; 83859652Sgreen slash_prefixed = 0; 83989306Salfred FILEDESC_LOCK(fdp); 84085287Sdes for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) { 84159652Sgreen if (vp->v_flag & VROOT) { 84259652Sgreen if (vp->v_mount == NULL) { /* forced unmount */ 84389306Salfred FILEDESC_UNLOCK(fdp); 84459652Sgreen free(buf, M_TEMP); 84559652Sgreen return (EBADF); 84659652Sgreen } 84759652Sgreen vp = vp->v_mount->mnt_vnodecovered; 84859652Sgreen continue; 84959652Sgreen } 85085287Sdes if (vp != vn 
&& vp->v_dd->v_id != vp->v_ddid) { 85189306Salfred FILEDESC_UNLOCK(fdp); 85259652Sgreen numfullpathfail1++; 85359652Sgreen free(buf, M_TEMP); 85459652Sgreen return (ENOTDIR); 85559652Sgreen } 85659652Sgreen ncp = TAILQ_FIRST(&vp->v_cache_dst); 85759652Sgreen if (!ncp) { 85889306Salfred FILEDESC_UNLOCK(fdp); 85959652Sgreen numfullpathfail2++; 86059652Sgreen free(buf, M_TEMP); 86159652Sgreen return (ENOENT); 86259652Sgreen } 86385287Sdes if (vp != vn && ncp->nc_dvp != vp->v_dd) { 86489306Salfred FILEDESC_UNLOCK(fdp); 86559652Sgreen numfullpathfail3++; 86659652Sgreen free(buf, M_TEMP); 86759652Sgreen return (EBADF); 86859652Sgreen } 86959652Sgreen for (i = ncp->nc_nlen - 1; i >= 0; i--) { 87059652Sgreen if (bp == buf) { 87189306Salfred FILEDESC_UNLOCK(fdp); 87259652Sgreen numfullpathfail4++; 87359652Sgreen free(buf, M_TEMP); 87459652Sgreen return (ENOMEM); 87559652Sgreen } 87659652Sgreen *--bp = ncp->nc_name[i]; 87759652Sgreen } 87859652Sgreen if (bp == buf) { 87989306Salfred FILEDESC_UNLOCK(fdp); 88059652Sgreen numfullpathfail4++; 88159652Sgreen free(buf, M_TEMP); 88259652Sgreen return (ENOMEM); 88359652Sgreen } 88459652Sgreen *--bp = '/'; 88559652Sgreen slash_prefixed = 1; 88659652Sgreen vp = ncp->nc_dvp; 88759652Sgreen } 88859652Sgreen if (!slash_prefixed) { 88959652Sgreen if (bp == buf) { 89089306Salfred FILEDESC_UNLOCK(fdp); 89159652Sgreen numfullpathfail4++; 89259652Sgreen free(buf, M_TEMP); 89359652Sgreen return (ENOMEM); 89459652Sgreen } 89559652Sgreen *--bp = '/'; 89659652Sgreen } 89789306Salfred FILEDESC_UNLOCK(fdp); 89859652Sgreen numfullpathfound++; 89959652Sgreen *retbuf = bp; 90085287Sdes *freebuf = buf; 90159652Sgreen return (0); 90259652Sgreen} 903