vfs_cache.c revision 96616
11541Srgrimes/* 222521Sdyson * Copyright (c) 1989, 1993, 1995 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 522521Sdyson * This code is derived from software contributed to Berkeley by 622521Sdyson * Poul-Henning Kamp of the FreeBSD Project. 722521Sdyson * 81541Srgrimes * Redistribution and use in source and binary forms, with or without 91541Srgrimes * modification, are permitted provided that the following conditions 101541Srgrimes * are met: 111541Srgrimes * 1. Redistributions of source code must retain the above copyright 121541Srgrimes * notice, this list of conditions and the following disclaimer. 131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer in the 151541Srgrimes * documentation and/or other materials provided with the distribution. 161541Srgrimes * 3. All advertising materials mentioning features or use of this software 171541Srgrimes * must display the following acknowledgement: 181541Srgrimes * This product includes software developed by the University of 191541Srgrimes * California, Berkeley and its contributors. 201541Srgrimes * 4. Neither the name of the University nor the names of its contributors 211541Srgrimes * may be used to endorse or promote products derived from this software 221541Srgrimes * without specific prior written permission. 231541Srgrimes * 241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 271541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 291541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 301541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 321541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 331541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 341541Srgrimes * SUCH DAMAGE. 351541Srgrimes * 3623521Sbde * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 3750477Speter * $FreeBSD: head/sys/kern/vfs_cache.c 96616 2002-05-14 21:59:49Z jeff $ 381541Srgrimes */ 391541Srgrimes 401541Srgrimes#include <sys/param.h> 411541Srgrimes#include <sys/systm.h> 4212820Sphk#include <sys/kernel.h> 4376166Smarkm#include <sys/lock.h> 4489316Salfred#include <sys/mutex.h> 4512820Sphk#include <sys/sysctl.h> 461541Srgrimes#include <sys/mount.h> 471541Srgrimes#include <sys/vnode.h> 481541Srgrimes#include <sys/namei.h> 491541Srgrimes#include <sys/malloc.h> 5051906Sphk#include <sys/sysproto.h> 5151906Sphk#include <sys/proc.h> 5251906Sphk#include <sys/filedesc.h> 5374384Speter#include <sys/fnv_hash.h> 541541Srgrimes 5551906Sphk/* 5659652Sgreen * This structure describes the elements in the cache of recent 5759652Sgreen * names looked up by namei. 5859652Sgreen */ 5959652Sgreen 6059652Sgreenstruct namecache { 6160938Sjake LIST_ENTRY(namecache) nc_hash; /* hash chain */ 6260938Sjake LIST_ENTRY(namecache) nc_src; /* source vnode list */ 6360938Sjake TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ 6459652Sgreen struct vnode *nc_dvp; /* vnode of parent of name */ 6559652Sgreen struct vnode *nc_vp; /* vnode the name refers to */ 6659652Sgreen u_char nc_flag; /* flag bits */ 6759652Sgreen u_char nc_nlen; /* length of name */ 6859652Sgreen char nc_name[0]; /* segment name */ 6959652Sgreen}; 7059652Sgreen 7159652Sgreen/* 721541Srgrimes * Name caching works as follows: 731541Srgrimes * 741541Srgrimes * Names found by directory scans are retained in a cache 751541Srgrimes * for future reference. It is managed LRU, so frequently 761541Srgrimes * used names will hang around. Cache is indexed by hash value 771541Srgrimes * obtained from (vp, name) where vp refers to the directory 781541Srgrimes * containing name. 791541Srgrimes * 8022521Sdyson * If it is a "negative" entry, (i.e. for a name that is known NOT to 8122521Sdyson * exist) the vnode pointer will be NULL. 826968Sphk * 831541Srgrimes * Upon reaching the last segment of a path, if the reference 841541Srgrimes * is for DELETE, or NOCACHE is set (rewrite), and the 851541Srgrimes * name is located in the cache, it will be dropped. 861541Srgrimes */ 871541Srgrimes 881541Srgrimes/* 891541Srgrimes * Structures associated with name cacheing. 901541Srgrimes */ 9174501Speter#define NCHHASH(hash) \ 9274501Speter (&nchashtbl[(hash) & nchash]) 9360938Sjakestatic LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ 9460938Sjakestatic TAILQ_HEAD(, namecache) ncneg; /* Hash Table */ 9523521Sbdestatic u_long nchash; /* size of hash table */ 9662622SjhbSYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, ""); 9725453Sphkstatic u_long ncnegfactor = 16; /* ratio of negative entries */ 9862622SjhbSYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, ""); 9991690Seivindstatic u_long numneg; /* number of cache entries allocated */ 10062622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, ""); 10123521Sbdestatic u_long numcache; /* number of cache entries allocated */ 10262622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, ""); 10375654Stanimurastatic u_long numcachehv; /* number of cache entries with vnodes held */ 10475654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, ""); 10584249Sdillon#if 0 10675654Stanimurastatic u_long numcachepl; /* number of cache purge for leaf entries */ 10775654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, ""); 10884249Sdillon#endif 10922521Sdysonstruct nchstats nchstats; /* cache effectiveness statistics */ 1101541Srgrimes 11123521Sbdestatic int doingcache = 1; /* 1 => enable the cache */ 11223521SbdeSYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, ""); 11391690Seivind 11491690Seivind/* Export size information to userland */ 11525453SphkSYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), ""); 11625453SphkSYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), ""); 11723521Sbde 11829788Sphk/* 11929788Sphk * The new name cache statistics 12029788Sphk */ 12138984SbdeSYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics"); 12229788Sphk#define STATNODE(mode, name, var) \ 12362622Sjhb SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, ""); 12429788SphkSTATNODE(CTLFLAG_RD, numneg, &numneg); 12529788SphkSTATNODE(CTLFLAG_RD, numcache, &numcache); 12629788Sphkstatic u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls); 12729788Sphkstatic u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits); 12829788Sphkstatic u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits); 12929788Sphkstatic u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks); 13029788Sphkstatic u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss); 13129804Sphkstatic u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap); 13229788Sphkstatic u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps); 13329788Sphkstatic u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits); 13429788Sphkstatic u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps); 13529788Sphkstatic u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits); 13629788Sphk 13768922SrwatsonSYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats, 13868922Srwatson sizeof(nchstats), "LU", "VFS cache effectiveness statistics"); 13929788Sphk 14068922Srwatson 14168922Srwatson 14292723Salfredstatic void cache_zap(struct namecache *ncp); 1436968Sphk 14469774Sphkstatic MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); 14551906Sphk 14622521Sdyson/* 14725453Sphk * Flags in namecache.nc_flag 14825453Sphk */ 14925453Sphk#define NCF_WHITE 1 15075402Speter 15125453Sphk/* 15275402Speter * Grab an atomic snapshot of the name cache hash chain lengths 15375402Speter */ 15475402SpeterSYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats"); 15575402Speter 15675402Speterstatic int 15775402Spetersysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS) 15875402Speter{ 15975402Speter int error; 16075402Speter struct nchashhead *ncpp; 16175402Speter struct namecache *ncp; 16275402Speter int n_nchash; 16375402Speter int count; 16475402Speter 16575402Speter n_nchash = nchash + 1; /* nchash is max index, not count */ 16675402Speter if (!req->oldptr) 16775402Speter return SYSCTL_OUT(req, 0, n_nchash * sizeof(int)); 16875402Speter 16975402Speter /* Scan hash tables for applicable entries */ 17075402Speter for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 17175402Speter count = 0; 17275402Speter LIST_FOREACH(ncp, ncpp, nc_hash) { 17375402Speter count++; 17475402Speter } 17575402Speter error = SYSCTL_OUT(req, (caddr_t)&count, sizeof(count)); 17675402Speter if (error) 17775402Speter return (error); 17875402Speter } 17975402Speter return (0); 18075402Speter} 18175402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD, 18275402Speter 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths"); 18375402Speter 18475402Speterstatic int 18575402Spetersysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS) 18675402Speter{ 18775402Speter int error; 18875402Speter struct nchashhead *ncpp; 18975402Speter struct namecache *ncp; 19075402Speter int n_nchash; 19175402Speter int count, maxlength, used, pct; 19275402Speter 19375402Speter if (!req->oldptr) 19475402Speter return SYSCTL_OUT(req, 0, 4 * sizeof(int)); 19575402Speter 19675402Speter n_nchash = nchash + 1; /* nchash is max index, not count */ 19775402Speter used = 0; 19875402Speter maxlength = 0; 19975402Speter 20075402Speter /* Scan hash tables for applicable entries */ 20175402Speter for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 20275402Speter count = 0; 20375402Speter LIST_FOREACH(ncp, ncpp, nc_hash) { 20475402Speter count++; 20575402Speter } 20675402Speter if (count) 20775402Speter used++; 20875402Speter if (maxlength < count) 20975402Speter maxlength = count; 21075402Speter } 21175402Speter n_nchash = nchash + 1; 21275402Speter pct = (used * 100 * 100) / n_nchash; 21375402Speter error = SYSCTL_OUT(req, (caddr_t)&n_nchash, sizeof(n_nchash)); 21475402Speter if (error) 21575402Speter return (error); 21675402Speter error = SYSCTL_OUT(req, (caddr_t)&used, sizeof(used)); 21775402Speter if (error) 21875402Speter return (error); 21975402Speter error = SYSCTL_OUT(req, (caddr_t)&maxlength, sizeof(maxlength)); 22075402Speter if (error) 22175402Speter return (error); 22275402Speter error = SYSCTL_OUT(req, (caddr_t)&pct, sizeof(pct)); 22375402Speter if (error) 22475402Speter return (error); 22575402Speter return (0); 22675402Speter} 22775402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD, 22875402Speter 0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths"); 22975402Speter 23075402Speter/* 23122521Sdyson * Delete an entry from its hash list and move it to the front 23222521Sdyson * of the LRU list for immediate reuse. 23322521Sdyson */ 23425453Sphkstatic void 23525453Sphkcache_zap(ncp) 23625453Sphk struct namecache *ncp; 23725453Sphk{ 23825453Sphk LIST_REMOVE(ncp, nc_hash); 23925453Sphk LIST_REMOVE(ncp, nc_src); 24075654Stanimura if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) { 24128954Sphk vdrop(ncp->nc_dvp); 24275654Stanimura numcachehv--; 24375654Stanimura } 24425453Sphk if (ncp->nc_vp) { 24525453Sphk TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); 24625453Sphk } else { 24725453Sphk TAILQ_REMOVE(&ncneg, ncp, nc_dst); 24825453Sphk numneg--; 24925453Sphk } 25025453Sphk numcache--; 25151906Sphk free(ncp, M_VFSCACHE); 25222521Sdyson} 2536968Sphk 25422521Sdyson/* 25584249Sdillon * cache_leaf_test() 25684249Sdillon * 25784249Sdillon * Test whether this (directory) vnode's namei cache entry contains 25884249Sdillon * subdirectories or not. Used to determine whether the directory is 25984249Sdillon * a leaf in the namei cache or not. Note: the directory may still 26084249Sdillon * contain files in the namei cache. 26184249Sdillon * 26284249Sdillon * Returns 0 if the directory is a leaf, -1 if it isn't. 26384249Sdillon */ 26484249Sdillonint 26584249Sdilloncache_leaf_test(struct vnode *vp) 26684249Sdillon{ 26784249Sdillon struct namecache *ncpc; 26884249Sdillon 26984249Sdillon for (ncpc = LIST_FIRST(&vp->v_cache_src); 27084249Sdillon ncpc != NULL; 27184249Sdillon ncpc = LIST_NEXT(ncpc, nc_src) 27284249Sdillon ) { 27384249Sdillon if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR) 27484249Sdillon return(-1); 27584249Sdillon } 27684249Sdillon return(0); 27784249Sdillon} 27884249Sdillon 27984249Sdillon/* 28023521Sbde * Lookup an entry in the cache 2816968Sphk * 2826968Sphk * Lookup is called with dvp pointing to the directory to search, 28322521Sdyson * cnp pointing to the name of the entry being sought. If the lookup 28422521Sdyson * succeeds, the vnode is returned in *vpp, and a status of -1 is 28522521Sdyson * returned. If the lookup determines that the name does not exist 28622521Sdyson * (negative cacheing), a status of ENOENT is returned. If the lookup 28722521Sdyson * fails, a status of zero is returned. 2881541Srgrimes */ 2896968Sphk 2901541Srgrimesint 2911541Srgrimescache_lookup(dvp, vpp, cnp) 2921541Srgrimes struct vnode *dvp; 2931541Srgrimes struct vnode **vpp; 2941541Srgrimes struct componentname *cnp; 2951541Srgrimes{ 29651906Sphk struct namecache *ncp; 29774384Speter u_int32_t hash; 2981541Srgrimes 2996928Sphk if (!doingcache) { 3006928Sphk cnp->cn_flags &= ~MAKEENTRY; 3011541Srgrimes return (0); 3026928Sphk } 30325453Sphk 30429788Sphk numcalls++; 30529788Sphk 30625453Sphk if (cnp->cn_nameptr[0] == '.') { 30725453Sphk if (cnp->cn_namelen == 1) { 30825453Sphk *vpp = dvp; 30929788Sphk dothits++; 31025453Sphk return (-1); 31125453Sphk } 31225453Sphk if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 31329788Sphk dotdothits++; 31425453Sphk if (dvp->v_dd->v_id != dvp->v_ddid || 31525453Sphk (cnp->cn_flags & MAKEENTRY) == 0) { 31625453Sphk dvp->v_ddid = 0; 31725453Sphk return (0); 31825453Sphk } 31925453Sphk *vpp = dvp->v_dd; 32025453Sphk return (-1); 32125453Sphk } 3221541Srgrimes } 3236968Sphk 32474501Speter hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT); 32574501Speter hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 32674501Speter LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { 32729788Sphk numchecks++; 32825453Sphk if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && 32931879Sbde !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) 33022521Sdyson break; 3311541Srgrimes } 3326968Sphk 33322521Sdyson /* We failed to find an entry */ 33422521Sdyson if (ncp == 0) { 33529804Sphk if ((cnp->cn_flags & MAKEENTRY) == 0) { 33629804Sphk nummisszap++; 33729804Sphk } else { 33829804Sphk nummiss++; 33929804Sphk } 34022521Sdyson nchstats.ncs_miss++; 34122521Sdyson return (0); 34222521Sdyson } 34322521Sdyson 3446968Sphk /* We don't want to have an entry, so dump it */ 3456928Sphk if ((cnp->cn_flags & MAKEENTRY) == 0) { 34629788Sphk numposzaps++; 3471541Srgrimes nchstats.ncs_badhits++; 34825453Sphk cache_zap(ncp); 3496968Sphk return (0); 35023521Sbde } 3516968Sphk 3526968Sphk /* We found a "positive" match, return the vnode */ 35322521Sdyson if (ncp->nc_vp) { 35429788Sphk numposhits++; 3551541Srgrimes nchstats.ncs_goodhits++; 3561541Srgrimes *vpp = ncp->nc_vp; 3571541Srgrimes return (-1); 3581541Srgrimes } 3591541Srgrimes 3606968Sphk /* We found a negative match, and want to create it, so purge */ 3616968Sphk if (cnp->cn_nameiop == CREATE) { 36229788Sphk numnegzaps++; 3637013Sphk nchstats.ncs_badhits++; 36425453Sphk cache_zap(ncp); 3656968Sphk return (0); 3666968Sphk } 3676968Sphk 36829788Sphk numneghits++; 36922521Sdyson /* 37022521Sdyson * We found a "negative" match, ENOENT notifies client of this match. 37122521Sdyson * The nc_vpid field records whether this is a whiteout. 37222521Sdyson */ 37325453Sphk TAILQ_REMOVE(&ncneg, ncp, nc_dst); 37425453Sphk TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 3756968Sphk nchstats.ncs_neghits++; 37625453Sphk if (ncp->nc_flag & NCF_WHITE) 37725453Sphk cnp->cn_flags |= ISWHITEOUT; 3786968Sphk return (ENOENT); 3791541Srgrimes} 3801541Srgrimes 3811541Srgrimes/* 3826968Sphk * Add an entry to the cache. 3831541Srgrimes */ 3841549Srgrimesvoid 3851541Srgrimescache_enter(dvp, vp, cnp) 3861541Srgrimes struct vnode *dvp; 3871541Srgrimes struct vnode *vp; 3881541Srgrimes struct componentname *cnp; 3891541Srgrimes{ 39051906Sphk struct namecache *ncp; 39151906Sphk struct nchashhead *ncpp; 39274384Speter u_int32_t hash; 39351906Sphk int len; 3941541Srgrimes 3951541Srgrimes if (!doingcache) 3961541Srgrimes return; 3976968Sphk 39825453Sphk if (cnp->cn_nameptr[0] == '.') { 39925453Sphk if (cnp->cn_namelen == 1) { 40025453Sphk return; 4016928Sphk } 40225453Sphk if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 40325453Sphk if (vp) { 40425453Sphk dvp->v_dd = vp; 40525453Sphk dvp->v_ddid = vp->v_id; 40625453Sphk } else { 40725453Sphk dvp->v_dd = dvp; 40825453Sphk dvp->v_ddid = 0; 40925453Sphk } 41025453Sphk return; 41125453Sphk } 4126968Sphk } 41325453Sphk 41425453Sphk ncp = (struct namecache *) 41551906Sphk malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK); 41625453Sphk bzero((char *)ncp, sizeof *ncp); 41725453Sphk numcache++; 41828954Sphk if (!vp) { 41925453Sphk numneg++; 42028954Sphk ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0; 42129071Sphk } else if (vp->v_type == VDIR) { 42229071Sphk vp->v_dd = dvp; 42329071Sphk vp->v_ddid = dvp->v_id; 42428954Sphk } 42523521Sbde 42622521Sdyson /* 42722521Sdyson * Fill in cache info, if vp is NULL this is a "negative" cache entry. 42822521Sdyson * For negative entries, we have to record whether it is a whiteout. 42922521Sdyson * the whiteout flag is stored in the nc_vpid field which is 43022521Sdyson * otherwise unused. 43122521Sdyson */ 4321541Srgrimes ncp->nc_vp = vp; 4331541Srgrimes ncp->nc_dvp = dvp; 43451906Sphk len = ncp->nc_nlen = cnp->cn_namelen; 43574501Speter hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT); 43674384Speter bcopy(cnp->cn_nameptr, ncp->nc_name, len); 43774501Speter hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 43874501Speter ncpp = NCHHASH(hash); 4396928Sphk LIST_INSERT_HEAD(ncpp, ncp, nc_hash); 44075654Stanimura if (LIST_EMPTY(&dvp->v_cache_src)) { 44128954Sphk vhold(dvp); 44275654Stanimura numcachehv++; 44375654Stanimura } 44425453Sphk LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); 44525453Sphk if (vp) { 44625453Sphk TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); 44725453Sphk } else { 44825453Sphk TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 44925453Sphk } 45051906Sphk if (numneg * ncnegfactor > numcache) { 45125453Sphk ncp = TAILQ_FIRST(&ncneg); 45225453Sphk cache_zap(ncp); 45325453Sphk } 4541541Srgrimes} 4551541Srgrimes 4561541Srgrimes/* 4571541Srgrimes * Name cache initialization, from vfs_init() when we are booting 4581541Srgrimes */ 45969664Speterstatic void 46069664Speternchinit(void *dummy __unused) 4611541Srgrimes{ 46223521Sbde 46325453Sphk TAILQ_INIT(&ncneg); 46469664Speter nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash); 4651541Srgrimes} 46669664SpeterSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL) 4671541Srgrimes 46869664Speter 4691541Srgrimes/* 47046011Sphk * Invalidate all entries to a particular vnode. 47123521Sbde * 47246011Sphk * Remove all entries in the namecache relating to this vnode and 47346011Sphk * change the v_id. We take the v_id from a global counter, since 47446011Sphk * it becomes a handy sequence number in crash-dumps that way. 47546011Sphk * No valid vnode will ever have (v_id == 0). 47646011Sphk * 47746011Sphk * XXX: Only time and the size of v_id prevents this from failing: 47846011Sphk * XXX: In theory we should hunt down all (struct vnode*, v_id) 47946011Sphk * XXX: soft references and nuke them, at least on the global 48046011Sphk * XXX: v_id wraparound. The period of resistance can be extended 48146011Sphk * XXX: by incrementing each vnodes v_id individually instead of 48246011Sphk * XXX: using the global v_id. 4831541Srgrimes */ 48446011Sphk 4851549Srgrimesvoid 4861541Srgrimescache_purge(vp) 4871541Srgrimes struct vnode *vp; 4881541Srgrimes{ 48929094Sphk static u_long nextid; 4901541Srgrimes 49125453Sphk while (!LIST_EMPTY(&vp->v_cache_src)) 49225453Sphk cache_zap(LIST_FIRST(&vp->v_cache_src)); 49325453Sphk while (!TAILQ_EMPTY(&vp->v_cache_dst)) 49425453Sphk cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); 49525453Sphk 49646011Sphk do 49746011Sphk nextid++; 49846011Sphk while (nextid == vp->v_id || !nextid); 49929094Sphk vp->v_id = nextid; 50025453Sphk vp->v_dd = vp; 50125453Sphk vp->v_ddid = 0; 5021541Srgrimes} 5031541Srgrimes 5041541Srgrimes/* 5056968Sphk * Flush all entries referencing a particular filesystem. 5061541Srgrimes * 5076968Sphk * Since we need to check it anyway, we will flush all the invalid 50812968Sphk * entries at the same time. 5091541Srgrimes */ 5101549Srgrimesvoid 5111541Srgrimescache_purgevfs(mp) 5121541Srgrimes struct mount *mp; 5131541Srgrimes{ 5146968Sphk struct nchashhead *ncpp; 51522521Sdyson struct namecache *ncp, *nnp; 5161541Srgrimes 5176968Sphk /* Scan hash tables for applicable entries */ 51829071Sphk for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { 51925453Sphk for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) { 52025453Sphk nnp = LIST_NEXT(ncp, nc_hash); 52125453Sphk if (ncp->nc_dvp->v_mount == mp) { 52225453Sphk cache_zap(ncp); 5236968Sphk } 5241541Srgrimes } 5251541Srgrimes } 5261541Srgrimes} 52728787Sphk 52828787Sphk/* 52928787Sphk * Perform canonical checks and cache lookup and pass on to filesystem 53028787Sphk * through the vop_cachedlookup only if needed. 53128787Sphk */ 53228787Sphk 53328787Sphkint 53428787Sphkvfs_cache_lookup(ap) 53528787Sphk struct vop_lookup_args /* { 53628787Sphk struct vnode *a_dvp; 53728787Sphk struct vnode **a_vpp; 53828787Sphk struct componentname *a_cnp; 53928787Sphk } */ *ap; 54028787Sphk{ 54165665Sbp struct vnode *dvp, *vp; 54265665Sbp int lockparent; 54328787Sphk int error; 54428787Sphk struct vnode **vpp = ap->a_vpp; 54528787Sphk struct componentname *cnp = ap->a_cnp; 54628787Sphk struct ucred *cred = cnp->cn_cred; 54728787Sphk int flags = cnp->cn_flags; 54883366Sjulian struct thread *td = cnp->cn_thread; 54928787Sphk u_long vpid; /* capability number of vnode */ 55028787Sphk 55128787Sphk *vpp = NULL; 55265665Sbp dvp = ap->a_dvp; 55328787Sphk lockparent = flags & LOCKPARENT; 55428787Sphk 55565665Sbp if (dvp->v_type != VDIR) 55628787Sphk return (ENOTDIR); 55728787Sphk 55865665Sbp if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && 55928787Sphk (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 56028787Sphk return (EROFS); 56128787Sphk 56283366Sjulian error = VOP_ACCESS(dvp, VEXEC, cred, td); 56328787Sphk 56428787Sphk if (error) 56528787Sphk return (error); 56628787Sphk 56765665Sbp error = cache_lookup(dvp, vpp, cnp); 56828787Sphk 56996616Sjeff#ifdef LOOKUP_SHARED 57092130Sjeff if (!error) { 57192130Sjeff /* We do this because the rest of the system now expects to get 57292130Sjeff * a shared lock, which is later upgraded if LOCKSHARED is not 57392130Sjeff * set. We have so many cases here because of bugs that yield 57492130Sjeff * inconsistant lock states. This all badly needs to be fixed 57592130Sjeff */ 57692130Sjeff error = VOP_CACHEDLOOKUP(dvp, vpp, cnp); 57792130Sjeff if (!error) { 57892130Sjeff int flock; 57992130Sjeff 58092130Sjeff flock = VOP_ISLOCKED(*vpp, td); 58192130Sjeff if (flock != LK_EXCLUSIVE) { 58292130Sjeff if (flock == 0) { 58392130Sjeff if ((flags & ISLASTCN) && 58492130Sjeff (flags & LOCKSHARED)) 58592130Sjeff VOP_LOCK(*vpp, LK_SHARED, td); 58692130Sjeff else 58792130Sjeff VOP_LOCK(*vpp, LK_EXCLUSIVE, td); 58892130Sjeff } 58992130Sjeff } else if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 59092130Sjeff VOP_LOCK(*vpp, LK_DOWNGRADE, td); 59192130Sjeff } 59292130Sjeff return (error); 59392130Sjeff } 59492130Sjeff#else 59528787Sphk if (!error) 59665665Sbp return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 59792130Sjeff#endif 59828787Sphk 59928787Sphk if (error == ENOENT) 60028787Sphk return (error); 60128787Sphk 60265665Sbp vp = *vpp; 60365665Sbp vpid = vp->v_id; 60465973Sbp cnp->cn_flags &= ~PDIRUNLOCK; 60565665Sbp if (dvp == vp) { /* lookup on "." */ 60665665Sbp VREF(vp); 60728787Sphk error = 0; 60828787Sphk } else if (flags & ISDOTDOT) { 60983366Sjulian VOP_UNLOCK(dvp, 0, td); 61065973Sbp cnp->cn_flags |= PDIRUNLOCK; 61196616Sjeff#ifdef LOOKUP_SHARED 61292130Sjeff if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 61392130Sjeff error = vget(vp, LK_SHARED, td); 61492130Sjeff else 61592130Sjeff error = vget(vp, LK_EXCLUSIVE, td); 61692130Sjeff#else 61783366Sjulian error = vget(vp, LK_EXCLUSIVE, td); 61892130Sjeff#endif 61992130Sjeff 62065973Sbp if (!error && lockparent && (flags & ISLASTCN)) { 62183366Sjulian if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0) 62265973Sbp cnp->cn_flags &= ~PDIRUNLOCK; 62365973Sbp } 62428787Sphk } else { 62596616Sjeff#ifdef LOOKUP_SHARED 62692130Sjeff if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 62792130Sjeff error = vget(vp, LK_SHARED, td); 62892130Sjeff else 62992130Sjeff error = vget(vp, LK_EXCLUSIVE, td); 63092130Sjeff#else 63183366Sjulian error = vget(vp, LK_EXCLUSIVE, td); 63292130Sjeff#endif 63365973Sbp if (!lockparent || error || !(flags & ISLASTCN)) { 63483366Sjulian VOP_UNLOCK(dvp, 0, td); 63565973Sbp cnp->cn_flags |= PDIRUNLOCK; 63665973Sbp } 63728787Sphk } 63828787Sphk /* 63928787Sphk * Check that the capability number did not change 64028787Sphk * while we were waiting for the lock. 64128787Sphk */ 64228787Sphk if (!error) { 64365665Sbp if (vpid == vp->v_id) 64428787Sphk return (0); 64565665Sbp vput(vp); 64665973Sbp if (lockparent && dvp != vp && (flags & ISLASTCN)) { 64783366Sjulian VOP_UNLOCK(dvp, 0, td); 64865973Sbp cnp->cn_flags |= PDIRUNLOCK; 64965973Sbp } 65028787Sphk } 65165973Sbp if (cnp->cn_flags & PDIRUNLOCK) { 65283366Sjulian error = vn_lock(dvp, LK_EXCLUSIVE, td); 65365973Sbp if (error) 65465973Sbp return (error); 65565973Sbp cnp->cn_flags &= ~PDIRUNLOCK; 65665973Sbp } 65796616Sjeff#ifdef LOOKUP_SHARED 65892130Sjeff error = VOP_CACHEDLOOKUP(dvp, vpp, cnp); 65992130Sjeff 66092130Sjeff if (!error) { 66192130Sjeff int flock = 0; 66292130Sjeff 66392130Sjeff flock = VOP_ISLOCKED(*vpp, td); 66492130Sjeff if (flock != LK_EXCLUSIVE) { 66592130Sjeff if (flock == 0) { 66692130Sjeff if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 66792130Sjeff VOP_LOCK(*vpp, LK_SHARED, td); 66892130Sjeff else 66992130Sjeff VOP_LOCK(*vpp, LK_EXCLUSIVE, td); 67092130Sjeff } 67192130Sjeff } else if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 67292130Sjeff VOP_LOCK(*vpp, LK_DOWNGRADE, td); 67392130Sjeff } 67492130Sjeff 67592130Sjeff return (error); 67692130Sjeff#else 67765665Sbp return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 67892130Sjeff#endif 67928787Sphk} 68051906Sphk 68151906Sphk 68251906Sphk#ifndef _SYS_SYSPROTO_H_ 68351906Sphkstruct __getcwd_args { 68451906Sphk u_char *buf; 68551906Sphk u_int buflen; 68651906Sphk}; 68751906Sphk#endif 68851906Sphk 68991690Seivind/* 69091690Seivind * XXX All of these sysctls would probably be more productive dead. 69191690Seivind */ 69251906Sphkstatic int disablecwd; 69391690SeivindSYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, 69491690Seivind "Disable the getcwd syscall"); 69551906Sphk 69691690Seivind/* Various statistics for the getcwd syscall */ 69751906Sphkstatic u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls); 69851906Sphkstatic u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1); 69951906Sphkstatic u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2); 70051906Sphkstatic u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); 70151906Sphkstatic u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); 70251906Sphkstatic u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); 70391690Seivind 70491690Seivind/* Implementation of the getcwd syscall */ 70551906Sphkint 70683366Sjulian__getcwd(td, uap) 70783366Sjulian struct thread *td; 70851906Sphk struct __getcwd_args *uap; 70951906Sphk{ 71051906Sphk char *bp, *buf; 71151906Sphk int error, i, slash_prefixed; 71251906Sphk struct filedesc *fdp; 71351906Sphk struct namecache *ncp; 71451906Sphk struct vnode *vp; 71551906Sphk 71651906Sphk numcwdcalls++; 71751906Sphk if (disablecwd) 71851906Sphk return (ENODEV); 71951906Sphk if (uap->buflen < 2) 72051906Sphk return (EINVAL); 72151906Sphk if (uap->buflen > MAXPATHLEN) 72251906Sphk uap->buflen = MAXPATHLEN; 72351906Sphk buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK); 72451906Sphk bp += uap->buflen - 1; 72551906Sphk *bp = '\0'; 72683366Sjulian fdp = td->td_proc->p_fd; 72751906Sphk slash_prefixed = 0; 72889306Salfred FILEDESC_LOCK(fdp); 72951906Sphk for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) { 73051906Sphk if (vp->v_flag & VROOT) { 73183000Siedowse if (vp->v_mount == NULL) { /* forced unmount */ 73289306Salfred FILEDESC_UNLOCK(fdp); 73383000Siedowse free(buf, M_TEMP); 73457199Speter return (EBADF); 73583000Siedowse } 73651906Sphk vp = vp->v_mount->mnt_vnodecovered; 73751906Sphk continue; 73851906Sphk } 73951906Sphk if (vp->v_dd->v_id != vp->v_ddid) { 74089306Salfred FILEDESC_UNLOCK(fdp); 74151906Sphk numcwdfail1++; 74251906Sphk free(buf, M_TEMP); 74351906Sphk return (ENOTDIR); 74451906Sphk } 74551906Sphk ncp = TAILQ_FIRST(&vp->v_cache_dst); 74651906Sphk if (!ncp) { 74789306Salfred FILEDESC_UNLOCK(fdp); 74851906Sphk numcwdfail2++; 74951906Sphk free(buf, M_TEMP); 75051906Sphk return (ENOENT); 75151906Sphk } 75251906Sphk if (ncp->nc_dvp != vp->v_dd) { 75389306Salfred FILEDESC_UNLOCK(fdp); 75451906Sphk numcwdfail3++; 75551906Sphk free(buf, M_TEMP); 75651906Sphk return (EBADF); 75751906Sphk } 75851906Sphk for (i = ncp->nc_nlen - 1; i >= 0; i--) { 75951906Sphk if (bp == buf) { 76089306Salfred FILEDESC_UNLOCK(fdp); 76151906Sphk numcwdfail4++; 76251906Sphk free(buf, M_TEMP); 76351906Sphk return (ENOMEM); 76451906Sphk } 76551906Sphk *--bp = ncp->nc_name[i]; 76651906Sphk } 76751906Sphk if (bp == buf) { 76889306Salfred FILEDESC_UNLOCK(fdp); 76951906Sphk numcwdfail4++; 77051906Sphk free(buf, M_TEMP); 77151906Sphk return (ENOMEM); 77251906Sphk } 77351906Sphk *--bp = '/'; 77451906Sphk slash_prefixed = 1; 77551906Sphk vp = vp->v_dd; 77651906Sphk } 77789306Salfred FILEDESC_UNLOCK(fdp); 77851906Sphk if (!slash_prefixed) { 77951906Sphk if (bp == buf) { 78051906Sphk numcwdfail4++; 78151906Sphk free(buf, M_TEMP); 78251906Sphk return (ENOMEM); 78351906Sphk } 78451906Sphk *--bp = '/'; 78551906Sphk } 78651906Sphk numcwdfound++; 78751906Sphk error = copyout(bp, uap->buf, strlen(bp) + 1); 78851906Sphk free(buf, M_TEMP); 78951906Sphk return (error); 79051906Sphk} 79151906Sphk 79259652Sgreen/* 79359652Sgreen * Thus begins the fullpath magic. 79459652Sgreen */ 79559652Sgreen 79659652Sgreen#undef STATNODE 79759652Sgreen#define STATNODE(name) \ 79859652Sgreen static u_int name; \ 79962622Sjhb SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "") 80059652Sgreen 80159652Sgreenstatic int disablefullpath; 80291690SeivindSYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0, 80391690Seivind "Disable the vn_fullpath function"); 80459652Sgreen 80559652SgreenSTATNODE(numfullpathcalls); 80659652SgreenSTATNODE(numfullpathfail1); 80759652SgreenSTATNODE(numfullpathfail2); 80859652SgreenSTATNODE(numfullpathfail3); 80959652SgreenSTATNODE(numfullpathfail4); 81059652SgreenSTATNODE(numfullpathfound); 81159652Sgreen 81291690Seivind/* 81391690Seivind * Retrieve the full filesystem path that correspond to a vnode from the name 81491690Seivind * cache (if available) 81591690Seivind */ 81659652Sgreenint 81785287Sdesvn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) 81885287Sdes{ 81959652Sgreen char *bp, *buf; 82059652Sgreen int i, slash_prefixed; 82159652Sgreen struct filedesc *fdp; 82259652Sgreen struct namecache *ncp; 82385287Sdes struct vnode *vp; 82459652Sgreen 82559652Sgreen numfullpathcalls++; 82659652Sgreen if (disablefullpath) 82759652Sgreen return (ENODEV); 82885287Sdes if (vn == NULL) 82959652Sgreen return (EINVAL); 83059652Sgreen buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 83159652Sgreen bp = buf + MAXPATHLEN - 1; 83259652Sgreen *bp = '\0'; 83385287Sdes fdp = td->td_proc->p_fd; 83459652Sgreen slash_prefixed = 0; 83589306Salfred FILEDESC_LOCK(fdp); 83685287Sdes for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) { 83759652Sgreen if (vp->v_flag & VROOT) { 83859652Sgreen if (vp->v_mount == NULL) { /* forced unmount */ 83989306Salfred FILEDESC_UNLOCK(fdp); 84059652Sgreen free(buf, M_TEMP); 84159652Sgreen return (EBADF); 84259652Sgreen } 84359652Sgreen vp = vp->v_mount->mnt_vnodecovered; 84459652Sgreen continue; 84559652Sgreen } 84685287Sdes if (vp != vn && vp->v_dd->v_id != vp->v_ddid) { 84789306Salfred FILEDESC_UNLOCK(fdp); 84859652Sgreen numfullpathfail1++; 84959652Sgreen free(buf, M_TEMP); 85059652Sgreen return (ENOTDIR); 85159652Sgreen } 85259652Sgreen ncp = TAILQ_FIRST(&vp->v_cache_dst); 85359652Sgreen if (!ncp) { 85489306Salfred FILEDESC_UNLOCK(fdp); 85559652Sgreen numfullpathfail2++; 85659652Sgreen free(buf, M_TEMP); 85759652Sgreen return (ENOENT); 85859652Sgreen } 85985287Sdes if (vp != vn && ncp->nc_dvp != vp->v_dd) { 86089306Salfred FILEDESC_UNLOCK(fdp); 86159652Sgreen numfullpathfail3++; 86259652Sgreen free(buf, M_TEMP); 86359652Sgreen return (EBADF); 86459652Sgreen } 86559652Sgreen for (i = ncp->nc_nlen - 1; i >= 0; i--) { 86659652Sgreen if (bp == buf) { 86789306Salfred FILEDESC_UNLOCK(fdp); 86859652Sgreen numfullpathfail4++; 86959652Sgreen free(buf, M_TEMP); 87059652Sgreen return (ENOMEM); 87159652Sgreen } 87259652Sgreen *--bp = ncp->nc_name[i]; 87359652Sgreen } 87459652Sgreen if (bp == buf) { 87589306Salfred FILEDESC_UNLOCK(fdp); 87659652Sgreen numfullpathfail4++; 87759652Sgreen free(buf, M_TEMP); 87859652Sgreen return (ENOMEM); 87959652Sgreen } 88059652Sgreen *--bp = '/'; 88159652Sgreen slash_prefixed = 1; 88259652Sgreen vp = ncp->nc_dvp; 88359652Sgreen } 88459652Sgreen if (!slash_prefixed) { 88559652Sgreen if (bp == buf) { 88689306Salfred FILEDESC_UNLOCK(fdp); 88759652Sgreen numfullpathfail4++; 88859652Sgreen free(buf, M_TEMP); 88959652Sgreen return (ENOMEM); 89059652Sgreen } 89159652Sgreen *--bp = '/'; 89259652Sgreen } 89389306Salfred FILEDESC_UNLOCK(fdp); 89459652Sgreen numfullpathfound++; 89559652Sgreen *retbuf = bp; 89685287Sdes *freebuf = buf; 89759652Sgreen return (0); 89859652Sgreen} 899