vfs_cache.c revision 102870
11541Srgrimes/* 222521Sdyson * Copyright (c) 1989, 1993, 1995 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 522521Sdyson * This code is derived from software contributed to Berkeley by 622521Sdyson * Poul-Henning Kamp of the FreeBSD Project. 722521Sdyson * 81541Srgrimes * Redistribution and use in source and binary forms, with or without 91541Srgrimes * modification, are permitted provided that the following conditions 101541Srgrimes * are met: 111541Srgrimes * 1. Redistributions of source code must retain the above copyright 121541Srgrimes * notice, this list of conditions and the following disclaimer. 131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer in the 151541Srgrimes * documentation and/or other materials provided with the distribution. 161541Srgrimes * 3. All advertising materials mentioning features or use of this software 171541Srgrimes * must display the following acknowledgement: 181541Srgrimes * This product includes software developed by the University of 191541Srgrimes * California, Berkeley and its contributors. 201541Srgrimes * 4. Neither the name of the University nor the names of its contributors 211541Srgrimes * may be used to endorse or promote products derived from this software 221541Srgrimes * without specific prior written permission. 231541Srgrimes * 241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 271541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 291541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 301541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 321541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 331541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 341541Srgrimes * SUCH DAMAGE. 351541Srgrimes * 3623521Sbde * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 3750477Speter * $FreeBSD: head/sys/kern/vfs_cache.c 102870 2002-09-02 22:40:30Z iedowse $ 381541Srgrimes */ 391541Srgrimes 401541Srgrimes#include <sys/param.h> 411541Srgrimes#include <sys/systm.h> 4212820Sphk#include <sys/kernel.h> 4376166Smarkm#include <sys/lock.h> 4489316Salfred#include <sys/mutex.h> 4512820Sphk#include <sys/sysctl.h> 461541Srgrimes#include <sys/mount.h> 471541Srgrimes#include <sys/vnode.h> 481541Srgrimes#include <sys/namei.h> 491541Srgrimes#include <sys/malloc.h> 50102870Siedowse#include <sys/syscallsubr.h> 5151906Sphk#include <sys/sysproto.h> 5251906Sphk#include <sys/proc.h> 5351906Sphk#include <sys/filedesc.h> 5474384Speter#include <sys/fnv_hash.h> 551541Srgrimes 5651906Sphk/* 5759652Sgreen * This structure describes the elements in the cache of recent 5859652Sgreen * names looked up by namei. 5959652Sgreen */ 6059652Sgreen 6159652Sgreenstruct namecache { 6260938Sjake LIST_ENTRY(namecache) nc_hash; /* hash chain */ 6360938Sjake LIST_ENTRY(namecache) nc_src; /* source vnode list */ 6460938Sjake TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ 6559652Sgreen struct vnode *nc_dvp; /* vnode of parent of name */ 6659652Sgreen struct vnode *nc_vp; /* vnode the name refers to */ 6759652Sgreen u_char nc_flag; /* flag bits */ 6859652Sgreen u_char nc_nlen; /* length of name */ 6959652Sgreen char nc_name[0]; /* segment name */ 7059652Sgreen}; 7159652Sgreen 7259652Sgreen/* 731541Srgrimes * Name caching works as follows: 741541Srgrimes * 751541Srgrimes * Names found by directory scans are retained in a cache 761541Srgrimes * for future reference. It is managed LRU, so frequently 771541Srgrimes * used names will hang around. Cache is indexed by hash value 781541Srgrimes * obtained from (vp, name) where vp refers to the directory 791541Srgrimes * containing name. 801541Srgrimes * 8122521Sdyson * If it is a "negative" entry, (i.e. for a name that is known NOT to 8222521Sdyson * exist) the vnode pointer will be NULL. 836968Sphk * 841541Srgrimes * Upon reaching the last segment of a path, if the reference 851541Srgrimes * is for DELETE, or NOCACHE is set (rewrite), and the 861541Srgrimes * name is located in the cache, it will be dropped. 871541Srgrimes */ 881541Srgrimes 891541Srgrimes/* 901541Srgrimes * Structures associated with name cacheing. 911541Srgrimes */ 9274501Speter#define NCHHASH(hash) \ 9374501Speter (&nchashtbl[(hash) & nchash]) 9460938Sjakestatic LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ 9560938Sjakestatic TAILQ_HEAD(, namecache) ncneg; /* Hash Table */ 9623521Sbdestatic u_long nchash; /* size of hash table */ 9762622SjhbSYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, ""); 9825453Sphkstatic u_long ncnegfactor = 16; /* ratio of negative entries */ 9962622SjhbSYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, ""); 10091690Seivindstatic u_long numneg; /* number of cache entries allocated */ 10162622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, ""); 10223521Sbdestatic u_long numcache; /* number of cache entries allocated */ 10362622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, ""); 10475654Stanimurastatic u_long numcachehv; /* number of cache entries with vnodes held */ 10575654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, ""); 10684249Sdillon#if 0 10775654Stanimurastatic u_long numcachepl; /* number of cache purge for leaf entries */ 10875654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, ""); 10984249Sdillon#endif 11022521Sdysonstruct nchstats nchstats; /* cache effectiveness statistics */ 1111541Srgrimes 11223521Sbdestatic int doingcache = 1; /* 1 => enable the cache */ 11323521SbdeSYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, ""); 11491690Seivind 11591690Seivind/* Export size information to userland */ 11625453SphkSYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), ""); 11725453SphkSYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), ""); 11823521Sbde 11929788Sphk/* 12029788Sphk * The new name cache statistics 12129788Sphk */ 12238984SbdeSYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics"); 12329788Sphk#define STATNODE(mode, name, var) \ 12462622Sjhb SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, ""); 12529788SphkSTATNODE(CTLFLAG_RD, numneg, &numneg); 12629788SphkSTATNODE(CTLFLAG_RD, numcache, &numcache); 12729788Sphkstatic u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls); 12829788Sphkstatic u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits); 12929788Sphkstatic u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits); 13029788Sphkstatic u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks); 13129788Sphkstatic u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss); 13229804Sphkstatic u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap); 13329788Sphkstatic u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps); 13429788Sphkstatic u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits); 13529788Sphkstatic u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps); 13629788Sphkstatic u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits); 13729788Sphk 13868922SrwatsonSYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats, 13968922Srwatson sizeof(nchstats), "LU", "VFS cache effectiveness statistics"); 14029788Sphk 14168922Srwatson 14268922Srwatson 14392723Salfredstatic void cache_zap(struct namecache *ncp); 1446968Sphk 14569774Sphkstatic MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); 14651906Sphk 14722521Sdyson/* 14825453Sphk * Flags in namecache.nc_flag 14925453Sphk */ 15025453Sphk#define NCF_WHITE 1 15175402Speter 15225453Sphk/* 15375402Speter * Grab an atomic snapshot of the name cache hash chain lengths 15475402Speter */ 15575402SpeterSYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats"); 15675402Speter 15775402Speterstatic int 15875402Spetersysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS) 15975402Speter{ 16075402Speter int error; 16175402Speter struct nchashhead *ncpp; 16275402Speter struct namecache *ncp; 16375402Speter int n_nchash; 16475402Speter int count; 16575402Speter 16675402Speter n_nchash = nchash + 1; /* nchash is max index, not count */ 16775402Speter if (!req->oldptr) 16875402Speter return SYSCTL_OUT(req, 0, n_nchash * sizeof(int)); 16975402Speter 17075402Speter /* Scan hash tables for applicable entries */ 17175402Speter for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 17275402Speter count = 0; 17375402Speter LIST_FOREACH(ncp, ncpp, nc_hash) { 17475402Speter count++; 17575402Speter } 17698994Salfred error = SYSCTL_OUT(req, &count, sizeof(count)); 17775402Speter if (error) 17875402Speter return (error); 17975402Speter } 18075402Speter return (0); 18175402Speter} 18275402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD, 18375402Speter 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths"); 18475402Speter 18575402Speterstatic int 18675402Spetersysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS) 18775402Speter{ 18875402Speter int error; 18975402Speter struct nchashhead *ncpp; 19075402Speter struct namecache *ncp; 19175402Speter int n_nchash; 19275402Speter int count, maxlength, used, pct; 19375402Speter 19475402Speter if (!req->oldptr) 19575402Speter return SYSCTL_OUT(req, 0, 4 * sizeof(int)); 19675402Speter 19775402Speter n_nchash = nchash + 1; /* nchash is max index, not count */ 19875402Speter used = 0; 19975402Speter maxlength = 0; 20075402Speter 20175402Speter /* Scan hash tables for applicable entries */ 20275402Speter for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 20375402Speter count = 0; 20475402Speter LIST_FOREACH(ncp, ncpp, nc_hash) { 20575402Speter count++; 20675402Speter } 20775402Speter if (count) 20875402Speter used++; 20975402Speter if (maxlength < count) 21075402Speter maxlength = count; 21175402Speter } 21275402Speter n_nchash = nchash + 1; 21375402Speter pct = (used * 100 * 100) / n_nchash; 21498994Salfred error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash)); 21575402Speter if (error) 21675402Speter return (error); 21798994Salfred error = SYSCTL_OUT(req, &used, sizeof(used)); 21875402Speter if (error) 21975402Speter return (error); 22098994Salfred error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength)); 22175402Speter if (error) 22275402Speter return (error); 22398994Salfred error = SYSCTL_OUT(req, &pct, sizeof(pct)); 22475402Speter if (error) 22575402Speter return (error); 22675402Speter return (0); 22775402Speter} 22875402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD, 22975402Speter 0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths"); 23075402Speter 23175402Speter/* 23222521Sdyson * Delete an entry from its hash list and move it to the front 23322521Sdyson * of the LRU list for immediate reuse. 23422521Sdyson */ 23525453Sphkstatic void 23625453Sphkcache_zap(ncp) 23725453Sphk struct namecache *ncp; 23825453Sphk{ 23925453Sphk LIST_REMOVE(ncp, nc_hash); 24025453Sphk LIST_REMOVE(ncp, nc_src); 24175654Stanimura if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) { 24228954Sphk vdrop(ncp->nc_dvp); 24375654Stanimura numcachehv--; 24475654Stanimura } 24525453Sphk if (ncp->nc_vp) { 24625453Sphk TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); 24725453Sphk } else { 24825453Sphk TAILQ_REMOVE(&ncneg, ncp, nc_dst); 24925453Sphk numneg--; 25025453Sphk } 25125453Sphk numcache--; 25251906Sphk free(ncp, M_VFSCACHE); 25322521Sdyson} 2546968Sphk 25522521Sdyson/* 25684249Sdillon * cache_leaf_test() 25784249Sdillon * 25884249Sdillon * Test whether this (directory) vnode's namei cache entry contains 25984249Sdillon * subdirectories or not. Used to determine whether the directory is 26084249Sdillon * a leaf in the namei cache or not. Note: the directory may still 26184249Sdillon * contain files in the namei cache. 26284249Sdillon * 26384249Sdillon * Returns 0 if the directory is a leaf, -1 if it isn't. 26484249Sdillon */ 26584249Sdillonint 26684249Sdilloncache_leaf_test(struct vnode *vp) 26784249Sdillon{ 26884249Sdillon struct namecache *ncpc; 26984249Sdillon 27084249Sdillon for (ncpc = LIST_FIRST(&vp->v_cache_src); 27184249Sdillon ncpc != NULL; 27284249Sdillon ncpc = LIST_NEXT(ncpc, nc_src) 27384249Sdillon ) { 27484249Sdillon if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR) 27584249Sdillon return(-1); 27684249Sdillon } 27784249Sdillon return(0); 27884249Sdillon} 27984249Sdillon 28084249Sdillon/* 28123521Sbde * Lookup an entry in the cache 2826968Sphk * 2836968Sphk * Lookup is called with dvp pointing to the directory to search, 28422521Sdyson * cnp pointing to the name of the entry being sought. If the lookup 28522521Sdyson * succeeds, the vnode is returned in *vpp, and a status of -1 is 28622521Sdyson * returned. If the lookup determines that the name does not exist 28722521Sdyson * (negative cacheing), a status of ENOENT is returned. If the lookup 28822521Sdyson * fails, a status of zero is returned. 2891541Srgrimes */ 2906968Sphk 2911541Srgrimesint 2921541Srgrimescache_lookup(dvp, vpp, cnp) 2931541Srgrimes struct vnode *dvp; 2941541Srgrimes struct vnode **vpp; 2951541Srgrimes struct componentname *cnp; 2961541Srgrimes{ 29751906Sphk struct namecache *ncp; 29874384Speter u_int32_t hash; 2991541Srgrimes 3006928Sphk if (!doingcache) { 3016928Sphk cnp->cn_flags &= ~MAKEENTRY; 3021541Srgrimes return (0); 3036928Sphk } 30425453Sphk 30529788Sphk numcalls++; 30629788Sphk 30725453Sphk if (cnp->cn_nameptr[0] == '.') { 30825453Sphk if (cnp->cn_namelen == 1) { 30925453Sphk *vpp = dvp; 31029788Sphk dothits++; 31125453Sphk return (-1); 31225453Sphk } 31325453Sphk if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 31429788Sphk dotdothits++; 31525453Sphk if (dvp->v_dd->v_id != dvp->v_ddid || 31625453Sphk (cnp->cn_flags & MAKEENTRY) == 0) { 31725453Sphk dvp->v_ddid = 0; 31825453Sphk return (0); 31925453Sphk } 32025453Sphk *vpp = dvp->v_dd; 32125453Sphk return (-1); 32225453Sphk } 3231541Srgrimes } 3246968Sphk 32574501Speter hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT); 32674501Speter hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 32774501Speter LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { 32829788Sphk numchecks++; 32925453Sphk if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && 33031879Sbde !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) 33122521Sdyson break; 3321541Srgrimes } 3336968Sphk 33422521Sdyson /* We failed to find an entry */ 33522521Sdyson if (ncp == 0) { 33629804Sphk if ((cnp->cn_flags & MAKEENTRY) == 0) { 33729804Sphk nummisszap++; 33829804Sphk } else { 33929804Sphk nummiss++; 34029804Sphk } 34122521Sdyson nchstats.ncs_miss++; 34222521Sdyson return (0); 34322521Sdyson } 34422521Sdyson 3456968Sphk /* We don't want to have an entry, so dump it */ 3466928Sphk if ((cnp->cn_flags & MAKEENTRY) == 0) { 34729788Sphk numposzaps++; 3481541Srgrimes nchstats.ncs_badhits++; 34925453Sphk cache_zap(ncp); 3506968Sphk return (0); 35123521Sbde } 3526968Sphk 3536968Sphk /* We found a "positive" match, return the vnode */ 35422521Sdyson if (ncp->nc_vp) { 35529788Sphk numposhits++; 3561541Srgrimes nchstats.ncs_goodhits++; 3571541Srgrimes *vpp = ncp->nc_vp; 3581541Srgrimes return (-1); 3591541Srgrimes } 3601541Srgrimes 3616968Sphk /* We found a negative match, and want to create it, so purge */ 3626968Sphk if (cnp->cn_nameiop == CREATE) { 36329788Sphk numnegzaps++; 3647013Sphk nchstats.ncs_badhits++; 36525453Sphk cache_zap(ncp); 3666968Sphk return (0); 3676968Sphk } 3686968Sphk 36929788Sphk numneghits++; 37022521Sdyson /* 37122521Sdyson * We found a "negative" match, ENOENT notifies client of this match. 37222521Sdyson * The nc_vpid field records whether this is a whiteout. 37322521Sdyson */ 37425453Sphk TAILQ_REMOVE(&ncneg, ncp, nc_dst); 37525453Sphk TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 3766968Sphk nchstats.ncs_neghits++; 37725453Sphk if (ncp->nc_flag & NCF_WHITE) 37825453Sphk cnp->cn_flags |= ISWHITEOUT; 3796968Sphk return (ENOENT); 3801541Srgrimes} 3811541Srgrimes 3821541Srgrimes/* 3836968Sphk * Add an entry to the cache. 3841541Srgrimes */ 3851549Srgrimesvoid 3861541Srgrimescache_enter(dvp, vp, cnp) 3871541Srgrimes struct vnode *dvp; 3881541Srgrimes struct vnode *vp; 3891541Srgrimes struct componentname *cnp; 3901541Srgrimes{ 39151906Sphk struct namecache *ncp; 39251906Sphk struct nchashhead *ncpp; 39374384Speter u_int32_t hash; 39451906Sphk int len; 3951541Srgrimes 3961541Srgrimes if (!doingcache) 3971541Srgrimes return; 3986968Sphk 39925453Sphk if (cnp->cn_nameptr[0] == '.') { 40025453Sphk if (cnp->cn_namelen == 1) { 40125453Sphk return; 4026928Sphk } 40325453Sphk if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 40425453Sphk if (vp) { 40525453Sphk dvp->v_dd = vp; 40625453Sphk dvp->v_ddid = vp->v_id; 40725453Sphk } else { 40825453Sphk dvp->v_dd = dvp; 40925453Sphk dvp->v_ddid = 0; 41025453Sphk } 41125453Sphk return; 41225453Sphk } 4136968Sphk } 41425453Sphk 41525453Sphk ncp = (struct namecache *) 41651906Sphk malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK); 41725453Sphk bzero((char *)ncp, sizeof *ncp); 41825453Sphk numcache++; 41928954Sphk if (!vp) { 42025453Sphk numneg++; 42128954Sphk ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0; 42229071Sphk } else if (vp->v_type == VDIR) { 42329071Sphk vp->v_dd = dvp; 42429071Sphk vp->v_ddid = dvp->v_id; 42528954Sphk } 42623521Sbde 42722521Sdyson /* 42822521Sdyson * Fill in cache info, if vp is NULL this is a "negative" cache entry. 42922521Sdyson * For negative entries, we have to record whether it is a whiteout. 43022521Sdyson * the whiteout flag is stored in the nc_vpid field which is 43122521Sdyson * otherwise unused. 43222521Sdyson */ 4331541Srgrimes ncp->nc_vp = vp; 4341541Srgrimes ncp->nc_dvp = dvp; 43551906Sphk len = ncp->nc_nlen = cnp->cn_namelen; 43674501Speter hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT); 43774384Speter bcopy(cnp->cn_nameptr, ncp->nc_name, len); 43874501Speter hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 43974501Speter ncpp = NCHHASH(hash); 4406928Sphk LIST_INSERT_HEAD(ncpp, ncp, nc_hash); 44175654Stanimura if (LIST_EMPTY(&dvp->v_cache_src)) { 44228954Sphk vhold(dvp); 44375654Stanimura numcachehv++; 44475654Stanimura } 44525453Sphk LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); 44625453Sphk if (vp) { 44725453Sphk TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); 44825453Sphk } else { 44925453Sphk TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 45025453Sphk } 45151906Sphk if (numneg * ncnegfactor > numcache) { 45225453Sphk ncp = TAILQ_FIRST(&ncneg); 45325453Sphk cache_zap(ncp); 45425453Sphk } 4551541Srgrimes} 4561541Srgrimes 4571541Srgrimes/* 4581541Srgrimes * Name cache initialization, from vfs_init() when we are booting 4591541Srgrimes */ 46069664Speterstatic void 46169664Speternchinit(void *dummy __unused) 4621541Srgrimes{ 46323521Sbde 46425453Sphk TAILQ_INIT(&ncneg); 46569664Speter nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash); 4661541Srgrimes} 46769664SpeterSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL) 4681541Srgrimes 46969664Speter 4701541Srgrimes/* 47146011Sphk * Invalidate all entries to a particular vnode. 47223521Sbde * 47346011Sphk * Remove all entries in the namecache relating to this vnode and 47446011Sphk * change the v_id. We take the v_id from a global counter, since 47546011Sphk * it becomes a handy sequence number in crash-dumps that way. 47646011Sphk * No valid vnode will ever have (v_id == 0). 47746011Sphk * 47846011Sphk * XXX: Only time and the size of v_id prevents this from failing: 47946011Sphk * XXX: In theory we should hunt down all (struct vnode*, v_id) 48046011Sphk * XXX: soft references and nuke them, at least on the global 48146011Sphk * XXX: v_id wraparound. The period of resistance can be extended 48246011Sphk * XXX: by incrementing each vnodes v_id individually instead of 48346011Sphk * XXX: using the global v_id. 4841541Srgrimes */ 48546011Sphk 4861549Srgrimesvoid 4871541Srgrimescache_purge(vp) 4881541Srgrimes struct vnode *vp; 4891541Srgrimes{ 49029094Sphk static u_long nextid; 4911541Srgrimes 49225453Sphk while (!LIST_EMPTY(&vp->v_cache_src)) 49325453Sphk cache_zap(LIST_FIRST(&vp->v_cache_src)); 49425453Sphk while (!TAILQ_EMPTY(&vp->v_cache_dst)) 49525453Sphk cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); 49625453Sphk 49746011Sphk do 49846011Sphk nextid++; 49946011Sphk while (nextid == vp->v_id || !nextid); 50029094Sphk vp->v_id = nextid; 50125453Sphk vp->v_dd = vp; 50225453Sphk vp->v_ddid = 0; 5031541Srgrimes} 5041541Srgrimes 5051541Srgrimes/* 5066968Sphk * Flush all entries referencing a particular filesystem. 5071541Srgrimes * 5086968Sphk * Since we need to check it anyway, we will flush all the invalid 50912968Sphk * entries at the same time. 5101541Srgrimes */ 5111549Srgrimesvoid 5121541Srgrimescache_purgevfs(mp) 5131541Srgrimes struct mount *mp; 5141541Srgrimes{ 5156968Sphk struct nchashhead *ncpp; 51622521Sdyson struct namecache *ncp, *nnp; 5171541Srgrimes 5186968Sphk /* Scan hash tables for applicable entries */ 51929071Sphk for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { 52025453Sphk for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) { 52125453Sphk nnp = LIST_NEXT(ncp, nc_hash); 52225453Sphk if (ncp->nc_dvp->v_mount == mp) { 52325453Sphk cache_zap(ncp); 5246968Sphk } 5251541Srgrimes } 5261541Srgrimes } 5271541Srgrimes} 52828787Sphk 52928787Sphk/* 53028787Sphk * Perform canonical checks and cache lookup and pass on to filesystem 53128787Sphk * through the vop_cachedlookup only if needed. 53228787Sphk */ 53328787Sphk 53428787Sphkint 53528787Sphkvfs_cache_lookup(ap) 53628787Sphk struct vop_lookup_args /* { 53728787Sphk struct vnode *a_dvp; 53828787Sphk struct vnode **a_vpp; 53928787Sphk struct componentname *a_cnp; 54028787Sphk } */ *ap; 54128787Sphk{ 54265665Sbp struct vnode *dvp, *vp; 54365665Sbp int lockparent; 54428787Sphk int error; 54528787Sphk struct vnode **vpp = ap->a_vpp; 54628787Sphk struct componentname *cnp = ap->a_cnp; 54728787Sphk struct ucred *cred = cnp->cn_cred; 54828787Sphk int flags = cnp->cn_flags; 54983366Sjulian struct thread *td = cnp->cn_thread; 55028787Sphk u_long vpid; /* capability number of vnode */ 55128787Sphk 55228787Sphk *vpp = NULL; 55365665Sbp dvp = ap->a_dvp; 55428787Sphk lockparent = flags & LOCKPARENT; 55528787Sphk 55665665Sbp if (dvp->v_type != VDIR) 55728787Sphk return (ENOTDIR); 55828787Sphk 55965665Sbp if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && 56028787Sphk (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 56128787Sphk return (EROFS); 56228787Sphk 56383366Sjulian error = VOP_ACCESS(dvp, VEXEC, cred, td); 56428787Sphk 56528787Sphk if (error) 56628787Sphk return (error); 56728787Sphk 56865665Sbp error = cache_lookup(dvp, vpp, cnp); 56928787Sphk 57096616Sjeff#ifdef LOOKUP_SHARED 57192130Sjeff if (!error) { 57292130Sjeff /* We do this because the rest of the system now expects to get 57392130Sjeff * a shared lock, which is later upgraded if LOCKSHARED is not 57492130Sjeff * set. We have so many cases here because of bugs that yield 57592130Sjeff * inconsistant lock states. This all badly needs to be fixed 57692130Sjeff */ 57792130Sjeff error = VOP_CACHEDLOOKUP(dvp, vpp, cnp); 57892130Sjeff if (!error) { 57992130Sjeff int flock; 58092130Sjeff 58192130Sjeff flock = VOP_ISLOCKED(*vpp, td); 58292130Sjeff if (flock != LK_EXCLUSIVE) { 58392130Sjeff if (flock == 0) { 58492130Sjeff if ((flags & ISLASTCN) && 58592130Sjeff (flags & LOCKSHARED)) 58692130Sjeff VOP_LOCK(*vpp, LK_SHARED, td); 58792130Sjeff else 58892130Sjeff VOP_LOCK(*vpp, LK_EXCLUSIVE, td); 58992130Sjeff } 59092130Sjeff } else if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 59192130Sjeff VOP_LOCK(*vpp, LK_DOWNGRADE, td); 59292130Sjeff } 59392130Sjeff return (error); 59492130Sjeff } 59592130Sjeff#else 59628787Sphk if (!error) 59765665Sbp return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 59892130Sjeff#endif 59928787Sphk 60028787Sphk if (error == ENOENT) 60128787Sphk return (error); 60228787Sphk 60365665Sbp vp = *vpp; 60465665Sbp vpid = vp->v_id; 60565973Sbp cnp->cn_flags &= ~PDIRUNLOCK; 60665665Sbp if (dvp == vp) { /* lookup on "." */ 60765665Sbp VREF(vp); 60828787Sphk error = 0; 60928787Sphk } else if (flags & ISDOTDOT) { 61083366Sjulian VOP_UNLOCK(dvp, 0, td); 61165973Sbp cnp->cn_flags |= PDIRUNLOCK; 61296616Sjeff#ifdef LOOKUP_SHARED 61392130Sjeff if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 61492130Sjeff error = vget(vp, LK_SHARED, td); 61592130Sjeff else 61692130Sjeff error = vget(vp, LK_EXCLUSIVE, td); 61792130Sjeff#else 61883366Sjulian error = vget(vp, LK_EXCLUSIVE, td); 61992130Sjeff#endif 62092130Sjeff 62165973Sbp if (!error && lockparent && (flags & ISLASTCN)) { 62283366Sjulian if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0) 62365973Sbp cnp->cn_flags &= ~PDIRUNLOCK; 62465973Sbp } 62528787Sphk } else { 62696616Sjeff#ifdef LOOKUP_SHARED 62792130Sjeff if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 62892130Sjeff error = vget(vp, LK_SHARED, td); 62992130Sjeff else 63092130Sjeff error = vget(vp, LK_EXCLUSIVE, td); 63192130Sjeff#else 63283366Sjulian error = vget(vp, LK_EXCLUSIVE, td); 63392130Sjeff#endif 63465973Sbp if (!lockparent || error || !(flags & ISLASTCN)) { 63583366Sjulian VOP_UNLOCK(dvp, 0, td); 63665973Sbp cnp->cn_flags |= PDIRUNLOCK; 63765973Sbp } 63828787Sphk } 63928787Sphk /* 64028787Sphk * Check that the capability number did not change 64128787Sphk * while we were waiting for the lock. 64228787Sphk */ 64328787Sphk if (!error) { 64465665Sbp if (vpid == vp->v_id) 64528787Sphk return (0); 64665665Sbp vput(vp); 64765973Sbp if (lockparent && dvp != vp && (flags & ISLASTCN)) { 64883366Sjulian VOP_UNLOCK(dvp, 0, td); 64965973Sbp cnp->cn_flags |= PDIRUNLOCK; 65065973Sbp } 65128787Sphk } 65265973Sbp if (cnp->cn_flags & PDIRUNLOCK) { 65383366Sjulian error = vn_lock(dvp, LK_EXCLUSIVE, td); 65465973Sbp if (error) 65565973Sbp return (error); 65665973Sbp cnp->cn_flags &= ~PDIRUNLOCK; 65765973Sbp } 65896616Sjeff#ifdef LOOKUP_SHARED 65992130Sjeff error = VOP_CACHEDLOOKUP(dvp, vpp, cnp); 66092130Sjeff 66192130Sjeff if (!error) { 66292130Sjeff int flock = 0; 66392130Sjeff 66492130Sjeff flock = VOP_ISLOCKED(*vpp, td); 66592130Sjeff if (flock != LK_EXCLUSIVE) { 66692130Sjeff if (flock == 0) { 66792130Sjeff if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 66892130Sjeff VOP_LOCK(*vpp, LK_SHARED, td); 66992130Sjeff else 67092130Sjeff VOP_LOCK(*vpp, LK_EXCLUSIVE, td); 67192130Sjeff } 67292130Sjeff } else if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 67392130Sjeff VOP_LOCK(*vpp, LK_DOWNGRADE, td); 67492130Sjeff } 67592130Sjeff 67692130Sjeff return (error); 67792130Sjeff#else 67865665Sbp return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 67992130Sjeff#endif 68028787Sphk} 68151906Sphk 68251906Sphk 68351906Sphk#ifndef _SYS_SYSPROTO_H_ 68451906Sphkstruct __getcwd_args { 68551906Sphk u_char *buf; 68651906Sphk u_int buflen; 68751906Sphk}; 68851906Sphk#endif 68951906Sphk 69091690Seivind/* 69191690Seivind * XXX All of these sysctls would probably be more productive dead. 69291690Seivind */ 69351906Sphkstatic int disablecwd; 69491690SeivindSYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, 69591690Seivind "Disable the getcwd syscall"); 69651906Sphk 69791690Seivind/* Various statistics for the getcwd syscall */ 69851906Sphkstatic u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls); 69951906Sphkstatic u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1); 70051906Sphkstatic u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2); 70151906Sphkstatic u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); 70251906Sphkstatic u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); 70351906Sphkstatic u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); 70491690Seivind 70591690Seivind/* Implementation of the getcwd syscall */ 70651906Sphkint 70783366Sjulian__getcwd(td, uap) 70883366Sjulian struct thread *td; 70951906Sphk struct __getcwd_args *uap; 71051906Sphk{ 711102870Siedowse 712102870Siedowse return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen)); 713102870Siedowse} 714102870Siedowse 715102870Siedowseint 716102870Siedowsekern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen) 717102870Siedowse{ 718102870Siedowse char *bp, *tmpbuf; 71951906Sphk int error, i, slash_prefixed; 72051906Sphk struct filedesc *fdp; 72151906Sphk struct namecache *ncp; 72251906Sphk struct vnode *vp; 72351906Sphk 72451906Sphk numcwdcalls++; 72551906Sphk if (disablecwd) 72651906Sphk return (ENODEV); 727102870Siedowse if (buflen < 2) 72851906Sphk return (EINVAL); 729102870Siedowse if (buflen > MAXPATHLEN) 730102870Siedowse buflen = MAXPATHLEN; 731102870Siedowse error = 0; 732102870Siedowse tmpbuf = bp = malloc(buflen, M_TEMP, M_WAITOK); 733102870Siedowse bp += buflen - 1; 73451906Sphk *bp = '\0'; 73583366Sjulian fdp = td->td_proc->p_fd; 73651906Sphk slash_prefixed = 0; 73789306Salfred FILEDESC_LOCK(fdp); 738101308Sjeff mp_fixme("No vnode locking done!"); 73951906Sphk for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) { 740101308Sjeff if (vp->v_vflag & VV_ROOT) { 74183000Siedowse if (vp->v_mount == NULL) { /* forced unmount */ 74289306Salfred FILEDESC_UNLOCK(fdp); 743102870Siedowse free(tmpbuf, M_TEMP); 74457199Speter return (EBADF); 74583000Siedowse } 74651906Sphk vp = vp->v_mount->mnt_vnodecovered; 74751906Sphk continue; 74851906Sphk } 74951906Sphk if (vp->v_dd->v_id != vp->v_ddid) { 75089306Salfred FILEDESC_UNLOCK(fdp); 75151906Sphk numcwdfail1++; 752102870Siedowse free(tmpbuf, M_TEMP); 75351906Sphk return (ENOTDIR); 75451906Sphk } 75551906Sphk ncp = TAILQ_FIRST(&vp->v_cache_dst); 75651906Sphk if (!ncp) { 75789306Salfred FILEDESC_UNLOCK(fdp); 75851906Sphk numcwdfail2++; 759102870Siedowse free(tmpbuf, M_TEMP); 76051906Sphk return (ENOENT); 76151906Sphk } 76251906Sphk if (ncp->nc_dvp != vp->v_dd) { 76389306Salfred FILEDESC_UNLOCK(fdp); 76451906Sphk numcwdfail3++; 765102870Siedowse free(tmpbuf, M_TEMP); 76651906Sphk return (EBADF); 76751906Sphk } 76851906Sphk for (i = ncp->nc_nlen - 1; i >= 0; i--) { 769102870Siedowse if (bp == tmpbuf) { 77089306Salfred FILEDESC_UNLOCK(fdp); 77151906Sphk numcwdfail4++; 772102870Siedowse free(tmpbuf, M_TEMP); 77351906Sphk return (ENOMEM); 77451906Sphk } 77551906Sphk *--bp = ncp->nc_name[i]; 77651906Sphk } 777102870Siedowse if (bp == tmpbuf) { 77889306Salfred FILEDESC_UNLOCK(fdp); 77951906Sphk numcwdfail4++; 780102870Siedowse free(tmpbuf, M_TEMP); 78151906Sphk return (ENOMEM); 78251906Sphk } 78351906Sphk *--bp = '/'; 78451906Sphk slash_prefixed = 1; 78551906Sphk vp = vp->v_dd; 78651906Sphk } 78789306Salfred FILEDESC_UNLOCK(fdp); 78851906Sphk if (!slash_prefixed) { 789102870Siedowse if (bp == tmpbuf) { 79051906Sphk numcwdfail4++; 791102870Siedowse free(tmpbuf, M_TEMP); 79251906Sphk return (ENOMEM); 79351906Sphk } 79451906Sphk *--bp = '/'; 79551906Sphk } 79651906Sphk numcwdfound++; 797102870Siedowse if (bufseg == UIO_SYSSPACE) 798102870Siedowse bcopy(bp, buf, strlen(bp) + 1); 799102870Siedowse else 800102870Siedowse error = copyout(bp, buf, strlen(bp) + 1); 801102870Siedowse free(tmpbuf, M_TEMP); 80251906Sphk return (error); 80351906Sphk} 80451906Sphk 80559652Sgreen/* 80659652Sgreen * Thus begins the fullpath magic. 80759652Sgreen */ 80859652Sgreen 80959652Sgreen#undef STATNODE 81059652Sgreen#define STATNODE(name) \ 81159652Sgreen static u_int name; \ 81262622Sjhb SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "") 81359652Sgreen 81459652Sgreenstatic int disablefullpath; 81591690SeivindSYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0, 81691690Seivind "Disable the vn_fullpath function"); 81759652Sgreen 81859652SgreenSTATNODE(numfullpathcalls); 81959652SgreenSTATNODE(numfullpathfail1); 82059652SgreenSTATNODE(numfullpathfail2); 82159652SgreenSTATNODE(numfullpathfail3); 82259652SgreenSTATNODE(numfullpathfail4); 82359652SgreenSTATNODE(numfullpathfound); 82459652Sgreen 82591690Seivind/* 82691690Seivind * Retrieve the full filesystem path that correspond to a vnode from the name 82791690Seivind * cache (if available) 82891690Seivind */ 82959652Sgreenint 83085287Sdesvn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) 83185287Sdes{ 83259652Sgreen char *bp, *buf; 83359652Sgreen int i, slash_prefixed; 83459652Sgreen struct filedesc *fdp; 83559652Sgreen struct namecache *ncp; 83685287Sdes struct vnode *vp; 83759652Sgreen 83859652Sgreen numfullpathcalls++; 83959652Sgreen if (disablefullpath) 84059652Sgreen return (ENODEV); 84185287Sdes if (vn == NULL) 84259652Sgreen return (EINVAL); 84359652Sgreen buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 84459652Sgreen bp = buf + MAXPATHLEN - 1; 84559652Sgreen *bp = '\0'; 84685287Sdes fdp = td->td_proc->p_fd; 84759652Sgreen slash_prefixed = 0; 84889306Salfred FILEDESC_LOCK(fdp); 84985287Sdes for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) { 850101365Sjeff ASSERT_VOP_LOCKED(vp, "vn_fullpath"); 851101308Sjeff if (vp->v_vflag & VV_ROOT) { 85259652Sgreen if (vp->v_mount == NULL) { /* forced unmount */ 85389306Salfred FILEDESC_UNLOCK(fdp); 85459652Sgreen free(buf, M_TEMP); 85559652Sgreen return (EBADF); 85659652Sgreen } 85759652Sgreen vp = vp->v_mount->mnt_vnodecovered; 85859652Sgreen continue; 85959652Sgreen } 86085287Sdes if (vp != vn && vp->v_dd->v_id != vp->v_ddid) { 86189306Salfred FILEDESC_UNLOCK(fdp); 86259652Sgreen numfullpathfail1++; 86359652Sgreen free(buf, M_TEMP); 86459652Sgreen return (ENOTDIR); 86559652Sgreen } 86659652Sgreen ncp = TAILQ_FIRST(&vp->v_cache_dst); 86759652Sgreen if (!ncp) { 86889306Salfred FILEDESC_UNLOCK(fdp); 86959652Sgreen numfullpathfail2++; 87059652Sgreen free(buf, M_TEMP); 87159652Sgreen return (ENOENT); 87259652Sgreen } 87385287Sdes if (vp != vn && ncp->nc_dvp != vp->v_dd) { 87489306Salfred FILEDESC_UNLOCK(fdp); 87559652Sgreen numfullpathfail3++; 87659652Sgreen free(buf, M_TEMP); 87759652Sgreen return (EBADF); 87859652Sgreen } 87959652Sgreen for (i = ncp->nc_nlen - 1; i >= 0; i--) { 88059652Sgreen if (bp == buf) { 88189306Salfred FILEDESC_UNLOCK(fdp); 88259652Sgreen numfullpathfail4++; 88359652Sgreen free(buf, M_TEMP); 88459652Sgreen return (ENOMEM); 88559652Sgreen } 88659652Sgreen *--bp = ncp->nc_name[i]; 88759652Sgreen } 88859652Sgreen if (bp == buf) { 88989306Salfred FILEDESC_UNLOCK(fdp); 89059652Sgreen numfullpathfail4++; 89159652Sgreen free(buf, M_TEMP); 89259652Sgreen return (ENOMEM); 89359652Sgreen } 89459652Sgreen *--bp = '/'; 89559652Sgreen slash_prefixed = 1; 89659652Sgreen vp = ncp->nc_dvp; 89759652Sgreen } 89859652Sgreen if (!slash_prefixed) { 89959652Sgreen if (bp == buf) { 90089306Salfred FILEDESC_UNLOCK(fdp); 90159652Sgreen numfullpathfail4++; 90259652Sgreen free(buf, M_TEMP); 90359652Sgreen return (ENOMEM); 90459652Sgreen } 90559652Sgreen *--bp = '/'; 90659652Sgreen } 90789306Salfred FILEDESC_UNLOCK(fdp); 90859652Sgreen numfullpathfound++; 90959652Sgreen *retbuf = bp; 91085287Sdes *freebuf = buf; 91159652Sgreen return (0); 91259652Sgreen} 913