vfs_cache.c revision 84249
11541Srgrimes/* 222521Sdyson * Copyright (c) 1989, 1993, 1995 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 522521Sdyson * This code is derived from software contributed to Berkeley by 622521Sdyson * Poul-Henning Kamp of the FreeBSD Project. 722521Sdyson * 81541Srgrimes * Redistribution and use in source and binary forms, with or without 91541Srgrimes * modification, are permitted provided that the following conditions 101541Srgrimes * are met: 111541Srgrimes * 1. Redistributions of source code must retain the above copyright 121541Srgrimes * notice, this list of conditions and the following disclaimer. 131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer in the 151541Srgrimes * documentation and/or other materials provided with the distribution. 161541Srgrimes * 3. All advertising materials mentioning features or use of this software 171541Srgrimes * must display the following acknowledgement: 181541Srgrimes * This product includes software developed by the University of 191541Srgrimes * California, Berkeley and its contributors. 201541Srgrimes * 4. Neither the name of the University nor the names of its contributors 211541Srgrimes * may be used to endorse or promote products derived from this software 221541Srgrimes * without specific prior written permission. 231541Srgrimes * 241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 271541Srgrimes * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 291541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 301541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 321541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 331541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 341541Srgrimes * SUCH DAMAGE. 351541Srgrimes * 3623521Sbde * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 3750477Speter * $FreeBSD: head/sys/kern/vfs_cache.c 84249 2001-10-01 04:33:35Z dillon $ 381541Srgrimes */ 391541Srgrimes 401541Srgrimes#include <sys/param.h> 411541Srgrimes#include <sys/systm.h> 4212820Sphk#include <sys/kernel.h> 4376166Smarkm#include <sys/lock.h> 4412820Sphk#include <sys/sysctl.h> 451541Srgrimes#include <sys/mount.h> 461541Srgrimes#include <sys/vnode.h> 471541Srgrimes#include <sys/namei.h> 481541Srgrimes#include <sys/malloc.h> 4951906Sphk#include <sys/sysproto.h> 5051906Sphk#include <sys/proc.h> 5151906Sphk#include <sys/filedesc.h> 5274384Speter#include <sys/fnv_hash.h> 531541Srgrimes 5451906Sphk/* 5559652Sgreen * This structure describes the elements in the cache of recent 5659652Sgreen * names looked up by namei. 
 */

/*
 * One entry maps (nc_dvp, nc_name[0..nc_nlen-1]) to nc_vp.  An entry is
 * linked on three lists at once:
 *   nc_hash - a bucket of the global hash table,
 *   nc_src  - the v_cache_src list of the parent directory vnode,
 *   nc_dst  - the v_cache_dst list of the target vnode for a positive
 *             entry, or the global list of negative entries otherwise.
 * The name is stored in the same allocation, directly after the fixed
 * part of the structure, and is NOT NUL-terminated; always use nc_nlen.
 */
struct namecache {
	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
	struct	vnode *nc_dvp;		/* vnode of parent of name */
	struct	vnode *nc_vp;		/* vnode the name refers to, NULL if negative */
	u_char	nc_flag;		/* flag bits (NCF_*) */
	u_char	nc_nlen;		/* length of name in bytes */
	char	nc_name[0];		/* segment name, nc_nlen bytes, no NUL */
};

/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  Cache is indexed by hash value
 * obtained from (vp, name) where vp refers to the directory
 * containing name.
 *
 * If it is a "negative" entry, (i.e. for a name that is known NOT to
 * exist) the vnode pointer will be NULL.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 */

/*
 * Structures associated with name caching.
 */
/* Map a 32-bit hash value to its bucket; nchash is used as a bit mask. */
#define NCHHASH(hash) \
	(&nchashtbl[(hash) & nchash])
static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
static TAILQ_HEAD(, namecache) ncneg;	/* negative entries, least recently used first */
static u_long	nchash;			/* hash mask: highest valid bucket index */
SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
static u_long	ncnegfactor = 16;	/* evict a negative entry when numneg * ncnegfactor > numcache */
SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
static u_long	numneg;		/* number of negative cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
static u_long	numcache;		/* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
static u_long	numcachehv;		/* number of directory vnodes held (vhold) by the cache */
SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
#if 0
static u_long	numcachepl;		/* number of cache purge for leaf entries */
SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
#endif
struct	nchstats nchstats;		/* cache effectiveness statistics */

static int	doingcache = 1;		/* 1 => enable the cache */
SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
/* Read-only constants: the sizes of struct vnode and struct namecache. */
SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");

/*
 * The new name cache statistics
 *
 * STATNODE(mode, name, var) publishes the u_long at "var" as vfs.cache.<name>.
 * Note that the macro expansion already ends in a semicolon.
 */
SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
#define STATNODE(mode, name, var) \
	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
12129788SphkSTATNODE(CTLFLAG_RD, numneg, &numneg); 12229788SphkSTATNODE(CTLFLAG_RD, numcache, &numcache); 12329788Sphkstatic u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls); 12429788Sphkstatic u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits); 12529788Sphkstatic u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits); 12629788Sphkstatic u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks); 12729788Sphkstatic u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss); 12829804Sphkstatic u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap); 12929788Sphkstatic u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps); 13029788Sphkstatic u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits); 13129788Sphkstatic u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps); 13229788Sphkstatic u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits); 13329788Sphk 13468922SrwatsonSYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats, 13568922Srwatson sizeof(nchstats), "LU", "VFS cache effectiveness statistics"); 13629788Sphk 13768922Srwatson 13868922Srwatson 13925453Sphkstatic void cache_zap __P((struct namecache *ncp)); 1406968Sphk 14169774Sphkstatic MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); 14251906Sphk 14322521Sdyson/* 14425453Sphk * Flags in namecache.nc_flag 14525453Sphk */ 14625453Sphk#define NCF_WHITE 1 14775402Speter 14825453Sphk/* 14975402Speter * Grab an atomic snapshot of the name cache hash chain lengths 15075402Speter */ 15175402SpeterSYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats"); 15275402Speter 15375402Speterstatic int 15475402Spetersysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS) 15575402Speter{ 15675402Speter int error; 15775402Speter struct nchashhead *ncpp; 15875402Speter struct namecache *ncp; 15975402Speter int n_nchash; 16075402Speter int count; 16175402Speter 16275402Speter n_nchash = nchash + 1; /* nchash 
is max index, not count */ 16375402Speter if (!req->oldptr) 16475402Speter return SYSCTL_OUT(req, 0, n_nchash * sizeof(int)); 16575402Speter 16675402Speter /* Scan hash tables for applicable entries */ 16775402Speter for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 16875402Speter count = 0; 16975402Speter LIST_FOREACH(ncp, ncpp, nc_hash) { 17075402Speter count++; 17175402Speter } 17275402Speter error = SYSCTL_OUT(req, (caddr_t)&count, sizeof(count)); 17375402Speter if (error) 17475402Speter return (error); 17575402Speter } 17675402Speter return (0); 17775402Speter} 17875402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD, 17975402Speter 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths"); 18075402Speter 18175402Speterstatic int 18275402Spetersysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS) 18375402Speter{ 18475402Speter int error; 18575402Speter struct nchashhead *ncpp; 18675402Speter struct namecache *ncp; 18775402Speter int n_nchash; 18875402Speter int count, maxlength, used, pct; 18975402Speter 19075402Speter if (!req->oldptr) 19175402Speter return SYSCTL_OUT(req, 0, 4 * sizeof(int)); 19275402Speter 19375402Speter n_nchash = nchash + 1; /* nchash is max index, not count */ 19475402Speter used = 0; 19575402Speter maxlength = 0; 19675402Speter 19775402Speter /* Scan hash tables for applicable entries */ 19875402Speter for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 19975402Speter count = 0; 20075402Speter LIST_FOREACH(ncp, ncpp, nc_hash) { 20175402Speter count++; 20275402Speter } 20375402Speter if (count) 20475402Speter used++; 20575402Speter if (maxlength < count) 20675402Speter maxlength = count; 20775402Speter } 20875402Speter n_nchash = nchash + 1; 20975402Speter pct = (used * 100 * 100) / n_nchash; 21075402Speter error = SYSCTL_OUT(req, (caddr_t)&n_nchash, sizeof(n_nchash)); 21175402Speter if (error) 21275402Speter return (error); 21375402Speter error = SYSCTL_OUT(req, (caddr_t)&used, 
sizeof(used)); 21475402Speter if (error) 21575402Speter return (error); 21675402Speter error = SYSCTL_OUT(req, (caddr_t)&maxlength, sizeof(maxlength)); 21775402Speter if (error) 21875402Speter return (error); 21975402Speter error = SYSCTL_OUT(req, (caddr_t)&pct, sizeof(pct)); 22075402Speter if (error) 22175402Speter return (error); 22275402Speter return (0); 22375402Speter} 22475402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD, 22575402Speter 0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths"); 22675402Speter 22775402Speter/* 22822521Sdyson * Delete an entry from its hash list and move it to the front 22922521Sdyson * of the LRU list for immediate reuse. 23022521Sdyson */ 23125453Sphkstatic void 23225453Sphkcache_zap(ncp) 23325453Sphk struct namecache *ncp; 23425453Sphk{ 23525453Sphk LIST_REMOVE(ncp, nc_hash); 23625453Sphk LIST_REMOVE(ncp, nc_src); 23775654Stanimura if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) { 23828954Sphk vdrop(ncp->nc_dvp); 23975654Stanimura numcachehv--; 24075654Stanimura } 24125453Sphk if (ncp->nc_vp) { 24225453Sphk TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); 24325453Sphk } else { 24425453Sphk TAILQ_REMOVE(&ncneg, ncp, nc_dst); 24525453Sphk numneg--; 24625453Sphk } 24725453Sphk numcache--; 24851906Sphk free(ncp, M_VFSCACHE); 24922521Sdyson} 2506968Sphk 25122521Sdyson/* 25284249Sdillon * cache_leaf_test() 25384249Sdillon * 25484249Sdillon * Test whether this (directory) vnode's namei cache entry contains 25584249Sdillon * subdirectories or not. Used to determine whether the directory is 25684249Sdillon * a leaf in the namei cache or not. Note: the directory may still 25784249Sdillon * contain files in the namei cache. 25884249Sdillon * 25984249Sdillon * Returns 0 if the directory is a leaf, -1 if it isn't. 
26084249Sdillon */ 26184249Sdillonint 26284249Sdilloncache_leaf_test(struct vnode *vp) 26384249Sdillon{ 26484249Sdillon struct namecache *ncpc; 26584249Sdillon 26684249Sdillon for (ncpc = LIST_FIRST(&vp->v_cache_src); 26784249Sdillon ncpc != NULL; 26884249Sdillon ncpc = LIST_NEXT(ncpc, nc_src) 26984249Sdillon ) { 27084249Sdillon if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR) 27184249Sdillon return(-1); 27284249Sdillon } 27384249Sdillon return(0); 27484249Sdillon} 27584249Sdillon 27684249Sdillon/* 27723521Sbde * Lookup an entry in the cache 2786968Sphk * 27923521Sbde * We don't do this if the segment name is long, simply so the cache 2806968Sphk * can avoid holding long names (which would either waste space, or 2811541Srgrimes * add greatly to the complexity). 2821541Srgrimes * 2836968Sphk * Lookup is called with dvp pointing to the directory to search, 28422521Sdyson * cnp pointing to the name of the entry being sought. If the lookup 28522521Sdyson * succeeds, the vnode is returned in *vpp, and a status of -1 is 28622521Sdyson * returned. If the lookup determines that the name does not exist 28722521Sdyson * (negative cacheing), a status of ENOENT is returned. If the lookup 28822521Sdyson * fails, a status of zero is returned. 
2891541Srgrimes */ 2906968Sphk 2911541Srgrimesint 2921541Srgrimescache_lookup(dvp, vpp, cnp) 2931541Srgrimes struct vnode *dvp; 2941541Srgrimes struct vnode **vpp; 2951541Srgrimes struct componentname *cnp; 2961541Srgrimes{ 29751906Sphk struct namecache *ncp; 29874384Speter u_int32_t hash; 2991541Srgrimes 3006928Sphk if (!doingcache) { 3016928Sphk cnp->cn_flags &= ~MAKEENTRY; 3021541Srgrimes return (0); 3036928Sphk } 30425453Sphk 30529788Sphk numcalls++; 30629788Sphk 30725453Sphk if (cnp->cn_nameptr[0] == '.') { 30825453Sphk if (cnp->cn_namelen == 1) { 30925453Sphk *vpp = dvp; 31029788Sphk dothits++; 31125453Sphk return (-1); 31225453Sphk } 31325453Sphk if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 31429788Sphk dotdothits++; 31525453Sphk if (dvp->v_dd->v_id != dvp->v_ddid || 31625453Sphk (cnp->cn_flags & MAKEENTRY) == 0) { 31725453Sphk dvp->v_ddid = 0; 31825453Sphk return (0); 31925453Sphk } 32025453Sphk *vpp = dvp->v_dd; 32125453Sphk return (-1); 32225453Sphk } 3231541Srgrimes } 3246968Sphk 32574501Speter hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT); 32674501Speter hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 32774501Speter LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { 32829788Sphk numchecks++; 32925453Sphk if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && 33031879Sbde !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) 33122521Sdyson break; 3321541Srgrimes } 3336968Sphk 33422521Sdyson /* We failed to find an entry */ 33522521Sdyson if (ncp == 0) { 33629804Sphk if ((cnp->cn_flags & MAKEENTRY) == 0) { 33729804Sphk nummisszap++; 33829804Sphk } else { 33929804Sphk nummiss++; 34029804Sphk } 34122521Sdyson nchstats.ncs_miss++; 34222521Sdyson return (0); 34322521Sdyson } 34422521Sdyson 3456968Sphk /* We don't want to have an entry, so dump it */ 3466928Sphk if ((cnp->cn_flags & MAKEENTRY) == 0) { 34729788Sphk numposzaps++; 3481541Srgrimes nchstats.ncs_badhits++; 34925453Sphk cache_zap(ncp); 3506968Sphk return (0); 
35123521Sbde } 3526968Sphk 3536968Sphk /* We found a "positive" match, return the vnode */ 35422521Sdyson if (ncp->nc_vp) { 35529788Sphk numposhits++; 3561541Srgrimes nchstats.ncs_goodhits++; 3571541Srgrimes *vpp = ncp->nc_vp; 3581541Srgrimes return (-1); 3591541Srgrimes } 3601541Srgrimes 3616968Sphk /* We found a negative match, and want to create it, so purge */ 3626968Sphk if (cnp->cn_nameiop == CREATE) { 36329788Sphk numnegzaps++; 3647013Sphk nchstats.ncs_badhits++; 36525453Sphk cache_zap(ncp); 3666968Sphk return (0); 3676968Sphk } 3686968Sphk 36929788Sphk numneghits++; 37022521Sdyson /* 37122521Sdyson * We found a "negative" match, ENOENT notifies client of this match. 37222521Sdyson * The nc_vpid field records whether this is a whiteout. 37322521Sdyson */ 37425453Sphk TAILQ_REMOVE(&ncneg, ncp, nc_dst); 37525453Sphk TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 3766968Sphk nchstats.ncs_neghits++; 37725453Sphk if (ncp->nc_flag & NCF_WHITE) 37825453Sphk cnp->cn_flags |= ISWHITEOUT; 3796968Sphk return (ENOENT); 3801541Srgrimes} 3811541Srgrimes 3821541Srgrimes/* 3836968Sphk * Add an entry to the cache. 
3841541Srgrimes */ 3851549Srgrimesvoid 3861541Srgrimescache_enter(dvp, vp, cnp) 3871541Srgrimes struct vnode *dvp; 3881541Srgrimes struct vnode *vp; 3891541Srgrimes struct componentname *cnp; 3901541Srgrimes{ 39151906Sphk struct namecache *ncp; 39251906Sphk struct nchashhead *ncpp; 39374384Speter u_int32_t hash; 39451906Sphk int len; 3951541Srgrimes 3961541Srgrimes if (!doingcache) 3971541Srgrimes return; 3986968Sphk 39925453Sphk if (cnp->cn_nameptr[0] == '.') { 40025453Sphk if (cnp->cn_namelen == 1) { 40125453Sphk return; 4026928Sphk } 40325453Sphk if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 40425453Sphk if (vp) { 40525453Sphk dvp->v_dd = vp; 40625453Sphk dvp->v_ddid = vp->v_id; 40725453Sphk } else { 40825453Sphk dvp->v_dd = dvp; 40925453Sphk dvp->v_ddid = 0; 41025453Sphk } 41125453Sphk return; 41225453Sphk } 4136968Sphk } 41425453Sphk 41525453Sphk ncp = (struct namecache *) 41651906Sphk malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK); 41725453Sphk bzero((char *)ncp, sizeof *ncp); 41825453Sphk numcache++; 41928954Sphk if (!vp) { 42025453Sphk numneg++; 42128954Sphk ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0; 42229071Sphk } else if (vp->v_type == VDIR) { 42329071Sphk vp->v_dd = dvp; 42429071Sphk vp->v_ddid = dvp->v_id; 42528954Sphk } 42623521Sbde 42722521Sdyson /* 42822521Sdyson * Fill in cache info, if vp is NULL this is a "negative" cache entry. 42922521Sdyson * For negative entries, we have to record whether it is a whiteout. 43022521Sdyson * the whiteout flag is stored in the nc_vpid field which is 43122521Sdyson * otherwise unused. 
43222521Sdyson */ 4331541Srgrimes ncp->nc_vp = vp; 4341541Srgrimes ncp->nc_dvp = dvp; 43551906Sphk len = ncp->nc_nlen = cnp->cn_namelen; 43674501Speter hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT); 43774384Speter bcopy(cnp->cn_nameptr, ncp->nc_name, len); 43874501Speter hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 43974501Speter ncpp = NCHHASH(hash); 4406928Sphk LIST_INSERT_HEAD(ncpp, ncp, nc_hash); 44175654Stanimura if (LIST_EMPTY(&dvp->v_cache_src)) { 44228954Sphk vhold(dvp); 44375654Stanimura numcachehv++; 44475654Stanimura } 44525453Sphk LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); 44625453Sphk if (vp) { 44725453Sphk TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); 44825453Sphk } else { 44925453Sphk TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 45025453Sphk } 45151906Sphk if (numneg * ncnegfactor > numcache) { 45225453Sphk ncp = TAILQ_FIRST(&ncneg); 45325453Sphk cache_zap(ncp); 45425453Sphk } 4551541Srgrimes} 4561541Srgrimes 4571541Srgrimes/* 4581541Srgrimes * Name cache initialization, from vfs_init() when we are booting 4591541Srgrimes */ 46069664Speterstatic void 46169664Speternchinit(void *dummy __unused) 4621541Srgrimes{ 46323521Sbde 46425453Sphk TAILQ_INIT(&ncneg); 46569664Speter nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash); 4661541Srgrimes} 46769664SpeterSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL) 4681541Srgrimes 46969664Speter 4701541Srgrimes/* 47146011Sphk * Invalidate all entries to a particular vnode. 47223521Sbde * 47346011Sphk * Remove all entries in the namecache relating to this vnode and 47446011Sphk * change the v_id. We take the v_id from a global counter, since 47546011Sphk * it becomes a handy sequence number in crash-dumps that way. 47646011Sphk * No valid vnode will ever have (v_id == 0). 
47746011Sphk * 47846011Sphk * XXX: Only time and the size of v_id prevents this from failing: 47946011Sphk * XXX: In theory we should hunt down all (struct vnode*, v_id) 48046011Sphk * XXX: soft references and nuke them, at least on the global 48146011Sphk * XXX: v_id wraparound. The period of resistance can be extended 48246011Sphk * XXX: by incrementing each vnodes v_id individually instead of 48346011Sphk * XXX: using the global v_id. 4841541Srgrimes */ 48546011Sphk 4861549Srgrimesvoid 4871541Srgrimescache_purge(vp) 4881541Srgrimes struct vnode *vp; 4891541Srgrimes{ 49029094Sphk static u_long nextid; 4911541Srgrimes 49225453Sphk while (!LIST_EMPTY(&vp->v_cache_src)) 49325453Sphk cache_zap(LIST_FIRST(&vp->v_cache_src)); 49425453Sphk while (!TAILQ_EMPTY(&vp->v_cache_dst)) 49525453Sphk cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); 49625453Sphk 49746011Sphk do 49846011Sphk nextid++; 49946011Sphk while (nextid == vp->v_id || !nextid); 50029094Sphk vp->v_id = nextid; 50125453Sphk vp->v_dd = vp; 50225453Sphk vp->v_ddid = 0; 5031541Srgrimes} 5041541Srgrimes 5051541Srgrimes/* 5066968Sphk * Flush all entries referencing a particular filesystem. 5071541Srgrimes * 5086968Sphk * Since we need to check it anyway, we will flush all the invalid 50912968Sphk * entries at the same time. 
5101541Srgrimes */ 5111549Srgrimesvoid 5121541Srgrimescache_purgevfs(mp) 5131541Srgrimes struct mount *mp; 5141541Srgrimes{ 5156968Sphk struct nchashhead *ncpp; 51622521Sdyson struct namecache *ncp, *nnp; 5171541Srgrimes 5186968Sphk /* Scan hash tables for applicable entries */ 51929071Sphk for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { 52025453Sphk for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) { 52125453Sphk nnp = LIST_NEXT(ncp, nc_hash); 52225453Sphk if (ncp->nc_dvp->v_mount == mp) { 52325453Sphk cache_zap(ncp); 5246968Sphk } 5251541Srgrimes } 5261541Srgrimes } 5271541Srgrimes} 52828787Sphk 52984249Sdillon#if 0 53084249Sdillon 53128787Sphk/* 53275654Stanimura * Flush all dirctory entries with no child directories held in 53375654Stanimura * the cache. 53475654Stanimura * 53575654Stanimura * Since we need to check it anyway, we will flush all the invalid 53675654Stanimura * entries at the same time. 53775654Stanimura */ 53875654Stanimuravoid 53975654Stanimuracache_purgeleafdirs(ndir) 54075654Stanimura int ndir; 54175654Stanimura{ 54275654Stanimura struct nchashhead *ncpp; 54375654Stanimura struct namecache *ncp, *nnp, *ncpc, *nnpc; 54475654Stanimura struct vnode *dvp; 54575654Stanimura 54675654Stanimura /* Scan hash tables for applicable entries */ 54775654Stanimura for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl && ndir > 0; ncpp--) { 54875654Stanimura for (ncp = LIST_FIRST(ncpp); ncp != 0 && ndir > 0; ncp = nnp) { 54975654Stanimura nnp = LIST_NEXT(ncp, nc_hash); 55075654Stanimura if (ncp->nc_dvp != 0) { 55175654Stanimura /* 55275654Stanimura * Skip over if nc_dvp of this cache holds 55375654Stanimura * a child directory, or the hold count of 55475654Stanimura * nc_dvp is greater than 1 (in which case 55575654Stanimura * nc_dvp is likely to be the working 55675654Stanimura * directory of a process). 
55775654Stanimura */ 55875654Stanimura if (ncp->nc_dvp->v_holdcnt > 1) 55975654Stanimura continue; 56075654Stanimura for (ncpc = LIST_FIRST(&ncp->nc_dvp->v_cache_src); 56175654Stanimura ncpc != 0; ncpc = nnpc) { 56275654Stanimura nnpc = LIST_NEXT(ncpc, nc_src); 56375654Stanimura if (ncpc->nc_vp != 0 && ncpc->nc_vp->v_type == VDIR) 56475654Stanimura break; 56575654Stanimura } 56675654Stanimura if (ncpc == 0) { 56775654Stanimura /* 56875654Stanimura * Zap all of this directory's children, 56975654Stanimura * held in ncp->nc_dvp->v_cache_src. 57075654Stanimura */ 57175654Stanimura dvp = ncp->nc_dvp; 57275654Stanimura while (!LIST_EMPTY(&dvp->v_cache_src)) 57375654Stanimura cache_zap(LIST_FIRST(&dvp->v_cache_src)); 57475654Stanimura 57575654Stanimura ndir--; 57675654Stanimura 57775654Stanimura /* Restart in case where nnp is reclaimed. */ 57875654Stanimura nnp = LIST_FIRST(ncpp); 57975654Stanimura continue; 58075654Stanimura } 58175654Stanimura } 58275654Stanimura } 58375654Stanimura } 58475654Stanimura numcachepl++; 58575654Stanimura} 58675654Stanimura 58784249Sdillon#endif 58884249Sdillon 58975654Stanimura/* 59028787Sphk * Perform canonical checks and cache lookup and pass on to filesystem 59128787Sphk * through the vop_cachedlookup only if needed. 
 *
 * Return paths, in order:
 *   ENOTDIR - dvp is not a directory;
 *   EROFS   - DELETE or RENAME of the last component on a read-only mount;
 *   error from VOP_ACCESS() - no search permission on dvp;
 *   ENOENT  - the cache holds a negative entry for the name;
 *   result of VOP_CACHEDLOOKUP() - cache miss, or the cached vnode changed
 *             identity while it was being locked;
 *   0       - cache hit, *vpp is returned referenced via VREF()/vget().
 *
 * PDIRUNLOCK is set in cnp->cn_flags whenever dvp is left unlocked.
 */

int
vfs_cache_lookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct vnode *dvp, *vp;
	int lockparent;
	int error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	int flags = cnp->cn_flags;	/* snapshot; cnp->cn_flags is modified below */
	struct thread *td = cnp->cn_thread;
	u_long vpid;	/* capability number of vnode */

	*vpp = NULL;
	dvp = ap->a_dvp;
	lockparent = flags & LOCKPARENT;

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	error = VOP_ACCESS(dvp, VEXEC, cred, td);

	if (error)
		return (error);

	/* 0 = miss, ENOENT = negative hit, -1 = positive hit with *vpp set. */
	error = cache_lookup(dvp, vpp, cnp);

	if (!error)
		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));

	if (error == ENOENT)
		return (error);

	/* Positive hit: remember the vnode's id before anything can sleep. */
	vp = *vpp;
	vpid = vp->v_id;
	cnp->cn_flags &= ~PDIRUNLOCK;
	if (dvp == vp) {   /* lookup on "." */
		VREF(vp);
		error = 0;
	} else if (flags & ISDOTDOT) {
		/*
		 * dvp is unlocked before the parent is locked and is only
		 * relocked when the caller asked for a locked parent on the
		 * last component.
		 * NOTE(review): presumably this avoids a child-before-parent
		 * lock order; confirm against the VFS locking rules.
		 */
		VOP_UNLOCK(dvp, 0, td);
		cnp->cn_flags |= PDIRUNLOCK;
		error = vget(vp, LK_EXCLUSIVE, td);
		if (!error && lockparent && (flags & ISLASTCN)) {
			if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0)
				cnp->cn_flags &= ~PDIRUNLOCK;
		}
	} else {
		/* Keep dvp locked only for LOCKPARENT on the last component. */
		error = vget(vp, LK_EXCLUSIVE, td);
		if (!lockparent || error || !(flags & ISLASTCN)) {
			VOP_UNLOCK(dvp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}
	/*
	 * Check that the capability number did not change
	 * while we were waiting for the lock.
	 */
	if (!error) {
		if (vpid == vp->v_id)
			return (0);
		/* Stale hit: release vp and fall back to the filesystem. */
		vput(vp);
		if (lockparent && dvp != vp && (flags & ISLASTCN)) {
			VOP_UNLOCK(dvp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}
	/* VOP_CACHEDLOOKUP() is called with dvp locked, so relock if needed. */
	if (cnp->cn_flags & PDIRUNLOCK) {
		error = vn_lock(dvp, LK_EXCLUSIVE, td);
		if (error)
			return (error);
		cnp->cn_flags &= ~PDIRUNLOCK;
	}
	return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
}


/* Fallback declaration of the __getcwd() argument structure. */
#ifndef _SYS_SYSPROTO_H_
struct  __getcwd_args {
	u_char	*buf;
	u_int	buflen;
};
#endif

/* Non-zero makes __getcwd() return ENODEV without doing any work. */
static int disablecwd;
SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, "");

/* __getcwd() counters: calls and one counter per failure cause. */
static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
static u_long numcwdfail3;
STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); 69451906Sphkstatic u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); 69551906Sphkstatic u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); 69651906Sphkint 69783366Sjulian__getcwd(td, uap) 69883366Sjulian struct thread *td; 69951906Sphk struct __getcwd_args *uap; 70051906Sphk{ 70151906Sphk char *bp, *buf; 70251906Sphk int error, i, slash_prefixed; 70351906Sphk struct filedesc *fdp; 70451906Sphk struct namecache *ncp; 70551906Sphk struct vnode *vp; 70651906Sphk 70751906Sphk numcwdcalls++; 70851906Sphk if (disablecwd) 70951906Sphk return (ENODEV); 71051906Sphk if (uap->buflen < 2) 71151906Sphk return (EINVAL); 71251906Sphk if (uap->buflen > MAXPATHLEN) 71351906Sphk uap->buflen = MAXPATHLEN; 71451906Sphk buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK); 71551906Sphk bp += uap->buflen - 1; 71651906Sphk *bp = '\0'; 71783366Sjulian fdp = td->td_proc->p_fd; 71851906Sphk slash_prefixed = 0; 71951906Sphk for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) { 72051906Sphk if (vp->v_flag & VROOT) { 72183000Siedowse if (vp->v_mount == NULL) { /* forced unmount */ 72283000Siedowse free(buf, M_TEMP); 72357199Speter return (EBADF); 72483000Siedowse } 72551906Sphk vp = vp->v_mount->mnt_vnodecovered; 72651906Sphk continue; 72751906Sphk } 72851906Sphk if (vp->v_dd->v_id != vp->v_ddid) { 72951906Sphk numcwdfail1++; 73051906Sphk free(buf, M_TEMP); 73151906Sphk return (ENOTDIR); 73251906Sphk } 73351906Sphk ncp = TAILQ_FIRST(&vp->v_cache_dst); 73451906Sphk if (!ncp) { 73551906Sphk numcwdfail2++; 73651906Sphk free(buf, M_TEMP); 73751906Sphk return (ENOENT); 73851906Sphk } 73951906Sphk if (ncp->nc_dvp != vp->v_dd) { 74051906Sphk numcwdfail3++; 74151906Sphk free(buf, M_TEMP); 74251906Sphk return (EBADF); 74351906Sphk } 74451906Sphk for (i = ncp->nc_nlen - 1; i >= 0; i--) { 74551906Sphk if (bp == buf) { 74651906Sphk numcwdfail4++; 74751906Sphk free(buf, M_TEMP); 74851906Sphk return (ENOMEM); 
74951906Sphk } 75051906Sphk *--bp = ncp->nc_name[i]; 75151906Sphk } 75251906Sphk if (bp == buf) { 75351906Sphk numcwdfail4++; 75451906Sphk free(buf, M_TEMP); 75551906Sphk return (ENOMEM); 75651906Sphk } 75751906Sphk *--bp = '/'; 75851906Sphk slash_prefixed = 1; 75951906Sphk vp = vp->v_dd; 76051906Sphk } 76151906Sphk if (!slash_prefixed) { 76251906Sphk if (bp == buf) { 76351906Sphk numcwdfail4++; 76451906Sphk free(buf, M_TEMP); 76551906Sphk return (ENOMEM); 76651906Sphk } 76751906Sphk *--bp = '/'; 76851906Sphk } 76951906Sphk numcwdfound++; 77051906Sphk error = copyout(bp, uap->buf, strlen(bp) + 1); 77151906Sphk free(buf, M_TEMP); 77251906Sphk return (error); 77351906Sphk} 77451906Sphk 77559652Sgreen/* 77659652Sgreen * Thus begins the fullpath magic. 77759652Sgreen */ 77859652Sgreen 77959652Sgreen#undef STATNODE 78059652Sgreen#define STATNODE(name) \ 78159652Sgreen static u_int name; \ 78262622Sjhb SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "") 78359652Sgreen 78459652Sgreenstatic int disablefullpath; 78559652SgreenSYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, 78659652Sgreen &disablefullpath, 0, ""); 78759652Sgreen 78859652SgreenSTATNODE(numfullpathcalls); 78959652SgreenSTATNODE(numfullpathfail1); 79059652SgreenSTATNODE(numfullpathfail2); 79159652SgreenSTATNODE(numfullpathfail3); 79259652SgreenSTATNODE(numfullpathfail4); 79359652SgreenSTATNODE(numfullpathfound); 79459652Sgreen 79559652Sgreenint 79659652Sgreentextvp_fullpath(struct proc *p, char **retbuf, char **retfreebuf) { 79759652Sgreen char *bp, *buf; 79859652Sgreen int i, slash_prefixed; 79959652Sgreen struct filedesc *fdp; 80059652Sgreen struct namecache *ncp; 80159652Sgreen struct vnode *vp, *textvp; 80259652Sgreen 80359652Sgreen numfullpathcalls++; 80459652Sgreen if (disablefullpath) 80559652Sgreen return (ENODEV); 80659652Sgreen textvp = p->p_textvp; 80759652Sgreen if (textvp == NULL) 80859652Sgreen return (EINVAL); 80959652Sgreen buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 
81059652Sgreen bp = buf + MAXPATHLEN - 1; 81159652Sgreen *bp = '\0'; 81259652Sgreen fdp = p->p_fd; 81359652Sgreen slash_prefixed = 0; 81459652Sgreen for (vp = textvp; vp != fdp->fd_rdir && vp != rootvnode;) { 81559652Sgreen if (vp->v_flag & VROOT) { 81659652Sgreen if (vp->v_mount == NULL) { /* forced unmount */ 81759652Sgreen free(buf, M_TEMP); 81859652Sgreen return (EBADF); 81959652Sgreen } 82059652Sgreen vp = vp->v_mount->mnt_vnodecovered; 82159652Sgreen continue; 82259652Sgreen } 82359652Sgreen if (vp != textvp && vp->v_dd->v_id != vp->v_ddid) { 82459652Sgreen numfullpathfail1++; 82559652Sgreen free(buf, M_TEMP); 82659652Sgreen return (ENOTDIR); 82759652Sgreen } 82859652Sgreen ncp = TAILQ_FIRST(&vp->v_cache_dst); 82959652Sgreen if (!ncp) { 83059652Sgreen numfullpathfail2++; 83159652Sgreen free(buf, M_TEMP); 83259652Sgreen return (ENOENT); 83359652Sgreen } 83459652Sgreen if (vp != textvp && ncp->nc_dvp != vp->v_dd) { 83559652Sgreen numfullpathfail3++; 83659652Sgreen free(buf, M_TEMP); 83759652Sgreen return (EBADF); 83859652Sgreen } 83959652Sgreen for (i = ncp->nc_nlen - 1; i >= 0; i--) { 84059652Sgreen if (bp == buf) { 84159652Sgreen numfullpathfail4++; 84259652Sgreen free(buf, M_TEMP); 84359652Sgreen return (ENOMEM); 84459652Sgreen } 84559652Sgreen *--bp = ncp->nc_name[i]; 84659652Sgreen } 84759652Sgreen if (bp == buf) { 84859652Sgreen numfullpathfail4++; 84959652Sgreen free(buf, M_TEMP); 85059652Sgreen return (ENOMEM); 85159652Sgreen } 85259652Sgreen *--bp = '/'; 85359652Sgreen slash_prefixed = 1; 85459652Sgreen vp = ncp->nc_dvp; 85559652Sgreen } 85659652Sgreen if (!slash_prefixed) { 85759652Sgreen if (bp == buf) { 85859652Sgreen numfullpathfail4++; 85959652Sgreen free(buf, M_TEMP); 86059652Sgreen return (ENOMEM); 86159652Sgreen } 86259652Sgreen *--bp = '/'; 86359652Sgreen } 86459652Sgreen numfullpathfound++; 86559652Sgreen *retbuf = bp; 86659652Sgreen *retfreebuf = buf; 86759652Sgreen return (0); 86859652Sgreen} 869