vfs_cache.c revision 75654
11541Srgrimes/* 222521Sdyson * Copyright (c) 1989, 1993, 1995 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 522521Sdyson * This code is derived from software contributed to Berkeley by 622521Sdyson * Poul-Henning Kamp of the FreeBSD Project. 722521Sdyson * 81541Srgrimes * Redistribution and use in source and binary forms, with or without 91541Srgrimes * modification, are permitted provided that the following conditions 101541Srgrimes * are met: 111541Srgrimes * 1. Redistributions of source code must retain the above copyright 121541Srgrimes * notice, this list of conditions and the following disclaimer. 131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer in the 151541Srgrimes * documentation and/or other materials provided with the distribution. 161541Srgrimes * 3. All advertising materials mentioning features or use of this software 171541Srgrimes * must display the following acknowledgement: 181541Srgrimes * This product includes software developed by the University of 191541Srgrimes * California, Berkeley and its contributors. 201541Srgrimes * 4. Neither the name of the University nor the names of its contributors 211541Srgrimes * may be used to endorse or promote products derived from this software 221541Srgrimes * without specific prior written permission. 231541Srgrimes * 241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 271541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 291541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 301541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 321541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 331541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 341541Srgrimes * SUCH DAMAGE. 351541Srgrimes * 3623521Sbde * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 3750477Speter * $FreeBSD: head/sys/kern/vfs_cache.c 75654 2001-04-18 11:19:50Z tanimura $ 381541Srgrimes */ 391541Srgrimes 401541Srgrimes#include <sys/param.h> 411541Srgrimes#include <sys/systm.h> 4212820Sphk#include <sys/kernel.h> 4312820Sphk#include <sys/sysctl.h> 441541Srgrimes#include <sys/mount.h> 451541Srgrimes#include <sys/vnode.h> 461541Srgrimes#include <sys/namei.h> 471541Srgrimes#include <sys/malloc.h> 4851906Sphk#include <sys/sysproto.h> 4951906Sphk#include <sys/proc.h> 5051906Sphk#include <sys/filedesc.h> 5174384Speter#include <sys/fnv_hash.h> 521541Srgrimes 5351906Sphk/* 5459652Sgreen * This structure describes the elements in the cache of recent 5559652Sgreen * names looked up by namei. 5659652Sgreen */ 5759652Sgreen 5859652Sgreenstruct namecache { 5960938Sjake LIST_ENTRY(namecache) nc_hash; /* hash chain */ 6060938Sjake LIST_ENTRY(namecache) nc_src; /* source vnode list */ 6160938Sjake TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ 6259652Sgreen struct vnode *nc_dvp; /* vnode of parent of name */ 6359652Sgreen struct vnode *nc_vp; /* vnode the name refers to */ 6459652Sgreen u_char nc_flag; /* flag bits */ 6559652Sgreen u_char nc_nlen; /* length of name */ 6659652Sgreen char nc_name[0]; /* segment name */ 6759652Sgreen}; 6859652Sgreen 6959652Sgreen/* 701541Srgrimes * Name caching works as follows: 711541Srgrimes * 721541Srgrimes * Names found by directory scans are retained in a cache 731541Srgrimes * for future reference. It is managed LRU, so frequently 741541Srgrimes * used names will hang around. Cache is indexed by hash value 751541Srgrimes * obtained from (vp, name) where vp refers to the directory 761541Srgrimes * containing name. 771541Srgrimes * 7822521Sdyson * If it is a "negative" entry, (i.e. for a name that is known NOT to 7922521Sdyson * exist) the vnode pointer will be NULL. 806968Sphk * 811541Srgrimes * Upon reaching the last segment of a path, if the reference 821541Srgrimes * is for DELETE, or NOCACHE is set (rewrite), and the 831541Srgrimes * name is located in the cache, it will be dropped. 841541Srgrimes */ 851541Srgrimes 861541Srgrimes/* 871541Srgrimes * Structures associated with name cacheing. 881541Srgrimes */ 8974501Speter#define NCHHASH(hash) \ 9074501Speter (&nchashtbl[(hash) & nchash]) 9160938Sjakestatic LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ 9260938Sjakestatic TAILQ_HEAD(, namecache) ncneg; /* Hash Table */ 9323521Sbdestatic u_long nchash; /* size of hash table */ 9462622SjhbSYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, ""); 9525453Sphkstatic u_long ncnegfactor = 16; /* ratio of negative entries */ 9662622SjhbSYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, ""); 9725453Sphkstatic u_long numneg; /* number of cache entries allocated */ 9862622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, ""); 9923521Sbdestatic u_long numcache; /* number of cache entries allocated */ 10062622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, ""); 10175654Stanimurastatic u_long numcachehv; /* number of cache entries with vnodes held */ 10275654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, ""); 10375654Stanimurastatic u_long numcachepl; /* number of cache purge for leaf entries */ 10475654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, ""); 10522521Sdysonstruct nchstats nchstats; /* cache effectiveness statistics */ 1061541Srgrimes 10723521Sbdestatic int doingcache = 1; /* 1 => enable the cache */ 10823521SbdeSYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, ""); 10925453SphkSYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), ""); 11025453SphkSYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), ""); 11123521Sbde 11229788Sphk/* 11329788Sphk * The new name cache statistics 11429788Sphk */ 11538984SbdeSYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics"); 11629788Sphk#define STATNODE(mode, name, var) \ 11762622Sjhb SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, ""); 11829788SphkSTATNODE(CTLFLAG_RD, numneg, &numneg); 11929788SphkSTATNODE(CTLFLAG_RD, numcache, &numcache); 12029788Sphkstatic u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls); 12129788Sphkstatic u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits); 12229788Sphkstatic u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits); 12329788Sphkstatic u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks); 12429788Sphkstatic u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss); 12529804Sphkstatic u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap); 12629788Sphkstatic u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps); 12729788Sphkstatic u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits); 12829788Sphkstatic u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps); 12929788Sphkstatic u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits); 13029788Sphk 13168922SrwatsonSYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats, 13268922Srwatson sizeof(nchstats), "LU", "VFS cache effectiveness statistics"); 13329788Sphk 13468922Srwatson 13568922Srwatson 13625453Sphkstatic void cache_zap __P((struct namecache *ncp)); 1376968Sphk 13869774Sphkstatic MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); 13951906Sphk 14022521Sdyson/* 14125453Sphk * Flags in namecache.nc_flag 14225453Sphk */ 14325453Sphk#define NCF_WHITE 1 14475402Speter 14525453Sphk/* 14675402Speter * Grab an atomic snapshot of the name cache hash chain lengths 14775402Speter */ 14875402SpeterSYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats"); 14975402Speter 15075402Speterstatic int 15175402Spetersysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS) 15275402Speter{ 15375402Speter int error; 15475402Speter struct nchashhead *ncpp; 15575402Speter struct namecache *ncp; 15675402Speter int n_nchash; 15775402Speter int count; 15875402Speter 15975402Speter n_nchash = nchash + 1; /* nchash is max index, not count */ 16075402Speter if (!req->oldptr) 16175402Speter return SYSCTL_OUT(req, 0, n_nchash * sizeof(int)); 16275402Speter 16375402Speter /* Scan hash tables for applicable entries */ 16475402Speter for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 16575402Speter count = 0; 16675402Speter LIST_FOREACH(ncp, ncpp, nc_hash) { 16775402Speter count++; 16875402Speter } 16975402Speter error = SYSCTL_OUT(req, (caddr_t)&count, sizeof(count)); 17075402Speter if (error) 17175402Speter return (error); 17275402Speter } 17375402Speter return (0); 17475402Speter} 17575402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD, 17675402Speter 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths"); 17775402Speter 17875402Speterstatic int 17975402Spetersysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS) 18075402Speter{ 18175402Speter int error; 18275402Speter struct nchashhead *ncpp; 18375402Speter struct namecache *ncp; 18475402Speter int n_nchash; 18575402Speter int count, maxlength, used, pct; 18675402Speter 18775402Speter if (!req->oldptr) 18875402Speter return SYSCTL_OUT(req, 0, 4 * sizeof(int)); 18975402Speter 19075402Speter n_nchash = nchash + 1; /* nchash is max index, not count */ 19175402Speter used = 0; 19275402Speter maxlength = 0; 19375402Speter 19475402Speter /* Scan hash tables for applicable entries */ 19575402Speter for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 19675402Speter count = 0; 19775402Speter LIST_FOREACH(ncp, ncpp, nc_hash) { 19875402Speter count++; 19975402Speter } 20075402Speter if (count) 20175402Speter used++; 20275402Speter if (maxlength < count) 20375402Speter maxlength = count; 20475402Speter } 20575402Speter n_nchash = nchash + 1; 20675402Speter pct = (used * 100 * 100) / n_nchash; 20775402Speter error = SYSCTL_OUT(req, (caddr_t)&n_nchash, sizeof(n_nchash)); 20875402Speter if (error) 20975402Speter return (error); 21075402Speter error = SYSCTL_OUT(req, (caddr_t)&used, sizeof(used)); 21175402Speter if (error) 21275402Speter return (error); 21375402Speter error = SYSCTL_OUT(req, (caddr_t)&maxlength, sizeof(maxlength)); 21475402Speter if (error) 21575402Speter return (error); 21675402Speter error = SYSCTL_OUT(req, (caddr_t)&pct, sizeof(pct)); 21775402Speter if (error) 21875402Speter return (error); 21975402Speter return (0); 22075402Speter} 22175402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD, 22275402Speter 0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths"); 22375402Speter 22475402Speter/* 22522521Sdyson * Delete an entry from its hash list and move it to the front 22622521Sdyson * of the LRU list for immediate reuse. 22722521Sdyson */ 22825453Sphkstatic void 22925453Sphkcache_zap(ncp) 23025453Sphk struct namecache *ncp; 23125453Sphk{ 23225453Sphk LIST_REMOVE(ncp, nc_hash); 23325453Sphk LIST_REMOVE(ncp, nc_src); 23475654Stanimura if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) { 23528954Sphk vdrop(ncp->nc_dvp); 23675654Stanimura numcachehv--; 23775654Stanimura } 23825453Sphk if (ncp->nc_vp) { 23925453Sphk TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); 24025453Sphk } else { 24125453Sphk TAILQ_REMOVE(&ncneg, ncp, nc_dst); 24225453Sphk numneg--; 24325453Sphk } 24425453Sphk numcache--; 24551906Sphk free(ncp, M_VFSCACHE); 24622521Sdyson} 2476968Sphk 24822521Sdyson/* 24923521Sbde * Lookup an entry in the cache 2506968Sphk * 25123521Sbde * We don't do this if the segment name is long, simply so the cache 2526968Sphk * can avoid holding long names (which would either waste space, or 2531541Srgrimes * add greatly to the complexity). 2541541Srgrimes * 2556968Sphk * Lookup is called with dvp pointing to the directory to search, 25622521Sdyson * cnp pointing to the name of the entry being sought. If the lookup 25722521Sdyson * succeeds, the vnode is returned in *vpp, and a status of -1 is 25822521Sdyson * returned. If the lookup determines that the name does not exist 25922521Sdyson * (negative cacheing), a status of ENOENT is returned. If the lookup 26022521Sdyson * fails, a status of zero is returned. 2611541Srgrimes */ 2626968Sphk 2631541Srgrimesint 2641541Srgrimescache_lookup(dvp, vpp, cnp) 2651541Srgrimes struct vnode *dvp; 2661541Srgrimes struct vnode **vpp; 2671541Srgrimes struct componentname *cnp; 2681541Srgrimes{ 26951906Sphk struct namecache *ncp; 27074384Speter u_int32_t hash; 2711541Srgrimes 2726928Sphk if (!doingcache) { 2736928Sphk cnp->cn_flags &= ~MAKEENTRY; 2741541Srgrimes return (0); 2756928Sphk } 27625453Sphk 27729788Sphk numcalls++; 27829788Sphk 27925453Sphk if (cnp->cn_nameptr[0] == '.') { 28025453Sphk if (cnp->cn_namelen == 1) { 28125453Sphk *vpp = dvp; 28229788Sphk dothits++; 28325453Sphk return (-1); 28425453Sphk } 28525453Sphk if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 28629788Sphk dotdothits++; 28725453Sphk if (dvp->v_dd->v_id != dvp->v_ddid || 28825453Sphk (cnp->cn_flags & MAKEENTRY) == 0) { 28925453Sphk dvp->v_ddid = 0; 29025453Sphk return (0); 29125453Sphk } 29225453Sphk *vpp = dvp->v_dd; 29325453Sphk return (-1); 29425453Sphk } 2951541Srgrimes } 2966968Sphk 29774501Speter hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT); 29874501Speter hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 29974501Speter LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { 30029788Sphk numchecks++; 30125453Sphk if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && 30231879Sbde !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) 30322521Sdyson break; 3041541Srgrimes } 3056968Sphk 30622521Sdyson /* We failed to find an entry */ 30722521Sdyson if (ncp == 0) { 30829804Sphk if ((cnp->cn_flags & MAKEENTRY) == 0) { 30929804Sphk nummisszap++; 31029804Sphk } else { 31129804Sphk nummiss++; 31229804Sphk } 31322521Sdyson nchstats.ncs_miss++; 31422521Sdyson return (0); 31522521Sdyson } 31622521Sdyson 3176968Sphk /* We don't want to have an entry, so dump it */ 3186928Sphk if ((cnp->cn_flags & MAKEENTRY) == 0) { 31929788Sphk numposzaps++; 3201541Srgrimes nchstats.ncs_badhits++; 32125453Sphk cache_zap(ncp); 3226968Sphk return (0); 32323521Sbde } 3246968Sphk 3256968Sphk /* We found a "positive" match, return the vnode */ 32622521Sdyson if (ncp->nc_vp) { 32729788Sphk numposhits++; 3281541Srgrimes nchstats.ncs_goodhits++; 3291541Srgrimes *vpp = ncp->nc_vp; 3301541Srgrimes return (-1); 3311541Srgrimes } 3321541Srgrimes 3336968Sphk /* We found a negative match, and want to create it, so purge */ 3346968Sphk if (cnp->cn_nameiop == CREATE) { 33529788Sphk numnegzaps++; 3367013Sphk nchstats.ncs_badhits++; 33725453Sphk cache_zap(ncp); 3386968Sphk return (0); 3396968Sphk } 3406968Sphk 34129788Sphk numneghits++; 34222521Sdyson /* 34322521Sdyson * We found a "negative" match, ENOENT notifies client of this match. 34422521Sdyson * The nc_vpid field records whether this is a whiteout. 34522521Sdyson */ 34625453Sphk TAILQ_REMOVE(&ncneg, ncp, nc_dst); 34725453Sphk TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 3486968Sphk nchstats.ncs_neghits++; 34925453Sphk if (ncp->nc_flag & NCF_WHITE) 35025453Sphk cnp->cn_flags |= ISWHITEOUT; 3516968Sphk return (ENOENT); 3521541Srgrimes} 3531541Srgrimes 3541541Srgrimes/* 3556968Sphk * Add an entry to the cache. 3561541Srgrimes */ 3571549Srgrimesvoid 3581541Srgrimescache_enter(dvp, vp, cnp) 3591541Srgrimes struct vnode *dvp; 3601541Srgrimes struct vnode *vp; 3611541Srgrimes struct componentname *cnp; 3621541Srgrimes{ 36351906Sphk struct namecache *ncp; 36451906Sphk struct nchashhead *ncpp; 36574384Speter u_int32_t hash; 36651906Sphk int len; 3671541Srgrimes 3681541Srgrimes if (!doingcache) 3691541Srgrimes return; 3706968Sphk 37125453Sphk if (cnp->cn_nameptr[0] == '.') { 37225453Sphk if (cnp->cn_namelen == 1) { 37325453Sphk return; 3746928Sphk } 37525453Sphk if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 37625453Sphk if (vp) { 37725453Sphk dvp->v_dd = vp; 37825453Sphk dvp->v_ddid = vp->v_id; 37925453Sphk } else { 38025453Sphk dvp->v_dd = dvp; 38125453Sphk dvp->v_ddid = 0; 38225453Sphk } 38325453Sphk return; 38425453Sphk } 3856968Sphk } 38625453Sphk 38725453Sphk ncp = (struct namecache *) 38851906Sphk malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK); 38925453Sphk bzero((char *)ncp, sizeof *ncp); 39025453Sphk numcache++; 39128954Sphk if (!vp) { 39225453Sphk numneg++; 39328954Sphk ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0; 39429071Sphk } else if (vp->v_type == VDIR) { 39529071Sphk vp->v_dd = dvp; 39629071Sphk vp->v_ddid = dvp->v_id; 39728954Sphk } 39823521Sbde 39922521Sdyson /* 40022521Sdyson * Fill in cache info, if vp is NULL this is a "negative" cache entry. 40122521Sdyson * For negative entries, we have to record whether it is a whiteout. 40222521Sdyson * the whiteout flag is stored in the nc_vpid field which is 40322521Sdyson * otherwise unused. 40422521Sdyson */ 4051541Srgrimes ncp->nc_vp = vp; 4061541Srgrimes ncp->nc_dvp = dvp; 40751906Sphk len = ncp->nc_nlen = cnp->cn_namelen; 40874501Speter hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT); 40974384Speter bcopy(cnp->cn_nameptr, ncp->nc_name, len); 41074501Speter hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 41174501Speter ncpp = NCHHASH(hash); 4126928Sphk LIST_INSERT_HEAD(ncpp, ncp, nc_hash); 41375654Stanimura if (LIST_EMPTY(&dvp->v_cache_src)) { 41428954Sphk vhold(dvp); 41575654Stanimura numcachehv++; 41675654Stanimura } 41725453Sphk LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); 41825453Sphk if (vp) { 41925453Sphk TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); 42025453Sphk } else { 42125453Sphk TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 42225453Sphk } 42351906Sphk if (numneg * ncnegfactor > numcache) { 42425453Sphk ncp = TAILQ_FIRST(&ncneg); 42525453Sphk cache_zap(ncp); 42625453Sphk } 4271541Srgrimes} 4281541Srgrimes 4291541Srgrimes/* 4301541Srgrimes * Name cache initialization, from vfs_init() when we are booting 4311541Srgrimes */ 43269664Speterstatic void 43369664Speternchinit(void *dummy __unused) 4341541Srgrimes{ 43523521Sbde 43625453Sphk TAILQ_INIT(&ncneg); 43769664Speter nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash); 4381541Srgrimes} 43969664SpeterSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL) 4401541Srgrimes 44169664Speter 4421541Srgrimes/* 44346011Sphk * Invalidate all entries to a particular vnode. 44423521Sbde * 44546011Sphk * Remove all entries in the namecache relating to this vnode and 44646011Sphk * change the v_id. We take the v_id from a global counter, since 44746011Sphk * it becomes a handy sequence number in crash-dumps that way. 44846011Sphk * No valid vnode will ever have (v_id == 0). 44946011Sphk * 45046011Sphk * XXX: Only time and the size of v_id prevents this from failing: 45146011Sphk * XXX: In theory we should hunt down all (struct vnode*, v_id) 45246011Sphk * XXX: soft references and nuke them, at least on the global 45346011Sphk * XXX: v_id wraparound. The period of resistance can be extended 45446011Sphk * XXX: by incrementing each vnodes v_id individually instead of 45546011Sphk * XXX: using the global v_id. 4561541Srgrimes */ 45746011Sphk 4581549Srgrimesvoid 4591541Srgrimescache_purge(vp) 4601541Srgrimes struct vnode *vp; 4611541Srgrimes{ 46229094Sphk static u_long nextid; 4631541Srgrimes 46425453Sphk while (!LIST_EMPTY(&vp->v_cache_src)) 46525453Sphk cache_zap(LIST_FIRST(&vp->v_cache_src)); 46625453Sphk while (!TAILQ_EMPTY(&vp->v_cache_dst)) 46725453Sphk cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); 46825453Sphk 46946011Sphk do 47046011Sphk nextid++; 47146011Sphk while (nextid == vp->v_id || !nextid); 47229094Sphk vp->v_id = nextid; 47325453Sphk vp->v_dd = vp; 47425453Sphk vp->v_ddid = 0; 4751541Srgrimes} 4761541Srgrimes 4771541Srgrimes/* 4786968Sphk * Flush all entries referencing a particular filesystem. 4791541Srgrimes * 4806968Sphk * Since we need to check it anyway, we will flush all the invalid 48112968Sphk * entries at the same time. 4821541Srgrimes */ 4831549Srgrimesvoid 4841541Srgrimescache_purgevfs(mp) 4851541Srgrimes struct mount *mp; 4861541Srgrimes{ 4876968Sphk struct nchashhead *ncpp; 48822521Sdyson struct namecache *ncp, *nnp; 4891541Srgrimes 4906968Sphk /* Scan hash tables for applicable entries */ 49129071Sphk for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { 49225453Sphk for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) { 49325453Sphk nnp = LIST_NEXT(ncp, nc_hash); 49425453Sphk if (ncp->nc_dvp->v_mount == mp) { 49525453Sphk cache_zap(ncp); 4966968Sphk } 4971541Srgrimes } 4981541Srgrimes } 4991541Srgrimes} 50028787Sphk 50128787Sphk/* 50275654Stanimura * Flush all dirctory entries with no child directories held in 50375654Stanimura * the cache. 50475654Stanimura * 50575654Stanimura * Since we need to check it anyway, we will flush all the invalid 50675654Stanimura * entries at the same time. 50775654Stanimura */ 50875654Stanimuravoid 50975654Stanimuracache_purgeleafdirs(ndir) 51075654Stanimura int ndir; 51175654Stanimura{ 51275654Stanimura struct nchashhead *ncpp; 51375654Stanimura struct namecache *ncp, *nnp, *ncpc, *nnpc; 51475654Stanimura struct vnode *dvp; 51575654Stanimura 51675654Stanimura /* Scan hash tables for applicable entries */ 51775654Stanimura for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl && ndir > 0; ncpp--) { 51875654Stanimura for (ncp = LIST_FIRST(ncpp); ncp != 0 && ndir > 0; ncp = nnp) { 51975654Stanimura nnp = LIST_NEXT(ncp, nc_hash); 52075654Stanimura if (ncp->nc_dvp != 0) { 52175654Stanimura /* 52275654Stanimura * Skip over if nc_dvp of this cache holds 52375654Stanimura * a child directory, or the hold count of 52475654Stanimura * nc_dvp is greater than 1 (in which case 52575654Stanimura * nc_dvp is likely to be the working 52675654Stanimura * directory of a process). 52775654Stanimura */ 52875654Stanimura if (ncp->nc_dvp->v_holdcnt > 1) 52975654Stanimura continue; 53075654Stanimura for (ncpc = LIST_FIRST(&ncp->nc_dvp->v_cache_src); 53175654Stanimura ncpc != 0; ncpc = nnpc) { 53275654Stanimura nnpc = LIST_NEXT(ncpc, nc_src); 53375654Stanimura if (ncpc->nc_vp != 0 && ncpc->nc_vp->v_type == VDIR) 53475654Stanimura break; 53575654Stanimura } 53675654Stanimura if (ncpc == 0) { 53775654Stanimura /* 53875654Stanimura * Zap all of this directory's children, 53975654Stanimura * held in ncp->nc_dvp->v_cache_src. 54075654Stanimura */ 54175654Stanimura dvp = ncp->nc_dvp; 54275654Stanimura while (!LIST_EMPTY(&dvp->v_cache_src)) 54375654Stanimura cache_zap(LIST_FIRST(&dvp->v_cache_src)); 54475654Stanimura 54575654Stanimura ndir--; 54675654Stanimura 54775654Stanimura /* Restart in case where nnp is reclaimed. */ 54875654Stanimura nnp = LIST_FIRST(ncpp); 54975654Stanimura continue; 55075654Stanimura } 55175654Stanimura } 55275654Stanimura } 55375654Stanimura } 55475654Stanimura numcachepl++; 55575654Stanimura} 55675654Stanimura 55775654Stanimura/* 55828787Sphk * Perform canonical checks and cache lookup and pass on to filesystem 55928787Sphk * through the vop_cachedlookup only if needed. 56028787Sphk */ 56128787Sphk 56228787Sphkint 56328787Sphkvfs_cache_lookup(ap) 56428787Sphk struct vop_lookup_args /* { 56528787Sphk struct vnode *a_dvp; 56628787Sphk struct vnode **a_vpp; 56728787Sphk struct componentname *a_cnp; 56828787Sphk } */ *ap; 56928787Sphk{ 57065665Sbp struct vnode *dvp, *vp; 57165665Sbp int lockparent; 57228787Sphk int error; 57328787Sphk struct vnode **vpp = ap->a_vpp; 57428787Sphk struct componentname *cnp = ap->a_cnp; 57528787Sphk struct ucred *cred = cnp->cn_cred; 57628787Sphk int flags = cnp->cn_flags; 57728787Sphk struct proc *p = cnp->cn_proc; 57828787Sphk u_long vpid; /* capability number of vnode */ 57928787Sphk 58028787Sphk *vpp = NULL; 58165665Sbp dvp = ap->a_dvp; 58228787Sphk lockparent = flags & LOCKPARENT; 58328787Sphk 58465665Sbp if (dvp->v_type != VDIR) 58528787Sphk return (ENOTDIR); 58628787Sphk 58765665Sbp if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && 58828787Sphk (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 58928787Sphk return (EROFS); 59028787Sphk 59165665Sbp error = VOP_ACCESS(dvp, VEXEC, cred, p); 59228787Sphk 59328787Sphk if (error) 59428787Sphk return (error); 59528787Sphk 59665665Sbp error = cache_lookup(dvp, vpp, cnp); 59728787Sphk 59828787Sphk if (!error) 59965665Sbp return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 60028787Sphk 60128787Sphk if (error == ENOENT) 60228787Sphk return (error); 60328787Sphk 60465665Sbp vp = *vpp; 60565665Sbp vpid = vp->v_id; 60665973Sbp cnp->cn_flags &= ~PDIRUNLOCK; 60765665Sbp if (dvp == vp) { /* lookup on "." */ 60865665Sbp VREF(vp); 60928787Sphk error = 0; 61028787Sphk } else if (flags & ISDOTDOT) { 61165665Sbp VOP_UNLOCK(dvp, 0, p); 61265973Sbp cnp->cn_flags |= PDIRUNLOCK; 61365665Sbp error = vget(vp, LK_EXCLUSIVE, p); 61465973Sbp if (!error && lockparent && (flags & ISLASTCN)) { 61565973Sbp if ((error = vn_lock(dvp, LK_EXCLUSIVE, p)) == 0) 61665973Sbp cnp->cn_flags &= ~PDIRUNLOCK; 61765973Sbp } 61828787Sphk } else { 61965665Sbp error = vget(vp, LK_EXCLUSIVE, p); 62065973Sbp if (!lockparent || error || !(flags & ISLASTCN)) { 62165665Sbp VOP_UNLOCK(dvp, 0, p); 62265973Sbp cnp->cn_flags |= PDIRUNLOCK; 62365973Sbp } 62428787Sphk } 62528787Sphk /* 62628787Sphk * Check that the capability number did not change 62728787Sphk * while we were waiting for the lock. 62828787Sphk */ 62928787Sphk if (!error) { 63065665Sbp if (vpid == vp->v_id) 63128787Sphk return (0); 63265665Sbp vput(vp); 63365973Sbp if (lockparent && dvp != vp && (flags & ISLASTCN)) { 63465665Sbp VOP_UNLOCK(dvp, 0, p); 63565973Sbp cnp->cn_flags |= PDIRUNLOCK; 63665973Sbp } 63728787Sphk } 63865973Sbp if (cnp->cn_flags & PDIRUNLOCK) { 63965973Sbp error = vn_lock(dvp, LK_EXCLUSIVE, p); 64065973Sbp if (error) 64165973Sbp return (error); 64265973Sbp cnp->cn_flags &= ~PDIRUNLOCK; 64365973Sbp } 64465665Sbp return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 64528787Sphk} 64651906Sphk 64751906Sphk 64851906Sphk#ifndef _SYS_SYSPROTO_H_ 64951906Sphkstruct __getcwd_args { 65051906Sphk u_char *buf; 65151906Sphk u_int buflen; 65251906Sphk}; 65351906Sphk#endif 65451906Sphk 65551906Sphkstatic int disablecwd; 65651906SphkSYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, ""); 65751906Sphk 65851906Sphkstatic u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls); 65951906Sphkstatic u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1); 66051906Sphkstatic u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2); 66151906Sphkstatic u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); 66251906Sphkstatic u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); 66351906Sphkstatic u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); 66451906Sphkint 66551906Sphk__getcwd(p, uap) 66651906Sphk struct proc *p; 66751906Sphk struct __getcwd_args *uap; 66851906Sphk{ 66951906Sphk char *bp, *buf; 67051906Sphk int error, i, slash_prefixed; 67151906Sphk struct filedesc *fdp; 67251906Sphk struct namecache *ncp; 67351906Sphk struct vnode *vp; 67451906Sphk 67551906Sphk numcwdcalls++; 67651906Sphk if (disablecwd) 67751906Sphk return (ENODEV); 67851906Sphk if (uap->buflen < 2) 67951906Sphk return (EINVAL); 68051906Sphk if (uap->buflen > MAXPATHLEN) 68151906Sphk uap->buflen = MAXPATHLEN; 68251906Sphk buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK); 68351906Sphk bp += uap->buflen - 1; 68451906Sphk *bp = '\0'; 68551906Sphk fdp = p->p_fd; 68651906Sphk slash_prefixed = 0; 68751906Sphk for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) { 68851906Sphk if (vp->v_flag & VROOT) { 68957199Speter if (vp->v_mount == NULL) /* forced unmount */ 69057199Speter return (EBADF); 69151906Sphk vp = vp->v_mount->mnt_vnodecovered; 69251906Sphk continue; 69351906Sphk } 69451906Sphk if (vp->v_dd->v_id != vp->v_ddid) { 69551906Sphk numcwdfail1++; 69651906Sphk free(buf, M_TEMP); 69751906Sphk return (ENOTDIR); 69851906Sphk } 69951906Sphk ncp = TAILQ_FIRST(&vp->v_cache_dst); 70051906Sphk if (!ncp) { 70151906Sphk numcwdfail2++; 70251906Sphk free(buf, M_TEMP); 70351906Sphk return (ENOENT); 70451906Sphk } 70551906Sphk if (ncp->nc_dvp != vp->v_dd) { 70651906Sphk numcwdfail3++; 70751906Sphk free(buf, M_TEMP); 70851906Sphk return (EBADF); 70951906Sphk } 71051906Sphk for (i = ncp->nc_nlen - 1; i >= 0; i--) { 71151906Sphk if (bp == buf) { 71251906Sphk numcwdfail4++; 71351906Sphk free(buf, M_TEMP); 71451906Sphk return (ENOMEM); 71551906Sphk } 71651906Sphk *--bp = ncp->nc_name[i]; 71751906Sphk } 71851906Sphk if (bp == buf) { 71951906Sphk numcwdfail4++; 72051906Sphk free(buf, M_TEMP); 72151906Sphk return (ENOMEM); 72251906Sphk } 72351906Sphk *--bp = '/'; 72451906Sphk slash_prefixed = 1; 72551906Sphk vp = vp->v_dd; 72651906Sphk } 72751906Sphk if (!slash_prefixed) { 72851906Sphk if (bp == buf) { 72951906Sphk numcwdfail4++; 73051906Sphk free(buf, M_TEMP); 73151906Sphk return (ENOMEM); 73251906Sphk } 73351906Sphk *--bp = '/'; 73451906Sphk } 73551906Sphk numcwdfound++; 73651906Sphk error = copyout(bp, uap->buf, strlen(bp) + 1); 73751906Sphk free(buf, M_TEMP); 73851906Sphk return (error); 73951906Sphk} 74051906Sphk 74159652Sgreen/* 74259652Sgreen * Thus begins the fullpath magic. 74359652Sgreen */ 74459652Sgreen 74559652Sgreen#undef STATNODE 74659652Sgreen#define STATNODE(name) \ 74759652Sgreen static u_int name; \ 74862622Sjhb SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "") 74959652Sgreen 75059652Sgreenstatic int disablefullpath; 75159652SgreenSYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, 75259652Sgreen &disablefullpath, 0, ""); 75359652Sgreen 75459652SgreenSTATNODE(numfullpathcalls); 75559652SgreenSTATNODE(numfullpathfail1); 75659652SgreenSTATNODE(numfullpathfail2); 75759652SgreenSTATNODE(numfullpathfail3); 75859652SgreenSTATNODE(numfullpathfail4); 75959652SgreenSTATNODE(numfullpathfound); 76059652Sgreen 76159652Sgreenint 76259652Sgreentextvp_fullpath(struct proc *p, char **retbuf, char **retfreebuf) { 76359652Sgreen char *bp, *buf; 76459652Sgreen int i, slash_prefixed; 76559652Sgreen struct filedesc *fdp; 76659652Sgreen struct namecache *ncp; 76759652Sgreen struct vnode *vp, *textvp; 76859652Sgreen 76959652Sgreen numfullpathcalls++; 77059652Sgreen if (disablefullpath) 77159652Sgreen return (ENODEV); 77259652Sgreen textvp = p->p_textvp; 77359652Sgreen if (textvp == NULL) 77459652Sgreen return (EINVAL); 77559652Sgreen buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 77659652Sgreen bp = buf + MAXPATHLEN - 1; 77759652Sgreen *bp = '\0'; 77859652Sgreen fdp = p->p_fd; 77959652Sgreen slash_prefixed = 0; 78059652Sgreen for (vp = textvp; vp != fdp->fd_rdir && vp != rootvnode;) { 78159652Sgreen if (vp->v_flag & VROOT) { 78259652Sgreen if (vp->v_mount == NULL) { /* forced unmount */ 78359652Sgreen free(buf, M_TEMP); 78459652Sgreen return (EBADF); 78559652Sgreen } 78659652Sgreen vp = vp->v_mount->mnt_vnodecovered; 78759652Sgreen continue; 78859652Sgreen } 78959652Sgreen if (vp != textvp && vp->v_dd->v_id != vp->v_ddid) { 79059652Sgreen numfullpathfail1++; 79159652Sgreen free(buf, M_TEMP); 79259652Sgreen return (ENOTDIR); 79359652Sgreen } 79459652Sgreen ncp = TAILQ_FIRST(&vp->v_cache_dst); 79559652Sgreen if (!ncp) { 79659652Sgreen numfullpathfail2++; 79759652Sgreen free(buf, M_TEMP); 79859652Sgreen return (ENOENT); 79959652Sgreen } 80059652Sgreen if (vp != textvp && ncp->nc_dvp != vp->v_dd) { 80159652Sgreen numfullpathfail3++; 80259652Sgreen free(buf, M_TEMP); 80359652Sgreen return (EBADF); 80459652Sgreen } 80559652Sgreen for (i = ncp->nc_nlen - 1; i >= 0; i--) { 80659652Sgreen if (bp == buf) { 80759652Sgreen numfullpathfail4++; 80859652Sgreen free(buf, M_TEMP); 80959652Sgreen return (ENOMEM); 81059652Sgreen } 81159652Sgreen *--bp = ncp->nc_name[i]; 81259652Sgreen } 81359652Sgreen if (bp == buf) { 81459652Sgreen numfullpathfail4++; 81559652Sgreen free(buf, M_TEMP); 81659652Sgreen return (ENOMEM); 81759652Sgreen } 81859652Sgreen *--bp = '/'; 81959652Sgreen slash_prefixed = 1; 82059652Sgreen vp = ncp->nc_dvp; 82159652Sgreen } 82259652Sgreen if (!slash_prefixed) { 82359652Sgreen if (bp == buf) { 82459652Sgreen numfullpathfail4++; 82559652Sgreen free(buf, M_TEMP); 82659652Sgreen return (ENOMEM); 82759652Sgreen } 82859652Sgreen *--bp = '/'; 82959652Sgreen } 83059652Sgreen numfullpathfound++; 83159652Sgreen *retbuf = bp; 83259652Sgreen *retfreebuf = buf; 83359652Sgreen return (0); 83459652Sgreen} 835