vfs_cache.c revision 31879
11541Srgrimes/* 222521Sdyson * Copyright (c) 1989, 1993, 1995 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 522521Sdyson * This code is derived from software contributed to Berkeley by 622521Sdyson * Poul-Henning Kamp of the FreeBSD Project. 722521Sdyson * 81541Srgrimes * Redistribution and use in source and binary forms, with or without 91541Srgrimes * modification, are permitted provided that the following conditions 101541Srgrimes * are met: 111541Srgrimes * 1. Redistributions of source code must retain the above copyright 121541Srgrimes * notice, this list of conditions and the following disclaimer. 131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer in the 151541Srgrimes * documentation and/or other materials provided with the distribution. 161541Srgrimes * 3. All advertising materials mentioning features or use of this software 171541Srgrimes * must display the following acknowledgement: 181541Srgrimes * This product includes software developed by the University of 191541Srgrimes * California, Berkeley and its contributors. 201541Srgrimes * 4. Neither the name of the University nor the names of its contributors 211541Srgrimes * may be used to endorse or promote products derived from this software 221541Srgrimes * without specific prior written permission. 231541Srgrimes * 241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 271541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 291541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 301541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 321541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 331541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 341541Srgrimes * SUCH DAMAGE. 351541Srgrimes * 3623521Sbde * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 3731879Sbde * $Id: vfs_cache.c,v 1.36 1997/11/07 08:53:05 phk Exp $ 381541Srgrimes */ 391541Srgrimes 401541Srgrimes#include <sys/param.h> 411541Srgrimes#include <sys/systm.h> 4212820Sphk#include <sys/kernel.h> 4312820Sphk#include <sys/sysctl.h> 441541Srgrimes#include <sys/mount.h> 451541Srgrimes#include <sys/vnode.h> 461541Srgrimes#include <sys/namei.h> 471541Srgrimes#include <sys/malloc.h> 481541Srgrimes 4913490Sdyson 501541Srgrimes/* 511541Srgrimes * Name caching works as follows: 521541Srgrimes * 531541Srgrimes * Names found by directory scans are retained in a cache 541541Srgrimes * for future reference. It is managed LRU, so frequently 551541Srgrimes * used names will hang around. Cache is indexed by hash value 561541Srgrimes * obtained from (vp, name) where vp refers to the directory 571541Srgrimes * containing name. 581541Srgrimes * 5922521Sdyson * If it is a "negative" entry, (i.e. for a name that is known NOT to 6022521Sdyson * exist) the vnode pointer will be NULL. 616968Sphk * 621541Srgrimes * Upon reaching the last segment of a path, if the reference 631541Srgrimes * is for DELETE, or NOCACHE is set (rewrite), and the 641541Srgrimes * name is located in the cache, it will be dropped. 651541Srgrimes */ 661541Srgrimes 671541Srgrimes/* 681541Srgrimes * Structures associated with name cacheing. 691541Srgrimes */ 7022521Sdyson#define NCHHASH(dvp, cnp) \ 7129071Sphk (&nchashtbl[((dvp)->v_id + (cnp)->cn_hash) & nchash]) 7212820Sphkstatic LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ 7325453Sphkstatic TAILQ_HEAD(, namecache) ncneg; /* Hash Table */ 7423521Sbdestatic u_long nchash; /* size of hash table */ 7529071SphkSYSCTL_INT(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, ""); 7625453Sphkstatic u_long ncnegfactor = 16; /* ratio of negative entries */ 7725453SphkSYSCTL_INT(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, ""); 7825453Sphkstatic u_long numneg; /* number of cache entries allocated */ 7925453SphkSYSCTL_INT(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, ""); 8023521Sbdestatic u_long numcache; /* number of cache entries allocated */ 8125453SphkSYSCTL_INT(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, ""); 8222521Sdysonstruct nchstats nchstats; /* cache effectiveness statistics */ 831541Srgrimes 8423521Sbdestatic int doingcache = 1; /* 1 => enable the cache */ 8523521SbdeSYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, ""); 8625453SphkSYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), ""); 8725453SphkSYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), ""); 8823521Sbde 8929788Sphk/* 9029788Sphk * The new name cache statistics 9129788Sphk */ 9229788SphkSYSCTL_NODE(_vfs, CTL_VFS, cache, CTLFLAG_RW, 0, 9329788Sphk "Name cache statistics"); 9429788Sphk#define STATNODE(mode, name, var) \ 9529788Sphk SYSCTL_INT(_vfs_cache, OID_AUTO, name, mode, var, 0, ""); 9629788SphkSTATNODE(CTLFLAG_RD, numneg, &numneg); 9729788SphkSTATNODE(CTLFLAG_RD, numcache, &numcache); 9829788Sphkstatic u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls); 9929788Sphkstatic u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits); 10029788Sphkstatic u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits); 10129788Sphkstatic u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks); 10229788Sphkstatic u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss); 10329804Sphkstatic u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap); 10429788Sphkstatic u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps); 10529788Sphkstatic u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits); 10629788Sphkstatic u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps); 10729788Sphkstatic u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits); 10829788Sphk 10929788Sphk 11025453Sphkstatic void cache_zap __P((struct namecache *ncp)); 1116968Sphk 11222521Sdyson/* 11325453Sphk * Flags in namecache.nc_flag 11425453Sphk */ 11525453Sphk#define NCF_WHITE 1 11625453Sphk/* 11722521Sdyson * Delete an entry from its hash list and move it to the front 11822521Sdyson * of the LRU list for immediate reuse. 11922521Sdyson */ 12025453Sphkstatic void 12125453Sphkcache_zap(ncp) 12225453Sphk struct namecache *ncp; 12325453Sphk{ 12425453Sphk LIST_REMOVE(ncp, nc_hash); 12525453Sphk LIST_REMOVE(ncp, nc_src); 12628954Sphk if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) 12728954Sphk vdrop(ncp->nc_dvp); 12825453Sphk if (ncp->nc_vp) { 12925453Sphk TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); 13025453Sphk } else { 13125453Sphk TAILQ_REMOVE(&ncneg, ncp, nc_dst); 13225453Sphk numneg--; 13325453Sphk } 13425453Sphk numcache--; 13525453Sphk free(ncp, M_CACHE); 13622521Sdyson} 1376968Sphk 13822521Sdyson/* 13923521Sbde * Lookup an entry in the cache 1406968Sphk * 14123521Sbde * We don't do this if the segment name is long, simply so the cache 1426968Sphk * can avoid holding long names (which would either waste space, or 1431541Srgrimes * add greatly to the complexity). 1441541Srgrimes * 1456968Sphk * Lookup is called with dvp pointing to the directory to search, 14622521Sdyson * cnp pointing to the name of the entry being sought. If the lookup 14722521Sdyson * succeeds, the vnode is returned in *vpp, and a status of -1 is 14822521Sdyson * returned. If the lookup determines that the name does not exist 14922521Sdyson * (negative cacheing), a status of ENOENT is returned. If the lookup 15022521Sdyson * fails, a status of zero is returned. 1511541Srgrimes */ 1526968Sphk 1531541Srgrimesint 1541541Srgrimescache_lookup(dvp, vpp, cnp) 1551541Srgrimes struct vnode *dvp; 1561541Srgrimes struct vnode **vpp; 1571541Srgrimes struct componentname *cnp; 1581541Srgrimes{ 15931016Sphk register struct namecache *ncp; 1601541Srgrimes 1616928Sphk if (!doingcache) { 1626928Sphk cnp->cn_flags &= ~MAKEENTRY; 1631541Srgrimes return (0); 1646928Sphk } 16525453Sphk 16629788Sphk numcalls++; 16729788Sphk 16825453Sphk if (cnp->cn_nameptr[0] == '.') { 16925453Sphk if (cnp->cn_namelen == 1) { 17025453Sphk *vpp = dvp; 17129788Sphk dothits++; 17225453Sphk return (-1); 17325453Sphk } 17425453Sphk if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 17529788Sphk dotdothits++; 17625453Sphk if (dvp->v_dd->v_id != dvp->v_ddid || 17725453Sphk (cnp->cn_flags & MAKEENTRY) == 0) { 17825453Sphk dvp->v_ddid = 0; 17925453Sphk return (0); 18025453Sphk } 18125453Sphk *vpp = dvp->v_dd; 18225453Sphk return (-1); 18325453Sphk } 1841541Srgrimes } 1856968Sphk 18625453Sphk LIST_FOREACH(ncp, (NCHHASH(dvp, cnp)), nc_hash) { 18729788Sphk numchecks++; 18825453Sphk if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && 18931879Sbde !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) 19022521Sdyson break; 1911541Srgrimes } 1926968Sphk 19322521Sdyson /* We failed to find an entry */ 19422521Sdyson if (ncp == 0) { 19529804Sphk if ((cnp->cn_flags & MAKEENTRY) == 0) { 19629804Sphk nummisszap++; 19729804Sphk } else { 19829804Sphk nummiss++; 19929804Sphk } 20022521Sdyson nchstats.ncs_miss++; 20122521Sdyson return (0); 20222521Sdyson } 20322521Sdyson 2046968Sphk /* We don't want to have an entry, so dump it */ 2056928Sphk if ((cnp->cn_flags & MAKEENTRY) == 0) { 20629788Sphk numposzaps++; 2071541Srgrimes nchstats.ncs_badhits++; 20825453Sphk cache_zap(ncp); 2096968Sphk return (0); 21023521Sbde } 2116968Sphk 2126968Sphk /* We found a "positive" match, return the vnode */ 21322521Sdyson if (ncp->nc_vp) { 21429788Sphk numposhits++; 2151541Srgrimes nchstats.ncs_goodhits++; 2161541Srgrimes *vpp = ncp->nc_vp; 2171541Srgrimes return (-1); 2181541Srgrimes } 2191541Srgrimes 2206968Sphk /* We found a negative match, and want to create it, so purge */ 2216968Sphk if (cnp->cn_nameiop == CREATE) { 22229788Sphk numnegzaps++; 2237013Sphk nchstats.ncs_badhits++; 22425453Sphk cache_zap(ncp); 2256968Sphk return (0); 2266968Sphk } 2276968Sphk 22829788Sphk numneghits++; 22922521Sdyson /* 23022521Sdyson * We found a "negative" match, ENOENT notifies client of this match. 23122521Sdyson * The nc_vpid field records whether this is a whiteout. 23222521Sdyson */ 23325453Sphk TAILQ_REMOVE(&ncneg, ncp, nc_dst); 23425453Sphk TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 2356968Sphk nchstats.ncs_neghits++; 23625453Sphk if (ncp->nc_flag & NCF_WHITE) 23725453Sphk cnp->cn_flags |= ISWHITEOUT; 2386968Sphk return (ENOENT); 2391541Srgrimes} 2401541Srgrimes 2411541Srgrimes/* 2426968Sphk * Add an entry to the cache. 2431541Srgrimes */ 2441549Srgrimesvoid 2451541Srgrimescache_enter(dvp, vp, cnp) 2461541Srgrimes struct vnode *dvp; 2471541Srgrimes struct vnode *vp; 2481541Srgrimes struct componentname *cnp; 2491541Srgrimes{ 2506928Sphk register struct namecache *ncp; 2516928Sphk register struct nchashhead *ncpp; 2521541Srgrimes 2531541Srgrimes if (!doingcache) 2541541Srgrimes return; 2556968Sphk 25625453Sphk if (cnp->cn_nameptr[0] == '.') { 25725453Sphk if (cnp->cn_namelen == 1) { 25825453Sphk return; 2596928Sphk } 26025453Sphk if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 26125453Sphk if (vp) { 26225453Sphk dvp->v_dd = vp; 26325453Sphk dvp->v_ddid = vp->v_id; 26425453Sphk } else { 26525453Sphk dvp->v_dd = dvp; 26625453Sphk dvp->v_ddid = 0; 26725453Sphk } 26825453Sphk return; 26925453Sphk } 2706968Sphk } 27125453Sphk 27225453Sphk ncp = (struct namecache *) 27325453Sphk malloc(sizeof *ncp + cnp->cn_namelen, M_CACHE, M_WAITOK); 27425453Sphk bzero((char *)ncp, sizeof *ncp); 27525453Sphk numcache++; 27628954Sphk if (!vp) { 27725453Sphk numneg++; 27828954Sphk ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0; 27929071Sphk } else if (vp->v_type == VDIR) { 28029071Sphk vp->v_dd = dvp; 28129071Sphk vp->v_ddid = dvp->v_id; 28228954Sphk } 28323521Sbde 28422521Sdyson /* 28522521Sdyson * Fill in cache info, if vp is NULL this is a "negative" cache entry. 28622521Sdyson * For negative entries, we have to record whether it is a whiteout. 28722521Sdyson * the whiteout flag is stored in the nc_vpid field which is 28822521Sdyson * otherwise unused. 28922521Sdyson */ 2901541Srgrimes ncp->nc_vp = vp; 2911541Srgrimes ncp->nc_dvp = dvp; 2921541Srgrimes ncp->nc_nlen = cnp->cn_namelen; 29331879Sbde bcopy(cnp->cn_nameptr, ncp->nc_name, ncp->nc_nlen); 29422521Sdyson ncpp = NCHHASH(dvp, cnp); 2956928Sphk LIST_INSERT_HEAD(ncpp, ncp, nc_hash); 29628954Sphk if (LIST_EMPTY(&dvp->v_cache_src)) 29728954Sphk vhold(dvp); 29825453Sphk LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); 29925453Sphk if (vp) { 30025453Sphk TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); 30125453Sphk } else { 30225453Sphk TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 30325453Sphk } 30425453Sphk if (numneg*ncnegfactor > numcache) { 30525453Sphk ncp = TAILQ_FIRST(&ncneg); 30625453Sphk cache_zap(ncp); 30725453Sphk } 3081541Srgrimes} 3091541Srgrimes 3101541Srgrimes/* 3111541Srgrimes * Name cache initialization, from vfs_init() when we are booting 3121541Srgrimes */ 3131549Srgrimesvoid 3141541Srgrimesnchinit() 3151541Srgrimes{ 31623521Sbde 31725453Sphk TAILQ_INIT(&ncneg); 31829094Sphk nchashtbl = hashinit(desiredvnodes*2, M_CACHE, &nchash); 3191541Srgrimes} 3201541Srgrimes 3211541Srgrimes/* 32222521Sdyson * Invalidate all entries to particular vnode. 32323521Sbde * 32422521Sdyson * We actually just increment the v_id, that will do it. The stale entries 32522521Sdyson * will be purged by lookup as they get found. If the v_id wraps around, we 32622521Sdyson * need to ditch the entire cache, to avoid confusion. No valid vnode will 32722521Sdyson * ever have (v_id == 0). 3281541Srgrimes */ 3291549Srgrimesvoid 3301541Srgrimescache_purge(vp) 3311541Srgrimes struct vnode *vp; 3321541Srgrimes{ 33329094Sphk static u_long nextid; 3341541Srgrimes 33525453Sphk while (!LIST_EMPTY(&vp->v_cache_src)) 33625453Sphk cache_zap(LIST_FIRST(&vp->v_cache_src)); 33725453Sphk while (!TAILQ_EMPTY(&vp->v_cache_dst)) 33825453Sphk cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); 33925453Sphk 34029094Sphk nextid++; 34129094Sphk while (nextid == vp->v_id || !nextid) 34229094Sphk continue; 34329094Sphk vp->v_id = nextid; 34425453Sphk vp->v_dd = vp; 34525453Sphk vp->v_ddid = 0; 3461541Srgrimes} 3471541Srgrimes 3481541Srgrimes/* 3496968Sphk * Flush all entries referencing a particular filesystem. 3501541Srgrimes * 3516968Sphk * Since we need to check it anyway, we will flush all the invalid 35212968Sphk * entries at the same time. 3531541Srgrimes */ 3541549Srgrimesvoid 3551541Srgrimescache_purgevfs(mp) 3561541Srgrimes struct mount *mp; 3571541Srgrimes{ 3586968Sphk struct nchashhead *ncpp; 35922521Sdyson struct namecache *ncp, *nnp; 3601541Srgrimes 3616968Sphk /* Scan hash tables for applicable entries */ 36229071Sphk for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { 36325453Sphk for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) { 36425453Sphk nnp = LIST_NEXT(ncp, nc_hash); 36525453Sphk if (ncp->nc_dvp->v_mount == mp) { 36625453Sphk cache_zap(ncp); 3676968Sphk } 3681541Srgrimes } 3691541Srgrimes } 3701541Srgrimes} 37128787Sphk 37228787Sphk/* 37328787Sphk * Perform canonical checks and cache lookup and pass on to filesystem 37428787Sphk * through the vop_cachedlookup only if needed. 37528787Sphk */ 37628787Sphk 37728787Sphkint 37828787Sphkvfs_cache_lookup(ap) 37928787Sphk struct vop_lookup_args /* { 38028787Sphk struct vnode *a_dvp; 38128787Sphk struct vnode **a_vpp; 38228787Sphk struct componentname *a_cnp; 38328787Sphk } */ *ap; 38428787Sphk{ 38528787Sphk struct vnode *vdp; 38628787Sphk struct vnode *pdp; 38728787Sphk int lockparent; 38828787Sphk int error; 38928787Sphk struct vnode **vpp = ap->a_vpp; 39028787Sphk struct componentname *cnp = ap->a_cnp; 39128787Sphk struct ucred *cred = cnp->cn_cred; 39228787Sphk int flags = cnp->cn_flags; 39328787Sphk struct proc *p = cnp->cn_proc; 39428787Sphk u_long vpid; /* capability number of vnode */ 39528787Sphk 39628787Sphk *vpp = NULL; 39728787Sphk vdp = ap->a_dvp; 39828787Sphk lockparent = flags & LOCKPARENT; 39928787Sphk 40028787Sphk if (vdp->v_type != VDIR) 40128787Sphk return (ENOTDIR); 40228787Sphk 40328787Sphk if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) && 40428787Sphk (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 40528787Sphk return (EROFS); 40628787Sphk 40728787Sphk error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc); 40828787Sphk 40928787Sphk if (error) 41028787Sphk return (error); 41128787Sphk 41228787Sphk error = cache_lookup(vdp, vpp, cnp); 41328787Sphk 41428787Sphk if (!error) 41530439Sphk return (VOP_CACHEDLOOKUP(ap->a_dvp, ap->a_vpp, ap->a_cnp)); 41628787Sphk 41728787Sphk if (error == ENOENT) 41828787Sphk return (error); 41928787Sphk 42028787Sphk pdp = vdp; 42128787Sphk vdp = *vpp; 42228787Sphk vpid = vdp->v_id; 42328787Sphk if (pdp == vdp) { /* lookup on "." */ 42428787Sphk VREF(vdp); 42528787Sphk error = 0; 42628787Sphk } else if (flags & ISDOTDOT) { 42728787Sphk VOP_UNLOCK(pdp, 0, p); 42828787Sphk error = vget(vdp, LK_EXCLUSIVE, p); 42928787Sphk if (!error && lockparent && (flags & ISLASTCN)) 43028787Sphk error = vn_lock(pdp, LK_EXCLUSIVE, p); 43128787Sphk } else { 43228787Sphk error = vget(vdp, LK_EXCLUSIVE, p); 43328787Sphk if (!lockparent || error || !(flags & ISLASTCN)) 43428787Sphk VOP_UNLOCK(pdp, 0, p); 43528787Sphk } 43628787Sphk /* 43728787Sphk * Check that the capability number did not change 43828787Sphk * while we were waiting for the lock. 43928787Sphk */ 44028787Sphk if (!error) { 44128787Sphk if (vpid == vdp->v_id) 44228787Sphk return (0); 44328787Sphk vput(vdp); 44428787Sphk if (lockparent && pdp != vdp && (flags & ISLASTCN)) 44528787Sphk VOP_UNLOCK(pdp, 0, p); 44628787Sphk } 44728787Sphk error = vn_lock(pdp, LK_EXCLUSIVE, p); 44828787Sphk if (error) 44928787Sphk return (error); 45030474Sphk return (VOP_CACHEDLOOKUP(ap->a_dvp, ap->a_vpp, ap->a_cnp)); 45128787Sphk} 452