vfs_cache.c revision 23521
172017Scg/* 272017Scg * Copyright (c) 1989, 1993, 1995 372017Scg * The Regents of the University of California. All rights reserved. 472017Scg * 572017Scg * This code is derived from software contributed to Berkeley by 672017Scg * Poul-Henning Kamp of the FreeBSD Project. 772017Scg * 872017Scg * Redistribution and use in source and binary forms, with or without 972017Scg * modification, are permitted provided that the following conditions 1072017Scg * are met: 1172017Scg * 1. Redistributions of source code must retain the above copyright 1272017Scg * notice, this list of conditions and the following disclaimer. 1372017Scg * 2. Redistributions in binary form must reproduce the above copyright 1472017Scg * notice, this list of conditions and the following disclaimer in the 1572017Scg * documentation and/or other materials provided with the distribution. 1672017Scg * 3. All advertising materials mentioning features or use of this software 1772017Scg * must display the following acknowledgement: 1872017Scg * This product includes software developed by the University of 1972017Scg * California, Berkeley and its contributors. 2072017Scg * 4. Neither the name of the University nor the names of its contributors 2172017Scg * may be used to endorse or promote products derived from this software 2272017Scg * without specific prior written permission. 2372017Scg * 2472017Scg * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2572017Scg * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2672017Scg * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2772017Scg * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2872455Scg * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2972455Scg * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3072455Scg * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3172017Scg * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3272017Scg * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3372017Scg * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3472017Scg * SUCH DAMAGE. 35119287Simp * 36119287Simp * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 3772017Scg * $Id: vfs_cache.c,v 1.23 1997/02/22 09:39:31 peter Exp $ 3872017Scg */ 3972017Scg 4082180Scg#include <sys/param.h> 4182180Scg#include <sys/systm.h> 4284771Sorion#include <sys/kernel.h> 4372017Scg#include <sys/sysctl.h> 4472017Scg#include <sys/time.h> 4572017Scg#include <sys/mount.h> 4672017Scg#include <sys/vnode.h> 4772017Scg#include <sys/namei.h> 4872017Scg#include <sys/errno.h> 4972017Scg#include <sys/malloc.h> 5072017Scg 5172017Scg#define MAXVNODEUSE 32 5272017Scg 5372017Scg/* 5472017Scg * Name caching works as follows: 5572017Scg * 5672017Scg * Names found by directory scans are retained in a cache 5772017Scg * for future reference. It is managed LRU, so frequently 5872017Scg * used names will hang around. Cache is indexed by hash value 5972017Scg * obtained from (vp, name) where vp refers to the directory 6072017Scg * containing name. 6172017Scg * 6272017Scg * If it is a "negative" entry, (i.e. for a name that is known NOT to 6372017Scg * exist) the vnode pointer will be NULL. 6472017Scg * 6572017Scg * For simplicity (and economy of storage), names longer than 6672017Scg * a maximum length of NCHNAMLEN are not cached; they occur 6772017Scg * infrequently in any case, and are almost never of interest. 6874763Scg * 6974763Scg * Upon reaching the last segment of a path, if the reference 7072017Scg * is for DELETE, or NOCACHE is set (rewrite), and the 7172455Scg * name is located in the cache, it will be dropped. 7272455Scg */ 7372455Scg 7472017Scg/* 7572017Scg * Structures associated with name cacheing. 7672017Scg */ 7772017Scg#define NCHHASH(dvp, cnp) \ 7872017Scg (&nchashtbl[((dvp)->v_id + (cnp)->cn_hash) % nchash]) 7972017Scgstatic LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ 8072017Scgstatic u_long nchash; /* size of hash table */ 8172017Scgstatic u_long numcache; /* number of cache entries allocated */ 8272017Scgstatic TAILQ_HEAD(, namecache) nclruhead; /* LRU chain */ 8372017Scgstruct nchstats nchstats; /* cache effectiveness statistics */ 8472017Scg 8572017Scgstatic int doingcache = 1; /* 1 => enable the cache */ 8672017ScgSYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, ""); 8772017Scg 8872017Scg#ifdef NCH_STATISTICS 8972017Scgu_long nchnbr; 9084771Sorion#define NCHNBR(ncp) (ncp)->nc_nbr = ++nchnbr; 9172017Scg#define NCHHIT(ncp) (ncp)->nc_hits++ 9272017Scg#else 9372017Scg#define NCHNBR(ncp) 9472017Scg#define NCHHIT(ncp) 9572017Scg#endif 9672017Scg 9772017Scg/* 9872017Scg * Delete an entry from its hash list and move it to the front 9972017Scg * of the LRU list for immediate reuse. 10072017Scg */ 10172017Scg#define PURGE(ncp) { \ 10272455Scg LIST_REMOVE(ncp, nc_hash); \ 10372017Scg ncp->nc_hash.le_prev = 0; \ 10472017Scg TAILQ_REMOVE(&nclruhead, ncp, nc_lru); \ 10572017Scg TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru); \ 10672017Scg} 10772017Scg 10872017Scg/* 10972017Scg * Move an entry that has been used to the tail of the LRU list 11072017Scg * so that it will be preserved for future use. 11172017Scg */ 11272017Scg#define TOUCH(ncp) { \ 11372017Scg if (ncp->nc_lru.tqe_next != 0) { \ 11472017Scg TAILQ_REMOVE(&nclruhead, ncp, nc_lru); \ 11572017Scg TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); \ 11672017Scg NCHNBR(ncp); \ 11772017Scg } \ 11872017Scg} 11972017Scg 12072017Scg/* 12172017Scg * Lookup an entry in the cache 12272017Scg * 12372017Scg * We don't do this if the segment name is long, simply so the cache 12472017Scg * can avoid holding long names (which would either waste space, or 12572017Scg * add greatly to the complexity). 12672017Scg * 12772017Scg * Lookup is called with dvp pointing to the directory to search, 12872017Scg * cnp pointing to the name of the entry being sought. If the lookup 12972017Scg * succeeds, the vnode is returned in *vpp, and a status of -1 is 13072017Scg * returned. If the lookup determines that the name does not exist 13172017Scg * (negative cacheing), a status of ENOENT is returned. If the lookup 13272017Scg * fails, a status of zero is returned. 13372017Scg */ 13472017Scg 13574763Scgint 13672017Scgcache_lookup(dvp, vpp, cnp) 13772017Scg struct vnode *dvp; 13872017Scg struct vnode **vpp; 13972017Scg struct componentname *cnp; 14072017Scg{ 14172017Scg register struct namecache *ncp, *nnp; 14272017Scg register struct nchashhead *ncpp; 14372017Scg 14472017Scg if (!doingcache) { 14572017Scg cnp->cn_flags &= ~MAKEENTRY; 14672017Scg return (0); 14772017Scg } 14872017Scg if (cnp->cn_namelen > NCHNAMLEN) { 14972017Scg nchstats.ncs_long++; 15072017Scg cnp->cn_flags &= ~MAKEENTRY; 15172017Scg return (0); 15272017Scg } 15372017Scg 15472017Scg ncpp = NCHHASH(dvp, cnp); 15572017Scg for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) { 15672017Scg nnp = ncp->nc_hash.le_next; 15772017Scg /* If one of the vp's went stale, don't bother anymore. */ 15872017Scg if ((ncp->nc_dvpid != ncp->nc_dvp->v_id) || 15972017Scg (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id)) { 16072017Scg nchstats.ncs_falsehits++; 16172017Scg PURGE(ncp); 16272017Scg continue; 16372017Scg } 16472017Scg /* Now that we know the vp's to be valid, is it ours ? */ 16572017Scg if (ncp->nc_dvp == dvp && 16672017Scg ncp->nc_nlen == cnp->cn_namelen && 16772017Scg !bcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen)) 16872017Scg break; 16972017Scg } 17072017Scg 17172017Scg /* We failed to find an entry */ 17272017Scg if (ncp == 0) { 17372017Scg nchstats.ncs_miss++; 17472017Scg return (0); 17572017Scg } 17672017Scg 17772017Scg NCHHIT(ncp); 17872017Scg 17972017Scg /* We don't want to have an entry, so dump it */ 18072017Scg if ((cnp->cn_flags & MAKEENTRY) == 0) { 18172017Scg nchstats.ncs_badhits++; 18272017Scg PURGE(ncp); 18372017Scg return (0); 18472017Scg } 18572017Scg 18672017Scg /* We found a "positive" match, return the vnode */ 18772017Scg if (ncp->nc_vp) { 18872017Scg nchstats.ncs_goodhits++; 18972017Scg TOUCH(ncp); 19072017Scg *vpp = ncp->nc_vp; 19172017Scg if ((*vpp)->v_usage < MAXVNODEUSE) 19272017Scg (*vpp)->v_usage++; 19372017Scg return (-1); 19472017Scg } 19572017Scg 19672017Scg /* We found a negative match, and want to create it, so purge */ 19772017Scg if (cnp->cn_nameiop == CREATE) { 19872017Scg nchstats.ncs_badhits++; 19972017Scg PURGE(ncp); 20072017Scg return (0); 20172017Scg } 20272017Scg 20372455Scg /* 20472017Scg * We found a "negative" match, ENOENT notifies client of this match. 20572017Scg * The nc_vpid field records whether this is a whiteout. 20672017Scg */ 20772017Scg nchstats.ncs_neghits++; 20872017Scg TOUCH(ncp); 20972017Scg cnp->cn_flags |= ncp->nc_vpid; 21072017Scg return (ENOENT); 21172017Scg} 21272017Scg 21372017Scg/* 21472017Scg * Add an entry to the cache. 21572017Scg */ 21672017Scgvoid 21772017Scgcache_enter(dvp, vp, cnp) 21872017Scg struct vnode *dvp; 21972017Scg struct vnode *vp; 22072017Scg struct componentname *cnp; 22172017Scg{ 22272017Scg register struct namecache *ncp; 22372017Scg register struct nchashhead *ncpp; 22472017Scg 22572017Scg if (!doingcache) 22672017Scg return; 22772017Scg 22872455Scg if (cnp->cn_namelen > NCHNAMLEN) { 22972017Scg printf("cache_enter: name too long"); 23072017Scg return; 23172017Scg } 23272017Scg 23372017Scg /* 23472017Scg * We allocate a new entry if we are less than the maximum 23572017Scg * allowed and the one at the front of the LRU list is in use. 23672455Scg * Otherwise we use the one at the front of the LRU list. 23772017Scg */ 23872017Scg if (numcache < desiredvnodes && 23972455Scg ((ncp = nclruhead.tqh_first) == NULL || 24072017Scg ncp->nc_hash.le_prev != 0)) { 24172017Scg /* Add one more entry */ 24272017Scg ncp = (struct namecache *) 24372017Scg malloc((u_long)sizeof *ncp, M_CACHE, M_WAITOK); 24472017Scg bzero((char *)ncp, sizeof *ncp); 24572017Scg numcache++; 24672017Scg } else if (ncp = nclruhead.tqh_first) { 24772017Scg /* reuse an old entry */ 24872017Scg TAILQ_REMOVE(&nclruhead, ncp, nc_lru); 24972017Scg if (ncp->nc_hash.le_prev != 0) { 25072017Scg LIST_REMOVE(ncp, nc_hash); 25172017Scg ncp->nc_hash.le_prev = 0; 25272455Scg } 25372017Scg } else { 25472017Scg /* give up */ 25572017Scg return; 25672017Scg } 25772455Scg 25872017Scg /* 25972017Scg * Fill in cache info, if vp is NULL this is a "negative" cache entry. 26072017Scg * For negative entries, we have to record whether it is a whiteout. 26172017Scg * the whiteout flag is stored in the nc_vpid field which is 26272455Scg * otherwise unused. 26372455Scg */ 26472017Scg ncp->nc_vp = vp; 26572017Scg if (vp) { 26672017Scg ncp->nc_vpid = vp->v_id; 26772017Scg if (vp->v_usage < MAXVNODEUSE) 26872017Scg ++vp->v_usage; 26972017Scg } else 27072017Scg ncp->nc_vpid = cnp->cn_flags & ISWHITEOUT; 27172017Scg ncp->nc_dvp = dvp; 27272017Scg ncp->nc_dvpid = dvp->v_id; 27372017Scg ncp->nc_nlen = cnp->cn_namelen; 27472017Scg bcopy(cnp->cn_nameptr, ncp->nc_name, (unsigned)ncp->nc_nlen); 27572017Scg TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); 27672017Scg ncpp = NCHHASH(dvp, cnp); 27772455Scg LIST_INSERT_HEAD(ncpp, ncp, nc_hash); 27872455Scg} 27972017Scg 28072017Scg/* 28172017Scg * Name cache initialization, from vfs_init() when we are booting 28272017Scg */ 28372017Scgvoid 28472017Scgnchinit() 28572017Scg{ 28672455Scg 28772017Scg TAILQ_INIT(&nclruhead); 28872017Scg nchashtbl = phashinit(desiredvnodes, M_CACHE, &nchash); 28972017Scg} 29072017Scg 29172017Scg/* 29272455Scg * Invalidate all entries to particular vnode. 29372017Scg * 29472455Scg * We actually just increment the v_id, that will do it. The stale entries 29572017Scg * will be purged by lookup as they get found. If the v_id wraps around, we 29672017Scg * need to ditch the entire cache, to avoid confusion. No valid vnode will 29772017Scg * ever have (v_id == 0). 29872017Scg */ 29972017Scgvoid 30072017Scgcache_purge(vp) 30172017Scg struct vnode *vp; 30272017Scg{ 30372017Scg struct namecache *ncp; 30472017Scg struct nchashhead *ncpp; 30572017Scg static u_long nextvnodeid; 30672017Scg 30772017Scg vp->v_id = ++nextvnodeid; 30872017Scg if (nextvnodeid != 0) 30972017Scg return; 31072017Scg for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) { 31174763Scg while (ncp = ncpp->lh_first) 31272017Scg PURGE(ncp); 31372017Scg } 31472017Scg vp->v_id = ++nextvnodeid; 31572017Scg} 31672017Scg 31784771Sorion/* 31872017Scg * Flush all entries referencing a particular filesystem. 31972017Scg * 32072017Scg * Since we need to check it anyway, we will flush all the invalid 32172017Scg * entries at the same time. 32272017Scg */ 32372017Scgvoid 32472017Scgcache_purgevfs(mp) 32572017Scg struct mount *mp; 32672455Scg{ 32772017Scg struct nchashhead *ncpp; 32872017Scg struct namecache *ncp, *nnp; 32972017Scg 33072017Scg /* Scan hash tables for applicable entries */ 33172017Scg for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) { 33272017Scg for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) { 33372017Scg nnp = ncp->nc_hash.le_next; 33472017Scg if (ncp->nc_dvpid != ncp->nc_dvp->v_id || 33572017Scg (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id) || 33672017Scg ncp->nc_dvp->v_mount == mp) { 33772017Scg PURGE(ncp); 33872017Scg } 33972017Scg } 34072017Scg } 34184771Sorion} 34272455Scg