vfs_cache.c revision 23521
172017Scg/*
272017Scg * Copyright (c) 1989, 1993, 1995
372017Scg *	The Regents of the University of California.  All rights reserved.
472017Scg *
572017Scg * This code is derived from software contributed to Berkeley by
672017Scg * Poul-Henning Kamp of the FreeBSD Project.
772017Scg *
872017Scg * Redistribution and use in source and binary forms, with or without
972017Scg * modification, are permitted provided that the following conditions
1072017Scg * are met:
1172017Scg * 1. Redistributions of source code must retain the above copyright
1272017Scg *    notice, this list of conditions and the following disclaimer.
1372017Scg * 2. Redistributions in binary form must reproduce the above copyright
1472017Scg *    notice, this list of conditions and the following disclaimer in the
1572017Scg *    documentation and/or other materials provided with the distribution.
1672017Scg * 3. All advertising materials mentioning features or use of this software
1772017Scg *    must display the following acknowledgement:
1872017Scg *	This product includes software developed by the University of
1972017Scg *	California, Berkeley and its contributors.
2072017Scg * 4. Neither the name of the University nor the names of its contributors
2172017Scg *    may be used to endorse or promote products derived from this software
2272017Scg *    without specific prior written permission.
2372017Scg *
2472017Scg * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2572017Scg * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2672017Scg * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2772017Scg * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2872455Scg * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2972455Scg * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3072455Scg * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3172017Scg * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3272017Scg * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3372017Scg * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3472017Scg * SUCH DAMAGE.
35119287Simp *
36119287Simp *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
3772017Scg * $Id: vfs_cache.c,v 1.23 1997/02/22 09:39:31 peter Exp $
3872017Scg */
3972017Scg
4082180Scg#include <sys/param.h>
4182180Scg#include <sys/systm.h>
4284771Sorion#include <sys/kernel.h>
4372017Scg#include <sys/sysctl.h>
4472017Scg#include <sys/time.h>
4572017Scg#include <sys/mount.h>
4672017Scg#include <sys/vnode.h>
4772017Scg#include <sys/namei.h>
4872017Scg#include <sys/errno.h>
4972017Scg#include <sys/malloc.h>
5072017Scg
5172017Scg#define MAXVNODEUSE 32
5272017Scg
5372017Scg/*
5472017Scg * Name caching works as follows:
5572017Scg *
5672017Scg * Names found by directory scans are retained in a cache
5772017Scg * for future reference.  It is managed LRU, so frequently
5872017Scg * used names will hang around.  Cache is indexed by hash value
5972017Scg * obtained from (vp, name) where vp refers to the directory
6072017Scg * containing name.
6172017Scg *
6272017Scg * If it is a "negative" entry, (i.e. for a name that is known NOT to
6372017Scg * exist) the vnode pointer will be NULL.
6472017Scg *
6572017Scg * For simplicity (and economy of storage), names longer than
6672017Scg * a maximum length of NCHNAMLEN are not cached; they occur
6772017Scg * infrequently in any case, and are almost never of interest.
6874763Scg *
6974763Scg * Upon reaching the last segment of a path, if the reference
7072017Scg * is for DELETE, or NOCACHE is set (rewrite), and the
7172455Scg * name is located in the cache, it will be dropped.
7272455Scg */
7372455Scg
7472017Scg/*
7572017Scg * Structures associated with name cacheing.
7672017Scg */
7772017Scg#define NCHHASH(dvp, cnp) \
7872017Scg	(&nchashtbl[((dvp)->v_id + (cnp)->cn_hash) % nchash])
7972017Scgstatic LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
8072017Scgstatic u_long	nchash;			/* size of hash table */
8172017Scgstatic u_long	numcache;		/* number of cache entries allocated */
8272017Scgstatic TAILQ_HEAD(, namecache) nclruhead;	/* LRU chain */
8372017Scgstruct	nchstats nchstats;		/* cache effectiveness statistics */
8472017Scg
8572017Scgstatic int	doingcache = 1;		/* 1 => enable the cache */
8672017ScgSYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
8772017Scg
8872017Scg#ifdef NCH_STATISTICS
8972017Scgu_long	nchnbr;
9084771Sorion#define NCHNBR(ncp) (ncp)->nc_nbr = ++nchnbr;
9172017Scg#define NCHHIT(ncp) (ncp)->nc_hits++
9272017Scg#else
9372017Scg#define NCHNBR(ncp)
9472017Scg#define NCHHIT(ncp)
9572017Scg#endif
9672017Scg
9772017Scg/*
9872017Scg * Delete an entry from its hash list and move it to the front
9972017Scg * of the LRU list for immediate reuse.
10072017Scg */
10172017Scg#define PURGE(ncp)  {						\
10272455Scg	LIST_REMOVE(ncp, nc_hash);				\
10372017Scg	ncp->nc_hash.le_prev = 0;				\
10472017Scg	TAILQ_REMOVE(&nclruhead, ncp, nc_lru);			\
10572017Scg	TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);		\
10672017Scg}
10772017Scg
10872017Scg/*
10972017Scg * Move an entry that has been used to the tail of the LRU list
11072017Scg * so that it will be preserved for future use.
11172017Scg */
11272017Scg#define TOUCH(ncp)  {						\
11372017Scg	if (ncp->nc_lru.tqe_next != 0) {			\
11472017Scg		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);		\
11572017Scg		TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);	\
11672017Scg		NCHNBR(ncp);					\
11772017Scg	}							\
11872017Scg}
11972017Scg
12072017Scg/*
12172017Scg * Lookup an entry in the cache
12272017Scg *
12372017Scg * We don't do this if the segment name is long, simply so the cache
12472017Scg * can avoid holding long names (which would either waste space, or
12572017Scg * add greatly to the complexity).
12672017Scg *
12772017Scg * Lookup is called with dvp pointing to the directory to search,
12872017Scg * cnp pointing to the name of the entry being sought. If the lookup
12972017Scg * succeeds, the vnode is returned in *vpp, and a status of -1 is
13072017Scg * returned. If the lookup determines that the name does not exist
13172017Scg * (negative cacheing), a status of ENOENT is returned. If the lookup
13272017Scg * fails, a status of zero is returned.
13372017Scg */
13472017Scg
13574763Scgint
13672017Scgcache_lookup(dvp, vpp, cnp)
13772017Scg	struct vnode *dvp;
13872017Scg	struct vnode **vpp;
13972017Scg	struct componentname *cnp;
14072017Scg{
14172017Scg	register struct namecache *ncp, *nnp;
14272017Scg	register struct nchashhead *ncpp;
14372017Scg
14472017Scg	if (!doingcache) {
14572017Scg		cnp->cn_flags &= ~MAKEENTRY;
14672017Scg		return (0);
14772017Scg	}
14872017Scg	if (cnp->cn_namelen > NCHNAMLEN) {
14972017Scg		nchstats.ncs_long++;
15072017Scg		cnp->cn_flags &= ~MAKEENTRY;
15172017Scg		return (0);
15272017Scg	}
15372017Scg
15472017Scg	ncpp = NCHHASH(dvp, cnp);
15572017Scg	for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
15672017Scg		nnp = ncp->nc_hash.le_next;
15772017Scg		/* If one of the vp's went stale, don't bother anymore. */
15872017Scg		if ((ncp->nc_dvpid != ncp->nc_dvp->v_id) ||
15972017Scg		    (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id)) {
16072017Scg			nchstats.ncs_falsehits++;
16172017Scg			PURGE(ncp);
16272017Scg			continue;
16372017Scg		}
16472017Scg		/* Now that we know the vp's to be valid, is it ours ? */
16572017Scg		if (ncp->nc_dvp == dvp &&
16672017Scg		    ncp->nc_nlen == cnp->cn_namelen &&
16772017Scg		    !bcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
16872017Scg			break;
16972017Scg	}
17072017Scg
17172017Scg	/* We failed to find an entry */
17272017Scg	if (ncp == 0) {
17372017Scg		nchstats.ncs_miss++;
17472017Scg		return (0);
17572017Scg	}
17672017Scg
17772017Scg	NCHHIT(ncp);
17872017Scg
17972017Scg	/* We don't want to have an entry, so dump it */
18072017Scg	if ((cnp->cn_flags & MAKEENTRY) == 0) {
18172017Scg		nchstats.ncs_badhits++;
18272017Scg		PURGE(ncp);
18372017Scg		return (0);
18472017Scg	}
18572017Scg
18672017Scg	/* We found a "positive" match, return the vnode */
18772017Scg        if (ncp->nc_vp) {
18872017Scg		nchstats.ncs_goodhits++;
18972017Scg		TOUCH(ncp);
19072017Scg		*vpp = ncp->nc_vp;
19172017Scg		if ((*vpp)->v_usage < MAXVNODEUSE)
19272017Scg			(*vpp)->v_usage++;
19372017Scg		return (-1);
19472017Scg	}
19572017Scg
19672017Scg	/* We found a negative match, and want to create it, so purge */
19772017Scg	if (cnp->cn_nameiop == CREATE) {
19872017Scg		nchstats.ncs_badhits++;
19972017Scg		PURGE(ncp);
20072017Scg		return (0);
20172017Scg	}
20272017Scg
20372455Scg	/*
20472017Scg	 * We found a "negative" match, ENOENT notifies client of this match.
20572017Scg	 * The nc_vpid field records whether this is a whiteout.
20672017Scg	 */
20772017Scg	nchstats.ncs_neghits++;
20872017Scg	TOUCH(ncp);
20972017Scg	cnp->cn_flags |= ncp->nc_vpid;
21072017Scg	return (ENOENT);
21172017Scg}
21272017Scg
21372017Scg/*
21472017Scg * Add an entry to the cache.
21572017Scg */
21672017Scgvoid
21772017Scgcache_enter(dvp, vp, cnp)
21872017Scg	struct vnode *dvp;
21972017Scg	struct vnode *vp;
22072017Scg	struct componentname *cnp;
22172017Scg{
22272017Scg	register struct namecache *ncp;
22372017Scg	register struct nchashhead *ncpp;
22472017Scg
22572017Scg	if (!doingcache)
22672017Scg		return;
22772017Scg
22872455Scg	if (cnp->cn_namelen > NCHNAMLEN) {
22972017Scg		printf("cache_enter: name too long");
23072017Scg		return;
23172017Scg	}
23272017Scg
23372017Scg	/*
23472017Scg	 * We allocate a new entry if we are less than the maximum
23572017Scg	 * allowed and the one at the front of the LRU list is in use.
23672455Scg	 * Otherwise we use the one at the front of the LRU list.
23772017Scg	 */
23872017Scg	if (numcache < desiredvnodes &&
23972455Scg	    ((ncp = nclruhead.tqh_first) == NULL ||
24072017Scg	    ncp->nc_hash.le_prev != 0)) {
24172017Scg		/* Add one more entry */
24272017Scg		ncp = (struct namecache *)
24372017Scg			malloc((u_long)sizeof *ncp, M_CACHE, M_WAITOK);
24472017Scg		bzero((char *)ncp, sizeof *ncp);
24572017Scg		numcache++;
24672017Scg	} else if (ncp = nclruhead.tqh_first) {
24772017Scg		/* reuse an old entry */
24872017Scg		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
24972017Scg		if (ncp->nc_hash.le_prev != 0) {
25072017Scg			LIST_REMOVE(ncp, nc_hash);
25172017Scg			ncp->nc_hash.le_prev = 0;
25272455Scg		}
25372017Scg	} else {
25472017Scg		/* give up */
25572017Scg		return;
25672017Scg	}
25772455Scg
25872017Scg	/*
25972017Scg	 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
26072017Scg	 * For negative entries, we have to record whether it is a whiteout.
26172017Scg	 * the whiteout flag is stored in the nc_vpid field which is
26272455Scg	 * otherwise unused.
26372455Scg	 */
26472017Scg	ncp->nc_vp = vp;
26572017Scg	if (vp) {
26672017Scg		ncp->nc_vpid = vp->v_id;
26772017Scg		if (vp->v_usage < MAXVNODEUSE)
26872017Scg			++vp->v_usage;
26972017Scg	} else
27072017Scg		ncp->nc_vpid = cnp->cn_flags & ISWHITEOUT;
27172017Scg	ncp->nc_dvp = dvp;
27272017Scg	ncp->nc_dvpid = dvp->v_id;
27372017Scg	ncp->nc_nlen = cnp->cn_namelen;
27472017Scg	bcopy(cnp->cn_nameptr, ncp->nc_name, (unsigned)ncp->nc_nlen);
27572017Scg	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
27672017Scg	ncpp = NCHHASH(dvp, cnp);
27772455Scg	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
27872455Scg}
27972017Scg
28072017Scg/*
28172017Scg * Name cache initialization, from vfs_init() when we are booting
28272017Scg */
28372017Scgvoid
28472017Scgnchinit()
28572017Scg{
28672455Scg
28772017Scg	TAILQ_INIT(&nclruhead);
28872017Scg	nchashtbl = phashinit(desiredvnodes, M_CACHE, &nchash);
28972017Scg}
29072017Scg
29172017Scg/*
29272455Scg * Invalidate all entries to particular vnode.
29372017Scg *
29472455Scg * We actually just increment the v_id, that will do it. The stale entries
29572017Scg * will be purged by lookup as they get found. If the v_id wraps around, we
29672017Scg * need to ditch the entire cache, to avoid confusion. No valid vnode will
29772017Scg * ever have (v_id == 0).
29872017Scg */
29972017Scgvoid
30072017Scgcache_purge(vp)
30172017Scg	struct vnode *vp;
30272017Scg{
30372017Scg	struct namecache *ncp;
30472017Scg	struct nchashhead *ncpp;
30572017Scg	static u_long nextvnodeid;
30672017Scg
30772017Scg	vp->v_id = ++nextvnodeid;
30872017Scg	if (nextvnodeid != 0)
30972017Scg		return;
31072017Scg	for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
31174763Scg		while (ncp = ncpp->lh_first)
31272017Scg			PURGE(ncp);
31372017Scg	}
31472017Scg	vp->v_id = ++nextvnodeid;
31572017Scg}
31672017Scg
31784771Sorion/*
31872017Scg * Flush all entries referencing a particular filesystem.
31972017Scg *
32072017Scg * Since we need to check it anyway, we will flush all the invalid
32172017Scg * entries at the same time.
32272017Scg */
32372017Scgvoid
32472017Scgcache_purgevfs(mp)
32572017Scg	struct mount *mp;
32672455Scg{
32772017Scg	struct nchashhead *ncpp;
32872017Scg	struct namecache *ncp, *nnp;
32972017Scg
33072017Scg	/* Scan hash tables for applicable entries */
33172017Scg	for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
33272017Scg		for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
33372017Scg			nnp = ncp->nc_hash.le_next;
33472017Scg			if (ncp->nc_dvpid != ncp->nc_dvp->v_id ||
33572017Scg			    (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id) ||
33672017Scg			    ncp->nc_dvp->v_mount == mp) {
33772017Scg				PURGE(ncp);
33872017Scg			}
33972017Scg		}
34072017Scg	}
34184771Sorion}
34272455Scg