vfs_cache.c revision 22521
1/*
2 * Copyright (c) 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Poul-Henning Kamp of the FreeBSD Project.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	@(#)vfs_cache.c	8.3 (Berkeley) 8/22/94
37 * $FreeBSD: head/sys/kern/vfs_cache.c 22521 1997-02-10 02:22:35Z dyson $
38 */
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/kernel.h>
43#include <sys/sysctl.h>
44#include <sys/time.h>
45#include <sys/mount.h>
46#include <sys/vnode.h>
47#include <sys/namei.h>
48#include <sys/errno.h>
49#include <sys/malloc.h>
50
/*
 * Ceiling for a vnode's v_usage counter: cache_lookup() and cache_enter()
 * only bump v_usage while it is still below this value.
 */
#define	MAXVNODEUSE	32
52
53/*
54 * Name caching works as follows:
55 *
56 * Names found by directory scans are retained in a cache
57 * for future reference.  It is managed LRU, so frequently
58 * used names will hang around.  Cache is indexed by hash value
59 * obtained from (vp, name) where vp refers to the directory
60 * containing name.
61 *
62 * If it is a "negative" entry, (i.e. for a name that is known NOT to
63 * exist) the vnode pointer will be NULL.
64 *
65 * For simplicity (and economy of storage), names longer than
66 * a maximum length of NCHNAMLEN are not cached; they occur
67 * infrequently in any case, and are almost never of interest.
68 *
69 * Upon reaching the last segment of a path, if the reference
70 * is for DELETE, or NOCACHE is set (rewrite), and the
71 * name is located in the cache, it will be dropped.
72 */
73
74/*
75 * Structures associated with name cacheing.
76 */
77#define NCHHASH(dvp, cnp) \
78	(&nchashtbl[((dvp)->v_id + (cnp)->cn_hash) & nchash])
79static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
80static u_long nchash;			/* size of hash table - 1 */
81static int doingcache = 1;			/* 1 => enable the cache */
82SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
83static u_long numcache;			/* number of cache entries allocated */
84static TAILQ_HEAD(, namecache) nclruhead;	/* LRU chain */
85struct	nchstats nchstats;		/* cache effectiveness statistics */
86
/*
 * Optional per-entry statistics hooks.
 *
 * With NCH_STATISTICS defined, NCHNBR stamps an entry with the next value
 * of the global counter nchnbr and NCHHIT bumps the entry's hit count.
 * Without it both hooks do nothing.  Either way each hook expands to a
 * single expression with no trailing semicolon, so "NCHNBR(ncp);" is
 * exactly one statement and is safe as the body of an if/else.
 */
#ifdef NCH_STATISTICS
u_long	nchnbr;
#define NCHNBR(ncp)	((ncp)->nc_nbr = ++nchnbr)
#define NCHHIT(ncp)	((ncp)->nc_hits++)
#else
#define NCHNBR(ncp)	((void)0)
#define NCHHIT(ncp)	((void)0)
#endif
95
/*
 * Take an entry off its hash chain and park it at the head of the LRU
 * list, which is where cache_enter() looks first for an entry to reuse.
 * A cleared nc_hash.le_prev marks the entry as not being on any chain.
 *
 * Wrapped in do { } while (0) with a parenthesized argument so that
 * "PURGE(x);" is a single statement for any pointer expression x.
 */
#define PURGE(ncp)  do {					\
	LIST_REMOVE((ncp), nc_hash);				\
	(ncp)->nc_hash.le_prev = NULL;				\
	TAILQ_REMOVE(&nclruhead, (ncp), nc_lru);		\
	TAILQ_INSERT_HEAD(&nclruhead, (ncp), nc_lru);		\
} while (0)
106
/*
 * Mark an entry as recently used by moving it to the tail of the LRU
 * list, the end farthest from reuse.  An entry whose tqe_next is already
 * null is left where it is.
 *
 * Wrapped in do { } while (0) with a parenthesized argument so that
 * "TOUCH(x);" is a single statement for any pointer expression x.
 */
#define TOUCH(ncp)  do {					\
	if ((ncp)->nc_lru.tqe_next != NULL) {			\
		TAILQ_REMOVE(&nclruhead, (ncp), nc_lru);	\
		TAILQ_INSERT_TAIL(&nclruhead, (ncp), nc_lru);	\
		NCHNBR(ncp);					\
	}							\
} while (0)
118
119/*
120 * Lookup an entry in the cache
121 *
122 * We don't do this if the segment name is long, simply so the cache
123 * can avoid holding long names (which would either waste space, or
124 * add greatly to the complexity).
125 *
126 * Lookup is called with dvp pointing to the directory to search,
127 * cnp pointing to the name of the entry being sought. If the lookup
128 * succeeds, the vnode is returned in *vpp, and a status of -1 is
129 * returned. If the lookup determines that the name does not exist
130 * (negative cacheing), a status of ENOENT is returned. If the lookup
131 * fails, a status of zero is returned.
132 */
133
134int
135cache_lookup(dvp, vpp, cnp)
136	struct vnode *dvp;
137	struct vnode **vpp;
138	struct componentname *cnp;
139{
140	register struct namecache *ncp, *nnp;
141	register struct nchashhead *ncpp;
142
143	if (!doingcache) {
144		cnp->cn_flags &= ~MAKEENTRY;
145		return (0);
146	}
147
148	if (cnp->cn_namelen > NCHNAMLEN) {
149		nchstats.ncs_long++;
150		cnp->cn_flags &= ~MAKEENTRY;
151		return (0);
152	}
153
154	ncpp = NCHHASH(dvp, cnp);
155	for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
156		nnp = ncp->nc_hash.le_next;
157		/* If one of the vp's went stale, don't bother anymore. */
158		if ((ncp->nc_dvpid != ncp->nc_dvp->v_id) ||
159		    (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id)) {
160			nchstats.ncs_falsehits++;
161			PURGE(ncp);
162			continue;
163		}
164		/* Now that we know the vp's to be valid, is it ours ? */
165		if (ncp->nc_dvp == dvp &&
166		    ncp->nc_nlen == cnp->cn_namelen &&
167		    !bcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
168			break;
169	}
170
171	/* We failed to find an entry */
172	if (ncp == 0) {
173		nchstats.ncs_miss++;
174		return (0);
175	}
176
177	NCHHIT(ncp);
178
179	/* We don't want to have an entry, so dump it */
180	if ((cnp->cn_flags & MAKEENTRY) == 0) {
181		nchstats.ncs_badhits++;
182		PURGE(ncp);
183		return (0);
184	}
185
186	/* We found a "positive" match, return the vnode */
187        if (ncp->nc_vp) {
188		nchstats.ncs_goodhits++;
189		TOUCH(ncp);
190		*vpp = ncp->nc_vp;
191		if ((*vpp)->v_usage < MAXVNODEUSE)
192			(*vpp)->v_usage++;
193		return (-1);
194	}
195
196	/* We found a negative match, and want to create it, so purge */
197	if (cnp->cn_nameiop == CREATE) {
198		nchstats.ncs_badhits++;
199		PURGE(ncp);
200		return (0);
201	}
202
203	/*
204	 * We found a "negative" match, ENOENT notifies client of this match.
205	 * The nc_vpid field records whether this is a whiteout.
206	 */
207	nchstats.ncs_neghits++;
208	TOUCH(ncp);
209	cnp->cn_flags |= ncp->nc_vpid;
210	return (ENOENT);
211}
212
213/*
214 * Add an entry to the cache.
215 */
216void
217cache_enter(dvp, vp, cnp)
218	struct vnode *dvp;
219	struct vnode *vp;
220	struct componentname *cnp;
221{
222	register struct namecache *ncp;
223	register struct nchashhead *ncpp;
224
225	if (!doingcache)
226		return;
227
228#ifdef DIAGNOSTIC
229	if (cnp->cn_namelen > NCHNAMLEN) {
230		printf("cache_enter: name too long");
231		return;
232	}
233#endif
234
235	/*
236	 * We allocate a new entry if we are less than the maximum
237	 * allowed and the one at the front of the LRU list is in use.
238	 * Otherwise we use the one at the front of the LRU list.
239	 */
240	if (numcache < desiredvnodes &&
241	    ((ncp = nclruhead.tqh_first) == NULL ||
242	    ncp->nc_hash.le_prev != 0)) {
243		/* Add one more entry */
244		ncp = (struct namecache *)
245			malloc((u_long)sizeof *ncp, M_CACHE, M_WAITOK);
246		bzero((char *)ncp, sizeof *ncp);
247		numcache++;
248	} else if (ncp = nclruhead.tqh_first) {
249		/* reuse an old entry */
250		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
251		if (ncp->nc_hash.le_prev != 0) {
252			LIST_REMOVE(ncp, nc_hash);
253			ncp->nc_hash.le_prev = 0;
254		}
255	} else {
256		/* give up */
257		return;
258	}
259	/*
260	 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
261	 * For negative entries, we have to record whether it is a whiteout.
262	 * the whiteout flag is stored in the nc_vpid field which is
263	 * otherwise unused.
264	 */
265	ncp->nc_vp = vp;
266	if (vp) {
267		ncp->nc_vpid = vp->v_id;
268		if (vp->v_usage < MAXVNODEUSE)
269			++vp->v_usage;
270	} else
271		ncp->nc_vpid = cnp->cn_flags & ISWHITEOUT;
272	ncp->nc_dvp = dvp;
273	ncp->nc_dvpid = dvp->v_id;
274	ncp->nc_nlen = cnp->cn_namelen;
275	bcopy(cnp->cn_nameptr, ncp->nc_name, (unsigned)ncp->nc_nlen);
276	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
277	ncpp = NCHHASH(dvp, cnp);
278	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
279}
280
281/*
282 * Name cache initialization, from vfs_init() when we are booting
283 */
284void
285nchinit()
286{
287	TAILQ_INIT(&nclruhead);
288	nchashtbl = phashinit(desiredvnodes, M_CACHE, &nchash);
289}
290
291/*
292 * Invalidate all entries to particular vnode.
293 *
294 * We actually just increment the v_id, that will do it. The stale entries
295 * will be purged by lookup as they get found. If the v_id wraps around, we
296 * need to ditch the entire cache, to avoid confusion. No valid vnode will
297 * ever have (v_id == 0).
298 */
299void
300cache_purge(vp)
301	struct vnode *vp;
302{
303	struct namecache *ncp;
304	struct nchashhead *ncpp;
305	static u_long nextvnodeid;
306
307	vp->v_id = ++nextvnodeid;
308	if (nextvnodeid != 0)
309		return;
310	for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
311		while (ncp = ncpp->lh_first)
312			PURGE(ncp);
313	}
314	vp->v_id = ++nextvnodeid;
315}
316
317/*
318 * Flush all entries referencing a particular filesystem.
319 *
320 * Since we need to check it anyway, we will flush all the invalid
321 * entries at the same time.
322 */
323void
324cache_purgevfs(mp)
325	struct mount *mp;
326{
327	struct nchashhead *ncpp;
328	struct namecache *ncp, *nnp;
329
330	/* Scan hash tables for applicable entries */
331	for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
332		for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
333			nnp = ncp->nc_hash.le_next;
334			if (ncp->nc_dvpid != ncp->nc_dvp->v_id ||
335			    (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id) ||
336			    ncp->nc_dvp->v_mount == mp) {
337				PURGE(ncp);
338			}
339		}
340	}
341}
342