/* vfs_cache.c revision 29094 */
/*
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Poul-Henning Kamp of the FreeBSD Project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
 * $Id: vfs_cache.c,v 1.30 1997/09/03 09:20:17 phk Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/malloc.h>


/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  The cache is indexed by a hash value
 * obtained from (vp, name), where vp refers to the directory
 * containing name.
 *
 * If it is a "negative" entry (i.e. for a name that is known NOT to
 * exist), the vnode pointer will be NULL.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 */

6768651Skris/*
6868651Skris * Structures associated with name cacheing.
6968651Skris */
7068651Skris#define NCHHASH(dvp, cnp) \
7168651Skris	(&nchashtbl[((dvp)->v_id + (cnp)->cn_hash) & nchash])
7268651Skrisstatic LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
7368651Skrisstatic TAILQ_HEAD(, namecache) ncneg;	/* Hash Table */
7468651Skrisstatic u_long	nchash;			/* size of hash table */
7568651SkrisSYSCTL_INT(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
7668651Skrisstatic u_long	ncnegfactor = 16;	/* ratio of negative entries */
7768651SkrisSYSCTL_INT(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
7868651Skrisstatic u_long	numneg;		/* number of cache entries allocated */
7968651SkrisSYSCTL_INT(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
8068651Skrisstatic u_long	numcache;		/* number of cache entries allocated */
8168651SkrisSYSCTL_INT(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
8268651Skrisstruct	nchstats nchstats;		/* cache effectiveness statistics */
8368651Skris
8468651Skrisstatic int	doingcache = 1;		/* 1 => enable the cache */
8568651SkrisSYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
8668651SkrisSYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
8768651SkrisSYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");
8868651Skris
8968651Skrisstatic void cache_zap __P((struct namecache *ncp));
9068651Skris
9168651Skris/*
92109998Smarkm * Flags in namecache.nc_flag
93109998Smarkm */
94109998Smarkm#define NCF_WHITE	1
9568651Skris/*
9668651Skris * Delete an entry from its hash list and move it to the front
9768651Skris * of the LRU list for immediate reuse.
9868651Skris */
9968651Skrisstatic void
10068651Skriscache_zap(ncp)
10168651Skris	struct namecache *ncp;
10268651Skris{
10368651Skris	LIST_REMOVE(ncp, nc_hash);
10468651Skris	LIST_REMOVE(ncp, nc_src);
10568651Skris	if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src))
10668651Skris		vdrop(ncp->nc_dvp);
10768651Skris	if (ncp->nc_vp) {
10868651Skris		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
10968651Skris	} else {
11068651Skris		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
11168651Skris		numneg--;
11268651Skris	}
11368651Skris	numcache--;
11468651Skris	free(ncp, M_CACHE);
11568651Skris}
11668651Skris
11768651Skris/*
11868651Skris * Lookup an entry in the cache
11968651Skris *
12068651Skris * We don't do this if the segment name is long, simply so the cache
12168651Skris * can avoid holding long names (which would either waste space, or
12268651Skris * add greatly to the complexity).
12368651Skris *
12468651Skris * Lookup is called with dvp pointing to the directory to search,
12568651Skris * cnp pointing to the name of the entry being sought. If the lookup
12668651Skris * succeeds, the vnode is returned in *vpp, and a status of -1 is
12768651Skris * returned. If the lookup determines that the name does not exist
12868651Skris * (negative cacheing), a status of ENOENT is returned. If the lookup
12968651Skris * fails, a status of zero is returned.
13068651Skris */
131109998Smarkm
132109998Smarkmint
133109998Smarkmcache_lookup(dvp, vpp, cnp)
134109998Smarkm	struct vnode *dvp;
135109998Smarkm	struct vnode **vpp;
13668651Skris	struct componentname *cnp;
13768651Skris{
13868651Skris	register struct namecache *ncp, *nnp;
13968651Skris	register struct nchashhead *ncpp;
14068651Skris
14168651Skris	if (!doingcache) {
14268651Skris		cnp->cn_flags &= ~MAKEENTRY;
14368651Skris		return (0);
14468651Skris	}
14568651Skris
14668651Skris	if (cnp->cn_nameptr[0] == '.') {
14768651Skris		if (cnp->cn_namelen == 1) {
14868651Skris			*vpp = dvp;
14968651Skris			return (-1);
15068651Skris		}
15168651Skris		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
15268651Skris			if (dvp->v_dd->v_id != dvp->v_ddid ||
15368651Skris			    (cnp->cn_flags & MAKEENTRY) == 0) {
15468651Skris				dvp->v_ddid = 0;
15568651Skris				return (0);
15668651Skris			}
15768651Skris			*vpp = dvp->v_dd;
15868651Skris			return (-1);
15968651Skris		}
16068651Skris	}
16168651Skris
16268651Skris	LIST_FOREACH(ncp, (NCHHASH(dvp, cnp)), nc_hash) {
16368651Skris		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
16468651Skris		    !bcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
16568651Skris			break;
16668651Skris	}
16768651Skris
16868651Skris	/* We failed to find an entry */
16968651Skris	if (ncp == 0) {
17068651Skris		nchstats.ncs_miss++;
17168651Skris		return (0);
17268651Skris	}
17368651Skris
17468651Skris	/* We don't want to have an entry, so dump it */
17568651Skris	if ((cnp->cn_flags & MAKEENTRY) == 0) {
17668651Skris		nchstats.ncs_badhits++;
17768651Skris		cache_zap(ncp);
17868651Skris		return (0);
17968651Skris	}
18068651Skris
18168651Skris	/* We found a "positive" match, return the vnode */
18268651Skris        if (ncp->nc_vp) {
18368651Skris		nchstats.ncs_goodhits++;
18468651Skris		*vpp = ncp->nc_vp;
18568651Skris		return (-1);
18668651Skris	}
18768651Skris
18868651Skris	/* We found a negative match, and want to create it, so purge */
18968651Skris	if (cnp->cn_nameiop == CREATE) {
19068651Skris		nchstats.ncs_badhits++;
19168651Skris		cache_zap(ncp);
19268651Skris		return (0);
193	}
194
195	/*
196	 * We found a "negative" match, ENOENT notifies client of this match.
197	 * The nc_vpid field records whether this is a whiteout.
198	 */
199	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
200	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
201	nchstats.ncs_neghits++;
202	if (ncp->nc_flag & NCF_WHITE)
203		cnp->cn_flags |= ISWHITEOUT;
204	return (ENOENT);
205}
206
207/*
208 * Add an entry to the cache.
209 */
210void
211cache_enter(dvp, vp, cnp)
212	struct vnode *dvp;
213	struct vnode *vp;
214	struct componentname *cnp;
215{
216	register struct namecache *ncp;
217	register struct nchashhead *ncpp;
218
219	if (!doingcache)
220		return;
221
222	if (cnp->cn_nameptr[0] == '.') {
223		if (cnp->cn_namelen == 1) {
224			return;
225		}
226		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
227			if (vp) {
228				dvp->v_dd = vp;
229				dvp->v_ddid = vp->v_id;
230			} else {
231				dvp->v_dd = dvp;
232				dvp->v_ddid = 0;
233			}
234			return;
235		}
236	}
237
238	ncp = (struct namecache *)
239		malloc(sizeof *ncp + cnp->cn_namelen, M_CACHE, M_WAITOK);
240	bzero((char *)ncp, sizeof *ncp);
241	numcache++;
242	if (!vp) {
243		numneg++;
244		ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0;
245	} else if (vp->v_type == VDIR) {
246		vp->v_dd = dvp;
247		vp->v_ddid = dvp->v_id;
248	}
249
250	/*
251	 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
252	 * For negative entries, we have to record whether it is a whiteout.
253	 * the whiteout flag is stored in the nc_vpid field which is
254	 * otherwise unused.
255	 */
256	ncp->nc_vp = vp;
257	ncp->nc_dvp = dvp;
258	ncp->nc_nlen = cnp->cn_namelen;
259	bcopy(cnp->cn_nameptr, ncp->nc_name, (unsigned)ncp->nc_nlen);
260	ncpp = NCHHASH(dvp, cnp);
261	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
262	if (LIST_EMPTY(&dvp->v_cache_src))
263		vhold(dvp);
264	LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
265	if (vp) {
266		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
267	} else {
268		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
269	}
270	if (numneg*ncnegfactor > numcache) {
271		ncp = TAILQ_FIRST(&ncneg);
272		cache_zap(ncp);
273	}
274}
275
276/*
277 * Name cache initialization, from vfs_init() when we are booting
278 */
279void
280nchinit()
281{
282
283	TAILQ_INIT(&ncneg);
284	nchashtbl = hashinit(desiredvnodes*2, M_CACHE, &nchash);
285}
286
287/*
288 * Invalidate all entries to particular vnode.
289 *
290 * We actually just increment the v_id, that will do it. The stale entries
291 * will be purged by lookup as they get found. If the v_id wraps around, we
292 * need to ditch the entire cache, to avoid confusion. No valid vnode will
293 * ever have (v_id == 0).
294 */
295void
296cache_purge(vp)
297	struct vnode *vp;
298{
299	struct namecache *ncp;
300	struct nchashhead *ncpp;
301	static u_long nextid;
302
303	while (!LIST_EMPTY(&vp->v_cache_src))
304		cache_zap(LIST_FIRST(&vp->v_cache_src));
305	while (!TAILQ_EMPTY(&vp->v_cache_dst))
306		cache_zap(TAILQ_FIRST(&vp->v_cache_dst));
307
308	nextid++;
309	while (nextid == vp->v_id || !nextid)
310		continue;
311	vp->v_id = nextid;
312	vp->v_dd = vp;
313	vp->v_ddid = 0;
314}
315
316/*
317 * Flush all entries referencing a particular filesystem.
318 *
319 * Since we need to check it anyway, we will flush all the invalid
320 * entries at the same time.
321 */
322void
323cache_purgevfs(mp)
324	struct mount *mp;
325{
326	struct nchashhead *ncpp;
327	struct namecache *ncp, *nnp;
328
329	/* Scan hash tables for applicable entries */
330	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
331		for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) {
332			nnp = LIST_NEXT(ncp, nc_hash);
333			if (ncp->nc_dvp->v_mount == mp) {
334				cache_zap(ncp);
335			}
336		}
337	}
338}
339
340/*
341 * Perform canonical checks and cache lookup and pass on to filesystem
342 * through the vop_cachedlookup only if needed.
343 */
344
345int
346vfs_cache_lookup(ap)
347	struct vop_lookup_args /* {
348		struct vnode *a_dvp;
349		struct vnode **a_vpp;
350		struct componentname *a_cnp;
351	} */ *ap;
352{
353	struct vnode *vdp;
354	struct vnode *pdp;
355	int lockparent;
356	int error;
357	struct vnode **vpp = ap->a_vpp;
358	struct componentname *cnp = ap->a_cnp;
359	struct ucred *cred = cnp->cn_cred;
360	int flags = cnp->cn_flags;
361	struct proc *p = cnp->cn_proc;
362	u_long vpid;	/* capability number of vnode */
363
364	*vpp = NULL;
365	vdp = ap->a_dvp;
366	lockparent = flags & LOCKPARENT;
367
368	if (vdp->v_type != VDIR)
369                return (ENOTDIR);
370
371	if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) &&
372	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
373		return (EROFS);
374
375	error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc);
376
377	if (error)
378		return (error);
379
380	error = cache_lookup(vdp, vpp, cnp);
381
382	if (!error)
383		return (VCALL(vdp, VOFFSET(vop_cachedlookup),
384		    (struct vop_cachedlookup_args *)ap));
385
386	if (error == ENOENT)
387		return (error);
388
389	pdp = vdp;
390	vdp = *vpp;
391	vpid = vdp->v_id;
392	if (pdp == vdp) {   /* lookup on "." */
393		VREF(vdp);
394		error = 0;
395	} else if (flags & ISDOTDOT) {
396		VOP_UNLOCK(pdp, 0, p);
397		error = vget(vdp, LK_EXCLUSIVE, p);
398		if (!error && lockparent && (flags & ISLASTCN))
399			error = vn_lock(pdp, LK_EXCLUSIVE, p);
400	} else {
401		error = vget(vdp, LK_EXCLUSIVE, p);
402		if (!lockparent || error || !(flags & ISLASTCN))
403			VOP_UNLOCK(pdp, 0, p);
404	}
405	/*
406	 * Check that the capability number did not change
407	 * while we were waiting for the lock.
408	 */
409	if (!error) {
410		if (vpid == vdp->v_id)
411			return (0);
412		vput(vdp);
413		if (lockparent && pdp != vdp && (flags & ISLASTCN))
414			VOP_UNLOCK(pdp, 0, p);
415	}
416	error = vn_lock(pdp, LK_EXCLUSIVE, p);
417	if (error)
418		return (error);
419	return (VCALL(vdp, VOFFSET(vop_cachedlookup),
420	    (struct vop_cachedlookup_args *)ap));
421}
422