vfs_cache.c revision 84249
11541Srgrimes/*
222521Sdyson * Copyright (c) 1989, 1993, 1995
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
522521Sdyson * This code is derived from software contributed to Berkeley by
622521Sdyson * Poul-Henning Kamp of the FreeBSD Project.
722521Sdyson *
81541Srgrimes * Redistribution and use in source and binary forms, with or without
91541Srgrimes * modification, are permitted provided that the following conditions
101541Srgrimes * are met:
111541Srgrimes * 1. Redistributions of source code must retain the above copyright
121541Srgrimes *    notice, this list of conditions and the following disclaimer.
131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer in the
151541Srgrimes *    documentation and/or other materials provided with the distribution.
161541Srgrimes * 3. All advertising materials mentioning features or use of this software
171541Srgrimes *    must display the following acknowledgement:
181541Srgrimes *	This product includes software developed by the University of
191541Srgrimes *	California, Berkeley and its contributors.
201541Srgrimes * 4. Neither the name of the University nor the names of its contributors
211541Srgrimes *    may be used to endorse or promote products derived from this software
221541Srgrimes *    without specific prior written permission.
231541Srgrimes *
241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
271541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
291541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
301541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
321541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
331541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
341541Srgrimes * SUCH DAMAGE.
351541Srgrimes *
3623521Sbde *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
3750477Speter * $FreeBSD: head/sys/kern/vfs_cache.c 84249 2001-10-01 04:33:35Z dillon $
381541Srgrimes */
391541Srgrimes
401541Srgrimes#include <sys/param.h>
411541Srgrimes#include <sys/systm.h>
4212820Sphk#include <sys/kernel.h>
4376166Smarkm#include <sys/lock.h>
4412820Sphk#include <sys/sysctl.h>
451541Srgrimes#include <sys/mount.h>
461541Srgrimes#include <sys/vnode.h>
471541Srgrimes#include <sys/namei.h>
481541Srgrimes#include <sys/malloc.h>
4951906Sphk#include <sys/sysproto.h>
5051906Sphk#include <sys/proc.h>
5151906Sphk#include <sys/filedesc.h>
5274384Speter#include <sys/fnv_hash.h>
531541Srgrimes
5451906Sphk/*
5559652Sgreen * This structure describes the elements in the cache of recent
5659652Sgreen * names looked up by namei.
5759652Sgreen */
5859652Sgreen
5959652Sgreenstruct	namecache {
6060938Sjake	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
6160938Sjake	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
6260938Sjake	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
6359652Sgreen	struct	vnode *nc_dvp;		/* vnode of parent of name */
6459652Sgreen	struct	vnode *nc_vp;		/* vnode the name refers to */
6559652Sgreen	u_char	nc_flag;		/* flag bits */
6659652Sgreen	u_char	nc_nlen;		/* length of name */
6759652Sgreen	char	nc_name[0];		/* segment name */
6859652Sgreen};
6959652Sgreen
7059652Sgreen/*
711541Srgrimes * Name caching works as follows:
721541Srgrimes *
731541Srgrimes * Names found by directory scans are retained in a cache
741541Srgrimes * for future reference.  It is managed LRU, so frequently
751541Srgrimes * used names will hang around.  Cache is indexed by hash value
761541Srgrimes * obtained from (vp, name) where vp refers to the directory
771541Srgrimes * containing name.
781541Srgrimes *
7922521Sdyson * If it is a "negative" entry, (i.e. for a name that is known NOT to
8022521Sdyson * exist) the vnode pointer will be NULL.
816968Sphk *
821541Srgrimes * Upon reaching the last segment of a path, if the reference
831541Srgrimes * is for DELETE, or NOCACHE is set (rewrite), and the
841541Srgrimes * name is located in the cache, it will be dropped.
851541Srgrimes */
861541Srgrimes
871541Srgrimes/*
881541Srgrimes * Structures associated with name cacheing.
891541Srgrimes */
9074501Speter#define NCHHASH(hash) \
9174501Speter	(&nchashtbl[(hash) & nchash])
9260938Sjakestatic LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
9360938Sjakestatic TAILQ_HEAD(, namecache) ncneg;	/* Hash Table */
9423521Sbdestatic u_long	nchash;			/* size of hash table */
9562622SjhbSYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
9625453Sphkstatic u_long	ncnegfactor = 16;	/* ratio of negative entries */
9762622SjhbSYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
9825453Sphkstatic u_long	numneg;		/* number of cache entries allocated */
9962622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
10023521Sbdestatic u_long	numcache;		/* number of cache entries allocated */
10162622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
10275654Stanimurastatic u_long	numcachehv;		/* number of cache entries with vnodes held */
10375654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
10484249Sdillon#if 0
10575654Stanimurastatic u_long	numcachepl;		/* number of cache purge for leaf entries */
10675654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
10784249Sdillon#endif
10822521Sdysonstruct	nchstats nchstats;		/* cache effectiveness statistics */
1091541Srgrimes
11023521Sbdestatic int	doingcache = 1;		/* 1 => enable the cache */
11123521SbdeSYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
11225453SphkSYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
11325453SphkSYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");
11423521Sbde
11529788Sphk/*
11629788Sphk * The new name cache statistics
11729788Sphk */
11838984SbdeSYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
11929788Sphk#define STATNODE(mode, name, var) \
12062622Sjhb	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
12129788SphkSTATNODE(CTLFLAG_RD, numneg, &numneg);
12229788SphkSTATNODE(CTLFLAG_RD, numcache, &numcache);
12329788Sphkstatic u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
12429788Sphkstatic u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
12529788Sphkstatic u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
12629788Sphkstatic u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
12729788Sphkstatic u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
12829804Sphkstatic u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
12929788Sphkstatic u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
13029788Sphkstatic u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
13129788Sphkstatic u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
13229788Sphkstatic u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
13329788Sphk
13468922SrwatsonSYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats,
13568922Srwatson        sizeof(nchstats), "LU", "VFS cache effectiveness statistics");
13629788Sphk
13768922Srwatson
13868922Srwatson
13925453Sphkstatic void cache_zap __P((struct namecache *ncp));
1406968Sphk
14169774Sphkstatic MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
14251906Sphk
14322521Sdyson/*
14425453Sphk * Flags in namecache.nc_flag
14525453Sphk */
14625453Sphk#define NCF_WHITE	1
14775402Speter
14825453Sphk/*
14975402Speter * Grab an atomic snapshot of the name cache hash chain lengths
15075402Speter */
15175402SpeterSYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");
15275402Speter
15375402Speterstatic int
15475402Spetersysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
15575402Speter{
15675402Speter	int error;
15775402Speter	struct nchashhead *ncpp;
15875402Speter	struct namecache *ncp;
15975402Speter	int n_nchash;
16075402Speter	int count;
16175402Speter
16275402Speter	n_nchash = nchash + 1;	/* nchash is max index, not count */
16375402Speter	if (!req->oldptr)
16475402Speter		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));
16575402Speter
16675402Speter	/* Scan hash tables for applicable entries */
16775402Speter	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
16875402Speter		count = 0;
16975402Speter		LIST_FOREACH(ncp, ncpp, nc_hash) {
17075402Speter			count++;
17175402Speter		}
17275402Speter		error = SYSCTL_OUT(req, (caddr_t)&count, sizeof(count));
17375402Speter		if (error)
17475402Speter			return (error);
17575402Speter	}
17675402Speter	return (0);
17775402Speter}
17875402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD,
17975402Speter	0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths");
18075402Speter
18175402Speterstatic int
18275402Spetersysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
18375402Speter{
18475402Speter	int error;
18575402Speter	struct nchashhead *ncpp;
18675402Speter	struct namecache *ncp;
18775402Speter	int n_nchash;
18875402Speter	int count, maxlength, used, pct;
18975402Speter
19075402Speter	if (!req->oldptr)
19175402Speter		return SYSCTL_OUT(req, 0, 4 * sizeof(int));
19275402Speter
19375402Speter	n_nchash = nchash + 1;	/* nchash is max index, not count */
19475402Speter	used = 0;
19575402Speter	maxlength = 0;
19675402Speter
19775402Speter	/* Scan hash tables for applicable entries */
19875402Speter	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
19975402Speter		count = 0;
20075402Speter		LIST_FOREACH(ncp, ncpp, nc_hash) {
20175402Speter			count++;
20275402Speter		}
20375402Speter		if (count)
20475402Speter			used++;
20575402Speter		if (maxlength < count)
20675402Speter			maxlength = count;
20775402Speter	}
20875402Speter	n_nchash = nchash + 1;
20975402Speter	pct = (used * 100 * 100) / n_nchash;
21075402Speter	error = SYSCTL_OUT(req, (caddr_t)&n_nchash, sizeof(n_nchash));
21175402Speter	if (error)
21275402Speter		return (error);
21375402Speter	error = SYSCTL_OUT(req, (caddr_t)&used, sizeof(used));
21475402Speter	if (error)
21575402Speter		return (error);
21675402Speter	error = SYSCTL_OUT(req, (caddr_t)&maxlength, sizeof(maxlength));
21775402Speter	if (error)
21875402Speter		return (error);
21975402Speter	error = SYSCTL_OUT(req, (caddr_t)&pct, sizeof(pct));
22075402Speter	if (error)
22175402Speter		return (error);
22275402Speter	return (0);
22375402Speter}
22475402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD,
22575402Speter	0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths");
22675402Speter
22775402Speter/*
22822521Sdyson * Delete an entry from its hash list and move it to the front
22922521Sdyson * of the LRU list for immediate reuse.
23022521Sdyson */
23125453Sphkstatic void
23225453Sphkcache_zap(ncp)
23325453Sphk	struct namecache *ncp;
23425453Sphk{
23525453Sphk	LIST_REMOVE(ncp, nc_hash);
23625453Sphk	LIST_REMOVE(ncp, nc_src);
23775654Stanimura	if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
23828954Sphk		vdrop(ncp->nc_dvp);
23975654Stanimura		numcachehv--;
24075654Stanimura	}
24125453Sphk	if (ncp->nc_vp) {
24225453Sphk		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
24325453Sphk	} else {
24425453Sphk		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
24525453Sphk		numneg--;
24625453Sphk	}
24725453Sphk	numcache--;
24851906Sphk	free(ncp, M_VFSCACHE);
24922521Sdyson}
2506968Sphk
25122521Sdyson/*
25284249Sdillon * cache_leaf_test()
25384249Sdillon *
25484249Sdillon *      Test whether this (directory) vnode's namei cache entry contains
25584249Sdillon *      subdirectories or not.  Used to determine whether the directory is
25684249Sdillon *      a leaf in the namei cache or not.  Note: the directory may still
25784249Sdillon *      contain files in the namei cache.
25884249Sdillon *
25984249Sdillon *      Returns 0 if the directory is a leaf, -1 if it isn't.
26084249Sdillon */
26184249Sdillonint
26284249Sdilloncache_leaf_test(struct vnode *vp)
26384249Sdillon{
26484249Sdillon	struct namecache *ncpc;
26584249Sdillon
26684249Sdillon	for (ncpc = LIST_FIRST(&vp->v_cache_src);
26784249Sdillon	     ncpc != NULL;
26884249Sdillon	     ncpc = LIST_NEXT(ncpc, nc_src)
26984249Sdillon	) {
27084249Sdillon		if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR)
27184249Sdillon			return(-1);
27284249Sdillon	}
27384249Sdillon	return(0);
27484249Sdillon}
27584249Sdillon
27684249Sdillon/*
27723521Sbde * Lookup an entry in the cache
2786968Sphk *
27923521Sbde * We don't do this if the segment name is long, simply so the cache
2806968Sphk * can avoid holding long names (which would either waste space, or
2811541Srgrimes * add greatly to the complexity).
2821541Srgrimes *
2836968Sphk * Lookup is called with dvp pointing to the directory to search,
28422521Sdyson * cnp pointing to the name of the entry being sought. If the lookup
28522521Sdyson * succeeds, the vnode is returned in *vpp, and a status of -1 is
28622521Sdyson * returned. If the lookup determines that the name does not exist
28722521Sdyson * (negative cacheing), a status of ENOENT is returned. If the lookup
28822521Sdyson * fails, a status of zero is returned.
2891541Srgrimes */
2906968Sphk
2911541Srgrimesint
2921541Srgrimescache_lookup(dvp, vpp, cnp)
2931541Srgrimes	struct vnode *dvp;
2941541Srgrimes	struct vnode **vpp;
2951541Srgrimes	struct componentname *cnp;
2961541Srgrimes{
29751906Sphk	struct namecache *ncp;
29874384Speter	u_int32_t hash;
2991541Srgrimes
3006928Sphk	if (!doingcache) {
3016928Sphk		cnp->cn_flags &= ~MAKEENTRY;
3021541Srgrimes		return (0);
3036928Sphk	}
30425453Sphk
30529788Sphk	numcalls++;
30629788Sphk
30725453Sphk	if (cnp->cn_nameptr[0] == '.') {
30825453Sphk		if (cnp->cn_namelen == 1) {
30925453Sphk			*vpp = dvp;
31029788Sphk			dothits++;
31125453Sphk			return (-1);
31225453Sphk		}
31325453Sphk		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
31429788Sphk			dotdothits++;
31525453Sphk			if (dvp->v_dd->v_id != dvp->v_ddid ||
31625453Sphk			    (cnp->cn_flags & MAKEENTRY) == 0) {
31725453Sphk				dvp->v_ddid = 0;
31825453Sphk				return (0);
31925453Sphk			}
32025453Sphk			*vpp = dvp->v_dd;
32125453Sphk			return (-1);
32225453Sphk		}
3231541Srgrimes	}
3246968Sphk
32574501Speter	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
32674501Speter	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
32774501Speter	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
32829788Sphk		numchecks++;
32925453Sphk		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
33031879Sbde		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
33122521Sdyson			break;
3321541Srgrimes	}
3336968Sphk
33422521Sdyson	/* We failed to find an entry */
33522521Sdyson	if (ncp == 0) {
33629804Sphk		if ((cnp->cn_flags & MAKEENTRY) == 0) {
33729804Sphk			nummisszap++;
33829804Sphk		} else {
33929804Sphk			nummiss++;
34029804Sphk		}
34122521Sdyson		nchstats.ncs_miss++;
34222521Sdyson		return (0);
34322521Sdyson	}
34422521Sdyson
3456968Sphk	/* We don't want to have an entry, so dump it */
3466928Sphk	if ((cnp->cn_flags & MAKEENTRY) == 0) {
34729788Sphk		numposzaps++;
3481541Srgrimes		nchstats.ncs_badhits++;
34925453Sphk		cache_zap(ncp);
3506968Sphk		return (0);
35123521Sbde	}
3526968Sphk
3536968Sphk	/* We found a "positive" match, return the vnode */
35422521Sdyson        if (ncp->nc_vp) {
35529788Sphk		numposhits++;
3561541Srgrimes		nchstats.ncs_goodhits++;
3571541Srgrimes		*vpp = ncp->nc_vp;
3581541Srgrimes		return (-1);
3591541Srgrimes	}
3601541Srgrimes
3616968Sphk	/* We found a negative match, and want to create it, so purge */
3626968Sphk	if (cnp->cn_nameiop == CREATE) {
36329788Sphk		numnegzaps++;
3647013Sphk		nchstats.ncs_badhits++;
36525453Sphk		cache_zap(ncp);
3666968Sphk		return (0);
3676968Sphk	}
3686968Sphk
36929788Sphk	numneghits++;
37022521Sdyson	/*
37122521Sdyson	 * We found a "negative" match, ENOENT notifies client of this match.
37222521Sdyson	 * The nc_vpid field records whether this is a whiteout.
37322521Sdyson	 */
37425453Sphk	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
37525453Sphk	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
3766968Sphk	nchstats.ncs_neghits++;
37725453Sphk	if (ncp->nc_flag & NCF_WHITE)
37825453Sphk		cnp->cn_flags |= ISWHITEOUT;
3796968Sphk	return (ENOENT);
3801541Srgrimes}
3811541Srgrimes
3821541Srgrimes/*
3836968Sphk * Add an entry to the cache.
3841541Srgrimes */
3851549Srgrimesvoid
3861541Srgrimescache_enter(dvp, vp, cnp)
3871541Srgrimes	struct vnode *dvp;
3881541Srgrimes	struct vnode *vp;
3891541Srgrimes	struct componentname *cnp;
3901541Srgrimes{
39151906Sphk	struct namecache *ncp;
39251906Sphk	struct nchashhead *ncpp;
39374384Speter	u_int32_t hash;
39451906Sphk	int len;
3951541Srgrimes
3961541Srgrimes	if (!doingcache)
3971541Srgrimes		return;
3986968Sphk
39925453Sphk	if (cnp->cn_nameptr[0] == '.') {
40025453Sphk		if (cnp->cn_namelen == 1) {
40125453Sphk			return;
4026928Sphk		}
40325453Sphk		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
40425453Sphk			if (vp) {
40525453Sphk				dvp->v_dd = vp;
40625453Sphk				dvp->v_ddid = vp->v_id;
40725453Sphk			} else {
40825453Sphk				dvp->v_dd = dvp;
40925453Sphk				dvp->v_ddid = 0;
41025453Sphk			}
41125453Sphk			return;
41225453Sphk		}
4136968Sphk	}
41425453Sphk
41525453Sphk	ncp = (struct namecache *)
41651906Sphk		malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK);
41725453Sphk	bzero((char *)ncp, sizeof *ncp);
41825453Sphk	numcache++;
41928954Sphk	if (!vp) {
42025453Sphk		numneg++;
42128954Sphk		ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0;
42229071Sphk	} else if (vp->v_type == VDIR) {
42329071Sphk		vp->v_dd = dvp;
42429071Sphk		vp->v_ddid = dvp->v_id;
42528954Sphk	}
42623521Sbde
42722521Sdyson	/*
42822521Sdyson	 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
42922521Sdyson	 * For negative entries, we have to record whether it is a whiteout.
43022521Sdyson	 * the whiteout flag is stored in the nc_vpid field which is
43122521Sdyson	 * otherwise unused.
43222521Sdyson	 */
4331541Srgrimes	ncp->nc_vp = vp;
4341541Srgrimes	ncp->nc_dvp = dvp;
43551906Sphk	len = ncp->nc_nlen = cnp->cn_namelen;
43674501Speter	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
43774384Speter	bcopy(cnp->cn_nameptr, ncp->nc_name, len);
43874501Speter	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
43974501Speter	ncpp = NCHHASH(hash);
4406928Sphk	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
44175654Stanimura	if (LIST_EMPTY(&dvp->v_cache_src)) {
44228954Sphk		vhold(dvp);
44375654Stanimura		numcachehv++;
44475654Stanimura	}
44525453Sphk	LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
44625453Sphk	if (vp) {
44725453Sphk		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
44825453Sphk	} else {
44925453Sphk		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
45025453Sphk	}
45151906Sphk	if (numneg * ncnegfactor > numcache) {
45225453Sphk		ncp = TAILQ_FIRST(&ncneg);
45325453Sphk		cache_zap(ncp);
45425453Sphk	}
4551541Srgrimes}
4561541Srgrimes
4571541Srgrimes/*
4581541Srgrimes * Name cache initialization, from vfs_init() when we are booting
4591541Srgrimes */
46069664Speterstatic void
46169664Speternchinit(void *dummy __unused)
4621541Srgrimes{
46323521Sbde
46425453Sphk	TAILQ_INIT(&ncneg);
46569664Speter	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
4661541Srgrimes}
46769664SpeterSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL)
4681541Srgrimes
46969664Speter
4701541Srgrimes/*
47146011Sphk * Invalidate all entries to a particular vnode.
47223521Sbde *
47346011Sphk * Remove all entries in the namecache relating to this vnode and
47446011Sphk * change the v_id.  We take the v_id from a global counter, since
47546011Sphk * it becomes a handy sequence number in crash-dumps that way.
47646011Sphk * No valid vnode will ever have (v_id == 0).
47746011Sphk *
47846011Sphk * XXX: Only time and the size of v_id prevents this from failing:
47946011Sphk * XXX: In theory we should hunt down all (struct vnode*, v_id)
48046011Sphk * XXX: soft references and nuke them, at least on the global
48146011Sphk * XXX: v_id wraparound.  The period of resistance can be extended
48246011Sphk * XXX: by incrementing each vnodes v_id individually instead of
48346011Sphk * XXX: using the global v_id.
4841541Srgrimes */
48546011Sphk
4861549Srgrimesvoid
4871541Srgrimescache_purge(vp)
4881541Srgrimes	struct vnode *vp;
4891541Srgrimes{
49029094Sphk	static u_long nextid;
4911541Srgrimes
49225453Sphk	while (!LIST_EMPTY(&vp->v_cache_src))
49325453Sphk		cache_zap(LIST_FIRST(&vp->v_cache_src));
49425453Sphk	while (!TAILQ_EMPTY(&vp->v_cache_dst))
49525453Sphk		cache_zap(TAILQ_FIRST(&vp->v_cache_dst));
49625453Sphk
49746011Sphk	do
49846011Sphk		nextid++;
49946011Sphk	while (nextid == vp->v_id || !nextid);
50029094Sphk	vp->v_id = nextid;
50125453Sphk	vp->v_dd = vp;
50225453Sphk	vp->v_ddid = 0;
5031541Srgrimes}
5041541Srgrimes
5051541Srgrimes/*
5066968Sphk * Flush all entries referencing a particular filesystem.
5071541Srgrimes *
5086968Sphk * Since we need to check it anyway, we will flush all the invalid
50912968Sphk * entries at the same time.
5101541Srgrimes */
5111549Srgrimesvoid
5121541Srgrimescache_purgevfs(mp)
5131541Srgrimes	struct mount *mp;
5141541Srgrimes{
5156968Sphk	struct nchashhead *ncpp;
51622521Sdyson	struct namecache *ncp, *nnp;
5171541Srgrimes
5186968Sphk	/* Scan hash tables for applicable entries */
51929071Sphk	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
52025453Sphk		for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) {
52125453Sphk			nnp = LIST_NEXT(ncp, nc_hash);
52225453Sphk			if (ncp->nc_dvp->v_mount == mp) {
52325453Sphk				cache_zap(ncp);
5246968Sphk			}
5251541Srgrimes		}
5261541Srgrimes	}
5271541Srgrimes}
52828787Sphk
#if 0

/*
 * Flush the cached names of directories that have no child directory
 * in the cache.  At most ndir such directories are flushed per call.
 */
void
cache_purgeleafdirs(int ndir)
{
	struct nchashhead *ncpp;
	struct namecache *ncp, *nnp, *child;
	struct vnode *dvp;

	/* Scan hash tables for applicable entries */
	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl && ndir > 0; ncpp--) {
		for (ncp = LIST_FIRST(ncpp); ncp != NULL && ndir > 0; ncp = nnp) {
			nnp = LIST_NEXT(ncp, nc_hash);
			dvp = ncp->nc_dvp;
			if (dvp == NULL)
				continue;

			/*
			 * Skip directories held more than once (the original
			 * author notes such a directory is likely to be the
			 * working directory of a process).
			 */
			if (dvp->v_holdcnt > 1)
				continue;

			/* Skip directories with a cached child directory. */
			LIST_FOREACH(child, &dvp->v_cache_src, nc_src) {
				if (child->nc_vp != NULL &&
				    child->nc_vp->v_type == VDIR)
					break;
			}
			if (child != NULL)
				continue;

			/* Zap everything cached below this directory. */
			while (!LIST_EMPTY(&dvp->v_cache_src))
				cache_zap(LIST_FIRST(&dvp->v_cache_src));

			ndir--;

			/* Restart the chain: nnp may have been freed above. */
			nnp = LIST_FIRST(ncpp);
		}
	}
	numcachepl++;
}

#endif
58884249Sdillon
58975654Stanimura/*
59028787Sphk * Perform canonical checks and cache lookup and pass on to filesystem
59128787Sphk * through the vop_cachedlookup only if needed.
59228787Sphk */
59328787Sphk
59428787Sphkint
59528787Sphkvfs_cache_lookup(ap)
59628787Sphk	struct vop_lookup_args /* {
59728787Sphk		struct vnode *a_dvp;
59828787Sphk		struct vnode **a_vpp;
59928787Sphk		struct componentname *a_cnp;
60028787Sphk	} */ *ap;
60128787Sphk{
60265665Sbp	struct vnode *dvp, *vp;
60365665Sbp	int lockparent;
60428787Sphk	int error;
60528787Sphk	struct vnode **vpp = ap->a_vpp;
60628787Sphk	struct componentname *cnp = ap->a_cnp;
60728787Sphk	struct ucred *cred = cnp->cn_cred;
60828787Sphk	int flags = cnp->cn_flags;
60983366Sjulian	struct thread *td = cnp->cn_thread;
61028787Sphk	u_long vpid;	/* capability number of vnode */
61128787Sphk
61228787Sphk	*vpp = NULL;
61365665Sbp	dvp = ap->a_dvp;
61428787Sphk	lockparent = flags & LOCKPARENT;
61528787Sphk
61665665Sbp	if (dvp->v_type != VDIR)
61728787Sphk                return (ENOTDIR);
61828787Sphk
61965665Sbp	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
62028787Sphk	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
62128787Sphk		return (EROFS);
62228787Sphk
62383366Sjulian	error = VOP_ACCESS(dvp, VEXEC, cred, td);
62428787Sphk
62528787Sphk	if (error)
62628787Sphk		return (error);
62728787Sphk
62865665Sbp	error = cache_lookup(dvp, vpp, cnp);
62928787Sphk
63028787Sphk	if (!error)
63165665Sbp		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
63228787Sphk
63328787Sphk	if (error == ENOENT)
63428787Sphk		return (error);
63528787Sphk
63665665Sbp	vp = *vpp;
63765665Sbp	vpid = vp->v_id;
63865973Sbp	cnp->cn_flags &= ~PDIRUNLOCK;
63965665Sbp	if (dvp == vp) {   /* lookup on "." */
64065665Sbp		VREF(vp);
64128787Sphk		error = 0;
64228787Sphk	} else if (flags & ISDOTDOT) {
64383366Sjulian		VOP_UNLOCK(dvp, 0, td);
64465973Sbp		cnp->cn_flags |= PDIRUNLOCK;
64583366Sjulian		error = vget(vp, LK_EXCLUSIVE, td);
64665973Sbp		if (!error && lockparent && (flags & ISLASTCN)) {
64783366Sjulian			if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0)
64865973Sbp				cnp->cn_flags &= ~PDIRUNLOCK;
64965973Sbp		}
65028787Sphk	} else {
65183366Sjulian		error = vget(vp, LK_EXCLUSIVE, td);
65265973Sbp		if (!lockparent || error || !(flags & ISLASTCN)) {
65383366Sjulian			VOP_UNLOCK(dvp, 0, td);
65465973Sbp			cnp->cn_flags |= PDIRUNLOCK;
65565973Sbp		}
65628787Sphk	}
65728787Sphk	/*
65828787Sphk	 * Check that the capability number did not change
65928787Sphk	 * while we were waiting for the lock.
66028787Sphk	 */
66128787Sphk	if (!error) {
66265665Sbp		if (vpid == vp->v_id)
66328787Sphk			return (0);
66465665Sbp		vput(vp);
66565973Sbp		if (lockparent && dvp != vp && (flags & ISLASTCN)) {
66683366Sjulian			VOP_UNLOCK(dvp, 0, td);
66765973Sbp			cnp->cn_flags |= PDIRUNLOCK;
66865973Sbp		}
66928787Sphk	}
67065973Sbp	if (cnp->cn_flags & PDIRUNLOCK) {
67183366Sjulian		error = vn_lock(dvp, LK_EXCLUSIVE, td);
67265973Sbp		if (error)
67365973Sbp			return (error);
67465973Sbp		cnp->cn_flags &= ~PDIRUNLOCK;
67565973Sbp	}
67665665Sbp	return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
67728787Sphk}
67851906Sphk
67951906Sphk
68051906Sphk#ifndef _SYS_SYSPROTO_H_
68151906Sphkstruct  __getcwd_args {
68251906Sphk	u_char	*buf;
68351906Sphk	u_int	buflen;
68451906Sphk};
68551906Sphk#endif
68651906Sphk
68751906Sphkstatic int disablecwd;
68851906SphkSYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, "");
68951906Sphk
69051906Sphkstatic u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
69151906Sphkstatic u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
69251906Sphkstatic u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
69351906Sphkstatic u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
69451906Sphkstatic u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
69551906Sphkstatic u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);
69651906Sphkint
69783366Sjulian__getcwd(td, uap)
69883366Sjulian	struct thread *td;
69951906Sphk	struct __getcwd_args *uap;
70051906Sphk{
70151906Sphk	char *bp, *buf;
70251906Sphk	int error, i, slash_prefixed;
70351906Sphk	struct filedesc *fdp;
70451906Sphk	struct namecache *ncp;
70551906Sphk	struct vnode *vp;
70651906Sphk
70751906Sphk	numcwdcalls++;
70851906Sphk	if (disablecwd)
70951906Sphk		return (ENODEV);
71051906Sphk	if (uap->buflen < 2)
71151906Sphk		return (EINVAL);
71251906Sphk	if (uap->buflen > MAXPATHLEN)
71351906Sphk		uap->buflen = MAXPATHLEN;
71451906Sphk	buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK);
71551906Sphk	bp += uap->buflen - 1;
71651906Sphk	*bp = '\0';
71783366Sjulian	fdp = td->td_proc->p_fd;
71851906Sphk	slash_prefixed = 0;
71951906Sphk	for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
72051906Sphk		if (vp->v_flag & VROOT) {
72183000Siedowse			if (vp->v_mount == NULL) {	/* forced unmount */
72283000Siedowse				free(buf, M_TEMP);
72357199Speter				return (EBADF);
72483000Siedowse			}
72551906Sphk			vp = vp->v_mount->mnt_vnodecovered;
72651906Sphk			continue;
72751906Sphk		}
72851906Sphk		if (vp->v_dd->v_id != vp->v_ddid) {
72951906Sphk			numcwdfail1++;
73051906Sphk			free(buf, M_TEMP);
73151906Sphk			return (ENOTDIR);
73251906Sphk		}
73351906Sphk		ncp = TAILQ_FIRST(&vp->v_cache_dst);
73451906Sphk		if (!ncp) {
73551906Sphk			numcwdfail2++;
73651906Sphk			free(buf, M_TEMP);
73751906Sphk			return (ENOENT);
73851906Sphk		}
73951906Sphk		if (ncp->nc_dvp != vp->v_dd) {
74051906Sphk			numcwdfail3++;
74151906Sphk			free(buf, M_TEMP);
74251906Sphk			return (EBADF);
74351906Sphk		}
74451906Sphk		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
74551906Sphk			if (bp == buf) {
74651906Sphk				numcwdfail4++;
74751906Sphk				free(buf, M_TEMP);
74851906Sphk				return (ENOMEM);
74951906Sphk			}
75051906Sphk			*--bp = ncp->nc_name[i];
75151906Sphk		}
75251906Sphk		if (bp == buf) {
75351906Sphk			numcwdfail4++;
75451906Sphk			free(buf, M_TEMP);
75551906Sphk			return (ENOMEM);
75651906Sphk		}
75751906Sphk		*--bp = '/';
75851906Sphk		slash_prefixed = 1;
75951906Sphk		vp = vp->v_dd;
76051906Sphk	}
76151906Sphk	if (!slash_prefixed) {
76251906Sphk		if (bp == buf) {
76351906Sphk			numcwdfail4++;
76451906Sphk			free(buf, M_TEMP);
76551906Sphk			return (ENOMEM);
76651906Sphk		}
76751906Sphk		*--bp = '/';
76851906Sphk	}
76951906Sphk	numcwdfound++;
77051906Sphk	error = copyout(bp, uap->buf, strlen(bp) + 1);
77151906Sphk	free(buf, M_TEMP);
77251906Sphk	return (error);
77351906Sphk}
77451906Sphk
77559652Sgreen/*
77659652Sgreen * Thus begins the fullpath magic.
77759652Sgreen */
77859652Sgreen
77959652Sgreen#undef STATNODE
78059652Sgreen#define STATNODE(name)							\
78159652Sgreen	static u_int name;						\
78262622Sjhb	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")
78359652Sgreen
78459652Sgreenstatic int disablefullpath;
78559652SgreenSYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW,
78659652Sgreen    &disablefullpath, 0, "");
78759652Sgreen
78859652SgreenSTATNODE(numfullpathcalls);
78959652SgreenSTATNODE(numfullpathfail1);
79059652SgreenSTATNODE(numfullpathfail2);
79159652SgreenSTATNODE(numfullpathfail3);
79259652SgreenSTATNODE(numfullpathfail4);
79359652SgreenSTATNODE(numfullpathfound);
79459652Sgreen
79559652Sgreenint
79659652Sgreentextvp_fullpath(struct proc *p, char **retbuf, char **retfreebuf) {
79759652Sgreen	char *bp, *buf;
79859652Sgreen	int i, slash_prefixed;
79959652Sgreen	struct filedesc *fdp;
80059652Sgreen	struct namecache *ncp;
80159652Sgreen	struct vnode *vp, *textvp;
80259652Sgreen
80359652Sgreen	numfullpathcalls++;
80459652Sgreen	if (disablefullpath)
80559652Sgreen		return (ENODEV);
80659652Sgreen	textvp = p->p_textvp;
80759652Sgreen	if (textvp == NULL)
80859652Sgreen		return (EINVAL);
80959652Sgreen	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
81059652Sgreen	bp = buf + MAXPATHLEN - 1;
81159652Sgreen	*bp = '\0';
81259652Sgreen	fdp = p->p_fd;
81359652Sgreen	slash_prefixed = 0;
81459652Sgreen	for (vp = textvp; vp != fdp->fd_rdir && vp != rootvnode;) {
81559652Sgreen		if (vp->v_flag & VROOT) {
81659652Sgreen			if (vp->v_mount == NULL) {	/* forced unmount */
81759652Sgreen				free(buf, M_TEMP);
81859652Sgreen				return (EBADF);
81959652Sgreen			}
82059652Sgreen			vp = vp->v_mount->mnt_vnodecovered;
82159652Sgreen			continue;
82259652Sgreen		}
82359652Sgreen		if (vp != textvp && vp->v_dd->v_id != vp->v_ddid) {
82459652Sgreen			numfullpathfail1++;
82559652Sgreen			free(buf, M_TEMP);
82659652Sgreen			return (ENOTDIR);
82759652Sgreen		}
82859652Sgreen		ncp = TAILQ_FIRST(&vp->v_cache_dst);
82959652Sgreen		if (!ncp) {
83059652Sgreen			numfullpathfail2++;
83159652Sgreen			free(buf, M_TEMP);
83259652Sgreen			return (ENOENT);
83359652Sgreen		}
83459652Sgreen		if (vp != textvp && ncp->nc_dvp != vp->v_dd) {
83559652Sgreen			numfullpathfail3++;
83659652Sgreen			free(buf, M_TEMP);
83759652Sgreen			return (EBADF);
83859652Sgreen		}
83959652Sgreen		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
84059652Sgreen			if (bp == buf) {
84159652Sgreen				numfullpathfail4++;
84259652Sgreen				free(buf, M_TEMP);
84359652Sgreen				return (ENOMEM);
84459652Sgreen			}
84559652Sgreen			*--bp = ncp->nc_name[i];
84659652Sgreen		}
84759652Sgreen		if (bp == buf) {
84859652Sgreen			numfullpathfail4++;
84959652Sgreen			free(buf, M_TEMP);
85059652Sgreen			return (ENOMEM);
85159652Sgreen		}
85259652Sgreen		*--bp = '/';
85359652Sgreen		slash_prefixed = 1;
85459652Sgreen		vp = ncp->nc_dvp;
85559652Sgreen	}
85659652Sgreen	if (!slash_prefixed) {
85759652Sgreen		if (bp == buf) {
85859652Sgreen			numfullpathfail4++;
85959652Sgreen			free(buf, M_TEMP);
86059652Sgreen			return (ENOMEM);
86159652Sgreen		}
86259652Sgreen		*--bp = '/';
86359652Sgreen	}
86459652Sgreen	numfullpathfound++;
86559652Sgreen	*retbuf = bp;
86659652Sgreen	*retfreebuf = buf;
86759652Sgreen	return (0);
86859652Sgreen}
869