vfs_cache.c revision 96616
11541Srgrimes/*
222521Sdyson * Copyright (c) 1989, 1993, 1995
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
522521Sdyson * This code is derived from software contributed to Berkeley by
622521Sdyson * Poul-Henning Kamp of the FreeBSD Project.
722521Sdyson *
81541Srgrimes * Redistribution and use in source and binary forms, with or without
91541Srgrimes * modification, are permitted provided that the following conditions
101541Srgrimes * are met:
111541Srgrimes * 1. Redistributions of source code must retain the above copyright
121541Srgrimes *    notice, this list of conditions and the following disclaimer.
131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer in the
151541Srgrimes *    documentation and/or other materials provided with the distribution.
161541Srgrimes * 3. All advertising materials mentioning features or use of this software
171541Srgrimes *    must display the following acknowledgement:
181541Srgrimes *	This product includes software developed by the University of
191541Srgrimes *	California, Berkeley and its contributors.
201541Srgrimes * 4. Neither the name of the University nor the names of its contributors
211541Srgrimes *    may be used to endorse or promote products derived from this software
221541Srgrimes *    without specific prior written permission.
231541Srgrimes *
241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
271541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
291541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
301541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
321541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
331541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
341541Srgrimes * SUCH DAMAGE.
351541Srgrimes *
3623521Sbde *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
3750477Speter * $FreeBSD: head/sys/kern/vfs_cache.c 96616 2002-05-14 21:59:49Z jeff $
381541Srgrimes */
391541Srgrimes
401541Srgrimes#include <sys/param.h>
411541Srgrimes#include <sys/systm.h>
4212820Sphk#include <sys/kernel.h>
4376166Smarkm#include <sys/lock.h>
4489316Salfred#include <sys/mutex.h>
4512820Sphk#include <sys/sysctl.h>
461541Srgrimes#include <sys/mount.h>
471541Srgrimes#include <sys/vnode.h>
481541Srgrimes#include <sys/namei.h>
491541Srgrimes#include <sys/malloc.h>
5051906Sphk#include <sys/sysproto.h>
5151906Sphk#include <sys/proc.h>
5251906Sphk#include <sys/filedesc.h>
5374384Speter#include <sys/fnv_hash.h>
541541Srgrimes
5551906Sphk/*
5659652Sgreen * This structure describes the elements in the cache of recent
5759652Sgreen * names looked up by namei.
5859652Sgreen */
5959652Sgreen
6059652Sgreenstruct	namecache {
6160938Sjake	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
6260938Sjake	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
6360938Sjake	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
6459652Sgreen	struct	vnode *nc_dvp;		/* vnode of parent of name */
6559652Sgreen	struct	vnode *nc_vp;		/* vnode the name refers to */
6659652Sgreen	u_char	nc_flag;		/* flag bits */
6759652Sgreen	u_char	nc_nlen;		/* length of name */
6859652Sgreen	char	nc_name[0];		/* segment name */
6959652Sgreen};
7059652Sgreen
7159652Sgreen/*
721541Srgrimes * Name caching works as follows:
731541Srgrimes *
741541Srgrimes * Names found by directory scans are retained in a cache
751541Srgrimes * for future reference.  It is managed LRU, so frequently
761541Srgrimes * used names will hang around.  Cache is indexed by hash value
771541Srgrimes * obtained from (vp, name) where vp refers to the directory
781541Srgrimes * containing name.
791541Srgrimes *
8022521Sdyson * If it is a "negative" entry, (i.e. for a name that is known NOT to
8122521Sdyson * exist) the vnode pointer will be NULL.
826968Sphk *
831541Srgrimes * Upon reaching the last segment of a path, if the reference
841541Srgrimes * is for DELETE, or NOCACHE is set (rewrite), and the
851541Srgrimes * name is located in the cache, it will be dropped.
861541Srgrimes */
871541Srgrimes
881541Srgrimes/*
891541Srgrimes * Structures associated with name cacheing.
901541Srgrimes */
9174501Speter#define NCHHASH(hash) \
9274501Speter	(&nchashtbl[(hash) & nchash])
9360938Sjakestatic LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
9460938Sjakestatic TAILQ_HEAD(, namecache) ncneg;	/* Hash Table */
9523521Sbdestatic u_long	nchash;			/* size of hash table */
9662622SjhbSYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
9725453Sphkstatic u_long	ncnegfactor = 16;	/* ratio of negative entries */
9862622SjhbSYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
9991690Seivindstatic u_long	numneg;			/* number of cache entries allocated */
10062622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
10123521Sbdestatic u_long	numcache;		/* number of cache entries allocated */
10262622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
10375654Stanimurastatic u_long	numcachehv;		/* number of cache entries with vnodes held */
10475654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
10584249Sdillon#if 0
10675654Stanimurastatic u_long	numcachepl;		/* number of cache purge for leaf entries */
10775654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
10884249Sdillon#endif
10922521Sdysonstruct	nchstats nchstats;		/* cache effectiveness statistics */
1101541Srgrimes
11123521Sbdestatic int	doingcache = 1;		/* 1 => enable the cache */
11223521SbdeSYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
11391690Seivind
11491690Seivind/* Export size information to userland */
11525453SphkSYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
11625453SphkSYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");
11723521Sbde
11829788Sphk/*
11929788Sphk * The new name cache statistics
12029788Sphk */
12138984SbdeSYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
12229788Sphk#define STATNODE(mode, name, var) \
12362622Sjhb	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
12429788SphkSTATNODE(CTLFLAG_RD, numneg, &numneg);
12529788SphkSTATNODE(CTLFLAG_RD, numcache, &numcache);
12629788Sphkstatic u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
12729788Sphkstatic u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
12829788Sphkstatic u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
12929788Sphkstatic u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
13029788Sphkstatic u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
13129804Sphkstatic u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
13229788Sphkstatic u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
13329788Sphkstatic u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
13429788Sphkstatic u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
13529788Sphkstatic u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
13629788Sphk
13768922SrwatsonSYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats,
13868922Srwatson        sizeof(nchstats), "LU", "VFS cache effectiveness statistics");
13929788Sphk
14068922Srwatson
14168922Srwatson
14292723Salfredstatic void cache_zap(struct namecache *ncp);
1436968Sphk
14469774Sphkstatic MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
14551906Sphk
14622521Sdyson/*
14725453Sphk * Flags in namecache.nc_flag
14825453Sphk */
14925453Sphk#define NCF_WHITE	1
15075402Speter
15125453Sphk/*
15275402Speter * Grab an atomic snapshot of the name cache hash chain lengths
15375402Speter */
15475402SpeterSYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");
15575402Speter
15675402Speterstatic int
15775402Spetersysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
15875402Speter{
15975402Speter	int error;
16075402Speter	struct nchashhead *ncpp;
16175402Speter	struct namecache *ncp;
16275402Speter	int n_nchash;
16375402Speter	int count;
16475402Speter
16575402Speter	n_nchash = nchash + 1;	/* nchash is max index, not count */
16675402Speter	if (!req->oldptr)
16775402Speter		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));
16875402Speter
16975402Speter	/* Scan hash tables for applicable entries */
17075402Speter	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
17175402Speter		count = 0;
17275402Speter		LIST_FOREACH(ncp, ncpp, nc_hash) {
17375402Speter			count++;
17475402Speter		}
17575402Speter		error = SYSCTL_OUT(req, (caddr_t)&count, sizeof(count));
17675402Speter		if (error)
17775402Speter			return (error);
17875402Speter	}
17975402Speter	return (0);
18075402Speter}
18175402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD,
18275402Speter	0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths");
18375402Speter
18475402Speterstatic int
18575402Spetersysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
18675402Speter{
18775402Speter	int error;
18875402Speter	struct nchashhead *ncpp;
18975402Speter	struct namecache *ncp;
19075402Speter	int n_nchash;
19175402Speter	int count, maxlength, used, pct;
19275402Speter
19375402Speter	if (!req->oldptr)
19475402Speter		return SYSCTL_OUT(req, 0, 4 * sizeof(int));
19575402Speter
19675402Speter	n_nchash = nchash + 1;	/* nchash is max index, not count */
19775402Speter	used = 0;
19875402Speter	maxlength = 0;
19975402Speter
20075402Speter	/* Scan hash tables for applicable entries */
20175402Speter	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
20275402Speter		count = 0;
20375402Speter		LIST_FOREACH(ncp, ncpp, nc_hash) {
20475402Speter			count++;
20575402Speter		}
20675402Speter		if (count)
20775402Speter			used++;
20875402Speter		if (maxlength < count)
20975402Speter			maxlength = count;
21075402Speter	}
21175402Speter	n_nchash = nchash + 1;
21275402Speter	pct = (used * 100 * 100) / n_nchash;
21375402Speter	error = SYSCTL_OUT(req, (caddr_t)&n_nchash, sizeof(n_nchash));
21475402Speter	if (error)
21575402Speter		return (error);
21675402Speter	error = SYSCTL_OUT(req, (caddr_t)&used, sizeof(used));
21775402Speter	if (error)
21875402Speter		return (error);
21975402Speter	error = SYSCTL_OUT(req, (caddr_t)&maxlength, sizeof(maxlength));
22075402Speter	if (error)
22175402Speter		return (error);
22275402Speter	error = SYSCTL_OUT(req, (caddr_t)&pct, sizeof(pct));
22375402Speter	if (error)
22475402Speter		return (error);
22575402Speter	return (0);
22675402Speter}
22775402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD,
22875402Speter	0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths");
22975402Speter
23075402Speter/*
23122521Sdyson * Delete an entry from its hash list and move it to the front
23222521Sdyson * of the LRU list for immediate reuse.
23322521Sdyson */
23425453Sphkstatic void
23525453Sphkcache_zap(ncp)
23625453Sphk	struct namecache *ncp;
23725453Sphk{
23825453Sphk	LIST_REMOVE(ncp, nc_hash);
23925453Sphk	LIST_REMOVE(ncp, nc_src);
24075654Stanimura	if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
24128954Sphk		vdrop(ncp->nc_dvp);
24275654Stanimura		numcachehv--;
24375654Stanimura	}
24425453Sphk	if (ncp->nc_vp) {
24525453Sphk		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
24625453Sphk	} else {
24725453Sphk		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
24825453Sphk		numneg--;
24925453Sphk	}
25025453Sphk	numcache--;
25151906Sphk	free(ncp, M_VFSCACHE);
25222521Sdyson}
2536968Sphk
25422521Sdyson/*
25584249Sdillon * cache_leaf_test()
25684249Sdillon *
25784249Sdillon *      Test whether this (directory) vnode's namei cache entry contains
25884249Sdillon *      subdirectories or not.  Used to determine whether the directory is
25984249Sdillon *      a leaf in the namei cache or not.  Note: the directory may still
26084249Sdillon *      contain files in the namei cache.
26184249Sdillon *
26284249Sdillon *      Returns 0 if the directory is a leaf, -1 if it isn't.
26384249Sdillon */
26484249Sdillonint
26584249Sdilloncache_leaf_test(struct vnode *vp)
26684249Sdillon{
26784249Sdillon	struct namecache *ncpc;
26884249Sdillon
26984249Sdillon	for (ncpc = LIST_FIRST(&vp->v_cache_src);
27084249Sdillon	     ncpc != NULL;
27184249Sdillon	     ncpc = LIST_NEXT(ncpc, nc_src)
27284249Sdillon	) {
27384249Sdillon		if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR)
27484249Sdillon			return(-1);
27584249Sdillon	}
27684249Sdillon	return(0);
27784249Sdillon}
27884249Sdillon
27984249Sdillon/*
28023521Sbde * Lookup an entry in the cache
2816968Sphk *
2826968Sphk * Lookup is called with dvp pointing to the directory to search,
28322521Sdyson * cnp pointing to the name of the entry being sought. If the lookup
28422521Sdyson * succeeds, the vnode is returned in *vpp, and a status of -1 is
28522521Sdyson * returned. If the lookup determines that the name does not exist
28622521Sdyson * (negative cacheing), a status of ENOENT is returned. If the lookup
28722521Sdyson * fails, a status of zero is returned.
2881541Srgrimes */
2896968Sphk
2901541Srgrimesint
2911541Srgrimescache_lookup(dvp, vpp, cnp)
2921541Srgrimes	struct vnode *dvp;
2931541Srgrimes	struct vnode **vpp;
2941541Srgrimes	struct componentname *cnp;
2951541Srgrimes{
29651906Sphk	struct namecache *ncp;
29774384Speter	u_int32_t hash;
2981541Srgrimes
2996928Sphk	if (!doingcache) {
3006928Sphk		cnp->cn_flags &= ~MAKEENTRY;
3011541Srgrimes		return (0);
3026928Sphk	}
30325453Sphk
30429788Sphk	numcalls++;
30529788Sphk
30625453Sphk	if (cnp->cn_nameptr[0] == '.') {
30725453Sphk		if (cnp->cn_namelen == 1) {
30825453Sphk			*vpp = dvp;
30929788Sphk			dothits++;
31025453Sphk			return (-1);
31125453Sphk		}
31225453Sphk		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
31329788Sphk			dotdothits++;
31425453Sphk			if (dvp->v_dd->v_id != dvp->v_ddid ||
31525453Sphk			    (cnp->cn_flags & MAKEENTRY) == 0) {
31625453Sphk				dvp->v_ddid = 0;
31725453Sphk				return (0);
31825453Sphk			}
31925453Sphk			*vpp = dvp->v_dd;
32025453Sphk			return (-1);
32125453Sphk		}
3221541Srgrimes	}
3236968Sphk
32474501Speter	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
32574501Speter	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
32674501Speter	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
32729788Sphk		numchecks++;
32825453Sphk		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
32931879Sbde		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
33022521Sdyson			break;
3311541Srgrimes	}
3326968Sphk
33322521Sdyson	/* We failed to find an entry */
33422521Sdyson	if (ncp == 0) {
33529804Sphk		if ((cnp->cn_flags & MAKEENTRY) == 0) {
33629804Sphk			nummisszap++;
33729804Sphk		} else {
33829804Sphk			nummiss++;
33929804Sphk		}
34022521Sdyson		nchstats.ncs_miss++;
34122521Sdyson		return (0);
34222521Sdyson	}
34322521Sdyson
3446968Sphk	/* We don't want to have an entry, so dump it */
3456928Sphk	if ((cnp->cn_flags & MAKEENTRY) == 0) {
34629788Sphk		numposzaps++;
3471541Srgrimes		nchstats.ncs_badhits++;
34825453Sphk		cache_zap(ncp);
3496968Sphk		return (0);
35023521Sbde	}
3516968Sphk
3526968Sphk	/* We found a "positive" match, return the vnode */
35322521Sdyson        if (ncp->nc_vp) {
35429788Sphk		numposhits++;
3551541Srgrimes		nchstats.ncs_goodhits++;
3561541Srgrimes		*vpp = ncp->nc_vp;
3571541Srgrimes		return (-1);
3581541Srgrimes	}
3591541Srgrimes
3606968Sphk	/* We found a negative match, and want to create it, so purge */
3616968Sphk	if (cnp->cn_nameiop == CREATE) {
36229788Sphk		numnegzaps++;
3637013Sphk		nchstats.ncs_badhits++;
36425453Sphk		cache_zap(ncp);
3656968Sphk		return (0);
3666968Sphk	}
3676968Sphk
36829788Sphk	numneghits++;
36922521Sdyson	/*
37022521Sdyson	 * We found a "negative" match, ENOENT notifies client of this match.
37122521Sdyson	 * The nc_vpid field records whether this is a whiteout.
37222521Sdyson	 */
37325453Sphk	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
37425453Sphk	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
3756968Sphk	nchstats.ncs_neghits++;
37625453Sphk	if (ncp->nc_flag & NCF_WHITE)
37725453Sphk		cnp->cn_flags |= ISWHITEOUT;
3786968Sphk	return (ENOENT);
3791541Srgrimes}
3801541Srgrimes
3811541Srgrimes/*
3826968Sphk * Add an entry to the cache.
3831541Srgrimes */
3841549Srgrimesvoid
3851541Srgrimescache_enter(dvp, vp, cnp)
3861541Srgrimes	struct vnode *dvp;
3871541Srgrimes	struct vnode *vp;
3881541Srgrimes	struct componentname *cnp;
3891541Srgrimes{
39051906Sphk	struct namecache *ncp;
39151906Sphk	struct nchashhead *ncpp;
39274384Speter	u_int32_t hash;
39351906Sphk	int len;
3941541Srgrimes
3951541Srgrimes	if (!doingcache)
3961541Srgrimes		return;
3976968Sphk
39825453Sphk	if (cnp->cn_nameptr[0] == '.') {
39925453Sphk		if (cnp->cn_namelen == 1) {
40025453Sphk			return;
4016928Sphk		}
40225453Sphk		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
40325453Sphk			if (vp) {
40425453Sphk				dvp->v_dd = vp;
40525453Sphk				dvp->v_ddid = vp->v_id;
40625453Sphk			} else {
40725453Sphk				dvp->v_dd = dvp;
40825453Sphk				dvp->v_ddid = 0;
40925453Sphk			}
41025453Sphk			return;
41125453Sphk		}
4126968Sphk	}
41325453Sphk
41425453Sphk	ncp = (struct namecache *)
41551906Sphk		malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK);
41625453Sphk	bzero((char *)ncp, sizeof *ncp);
41725453Sphk	numcache++;
41828954Sphk	if (!vp) {
41925453Sphk		numneg++;
42028954Sphk		ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0;
42129071Sphk	} else if (vp->v_type == VDIR) {
42229071Sphk		vp->v_dd = dvp;
42329071Sphk		vp->v_ddid = dvp->v_id;
42428954Sphk	}
42523521Sbde
42622521Sdyson	/*
42722521Sdyson	 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
42822521Sdyson	 * For negative entries, we have to record whether it is a whiteout.
42922521Sdyson	 * the whiteout flag is stored in the nc_vpid field which is
43022521Sdyson	 * otherwise unused.
43122521Sdyson	 */
4321541Srgrimes	ncp->nc_vp = vp;
4331541Srgrimes	ncp->nc_dvp = dvp;
43451906Sphk	len = ncp->nc_nlen = cnp->cn_namelen;
43574501Speter	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
43674384Speter	bcopy(cnp->cn_nameptr, ncp->nc_name, len);
43774501Speter	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
43874501Speter	ncpp = NCHHASH(hash);
4396928Sphk	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
44075654Stanimura	if (LIST_EMPTY(&dvp->v_cache_src)) {
44128954Sphk		vhold(dvp);
44275654Stanimura		numcachehv++;
44375654Stanimura	}
44425453Sphk	LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
44525453Sphk	if (vp) {
44625453Sphk		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
44725453Sphk	} else {
44825453Sphk		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
44925453Sphk	}
45051906Sphk	if (numneg * ncnegfactor > numcache) {
45125453Sphk		ncp = TAILQ_FIRST(&ncneg);
45225453Sphk		cache_zap(ncp);
45325453Sphk	}
4541541Srgrimes}
4551541Srgrimes
4561541Srgrimes/*
4571541Srgrimes * Name cache initialization, from vfs_init() when we are booting
4581541Srgrimes */
45969664Speterstatic void
46069664Speternchinit(void *dummy __unused)
4611541Srgrimes{
46223521Sbde
46325453Sphk	TAILQ_INIT(&ncneg);
46469664Speter	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
4651541Srgrimes}
46669664SpeterSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL)
4671541Srgrimes
46869664Speter
4691541Srgrimes/*
47046011Sphk * Invalidate all entries to a particular vnode.
47123521Sbde *
47246011Sphk * Remove all entries in the namecache relating to this vnode and
47346011Sphk * change the v_id.  We take the v_id from a global counter, since
47446011Sphk * it becomes a handy sequence number in crash-dumps that way.
47546011Sphk * No valid vnode will ever have (v_id == 0).
47646011Sphk *
47746011Sphk * XXX: Only time and the size of v_id prevents this from failing:
47846011Sphk * XXX: In theory we should hunt down all (struct vnode*, v_id)
47946011Sphk * XXX: soft references and nuke them, at least on the global
48046011Sphk * XXX: v_id wraparound.  The period of resistance can be extended
48146011Sphk * XXX: by incrementing each vnodes v_id individually instead of
48246011Sphk * XXX: using the global v_id.
4831541Srgrimes */
48446011Sphk
4851549Srgrimesvoid
4861541Srgrimescache_purge(vp)
4871541Srgrimes	struct vnode *vp;
4881541Srgrimes{
48929094Sphk	static u_long nextid;
4901541Srgrimes
49125453Sphk	while (!LIST_EMPTY(&vp->v_cache_src))
49225453Sphk		cache_zap(LIST_FIRST(&vp->v_cache_src));
49325453Sphk	while (!TAILQ_EMPTY(&vp->v_cache_dst))
49425453Sphk		cache_zap(TAILQ_FIRST(&vp->v_cache_dst));
49525453Sphk
49646011Sphk	do
49746011Sphk		nextid++;
49846011Sphk	while (nextid == vp->v_id || !nextid);
49929094Sphk	vp->v_id = nextid;
50025453Sphk	vp->v_dd = vp;
50125453Sphk	vp->v_ddid = 0;
5021541Srgrimes}
5031541Srgrimes
5041541Srgrimes/*
5056968Sphk * Flush all entries referencing a particular filesystem.
5061541Srgrimes *
5076968Sphk * Since we need to check it anyway, we will flush all the invalid
50812968Sphk * entries at the same time.
5091541Srgrimes */
5101549Srgrimesvoid
5111541Srgrimescache_purgevfs(mp)
5121541Srgrimes	struct mount *mp;
5131541Srgrimes{
5146968Sphk	struct nchashhead *ncpp;
51522521Sdyson	struct namecache *ncp, *nnp;
5161541Srgrimes
5176968Sphk	/* Scan hash tables for applicable entries */
51829071Sphk	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
51925453Sphk		for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) {
52025453Sphk			nnp = LIST_NEXT(ncp, nc_hash);
52125453Sphk			if (ncp->nc_dvp->v_mount == mp) {
52225453Sphk				cache_zap(ncp);
5236968Sphk			}
5241541Srgrimes		}
5251541Srgrimes	}
5261541Srgrimes}
52728787Sphk
52828787Sphk/*
52928787Sphk * Perform canonical checks and cache lookup and pass on to filesystem
53028787Sphk * through the vop_cachedlookup only if needed.
53128787Sphk */
53228787Sphk
53328787Sphkint
53428787Sphkvfs_cache_lookup(ap)
53528787Sphk	struct vop_lookup_args /* {
53628787Sphk		struct vnode *a_dvp;
53728787Sphk		struct vnode **a_vpp;
53828787Sphk		struct componentname *a_cnp;
53928787Sphk	} */ *ap;
54028787Sphk{
54165665Sbp	struct vnode *dvp, *vp;
54265665Sbp	int lockparent;
54328787Sphk	int error;
54428787Sphk	struct vnode **vpp = ap->a_vpp;
54528787Sphk	struct componentname *cnp = ap->a_cnp;
54628787Sphk	struct ucred *cred = cnp->cn_cred;
54728787Sphk	int flags = cnp->cn_flags;
54883366Sjulian	struct thread *td = cnp->cn_thread;
54928787Sphk	u_long vpid;	/* capability number of vnode */
55028787Sphk
55128787Sphk	*vpp = NULL;
55265665Sbp	dvp = ap->a_dvp;
55328787Sphk	lockparent = flags & LOCKPARENT;
55428787Sphk
55565665Sbp	if (dvp->v_type != VDIR)
55628787Sphk                return (ENOTDIR);
55728787Sphk
55865665Sbp	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
55928787Sphk	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
56028787Sphk		return (EROFS);
56128787Sphk
56283366Sjulian	error = VOP_ACCESS(dvp, VEXEC, cred, td);
56328787Sphk
56428787Sphk	if (error)
56528787Sphk		return (error);
56628787Sphk
56765665Sbp	error = cache_lookup(dvp, vpp, cnp);
56828787Sphk
56996616Sjeff#ifdef LOOKUP_SHARED
57092130Sjeff	if (!error) {
57192130Sjeff		/* We do this because the rest of the system now expects to get
57292130Sjeff		 * a shared lock, which is later upgraded if LOCKSHARED is not
57392130Sjeff		 * set.  We have so many cases here because of bugs that yield
57492130Sjeff		 * inconsistant lock states.  This all badly needs to be fixed
57592130Sjeff		 */
57692130Sjeff		error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
57792130Sjeff		if (!error) {
57892130Sjeff			int flock;
57992130Sjeff
58092130Sjeff			flock = VOP_ISLOCKED(*vpp, td);
58192130Sjeff			if (flock != LK_EXCLUSIVE) {
58292130Sjeff				if (flock == 0) {
58392130Sjeff					if ((flags & ISLASTCN) &&
58492130Sjeff					    (flags & LOCKSHARED))
58592130Sjeff						VOP_LOCK(*vpp, LK_SHARED, td);
58692130Sjeff					else
58792130Sjeff						VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
58892130Sjeff				}
58992130Sjeff			} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
59092130Sjeff				VOP_LOCK(*vpp, LK_DOWNGRADE, td);
59192130Sjeff		}
59292130Sjeff		return (error);
59392130Sjeff	}
59492130Sjeff#else
59528787Sphk	if (!error)
59665665Sbp		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
59792130Sjeff#endif
59828787Sphk
59928787Sphk	if (error == ENOENT)
60028787Sphk		return (error);
60128787Sphk
60265665Sbp	vp = *vpp;
60365665Sbp	vpid = vp->v_id;
60465973Sbp	cnp->cn_flags &= ~PDIRUNLOCK;
60565665Sbp	if (dvp == vp) {   /* lookup on "." */
60665665Sbp		VREF(vp);
60728787Sphk		error = 0;
60828787Sphk	} else if (flags & ISDOTDOT) {
60983366Sjulian		VOP_UNLOCK(dvp, 0, td);
61065973Sbp		cnp->cn_flags |= PDIRUNLOCK;
61196616Sjeff#ifdef LOOKUP_SHARED
61292130Sjeff		if ((flags & ISLASTCN) && (flags & LOCKSHARED))
61392130Sjeff			error = vget(vp, LK_SHARED, td);
61492130Sjeff		else
61592130Sjeff			error = vget(vp, LK_EXCLUSIVE, td);
61692130Sjeff#else
61783366Sjulian		error = vget(vp, LK_EXCLUSIVE, td);
61892130Sjeff#endif
61992130Sjeff
62065973Sbp		if (!error && lockparent && (flags & ISLASTCN)) {
62183366Sjulian			if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0)
62265973Sbp				cnp->cn_flags &= ~PDIRUNLOCK;
62365973Sbp		}
62428787Sphk	} else {
62596616Sjeff#ifdef LOOKUP_SHARED
62692130Sjeff		if ((flags & ISLASTCN) && (flags & LOCKSHARED))
62792130Sjeff			error = vget(vp, LK_SHARED, td);
62892130Sjeff		else
62992130Sjeff			error = vget(vp, LK_EXCLUSIVE, td);
63092130Sjeff#else
63183366Sjulian		error = vget(vp, LK_EXCLUSIVE, td);
63292130Sjeff#endif
63365973Sbp		if (!lockparent || error || !(flags & ISLASTCN)) {
63483366Sjulian			VOP_UNLOCK(dvp, 0, td);
63565973Sbp			cnp->cn_flags |= PDIRUNLOCK;
63665973Sbp		}
63728787Sphk	}
63828787Sphk	/*
63928787Sphk	 * Check that the capability number did not change
64028787Sphk	 * while we were waiting for the lock.
64128787Sphk	 */
64228787Sphk	if (!error) {
64365665Sbp		if (vpid == vp->v_id)
64428787Sphk			return (0);
64565665Sbp		vput(vp);
64665973Sbp		if (lockparent && dvp != vp && (flags & ISLASTCN)) {
64783366Sjulian			VOP_UNLOCK(dvp, 0, td);
64865973Sbp			cnp->cn_flags |= PDIRUNLOCK;
64965973Sbp		}
65028787Sphk	}
65165973Sbp	if (cnp->cn_flags & PDIRUNLOCK) {
65283366Sjulian		error = vn_lock(dvp, LK_EXCLUSIVE, td);
65365973Sbp		if (error)
65465973Sbp			return (error);
65565973Sbp		cnp->cn_flags &= ~PDIRUNLOCK;
65665973Sbp	}
65796616Sjeff#ifdef LOOKUP_SHARED
65892130Sjeff	error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
65992130Sjeff
66092130Sjeff	if (!error) {
66192130Sjeff		int flock = 0;
66292130Sjeff
66392130Sjeff		flock = VOP_ISLOCKED(*vpp, td);
66492130Sjeff		if (flock != LK_EXCLUSIVE) {
66592130Sjeff			if (flock == 0) {
66692130Sjeff				if ((flags & ISLASTCN) && (flags & LOCKSHARED))
66792130Sjeff					VOP_LOCK(*vpp, LK_SHARED, td);
66892130Sjeff				else
66992130Sjeff					VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
67092130Sjeff			}
67192130Sjeff		} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
67292130Sjeff			VOP_LOCK(*vpp, LK_DOWNGRADE, td);
67392130Sjeff	}
67492130Sjeff
67592130Sjeff	return (error);
67692130Sjeff#else
67765665Sbp	return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
67892130Sjeff#endif
67928787Sphk}
68051906Sphk
68151906Sphk
68251906Sphk#ifndef _SYS_SYSPROTO_H_
68351906Sphkstruct  __getcwd_args {
68451906Sphk	u_char	*buf;
68551906Sphk	u_int	buflen;
68651906Sphk};
68751906Sphk#endif
68851906Sphk
68991690Seivind/*
69091690Seivind * XXX All of these sysctls would probably be more productive dead.
69191690Seivind */
69251906Sphkstatic int disablecwd;
69391690SeivindSYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
69491690Seivind   "Disable the getcwd syscall");
69551906Sphk
69691690Seivind/* Various statistics for the getcwd syscall */
69751906Sphkstatic u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
69851906Sphkstatic u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
69951906Sphkstatic u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
70051906Sphkstatic u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
70151906Sphkstatic u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
70251906Sphkstatic u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);
70391690Seivind
70491690Seivind/* Implementation of the getcwd syscall */
70551906Sphkint
70683366Sjulian__getcwd(td, uap)
70783366Sjulian	struct thread *td;
70851906Sphk	struct __getcwd_args *uap;
70951906Sphk{
71051906Sphk	char *bp, *buf;
71151906Sphk	int error, i, slash_prefixed;
71251906Sphk	struct filedesc *fdp;
71351906Sphk	struct namecache *ncp;
71451906Sphk	struct vnode *vp;
71551906Sphk
71651906Sphk	numcwdcalls++;
71751906Sphk	if (disablecwd)
71851906Sphk		return (ENODEV);
71951906Sphk	if (uap->buflen < 2)
72051906Sphk		return (EINVAL);
72151906Sphk	if (uap->buflen > MAXPATHLEN)
72251906Sphk		uap->buflen = MAXPATHLEN;
72351906Sphk	buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK);
72451906Sphk	bp += uap->buflen - 1;
72551906Sphk	*bp = '\0';
72683366Sjulian	fdp = td->td_proc->p_fd;
72751906Sphk	slash_prefixed = 0;
72889306Salfred	FILEDESC_LOCK(fdp);
72951906Sphk	for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
73051906Sphk		if (vp->v_flag & VROOT) {
73183000Siedowse			if (vp->v_mount == NULL) {	/* forced unmount */
73289306Salfred				FILEDESC_UNLOCK(fdp);
73383000Siedowse				free(buf, M_TEMP);
73457199Speter				return (EBADF);
73583000Siedowse			}
73651906Sphk			vp = vp->v_mount->mnt_vnodecovered;
73751906Sphk			continue;
73851906Sphk		}
73951906Sphk		if (vp->v_dd->v_id != vp->v_ddid) {
74089306Salfred			FILEDESC_UNLOCK(fdp);
74151906Sphk			numcwdfail1++;
74251906Sphk			free(buf, M_TEMP);
74351906Sphk			return (ENOTDIR);
74451906Sphk		}
74551906Sphk		ncp = TAILQ_FIRST(&vp->v_cache_dst);
74651906Sphk		if (!ncp) {
74789306Salfred			FILEDESC_UNLOCK(fdp);
74851906Sphk			numcwdfail2++;
74951906Sphk			free(buf, M_TEMP);
75051906Sphk			return (ENOENT);
75151906Sphk		}
75251906Sphk		if (ncp->nc_dvp != vp->v_dd) {
75389306Salfred			FILEDESC_UNLOCK(fdp);
75451906Sphk			numcwdfail3++;
75551906Sphk			free(buf, M_TEMP);
75651906Sphk			return (EBADF);
75751906Sphk		}
75851906Sphk		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
75951906Sphk			if (bp == buf) {
76089306Salfred				FILEDESC_UNLOCK(fdp);
76151906Sphk				numcwdfail4++;
76251906Sphk				free(buf, M_TEMP);
76351906Sphk				return (ENOMEM);
76451906Sphk			}
76551906Sphk			*--bp = ncp->nc_name[i];
76651906Sphk		}
76751906Sphk		if (bp == buf) {
76889306Salfred			FILEDESC_UNLOCK(fdp);
76951906Sphk			numcwdfail4++;
77051906Sphk			free(buf, M_TEMP);
77151906Sphk			return (ENOMEM);
77251906Sphk		}
77351906Sphk		*--bp = '/';
77451906Sphk		slash_prefixed = 1;
77551906Sphk		vp = vp->v_dd;
77651906Sphk	}
77789306Salfred	FILEDESC_UNLOCK(fdp);
77851906Sphk	if (!slash_prefixed) {
77951906Sphk		if (bp == buf) {
78051906Sphk			numcwdfail4++;
78151906Sphk			free(buf, M_TEMP);
78251906Sphk			return (ENOMEM);
78351906Sphk		}
78451906Sphk		*--bp = '/';
78551906Sphk	}
78651906Sphk	numcwdfound++;
78751906Sphk	error = copyout(bp, uap->buf, strlen(bp) + 1);
78851906Sphk	free(buf, M_TEMP);
78951906Sphk	return (error);
79051906Sphk}
79151906Sphk
79259652Sgreen/*
79359652Sgreen * Thus begins the fullpath magic.
79459652Sgreen */
79559652Sgreen
79659652Sgreen#undef STATNODE
79759652Sgreen#define STATNODE(name)							\
79859652Sgreen	static u_int name;						\
79962622Sjhb	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")
80059652Sgreen
80159652Sgreenstatic int disablefullpath;
80291690SeivindSYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
80391690Seivind	"Disable the vn_fullpath function");
80459652Sgreen
80559652SgreenSTATNODE(numfullpathcalls);
80659652SgreenSTATNODE(numfullpathfail1);
80759652SgreenSTATNODE(numfullpathfail2);
80859652SgreenSTATNODE(numfullpathfail3);
80959652SgreenSTATNODE(numfullpathfail4);
81059652SgreenSTATNODE(numfullpathfound);
81159652Sgreen
81291690Seivind/*
81391690Seivind * Retrieve the full filesystem path that correspond to a vnode from the name
81491690Seivind * cache (if available)
81591690Seivind */
81659652Sgreenint
81785287Sdesvn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
81885287Sdes{
81959652Sgreen	char *bp, *buf;
82059652Sgreen	int i, slash_prefixed;
82159652Sgreen	struct filedesc *fdp;
82259652Sgreen	struct namecache *ncp;
82385287Sdes	struct vnode *vp;
82459652Sgreen
82559652Sgreen	numfullpathcalls++;
82659652Sgreen	if (disablefullpath)
82759652Sgreen		return (ENODEV);
82885287Sdes	if (vn == NULL)
82959652Sgreen		return (EINVAL);
83059652Sgreen	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
83159652Sgreen	bp = buf + MAXPATHLEN - 1;
83259652Sgreen	*bp = '\0';
83385287Sdes	fdp = td->td_proc->p_fd;
83459652Sgreen	slash_prefixed = 0;
83589306Salfred	FILEDESC_LOCK(fdp);
83685287Sdes	for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) {
83759652Sgreen		if (vp->v_flag & VROOT) {
83859652Sgreen			if (vp->v_mount == NULL) {	/* forced unmount */
83989306Salfred				FILEDESC_UNLOCK(fdp);
84059652Sgreen				free(buf, M_TEMP);
84159652Sgreen				return (EBADF);
84259652Sgreen			}
84359652Sgreen			vp = vp->v_mount->mnt_vnodecovered;
84459652Sgreen			continue;
84559652Sgreen		}
84685287Sdes		if (vp != vn && vp->v_dd->v_id != vp->v_ddid) {
84789306Salfred			FILEDESC_UNLOCK(fdp);
84859652Sgreen			numfullpathfail1++;
84959652Sgreen			free(buf, M_TEMP);
85059652Sgreen			return (ENOTDIR);
85159652Sgreen		}
85259652Sgreen		ncp = TAILQ_FIRST(&vp->v_cache_dst);
85359652Sgreen		if (!ncp) {
85489306Salfred			FILEDESC_UNLOCK(fdp);
85559652Sgreen			numfullpathfail2++;
85659652Sgreen			free(buf, M_TEMP);
85759652Sgreen			return (ENOENT);
85859652Sgreen		}
85985287Sdes		if (vp != vn && ncp->nc_dvp != vp->v_dd) {
86089306Salfred			FILEDESC_UNLOCK(fdp);
86159652Sgreen			numfullpathfail3++;
86259652Sgreen			free(buf, M_TEMP);
86359652Sgreen			return (EBADF);
86459652Sgreen		}
86559652Sgreen		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
86659652Sgreen			if (bp == buf) {
86789306Salfred				FILEDESC_UNLOCK(fdp);
86859652Sgreen				numfullpathfail4++;
86959652Sgreen				free(buf, M_TEMP);
87059652Sgreen				return (ENOMEM);
87159652Sgreen			}
87259652Sgreen			*--bp = ncp->nc_name[i];
87359652Sgreen		}
87459652Sgreen		if (bp == buf) {
87589306Salfred			FILEDESC_UNLOCK(fdp);
87659652Sgreen			numfullpathfail4++;
87759652Sgreen			free(buf, M_TEMP);
87859652Sgreen			return (ENOMEM);
87959652Sgreen		}
88059652Sgreen		*--bp = '/';
88159652Sgreen		slash_prefixed = 1;
88259652Sgreen		vp = ncp->nc_dvp;
88359652Sgreen	}
88459652Sgreen	if (!slash_prefixed) {
88559652Sgreen		if (bp == buf) {
88689306Salfred			FILEDESC_UNLOCK(fdp);
88759652Sgreen			numfullpathfail4++;
88859652Sgreen			free(buf, M_TEMP);
88959652Sgreen			return (ENOMEM);
89059652Sgreen		}
89159652Sgreen		*--bp = '/';
89259652Sgreen	}
89389306Salfred	FILEDESC_UNLOCK(fdp);
89459652Sgreen	numfullpathfound++;
89559652Sgreen	*retbuf = bp;
89685287Sdes	*freebuf = buf;
89759652Sgreen	return (0);
89859652Sgreen}
899