vfs_subr.c revision 9507
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 * $Id: vfs_subr.c,v 1.33 1995/07/08 04:10:32 davidg Exp $
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

void insmntque __P((struct vnode *, struct mount *));

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
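
/*
 * Editorial sketch (not part of this revision): the two tables above
 * back the IFTOVT() and VTTOIF() macros in <sys/vnode.h>, translating
 * between the S_IFMT bits of an inode mode and the in-core vnode type.
 * `example_vtype' is a hypothetical illustration of the mapping.
 */
static enum vtype
example_vtype(mode)
	u_short mode;
{
	enum vtype vt;

	vt = IFTOVT(mode);		/* e.g. S_IFDIR maps to VDIR */
	if (VTTOIF(vt) != (mode & S_IFMT))
		vt = VNON;		/* mode named no known file type */
	return (vt);
}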

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {  \
	LIST_REMOVE(bp, b_vnbufs); \
	(bp)->b_vnbufs.le_next = NOLIST; \
}

TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
u_long freevnodes	= 0;

struct mntlist mountlist;	/* mounted filesystem list */

int desiredvnodes;

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{
	desiredvnodes = maxproc + vm_object_cache_max;

	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&mountlist);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
int
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		(void) tsleep((caddr_t) mp, PVFS, "vfslck", 0);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t) mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
int
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		(void) tsleep((caddr_t) &mp->mnt_flag, PVFS, "vfsbsy", 0);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
void
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t) &mp->mnt_flag);
	}
}
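
/*
 * Editorial sketch (not part of this revision): the canonical pairing
 * of vfs_busy()/vfs_unbusy() around work that must not race with an
 * unmount.  `example_with_mount_busy' and `do_work' are hypothetical.
 */
extern int do_work();

static int
example_with_mount_busy(mp)
	register struct mount *mp;
{
	int error;

	if (vfs_busy(mp))		/* fails once an unmount has begun */
		return (EBUSY);
	error = do_work(mp);		/* mp cannot go away in here */
	vfs_unbusy(mp);			/* wakes waiters in vfs_busy() */
	return (error);
}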

void
vfs_unmountroot(rootfs)
	struct mount *rootfs;
{
	struct mount *mp = rootfs;
	int error;

	if (vfs_busy(mp)) {
		printf("failed to unmount root\n");
		return;
	}
	mp->mnt_flag |= MNT_UNMOUNT;
	if ((error = vfs_lock(mp))) {
		printf("lock of root filesystem failed (%d)\n", error);
		return;
	}
	vnode_pager_umount(mp);	/* release cached vnodes */
	cache_purgevfs(mp);	/* remove cache entries for this file sys */

	if ((error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc)))
		printf("sync of root filesystem failed (%d)\n", error);

	if ((error = VFS_UNMOUNT(mp, MNT_FORCE, initproc))) {
		printf("unmount of root filesystem failed (");
		if (error == EBUSY)
			printf("BUSY)\n");
		else
			printf("%d)\n", error);
	}
	mp->mnt_flag &= ~MNT_UNMOUNT;
	vfs_unbusy(mp);
}

/*
 * Unmount all filesystems.  Should only be called by halt().
 */
void
vfs_unmountall()
{
	struct mount *mp, *mp_next, *rootfs = NULL;
	int error;

	/* unmount all but rootfs */
	for (mp = mountlist.tqh_first; mp != NULL; mp = mp_next) {
		mp_next = mp->mnt_list.tqe_next;

		if (mp->mnt_flag & MNT_ROOTFS) {
			rootfs = mp;
			continue;
		}
		error = dounmount(mp, MNT_FORCE, initproc);
		if (error) {
			printf("unmount of %s failed (", mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}

	/* and finally... */
	if (rootfs) {
		vfs_unmountroot(rootfs);
	} else {
		printf("no root filesystem\n");
	}
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
			return (mp);
	}
	return ((struct mount *) 0);
}
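
/*
 * Editorial sketch (not part of this revision): getvfs() is the hook
 * by which a file-handle style lookup (as in the NFS server) maps an
 * fsid back to its mount point; a missing mount means a stale handle.
 * `example_fsid_check' is a hypothetical wrapper.
 */
static int
example_fsid_check(fsid)
	fsid_t *fsid;
{

	if (getvfs(fsid) == NULL)
		return (ESTALE);	/* filesystem no longer mounted */
	return (0);
}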

/*
 * Get a new unique fsid.
 */
void
getnewfsid(mp, mtype)
	struct mount *mp;
	int mtype;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;

	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.tqh_first != NULL) {
		while (getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Set vnode attributes to VNOVAL.
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
	    vap->va_fsid = vap->va_fileid =
	    vap->va_blocksize = vap->va_rdev =
	    vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
	    vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
	    vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
	    vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}
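
/*
 * Editorial sketch (not part of this revision): callers build a
 * VOP_SETATTR() request by clearing the vattr with vattr_null() and
 * then setting only the fields to change; everything left at VNOVAL
 * means "do not touch".  `example_truncate' is hypothetical.
 */
static int
example_truncate(vp, length, cred, p)
	struct vnode *vp;
	u_long length;
	struct ucred *cred;
	struct proc *p;
{
	struct vattr vattr;

	vattr_null(&vattr);		/* every field becomes VNOVAL */
	vattr.va_size = length;		/* ...except the one we set */
	return (VOP_SETATTR(vp, &vattr, cred, p));
}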

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) ();
extern void vclean();

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) ();
	struct vnode **vpp;
{
	register struct vnode *vp;

	vp = vnode_free_list.tqh_first;
	/*
	 * we allocate a new vnode if
	 *	1. we don't have any free
	 *		Pretty obvious, we actually used to panic, but that
	 *		is a silly thing to do.
	 *	2. we haven't filled our pool yet
	 *		We don't want to trash the incore (VM-)vnodecache.
	 *	3. less than 1/4th of our vnodes are free.
	 *		We don't want to trash the namei cache either.
	 */
	if (freevnodes < (numvnodes >> 2) ||
	    numvnodes < desiredvnodes ||
	    vp == NULL) {
		vp = (struct vnode *) malloc((u_long) sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *) vp, sizeof *vp);
		numvnodes++;
	} else {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;

		if (vp->v_usecount)
			panic("free vnode isn't");

		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb;
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgone(vp);
#ifdef DIAGNOSTIC
		{
			int s;

			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
		}
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}
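
/*
 * Editorial sketch (not part of this revision): a filesystem's vnode
 * allocation path calls getnewvnode() with its own tag (VT_UFS,
 * VT_NFS, ...) and vnodeop vector, then hangs its private node off
 * v_data.  `example_vnodeop_p' and `example_node_alloc' are
 * hypothetical; VT_NON stands in for the filesystem's real tag.
 */
extern int (**example_vnodeop_p) ();
extern caddr_t example_node_alloc();

static int
example_valloc(mp, vpp)
	struct mount *mp;
	struct vnode **vpp;
{
	int error;

	error = getnewvnode(VT_NON, mp, example_vnodeop_p, vpp);
	if (error)
		return (error);
	/* v_data arrives zeroed; attach the fs-private node. */
	(*vpp)->v_data = example_node_alloc();
	return (0);
}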

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL)
		return;
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp)) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t) &vp->v_numoutput);
		}
	}
}
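
/*
 * Editorial sketch (not part of this revision): vwakeup() is the
 * completion side of v_numoutput; the producer side in the buffer
 * I/O code bumps the count at splbio() before each write is queued,
 * and biodone() calls vwakeup() when it finishes.  A condensed,
 * hypothetical producer:
 */
static void
example_start_write(bp)
	register struct buf *bp;
{
	int s;

	s = splbio();
	bp->b_vp->v_numoutput++;	/* undone by vwakeup() via biodone() */
	splx(s);
	VOP_STRATEGY(bp);		/* hand the write to the driver */
}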

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if (flags & V_SAVE) {
		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t) bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
			brelse(bp);
		}
	}

	s = splbio();
	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}
	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	object = vp->v_object;
	if (object != NULL) {
		vm_object_page_remove(object, 0, object->size,
		    (flags & V_SAVE) ? TRUE : FALSE);
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}
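
/*
 * Editorial sketch (not part of this revision): a typical caller,
 * such as a filesystem truncating a file to zero length, discards
 * everything; passing V_SAVE instead would fsync the dirty buffers
 * first.  `example_purge' is a hypothetical wrapper.
 */
static int
example_purge(vp, cred, p)
	struct vnode *vp;
	struct ucred *cred;
	struct proc *p;
{

	/* No V_SAVE: dirty data is discarded, not written back. */
	return (vinvalbuf(vp, 0, cred, p, 0, 0));
}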

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	s = splbio();
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	splx(s);

	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Associate a p-buffer with a vnode.
 */
void
pbgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	if (bp->b_vp)
		panic("pbgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
}

/*
 * Disassociate a p-buffer from a vnode.
 */
void
pbrelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("pbrelvp: NULL");

	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers; otherwise insert onto list
	 * of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI) {
		struct buf *tbp;

		tbp = newvp->v_dirtyblkhd.lh_first;
		if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
			bufinsvn(bp, &newvp->v_dirtyblkhd);
		} else {
			while (tbp->b_vnbufs.le_next &&
			    (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
				tbp = tbp->b_vnbufs.le_next;
			}
			LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
		}
	} else {
		listheadp = &newvp->v_cleanblkhd;
		bufinsvn(bp, listheadp);
	}
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
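
/*
 * Editorial sketch (not part of this revision): the classic bdevvp()
 * consumer is system initialization creating the vnode for the root
 * device before the root filesystem is mounted.  The declarations
 * and call site shown here are illustrative assumptions.
 */
extern dev_t rootdev;
extern struct vnode *rootvp;

static void
example_setroot()
{

	if (bdevvp(rootdev, &rootvp))
		panic("example_setroot: cannot set up root vnode");
}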

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		if (vget(vp, 1))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it.  The vnode lock bit is set while
 * the vnode is being eliminated in vgone.  The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, lockflag)
	register struct vnode *vp;
	int lockflag;
{

	/*
	 * If the vnode is in the process of being cleaned out for another
	 * use, we wait for the cleaning to finish and then return failure.
	 * Cleaning is determined either by checking that the VXLOCK flag is
	 * set, or that the use count is zero with the back pointer set to
	 * show that it has been removed from the free list by getnewvnode.
	 * The VXLOCK flag may not have been set yet because vclean is blocked
	 * in the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
	 */
	if ((vp->v_flag & VXLOCK) ||
	    (vp->v_usecount == 0 &&
		vp->v_freelist.tqe_prev == (struct vnode **) 0xdeadb)) {
		vp->v_flag |= VXWANT;
		(void) tsleep((caddr_t) vp, PINOD, "vget", 0);
		return (1);
	}
	if (vp->v_usecount == 0) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
	}
	vp->v_usecount++;
	if (lockflag)
		VOP_LOCK(vp);
	return (0);
}

/*
 * Vnode reference, just increment the count.
 */
void
vref(vp)
	struct vnode *vp;
{

	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele().
 */
void
vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 /* || vp->v_writecount < 0 */ ) {
		vprint("vrele: negative ref count", vp);
		panic("vrele: negative reference cnt");
	}
#endif
	if (vp->v_flag & VAGE) {
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		vp->v_flag &= ~VAGE;
	} else {
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	}
	freevnodes++;

	VOP_INACTIVE(vp);
}
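
/*
 * Editorial sketch (not part of this revision): the reference-count
 * discipline implemented by vget()/vput()/vrele() above.  A holder
 * takes a reference (and the lock, when lockflag is set) with vget()
 * and drops both with vput(), or just the reference with vrele().
 * `example_use_vnode' and `do_locked_work' are hypothetical.
 */
extern int do_locked_work();

static int
example_use_vnode(vp)
	register struct vnode *vp;
{
	int error;

	if (vget(vp, 1))		/* reference + VOP_LOCK; may fail */
		return (ENOENT);	/* vnode was being recycled */
	error = do_locked_work(vp);
	vput(vp);			/* VOP_UNLOCK + vrele in one call */
	return (error);
}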

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active vnodes;
 * an error is returned if any are found (nb: this is a user error,
 * not a system error). If MNT_FORCE is specified, detach any active
 * vnodes that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;		/* print out busy vnodes */
struct ctldebug debug1 = {"busyprt", &busyprt};

#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode. For block
		 * or character devices, revert to an anonymous device. For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}
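
/*
 * Editorial sketch (not part of this revision): an unmount path uses
 * vflush() to empty the mount's vnode list, sparing a vnode it still
 * needs (e.g. the underlying block device vnode as skipvp) and
 * mapping MNT_FORCE onto FORCECLOSE.  Hypothetical condensed form:
 */
static int
example_unmount_flush(mp, devvp, mntflags)
	struct mount *mp;
	struct vnode *devvp;
	int mntflags;
{
	int flags = 0;

	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;
	/* EBUSY comes back if any non-skipped vnode remains active. */
	return (vflush(mp, devvp, flags));
}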

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	int active;

	/*
	 * Check to see if the vnode is in use. If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		VREF(vp);
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still have
	 * the object locked while it cleans it out. The VOP_LOCK ensures that
	 * the VOP_INACTIVE routine is done with its work. For active vnodes,
	 * it ensures that no other activity can occur while the underlying
	 * object is being cleaned out.
	 */
	VOP_LOCK(vp);
	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * Any other processes trying to obtain this lock must first wait for
	 * VXLOCK to clear, then call the new lock operation.
	 */
	VOP_UNLOCK(vp);
	/*
	 * If purging an active vnode, it must be closed and deactivated
	 * before being reclaimed.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t) vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress, wait until
		 * it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			(void) tsleep((caddr_t) vp, PINOD, "vgall", 0);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we are eliminating
		 * its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will really eliminate
		 * the vnode after which time vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress, wait until it is
	 * done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		(void) tsleep((caddr_t) vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head, move it to
	 * the head of the list. The test of the back pointer and the
	 * reference count of zero is because it will be removed from the free
	 * list by getnewvnode, but will not have its reference count
	 * incremented until after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to close the
	 * previous instance of the underlying object. So, the back pointer is
	 * explicitly set to `0xdeadb' in getnewvnode after removing it from
	 * the freelist to ensure that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev != (struct vnode **) 0xdeadb &&
	    vnode_free_list.tqh_first != vp) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (1);
	}
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %ld,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next)
			if (VOP_ISLOCKED(vp))
				vprint((char *) 0, vp);
	}
}
#endif

int kinfo_vdebug = 1;
int kinfo_vgetfailed;

#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep)
	char *where;
	size_t *sizep;
{
	register struct mount *mp, *nmp;
	struct vnode *vp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
		nmp = mp->mnt_list.tqe_next;
		if (vfs_busy(mp))
			continue;
		savebp = bp;
again:
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next) {
			/*
			 * Check that the vp is still associated with this
			 * filesystem.  RACE: could have been recycled onto
			 * the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				*sizep = bp - where;
				return (ENOMEM);
			}
			if ((error = copyout((caddr_t) &vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t) vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
		}
		vfs_unbusy(mp);
	}

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON)
				return (EBUSY);
		}
	}
	return (0);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t) np, i);
	saddr = (struct sockaddr *) (np + 1);
	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
		error = copyin(argp->ex_addr, (caddr_t) smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, so do it on demand here.
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **) &nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	caddr_t w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *) w;

	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t) rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i])) {
			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
			    (caddr_t) rnh);
			free((caddr_t) rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if ((error = vfs_hang_addrlist(mp, nep, argp)))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr) ((caddr_t) saddr,
				    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}
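
/*
 * Editorial sketch (not part of this revision): the NFS server side
 * passes the client's address (as an mbuf) to vfs_export_lookup() to
 * find the export entry whose flags and anonymous credentials govern
 * the request; no entry means no access.  `example_export_check' is
 * hypothetical, as is treating MNT_EXRDONLY as a write refusal here.
 */
static int
example_export_check(mp, nep, nam, wantwrite)
	struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
	int wantwrite;
{
	struct netcred *np;

	if ((np = vfs_export_lookup(mp, nep, nam)) == NULL)
		return (EACCES);	/* not exported to this client */
	if (wantwrite && (np->netc_exflags & MNT_EXRDONLY))
		return (EROFS);		/* exported read-only */
	return (0);
}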

/*
 * Perform msync on all vnodes under a mount point.
 * The mount point must be locked.
 */
void
vfs_msync(struct mount *mp, int flags)
{
	struct vnode *vp;

loop:
	for (vp = mp->mnt_vnodelist.lh_first;
	     vp != NULL;
	     vp = vp->v_mntvnodes.le_next) {

		if (vp->v_mount != mp)
			goto loop;
		if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))
			continue;
		if (vp->v_object &&
		   (((vm_object_t) vp->v_object)->flags & OBJ_WRITEABLE)) {
			vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE);
		}
	}
}
1538