vfs_subr.c revision 29323
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 * $Id: vfs_subr.c,v 1.101 1997/09/07 16:20:46 bde Exp $
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"
#include "opt_devfs.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/dirent.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

#ifdef DDB
extern void	printlockedvnodes __P((void));
#endif
static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
static void	vgonel __P((struct vnode *vp, struct proc *p));
unsigned long	numvnodes;
SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
static void	vputrele __P((struct vnode *vp, int put));

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
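/*
 * Illustrative sketch (not part of the original file): the two tables
 * above back the IFTOVT() and VTTOIF() macros in <sys/vnode.h>, which
 * translate between the on-disk S_IF* mode bits and the in-core vnode
 * type.  The helper names below are hypothetical and shown only to
 * demonstrate the mapping.
 */
#if 0	/* example only */
static enum vtype
example_mode_to_vtype(mode_t mode)
{

	/* IFTOVT() indexes iftovt_tab[] by the file-format bits. */
	return (IFTOVT(mode));
}

static mode_t
example_vtype_to_mode(enum vtype type, mode_t perm)
{

	/* VTTOIF() indexes vttoif_tab[] to recover the S_IF* bits. */
	return (VTTOIF(type) | (perm & 07777));
}
#endif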

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
static u_long freevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");

struct mntlist mountlist;	/* mounted filesystem list */
struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
static struct simplelock spechash_slock;
struct nfs_public nfs_pub;	/* publicly exported FS */

int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");

static void	vfs_free_addrlist __P((struct netexport *nep));
static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
				       struct export_args *argp));

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	desiredvnodes = maxproc + vm_object_cache_max;
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
}
131
132/*
133 * Mark a mount point as busy. Used to synchronize access and to delay
134 * unmounting. Interlock is not released on failure.
135 */
136int
137vfs_busy(mp, flags, interlkp, p)
138	struct mount *mp;
139	int flags;
140	struct simplelock *interlkp;
141	struct proc *p;
142{
143	int lkflags;
144
145	if (mp->mnt_flag & MNT_UNMOUNT) {
146		if (flags & LK_NOWAIT)
147			return (ENOENT);
148		mp->mnt_flag |= MNT_MWAIT;
149		if (interlkp) {
150			simple_unlock(interlkp);
151		}
152		/*
153		 * Since all busy locks are shared except the exclusive
154		 * lock granted when unmounting, the only place that a
155		 * wakeup needs to be done is at the release of the
156		 * exclusive lock at the end of dounmount.
157		 */
158		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
159		if (interlkp) {
160			simple_lock(interlkp);
161		}
162		return (ENOENT);
163	}
164	lkflags = LK_SHARED;
165	if (interlkp)
166		lkflags |= LK_INTERLOCK;
167	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
168		panic("vfs_busy: unexpected lock failure");
169	return (0);
170}
171
172/*
173 * Free a busy filesystem.
174 */
175void
176vfs_unbusy(mp, p)
177	struct mount *mp;
178	struct proc *p;
179{
180
181	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
182}
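/*
 * Illustrative sketch (not part of the original file): a caller that
 * walks the mount list normally brackets each mount point with
 * vfs_busy()/vfs_unbusy(), handing in the mountlist interlock so that
 * a filesystem in the middle of being unmounted is skipped rather than
 * waited on.  The function name is hypothetical; the same pattern is
 * used later in this file by printlockedvnodes() and sysctl_vnode().
 */
#if 0	/* example only */
static void
example_foreach_mount(struct proc *p)
{
	struct mount *mp, *nmp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		/* ... operate on the busied mount point here ... */
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif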
183
184/*
185 * Lookup a filesystem type, and if found allocate and initialize
186 * a mount structure for it.
187 *
188 * Devname is usually updated by mount(8) after booting.
189 */
190int
191vfs_rootmountalloc(fstypename, devname, mpp)
192	char *fstypename;
193	char *devname;
194	struct mount **mpp;
195{
196	struct proc *p = curproc;	/* XXX */
197	struct vfsconf *vfsp;
198	struct mount *mp;
199
200	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
201		if (!strcmp(vfsp->vfc_name, fstypename))
202			break;
203	if (vfsp == NULL)
204		return (ENODEV);
205	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
206	bzero((char *)mp, (u_long)sizeof(struct mount));
207	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
208	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
209	LIST_INIT(&mp->mnt_vnodelist);
210	mp->mnt_vfc = vfsp;
211	mp->mnt_op = vfsp->vfc_vfsops;
212	mp->mnt_flag = MNT_RDONLY;
213	mp->mnt_vnodecovered = NULLVP;
214	vfsp->vfc_refcount++;
215	mp->mnt_stat.f_type = vfsp->vfc_typenum;
216	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
217	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
218	mp->mnt_stat.f_mntonname[0] = '/';
219	mp->mnt_stat.f_mntonname[1] = 0;
220	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
221	*mpp = mp;
222	return (0);
223}
224
/*
 * Find an appropriate filesystem to use for the root.  If a filesystem
 * has not been preselected, walk through the list of known filesystems,
 * trying those that have mountroot routines until one works or we have
 * tried them all.
 */
231#ifdef notdef	/* XXX JH */
232int
233lite2_vfs_mountroot(void)
234{
235	struct vfsconf *vfsp;
236	extern int (*lite2_mountroot)(void);
237	int error;
238
239	if (lite2_mountroot != NULL)
240		return ((*lite2_mountroot)());
241	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
242		if (vfsp->vfc_mountroot == NULL)
243			continue;
244		if ((error = (*vfsp->vfc_mountroot)()) == 0)
245			return (0);
246		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
247	}
248	return (ENODEV);
249}
250#endif
251
252/*
253 * Lookup a mount point by filesystem identifier.
254 */
255struct mount *
256vfs_getvfs(fsid)
257	fsid_t *fsid;
258{
259	register struct mount *mp;
260
261	simple_lock(&mountlist_slock);
262	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
263	    mp = mp->mnt_list.cqe_next) {
264		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
265		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
266			simple_unlock(&mountlist_slock);
267			return (mp);
268	    }
269	}
270	simple_unlock(&mountlist_slock);
271	return ((struct mount *) 0);
272}
273
274/*
275 * Get a new unique fsid
276 */
277void
278vfs_getnewfsid(mp)
279	struct mount *mp;
280{
281	static u_short xxxfs_mntid;
282
283	fsid_t tfsid;
284	int mtype;
285
286	simple_lock(&mntid_slock);
287	mtype = mp->mnt_vfc->vfc_typenum;
288	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
289	mp->mnt_stat.f_fsid.val[1] = mtype;
290	if (xxxfs_mntid == 0)
291		++xxxfs_mntid;
292	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
293	tfsid.val[1] = mtype;
294	if (mountlist.cqh_first != (void *)&mountlist) {
295		while (vfs_getvfs(&tfsid)) {
296			tfsid.val[0]++;
297			xxxfs_mntid++;
298		}
299	}
300	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
301	simple_unlock(&mntid_slock);
302}
303
304/*
305 * Set vnode attributes to VNOVAL
306 */
307void
308vattr_null(vap)
309	register struct vattr *vap;
310{
311
312	vap->va_type = VNON;
313	vap->va_size = VNOVAL;
314	vap->va_bytes = VNOVAL;
315	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
316	    vap->va_fsid = vap->va_fileid =
317	    vap->va_blocksize = vap->va_rdev =
318	    vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
319	    vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
320	    vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
321	    vap->va_flags = vap->va_gen = VNOVAL;
322	vap->va_vaflags = 0;
323}
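/*
 * Illustrative sketch (not part of the original file): vattr_null() is
 * typically used by callers that build a struct vattr for VOP_SETATTR()
 * so that every field the caller does not explicitly set stays VNOVAL
 * and is therefore left untouched by the filesystem.  The helper name
 * is hypothetical, and the vnode is assumed to be locked by the caller.
 */
#if 0	/* example only */
static int
example_set_size(struct vnode *vp, u_quad_t size, struct ucred *cred,
    struct proc *p)
{
	struct vattr va;

	vattr_null(&va);
	va.va_size = size;
	return (VOP_SETATTR(vp, &va, cred, p));
}
#endif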
324
325/*
326 * Routines having to do with the management of the vnode table.
327 */
328extern vop_t **dead_vnodeop_p;
329
330/*
331 * Return the next vnode from the free list.
332 */
333int
334getnewvnode(tag, mp, vops, vpp)
335	enum vtagtype tag;
336	struct mount *mp;
337	vop_t **vops;
338	struct vnode **vpp;
339{
340	struct proc *p = curproc;	/* XXX */
341	struct vnode *vp;
342
	/*
	 * We take the least recently used vnode from the freelist
	 * if we can get it, it has no cached pages, and no namecache
	 * entries refer to it.  Otherwise we allocate a new vnode.
	 */
349
350	simple_lock(&vnode_free_list_slock);
351
352	if (freevnodes >= desiredvnodes) {
353		TAILQ_FOREACH(vp, &vnode_free_list, v_freelist) {
354			if (!simple_lock_try(&vp->v_interlock))
355				continue;
356			if (vp->v_usecount)
357				panic("free vnode isn't");
358
359			if (vp->v_object && vp->v_object->resident_page_count) {
360				/* Don't recycle if it's caching some pages */
361				simple_unlock(&vp->v_interlock);
362				continue;
363			} else if (LIST_FIRST(&vp->v_cache_src)) {
364				/* Don't recycle if active in the namecache */
365				simple_unlock(&vp->v_interlock);
366				continue;
367			} else {
368				break;
369			}
370		}
371	} else {
372		vp = NULL;
373	}
374
375	if (vp) {
376		vp->v_flag |= VDOOMED;
377		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
378		freevnodes--;
379		simple_unlock(&vnode_free_list_slock);
380		cache_purge(vp);
381		vp->v_lease = NULL;
382		if (vp->v_type != VBAD)
383			vgonel(vp, p);
384		else {
385			simple_unlock(&vp->v_interlock);
386		}
387
388#ifdef DIAGNOSTIC
389		{
390			int s;
391
392			if (vp->v_data)
393				panic("cleaned vnode isn't");
394			s = splbio();
395			if (vp->v_numoutput)
396				panic("Clean vnode has pending I/O's");
397			splx(s);
398		}
399#endif
400		vp->v_flag = 0;
401		vp->v_lastr = 0;
402		vp->v_lastw = 0;
403		vp->v_lasta = 0;
404		vp->v_cstart = 0;
405		vp->v_clen = 0;
406		vp->v_socket = 0;
407		vp->v_writecount = 0;	/* XXX */
408	} else {
409		simple_unlock(&vnode_free_list_slock);
410		vp = (struct vnode *) malloc((u_long) sizeof *vp,
411		    M_VNODE, M_WAITOK);
412		bzero((char *) vp, sizeof *vp);
413		vp->v_dd = vp;
414		cache_purge(vp);
415		LIST_INIT(&vp->v_cache_src);
416		TAILQ_INIT(&vp->v_cache_dst);
417		numvnodes++;
418	}
419
420	vp->v_type = VNON;
421	vp->v_tag = tag;
422	vp->v_op = vops;
423	insmntque(vp, mp);
424	*vpp = vp;
425	vp->v_usecount = 1;
426	vp->v_data = 0;
427	return (0);
428}
429
430/*
431 * Move a vnode from one mount queue to another.
432 */
433void
434insmntque(vp, mp)
435	register struct vnode *vp;
436	register struct mount *mp;
437{
438
439	simple_lock(&mntvnode_slock);
440	/*
441	 * Delete from old mount point vnode list, if on one.
442	 */
443	if (vp->v_mount != NULL)
444		LIST_REMOVE(vp, v_mntvnodes);
445	/*
446	 * Insert into list of vnodes for the new mount point, if available.
447	 */
448	if ((vp->v_mount = mp) == NULL) {
449		simple_unlock(&mntvnode_slock);
450		return;
451	}
452	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
453	simple_unlock(&mntvnode_slock);
454}
455
456/*
457 * Update outstanding I/O count and do wakeup if requested.
458 */
459void
460vwakeup(bp)
461	register struct buf *bp;
462{
463	register struct vnode *vp;
464
465	bp->b_flags &= ~B_WRITEINPROG;
466	if ((vp = bp->b_vp)) {
467		vp->v_numoutput--;
468		if (vp->v_numoutput < 0)
469			panic("vwakeup: neg numoutput");
470		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
471			vp->v_flag &= ~VBWAIT;
472			wakeup((caddr_t) &vp->v_numoutput);
473		}
474	}
475}
476
477/*
478 * Flush out and invalidate all buffers associated with a vnode.
479 * Called with the underlying object locked.
480 */
481int
482vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
483	register struct vnode *vp;
484	int flags;
485	struct ucred *cred;
486	struct proc *p;
487	int slpflag, slptimeo;
488{
489	register struct buf *bp;
490	struct buf *nbp, *blist;
491	int s, error;
492	vm_object_t object;
493
494	if (flags & V_SAVE) {
495		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
496			return (error);
497		if (vp->v_dirtyblkhd.lh_first != NULL)
498			panic("vinvalbuf: dirty bufs");
499	}
500
501	s = splbio();
502	for (;;) {
503		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
504			while (blist && blist->b_lblkno < 0)
505				blist = blist->b_vnbufs.le_next;
506		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
507		    (flags & V_SAVEMETA))
508			while (blist && blist->b_lblkno < 0)
509				blist = blist->b_vnbufs.le_next;
510		if (!blist)
511			break;
512
513		for (bp = blist; bp; bp = nbp) {
514			nbp = bp->b_vnbufs.le_next;
515			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
516				continue;
517			if (bp->b_flags & B_BUSY) {
518				bp->b_flags |= B_WANTED;
519				error = tsleep((caddr_t) bp,
520				    slpflag | (PRIBIO + 1), "vinvalbuf",
521				    slptimeo);
522				if (error) {
523					splx(s);
524					return (error);
525				}
526				break;
527			}
528			bremfree(bp);
529			bp->b_flags |= B_BUSY;
530			/*
531			 * XXX Since there are no node locks for NFS, I
532			 * believe there is a slight chance that a delayed
533			 * write will occur while sleeping just above, so
534			 * check for it.
535			 */
536			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
537				(void) VOP_BWRITE(bp);
538				break;
539			}
540			bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
541			brelse(bp);
542		}
543	}
544
545	while (vp->v_numoutput > 0) {
546		vp->v_flag |= VBWAIT;
547		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
548	}
549
550	splx(s);
551
552	/*
553	 * Destroy the copy in the VM cache, too.
554	 */
555	object = vp->v_object;
556	if (object != NULL) {
557		vm_object_page_remove(object, 0, object->size,
558		    (flags & V_SAVE) ? TRUE : FALSE);
559	}
560	if (!(flags & V_SAVEMETA) &&
561	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
562		panic("vinvalbuf: flush failed");
563	return (0);
564}
565
566/*
567 * Associate a buffer with a vnode.
568 */
569void
570bgetvp(vp, bp)
571	register struct vnode *vp;
572	register struct buf *bp;
573{
574	int s;
575
576	if (bp->b_vp)
577		panic("bgetvp: not free");
578	vhold(vp);
579	bp->b_vp = vp;
580	if (vp->v_type == VBLK || vp->v_type == VCHR)
581		bp->b_dev = vp->v_rdev;
582	else
583		bp->b_dev = NODEV;
584	/*
585	 * Insert onto list for new vnode.
586	 */
587	s = splbio();
588	bufinsvn(bp, &vp->v_cleanblkhd);
589	splx(s);
590}
591
592/*
593 * Disassociate a buffer from a vnode.
594 */
595void
596brelvp(bp)
597	register struct buf *bp;
598{
599	struct vnode *vp;
600	int s;
601
602	if (bp->b_vp == (struct vnode *) 0)
603		panic("brelvp: NULL");
604	/*
605	 * Delete from old vnode list, if on one.
606	 */
607	s = splbio();
608	if (bp->b_vnbufs.le_next != NOLIST)
609		bufremvn(bp);
610	splx(s);
611
612	vp = bp->b_vp;
613	bp->b_vp = (struct vnode *) 0;
614	vdrop(vp);
615}
616
617/*
618 * Associate a p-buffer with a vnode.
619 */
620void
621pbgetvp(vp, bp)
622	register struct vnode *vp;
623	register struct buf *bp;
624{
625#if defined(DIAGNOSTIC)
626	if (bp->b_vp)
627		panic("pbgetvp: not free");
628#endif
629	bp->b_vp = vp;
630	if (vp->v_type == VBLK || vp->v_type == VCHR)
631		bp->b_dev = vp->v_rdev;
632	else
633		bp->b_dev = NODEV;
634}
635
636/*
637 * Disassociate a p-buffer from a vnode.
638 */
639void
640pbrelvp(bp)
641	register struct buf *bp;
642{
643	struct vnode *vp;
644
645#if defined(DIAGNOSTIC)
646	if (bp->b_vp == (struct vnode *) 0)
647		panic("pbrelvp: NULL");
648#endif
649
650	bp->b_vp = (struct vnode *) 0;
651}
652
653/*
654 * Reassign a buffer from one vnode to another.
655 * Used to assign file specific control information
656 * (indirect blocks) to the vnode to which they belong.
657 */
658void
659reassignbuf(bp, newvp)
660	register struct buf *bp;
661	register struct vnode *newvp;
662{
663	int s;
664
665	if (newvp == NULL) {
666		printf("reassignbuf: NULL");
667		return;
668	}
669
670	s = splbio();
671	/*
672	 * Delete from old vnode list, if on one.
673	 */
674	if (bp->b_vnbufs.le_next != NOLIST) {
675		bufremvn(bp);
676		vdrop(bp->b_vp);
677	}
678	/*
679	 * If dirty, put on list of dirty buffers; otherwise insert onto list
680	 * of clean buffers.
681	 */
682	if (bp->b_flags & B_DELWRI) {
683		struct buf *tbp;
684
685		tbp = newvp->v_dirtyblkhd.lh_first;
686		if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
687			bufinsvn(bp, &newvp->v_dirtyblkhd);
688		} else {
689			while (tbp->b_vnbufs.le_next &&
690				(tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
691				tbp = tbp->b_vnbufs.le_next;
692			}
693			LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
694		}
695	} else {
696		bufinsvn(bp, &newvp->v_cleanblkhd);
697	}
698	bp->b_vp = newvp;
699	vhold(bp->b_vp);
700	splx(s);
701}
702
703#ifndef DEVFS_ROOT
704/*
705 * Create a vnode for a block device.
706 * Used for mounting the root file system.
707 */
708int
709bdevvp(dev, vpp)
710	dev_t dev;
711	struct vnode **vpp;
712{
713	register struct vnode *vp;
714	struct vnode *nvp;
715	int error;
716
717	if (dev == NODEV)
718		return (0);
719	error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
720	if (error) {
721		*vpp = 0;
722		return (error);
723	}
724	vp = nvp;
725	vp->v_type = VBLK;
726	if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
727		vput(vp);
728		vp = nvp;
729	}
730	*vpp = vp;
731	return (0);
732}
733#endif /* !DEVFS_ROOT */
734
735/*
736 * Check to see if the new vnode represents a special device
737 * for which we already have a vnode (either because of
738 * bdevvp() or because of a different vnode representing
739 * the same block device). If such an alias exists, deallocate
740 * the existing contents and return the aliased vnode. The
741 * caller is responsible for filling it with its new contents.
742 */
743struct vnode *
744checkalias(nvp, nvp_rdev, mp)
745	register struct vnode *nvp;
746	dev_t nvp_rdev;
747	struct mount *mp;
748{
749	struct proc *p = curproc;	/* XXX */
750	struct vnode *vp;
751	struct vnode **vpp;
752
753	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
754		return (NULLVP);
755
756	vpp = &speclisth[SPECHASH(nvp_rdev)];
757loop:
758	simple_lock(&spechash_slock);
759	for (vp = *vpp; vp; vp = vp->v_specnext) {
760		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
761			continue;
762		/*
763		 * Alias, but not in use, so flush it out.
764		 */
765		simple_lock(&vp->v_interlock);
766		if (vp->v_usecount == 0) {
767			simple_unlock(&spechash_slock);
768			vgonel(vp, p);
769			goto loop;
770		}
771		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
772			simple_unlock(&spechash_slock);
773			goto loop;
774		}
775		break;
776	}
777	if (vp == NULL || vp->v_tag != VT_NON) {
778		MALLOC(nvp->v_specinfo, struct specinfo *,
779		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
780		nvp->v_rdev = nvp_rdev;
781		nvp->v_hashchain = vpp;
782		nvp->v_specnext = *vpp;
783		nvp->v_specflags = 0;
784		simple_unlock(&spechash_slock);
785		*vpp = nvp;
786		if (vp != NULLVP) {
787			nvp->v_flag |= VALIASED;
788			vp->v_flag |= VALIASED;
789			vput(vp);
790		}
791		return (NULLVP);
792	}
793	simple_unlock(&spechash_slock);
794	VOP_UNLOCK(vp, 0, p);
795	simple_lock(&vp->v_interlock);
796	vclean(vp, 0, p);
797	vp->v_op = nvp->v_op;
798	vp->v_tag = nvp->v_tag;
799	nvp->v_type = VNON;
800	insmntque(vp, mp);
801	return (vp);
802}
803
/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it.  The vnode lock bit is set if the
 * vnode is being eliminated in vgone.  The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
812int
813vget(vp, flags, p)
814	register struct vnode *vp;
815	int flags;
816	struct proc *p;
817{
818	int error;
819
820	/*
821	 * If the vnode is in the process of being cleaned out for
822	 * another use, we wait for the cleaning to finish and then
823	 * return failure. Cleaning is determined by checking that
824	 * the VXLOCK flag is set.
825	 */
826	if ((flags & LK_INTERLOCK) == 0) {
827		simple_lock(&vp->v_interlock);
828	}
829	if (vp->v_flag & VXLOCK) {
830		vp->v_flag |= VXWANT;
831		simple_unlock(&vp->v_interlock);
832		tsleep((caddr_t)vp, PINOD, "vget", 0);
833		return (ENOENT);
834	}
835	vp->v_usecount++;
836	if (VSHOULDBUSY(vp))
837		vbusy(vp);
838	/*
839	 * Create the VM object, if needed
840	 */
841	if ((vp->v_type == VREG) &&
842		((vp->v_object == NULL) ||
843			(vp->v_object->flags & OBJ_VFS_REF) == 0 ||
844			(vp->v_object->flags & OBJ_DEAD))) {
845		/*
846		 * XXX vfs_object_create probably needs the interlock.
847		 */
848		simple_unlock(&vp->v_interlock);
849		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
850		simple_lock(&vp->v_interlock);
851	}
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0)
			vrele(vp);
		return (error);
	}
857	simple_unlock(&vp->v_interlock);
858	return (0);
859}
860
/*
 * Stubs to use when there is no locking to be done on the underlying object.
 * A minimal shared lock is necessary to ensure that the underlying object
 * is not revoked while an operation is in progress.  So, an active shared
 * count is maintained in an auxiliary vnode lock structure.
 */
867int
868vop_sharedlock(ap)
869	struct vop_lock_args /* {
870		struct vnode *a_vp;
871		int a_flags;
872		struct proc *a_p;
873	} */ *ap;
874{
875	/*
876	 * This code cannot be used until all the non-locking filesystems
877	 * (notably NFS) are converted to properly lock and release nodes.
878	 * Also, certain vnode operations change the locking state within
879	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
880	 * and symlink). Ideally these operations should not change the
881	 * lock state, but should be changed to let the caller of the
882	 * function unlock them. Otherwise all intermediate vnode layers
883	 * (such as union, umapfs, etc) must catch these functions to do
884	 * the necessary locking at their layer. Note that the inactive
885	 * and lookup operations also change their lock state, but this
886	 * cannot be avoided, so these two operations will always need
887	 * to be handled in intermediate layers.
888	 */
889	struct vnode *vp = ap->a_vp;
890	int vnflags, flags = ap->a_flags;
891
892	if (vp->v_vnlock == NULL) {
893		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
894			return (0);
895		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
896		    M_VNODE, M_WAITOK);
897		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
898	}
899	switch (flags & LK_TYPE_MASK) {
900	case LK_DRAIN:
901		vnflags = LK_DRAIN;
902		break;
903	case LK_EXCLUSIVE:
904#ifdef DEBUG_VFS_LOCKS
905		/*
906		 * Normally, we use shared locks here, but that confuses
907		 * the locking assertions.
908		 */
909		vnflags = LK_EXCLUSIVE;
910		break;
911#endif
912	case LK_SHARED:
913		vnflags = LK_SHARED;
914		break;
915	case LK_UPGRADE:
916	case LK_EXCLUPGRADE:
917	case LK_DOWNGRADE:
918		return (0);
919	case LK_RELEASE:
920	default:
921		panic("vop_sharedlock: bad operation %d", flags & LK_TYPE_MASK);
922	}
923	if (flags & LK_INTERLOCK)
924		vnflags |= LK_INTERLOCK;
925	return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
926}
927
/*
 * Stubs to use when there is no locking to be done on the underlying object.
 * A minimal shared lock is necessary to ensure that the underlying object
 * is not revoked while an operation is in progress.  So, an active shared
 * count is maintained in an auxiliary vnode lock structure.
 */
934int
935vop_nolock(ap)
936	struct vop_lock_args /* {
937		struct vnode *a_vp;
938		int a_flags;
939		struct proc *a_p;
940	} */ *ap;
941{
942#ifdef notyet
943	/*
944	 * This code cannot be used until all the non-locking filesystems
945	 * (notably NFS) are converted to properly lock and release nodes.
946	 * Also, certain vnode operations change the locking state within
947	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
948	 * and symlink). Ideally these operations should not change the
949	 * lock state, but should be changed to let the caller of the
950	 * function unlock them. Otherwise all intermediate vnode layers
951	 * (such as union, umapfs, etc) must catch these functions to do
952	 * the necessary locking at their layer. Note that the inactive
953	 * and lookup operations also change their lock state, but this
954	 * cannot be avoided, so these two operations will always need
955	 * to be handled in intermediate layers.
956	 */
957	struct vnode *vp = ap->a_vp;
958	int vnflags, flags = ap->a_flags;
959
960	if (vp->v_vnlock == NULL) {
961		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
962			return (0);
963		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
964		    M_VNODE, M_WAITOK);
965		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
966	}
967	switch (flags & LK_TYPE_MASK) {
968	case LK_DRAIN:
969		vnflags = LK_DRAIN;
970		break;
971	case LK_EXCLUSIVE:
972	case LK_SHARED:
973		vnflags = LK_SHARED;
974		break;
975	case LK_UPGRADE:
976	case LK_EXCLUPGRADE:
977	case LK_DOWNGRADE:
978		return (0);
979	case LK_RELEASE:
980	default:
981		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
982	}
983	if (flags & LK_INTERLOCK)
984		vnflags |= LK_INTERLOCK;
985	return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
986#else /* for now */
987	/*
988	 * Since we are not using the lock manager, we must clear
989	 * the interlock here.
990	 */
991	if (ap->a_flags & LK_INTERLOCK) {
992		simple_unlock(&ap->a_vp->v_interlock);
993	}
994	return (0);
995#endif
996}
997
998/*
999 * Do the inverse of vop_nolock, handling the interlock in a compatible way.
1000 */
1001int
1002vop_nounlock(ap)
1003	struct vop_unlock_args /* {
1004		struct vnode *a_vp;
1005		int a_flags;
1006		struct proc *a_p;
1007	} */ *ap;
1008{
1009	struct vnode *vp = ap->a_vp;
1010
1011	if (vp->v_vnlock == NULL) {
1012		if (ap->a_flags & LK_INTERLOCK)
1013			simple_unlock(&ap->a_vp->v_interlock);
1014		return (0);
1015	}
1016	return (lockmgr(vp->v_vnlock, LK_RELEASE | ap->a_flags,
1017		&ap->a_vp->v_interlock, ap->a_p));
1018}
1019
/*
 * Return whether or not the node is locked.
 */
1023int
1024vop_noislocked(ap)
1025	struct vop_islocked_args /* {
1026		struct vnode *a_vp;
1027	} */ *ap;
1028{
1029	struct vnode *vp = ap->a_vp;
1030
1031	if (vp->v_vnlock == NULL)
1032		return (0);
1033	return (lockstatus(vp->v_vnlock));
1034}
1035
1036/* #ifdef DIAGNOSTIC */
1037/*
1038 * Vnode reference, just increment the count
1039 */
1040void
1041vref(vp)
1042	struct vnode *vp;
1043{
1044	simple_lock(&vp->v_interlock);
1045	if (vp->v_usecount <= 0)
1046		panic("vref used where vget required");
1047
1048	vp->v_usecount++;
1049
1050	if ((vp->v_type == VREG) &&
1051		((vp->v_object == NULL) ||
1052			((vp->v_object->flags & OBJ_VFS_REF) == 0) ||
1053			(vp->v_object->flags & OBJ_DEAD))) {
		/*
		 * We need to lock the VP during the time that
		 * the object is created.  This is necessary to
		 * keep the system from re-entrantly doing it
		 * multiple times.
		 * XXX vfs_object_create probably needs the interlock?
		 */
1061		simple_unlock(&vp->v_interlock);
1062		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
1063		return;
1064	}
1065	simple_unlock(&vp->v_interlock);
1066}
1067
1068/*
1069 * Vnode put/release.
1070 * If count drops to zero, call inactive routine and return to freelist.
1071 */
1072static void
1073vputrele(vp, put)
1074	struct vnode *vp;
1075	int put;
1076{
1077	struct proc *p = curproc;	/* XXX */
1078
1079#ifdef DIAGNOSTIC
1080	if (vp == NULL)
1081		panic("vputrele: null vp");
1082#endif
1083	simple_lock(&vp->v_interlock);
1084
1085	if ((vp->v_usecount == 2) &&
1086		vp->v_object &&
1087		(vp->v_object->flags & OBJ_VFS_REF)) {
1088		vp->v_usecount--;
1089		vp->v_object->flags &= ~OBJ_VFS_REF;
1090		if (put) {
1091			VOP_UNLOCK(vp, LK_INTERLOCK, p);
1092		} else {
1093			simple_unlock(&vp->v_interlock);
1094		}
1095		vm_object_deallocate(vp->v_object);
1096		return;
1097	}
1098
1099	if (vp->v_usecount > 1) {
1100		vp->v_usecount--;
1101		if (put) {
1102			VOP_UNLOCK(vp, LK_INTERLOCK, p);
1103		} else {
1104			simple_unlock(&vp->v_interlock);
1105		}
1106		return;
1107	}
1108
1109	if (vp->v_usecount < 1) {
1110#ifdef DIAGNOSTIC
1111		vprint("vputrele: negative ref count", vp);
1112#endif
1113		panic("vputrele: negative ref cnt");
1114	}
1115
1116	vp->v_usecount--;
1117	if (VSHOULDFREE(vp))
1118		vfree(vp);
1119	/*
1120	 * If we are doing a vput, the node is already locked, and we must
1121	 * call VOP_INACTIVE with the node locked.  So, in the case of
1122	 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
1123	 */
1124	if (put) {
1125		simple_unlock(&vp->v_interlock);
1126		VOP_INACTIVE(vp, p);
1127	} else if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1128		VOP_INACTIVE(vp, p);
1129	}
1130}
1131
1132/*
1133 * vput(), just unlock and vrele()
1134 */
1135void
1136vput(vp)
1137	struct vnode *vp;
1138{
1139	vputrele(vp, 1);
1140}
1141
1142void
1143vrele(vp)
1144	struct vnode *vp;
1145{
1146	vputrele(vp, 0);
1147}
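/*
 * Illustrative sketch (not part of the original file): the usual
 * reference-counting pattern is vget() to obtain a reference (and,
 * with LK_EXCLUSIVE, the vnode lock), then vput() to unlock and
 * release in one step, or vrele() when the vnode is not locked.
 * The function name is hypothetical.
 */
#if 0	/* example only */
static int
example_touch_vnode(struct vnode *vp, struct proc *p)
{
	int error;

	error = vget(vp, LK_EXCLUSIVE, p);
	if (error)
		return (error);
	/* ... use the locked, referenced vnode here ... */
	vput(vp);
	return (0);
}
#endif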
1148
1149/*
1150 * Somebody doesn't want the vnode recycled.
1151 */
1152void
1153vhold(vp)
1154	register struct vnode *vp;
1155{
1156
1157	simple_lock(&vp->v_interlock);
1158	vp->v_holdcnt++;
1159	if (VSHOULDBUSY(vp))
1160		vbusy(vp);
1161	simple_unlock(&vp->v_interlock);
1162}
1163
1164/*
1165 * One less who cares about this vnode.
1166 */
1167void
1168vdrop(vp)
1169	register struct vnode *vp;
1170{
1171
1172	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("vdrop: holdcnt");
1175	vp->v_holdcnt--;
1176	if (VSHOULDFREE(vp))
1177		vfree(vp);
1178	simple_unlock(&vp->v_interlock);
1179}
1180
1181/*
1182 * Remove any vnodes in the vnode table belonging to mount point mp.
1183 *
1184 * If MNT_NOFORCE is specified, there should not be any active ones,
1185 * return error if any are found (nb: this is a user error, not a
1186 * system error). If MNT_FORCE is specified, detach any active vnodes
1187 * that are found.
1188 */
1189#ifdef DIAGNOSTIC
1190static int busyprt = 0;		/* print out busy vnodes */
1191SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
1192#endif
1193
1194int
1195vflush(mp, skipvp, flags)
1196	struct mount *mp;
1197	struct vnode *skipvp;
1198	int flags;
1199{
1200	struct proc *p = curproc;	/* XXX */
1201	struct vnode *vp, *nvp;
1202	int busy = 0;
1203
1204	simple_lock(&mntvnode_slock);
1205loop:
1206	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1207		/*
1208		 * Make sure this vnode wasn't reclaimed in getnewvnode().
1209		 * Start over if it has (it won't be on the list anymore).
1210		 */
1211		if (vp->v_mount != mp)
1212			goto loop;
1213		nvp = vp->v_mntvnodes.le_next;
1214		/*
1215		 * Skip over a selected vnode.
1216		 */
1217		if (vp == skipvp)
1218			continue;
1219
1220		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
1224		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1225			simple_unlock(&vp->v_interlock);
1226			continue;
1227		}
1228		/*
1229		 * If WRITECLOSE is set, only flush out regular file vnodes
1230		 * open for writing.
1231		 */
1232		if ((flags & WRITECLOSE) &&
1233		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
1234			simple_unlock(&vp->v_interlock);
1235			continue;
1236		}
1237
1238		/*
1239		 * With v_usecount == 0, all we need to do is clear out the
1240		 * vnode data structures and we are done.
1241		 */
1242		if (vp->v_usecount == 0) {
1243			simple_unlock(&mntvnode_slock);
1244			vgonel(vp, p);
1245			simple_lock(&mntvnode_slock);
1246			continue;
1247		}
1248
1249		/*
1250		 * If FORCECLOSE is set, forcibly close the vnode. For block
1251		 * or character devices, revert to an anonymous device. For
1252		 * all other files, just kill them.
1253		 */
1254		if (flags & FORCECLOSE) {
1255			simple_unlock(&mntvnode_slock);
1256			if (vp->v_type != VBLK && vp->v_type != VCHR) {
1257				vgonel(vp, p);
1258			} else {
1259				vclean(vp, 0, p);
1260				vp->v_op = spec_vnodeop_p;
1261				insmntque(vp, (struct mount *) 0);
1262			}
1263			simple_lock(&mntvnode_slock);
1264			continue;
1265		}
1266#ifdef DIAGNOSTIC
1267		if (busyprt)
1268			vprint("vflush: busy vnode", vp);
1269#endif
1270		simple_unlock(&vp->v_interlock);
1271		busy++;
1272	}
1273	simple_unlock(&mntvnode_slock);
1274	if (busy)
1275		return (EBUSY);
1276	return (0);
1277}
1278
1279/*
1280 * Disassociate the underlying file system from a vnode.
1281 */
1282static void
1283vclean(struct vnode *vp, int flags, struct proc *p)
1284{
1285	int active, irefed;
1286	vm_object_t object;
1287
1288	/*
1289	 * Check to see if the vnode is in use. If so we have to reference it
1290	 * before we clean it out so that its count cannot fall to zero and
1291	 * generate a race against ourselves to recycle it.
1292	 */
1293	if ((active = vp->v_usecount))
1294		vp->v_usecount++;
1295	/*
1296	 * Prevent the vnode from being recycled or brought into use while we
1297	 * clean it out.
1298	 */
1299	if (vp->v_flag & VXLOCK)
1300		panic("vclean: deadlock");
1301	vp->v_flag |= VXLOCK;
1302	/*
1303	 * Even if the count is zero, the VOP_INACTIVE routine may still
1304	 * have the object locked while it cleans it out. The VOP_LOCK
1305	 * ensures that the VOP_INACTIVE routine is done with its work.
1306	 * For active vnodes, it ensures that no other activity can
1307	 * occur while the underlying object is being cleaned out.
1308	 */
1309	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1310
1311	object = vp->v_object;
1312	irefed = 0;
1313	if (object && ((object->flags & OBJ_DEAD) == 0)) {
1314		if (object->ref_count == 0) {
1315			vm_object_reference(object);
1316			irefed = 1;
1317		}
1318		++object->ref_count;
1319		pager_cache(object, FALSE);
1320	}
1321
1322	/*
1323	 * Clean out any buffers associated with the vnode.
1324	 */
1325	if (flags & DOCLOSE)
1326		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1327
1328	if (irefed) {
1329		vm_object_deallocate(object);
1330	}
1331
1332	/*
1333	 * If purging an active vnode, it must be closed and
1334	 * deactivated before being reclaimed. Note that the
1335	 * VOP_INACTIVE will unlock the vnode.
1336	 */
1337	if (active) {
1338		if (flags & DOCLOSE)
1339			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1340		VOP_INACTIVE(vp, p);
1341	} else {
1342		/*
1343		 * Any other processes trying to obtain this lock must first
1344		 * wait for VXLOCK to clear, then call the new lock operation.
1345		 */
1346		VOP_UNLOCK(vp, 0, p);
1347	}
1348	/*
1349	 * Reclaim the vnode.
1350	 */
1351	if (VOP_RECLAIM(vp, p))
1352		panic("vclean: cannot reclaim");
1353	if (active)
1354		vrele(vp);
1355	cache_purge(vp);
1356	if (vp->v_vnlock) {
1357#ifdef DIAGNOSTIC
1358		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1359			vprint("vclean: lock not drained", vp);
1360#endif
1361		FREE(vp->v_vnlock, M_VNODE);
1362		vp->v_vnlock = NULL;
1363	}
1364
1365	/*
1366	 * Done with purge, notify sleepers of the grim news.
1367	 */
1368	vp->v_op = dead_vnodeop_p;
1369	vp->v_tag = VT_NON;
1370	vp->v_flag &= ~VXLOCK;
1371	if (vp->v_flag & VXWANT) {
1372		vp->v_flag &= ~VXWANT;
1373		wakeup((caddr_t) vp);
1374	}
1375}
1376
1377/*
1378 * Eliminate all activity associated with the requested vnode
1379 * and with all vnodes aliased to the requested vnode.
1380 */
1381int
1382vop_revoke(ap)
1383	struct vop_revoke_args /* {
1384		struct vnode *a_vp;
1385		int a_flags;
1386	} */ *ap;
1387{
1388	struct vnode *vp, *vq;
1389	struct proc *p = curproc;	/* XXX */
1390
1391#ifdef DIAGNOSTIC
1392	if ((ap->a_flags & REVOKEALL) == 0)
1393		panic("vop_revoke");
1394#endif
1395
1396	vp = ap->a_vp;
1397	simple_lock(&vp->v_interlock);
1398
1399	if (vp->v_flag & VALIASED) {
1400		/*
1401		 * If a vgone (or vclean) is already in progress,
1402		 * wait until it is done and return.
1403		 */
1404		if (vp->v_flag & VXLOCK) {
1405			vp->v_flag |= VXWANT;
1406			simple_unlock(&vp->v_interlock);
1407			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1408			return (0);
1409		}
1410		/*
1411		 * Ensure that vp will not be vgone'd while we
1412		 * are eliminating its aliases.
1413		 */
1414		vp->v_flag |= VXLOCK;
1415		simple_unlock(&vp->v_interlock);
1416		while (vp->v_flag & VALIASED) {
1417			simple_lock(&spechash_slock);
1418			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1419				if (vq->v_rdev != vp->v_rdev ||
1420				    vq->v_type != vp->v_type || vp == vq)
1421					continue;
1422				simple_unlock(&spechash_slock);
1423				vgone(vq);
1424				break;
1425			}
1426			if (vq == NULLVP) {
1427				simple_unlock(&spechash_slock);
1428			}
1429		}
1430		/*
1431		 * Remove the lock so that vgone below will
1432		 * really eliminate the vnode after which time
1433		 * vgone will awaken any sleepers.
1434		 */
1435		simple_lock(&vp->v_interlock);
1436		vp->v_flag &= ~VXLOCK;
1437	}
1438	vgonel(vp, p);
1439	return (0);
1440}
1441
1442/*
1443 * Recycle an unused vnode to the front of the free list.
1444 * Release the passed interlock if the vnode will be recycled.
1445 */
1446int
1447vrecycle(vp, inter_lkp, p)
1448	struct vnode *vp;
1449	struct simplelock *inter_lkp;
1450	struct proc *p;
1451{
1452
1453	simple_lock(&vp->v_interlock);
1454	if (vp->v_usecount == 0) {
1455		if (inter_lkp) {
1456			simple_unlock(inter_lkp);
1457		}
1458		vgonel(vp, p);
1459		return (1);
1460	}
1461	simple_unlock(&vp->v_interlock);
1462	return (0);
1463}
1464
1465/*
1466 * Eliminate all activity associated with a vnode
1467 * in preparation for reuse.
1468 */
1469void
1470vgone(vp)
1471	register struct vnode *vp;
1472{
1473	struct proc *p = curproc;	/* XXX */
1474
1475	simple_lock(&vp->v_interlock);
1476	vgonel(vp, p);
1477}
1478
1479/*
1480 * vgone, with the vp interlock held.
1481 */
1482static void
1483vgonel(vp, p)
1484	struct vnode *vp;
1485	struct proc *p;
1486{
1487	struct vnode *vq;
1488	struct vnode *vx;
1489
1490	/*
1491	 * If a vgone (or vclean) is already in progress,
1492	 * wait until it is done and return.
1493	 */
1494	if (vp->v_flag & VXLOCK) {
1495		vp->v_flag |= VXWANT;
1496		simple_unlock(&vp->v_interlock);
1497		tsleep((caddr_t)vp, PINOD, "vgone", 0);
1498		return;
1499	}
1500
1501	if (vp->v_object) {
1502		vp->v_object->flags |= OBJ_VNODE_GONE;
1503	}
1504
1505	/*
1506	 * Clean out the filesystem specific data.
1507	 */
1508	vclean(vp, DOCLOSE, p);
1509	/*
1510	 * Delete from old mount point vnode list, if on one.
1511	 */
1512	if (vp->v_mount != NULL)
1513		insmntque(vp, (struct mount *)0);
1514	/*
1515	 * If special device, remove it from special device alias list
1516	 * if it is on one.
1517	 */
1518	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1519		simple_lock(&spechash_slock);
1520		if (*vp->v_hashchain == vp) {
1521			*vp->v_hashchain = vp->v_specnext;
1522		} else {
1523			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1524				if (vq->v_specnext != vp)
1525					continue;
1526				vq->v_specnext = vp->v_specnext;
1527				break;
1528			}
1529			if (vq == NULL)
1530				panic("missing bdev");
1531		}
1532		if (vp->v_flag & VALIASED) {
1533			vx = NULL;
1534			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1535				if (vq->v_rdev != vp->v_rdev ||
1536				    vq->v_type != vp->v_type)
1537					continue;
1538				if (vx)
1539					break;
1540				vx = vq;
1541			}
1542			if (vx == NULL)
1543				panic("missing alias");
1544			if (vq == NULL)
1545				vx->v_flag &= ~VALIASED;
1546			vp->v_flag &= ~VALIASED;
1547		}
1548		simple_unlock(&spechash_slock);
1549		FREE(vp->v_specinfo, M_VNODE);
1550		vp->v_specinfo = NULL;
1551	}
1552
1553	/*
1554	 * If it is on the freelist and not already at the head,
1555	 * move it to the head of the list. The test of the back
1556	 * pointer and the reference count of zero is because
1557	 * it will be removed from the free list by getnewvnode,
1558	 * but will not have its reference count incremented until
1559	 * after calling vgone. If the reference count were
1560	 * incremented first, vgone would (incorrectly) try to
1561	 * close the previous instance of the underlying object.
1562	 */
1563	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
1564		simple_lock(&vnode_free_list_slock);
1565		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1566		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1567		simple_unlock(&vnode_free_list_slock);
1568	}
1569
1570	vp->v_type = VBAD;
1571}
1572
1573/*
1574 * Lookup a vnode by device number.
1575 */
1576int
1577vfinddev(dev, type, vpp)
1578	dev_t dev;
1579	enum vtype type;
1580	struct vnode **vpp;
1581{
1582	register struct vnode *vp;
1583	int rc = 0;
1584
1585	simple_lock(&spechash_slock);
1586	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1587		if (dev != vp->v_rdev || type != vp->v_type)
1588			continue;
1589		*vpp = vp;
1590		rc = 1;
1591		break;
1592	}
1593	simple_unlock(&spechash_slock);
1594	return (rc);
1595}
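/*
 * Illustrative sketch (not part of the original file): vfinddev() lets
 * a driver or filesystem recover the special vnode for a device it
 * only knows by dev_t.  The helper name is hypothetical.
 */
#if 0	/* example only */
static struct vnode *
example_dev_to_vnode(dev_t dev)
{
	struct vnode *vp;

	if (vfinddev(dev, VBLK, &vp))
		return (vp);
	return (NULLVP);
}
#endif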
1596
1597/*
1598 * Calculate the total number of references to a special device.
1599 */
1600int
1601vcount(vp)
1602	register struct vnode *vp;
1603{
1604	struct vnode *vq, *vnext;
1605	int count;
1606
1607loop:
1608	if ((vp->v_flag & VALIASED) == 0)
1609		return (vp->v_usecount);
1610	simple_lock(&spechash_slock);
1611	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1612		vnext = vq->v_specnext;
1613		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1614			continue;
1615		/*
1616		 * Alias, but not in use, so flush it out.
1617		 */
1618		if (vq->v_usecount == 0 && vq != vp) {
1619			simple_unlock(&spechash_slock);
1620			vgone(vq);
1621			goto loop;
1622		}
1623		count += vq->v_usecount;
1624	}
1625	simple_unlock(&spechash_slock);
1626	return (count);
1627}
1628
1629/*
1630 * Print out a description of a vnode.
1631 */
1632static char *typename[] =
1633{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
1634
1635void
1636vprint(label, vp)
1637	char *label;
1638	register struct vnode *vp;
1639{
1640	char buf[64];
1641
1642	if (label != NULL)
1643		printf("%s: %x: ", label, vp);
1644	else
1645		printf("%x: ", vp);
1646	printf("type %s, usecount %d, writecount %d, refcount %ld,",
1647	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1648	    vp->v_holdcnt);
1649	buf[0] = '\0';
1650	if (vp->v_flag & VROOT)
1651		strcat(buf, "|VROOT");
1652	if (vp->v_flag & VTEXT)
1653		strcat(buf, "|VTEXT");
1654	if (vp->v_flag & VSYSTEM)
1655		strcat(buf, "|VSYSTEM");
1656	if (vp->v_flag & VXLOCK)
1657		strcat(buf, "|VXLOCK");
1658	if (vp->v_flag & VXWANT)
1659		strcat(buf, "|VXWANT");
1660	if (vp->v_flag & VBWAIT)
1661		strcat(buf, "|VBWAIT");
1662	if (vp->v_flag & VALIASED)
1663		strcat(buf, "|VALIASED");
1664	if (vp->v_flag & VDOOMED)
1665		strcat(buf, "|VDOOMED");
1666	if (vp->v_flag & VFREE)
1667		strcat(buf, "|VFREE");
1668	if (buf[0] != '\0')
1669		printf(" flags (%s)", &buf[1]);
1670	if (vp->v_data == NULL) {
1671		printf("\n");
1672	} else {
1673		printf("\n\t");
1674		VOP_PRINT(vp);
1675	}
1676}
1677
1678#ifdef DDB
1679/*
1680 * List all of the locked vnodes in the system.
1681 * Called when debugging the kernel.
1682 */
1683void
1684printlockedvnodes()
1685{
1686	struct proc *p = curproc;	/* XXX */
1687	struct mount *mp, *nmp;
1688	struct vnode *vp;
1689
1690	printf("Locked vnodes\n");
1691	simple_lock(&mountlist_slock);
1692	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1693		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1694			nmp = mp->mnt_list.cqe_next;
1695			continue;
1696		}
1697		for (vp = mp->mnt_vnodelist.lh_first;
1698		     vp != NULL;
1699		     vp = vp->v_mntvnodes.le_next) {
1700			if (VOP_ISLOCKED(vp))
1701				vprint((char *)0, vp);
1702		}
1703		simple_lock(&mountlist_slock);
1704		nmp = mp->mnt_list.cqe_next;
1705		vfs_unbusy(mp, p);
1706	}
1707	simple_unlock(&mountlist_slock);
1708}
1709#endif
1710
1711/*
1712 * Top level filesystem related information gathering.
1713 */
1714static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);
1715
1716static int
1717vfs_sysctl SYSCTL_HANDLER_ARGS
1718{
1719	int *name = (int *)arg1 - 1;	/* XXX */
1720	u_int namelen = arg2 + 1;	/* XXX */
1721	struct vfsconf *vfsp;
1722
1723#ifndef NO_COMPAT_PRELITE2
1724	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
1725	if (namelen == 1)
1726		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
1727#endif
1728
1729#ifdef notyet
1730	/* all sysctl names at this level are at least name and field */
1731	if (namelen < 2)
1732		return (ENOTDIR);		/* overloaded */
1733	if (name[0] != VFS_GENERIC) {
1734		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1735			if (vfsp->vfc_typenum == name[0])
1736				break;
1737		if (vfsp == NULL)
1738			return (EOPNOTSUPP);
1739		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1740		    oldp, oldlenp, newp, newlen, p));
1741	}
1742#endif
1743	switch (name[1]) {
1744	case VFS_MAXTYPENUM:
1745		if (namelen != 2)
1746			return (ENOTDIR);
1747		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
1748	case VFS_CONF:
1749		if (namelen != 3)
1750			return (ENOTDIR);	/* overloaded */
1751		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1752			if (vfsp->vfc_typenum == name[2])
1753				break;
1754		if (vfsp == NULL)
1755			return (EOPNOTSUPP);
1756		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
1757	}
1758	return (EOPNOTSUPP);
1759}
1760
1761SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
1762	"Generic filesystem");
1763
1764#ifndef NO_COMPAT_PRELITE2
1765
1766static int
1767sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
1768{
1769	int error;
1770	struct vfsconf *vfsp;
1771	struct ovfsconf ovfs;
1772
1773	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
1774		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
1775		strcpy(ovfs.vfc_name, vfsp->vfc_name);
1776		ovfs.vfc_index = vfsp->vfc_typenum;
1777		ovfs.vfc_refcount = vfsp->vfc_refcount;
1778		ovfs.vfc_flags = vfsp->vfc_flags;
1779		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
1780		if (error)
1781			return error;
1782	}
1783	return 0;
1784}
1785
1786#endif /* !NO_COMPAT_PRELITE2 */
1787
1788int kinfo_vdebug = 1;
1789int kinfo_vgetfailed;
1790
1791#define KINFO_VNODESLOP	10
1792/*
1793 * Dump vnode list (via sysctl).
1794 * Copyout address of vnode followed by vnode.
1795 */
1796/* ARGSUSED */
1797static int
1798sysctl_vnode SYSCTL_HANDLER_ARGS
1799{
1800	struct proc *p = curproc;	/* XXX */
1801	struct mount *mp, *nmp;
1802	struct vnode *nvp, *vp;
1803	int error;
1804
1805#define VPTRSZ	sizeof (struct vnode *)
1806#define VNODESZ	sizeof (struct vnode)
1807
1808	req->lock = 0;
1809	if (!req->oldptr) /* Make an estimate */
1810		return (SYSCTL_OUT(req, 0,
1811			(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
1812
1813	simple_lock(&mountlist_slock);
1814	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1815		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1816			nmp = mp->mnt_list.cqe_next;
1817			continue;
1818		}
1819again:
1820		simple_lock(&mntvnode_slock);
1821		for (vp = mp->mnt_vnodelist.lh_first;
1822		     vp != NULL;
1823		     vp = nvp) {
1824			/*
1825			 * Check that the vp is still associated with
1826			 * this filesystem.  RACE: could have been
1827			 * recycled onto the same filesystem.
1828			 */
1829			if (vp->v_mount != mp) {
1830				simple_unlock(&mntvnode_slock);
1831				if (kinfo_vdebug)
1832					printf("kinfo: vp changed\n");
1833				goto again;
1834			}
1835			nvp = vp->v_mntvnodes.le_next;
1836			simple_unlock(&mntvnode_slock);
1837			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
1838			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
1839				return (error);
1840			simple_lock(&mntvnode_slock);
1841		}
1842		simple_unlock(&mntvnode_slock);
1843		simple_lock(&mountlist_slock);
1844		nmp = mp->mnt_list.cqe_next;
1845		vfs_unbusy(mp, p);
1846	}
1847	simple_unlock(&mountlist_slock);
1848
1849	return (0);
1850}
1851
1852/*
1853 * XXX
1854 * Exporting the vnode list on large systems causes them to crash.
1855 * Exporting the vnode list on medium systems causes sysctl to coredump.
1856 */
1857#if 0
1858SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
1859	0, 0, sysctl_vnode, "S,vnode", "");
1860#endif
1861
1862/*
1863 * Check to see if a filesystem is mounted on a block device.
1864 */
1865int
1866vfs_mountedon(vp)
1867	struct vnode *vp;
1868{
1869	struct vnode *vq;
1870	int error = 0;
1871
1872	if (vp->v_specflags & SI_MOUNTEDON)
1873		return (EBUSY);
1874	if (vp->v_flag & VALIASED) {
1875		simple_lock(&spechash_slock);
1876		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1877			if (vq->v_rdev != vp->v_rdev ||
1878			    vq->v_type != vp->v_type)
1879				continue;
1880			if (vq->v_specflags & SI_MOUNTEDON) {
1881				error = EBUSY;
1882				break;
1883			}
1884		}
1885		simple_unlock(&spechash_slock);
1886	}
1887	return (error);
1888}
1889
1890/*
1891 * Unmount all filesystems. The list is traversed in reverse order
1892 * of mounting to avoid dependencies.
1893 */
1894void
1895vfs_unmountall()
1896{
1897	struct mount *mp, *nmp;
1898	struct proc *p = initproc;	/* XXX XXX should this be proc0? */
1899	int error;
1900
1901	/*
1902	 * Since this only runs when rebooting, it is not interlocked.
1903	 */
1904	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
1905		nmp = mp->mnt_list.cqe_prev;
1906		error = dounmount(mp, MNT_FORCE, p);
1907		if (error) {
1908			printf("unmount of %s failed (",
1909			    mp->mnt_stat.f_mntonname);
1910			if (error == EBUSY)
1911				printf("BUSY)\n");
1912			else
1913				printf("%d)\n", error);
1914		}
1915	}
1916}
1917
1918/*
1919 * Build hash lists of net addresses and hang them off the mount point.
1920 * Called by ufs_mount() to set up the lists of export addresses.
1921 */
1922static int
1923vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
1924	struct export_args *argp)
1925{
1926	register struct netcred *np;
1927	register struct radix_node_head *rnh;
1928	register int i;
1929	struct radix_node *rn;
1930	struct sockaddr *saddr, *smask = 0;
1931	struct domain *dom;
1932	int error;
1933
1934	if (argp->ex_addrlen == 0) {
1935		if (mp->mnt_flag & MNT_DEFEXPORTED)
1936			return (EPERM);
1937		np = &nep->ne_defexported;
1938		np->netc_exflags = argp->ex_flags;
1939		np->netc_anon = argp->ex_anon;
1940		np->netc_anon.cr_ref = 1;
1941		mp->mnt_flag |= MNT_DEFEXPORTED;
1942		return (0);
1943	}
1944	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1945	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
1946	bzero((caddr_t) np, i);
1947	saddr = (struct sockaddr *) (np + 1);
1948	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
1949		goto out;
1950	if (saddr->sa_len > argp->ex_addrlen)
1951		saddr->sa_len = argp->ex_addrlen;
1952	if (argp->ex_masklen) {
1953		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
1954		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
1955		if (error)
1956			goto out;
1957		if (smask->sa_len > argp->ex_masklen)
1958			smask->sa_len = argp->ex_masklen;
1959	}
1960	i = saddr->sa_family;
1961	if ((rnh = nep->ne_rtable[i]) == 0) {
1962		/*
1963		 * Seems silly to initialize every AF when most are not used,
1964		 * do so on demand here
1965		 */
1966		for (dom = domains; dom; dom = dom->dom_next)
1967			if (dom->dom_family == i && dom->dom_rtattach) {
1968				dom->dom_rtattach((void **) &nep->ne_rtable[i],
1969				    dom->dom_rtoffset);
1970				break;
1971			}
1972		if ((rnh = nep->ne_rtable[i]) == 0) {
1973			error = ENOBUFS;
1974			goto out;
1975		}
1976	}
1977	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
1978	    np->netc_rnodes);
1979	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
1980		error = EPERM;
1981		goto out;
1982	}
1983	np->netc_exflags = argp->ex_flags;
1984	np->netc_anon = argp->ex_anon;
1985	np->netc_anon.cr_ref = 1;
1986	return (0);
1987out:
1988	free(np, M_NETADDR);
1989	return (error);
1990}
1991
1992/* ARGSUSED */
1993static int
1994vfs_free_netcred(struct radix_node *rn, void *w)
1995{
1996	register struct radix_node_head *rnh = (struct radix_node_head *) w;
1997
1998	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
1999	free((caddr_t) rn, M_NETADDR);
2000	return (0);
2001}
2002
2003/*
2004 * Free the net address hash lists that are hanging off the mount points.
2005 */
2006static void
2007vfs_free_addrlist(struct netexport *nep)
2008{
2009	register int i;
2010	register struct radix_node_head *rnh;
2011
2012	for (i = 0; i <= AF_MAX; i++)
2013		if ((rnh = nep->ne_rtable[i])) {
2014			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
2015			    (caddr_t) rnh);
2016			free((caddr_t) rnh, M_RTABLE);
2017			nep->ne_rtable[i] = 0;
2018		}
2019}
2020
2021int
2022vfs_export(mp, nep, argp)
2023	struct mount *mp;
2024	struct netexport *nep;
2025	struct export_args *argp;
2026{
2027	int error;
2028
2029	if (argp->ex_flags & MNT_DELEXPORT) {
2030		if (mp->mnt_flag & MNT_EXPUBLIC) {
2031			vfs_setpublicfs(NULL, NULL, NULL);
2032			mp->mnt_flag &= ~MNT_EXPUBLIC;
2033		}
2034		vfs_free_addrlist(nep);
2035		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2036	}
2037	if (argp->ex_flags & MNT_EXPORTED) {
2038		if (argp->ex_flags & MNT_EXPUBLIC) {
2039			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2040				return (error);
2041			mp->mnt_flag |= MNT_EXPUBLIC;
2042		}
2043		if ((error = vfs_hang_addrlist(mp, nep, argp)))
2044			return (error);
2045		mp->mnt_flag |= MNT_EXPORTED;
2046	}
2047	return (0);
2048}
2049
2050
2051/*
2052 * Set the publicly exported filesystem (WebNFS). Currently, only
2053 * one public filesystem is possible in the spec (RFC 2054 and 2055)
2054 */
2055int
2056vfs_setpublicfs(mp, nep, argp)
2057	struct mount *mp;
2058	struct netexport *nep;
2059	struct export_args *argp;
2060{
2061	int error;
2062	struct vnode *rvp;
2063	char *cp;
2064
2065	/*
2066	 * mp == NULL -> invalidate the current info, the FS is
2067	 * no longer exported. May be called from either vfs_export
2068	 * or unmount, so check if it hasn't already been done.
2069	 */
2070	if (mp == NULL) {
2071		if (nfs_pub.np_valid) {
2072			nfs_pub.np_valid = 0;
2073			if (nfs_pub.np_index != NULL) {
2074				FREE(nfs_pub.np_index, M_TEMP);
2075				nfs_pub.np_index = NULL;
2076			}
2077		}
2078		return (0);
2079	}
2080
2081	/*
2082	 * Only one allowed at a time.
2083	 */
2084	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2085		return (EBUSY);
2086
2087	/*
2088	 * Get real filehandle for root of exported FS.
2089	 */
2090	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
2091	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
2092
2093	if ((error = VFS_ROOT(mp, &rvp)))
2094		return (error);
2095
2096	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
2097		return (error);
2098
2099	vput(rvp);
2100
2101	/*
2102	 * If an indexfile was specified, pull it in.
2103	 */
2104	if (argp->ex_indexfile != NULL) {
2105		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2106		    M_WAITOK);
2107		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2108		    MAXNAMLEN, (size_t *)0);
2109		if (!error) {
2110			/*
2111			 * Check for illegal filenames.
2112			 */
2113			for (cp = nfs_pub.np_index; *cp; cp++) {
2114				if (*cp == '/') {
2115					error = EINVAL;
2116					break;
2117				}
2118			}
2119		}
2120		if (error) {
2121			FREE(nfs_pub.np_index, M_TEMP);
2122			return (error);
2123		}
2124	}
2125
2126	nfs_pub.np_mount = mp;
2127	nfs_pub.np_valid = 1;
2128	return (0);
2129}
2130
2131struct netcred *
2132vfs_export_lookup(mp, nep, nam)
2133	register struct mount *mp;
2134	struct netexport *nep;
2135	struct sockaddr *nam;
2136{
2137	register struct netcred *np;
2138	register struct radix_node_head *rnh;
2139	struct sockaddr *saddr;
2140
2141	np = NULL;
2142	if (mp->mnt_flag & MNT_EXPORTED) {
2143		/*
2144		 * Lookup in the export list first.
2145		 */
2146		if (nam != NULL) {
2147			saddr = nam;
2148			rnh = nep->ne_rtable[saddr->sa_family];
2149			if (rnh != NULL) {
2150				np = (struct netcred *)
2151					(*rnh->rnh_matchaddr)((caddr_t)saddr,
2152							      rnh);
2153				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2154					np = NULL;
2155			}
2156		}
2157		/*
2158		 * If no address match, use the default if it exists.
2159		 */
2160		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2161			np = &nep->ne_defexported;
2162	}
2163	return (np);
2164}
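/*
 * Illustrative sketch (not part of the original file): an NFS-style
 * server consults vfs_export_lookup() with the client's address to
 * decide whether the mount is exported to that host and, if so, which
 * anonymous credential and export flags apply.  The helper name and
 * its error convention are hypothetical.
 */
#if 0	/* example only */
static int
example_check_export(struct mount *mp, struct netexport *nep,
    struct sockaddr *nam, struct ucred **credp, int *exflagsp)
{
	struct netcred *np;

	np = vfs_export_lookup(mp, nep, nam);
	if (np == NULL)
		return (EACCES);	/* address not exported to */
	*credp = &np->netc_anon;
	*exflagsp = np->netc_exflags;
	return (0);
}
#endif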
2165
/*
 * Perform msync on all vnodes under a mount point.
 * The mount point must be locked.
 */
void
vfs_msync(struct mount *mp, int flags)
{
2172	struct vnode *vp, *nvp;
2173loop:
2174	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
2175
2176		if (vp->v_mount != mp)
2177			goto loop;
2178		nvp = vp->v_mntvnodes.le_next;
2179		if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))
2180			continue;
2181		if (vp->v_object &&
2182		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
2183			vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE);
2184		}
2185	}
2186}
2187
/*
 * Create the VM object needed for VMIO and mmap support.  This
 * is done for all VREG files in the system.  Some filesystems may
 * also gain the additional metadata buffering capability of the
 * VMIO code by putting the device node into VMIO mode as well.
 */
2194int
2195vfs_object_create(vp, p, cred, waslocked)
2196	struct vnode *vp;
2197	struct proc *p;
2198	struct ucred *cred;
2199	int waslocked;
2200{
2201	struct vattr vat;
2202	vm_object_t object;
2203	int error = 0;
2204
2205retry:
2206	if ((object = vp->v_object) == NULL) {
2207		if (vp->v_type == VREG) {
2208			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
2209				goto retn;
2210			(void) vnode_pager_alloc(vp,
2211				OFF_TO_IDX(round_page(vat.va_size)), 0, 0);
2212		} else {
2213			/*
2214			 * This simply allocates the biggest object possible
2215			 * for a VBLK vnode.  This should be fixed, but doesn't
2216			 * cause any problems (yet).
2217			 */
2218			(void) vnode_pager_alloc(vp, INT_MAX, 0, 0);
2219		}
2220		vp->v_object->flags |= OBJ_VFS_REF;
2221	} else {
2222		if (object->flags & OBJ_DEAD) {
2223			if (waslocked)
2224				VOP_UNLOCK(vp, 0, p);
2225			tsleep(object, PVM, "vodead", 0);
2226			if (waslocked)
2227				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2228			goto retry;
2229		}
2230		if ((object->flags & OBJ_VFS_REF) == 0) {
2231			object->flags |= OBJ_VFS_REF;
2232			vm_object_reference(object);
2233		}
2234	}
2235	if (vp->v_object)
2236		vp->v_flag |= VVMIO;
2237
2238retn:
2239	return error;
2240}
2241
2242void
2243vfree(vp)
2244	struct vnode *vp;
2245{
2246	simple_lock(&vnode_free_list_slock);
2247	if (vp->v_flag & VAGE) {
2248		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
2249	} else {
2250		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
2251	}
2252	freevnodes++;
2253	simple_unlock(&vnode_free_list_slock);
2254	vp->v_flag &= ~VAGE;
2255	vp->v_flag |= VFREE;
2256}
2257
2258void
2259vbusy(vp)
2260	struct vnode *vp;
2261{
2262	simple_lock(&vnode_free_list_slock);
2263	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
2264	freevnodes--;
2265	simple_unlock(&vnode_free_list_slock);
2266	vp->v_flag &= ~VFREE;
2267}
2268