vfs_export.c revision 28954
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
39 * $Id: vfs_subr.c,v 1.95 1997/08/26 11:59:20 bde Exp $
40 */
41
42/*
43 * External virtual filesystem routines
44 */
45#include "opt_ddb.h"
46#include "opt_devfs.h"
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/kernel.h>
51#include <sys/file.h>
52#include <sys/proc.h>
53#include <sys/mount.h>
54#include <sys/time.h>
55#include <sys/vnode.h>
56#include <sys/stat.h>
57#include <sys/namei.h>
58#include <sys/ucred.h>
59#include <sys/buf.h>
60#include <sys/errno.h>
61#include <sys/malloc.h>
62#include <sys/domain.h>
63#include <sys/mbuf.h>
64#include <sys/dirent.h>
65
66#include <machine/limits.h>
67
68#include <vm/vm.h>
69#include <vm/vm_param.h>
70#include <vm/vm_object.h>
71#include <vm/vm_extern.h>
72#include <vm/vm_pager.h>
73#include <vm/vnode_pager.h>
74#include <sys/sysctl.h>
75
76#include <miscfs/specfs/specdev.h>
77
78#ifdef DDB
79extern void	printlockedvnodes __P((void));
80#endif
81static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
82static void	vgonel __P((struct vnode *vp, struct proc *p));
83unsigned long	numvnodes;
84SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
85static void	vputrele __P((struct vnode *vp, int put));
86
87enum vtype iftovt_tab[16] = {
88	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
89	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
90};
91int vttoif_tab[9] = {
92	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
93	S_IFSOCK, S_IFIFO, S_IFMT,
94};
95
96/*
97 * Insq/Remq for the vnode usage lists.
98 */
99#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
100#define	bufremvn(bp) {							\
101	LIST_REMOVE(bp, b_vnbufs);					\
102	(bp)->b_vnbufs.le_next = NOLIST;				\
103}
104TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
105static u_long freevnodes = 0;
106SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
107
108struct mntlist mountlist;	/* mounted filesystem list */
109struct simplelock mountlist_slock;
110static struct simplelock mntid_slock;
111struct simplelock mntvnode_slock;
112struct simplelock vnode_free_list_slock;
113static struct simplelock spechash_slock;
114struct nfs_public nfs_pub;	/* publicly exported FS */
115
116int desiredvnodes;
117SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");
118
119static void	vfs_free_addrlist __P((struct netexport *nep));
120static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
121static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
122				       struct export_args *argp));
123
124/*
125 * Initialize the vnode management data structures.
126 */
127void
128vntblinit()
129{
130
131	desiredvnodes = maxproc + vm_object_cache_max;
132	simple_lock_init(&mntvnode_slock);
133	simple_lock_init(&mntid_slock);
134	simple_lock_init(&spechash_slock);
135	TAILQ_INIT(&vnode_free_list);
136	simple_lock_init(&vnode_free_list_slock);
137	CIRCLEQ_INIT(&mountlist);
138}
139
140/*
141 * Mark a mount point as busy. Used to synchronize access and to delay
142 * unmounting. Interlock is not released on failure.
143 */
144int
145vfs_busy(mp, flags, interlkp, p)
146	struct mount *mp;
147	int flags;
148	struct simplelock *interlkp;
149	struct proc *p;
150{
151	int lkflags;
152
153	if (mp->mnt_flag & MNT_UNMOUNT) {
154		if (flags & LK_NOWAIT)
155			return (ENOENT);
156		mp->mnt_flag |= MNT_MWAIT;
157		if (interlkp) {
158			simple_unlock(interlkp);
159		}
160		/*
161		 * Since all busy locks are shared except the exclusive
162		 * lock granted when unmounting, the only place that a
163		 * wakeup needs to be done is at the release of the
164		 * exclusive lock at the end of dounmount.
165		 */
166		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
167		if (interlkp) {
168			simple_lock(interlkp);
169		}
170		return (ENOENT);
171	}
172	lkflags = LK_SHARED;
173	if (interlkp)
174		lkflags |= LK_INTERLOCK;
175	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
176		panic("vfs_busy: unexpected lock failure");
177	return (0);
178}
179
180/*
181 * Free a busy filesystem.
182 */
183void
184vfs_unbusy(mp, p)
185	struct mount *mp;
186	struct proc *p;
187{
188
189	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
190}
191
192/*
193 * Lookup a filesystem type, and if found allocate and initialize
194 * a mount structure for it.
195 *
196 * Devname is usually updated by mount(8) after booting.
197 */
198int
199vfs_rootmountalloc(fstypename, devname, mpp)
200	char *fstypename;
201	char *devname;
202	struct mount **mpp;
203{
204	struct proc *p = curproc;	/* XXX */
205	struct vfsconf *vfsp;
206	struct mount *mp;
207
208	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
209		if (!strcmp(vfsp->vfc_name, fstypename))
210			break;
211	if (vfsp == NULL)
212		return (ENODEV);
213	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
214	bzero((char *)mp, (u_long)sizeof(struct mount));
215	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
216	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
217	LIST_INIT(&mp->mnt_vnodelist);
218	mp->mnt_vfc = vfsp;
219	mp->mnt_op = vfsp->vfc_vfsops;
220	mp->mnt_flag = MNT_RDONLY;
221	mp->mnt_vnodecovered = NULLVP;
222	vfsp->vfc_refcount++;
223	mp->mnt_stat.f_type = vfsp->vfc_typenum;
224	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
225	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
226	mp->mnt_stat.f_mntonname[0] = '/';
227	mp->mnt_stat.f_mntonname[1] = 0;
228	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
229	*mpp = mp;
230	return (0);
231}
232
233/*
234 * Find an appropriate filesystem to use for the root. If a filesystem
235 * has not been preselected, walk through the list of known filesystems
236 * trying those that have mountroot routines, and try them until one
237 * works or we have tried them all.
238 */
239#ifdef notdef	/* XXX JH */
240int
241lite2_vfs_mountroot(void)
242{
243	struct vfsconf *vfsp;
244	extern int (*lite2_mountroot)(void);
245	int error;
246
247	if (lite2_mountroot != NULL)
248		return ((*lite2_mountroot)());
249	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
250		if (vfsp->vfc_mountroot == NULL)
251			continue;
252		if ((error = (*vfsp->vfc_mountroot)()) == 0)
253			return (0);
254		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
255	}
256	return (ENODEV);
257}
258#endif
259
260/*
261 * Lookup a mount point by filesystem identifier.
262 */
263struct mount *
264vfs_getvfs(fsid)
265	fsid_t *fsid;
266{
267	register struct mount *mp;
268
269	simple_lock(&mountlist_slock);
270	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
271	    mp = mp->mnt_list.cqe_next) {
272		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
273		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
274			simple_unlock(&mountlist_slock);
275			return (mp);
276		}
277	}
278	simple_unlock(&mountlist_slock);
279	return ((struct mount *) 0);
280}
281
282/*
283 * Get a new unique fsid
284 */
285void
286vfs_getnewfsid(mp)
287	struct mount *mp;
288{
289	static u_short xxxfs_mntid;
290
291	fsid_t tfsid;
292	int mtype;
293
294	simple_lock(&mntid_slock);
295	mtype = mp->mnt_vfc->vfc_typenum;
296	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
297	mp->mnt_stat.f_fsid.val[1] = mtype;
298	if (xxxfs_mntid == 0)
299		++xxxfs_mntid;
300	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
301	tfsid.val[1] = mtype;
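	/*
	 * If anything is mounted, bump the candidate fsid until it does
	 * not collide with the fsid of an existing mount.
	 */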
302	if (mountlist.cqh_first != (void *)&mountlist) {
303		while (vfs_getvfs(&tfsid)) {
304			tfsid.val[0]++;
305			xxxfs_mntid++;
306		}
307	}
308	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
309	simple_unlock(&mntid_slock);
310}
311
312/*
313 * Set vnode attributes to VNOVAL
314 */
315void
316vattr_null(vap)
317	register struct vattr *vap;
318{
319
320	vap->va_type = VNON;
321	vap->va_size = VNOVAL;
322	vap->va_bytes = VNOVAL;
323	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
324	    vap->va_fsid = vap->va_fileid =
325	    vap->va_blocksize = vap->va_rdev =
326	    vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
327	    vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
328	    vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
329	    vap->va_flags = vap->va_gen = VNOVAL;
330	vap->va_vaflags = 0;
331}
332
333/*
334 * Routines having to do with the management of the vnode table.
335 */
336extern vop_t **dead_vnodeop_p;
337
338/*
339 * Return the next vnode from the free list.
340 */
341int
342getnewvnode(tag, mp, vops, vpp)
343	enum vtagtype tag;
344	struct mount *mp;
345	vop_t **vops;
346	struct vnode **vpp;
347{
348	struct proc *p = curproc;	/* XXX */
349	struct vnode *vp;
350
351	/*
352	 * We take the least recently used vnode from the freelist
353	 * if we can get it and it has no cached pages, and no
354	 * namecache entries are relative to it.
355	 * Otherwise we allocate a new vnode.
356	 */
357
358	simple_lock(&vnode_free_list_slock);
359
360	if (freevnodes >= desiredvnodes) {
361		TAILQ_FOREACH(vp, &vnode_free_list, v_freelist) {
362			if (!simple_lock_try(&vp->v_interlock))
363				continue;
364			if (vp->v_usecount)
365				panic("free vnode isn't");
366
367			if (vp->v_object && vp->v_object->resident_page_count) {
368				/* Don't recycle if it's caching some pages */
369				simple_unlock(&vp->v_interlock);
370				continue;
371			} else if (LIST_FIRST(&vp->v_cache_src)) {
372				/* Don't recycle if active in the namecache */
373				simple_unlock(&vp->v_interlock);
374				continue;
375			} else {
376				break;
377			}
378		}
379	} else {
380		vp = NULL;
381	}
382
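	/*
	 * If a reusable vnode was found above, pull it off the free list
	 * and recycle it; otherwise allocate a brand new vnode below.
	 */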
383	if (vp) {
384		vp->v_flag |= VDOOMED;
385		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
386		freevnodes--;
387		simple_unlock(&vnode_free_list_slock);
388		cache_purge(vp);
389		vp->v_lease = NULL;
390		if (vp->v_type != VBAD)
391			vgonel(vp, p);
392		else {
393			simple_unlock(&vp->v_interlock);
394		}
395
396#ifdef DIAGNOSTIC
397		{
398			int s;
399
400			if (vp->v_data)
401				panic("cleaned vnode isn't");
402			s = splbio();
403			if (vp->v_numoutput)
404				panic("Clean vnode has pending I/O's");
405			splx(s);
406		}
407#endif
408		vp->v_flag = 0;
409		vp->v_lastr = 0;
410		vp->v_lastw = 0;
411		vp->v_lasta = 0;
412		vp->v_cstart = 0;
413		vp->v_clen = 0;
414		vp->v_socket = 0;
415		vp->v_writecount = 0;	/* XXX */
416	} else {
417		simple_unlock(&vnode_free_list_slock);
418		vp = (struct vnode *) malloc((u_long) sizeof *vp,
419		    M_VNODE, M_WAITOK);
420		bzero((char *) vp, sizeof *vp);
421		vp->v_dd = vp;
422		cache_purge(vp);
423		LIST_INIT(&vp->v_cache_src);
424		TAILQ_INIT(&vp->v_cache_dst);
425		numvnodes++;
426	}
427
428	vp->v_type = VNON;
429	vp->v_tag = tag;
430	vp->v_op = vops;
431	insmntque(vp, mp);
432	*vpp = vp;
433	vp->v_usecount = 1;
434	vp->v_data = 0;
435	return (0);
436}
437
438/*
439 * Move a vnode from one mount queue to another.
440 */
441void
442insmntque(vp, mp)
443	register struct vnode *vp;
444	register struct mount *mp;
445{
446
447	simple_lock(&mntvnode_slock);
448	/*
449	 * Delete from old mount point vnode list, if on one.
450	 */
451	if (vp->v_mount != NULL)
452		LIST_REMOVE(vp, v_mntvnodes);
453	/*
454	 * Insert into list of vnodes for the new mount point, if available.
455	 */
456	if ((vp->v_mount = mp) == NULL) {
457		simple_unlock(&mntvnode_slock);
458		return;
459	}
460	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
461	simple_unlock(&mntvnode_slock);
462}
463
464/*
465 * Update outstanding I/O count and do wakeup if requested.
466 */
467void
468vwakeup(bp)
469	register struct buf *bp;
470{
471	register struct vnode *vp;
472
473	bp->b_flags &= ~B_WRITEINPROG;
474	if ((vp = bp->b_vp)) {
475		vp->v_numoutput--;
476		if (vp->v_numoutput < 0)
477			panic("vwakeup: neg numoutput");
478		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
479			vp->v_flag &= ~VBWAIT;
480			wakeup((caddr_t) &vp->v_numoutput);
481		}
482	}
483}
484
485/*
486 * Flush out and invalidate all buffers associated with a vnode.
487 * Called with the underlying object locked.
488 */
489int
490vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
491	register struct vnode *vp;
492	int flags;
493	struct ucred *cred;
494	struct proc *p;
495	int slpflag, slptimeo;
496{
497	register struct buf *bp;
498	struct buf *nbp, *blist;
499	int s, error;
500	vm_object_t object;
501
502	if (flags & V_SAVE) {
503		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
504			return (error);
505		if (vp->v_dirtyblkhd.lh_first != NULL)
506			panic("vinvalbuf: dirty bufs");
507	}
508
509	s = splbio();
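	/*
	 * Walk the clean and dirty buffer lists until both are empty,
	 * skipping metadata buffers (negative logical block numbers)
	 * when V_SAVEMETA is set.
	 */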
510	for (;;) {
511		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
512			while (blist && blist->b_lblkno < 0)
513				blist = blist->b_vnbufs.le_next;
514		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
515		    (flags & V_SAVEMETA))
516			while (blist && blist->b_lblkno < 0)
517				blist = blist->b_vnbufs.le_next;
518		if (!blist)
519			break;
520
521		for (bp = blist; bp; bp = nbp) {
522			nbp = bp->b_vnbufs.le_next;
523			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
524				continue;
525			if (bp->b_flags & B_BUSY) {
526				bp->b_flags |= B_WANTED;
527				error = tsleep((caddr_t) bp,
528				    slpflag | (PRIBIO + 1), "vinvalbuf",
529				    slptimeo);
530				if (error) {
531					splx(s);
532					return (error);
533				}
534				break;
535			}
536			bremfree(bp);
537			bp->b_flags |= B_BUSY;
538			/*
539			 * XXX Since there are no node locks for NFS, I
540			 * believe there is a slight chance that a delayed
541			 * write will occur while sleeping just above, so
542			 * check for it.
543			 */
544			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
545				(void) VOP_BWRITE(bp);
546				break;
547			}
548			bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
549			brelse(bp);
550		}
551	}
552
553	while (vp->v_numoutput > 0) {
554		vp->v_flag |= VBWAIT;
555		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
556	}
557
558	splx(s);
559
560	/*
561	 * Destroy the copy in the VM cache, too.
562	 */
563	object = vp->v_object;
564	if (object != NULL) {
565		vm_object_page_remove(object, 0, object->size,
566		    (flags & V_SAVE) ? TRUE : FALSE);
567	}
568	if (!(flags & V_SAVEMETA) &&
569	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
570		panic("vinvalbuf: flush failed");
571	return (0);
572}
573
574/*
575 * Associate a buffer with a vnode.
576 */
577void
578bgetvp(vp, bp)
579	register struct vnode *vp;
580	register struct buf *bp;
581{
582	int s;
583
584	if (bp->b_vp)
585		panic("bgetvp: not free");
586	vhold(vp);
587	bp->b_vp = vp;
588	if (vp->v_type == VBLK || vp->v_type == VCHR)
589		bp->b_dev = vp->v_rdev;
590	else
591		bp->b_dev = NODEV;
592	/*
593	 * Insert onto list for new vnode.
594	 */
595	s = splbio();
596	bufinsvn(bp, &vp->v_cleanblkhd);
597	splx(s);
598}
599
600/*
601 * Disassociate a buffer from a vnode.
602 */
603void
604brelvp(bp)
605	register struct buf *bp;
606{
607	struct vnode *vp;
608	int s;
609
610	if (bp->b_vp == (struct vnode *) 0)
611		panic("brelvp: NULL");
612	/*
613	 * Delete from old vnode list, if on one.
614	 */
615	s = splbio();
616	if (bp->b_vnbufs.le_next != NOLIST)
617		bufremvn(bp);
618	splx(s);
619
620	vp = bp->b_vp;
621	bp->b_vp = (struct vnode *) 0;
622	vdrop(vp);
623}
624
625/*
626 * Associate a p-buffer with a vnode.
627 */
628void
629pbgetvp(vp, bp)
630	register struct vnode *vp;
631	register struct buf *bp;
632{
633#if defined(DIAGNOSTIC)
634	if (bp->b_vp)
635		panic("pbgetvp: not free");
636#endif
637	bp->b_vp = vp;
638	if (vp->v_type == VBLK || vp->v_type == VCHR)
639		bp->b_dev = vp->v_rdev;
640	else
641		bp->b_dev = NODEV;
642}
643
644/*
645 * Disassociate a p-buffer from a vnode.
646 */
647void
648pbrelvp(bp)
649	register struct buf *bp;
650{
651	struct vnode *vp;
652
653#if defined(DIAGNOSTIC)
654	if (bp->b_vp == (struct vnode *) 0)
655		panic("pbrelvp: NULL");
656#endif
657
658	bp->b_vp = (struct vnode *) 0;
659}
660
661/*
662 * Reassign a buffer from one vnode to another.
663 * Used to assign file specific control information
664 * (indirect blocks) to the vnode to which they belong.
665 */
666void
667reassignbuf(bp, newvp)
668	register struct buf *bp;
669	register struct vnode *newvp;
670{
671	int s;
672
673	if (newvp == NULL) {
674		printf("reassignbuf: NULL\n");
675		return;
676	}
677
678	s = splbio();
679	/*
680	 * Delete from old vnode list, if on one.
681	 */
682	if (bp->b_vnbufs.le_next != NOLIST) {
683		bufremvn(bp);
684		vdrop(bp->b_vp);
685	}
686	/*
687	 * If dirty, put on list of dirty buffers; otherwise insert onto list
688	 * of clean buffers.
689	 */
690	if (bp->b_flags & B_DELWRI) {
691		struct buf *tbp;
692
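		/*
		 * Insertion sort: keep the dirty buffer list ordered by
		 * logical block number.
		 */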
693		tbp = newvp->v_dirtyblkhd.lh_first;
694		if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
695			bufinsvn(bp, &newvp->v_dirtyblkhd);
696		} else {
697			while (tbp->b_vnbufs.le_next &&
698				(tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
699				tbp = tbp->b_vnbufs.le_next;
700			}
701			LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
702		}
703	} else {
704		bufinsvn(bp, &newvp->v_cleanblkhd);
705	}
706	bp->b_vp = newvp;
707	vhold(bp->b_vp);
708	splx(s);
709}
710
711#ifndef DEVFS_ROOT
712/*
713 * Create a vnode for a block device.
714 * Used for root filesystem, argdev, and swap areas.
715 * Also used for memory file system special devices.
716 */
717int
718bdevvp(dev, vpp)
719	dev_t dev;
720	struct vnode **vpp;
721{
722	register struct vnode *vp;
723	struct vnode *nvp;
724	int error;
725
726	if (dev == NODEV)
727		return (0);
728	error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
729	if (error) {
730		*vpp = 0;
731		return (error);
732	}
733	vp = nvp;
734	vp->v_type = VBLK;
735	if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
736		vput(vp);
737		vp = nvp;
738	}
739	*vpp = vp;
740	return (0);
741}
742#endif /* !DEVFS_ROOT */
743
744/*
745 * Check to see if the new vnode represents a special device
746 * for which we already have a vnode (either because of
747 * bdevvp() or because of a different vnode representing
748 * the same block device). If such an alias exists, deallocate
749 * the existing contents and return the aliased vnode. The
750 * caller is responsible for filling it with its new contents.
751 */
752struct vnode *
753checkalias(nvp, nvp_rdev, mp)
754	register struct vnode *nvp;
755	dev_t nvp_rdev;
756	struct mount *mp;
757{
758	struct proc *p = curproc;	/* XXX */
759	struct vnode *vp;
760	struct vnode **vpp;
761
762	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
763		return (NULLVP);
764
765	vpp = &speclisth[SPECHASH(nvp_rdev)];
766loop:
767	simple_lock(&spechash_slock);
768	for (vp = *vpp; vp; vp = vp->v_specnext) {
769		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
770			continue;
771		/*
772		 * Alias, but not in use, so flush it out.
773		 */
774		simple_lock(&vp->v_interlock);
775		if (vp->v_usecount == 0) {
776			simple_unlock(&spechash_slock);
777			vgonel(vp, p);
778			goto loop;
779		}
780		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
781			simple_unlock(&spechash_slock);
782			goto loop;
783		}
784		break;
785	}
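	/*
	 * No reusable alias was found, or the alias found is owned by a
	 * filesystem: attach special device info to the new vnode and
	 * link it onto the hash chain.
	 */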
786	if (vp == NULL || vp->v_tag != VT_NON) {
787		MALLOC(nvp->v_specinfo, struct specinfo *,
788		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
789		nvp->v_rdev = nvp_rdev;
790		nvp->v_hashchain = vpp;
791		nvp->v_specnext = *vpp;
792		nvp->v_specflags = 0;
793		simple_unlock(&spechash_slock);
794		*vpp = nvp;
795		if (vp != NULLVP) {
796			nvp->v_flag |= VALIASED;
797			vp->v_flag |= VALIASED;
798			vput(vp);
799		}
800		return (NULLVP);
801	}
802	simple_unlock(&spechash_slock);
803	VOP_UNLOCK(vp, 0, p);
804	simple_lock(&vp->v_interlock);
805	vclean(vp, 0, p);
806	vp->v_op = nvp->v_op;
807	vp->v_tag = nvp->v_tag;
808	nvp->v_type = VNON;
809	insmntque(vp, mp);
810	return (vp);
811}
812
813/*
814 * Grab a particular vnode from the free list, increment its
815 * reference count and lock it. The vnode lock bit is set if the
816 * vnode is being eliminated in vgone. The process is awakened
817 * when the transition is completed, and an error returned to
818 * indicate that the vnode is no longer usable (possibly having
819 * been changed to a new file system type).
820 */
821int
822vget(vp, flags, p)
823	register struct vnode *vp;
824	int flags;
825	struct proc *p;
826{
827	int error;
828
829	/*
830	 * If the vnode is in the process of being cleaned out for
831	 * another use, we wait for the cleaning to finish and then
832	 * return failure. Cleaning is determined by checking that
833	 * the VXLOCK flag is set.
834	 */
835	if ((flags & LK_INTERLOCK) == 0) {
836		simple_lock(&vp->v_interlock);
837	}
838	if (vp->v_flag & VXLOCK) {
839		vp->v_flag |= VXWANT;
840		simple_unlock(&vp->v_interlock);
841		tsleep((caddr_t)vp, PINOD, "vget", 0);
842		return (ENOENT);
843	}
844	vp->v_usecount++;
845	if (VSHOULDBUSY(vp))
846		vbusy(vp);
847	/*
848	 * Create the VM object, if needed
849	 */
850	if ((vp->v_type == VREG) &&
851		((vp->v_object == NULL) ||
852			(vp->v_object->flags & OBJ_VFS_REF) == 0 ||
853			(vp->v_object->flags & OBJ_DEAD))) {
854		/*
855		 * XXX vfs_object_create probably needs the interlock.
856		 */
857		simple_unlock(&vp->v_interlock);
858		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
859		simple_lock(&vp->v_interlock);
860	}
861	if (flags & LK_TYPE_MASK) {
862		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)))
863			vrele(vp);
864		return (error);
865	}
866	simple_unlock(&vp->v_interlock);
867	return (0);
868}
869
870/*
871 * Stubs to use when there is no locking to be done on the underlying object.
872 * A minimal shared lock is necessary to ensure that the underlying object
873 * is not revoked while an operation is in progress. So, an active shared
874 * count is maintained in an auxiliary vnode lock structure.
875 */
876int
877vop_sharedlock(ap)
878	struct vop_lock_args /* {
879		struct vnode *a_vp;
880		int a_flags;
881		struct proc *a_p;
882	} */ *ap;
883{
884	/*
885	 * This code cannot be used until all the non-locking filesystems
886	 * (notably NFS) are converted to properly lock and release nodes.
887	 * Also, certain vnode operations change the locking state within
888	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
889	 * and symlink). Ideally these operations should not change the
890	 * lock state, but should be changed to let the caller of the
891	 * function unlock them. Otherwise all intermediate vnode layers
892	 * (such as union, umapfs, etc) must catch these functions to do
893	 * the necessary locking at their layer. Note that the inactive
894	 * and lookup operations also change their lock state, but this
895	 * cannot be avoided, so these two operations will always need
896	 * to be handled in intermediate layers.
897	 */
898	struct vnode *vp = ap->a_vp;
899	int vnflags, flags = ap->a_flags;
900
901	if (vp->v_vnlock == NULL) {
902		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
903			return (0);
904		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
905		    M_VNODE, M_WAITOK);
906		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
907	}
908	switch (flags & LK_TYPE_MASK) {
909	case LK_DRAIN:
910		vnflags = LK_DRAIN;
911		break;
912	case LK_EXCLUSIVE:
913#ifdef DEBUG_VFS_LOCKS
914		/*
915		 * Normally, we use shared locks here, but that confuses
916		 * the locking assertions.
917		 */
918		vnflags = LK_EXCLUSIVE;
919		break;
920#endif
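		/*
		 * Without DEBUG_VFS_LOCKS an exclusive request falls
		 * through and is granted as a shared lock.
		 */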
921	case LK_SHARED:
922		vnflags = LK_SHARED;
923		break;
924	case LK_UPGRADE:
925	case LK_EXCLUPGRADE:
926	case LK_DOWNGRADE:
927		return (0);
928	case LK_RELEASE:
929	default:
930		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
931	}
932	if (flags & LK_INTERLOCK)
933		vnflags |= LK_INTERLOCK;
934	return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
935}
936
937/*
938 * Stubs to use when there is no locking to be done on the underlying object.
939 * A minimal shared lock is necessary to ensure that the underlying object
940 * is not revoked while an operation is in progress. So, an active shared
941 * count is maintained in an auxiliary vnode lock structure.
942 */
943int
944vop_nolock(ap)
945	struct vop_lock_args /* {
946		struct vnode *a_vp;
947		int a_flags;
948		struct proc *a_p;
949	} */ *ap;
950{
951#ifdef notyet
952	/*
953	 * This code cannot be used until all the non-locking filesystems
954	 * (notably NFS) are converted to properly lock and release nodes.
955	 * Also, certain vnode operations change the locking state within
956	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
957	 * and symlink). Ideally these operations should not change the
958	 * lock state, but should be changed to let the caller of the
959	 * function unlock them. Otherwise all intermediate vnode layers
960	 * (such as union, umapfs, etc) must catch these functions to do
961	 * the necessary locking at their layer. Note that the inactive
962	 * and lookup operations also change their lock state, but this
963	 * cannot be avoided, so these two operations will always need
964	 * to be handled in intermediate layers.
965	 */
966	struct vnode *vp = ap->a_vp;
967	int vnflags, flags = ap->a_flags;
968
969	if (vp->v_vnlock == NULL) {
970		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
971			return (0);
972		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
973		    M_VNODE, M_WAITOK);
974		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
975	}
976	switch (flags & LK_TYPE_MASK) {
977	case LK_DRAIN:
978		vnflags = LK_DRAIN;
979		break;
980	case LK_EXCLUSIVE:
981	case LK_SHARED:
982		vnflags = LK_SHARED;
983		break;
984	case LK_UPGRADE:
985	case LK_EXCLUPGRADE:
986	case LK_DOWNGRADE:
987		return (0);
988	case LK_RELEASE:
989	default:
990		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
991	}
992	if (flags & LK_INTERLOCK)
993		vnflags |= LK_INTERLOCK;
994	return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
995#else /* for now */
996	/*
997	 * Since we are not using the lock manager, we must clear
998	 * the interlock here.
999	 */
1000	if (ap->a_flags & LK_INTERLOCK) {
1001		simple_unlock(&ap->a_vp->v_interlock);
1002	}
1003	return (0);
1004#endif
1005}
1006
1007/*
1008 * Do the inverse of vop_nolock, handling the interlock in a compatible way.
1009 */
1010int
1011vop_nounlock(ap)
1012	struct vop_unlock_args /* {
1013		struct vnode *a_vp;
1014		int a_flags;
1015		struct proc *a_p;
1016	} */ *ap;
1017{
1018	struct vnode *vp = ap->a_vp;
1019
1020	if (vp->v_vnlock == NULL) {
1021		if (ap->a_flags & LK_INTERLOCK)
1022			simple_unlock(&ap->a_vp->v_interlock);
1023		return (0);
1024	}
1025	return (lockmgr(vp->v_vnlock, LK_RELEASE | ap->a_flags,
1026		&ap->a_vp->v_interlock, ap->a_p));
1027}
1028
1029/*
1030 * Return whether or not the node is locked.
1031 */
1032int
1033vop_noislocked(ap)
1034	struct vop_islocked_args /* {
1035		struct vnode *a_vp;
1036	} */ *ap;
1037{
1038	struct vnode *vp = ap->a_vp;
1039
1040	if (vp->v_vnlock == NULL)
1041		return (0);
1042	return (lockstatus(vp->v_vnlock));
1043}
1044
1045/* #ifdef DIAGNOSTIC */
1046/*
1047 * Vnode reference, just increment the count
1048 */
1049void
1050vref(vp)
1051	struct vnode *vp;
1052{
1053	simple_lock(&vp->v_interlock);
1054	if (vp->v_usecount <= 0)
1055		panic("vref used where vget required");
1056
1057	vp->v_usecount++;
1058
1059	if ((vp->v_type == VREG) &&
1060		((vp->v_object == NULL) ||
1061			((vp->v_object->flags & OBJ_VFS_REF) == 0) ||
1062			(vp->v_object->flags & OBJ_DEAD))) {
1063		/*
1064		 * We need to lock to VP during the time that
1065		 * the object is created.  This is necessary to
1066		 * keep the system from re-entrantly doing it
1067		 * multiple times.
1068		 * XXX vfs_object_create probably needs the interlock?
1069		 */
1070		simple_unlock(&vp->v_interlock);
1071		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
1072		return;
1073	}
1074	simple_unlock(&vp->v_interlock);
1075}
1076
1077/*
1078 * Vnode put/release.
1079 * If count drops to zero, call inactive routine and return to freelist.
1080 */
1081static void
1082vputrele(vp, put)
1083	struct vnode *vp;
1084	int put;
1085{
1086	struct proc *p = curproc;	/* XXX */
1087
1088#ifdef DIAGNOSTIC
1089	if (vp == NULL)
1090		panic("vputrele: null vp");
1091#endif
1092	simple_lock(&vp->v_interlock);
1093
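	/*
	 * Special case: the only uses left are the caller's and the one
	 * held for the VM object, so drop the object's VFS reference
	 * along with our own.
	 */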
1094	if ((vp->v_usecount == 2) &&
1095		vp->v_object &&
1096		(vp->v_object->flags & OBJ_VFS_REF)) {
1097		vp->v_usecount--;
1098		vp->v_object->flags &= ~OBJ_VFS_REF;
1099		if (put) {
1100			VOP_UNLOCK(vp, LK_INTERLOCK, p);
1101		} else {
1102			simple_unlock(&vp->v_interlock);
1103		}
1104		vm_object_deallocate(vp->v_object);
1105		return;
1106	}
1107
1108	if (vp->v_usecount > 1) {
1109		vp->v_usecount--;
1110		if (put) {
1111			VOP_UNLOCK(vp, LK_INTERLOCK, p);
1112		} else {
1113			simple_unlock(&vp->v_interlock);
1114		}
1115		return;
1116	}
1117
1118	if (vp->v_usecount < 1) {
1119#ifdef DIAGNOSTIC
1120		vprint("vputrele: negative ref count", vp);
1121#endif
1122		panic("vputrele: negative ref cnt");
1123	}
1124
1125	vp->v_holdcnt++; 	/* Make sure vnode isn't recycled */
1126
1127	/*
1128	 * If we are doing a vput, the node is already locked, and we must
1129	 * call VOP_INACTIVE with the node locked.  So, in the case of
1130	 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
1131	 */
1132	if (put) {
1133		simple_unlock(&vp->v_interlock);
1134		VOP_INACTIVE(vp, p);
1135		simple_lock(&vp->v_interlock);
1136		vp->v_usecount--;
1137		vp->v_holdcnt--;
1138		if (VSHOULDFREE(vp))
1139			vfree(vp);
1140		simple_unlock(&vp->v_interlock);
1141	} else if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1142		VOP_INACTIVE(vp, p);
1143		vp->v_usecount--;
1144		vp->v_holdcnt--;
1145		if (VSHOULDFREE(vp))
1146			vfree(vp);
1147	}
1148}
1149
1150/*
1151 * vput(), just unlock and vrele()
1152 */
1153void
1154vput(vp)
1155	struct vnode *vp;
1156{
1157	vputrele(vp, 1);
1158}
1159
1160void
1161vrele(vp)
1162	struct vnode *vp;
1163{
1164	vputrele(vp, 0);
1165}
1166
1167/*
1168 * Somebody doesn't want the vnode recycled.
1169 */
1170void
1171vhold(vp)
1172	register struct vnode *vp;
1173{
1174
1175	simple_lock(&vp->v_interlock);
1176	vp->v_holdcnt++;
1177	if (VSHOULDBUSY(vp))
1178		vbusy(vp);
1179	simple_unlock(&vp->v_interlock);
1180}
1181
1182/*
1183 * One less who cares about this vnode.
1184 */
1185void
1186vdrop(vp)
1187	register struct vnode *vp;
1188{
1189
1190	simple_lock(&vp->v_interlock);
1191	if (vp->v_holdcnt <= 0)
1192		panic("holdrele: holdcnt");
1193	vp->v_holdcnt--;
1194	if (VSHOULDFREE(vp))
1195		vfree(vp);
1196	simple_unlock(&vp->v_interlock);
1197}
1198
1199/*
1200 * Remove any vnodes in the vnode table belonging to mount point mp.
1201 *
1202 * If MNT_NOFORCE is specified, there should not be any active ones,
1203 * return error if any are found (nb: this is a user error, not a
1204 * system error). If MNT_FORCE is specified, detach any active vnodes
1205 * that are found.
1206 */
1207#ifdef DIAGNOSTIC
1208static int busyprt = 0;		/* print out busy vnodes */
1209SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
1210#endif
1211
1212int
1213vflush(mp, skipvp, flags)
1214	struct mount *mp;
1215	struct vnode *skipvp;
1216	int flags;
1217{
1218	struct proc *p = curproc;	/* XXX */
1219	struct vnode *vp, *nvp;
1220	int busy = 0;
1221
1222	simple_lock(&mntvnode_slock);
1223loop:
1224	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1225		/*
1226		 * Make sure this vnode wasn't reclaimed in getnewvnode().
1227		 * Start over if it has (it won't be on the list anymore).
1228		 */
1229		if (vp->v_mount != mp)
1230			goto loop;
1231		nvp = vp->v_mntvnodes.le_next;
1232		/*
1233		 * Skip over a selected vnode.
1234		 */
1235		if (vp == skipvp)
1236			continue;
1237
1238		simple_lock(&vp->v_interlock);
1239		/*
1240		 * Skip over a vnodes marked VSYSTEM.
1241		 * Skip over vnodes marked VSYSTEM.
1242		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1243			simple_unlock(&vp->v_interlock);
1244			continue;
1245		}
1246		/*
1247		 * If WRITECLOSE is set, only flush out regular file vnodes
1248		 * open for writing.
1249		 */
1250		if ((flags & WRITECLOSE) &&
1251		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
1252			simple_unlock(&vp->v_interlock);
1253			continue;
1254		}
1255
1256		/*
1257		 * With v_usecount == 0, all we need to do is clear out the
1258		 * vnode data structures and we are done.
1259		 */
1260		if (vp->v_usecount == 0) {
1261			simple_unlock(&mntvnode_slock);
1262			vgonel(vp, p);
1263			simple_lock(&mntvnode_slock);
1264			continue;
1265		}
1266
1267		/*
1268		 * If FORCECLOSE is set, forcibly close the vnode. For block
1269		 * or character devices, revert to an anonymous device. For
1270		 * all other files, just kill them.
1271		 */
1272		if (flags & FORCECLOSE) {
1273			simple_unlock(&mntvnode_slock);
1274			if (vp->v_type != VBLK && vp->v_type != VCHR) {
1275				vgonel(vp, p);
1276			} else {
1277				vclean(vp, 0, p);
1278				vp->v_op = spec_vnodeop_p;
1279				insmntque(vp, (struct mount *) 0);
1280			}
1281			simple_lock(&mntvnode_slock);
1282			continue;
1283		}
1284#ifdef DIAGNOSTIC
1285		if (busyprt)
1286			vprint("vflush: busy vnode", vp);
1287#endif
1288		simple_unlock(&vp->v_interlock);
1289		busy++;
1290	}
1291	simple_unlock(&mntvnode_slock);
1292	if (busy)
1293		return (EBUSY);
1294	return (0);
1295}
1296
1297/*
1298 * Disassociate the underlying file system from a vnode.
1299 */
1300static void
1301vclean(struct vnode *vp, int flags, struct proc *p)
1302{
1303	int active, irefed;
1304	vm_object_t object;
1305
1306	/*
1307	 * Check to see if the vnode is in use. If so we have to reference it
1308	 * before we clean it out so that its count cannot fall to zero and
1309	 * generate a race against ourselves to recycle it.
1310	 */
1311	if ((active = vp->v_usecount))
1312		vp->v_usecount++;
1313	/*
1314	 * Prevent the vnode from being recycled or brought into use while we
1315	 * clean it out.
1316	 */
1317	if (vp->v_flag & VXLOCK)
1318		panic("vclean: deadlock");
1319	vp->v_flag |= VXLOCK;
1320	/*
1321	 * Even if the count is zero, the VOP_INACTIVE routine may still
1322	 * have the object locked while it cleans it out. The VOP_LOCK
1323	 * ensures that the VOP_INACTIVE routine is done with its work.
1324	 * For active vnodes, it ensures that no other activity can
1325	 * occur while the underlying object is being cleaned out.
1326	 */
1327	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1328
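	/*
	 * Hold a temporary reference on any live VM object and mark it
	 * uncacheable so its pages are not kept cached after the vnode
	 * is reclaimed.
	 */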
1329	object = vp->v_object;
1330	irefed = 0;
1331	if (object && ((object->flags & OBJ_DEAD) == 0)) {
1332		if (object->ref_count == 0) {
1333			vm_object_reference(object);
1334			irefed = 1;
1335		}
1336		++object->ref_count;
1337		pager_cache(object, FALSE);
1338	}
1339
1340	/*
1341	 * Clean out any buffers associated with the vnode.
1342	 */
1343	if (flags & DOCLOSE)
1344		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1345
1346	if (irefed) {
1347		vm_object_deallocate(object);
1348	}
1349
1350	/*
1351	 * If purging an active vnode, it must be closed and
1352	 * deactivated before being reclaimed. Note that the
1353	 * VOP_INACTIVE will unlock the vnode.
1354	 */
1355	if (active) {
1356		if (flags & DOCLOSE)
1357			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1358		VOP_INACTIVE(vp, p);
1359	} else {
1360		/*
1361		 * Any other processes trying to obtain this lock must first
1362		 * wait for VXLOCK to clear, then call the new lock operation.
1363		 */
1364		VOP_UNLOCK(vp, 0, p);
1365	}
1366	/*
1367	 * Reclaim the vnode.
1368	 */
1369	if (VOP_RECLAIM(vp, p))
1370		panic("vclean: cannot reclaim");
1371	if (active)
1372		vrele(vp);
1373	cache_purge(vp);
1374	if (vp->v_vnlock) {
1375#ifdef DIAGNOSTIC
1376		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1377			vprint("vclean: lock not drained", vp);
1378#endif
1379		FREE(vp->v_vnlock, M_VNODE);
1380		vp->v_vnlock = NULL;
1381	}
1382
1383	/*
1384	 * Done with purge, notify sleepers of the grim news.
1385	 */
1386	vp->v_op = dead_vnodeop_p;
1387	vp->v_tag = VT_NON;
1388	vp->v_flag &= ~VXLOCK;
1389	if (vp->v_flag & VXWANT) {
1390		vp->v_flag &= ~VXWANT;
1391		wakeup((caddr_t) vp);
1392	}
1393}
1394
1395/*
1396 * Eliminate all activity associated with the requested vnode
1397 * and with all vnodes aliased to the requested vnode.
1398 */
1399int
1400vop_revoke(ap)
1401	struct vop_revoke_args /* {
1402		struct vnode *a_vp;
1403		int a_flags;
1404	} */ *ap;
1405{
1406	struct vnode *vp, *vq;
1407	struct proc *p = curproc;	/* XXX */
1408
1409#ifdef DIAGNOSTIC
1410	if ((ap->a_flags & REVOKEALL) == 0)
1411		panic("vop_revoke");
1412#endif
1413
1414	vp = ap->a_vp;
1415	simple_lock(&vp->v_interlock);
1416
1417	if (vp->v_flag & VALIASED) {
1418		/*
1419		 * If a vgone (or vclean) is already in progress,
1420		 * wait until it is done and return.
1421		 */
1422		if (vp->v_flag & VXLOCK) {
1423			vp->v_flag |= VXWANT;
1424			simple_unlock(&vp->v_interlock);
1425			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1426			return (0);
1427		}
1428		/*
1429		 * Ensure that vp will not be vgone'd while we
1430		 * are eliminating its aliases.
1431		 */
1432		vp->v_flag |= VXLOCK;
1433		simple_unlock(&vp->v_interlock);
1434		while (vp->v_flag & VALIASED) {
1435			simple_lock(&spechash_slock);
1436			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1437				if (vq->v_rdev != vp->v_rdev ||
1438				    vq->v_type != vp->v_type || vp == vq)
1439					continue;
1440				simple_unlock(&spechash_slock);
1441				vgone(vq);
1442				break;
1443			}
1444			if (vq == NULLVP) {
1445				simple_unlock(&spechash_slock);
1446			}
1447		}
1448		/*
1449		 * Remove the lock so that vgone below will
1450		 * really eliminate the vnode after which time
1451		 * vgone will awaken any sleepers.
1452		 */
1453		simple_lock(&vp->v_interlock);
1454		vp->v_flag &= ~VXLOCK;
1455	}
1456	vgonel(vp, p);
1457	return (0);
1458}
1459
1460/*
1461 * Recycle an unused vnode to the front of the free list.
1462 * Release the passed interlock if the vnode will be recycled.
1463 */
1464int
1465vrecycle(vp, inter_lkp, p)
1466	struct vnode *vp;
1467	struct simplelock *inter_lkp;
1468	struct proc *p;
1469{
1470
1471	simple_lock(&vp->v_interlock);
1472	if (vp->v_usecount == 0) {
1473		if (inter_lkp) {
1474			simple_unlock(inter_lkp);
1475		}
1476		vgonel(vp, p);
1477		return (1);
1478	}
1479	simple_unlock(&vp->v_interlock);
1480	return (0);
1481}
1482
1483/*
1484 * Eliminate all activity associated with a vnode
1485 * in preparation for reuse.
1486 */
1487void
1488vgone(vp)
1489	register struct vnode *vp;
1490{
1491	struct proc *p = curproc;	/* XXX */
1492
1493	simple_lock(&vp->v_interlock);
1494	vgonel(vp, p);
1495}
1496
1497/*
1498 * vgone, with the vp interlock held.
1499 */
1500static void
1501vgonel(vp, p)
1502	struct vnode *vp;
1503	struct proc *p;
1504{
1505	struct vnode *vq;
1506	struct vnode *vx;
1507
1508	/*
1509	 * If a vgone (or vclean) is already in progress,
1510	 * wait until it is done and return.
1511	 */
1512	if (vp->v_flag & VXLOCK) {
1513		vp->v_flag |= VXWANT;
1514		simple_unlock(&vp->v_interlock);
1515		tsleep((caddr_t)vp, PINOD, "vgone", 0);
1516		return;
1517	}
1518
1519	if (vp->v_object) {
1520		vp->v_object->flags |= OBJ_VNODE_GONE;
1521	}
1522
1523	/*
1524	 * Clean out the filesystem specific data.
1525	 */
1526	vclean(vp, DOCLOSE, p);
1527	/*
1528	 * Delete from old mount point vnode list, if on one.
1529	 */
1530	if (vp->v_mount != NULL)
1531		insmntque(vp, (struct mount *)0);
1532	/*
1533	 * If special device, remove it from special device alias list
1534	 * if it is on one.
1535	 */
1536	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1537		simple_lock(&spechash_slock);
1538		if (*vp->v_hashchain == vp) {
1539			*vp->v_hashchain = vp->v_specnext;
1540		} else {
1541			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1542				if (vq->v_specnext != vp)
1543					continue;
1544				vq->v_specnext = vp->v_specnext;
1545				break;
1546			}
1547			if (vq == NULL)
1548				panic("missing bdev");
1549		}
1550		if (vp->v_flag & VALIASED) {
1551			vx = NULL;
1552			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1553				if (vq->v_rdev != vp->v_rdev ||
1554				    vq->v_type != vp->v_type)
1555					continue;
1556				if (vx)
1557					break;
1558				vx = vq;
1559			}
1560			if (vx == NULL)
1561				panic("missing alias");
1562			if (vq == NULL)
1563				vx->v_flag &= ~VALIASED;
1564			vp->v_flag &= ~VALIASED;
1565		}
1566		simple_unlock(&spechash_slock);
1567		FREE(vp->v_specinfo, M_VNODE);
1568		vp->v_specinfo = NULL;
1569	}
1570
1571	/*
1572	 * move it to the head of the list. The test of the VDOOMED
1573	 * flag and the reference count of zero is because
1574	 * pointer and the reference count of zero is because
1575	 * it will be removed from the free list by getnewvnode,
1576	 * but will not have its reference count incremented until
1577	 * after calling vgone. If the reference count were
1578	 * incremented first, vgone would (incorrectly) try to
1579	 * close the previous instance of the underlying object.
1580	 */
1581	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
1582		simple_lock(&vnode_free_list_slock);
1583		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1584		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1585		simple_unlock(&vnode_free_list_slock);
1586	}
1587
1588	vp->v_type = VBAD;
1589}
1590
1591/*
1592 * Lookup a vnode by device number.
1593 */
1594int
1595vfinddev(dev, type, vpp)
1596	dev_t dev;
1597	enum vtype type;
1598	struct vnode **vpp;
1599{
1600	register struct vnode *vp;
1601	int rc = 0;
1602
1603	simple_lock(&spechash_slock);
1604	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1605		if (dev != vp->v_rdev || type != vp->v_type)
1606			continue;
1607		*vpp = vp;
1608		rc = 1;
1609		break;
1610	}
1611	simple_unlock(&spechash_slock);
1612	return (rc);
1613}
1614
1615/*
1616 * Calculate the total number of references to a special device.
1617 */
1618int
1619vcount(vp)
1620	register struct vnode *vp;
1621{
1622	struct vnode *vq, *vnext;
1623	int count;
1624
1625loop:
1626	if ((vp->v_flag & VALIASED) == 0)
1627		return (vp->v_usecount);
1628	simple_lock(&spechash_slock);
1629	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1630		vnext = vq->v_specnext;
1631		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1632			continue;
1633		/*
1634		 * Alias, but not in use, so flush it out.
1635		 */
1636		if (vq->v_usecount == 0 && vq != vp) {
1637			simple_unlock(&spechash_slock);
1638			vgone(vq);
1639			goto loop;
1640		}
1641		count += vq->v_usecount;
1642	}
1643	simple_unlock(&spechash_slock);
1644	return (count);
1645}
1646
1647/*
1648 * Print out a description of a vnode.
1649 */
1650static char *typename[] =
1651{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
1652
1653void
1654vprint(label, vp)
1655	char *label;
1656	register struct vnode *vp;
1657{
1658	char buf[96];
1659
1660	if (label != NULL)
1661		printf("%s: %x: ", label, vp);
1662	else
1663		printf("%x: ", vp);
1664	printf("type %s, usecount %d, writecount %d, refcount %ld,",
1665	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1666	    vp->v_holdcnt);
1667	buf[0] = '\0';
1668	if (vp->v_flag & VROOT)
1669		strcat(buf, "|VROOT");
1670	if (vp->v_flag & VTEXT)
1671		strcat(buf, "|VTEXT");
1672	if (vp->v_flag & VSYSTEM)
1673		strcat(buf, "|VSYSTEM");
1674	if (vp->v_flag & VXLOCK)
1675		strcat(buf, "|VXLOCK");
1676	if (vp->v_flag & VXWANT)
1677		strcat(buf, "|VXWANT");
1678	if (vp->v_flag & VBWAIT)
1679		strcat(buf, "|VBWAIT");
1680	if (vp->v_flag & VALIASED)
1681		strcat(buf, "|VALIASED");
1682	if (vp->v_flag & VDOOMED)
1683		strcat(buf, "|VDOOMED");
1684	if (vp->v_flag & VFREE)
1685		strcat(buf, "|VFREE");
1686	if (buf[0] != '\0')
1687		printf(" flags (%s)", &buf[1]);
1688	if (vp->v_data == NULL) {
1689		printf("\n");
1690	} else {
1691		printf("\n\t");
1692		VOP_PRINT(vp);
1693	}
1694}
1695
1696#ifdef DDB
1697/*
1698 * List all of the locked vnodes in the system.
1699 * Called when debugging the kernel.
1700 */
1701void
1702printlockedvnodes()
1703{
1704	struct proc *p = curproc;	/* XXX */
1705	struct mount *mp, *nmp;
1706	struct vnode *vp;
1707
1708	printf("Locked vnodes\n");
1709	simple_lock(&mountlist_slock);
1710	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1711		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1712			nmp = mp->mnt_list.cqe_next;
1713			continue;
1714		}
1715		for (vp = mp->mnt_vnodelist.lh_first;
1716		     vp != NULL;
1717		     vp = vp->v_mntvnodes.le_next) {
1718			if (VOP_ISLOCKED(vp))
1719				vprint((char *)0, vp);
1720		}
1721		simple_lock(&mountlist_slock);
1722		nmp = mp->mnt_list.cqe_next;
1723		vfs_unbusy(mp, p);
1724	}
1725	simple_unlock(&mountlist_slock);
1726}
1727#endif
1728
1729/*
1730 * Top level filesystem related information gathering.
1731 */
1732static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);
1733
1734static int
1735vfs_sysctl SYSCTL_HANDLER_ARGS
1736{
1737	int *name = (int *)arg1 - 1;	/* XXX */
1738	u_int namelen = arg2 + 1;	/* XXX */
1739	struct vfsconf *vfsp;
1740
1741#ifndef NO_COMPAT_PRELITE2
1742	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
1743	if (namelen == 1)
1744		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
1745#endif
1746
1747#ifdef notyet
1748	/* all sysctl names at this level are at least name and field */
1749	if (namelen < 2)
1750		return (ENOTDIR);		/* overloaded */
1751	if (name[0] != VFS_GENERIC) {
1752		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1753			if (vfsp->vfc_typenum == name[0])
1754				break;
1755		if (vfsp == NULL)
1756			return (EOPNOTSUPP);
1757		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1758		    oldp, oldlenp, newp, newlen, p));
1759	}
1760#endif
1761	switch (name[1]) {
1762	case VFS_MAXTYPENUM:
1763		if (namelen != 2)
1764			return (ENOTDIR);
1765		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
1766	case VFS_CONF:
1767		if (namelen != 3)
1768			return (ENOTDIR);	/* overloaded */
1769		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1770			if (vfsp->vfc_typenum == name[2])
1771				break;
1772		if (vfsp == NULL)
1773			return (EOPNOTSUPP);
1774		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
1775	}
1776	return (EOPNOTSUPP);
1777}
1778
1779SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
1780	"Generic filesystem");
1781
1782#ifndef NO_COMPAT_PRELITE2
1783
1784static int
1785sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
1786{
1787	int error;
1788	struct vfsconf *vfsp;
1789	struct ovfsconf ovfs;
1790
1791	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
1792		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
1793		strcpy(ovfs.vfc_name, vfsp->vfc_name);
1794		ovfs.vfc_index = vfsp->vfc_typenum;
1795		ovfs.vfc_refcount = vfsp->vfc_refcount;
1796		ovfs.vfc_flags = vfsp->vfc_flags;
1797		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
1798		if (error)
1799			return error;
1800	}
1801	return 0;
1802}
1803
1804#endif /* !NO_COMPAT_PRELITE2 */
1805
1806int kinfo_vdebug = 1;
1807int kinfo_vgetfailed;
1808
1809#define KINFO_VNODESLOP	10
1810/*
1811 * Dump vnode list (via sysctl).
1812 * Copyout address of vnode followed by vnode.
1813 */
1814/* ARGSUSED */
1815static int
1816sysctl_vnode SYSCTL_HANDLER_ARGS
1817{
1818	struct proc *p = curproc;	/* XXX */
1819	struct mount *mp, *nmp;
1820	struct vnode *nvp, *vp;
1821	int error;
1822
1823#define VPTRSZ	sizeof (struct vnode *)
1824#define VNODESZ	sizeof (struct vnode)
1825
1826	req->lock = 0;
1827	if (!req->oldptr) /* Make an estimate */
1828		return (SYSCTL_OUT(req, 0,
1829			(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
1830
1831	simple_lock(&mountlist_slock);
1832	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1833		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1834			nmp = mp->mnt_list.cqe_next;
1835			continue;
1836		}
1837again:
1838		simple_lock(&mntvnode_slock);
1839		for (vp = mp->mnt_vnodelist.lh_first;
1840		     vp != NULL;
1841		     vp = nvp) {
1842			/*
1843			 * Check that the vp is still associated with
1844			 * this filesystem.  RACE: could have been
1845			 * recycled onto the same filesystem.
1846			 */
1847			if (vp->v_mount != mp) {
1848				simple_unlock(&mntvnode_slock);
1849				if (kinfo_vdebug)
1850					printf("kinfo: vp changed\n");
1851				goto again;
1852			}
1853			nvp = vp->v_mntvnodes.le_next;
1854			simple_unlock(&mntvnode_slock);
1855			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
1856			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
1857				return (error);
1858			simple_lock(&mntvnode_slock);
1859		}
1860		simple_unlock(&mntvnode_slock);
1861		simple_lock(&mountlist_slock);
1862		nmp = mp->mnt_list.cqe_next;
1863		vfs_unbusy(mp, p);
1864	}
1865	simple_unlock(&mountlist_slock);
1866
1867	return (0);
1868}
1869
1870/*
1871 * XXX
1872 * Exporting the vnode list on large systems causes them to crash.
1873 * Exporting the vnode list on medium systems causes sysctl to coredump.
1874 */
1875#if 0
1876SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
1877	0, 0, sysctl_vnode, "S,vnode", "");
1878#endif
1879
1880/*
1881 * Check to see if a filesystem is mounted on a block device.
1882 */
1883int
1884vfs_mountedon(vp)
1885	struct vnode *vp;
1886{
1887	struct vnode *vq;
1888	int error = 0;
1889
1890	if (vp->v_specflags & SI_MOUNTEDON)
1891		return (EBUSY);
1892	if (vp->v_flag & VALIASED) {
1893		simple_lock(&spechash_slock);
1894		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1895			if (vq->v_rdev != vp->v_rdev ||
1896			    vq->v_type != vp->v_type)
1897				continue;
1898			if (vq->v_specflags & SI_MOUNTEDON) {
1899				error = EBUSY;
1900				break;
1901			}
1902		}
1903		simple_unlock(&spechash_slock);
1904	}
1905	return (error);
1906}
1907
1908/*
1909 * Unmount all filesystems. The list is traversed in reverse order
1910 * of mounting to avoid dependencies.
1911 */
1912void
1913vfs_unmountall()
1914{
1915	struct mount *mp, *nmp;
1916	struct proc *p = initproc;	/* XXX XXX should this be proc0? */
1917	int error;
1918
1919	/*
1920	 * Since this only runs when rebooting, it is not interlocked.
1921	 */
1922	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
1923		nmp = mp->mnt_list.cqe_prev;
1924		error = dounmount(mp, MNT_FORCE, p);
1925		if (error) {
1926			printf("unmount of %s failed (",
1927			    mp->mnt_stat.f_mntonname);
1928			if (error == EBUSY)
1929				printf("BUSY)\n");
1930			else
1931				printf("%d)\n", error);
1932		}
1933	}
1934}
1935
1936/*
1937 * Build hash lists of net addresses and hang them off the mount point.
1938 * Called by ufs_mount() to set up the lists of export addresses.
1939 */
1940static int
1941vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
1942	struct export_args *argp)
1943{
1944	register struct netcred *np;
1945	register struct radix_node_head *rnh;
1946	register int i;
1947	struct radix_node *rn;
1948	struct sockaddr *saddr, *smask = 0;
1949	struct domain *dom;
1950	int error;
1951
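	/*
	 * An export with no address means "export to the world": record
	 * the flags and anonymous credential as the default export.
	 */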
1952	if (argp->ex_addrlen == 0) {
1953		if (mp->mnt_flag & MNT_DEFEXPORTED)
1954			return (EPERM);
1955		np = &nep->ne_defexported;
1956		np->netc_exflags = argp->ex_flags;
1957		np->netc_anon = argp->ex_anon;
1958		np->netc_anon.cr_ref = 1;
1959		mp->mnt_flag |= MNT_DEFEXPORTED;
1960		return (0);
1961	}
1962	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1963	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
1964	bzero((caddr_t) np, i);
1965	saddr = (struct sockaddr *) (np + 1);
1966	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
1967		goto out;
1968	if (saddr->sa_len > argp->ex_addrlen)
1969		saddr->sa_len = argp->ex_addrlen;
1970	if (argp->ex_masklen) {
1971		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
1972		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
1973		if (error)
1974			goto out;
1975		if (smask->sa_len > argp->ex_masklen)
1976			smask->sa_len = argp->ex_masklen;
1977	}
1978	i = saddr->sa_family;
1979	if ((rnh = nep->ne_rtable[i]) == 0) {
1980		/*
1981		 * Seems silly to initialize every AF when most are not used,
1982		 * do so on demand here
1983		 */
1984		for (dom = domains; dom; dom = dom->dom_next)
1985			if (dom->dom_family == i && dom->dom_rtattach) {
1986				dom->dom_rtattach((void **) &nep->ne_rtable[i],
1987				    dom->dom_rtoffset);
1988				break;
1989			}
1990		if ((rnh = nep->ne_rtable[i]) == 0) {
1991			error = ENOBUFS;
1992			goto out;
1993		}
1994	}
1995	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
1996	    np->netc_rnodes);
1997	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
1998		error = EPERM;
1999		goto out;
2000	}
2001	np->netc_exflags = argp->ex_flags;
2002	np->netc_anon = argp->ex_anon;
2003	np->netc_anon.cr_ref = 1;
2004	return (0);
2005out:
2006	free(np, M_NETADDR);
2007	return (error);
2008}
2009
2010/* ARGSUSED */
2011static int
2012vfs_free_netcred(struct radix_node *rn, void *w)
2013{
2014	register struct radix_node_head *rnh = (struct radix_node_head *) w;
2015
2016	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
2017	free((caddr_t) rn, M_NETADDR);
2018	return (0);
2019}
2020
2021/*
2022 * Free the net address hash lists that are hanging off the mount points.
2023 */
2024static void
2025vfs_free_addrlist(struct netexport *nep)
2026{
2027	register int i;
2028	register struct radix_node_head *rnh;
2029
2030	for (i = 0; i <= AF_MAX; i++)
2031		if ((rnh = nep->ne_rtable[i])) {
2032			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
2033			    (caddr_t) rnh);
2034			free((caddr_t) rnh, M_RTABLE);
2035			nep->ne_rtable[i] = 0;
2036		}
2037}
2038
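/*
 * Update the export list for a mount point: tear down the old export
 * information when MNT_DELEXPORT is set, and install the new address
 * list and flags when MNT_EXPORTED is set.
 */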
2039int
2040vfs_export(mp, nep, argp)
2041	struct mount *mp;
2042	struct netexport *nep;
2043	struct export_args *argp;
2044{
2045	int error;
2046
2047	if (argp->ex_flags & MNT_DELEXPORT) {
2048		if (mp->mnt_flag & MNT_EXPUBLIC) {
2049			vfs_setpublicfs(NULL, NULL, NULL);
2050			mp->mnt_flag &= ~MNT_EXPUBLIC;
2051		}
2052		vfs_free_addrlist(nep);
2053		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2054	}
2055	if (argp->ex_flags & MNT_EXPORTED) {
2056		if (argp->ex_flags & MNT_EXPUBLIC) {
2057			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2058				return (error);
2059			mp->mnt_flag |= MNT_EXPUBLIC;
2060		}
2061		if ((error = vfs_hang_addrlist(mp, nep, argp)))
2062			return (error);
2063		mp->mnt_flag |= MNT_EXPORTED;
2064	}
2065	return (0);
2066}
2067
2068
2069/*
2070 * Set the publicly exported filesystem (WebNFS). Currently, only
2071 * one public filesystem is possible in the spec (RFC 2054 and 2055)
2072 */
2073int
2074vfs_setpublicfs(mp, nep, argp)
2075	struct mount *mp;
2076	struct netexport *nep;
2077	struct export_args *argp;
2078{
2079	int error;
2080	struct vnode *rvp;
2081	char *cp;
2082
2083	/*
2084	 * mp == NULL -> invalidate the current info, the FS is
2085	 * no longer exported. May be called from either vfs_export
2086	 * or unmount, so check if it hasn't already been done.
2087	 */
2088	if (mp == NULL) {
2089		if (nfs_pub.np_valid) {
2090			nfs_pub.np_valid = 0;
2091			if (nfs_pub.np_index != NULL) {
2092				FREE(nfs_pub.np_index, M_TEMP);
2093				nfs_pub.np_index = NULL;
2094			}
2095		}
2096		return (0);
2097	}
2098
2099	/*
2100	 * Only one allowed at a time.
2101	 */
2102	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2103		return (EBUSY);
2104
2105	/*
2106	 * Get real filehandle for root of exported FS.
2107	 */
2108	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
2109	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
2110
2111	if ((error = VFS_ROOT(mp, &rvp)))
2112		return (error);
2113
2114	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
2115		return (error);
2116
2117	vput(rvp);
2118
2119	/*
2120	 * If an indexfile was specified, pull it in.
2121	 */
2122	if (argp->ex_indexfile != NULL) {
2123		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2124		    M_WAITOK);
2125		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2126		    MAXNAMLEN, (size_t *)0);
2127		if (!error) {
2128			/*
2129			 * Check for illegal filenames.
2130			 */
2131			for (cp = nfs_pub.np_index; *cp; cp++) {
2132				if (*cp == '/') {
2133					error = EINVAL;
2134					break;
2135				}
2136			}
2137		}
2138		if (error) {
2139			FREE(nfs_pub.np_index, M_TEMP);
2140			return (error);
2141		}
2142	}
2143
2144	nfs_pub.np_mount = mp;
2145	nfs_pub.np_valid = 1;
2146	return (0);
2147}
2148
2149struct netcred *
2150vfs_export_lookup(mp, nep, nam)
2151	register struct mount *mp;
2152	struct netexport *nep;
2153	struct sockaddr *nam;
2154{
2155	register struct netcred *np;
2156	register struct radix_node_head *rnh;
2157	struct sockaddr *saddr;
2158
2159	np = NULL;
2160	if (mp->mnt_flag & MNT_EXPORTED) {
2161		/*
2162		 * Lookup in the export list first.
2163		 */
2164		if (nam != NULL) {
2165			saddr = nam;
2166			rnh = nep->ne_rtable[saddr->sa_family];
2167			if (rnh != NULL) {
2168				np = (struct netcred *)
2169					(*rnh->rnh_matchaddr)((caddr_t)saddr,
2170							      rnh);
2171				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2172					np = NULL;
2173			}
2174		}
2175		/*
2176		 * If no address match, use the default if it exists.
2177		 */
2178		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2179			np = &nep->ne_defexported;
2180	}
2181	return (np);
2182}
2183
2184/*
2185 * perform msync on all vnodes under a mount point
2186 * the mount point must be locked.
2187 */
2188void
2189vfs_msync(struct mount *mp, int flags) {
2190	struct vnode *vp, *nvp;
2191loop:
2192	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
2193
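		/*
		 * Restart the scan if this vnode no longer belongs to
		 * this mount point (it may have been recycled).
		 */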
2194		if (vp->v_mount != mp)
2195			goto loop;
2196		nvp = vp->v_mntvnodes.le_next;
2197		if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))
2198			continue;
2199		if (vp->v_object &&
2200		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
2201			vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE);
2202		}
2203	}
2204}
2205
2206/*
2207 * Create the VM object needed for VMIO and mmap support.  This
2208 * is done for all VREG files in the system.  Some filesystems might
2209 * afford the additional metadata buffering capability of the
2210 * VMIO code by making the device node be VMIO mode also.
2211 */
2212int
2213vfs_object_create(vp, p, cred, waslocked)
2214	struct vnode *vp;
2215	struct proc *p;
2216	struct ucred *cred;
2217	int waslocked;
2218{
2219	struct vattr vat;
2220	vm_object_t object;
2221	int error = 0;
2222
2223retry:
2224	if ((object = vp->v_object) == NULL) {
2225		if (vp->v_type == VREG) {
2226			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
2227				goto retn;
2228			(void) vnode_pager_alloc(vp,
2229				OFF_TO_IDX(round_page(vat.va_size)), 0, 0);
2230		} else {
2231			/*
2232			 * This simply allocates the biggest object possible
2233			 * for a VBLK vnode.  This should be fixed, but doesn't
2234			 * cause any problems (yet).
2235			 */
2236			(void) vnode_pager_alloc(vp, INT_MAX, 0, 0);
2237		}
2238		vp->v_object->flags |= OBJ_VFS_REF;
2239	} else {
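		/*
		 * An object already exists.  If it is being torn down,
		 * wait for the deallocation to finish and retry;
		 * otherwise make sure it carries a VFS reference.
		 */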
2240		if (object->flags & OBJ_DEAD) {
2241			if (waslocked)
2242				VOP_UNLOCK(vp, 0, p);
2243			tsleep(object, PVM, "vodead", 0);
2244			if (waslocked)
2245				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2246			goto retry;
2247		}
2248		if ((object->flags & OBJ_VFS_REF) == 0) {
2249			object->flags |= OBJ_VFS_REF;
2250			vm_object_reference(object);
2251		}
2252	}
2253	if (vp->v_object)
2254		vp->v_flag |= VVMIO;
2255
2256retn:
2257	return error;
2258}
2259
2260void
2261vfree(vp)
2262	struct vnode *vp;
2263{
2264	simple_lock(&vnode_free_list_slock);
2265	if (vp->v_flag & VAGE) {
2266		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
2267	} else {
2268		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
2269	}
2270	freevnodes++;
2271	simple_unlock(&vnode_free_list_slock);
2272	vp->v_flag &= ~VAGE;
2273	vp->v_flag |= VFREE;
2274}
2275
2276void
2277vbusy(vp)
2278	struct vnode *vp;
2279{
2280	simple_lock(&vnode_free_list_slock);
2281	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
2282	freevnodes--;
2283	simple_unlock(&vnode_free_list_slock);
2284	vp->v_flag &= ~VFREE;
2285}
2286