vfs_subr.c revision 29506
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
39 * $Id: vfs_subr.c,v 1.103 1997/09/14 02:49:06 peter Exp $
40 */
41
42/*
43 * External virtual filesystem routines
44 */
45#include "opt_ddb.h"
46#include "opt_devfs.h"
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/kernel.h>
51#include <sys/proc.h>
52#include <sys/mount.h>
53#include <sys/vnode.h>
54#include <sys/stat.h>
55#include <sys/buf.h>
56#include <sys/malloc.h>
57#include <sys/poll.h>
58#include <sys/domain.h>
59#include <sys/dirent.h>
60
61#include <machine/limits.h>
62
63#include <vm/vm.h>
64#include <vm/vm_object.h>
65#include <vm/vm_extern.h>
66#include <vm/vnode_pager.h>
67#include <sys/sysctl.h>
68
69#include <miscfs/specfs/specdev.h>
70
71#ifdef DDB
72extern void	printlockedvnodes __P((void));
73#endif
74static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
75static void	vgonel __P((struct vnode *vp, struct proc *p));
76unsigned long	numvnodes;
77SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
78static void	vputrele __P((struct vnode *vp, int put));
79
80enum vtype iftovt_tab[16] = {
81	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
82	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
83};
84int vttoif_tab[9] = {
85	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
86	S_IFSOCK, S_IFIFO, S_IFMT,
87};
88
89/*
90 * Insq/Remq for the vnode usage lists.
91 */
92#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
93#define	bufremvn(bp) {							\
94	LIST_REMOVE(bp, b_vnbufs);					\
95	(bp)->b_vnbufs.le_next = NOLIST;				\
96}
97TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
98static u_long freevnodes = 0;
99SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
100
101struct mntlist mountlist;	/* mounted filesystem list */
102struct simplelock mountlist_slock;
103static struct simplelock mntid_slock;
104struct simplelock mntvnode_slock;
105struct simplelock vnode_free_list_slock;
106static struct simplelock spechash_slock;
107struct nfs_public nfs_pub;	/* publicly exported FS */
108
109int desiredvnodes;
110SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");
111
112static void	vfs_free_addrlist __P((struct netexport *nep));
113static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
114static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
115				       struct export_args *argp));
116
117/*
118 * Initialize the vnode management data structures.
119 */
120void
121vntblinit()
122{
123
124	desiredvnodes = maxproc + vm_object_cache_max;
125	simple_lock_init(&mntvnode_slock);
126	simple_lock_init(&mntid_slock);
127	simple_lock_init(&spechash_slock);
128	TAILQ_INIT(&vnode_free_list);
129	simple_lock_init(&vnode_free_list_slock);
130	CIRCLEQ_INIT(&mountlist);
131}
132
133/*
134 * Mark a mount point as busy. Used to synchronize access and to delay
135 * unmounting. Interlock is not released on failure.
136 */
137int
138vfs_busy(mp, flags, interlkp, p)
139	struct mount *mp;
140	int flags;
141	struct simplelock *interlkp;
142	struct proc *p;
143{
144	int lkflags;
145
146	if (mp->mnt_flag & MNT_UNMOUNT) {
147		if (flags & LK_NOWAIT)
148			return (ENOENT);
149		mp->mnt_flag |= MNT_MWAIT;
150		if (interlkp) {
151			simple_unlock(interlkp);
152		}
153		/*
154		 * Since all busy locks are shared except the exclusive
155		 * lock granted when unmounting, the only place that a
156		 * wakeup needs to be done is at the release of the
157		 * exclusive lock at the end of dounmount.
158		 */
159		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
160		if (interlkp) {
161			simple_lock(interlkp);
162		}
163		return (ENOENT);
164	}
165	lkflags = LK_SHARED;
166	if (interlkp)
167		lkflags |= LK_INTERLOCK;
168	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
169		panic("vfs_busy: unexpected lock failure");
170	return (0);
171}
172
173/*
174 * Free a busy filesystem.
175 */
176void
177vfs_unbusy(mp, p)
178	struct mount *mp;
179	struct proc *p;
180{
181
182	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
183}
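/*
 * Illustrative sketch (not part of this file's interface): callers
 * normally bracket work on a mount point with vfs_busy()/vfs_unbusy(),
 * letting vfs_busy() release the mountlist interlock for them when
 * walking the mount list, as printlockedvnodes() and sysctl_vnode()
 * below do:
 *
 *	simple_lock(&mountlist_slock);
 *	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
 *		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
 *			nmp = mp->mnt_list.cqe_next;
 *			continue;
 *		}
 *		... examine mp ...
 *		simple_lock(&mountlist_slock);
 *		nmp = mp->mnt_list.cqe_next;
 *		vfs_unbusy(mp, p);
 *	}
 *	simple_unlock(&mountlist_slock);
 */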
184
185/*
186 * Lookup a filesystem type, and if found allocate and initialize
187 * a mount structure for it.
188 *
189 * Devname is usually updated by mount(8) after booting.
190 */
191int
192vfs_rootmountalloc(fstypename, devname, mpp)
193	char *fstypename;
194	char *devname;
195	struct mount **mpp;
196{
197	struct proc *p = curproc;	/* XXX */
198	struct vfsconf *vfsp;
199	struct mount *mp;
200
201	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
202		if (!strcmp(vfsp->vfc_name, fstypename))
203			break;
204	if (vfsp == NULL)
205		return (ENODEV);
206	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
207	bzero((char *)mp, (u_long)sizeof(struct mount));
208	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
209	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
210	LIST_INIT(&mp->mnt_vnodelist);
211	mp->mnt_vfc = vfsp;
212	mp->mnt_op = vfsp->vfc_vfsops;
213	mp->mnt_flag = MNT_RDONLY;
214	mp->mnt_vnodecovered = NULLVP;
215	vfsp->vfc_refcount++;
216	mp->mnt_stat.f_type = vfsp->vfc_typenum;
217	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
218	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
219	mp->mnt_stat.f_mntonname[0] = '/';
220	mp->mnt_stat.f_mntonname[1] = 0;
221	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
222	*mpp = mp;
223	return (0);
224}
225
226/*
227 * Find an appropriate filesystem to use for the root. If a filesystem
228 * has not been preselected, walk through the list of known filesystems
229 * trying those that have mountroot routines, and try them until one
230 * works or we have tried them all.
231 */
232#ifdef notdef	/* XXX JH */
233int
234lite2_vfs_mountroot()
235{
236	struct vfsconf *vfsp;
237	extern int (*lite2_mountroot) __P((void));
238	int error;
239
240	if (lite2_mountroot != NULL)
241		return ((*lite2_mountroot)());
242	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
243		if (vfsp->vfc_mountroot == NULL)
244			continue;
245		if ((error = (*vfsp->vfc_mountroot)()) == 0)
246			return (0);
247		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
248	}
249	return (ENODEV);
250}
251#endif
252
253/*
254 * Lookup a mount point by filesystem identifier.
255 */
256struct mount *
257vfs_getvfs(fsid)
258	fsid_t *fsid;
259{
260	register struct mount *mp;
261
262	simple_lock(&mountlist_slock);
263	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
264	    mp = mp->mnt_list.cqe_next) {
265		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
266		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
267			simple_unlock(&mountlist_slock);
268			return (mp);
269		}
270	}
271	simple_unlock(&mountlist_slock);
272	return ((struct mount *) 0);
273}
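/*
 * Illustrative sketch (hypothetical caller): an NFS-style server that
 * has unpacked a file handle would map its fsid back to a mount point
 * with vfs_getvfs() and fail the request if the filesystem is no
 * longer mounted:
 *
 *	mp = vfs_getvfs(&fhp->fh_fsid);
 *	if (mp == NULL)
 *		return (ESTALE);
 */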
274
275/*
276 * Get a new unique fsid
277 */
278void
279vfs_getnewfsid(mp)
280	struct mount *mp;
281{
282	static u_short xxxfs_mntid;
283
284	fsid_t tfsid;
285	int mtype;
286
287	simple_lock(&mntid_slock);
288	mtype = mp->mnt_vfc->vfc_typenum;
289	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
290	mp->mnt_stat.f_fsid.val[1] = mtype;
291	if (xxxfs_mntid == 0)
292		++xxxfs_mntid;
293	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
294	tfsid.val[1] = mtype;
295	if (mountlist.cqh_first != (void *)&mountlist) {
296		while (vfs_getvfs(&tfsid)) {
297			tfsid.val[0]++;
298			xxxfs_mntid++;
299		}
300	}
301	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
302	simple_unlock(&mntid_slock);
303}
304
305/*
306 * Set vnode attributes to VNOVAL
307 */
308void
309vattr_null(vap)
310	register struct vattr *vap;
311{
312
313	vap->va_type = VNON;
314	vap->va_size = VNOVAL;
315	vap->va_bytes = VNOVAL;
316	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
317	    vap->va_fsid = vap->va_fileid =
318	    vap->va_blocksize = vap->va_rdev =
319	    vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
320	    vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
321	    vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
322	    vap->va_flags = vap->va_gen = VNOVAL;
323	vap->va_vaflags = 0;
324}
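/*
 * Illustrative sketch (assumed caller): vattr_null(), usually via the
 * VATTR_NULL() macro, marks every attribute "unspecified" so a caller
 * can set only the fields a VOP_SETATTR() should change, e.g. when
 * truncating a file to zero length:
 *
 *	struct vattr va;
 *
 *	vattr_null(&va);
 *	va.va_size = 0;
 *	error = VOP_SETATTR(vp, &va, cred, p);
 */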
325
326/*
327 * Routines having to do with the management of the vnode table.
328 */
329extern vop_t **dead_vnodeop_p;
330
331/*
332 * Return the next vnode from the free list.
333 */
334int
335getnewvnode(tag, mp, vops, vpp)
336	enum vtagtype tag;
337	struct mount *mp;
338	vop_t **vops;
339	struct vnode **vpp;
340{
341	struct proc *p = curproc;	/* XXX */
342	struct vnode *vp;
343
344	/*
345	 * We take the least recently used vnode from the freelist
346	 * if we can get it, it has no cached pages, and no
347	 * namecache entries refer to it.
348	 * Otherwise we allocate a new vnode.
349	 */
350
351	simple_lock(&vnode_free_list_slock);
352
353	if (freevnodes >= desiredvnodes) {
354		TAILQ_FOREACH(vp, &vnode_free_list, v_freelist) {
355			if (!simple_lock_try(&vp->v_interlock))
356				continue;
357			if (vp->v_usecount)
358				panic("free vnode isn't");
359
360			if (vp->v_object && vp->v_object->resident_page_count) {
361				/* Don't recycle if it's caching some pages */
362				simple_unlock(&vp->v_interlock);
363				continue;
364			} else if (LIST_FIRST(&vp->v_cache_src)) {
365				/* Don't recycle if active in the namecache */
366				simple_unlock(&vp->v_interlock);
367				continue;
368			} else {
369				break;
370			}
371		}
372	} else {
373		vp = NULL;
374	}
375
376	if (vp) {
377		vp->v_flag |= VDOOMED;
378		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
379		freevnodes--;
380		simple_unlock(&vnode_free_list_slock);
381		cache_purge(vp);
382		vp->v_lease = NULL;
383		if (vp->v_type != VBAD)
384			vgonel(vp, p);
385		else {
386			simple_unlock(&vp->v_interlock);
387		}
388
389#ifdef DIAGNOSTIC
390		{
391			int s;
392
393			if (vp->v_data)
394				panic("cleaned vnode isn't");
395			s = splbio();
396			if (vp->v_numoutput)
397				panic("Clean vnode has pending I/O's");
398			splx(s);
399		}
400#endif
401		vp->v_flag = 0;
402		vp->v_lastr = 0;
403		vp->v_lastw = 0;
404		vp->v_lasta = 0;
405		vp->v_cstart = 0;
406		vp->v_clen = 0;
407		vp->v_socket = 0;
408		vp->v_writecount = 0;	/* XXX */
409	} else {
410		simple_unlock(&vnode_free_list_slock);
411		vp = (struct vnode *) malloc((u_long) sizeof *vp,
412		    M_VNODE, M_WAITOK);
413		bzero((char *) vp, sizeof *vp);
414		vp->v_dd = vp;
415		cache_purge(vp);
416		LIST_INIT(&vp->v_cache_src);
417		TAILQ_INIT(&vp->v_cache_dst);
418		numvnodes++;
419	}
420
421	vp->v_type = VNON;
422	vp->v_tag = tag;
423	vp->v_op = vops;
424	insmntque(vp, mp);
425	*vpp = vp;
426	vp->v_usecount = 1;
427	vp->v_data = 0;
428	return (0);
429}
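/*
 * Illustrative sketch (hypothetical filesystem "myfs"): a filesystem's
 * node-allocation routine typically obtains its vnodes here, then hangs
 * its private data off v_data and sets v_type before using the vnode;
 * the tag and vnodeop vector shown are placeholders:
 *
 *	error = getnewvnode(VT_NON, mp, myfs_vnodeop_p, &vp);
 *	if (error)
 *		return (error);
 *	vp->v_data = mynode;
 *	vp->v_type = VREG;
 */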
430
431/*
432 * Move a vnode from one mount queue to another.
433 */
434void
435insmntque(vp, mp)
436	register struct vnode *vp;
437	register struct mount *mp;
438{
439
440	simple_lock(&mntvnode_slock);
441	/*
442	 * Delete from old mount point vnode list, if on one.
443	 */
444	if (vp->v_mount != NULL)
445		LIST_REMOVE(vp, v_mntvnodes);
446	/*
447	 * Insert into list of vnodes for the new mount point, if available.
448	 */
449	if ((vp->v_mount = mp) == NULL) {
450		simple_unlock(&mntvnode_slock);
451		return;
452	}
453	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
454	simple_unlock(&mntvnode_slock);
455}
456
457/*
458 * Update outstanding I/O count and do wakeup if requested.
459 */
460void
461vwakeup(bp)
462	register struct buf *bp;
463{
464	register struct vnode *vp;
465
466	bp->b_flags &= ~B_WRITEINPROG;
467	if ((vp = bp->b_vp)) {
468		vp->v_numoutput--;
469		if (vp->v_numoutput < 0)
470			panic("vwakeup: neg numoutput");
471		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
472			vp->v_flag &= ~VBWAIT;
473			wakeup((caddr_t) &vp->v_numoutput);
474		}
475	}
476}
477
478/*
479 * Flush out and invalidate all buffers associated with a vnode.
480 * Called with the underlying object locked.
481 */
482int
483vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
484	register struct vnode *vp;
485	int flags;
486	struct ucred *cred;
487	struct proc *p;
488	int slpflag, slptimeo;
489{
490	register struct buf *bp;
491	struct buf *nbp, *blist;
492	int s, error;
493	vm_object_t object;
494
495	if (flags & V_SAVE) {
496		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
497			return (error);
498		if (vp->v_dirtyblkhd.lh_first != NULL)
499			panic("vinvalbuf: dirty bufs");
500	}
501
502	s = splbio();
503	for (;;) {
504		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
505			while (blist && blist->b_lblkno < 0)
506				blist = blist->b_vnbufs.le_next;
507		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
508		    (flags & V_SAVEMETA))
509			while (blist && blist->b_lblkno < 0)
510				blist = blist->b_vnbufs.le_next;
511		if (!blist)
512			break;
513
514		for (bp = blist; bp; bp = nbp) {
515			nbp = bp->b_vnbufs.le_next;
516			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
517				continue;
518			if (bp->b_flags & B_BUSY) {
519				bp->b_flags |= B_WANTED;
520				error = tsleep((caddr_t) bp,
521				    slpflag | (PRIBIO + 1), "vinvalbuf",
522				    slptimeo);
523				if (error) {
524					splx(s);
525					return (error);
526				}
527				break;
528			}
529			bremfree(bp);
530			bp->b_flags |= B_BUSY;
531			/*
532			 * XXX Since there are no node locks for NFS, I
533			 * believe there is a slight chance that a delayed
534			 * write will occur while sleeping just above, so
535			 * check for it.
536			 */
537			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
538				(void) VOP_BWRITE(bp);
539				break;
540			}
541			bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
542			brelse(bp);
543		}
544	}
545
546	while (vp->v_numoutput > 0) {
547		vp->v_flag |= VBWAIT;
548		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
549	}
550
551	splx(s);
552
553	/*
554	 * Destroy the copy in the VM cache, too.
555	 */
556	object = vp->v_object;
557	if (object != NULL) {
558		vm_object_page_remove(object, 0, object->size,
559		    (flags & V_SAVE) ? TRUE : FALSE);
560	}
561	if (!(flags & V_SAVEMETA) &&
562	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
563		panic("vinvalbuf: flush failed");
564	return (0);
565}
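/*
 * Illustrative sketch (assumed caller): a filesystem truncating a file,
 * or closing a block device for the last time, flushes the vnode's
 * buffers first; V_SAVE writes dirty buffers back before invalidating
 * them, exactly as vclean() below does:
 *
 *	error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
 *	if (error)
 *		return (error);
 */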
566
567/*
568 * Associate a buffer with a vnode.
569 */
570void
571bgetvp(vp, bp)
572	register struct vnode *vp;
573	register struct buf *bp;
574{
575	int s;
576
577	if (bp->b_vp)
578		panic("bgetvp: not free");
579	vhold(vp);
580	bp->b_vp = vp;
581	if (vp->v_type == VBLK || vp->v_type == VCHR)
582		bp->b_dev = vp->v_rdev;
583	else
584		bp->b_dev = NODEV;
585	/*
586	 * Insert onto list for new vnode.
587	 */
588	s = splbio();
589	bufinsvn(bp, &vp->v_cleanblkhd);
590	splx(s);
591}
592
593/*
594 * Disassociate a buffer from a vnode.
595 */
596void
597brelvp(bp)
598	register struct buf *bp;
599{
600	struct vnode *vp;
601	int s;
602
603	if (bp->b_vp == (struct vnode *) 0)
604		panic("brelvp: NULL");
605	/*
606	 * Delete from old vnode list, if on one.
607	 */
608	s = splbio();
609	if (bp->b_vnbufs.le_next != NOLIST)
610		bufremvn(bp);
611	splx(s);
612
613	vp = bp->b_vp;
614	bp->b_vp = (struct vnode *) 0;
615	vdrop(vp);
616}
617
618/*
619 * Associate a p-buffer with a vnode.
620 */
621void
622pbgetvp(vp, bp)
623	register struct vnode *vp;
624	register struct buf *bp;
625{
626#if defined(DIAGNOSTIC)
627	if (bp->b_vp)
628		panic("pbgetvp: not free");
629#endif
630	bp->b_vp = vp;
631	if (vp->v_type == VBLK || vp->v_type == VCHR)
632		bp->b_dev = vp->v_rdev;
633	else
634		bp->b_dev = NODEV;
635}
636
637/*
638 * Disassociate a p-buffer from a vnode.
639 */
640void
641pbrelvp(bp)
642	register struct buf *bp;
643{
644	struct vnode *vp;
645
646#if defined(DIAGNOSTIC)
647	if (bp->b_vp == (struct vnode *) 0)
648		panic("pbrelvp: NULL");
649#endif
650
651	bp->b_vp = (struct vnode *) 0;
652}
653
654/*
655 * Reassign a buffer from one vnode to another.
656 * Used to assign file specific control information
657 * (indirect blocks) to the vnode to which they belong.
658 */
659void
660reassignbuf(bp, newvp)
661	register struct buf *bp;
662	register struct vnode *newvp;
663{
664	int s;
665
666	if (newvp == NULL) {
667		printf("reassignbuf: NULL\n");
668		return;
669	}
670
671	s = splbio();
672	/*
673	 * Delete from old vnode list, if on one.
674	 */
675	if (bp->b_vnbufs.le_next != NOLIST) {
676		bufremvn(bp);
677		vdrop(bp->b_vp);
678	}
679	/*
680	 * If dirty, put on list of dirty buffers; otherwise insert onto list
681	 * of clean buffers.
682	 */
683	if (bp->b_flags & B_DELWRI) {
684		struct buf *tbp;
685
686		tbp = newvp->v_dirtyblkhd.lh_first;
687		if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
688			bufinsvn(bp, &newvp->v_dirtyblkhd);
689		} else {
690			while (tbp->b_vnbufs.le_next &&
691				(tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
692				tbp = tbp->b_vnbufs.le_next;
693			}
694			LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
695		}
696	} else {
697		bufinsvn(bp, &newvp->v_cleanblkhd);
698	}
699	bp->b_vp = newvp;
700	vhold(bp->b_vp);
701	splx(s);
702}
703
704#ifndef DEVFS_ROOT
705/*
706 * Create a vnode for a block device.
707 * Used for mounting the root file system.
708 */
709int
710bdevvp(dev, vpp)
711	dev_t dev;
712	struct vnode **vpp;
713{
714	register struct vnode *vp;
715	struct vnode *nvp;
716	int error;
717
718	if (dev == NODEV)
719		return (0);
720	error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
721	if (error) {
722		*vpp = 0;
723		return (error);
724	}
725	vp = nvp;
726	vp->v_type = VBLK;
727	if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
728		vput(vp);
729		vp = nvp;
730	}
731	*vpp = vp;
732	return (0);
733}
734#endif /* !DEVFS_ROOT */
735
736/*
737 * Check to see if the new vnode represents a special device
738 * for which we already have a vnode (either because of
739 * bdevvp() or because of a different vnode representing
740 * the same block device). If such an alias exists, deallocate
741 * the existing contents and return the aliased vnode. The
742 * caller is responsible for filling it with its new contents.
743 */
744struct vnode *
745checkalias(nvp, nvp_rdev, mp)
746	register struct vnode *nvp;
747	dev_t nvp_rdev;
748	struct mount *mp;
749{
750	struct proc *p = curproc;	/* XXX */
751	struct vnode *vp;
752	struct vnode **vpp;
753
754	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
755		return (NULLVP);
756
757	vpp = &speclisth[SPECHASH(nvp_rdev)];
758loop:
759	simple_lock(&spechash_slock);
760	for (vp = *vpp; vp; vp = vp->v_specnext) {
761		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
762			continue;
763		/*
764		 * Alias, but not in use, so flush it out.
765		 */
766		simple_lock(&vp->v_interlock);
767		if (vp->v_usecount == 0) {
768			simple_unlock(&spechash_slock);
769			vgonel(vp, p);
770			goto loop;
771		}
772		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
773			simple_unlock(&spechash_slock);
774			goto loop;
775		}
776		break;
777	}
778	if (vp == NULL || vp->v_tag != VT_NON) {
779		MALLOC(nvp->v_specinfo, struct specinfo *,
780		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
781		nvp->v_rdev = nvp_rdev;
782		nvp->v_hashchain = vpp;
783		nvp->v_specnext = *vpp;
784		nvp->v_specflags = 0;
785		simple_unlock(&spechash_slock);
786		*vpp = nvp;
787		if (vp != NULLVP) {
788			nvp->v_flag |= VALIASED;
789			vp->v_flag |= VALIASED;
790			vput(vp);
791		}
792		return (NULLVP);
793	}
794	simple_unlock(&spechash_slock);
795	VOP_UNLOCK(vp, 0, p);
796	simple_lock(&vp->v_interlock);
797	vclean(vp, 0, p);
798	vp->v_op = nvp->v_op;
799	vp->v_tag = nvp->v_tag;
800	nvp->v_type = VNON;
801	insmntque(vp, mp);
802	return (vp);
803}
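/*
 * Illustrative sketch (hypothetical filesystem): an inode-to-vnode
 * routine that has just read in a device inode substitutes any existing
 * alias returned by checkalias() for the vnode it allocated:
 *
 *	vp->v_type = VBLK;			/* or VCHR */
 *	if ((nvp = checkalias(vp, rdev, mp)) != NULL) {
 *		... move the inode over to nvp and discard vp ...
 *		vp = nvp;
 *	}
 */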
804
805/*
806 * Grab a particular vnode from the free list, increment its
807 * reference count and lock it. The vnode lock bit is set while the
808 * vnode is being eliminated in vgone. The process is awakened
809 * when the transition is completed, and an error returned to
810 * indicate that the vnode is no longer usable (possibly having
811 * been changed to a new file system type).
812 */
813int
814vget(vp, flags, p)
815	register struct vnode *vp;
816	int flags;
817	struct proc *p;
818{
819	int error;
820
821	/*
822	 * If the vnode is in the process of being cleaned out for
823	 * another use, we wait for the cleaning to finish and then
824	 * return failure. Cleaning is determined by checking that
825	 * the VXLOCK flag is set.
826	 */
827	if ((flags & LK_INTERLOCK) == 0) {
828		simple_lock(&vp->v_interlock);
829	}
830	if (vp->v_flag & VXLOCK) {
831		vp->v_flag |= VXWANT;
832		simple_unlock(&vp->v_interlock);
833		tsleep((caddr_t)vp, PINOD, "vget", 0);
834		return (ENOENT);
835	}
836	vp->v_usecount++;
837	if (VSHOULDBUSY(vp))
838		vbusy(vp);
839	/*
840	 * Create the VM object, if needed
841	 */
842	if ((vp->v_type == VREG) &&
843		((vp->v_object == NULL) ||
844			(vp->v_object->flags & OBJ_VFS_REF) == 0 ||
845			(vp->v_object->flags & OBJ_DEAD))) {
846		/*
847		 * XXX vfs_object_create probably needs the interlock.
848		 */
849		simple_unlock(&vp->v_interlock);
850		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
851		simple_lock(&vp->v_interlock);
852	}
853	if (flags & LK_TYPE_MASK) {
854		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0)
855			vrele(vp);
856		return (error);
857	}
858	simple_unlock(&vp->v_interlock);
859	return (0);
860}
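/*
 * Illustrative sketch (assumed caller): code that holds a pointer to a
 * vnode but no reference takes one and locks it with vget(), then drops
 * both with vput() when finished:
 *
 *	if (vget(vp, LK_EXCLUSIVE, p))
 *		return (ENOENT);	/* vnode was being recycled */
 *	... use the locked, referenced vnode ...
 *	vput(vp);
 */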
861
862/*
863 * Stubs to use when there is no locking to be done on the underlying object.
864 * A minimal shared lock is necessary to ensure that the underlying object
865 * is not revoked while an operation is in progress. So, an active shared
866 * count is maintained in an auxiliary vnode lock structure.
867 */
868int
869vop_sharedlock(ap)
870	struct vop_lock_args /* {
871		struct vnode *a_vp;
872		int a_flags;
873		struct proc *a_p;
874	} */ *ap;
875{
876	/*
877	 * This code cannot be used until all the non-locking filesystems
878	 * (notably NFS) are converted to properly lock and release nodes.
879	 * Also, certain vnode operations change the locking state within
880	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
881	 * and symlink). Ideally these operations should not change the
882	 * lock state, but should be changed to let the caller of the
883	 * function unlock them. Otherwise all intermediate vnode layers
884	 * (such as union, umapfs, etc) must catch these functions to do
885	 * the necessary locking at their layer. Note that the inactive
886	 * and lookup operations also change their lock state, but this
887	 * cannot be avoided, so these two operations will always need
888	 * to be handled in intermediate layers.
889	 */
890	struct vnode *vp = ap->a_vp;
891	int vnflags, flags = ap->a_flags;
892
893	if (vp->v_vnlock == NULL) {
894		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
895			return (0);
896		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
897		    M_VNODE, M_WAITOK);
898		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
899	}
900	switch (flags & LK_TYPE_MASK) {
901	case LK_DRAIN:
902		vnflags = LK_DRAIN;
903		break;
904	case LK_EXCLUSIVE:
905#ifdef DEBUG_VFS_LOCKS
906		/*
907		 * Normally, we use shared locks here, but that confuses
908		 * the locking assertions.
909		 */
910		vnflags = LK_EXCLUSIVE;
911		break;
912#endif
913	case LK_SHARED:
914		vnflags = LK_SHARED;
915		break;
916	case LK_UPGRADE:
917	case LK_EXCLUPGRADE:
918	case LK_DOWNGRADE:
919		return (0);
920	case LK_RELEASE:
921	default:
922		panic("vop_sharedlock: bad operation %d", flags & LK_TYPE_MASK);
923	}
924	if (flags & LK_INTERLOCK)
925		vnflags |= LK_INTERLOCK;
926	return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
927}
928
929/*
930 * Stubs to use when there is no locking to be done on the underlying object.
931 * A minimal shared lock is necessary to ensure that the underlying object
932 * is not revoked while an operation is in progress. So, an active shared
933 * count is maintained in an auxiliary vnode lock structure.
934 */
935int
936vop_nolock(ap)
937	struct vop_lock_args /* {
938		struct vnode *a_vp;
939		int a_flags;
940		struct proc *a_p;
941	} */ *ap;
942{
943#ifdef notyet
944	/*
945	 * This code cannot be used until all the non-locking filesystems
946	 * (notably NFS) are converted to properly lock and release nodes.
947	 * Also, certain vnode operations change the locking state within
948	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
949	 * and symlink). Ideally these operations should not change the
950	 * lock state, but should be changed to let the caller of the
951	 * function unlock them. Otherwise all intermediate vnode layers
952	 * (such as union, umapfs, etc) must catch these functions to do
953	 * the necessary locking at their layer. Note that the inactive
954	 * and lookup operations also change their lock state, but this
955	 * cannot be avoided, so these two operations will always need
956	 * to be handled in intermediate layers.
957	 */
958	struct vnode *vp = ap->a_vp;
959	int vnflags, flags = ap->a_flags;
960
961	if (vp->v_vnlock == NULL) {
962		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
963			return (0);
964		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
965		    M_VNODE, M_WAITOK);
966		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
967	}
968	switch (flags & LK_TYPE_MASK) {
969	case LK_DRAIN:
970		vnflags = LK_DRAIN;
971		break;
972	case LK_EXCLUSIVE:
973	case LK_SHARED:
974		vnflags = LK_SHARED;
975		break;
976	case LK_UPGRADE:
977	case LK_EXCLUPGRADE:
978	case LK_DOWNGRADE:
979		return (0);
980	case LK_RELEASE:
981	default:
982		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
983	}
984	if (flags & LK_INTERLOCK)
985		vnflags |= LK_INTERLOCK;
986	return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
987#else /* for now */
988	/*
989	 * Since we are not using the lock manager, we must clear
990	 * the interlock here.
991	 */
992	if (ap->a_flags & LK_INTERLOCK) {
993		simple_unlock(&ap->a_vp->v_interlock);
994	}
995	return (0);
996#endif
997}
998
999/*
1000 * Do the inverse of vop_nolock, handling the interlock in a compatible way.
1001 */
1002int
1003vop_nounlock(ap)
1004	struct vop_unlock_args /* {
1005		struct vnode *a_vp;
1006		int a_flags;
1007		struct proc *a_p;
1008	} */ *ap;
1009{
1010	struct vnode *vp = ap->a_vp;
1011
1012	if (vp->v_vnlock == NULL) {
1013		if (ap->a_flags & LK_INTERLOCK)
1014			simple_unlock(&ap->a_vp->v_interlock);
1015		return (0);
1016	}
1017	return (lockmgr(vp->v_vnlock, LK_RELEASE | ap->a_flags,
1018		&ap->a_vp->v_interlock, ap->a_p));
1019}
1020
1021/*
1022 * Return whether or not the node is locked.
1023 */
1024int
1025vop_noislocked(ap)
1026	struct vop_islocked_args /* {
1027		struct vnode *a_vp;
1028	} */ *ap;
1029{
1030	struct vnode *vp = ap->a_vp;
1031
1032	if (vp->v_vnlock == NULL)
1033		return (0);
1034	return (lockstatus(vp->v_vnlock));
1035}
1036
1037/* #ifdef DIAGNOSTIC */
1038/*
1039 * Vnode reference, just increment the count
1040 */
1041void
1042vref(vp)
1043	struct vnode *vp;
1044{
1045	simple_lock(&vp->v_interlock);
1046	if (vp->v_usecount <= 0)
1047		panic("vref used where vget required");
1048
1049	vp->v_usecount++;
1050
1051	if ((vp->v_type == VREG) &&
1052		((vp->v_object == NULL) ||
1053			((vp->v_object->flags & OBJ_VFS_REF) == 0) ||
1054			(vp->v_object->flags & OBJ_DEAD))) {
1055		/*
1056		 * We need to lock the vnode during the time that
1057		 * the object is created.  This is necessary to
1058		 * keep the system from re-entrantly doing it
1059		 * multiple times.
1060		 * XXX vfs_object_create probably needs the interlock?
1061		 */
1062		simple_unlock(&vp->v_interlock);
1063		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
1064		return;
1065	}
1066	simple_unlock(&vp->v_interlock);
1067}
1068
1069/*
1070 * Vnode put/release.
1071 * If count drops to zero, call inactive routine and return to freelist.
1072 */
1073static void
1074vputrele(vp, put)
1075	struct vnode *vp;
1076	int put;
1077{
1078	struct proc *p = curproc;	/* XXX */
1079
1080#ifdef DIAGNOSTIC
1081	if (vp == NULL)
1082		panic("vputrele: null vp");
1083#endif
1084	simple_lock(&vp->v_interlock);
1085
1086	if ((vp->v_usecount == 2) &&
1087		vp->v_object &&
1088		(vp->v_object->flags & OBJ_VFS_REF)) {
1089		vp->v_usecount--;
1090		vp->v_object->flags &= ~OBJ_VFS_REF;
1091		if (put) {
1092			VOP_UNLOCK(vp, LK_INTERLOCK, p);
1093		} else {
1094			simple_unlock(&vp->v_interlock);
1095		}
1096		vm_object_deallocate(vp->v_object);
1097		return;
1098	}
1099
1100	if (vp->v_usecount > 1) {
1101		vp->v_usecount--;
1102		if (put) {
1103			VOP_UNLOCK(vp, LK_INTERLOCK, p);
1104		} else {
1105			simple_unlock(&vp->v_interlock);
1106		}
1107		return;
1108	}
1109
1110	if (vp->v_usecount < 1) {
1111#ifdef DIAGNOSTIC
1112		vprint("vputrele: negative ref count", vp);
1113#endif
1114		panic("vputrele: negative ref cnt");
1115	}
1116
1117	vp->v_usecount--;
1118	if (VSHOULDFREE(vp))
1119		vfree(vp);
1120	/*
1121	 * If we are doing a vput, the node is already locked, and we must
1122	 * call VOP_INACTIVE with the node locked.  So, in the case of
1123	 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
1124	 */
1125	if (put) {
1126		simple_unlock(&vp->v_interlock);
1127		VOP_INACTIVE(vp, p);
1128	} else if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1129		VOP_INACTIVE(vp, p);
1130	}
1131}
1132
1133/*
1134 * vput(), just unlock and vrele()
1135 */
1136void
1137vput(vp)
1138	struct vnode *vp;
1139{
1140	vputrele(vp, 1);
1141}
1142
1143void
1144vrele(vp)
1145	struct vnode *vp;
1146{
1147	vputrele(vp, 0);
1148}
1149
1150/*
1151 * Somebody doesn't want the vnode recycled.
1152 */
1153void
1154vhold(vp)
1155	register struct vnode *vp;
1156{
1157
1158	simple_lock(&vp->v_interlock);
1159	vp->v_holdcnt++;
1160	if (VSHOULDBUSY(vp))
1161		vbusy(vp);
1162	simple_unlock(&vp->v_interlock);
1163}
1164
1165/*
1166 * One less who cares about this vnode.
1167 */
1168void
1169vdrop(vp)
1170	register struct vnode *vp;
1171{
1172
1173	simple_lock(&vp->v_interlock);
1174	if (vp->v_holdcnt <= 0)
1175		panic("holdrele: holdcnt");
1176	vp->v_holdcnt--;
1177	if (VSHOULDFREE(vp))
1178		vfree(vp);
1179	simple_unlock(&vp->v_interlock);
1180}
1181
1182/*
1183 * Remove any vnodes in the vnode table belonging to mount point mp.
1184 *
1185 * If MNT_NOFORCE is specified, there should not be any active ones,
1186 * return error if any are found (nb: this is a user error, not a
1187 * system error). If MNT_FORCE is specified, detach any active vnodes
1188 * that are found.
1189 */
1190#ifdef DIAGNOSTIC
1191static int busyprt = 0;		/* print out busy vnodes */
1192SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
1193#endif
1194
1195int
1196vflush(mp, skipvp, flags)
1197	struct mount *mp;
1198	struct vnode *skipvp;
1199	int flags;
1200{
1201	struct proc *p = curproc;	/* XXX */
1202	struct vnode *vp, *nvp;
1203	int busy = 0;
1204
1205	simple_lock(&mntvnode_slock);
1206loop:
1207	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1208		/*
1209		 * Make sure this vnode wasn't reclaimed in getnewvnode().
1210		 * Start over if it was (it won't be on the list anymore).
1211		 */
1212		if (vp->v_mount != mp)
1213			goto loop;
1214		nvp = vp->v_mntvnodes.le_next;
1215		/*
1216		 * Skip over a selected vnode.
1217		 */
1218		if (vp == skipvp)
1219			continue;
1220
1221		simple_lock(&vp->v_interlock);
1222		/*
1223		 * Skip over vnodes marked VSYSTEM.
1224		 */
1225		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1226			simple_unlock(&vp->v_interlock);
1227			continue;
1228		}
1229		/*
1230		 * If WRITECLOSE is set, only flush out regular file vnodes
1231		 * open for writing.
1232		 */
1233		if ((flags & WRITECLOSE) &&
1234		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
1235			simple_unlock(&vp->v_interlock);
1236			continue;
1237		}
1238
1239		/*
1240		 * With v_usecount == 0, all we need to do is clear out the
1241		 * vnode data structures and we are done.
1242		 */
1243		if (vp->v_usecount == 0) {
1244			simple_unlock(&mntvnode_slock);
1245			vgonel(vp, p);
1246			simple_lock(&mntvnode_slock);
1247			continue;
1248		}
1249
1250		/*
1251		 * If FORCECLOSE is set, forcibly close the vnode. For block
1252		 * or character devices, revert to an anonymous device. For
1253		 * all other files, just kill them.
1254		 */
1255		if (flags & FORCECLOSE) {
1256			simple_unlock(&mntvnode_slock);
1257			if (vp->v_type != VBLK && vp->v_type != VCHR) {
1258				vgonel(vp, p);
1259			} else {
1260				vclean(vp, 0, p);
1261				vp->v_op = spec_vnodeop_p;
1262				insmntque(vp, (struct mount *) 0);
1263			}
1264			simple_lock(&mntvnode_slock);
1265			continue;
1266		}
1267#ifdef DIAGNOSTIC
1268		if (busyprt)
1269			vprint("vflush: busy vnode", vp);
1270#endif
1271		simple_unlock(&vp->v_interlock);
1272		busy++;
1273	}
1274	simple_unlock(&mntvnode_slock);
1275	if (busy)
1276		return (EBUSY);
1277	return (0);
1278}
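/*
 * Illustrative sketch (hypothetical unmount path): a filesystem's
 * unmount routine typically flushes every vnode on the mount, forcing
 * the issue only when MNT_FORCE was requested:
 *
 *	flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
 *	if ((error = vflush(mp, NULLVP, flags)) != 0)
 *		return (error);
 */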
1279
1280/*
1281 * Disassociate the underlying file system from a vnode.
1282 */
1283static void
1284vclean(vp, flags, p)
1285	struct vnode *vp;
1286	int flags;
1287	struct proc *p;
1288{
1289	int active, irefed;
1290	vm_object_t object;
1291
1292	/*
1293	 * Check to see if the vnode is in use. If so we have to reference it
1294	 * before we clean it out so that its count cannot fall to zero and
1295	 * generate a race against ourselves to recycle it.
1296	 */
1297	if ((active = vp->v_usecount))
1298		vp->v_usecount++;
1299	/*
1300	 * Prevent the vnode from being recycled or brought into use while we
1301	 * clean it out.
1302	 */
1303	if (vp->v_flag & VXLOCK)
1304		panic("vclean: deadlock");
1305	vp->v_flag |= VXLOCK;
1306	/*
1307	 * Even if the count is zero, the VOP_INACTIVE routine may still
1308	 * have the object locked while it cleans it out. The VOP_LOCK
1309	 * ensures that the VOP_INACTIVE routine is done with its work.
1310	 * For active vnodes, it ensures that no other activity can
1311	 * occur while the underlying object is being cleaned out.
1312	 */
1313	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1314
1315	object = vp->v_object;
1316	irefed = 0;
1317	if (object && ((object->flags & OBJ_DEAD) == 0)) {
1318		if (object->ref_count == 0) {
1319			vm_object_reference(object);
1320			irefed = 1;
1321		}
1322		++object->ref_count;
1323		pager_cache(object, FALSE);
1324	}
1325
1326	/*
1327	 * Clean out any buffers associated with the vnode.
1328	 */
1329	if (flags & DOCLOSE)
1330		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1331
1332	if (irefed) {
1333		vm_object_deallocate(object);
1334	}
1335
1336	/*
1337	 * If purging an active vnode, it must be closed and
1338	 * deactivated before being reclaimed. Note that the
1339	 * VOP_INACTIVE will unlock the vnode.
1340	 */
1341	if (active) {
1342		if (flags & DOCLOSE)
1343			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1344		VOP_INACTIVE(vp, p);
1345	} else {
1346		/*
1347		 * Any other processes trying to obtain this lock must first
1348		 * wait for VXLOCK to clear, then call the new lock operation.
1349		 */
1350		VOP_UNLOCK(vp, 0, p);
1351	}
1352	/*
1353	 * Reclaim the vnode.
1354	 */
1355	if (VOP_RECLAIM(vp, p))
1356		panic("vclean: cannot reclaim");
1357	if (active)
1358		vrele(vp);
1359	cache_purge(vp);
1360	if (vp->v_vnlock) {
1361#ifdef DIAGNOSTIC
1362		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1363			vprint("vclean: lock not drained", vp);
1364#endif
1365		FREE(vp->v_vnlock, M_VNODE);
1366		vp->v_vnlock = NULL;
1367	}
1368
1369	/*
1370	 * Done with purge, notify sleepers of the grim news.
1371	 */
1372	vp->v_op = dead_vnodeop_p;
1373	vp->v_tag = VT_NON;
1374	vp->v_flag &= ~VXLOCK;
1375	if (vp->v_flag & VXWANT) {
1376		vp->v_flag &= ~VXWANT;
1377		wakeup((caddr_t) vp);
1378	}
1379}
1380
1381/*
1382 * Eliminate all activity associated with the requested vnode
1383 * and with all vnodes aliased to the requested vnode.
1384 */
1385int
1386vop_revoke(ap)
1387	struct vop_revoke_args /* {
1388		struct vnode *a_vp;
1389		int a_flags;
1390	} */ *ap;
1391{
1392	struct vnode *vp, *vq;
1393	struct proc *p = curproc;	/* XXX */
1394
1395#ifdef DIAGNOSTIC
1396	if ((ap->a_flags & REVOKEALL) == 0)
1397		panic("vop_revoke");
1398#endif
1399
1400	vp = ap->a_vp;
1401	simple_lock(&vp->v_interlock);
1402
1403	if (vp->v_flag & VALIASED) {
1404		/*
1405		 * If a vgone (or vclean) is already in progress,
1406		 * wait until it is done and return.
1407		 */
1408		if (vp->v_flag & VXLOCK) {
1409			vp->v_flag |= VXWANT;
1410			simple_unlock(&vp->v_interlock);
1411			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1412			return (0);
1413		}
1414		/*
1415		 * Ensure that vp will not be vgone'd while we
1416		 * are eliminating its aliases.
1417		 */
1418		vp->v_flag |= VXLOCK;
1419		simple_unlock(&vp->v_interlock);
1420		while (vp->v_flag & VALIASED) {
1421			simple_lock(&spechash_slock);
1422			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1423				if (vq->v_rdev != vp->v_rdev ||
1424				    vq->v_type != vp->v_type || vp == vq)
1425					continue;
1426				simple_unlock(&spechash_slock);
1427				vgone(vq);
1428				break;
1429			}
1430			if (vq == NULLVP) {
1431				simple_unlock(&spechash_slock);
1432			}
1433		}
1434		/*
1435		 * Remove the lock so that vgone below will
1436		 * really eliminate the vnode after which time
1437		 * vgone will awaken any sleepers.
1438		 */
1439		simple_lock(&vp->v_interlock);
1440		vp->v_flag &= ~VXLOCK;
1441	}
1442	vgonel(vp, p);
1443	return (0);
1444}
1445
1446/*
1447 * Recycle an unused vnode to the front of the free list.
1448 * Release the passed interlock if the vnode will be recycled.
1449 */
1450int
1451vrecycle(vp, inter_lkp, p)
1452	struct vnode *vp;
1453	struct simplelock *inter_lkp;
1454	struct proc *p;
1455{
1456
1457	simple_lock(&vp->v_interlock);
1458	if (vp->v_usecount == 0) {
1459		if (inter_lkp) {
1460			simple_unlock(inter_lkp);
1461		}
1462		vgonel(vp, p);
1463		return (1);
1464	}
1465	simple_unlock(&vp->v_interlock);
1466	return (0);
1467}
1468
1469/*
1470 * Eliminate all activity associated with a vnode
1471 * in preparation for reuse.
1472 */
1473void
1474vgone(vp)
1475	register struct vnode *vp;
1476{
1477	struct proc *p = curproc;	/* XXX */
1478
1479	simple_lock(&vp->v_interlock);
1480	vgonel(vp, p);
1481}
1482
1483/*
1484 * vgone, with the vp interlock held.
1485 */
1486static void
1487vgonel(vp, p)
1488	struct vnode *vp;
1489	struct proc *p;
1490{
1491	struct vnode *vq;
1492	struct vnode *vx;
1493
1494	/*
1495	 * If a vgone (or vclean) is already in progress,
1496	 * wait until it is done and return.
1497	 */
1498	if (vp->v_flag & VXLOCK) {
1499		vp->v_flag |= VXWANT;
1500		simple_unlock(&vp->v_interlock);
1501		tsleep((caddr_t)vp, PINOD, "vgone", 0);
1502		return;
1503	}
1504
1505	if (vp->v_object) {
1506		vp->v_object->flags |= OBJ_VNODE_GONE;
1507	}
1508
1509	/*
1510	 * Clean out the filesystem specific data.
1511	 */
1512	vclean(vp, DOCLOSE, p);
1513	/*
1514	 * Delete from old mount point vnode list, if on one.
1515	 */
1516	if (vp->v_mount != NULL)
1517		insmntque(vp, (struct mount *)0);
1518	/*
1519	 * If special device, remove it from special device alias list
1520	 * if it is on one.
1521	 */
1522	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1523		simple_lock(&spechash_slock);
1524		if (*vp->v_hashchain == vp) {
1525			*vp->v_hashchain = vp->v_specnext;
1526		} else {
1527			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1528				if (vq->v_specnext != vp)
1529					continue;
1530				vq->v_specnext = vp->v_specnext;
1531				break;
1532			}
1533			if (vq == NULL)
1534				panic("missing bdev");
1535		}
1536		if (vp->v_flag & VALIASED) {
1537			vx = NULL;
1538			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1539				if (vq->v_rdev != vp->v_rdev ||
1540				    vq->v_type != vp->v_type)
1541					continue;
1542				if (vx)
1543					break;
1544				vx = vq;
1545			}
1546			if (vx == NULL)
1547				panic("missing alias");
1548			if (vq == NULL)
1549				vx->v_flag &= ~VALIASED;
1550			vp->v_flag &= ~VALIASED;
1551		}
1552		simple_unlock(&spechash_slock);
1553		FREE(vp->v_specinfo, M_VNODE);
1554		vp->v_specinfo = NULL;
1555	}
1556
1557	/*
1558	 * If it is on the freelist and not already at the head,
1559	 * move it to the head of the list. The test of the back
1560	 * pointer and the reference count of zero is because
1561	 * it will be removed from the free list by getnewvnode,
1562	 * but will not have its reference count incremented until
1563	 * after calling vgone. If the reference count were
1564	 * incremented first, vgone would (incorrectly) try to
1565	 * close the previous instance of the underlying object.
1566	 */
1567	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
1568		simple_lock(&vnode_free_list_slock);
1569		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1570		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1571		simple_unlock(&vnode_free_list_slock);
1572	}
1573
1574	vp->v_type = VBAD;
1575}
1576
1577/*
1578 * Lookup a vnode by device number.
1579 */
1580int
1581vfinddev(dev, type, vpp)
1582	dev_t dev;
1583	enum vtype type;
1584	struct vnode **vpp;
1585{
1586	register struct vnode *vp;
1587	int rc = 0;
1588
1589	simple_lock(&spechash_slock);
1590	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1591		if (dev != vp->v_rdev || type != vp->v_type)
1592			continue;
1593		*vpp = vp;
1594		rc = 1;
1595		break;
1596	}
1597	simple_unlock(&spechash_slock);
1598	return (rc);
1599}
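/*
 * Illustrative sketch (assumed caller): a driver that only has a device
 * number can recover the corresponding special vnode, if one exists,
 * before consulting vcount() below:
 *
 *	struct vnode *vp;
 *
 *	if (vfinddev(dev, VBLK, &vp) && vcount(vp) > 0)
 *		... the device is still referenced ...
 */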
1600
1601/*
1602 * Calculate the total number of references to a special device.
1603 */
1604int
1605vcount(vp)
1606	register struct vnode *vp;
1607{
1608	struct vnode *vq, *vnext;
1609	int count;
1610
1611loop:
1612	if ((vp->v_flag & VALIASED) == 0)
1613		return (vp->v_usecount);
1614	simple_lock(&spechash_slock);
1615	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1616		vnext = vq->v_specnext;
1617		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1618			continue;
1619		/*
1620		 * Alias, but not in use, so flush it out.
1621		 */
1622		if (vq->v_usecount == 0 && vq != vp) {
1623			simple_unlock(&spechash_slock);
1624			vgone(vq);
1625			goto loop;
1626		}
1627		count += vq->v_usecount;
1628	}
1629	simple_unlock(&spechash_slock);
1630	return (count);
1631}
1632
1633/*
1634 * Return true for select/poll.
1635 */
1636int
1637vop_nopoll(ap)
1638	struct vop_poll_args /* {
1639		struct vnode *a_vp;
1640		int  a_events;
1641		struct ucred *a_cred;
1642		struct proc *a_p;
1643	} */ *ap;
1644{
1645
1646	/*
1647	 * Just return what we were asked for.
1648	 */
1649	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1650}
1651
1652/*
1653 * Print out a description of a vnode.
1654 */
1655static char *typename[] =
1656{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
1657
1658void
1659vprint(label, vp)
1660	char *label;
1661	register struct vnode *vp;
1662{
1663	char buf[96];	/* large enough for all of the flag names below */
1664
1665	if (label != NULL)
1666		printf("%s: %x: ", label, vp);
1667	else
1668		printf("%x: ", vp);
1669	printf("type %s, usecount %d, writecount %d, refcount %ld,",
1670	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1671	    vp->v_holdcnt);
1672	buf[0] = '\0';
1673	if (vp->v_flag & VROOT)
1674		strcat(buf, "|VROOT");
1675	if (vp->v_flag & VTEXT)
1676		strcat(buf, "|VTEXT");
1677	if (vp->v_flag & VSYSTEM)
1678		strcat(buf, "|VSYSTEM");
1679	if (vp->v_flag & VXLOCK)
1680		strcat(buf, "|VXLOCK");
1681	if (vp->v_flag & VXWANT)
1682		strcat(buf, "|VXWANT");
1683	if (vp->v_flag & VBWAIT)
1684		strcat(buf, "|VBWAIT");
1685	if (vp->v_flag & VALIASED)
1686		strcat(buf, "|VALIASED");
1687	if (vp->v_flag & VDOOMED)
1688		strcat(buf, "|VDOOMED");
1689	if (vp->v_flag & VFREE)
1690		strcat(buf, "|VFREE");
1691	if (buf[0] != '\0')
1692		printf(" flags (%s)", &buf[1]);
1693	if (vp->v_data == NULL) {
1694		printf("\n");
1695	} else {
1696		printf("\n\t");
1697		VOP_PRINT(vp);
1698	}
1699}
1700
1701#ifdef DDB
1702/*
1703 * List all of the locked vnodes in the system.
1704 * Called when debugging the kernel.
1705 */
1706void
1707printlockedvnodes()
1708{
1709	struct proc *p = curproc;	/* XXX */
1710	struct mount *mp, *nmp;
1711	struct vnode *vp;
1712
1713	printf("Locked vnodes\n");
1714	simple_lock(&mountlist_slock);
1715	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1716		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1717			nmp = mp->mnt_list.cqe_next;
1718			continue;
1719		}
1720		for (vp = mp->mnt_vnodelist.lh_first;
1721		     vp != NULL;
1722		     vp = vp->v_mntvnodes.le_next) {
1723			if (VOP_ISLOCKED(vp))
1724				vprint((char *)0, vp);
1725		}
1726		simple_lock(&mountlist_slock);
1727		nmp = mp->mnt_list.cqe_next;
1728		vfs_unbusy(mp, p);
1729	}
1730	simple_unlock(&mountlist_slock);
1731}
1732#endif
1733
1734/*
1735 * Top level filesystem related information gathering.
1736 */
1737static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);
1738
1739static int
1740vfs_sysctl SYSCTL_HANDLER_ARGS
1741{
1742	int *name = (int *)arg1 - 1;	/* XXX */
1743	u_int namelen = arg2 + 1;	/* XXX */
1744	struct vfsconf *vfsp;
1745
1746#ifndef NO_COMPAT_PRELITE2
1747	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
1748	if (namelen == 1)
1749		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
1750#endif
1751
1752#ifdef notyet
1753	/* all sysctl names at this level are at least name and field */
1754	if (namelen < 2)
1755		return (ENOTDIR);		/* overloaded */
1756	if (name[0] != VFS_GENERIC) {
1757		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1758			if (vfsp->vfc_typenum == name[0])
1759				break;
1760		if (vfsp == NULL)
1761			return (EOPNOTSUPP);
1762		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1763		    oldp, oldlenp, newp, newlen, p));
1764	}
1765#endif
1766	switch (name[1]) {
1767	case VFS_MAXTYPENUM:
1768		if (namelen != 2)
1769			return (ENOTDIR);
1770		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
1771	case VFS_CONF:
1772		if (namelen != 3)
1773			return (ENOTDIR);	/* overloaded */
1774		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1775			if (vfsp->vfc_typenum == name[2])
1776				break;
1777		if (vfsp == NULL)
1778			return (EOPNOTSUPP);
1779		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
1780	}
1781	return (EOPNOTSUPP);
1782}
1783
1784SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
1785	"Generic filesystem");
1786
1787#ifndef NO_COMPAT_PRELITE2
1788
1789static int
1790sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
1791{
1792	int error;
1793	struct vfsconf *vfsp;
1794	struct ovfsconf ovfs;
1795
1796	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
1797		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
1798		strcpy(ovfs.vfc_name, vfsp->vfc_name);
1799		ovfs.vfc_index = vfsp->vfc_typenum;
1800		ovfs.vfc_refcount = vfsp->vfc_refcount;
1801		ovfs.vfc_flags = vfsp->vfc_flags;
1802		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
1803		if (error)
1804			return error;
1805	}
1806	return 0;
1807}
1808
1809#endif /* !NO_COMPAT_PRELITE2 */
1810
1811int kinfo_vdebug = 1;
1812int kinfo_vgetfailed;
1813
1814#define KINFO_VNODESLOP	10
1815/*
1816 * Dump vnode list (via sysctl).
1817 * Copyout address of vnode followed by vnode.
1818 */
1819/* ARGSUSED */
1820static int
1821sysctl_vnode SYSCTL_HANDLER_ARGS
1822{
1823	struct proc *p = curproc;	/* XXX */
1824	struct mount *mp, *nmp;
1825	struct vnode *nvp, *vp;
1826	int error;
1827
1828#define VPTRSZ	sizeof (struct vnode *)
1829#define VNODESZ	sizeof (struct vnode)
1830
1831	req->lock = 0;
1832	if (!req->oldptr) /* Make an estimate */
1833		return (SYSCTL_OUT(req, 0,
1834			(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
1835
1836	simple_lock(&mountlist_slock);
1837	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1838		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1839			nmp = mp->mnt_list.cqe_next;
1840			continue;
1841		}
1842again:
1843		simple_lock(&mntvnode_slock);
1844		for (vp = mp->mnt_vnodelist.lh_first;
1845		     vp != NULL;
1846		     vp = nvp) {
1847			/*
1848			 * Check that the vp is still associated with
1849			 * this filesystem.  RACE: could have been
1850			 * recycled onto the same filesystem.
1851			 */
1852			if (vp->v_mount != mp) {
1853				simple_unlock(&mntvnode_slock);
1854				if (kinfo_vdebug)
1855					printf("kinfo: vp changed\n");
1856				goto again;
1857			}
1858			nvp = vp->v_mntvnodes.le_next;
1859			simple_unlock(&mntvnode_slock);
1860			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
1861			    (error = SYSCTL_OUT(req, vp, VNODESZ))) {
				/* Don't leave the mount point busied on error. */
				vfs_unbusy(mp, p);
				return (error);
			}
1863			simple_lock(&mntvnode_slock);
1864		}
1865		simple_unlock(&mntvnode_slock);
1866		simple_lock(&mountlist_slock);
1867		nmp = mp->mnt_list.cqe_next;
1868		vfs_unbusy(mp, p);
1869	}
1870	simple_unlock(&mountlist_slock);
1871
1872	return (0);
1873}
1874
1875/*
1876 * XXX
1877 * Exporting the vnode list on large systems causes them to crash.
1878 * Exporting the vnode list on medium systems causes sysctl to coredump.
1879 */
1880#if 0
1881SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
1882	0, 0, sysctl_vnode, "S,vnode", "");
1883#endif
1884
1885/*
1886 * Check to see if a filesystem is mounted on a block device.
1887 */
1888int
1889vfs_mountedon(vp)
1890	struct vnode *vp;
1891{
1892	struct vnode *vq;
1893	int error = 0;
1894
1895	if (vp->v_specflags & SI_MOUNTEDON)
1896		return (EBUSY);
1897	if (vp->v_flag & VALIASED) {
1898		simple_lock(&spechash_slock);
1899		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1900			if (vq->v_rdev != vp->v_rdev ||
1901			    vq->v_type != vp->v_type)
1902				continue;
1903			if (vq->v_specflags & SI_MOUNTEDON) {
1904				error = EBUSY;
1905				break;
1906			}
1907		}
1908		simple_unlock(&spechash_slock);
1909	}
1910	return (error);
1911}
1912
1913/*
1914 * Unmount all filesystems. The list is traversed in reverse order
1915 * of mounting to avoid dependencies.
1916 */
1917void
1918vfs_unmountall()
1919{
1920	struct mount *mp, *nmp;
1921	struct proc *p = initproc;	/* XXX XXX should this be proc0? */
1922	int error;
1923
1924	/*
1925	 * Since this only runs when rebooting, it is not interlocked.
1926	 */
1927	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
1928		nmp = mp->mnt_list.cqe_prev;
1929		error = dounmount(mp, MNT_FORCE, p);
1930		if (error) {
1931			printf("unmount of %s failed (",
1932			    mp->mnt_stat.f_mntonname);
1933			if (error == EBUSY)
1934				printf("BUSY)\n");
1935			else
1936				printf("%d)\n", error);
1937		}
1938	}
1939}
1940
1941/*
1942 * Build hash lists of net addresses and hang them off the mount point.
1943 * Called by ufs_mount() to set up the lists of export addresses.
1944 */
1945static int
1946vfs_hang_addrlist(mp, nep, argp)
1947	struct mount *mp;
1948	struct netexport *nep;
1949	struct export_args *argp;
1950{
1951	register struct netcred *np;
1952	register struct radix_node_head *rnh;
1953	register int i;
1954	struct radix_node *rn;
1955	struct sockaddr *saddr, *smask = 0;
1956	struct domain *dom;
1957	int error;
1958
1959	if (argp->ex_addrlen == 0) {
1960		if (mp->mnt_flag & MNT_DEFEXPORTED)
1961			return (EPERM);
1962		np = &nep->ne_defexported;
1963		np->netc_exflags = argp->ex_flags;
1964		np->netc_anon = argp->ex_anon;
1965		np->netc_anon.cr_ref = 1;
1966		mp->mnt_flag |= MNT_DEFEXPORTED;
1967		return (0);
1968	}
1969	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1970	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
1971	bzero((caddr_t) np, i);
1972	saddr = (struct sockaddr *) (np + 1);
1973	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
1974		goto out;
1975	if (saddr->sa_len > argp->ex_addrlen)
1976		saddr->sa_len = argp->ex_addrlen;
1977	if (argp->ex_masklen) {
1978		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
1979		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
1980		if (error)
1981			goto out;
1982		if (smask->sa_len > argp->ex_masklen)
1983			smask->sa_len = argp->ex_masklen;
1984	}
1985	i = saddr->sa_family;
1986	if ((rnh = nep->ne_rtable[i]) == 0) {
1987		/*
1988		 * Seems silly to initialize every AF when most are not used,
1989		 * so do so on demand here.
1990		 */
1991		for (dom = domains; dom; dom = dom->dom_next)
1992			if (dom->dom_family == i && dom->dom_rtattach) {
1993				dom->dom_rtattach((void **) &nep->ne_rtable[i],
1994				    dom->dom_rtoffset);
1995				break;
1996			}
1997		if ((rnh = nep->ne_rtable[i]) == 0) {
1998			error = ENOBUFS;
1999			goto out;
2000		}
2001	}
2002	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
2003	    np->netc_rnodes);
2004	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
2005		error = EPERM;
2006		goto out;
2007	}
2008	np->netc_exflags = argp->ex_flags;
2009	np->netc_anon = argp->ex_anon;
2010	np->netc_anon.cr_ref = 1;
2011	return (0);
2012out:
2013	free(np, M_NETADDR);
2014	return (error);
2015}
2016
2017/* ARGSUSED */
2018static int
2019vfs_free_netcred(rn, w)
2020	struct radix_node *rn;
2021	void *w;
2022{
2023	register struct radix_node_head *rnh = (struct radix_node_head *) w;
2024
2025	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
2026	free((caddr_t) rn, M_NETADDR);
2027	return (0);
2028}
2029
2030/*
2031 * Free the net address hash lists that are hanging off the mount points.
2032 */
2033static void
2034vfs_free_addrlist(nep)
2035	struct netexport *nep;
2036{
2037	register int i;
2038	register struct radix_node_head *rnh;
2039
2040	for (i = 0; i <= AF_MAX; i++)
2041		if ((rnh = nep->ne_rtable[i])) {
2042			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
2043			    (caddr_t) rnh);
2044			free((caddr_t) rnh, M_RTABLE);
2045			nep->ne_rtable[i] = 0;
2046		}
2047}
2048
2049int
2050vfs_export(mp, nep, argp)
2051	struct mount *mp;
2052	struct netexport *nep;
2053	struct export_args *argp;
2054{
2055	int error;
2056
2057	if (argp->ex_flags & MNT_DELEXPORT) {
2058		if (mp->mnt_flag & MNT_EXPUBLIC) {
2059			vfs_setpublicfs(NULL, NULL, NULL);
2060			mp->mnt_flag &= ~MNT_EXPUBLIC;
2061		}
2062		vfs_free_addrlist(nep);
2063		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2064	}
2065	if (argp->ex_flags & MNT_EXPORTED) {
2066		if (argp->ex_flags & MNT_EXPUBLIC) {
2067			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2068				return (error);
2069			mp->mnt_flag |= MNT_EXPUBLIC;
2070		}
2071		if ((error = vfs_hang_addrlist(mp, nep, argp)))
2072			return (error);
2073		mp->mnt_flag |= MNT_EXPORTED;
2074	}
2075	return (0);
2076}
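/*
 * Illustrative sketch (hypothetical filesystem "myfs"): a mount routine
 * handling an MNT_UPDATE export request forwards the export_args here,
 * keeping the netexport structure in its private per-mount data
 * (myfsmp->mnt_export is an assumed field):
 *
 *	if (mp->mnt_flag & MNT_UPDATE)
 *		return (vfs_export(mp, &myfsmp->mnt_export, &args.export));
 */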
2077
2078
2079/*
2080 * Set the publicly exported filesystem (WebNFS). Currently, only
2081 * one public filesystem is possible in the spec (RFC 2054 and 2055).
2082 */
2083int
2084vfs_setpublicfs(mp, nep, argp)
2085	struct mount *mp;
2086	struct netexport *nep;
2087	struct export_args *argp;
2088{
2089	int error;
2090	struct vnode *rvp;
2091	char *cp;
2092
2093	/*
2094	 * mp == NULL -> invalidate the current info, the FS is
2095	 * no longer exported. May be called from either vfs_export
2096	 * or unmount, so check if it hasn't already been done.
2097	 */
2098	if (mp == NULL) {
2099		if (nfs_pub.np_valid) {
2100			nfs_pub.np_valid = 0;
2101			if (nfs_pub.np_index != NULL) {
2102				FREE(nfs_pub.np_index, M_TEMP);
2103				nfs_pub.np_index = NULL;
2104			}
2105		}
2106		return (0);
2107	}
2108
2109	/*
2110	 * Only one allowed at a time.
2111	 */
2112	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2113		return (EBUSY);
2114
2115	/*
2116	 * Get real filehandle for root of exported FS.
2117	 */
2118	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
2119	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
2120
2121	if ((error = VFS_ROOT(mp, &rvp)))
2122		return (error);
2123
2124	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
2125		return (error);
2126
2127	vput(rvp);
2128
2129	/*
2130	 * If an indexfile was specified, pull it in.
2131	 */
2132	if (argp->ex_indexfile != NULL) {
2133		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2134		    M_WAITOK);
2135		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2136		    MAXNAMLEN, (size_t *)0);
2137		if (!error) {
2138			/*
2139			 * Check for illegal filenames.
2140			 */
2141			for (cp = nfs_pub.np_index; *cp; cp++) {
2142				if (*cp == '/') {
2143					error = EINVAL;
2144					break;
2145				}
2146			}
2147		}
2148		if (error) {
2149			FREE(nfs_pub.np_index, M_TEMP);
2150			return (error);
2151		}
2152	}
2153
2154	nfs_pub.np_mount = mp;
2155	nfs_pub.np_valid = 1;
2156	return (0);
2157}
2158
2159struct netcred *
2160vfs_export_lookup(mp, nep, nam)
2161	register struct mount *mp;
2162	struct netexport *nep;
2163	struct sockaddr *nam;
2164{
2165	register struct netcred *np;
2166	register struct radix_node_head *rnh;
2167	struct sockaddr *saddr;
2168
2169	np = NULL;
2170	if (mp->mnt_flag & MNT_EXPORTED) {
2171		/*
2172		 * Lookup in the export list first.
2173		 */
2174		if (nam != NULL) {
2175			saddr = nam;
2176			rnh = nep->ne_rtable[saddr->sa_family];
2177			if (rnh != NULL) {
2178				np = (struct netcred *)
2179					(*rnh->rnh_matchaddr)((caddr_t)saddr,
2180							      rnh);
2181				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2182					np = NULL;
2183			}
2184		}
2185		/*
2186		 * If no address match, use the default if it exists.
2187		 */
2188		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2189			np = &nep->ne_defexported;
2190	}
2191	return (np);
2192}
2193
2194/*
2195 * Perform msync on all vnodes under a mount point.
2196 * The mount point must be locked.
2197 */
2198void
2199vfs_msync(struct mount *mp, int flags) {
2200	struct vnode *vp, *nvp;
2201loop:
2202	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
2203
2204		if (vp->v_mount != mp)
2205			goto loop;
2206		nvp = vp->v_mntvnodes.le_next;
2207		if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))
2208			continue;
2209		if (vp->v_object &&
2210		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
2211			vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE);
2212		}
2213	}
2214}
2215
2216/*
2217 * Create the VM object needed for VMIO and mmap support.  This
2218 * is done for all VREG files in the system.  Some filesystems may
2219 * also take advantage of the additional metadata buffering the VMIO
2220 * code provides by making their device nodes VMIO-backed as well.
2221 */
2222int
2223vfs_object_create(vp, p, cred, waslocked)
2224	struct vnode *vp;
2225	struct proc *p;
2226	struct ucred *cred;
2227	int waslocked;
2228{
2229	struct vattr vat;
2230	vm_object_t object;
2231	int error = 0;
2232
2233retry:
2234	if ((object = vp->v_object) == NULL) {
2235		if (vp->v_type == VREG) {
2236			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
2237				goto retn;
2238			(void) vnode_pager_alloc(vp,
2239				OFF_TO_IDX(round_page(vat.va_size)), 0, 0);
2240		} else {
2241			/*
2242			 * This simply allocates the biggest object possible
2243			 * for a VBLK vnode.  This should be fixed, but doesn't
2244			 * cause any problems (yet).
2245			 */
2246			(void) vnode_pager_alloc(vp, INT_MAX, 0, 0);
2247		}
2248		vp->v_object->flags |= OBJ_VFS_REF;
2249	} else {
2250		if (object->flags & OBJ_DEAD) {
2251			if (waslocked)
2252				VOP_UNLOCK(vp, 0, p);
2253			tsleep(object, PVM, "vodead", 0);
2254			if (waslocked)
2255				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2256			goto retry;
2257		}
2258		if ((object->flags & OBJ_VFS_REF) == 0) {
2259			object->flags |= OBJ_VFS_REF;
2260			vm_object_reference(object);
2261		}
2262	}
2263	if (vp->v_object)
2264		vp->v_flag |= VVMIO;
2265
2266retn:
2267	return error;
2268}
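/*
 * Illustrative sketch (assumed call site): open-time code ensures a
 * regular file has its VM object before I/O so the buffer cache and the
 * page cache stay coherent, just as vget() and vref() above do for
 * vnodes being re-referenced; waslocked is 1 because the caller already
 * holds the vnode lock:
 *
 *	if (vp->v_type == VREG &&
 *	    (error = vfs_object_create(vp, p, cred, 1)) != 0)
 *		goto bad;
 */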
2269
2270void
2271vfree(vp)
2272	struct vnode *vp;
2273{
2274	simple_lock(&vnode_free_list_slock);
2275	if (vp->v_flag & VAGE) {
2276		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
2277	} else {
2278		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
2279	}
2280	freevnodes++;
2281	simple_unlock(&vnode_free_list_slock);
2282	vp->v_flag &= ~VAGE;
2283	vp->v_flag |= VFREE;
2284}
2285
2286void
2287vbusy(vp)
2288	struct vnode *vp;
2289{
2290	simple_lock(&vnode_free_list_slock);
2291	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
2292	freevnodes--;
2293	simple_unlock(&vnode_free_list_slock);
2294	vp->v_flag &= ~VFREE;
2295}
2296