vfs_export.c revision 29358
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
39 * $Id: vfs_subr.c,v 1.102 1997/09/13 15:02:28 peter Exp $
40 */
41
42/*
43 * External virtual filesystem routines
44 */
45#include "opt_ddb.h"
46#include "opt_devfs.h"
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/kernel.h>
51#include <sys/proc.h>
52#include <sys/mount.h>
53#include <sys/vnode.h>
54#include <sys/stat.h>
55#include <sys/buf.h>
56#include <sys/malloc.h>
57#include <sys/poll.h>
58#include <sys/domain.h>
59#include <sys/dirent.h>
60
61#include <machine/limits.h>
62
63#include <vm/vm.h>
64#include <vm/vm_object.h>
65#include <vm/vm_extern.h>
66#include <vm/vnode_pager.h>
67#include <sys/sysctl.h>
68
69#include <miscfs/specfs/specdev.h>
70
71#ifdef DDB
72extern void	printlockedvnodes __P((void));
73#endif
74static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
75static void	vgonel __P((struct vnode *vp, struct proc *p));
76unsigned long	numvnodes;
77SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
78static void	vputrele __P((struct vnode *vp, int put));
79
80enum vtype iftovt_tab[16] = {
81	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
82	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
83};
84int vttoif_tab[9] = {
85	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
86	S_IFSOCK, S_IFIFO, S_IFMT,
87};
88
89/*
90 * Insq/Remq for the vnode usage lists.
91 */
92#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
93#define	bufremvn(bp) {							\
94	LIST_REMOVE(bp, b_vnbufs);					\
95	(bp)->b_vnbufs.le_next = NOLIST;				\
96}
97TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
98static u_long freevnodes = 0;
99SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
100
101struct mntlist mountlist;	/* mounted filesystem list */
102struct simplelock mountlist_slock;
103static struct simplelock mntid_slock;
104struct simplelock mntvnode_slock;
105struct simplelock vnode_free_list_slock;
106static struct simplelock spechash_slock;
107struct nfs_public nfs_pub;	/* publicly exported FS */
108
109int desiredvnodes;
110SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");
111
112static void	vfs_free_addrlist __P((struct netexport *nep));
113static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
114static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
115				       struct export_args *argp));
116
117/*
118 * Initialize the vnode management data structures.
119 */
120void
121vntblinit()
122{
123
124	desiredvnodes = maxproc + vm_object_cache_max;
125	simple_lock_init(&mntvnode_slock);
126	simple_lock_init(&mntid_slock);
127	simple_lock_init(&spechash_slock);
128	TAILQ_INIT(&vnode_free_list);
129	simple_lock_init(&vnode_free_list_slock);
130	CIRCLEQ_INIT(&mountlist);
131}
132
133/*
134 * Mark a mount point as busy. Used to synchronize access and to delay
135 * unmounting. Interlock is not released on failure.
136 */
137int
138vfs_busy(mp, flags, interlkp, p)
139	struct mount *mp;
140	int flags;
141	struct simplelock *interlkp;
142	struct proc *p;
143{
144	int lkflags;
145
146	if (mp->mnt_flag & MNT_UNMOUNT) {
147		if (flags & LK_NOWAIT)
148			return (ENOENT);
149		mp->mnt_flag |= MNT_MWAIT;
150		if (interlkp) {
151			simple_unlock(interlkp);
152		}
153		/*
154		 * Since all busy locks are shared except the exclusive
155		 * lock granted when unmounting, the only place that a
156		 * wakeup needs to be done is at the release of the
157		 * exclusive lock at the end of dounmount.
158		 */
159		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
160		if (interlkp) {
161			simple_lock(interlkp);
162		}
163		return (ENOENT);
164	}
165	lkflags = LK_SHARED;
166	if (interlkp)
167		lkflags |= LK_INTERLOCK;
168	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
169		panic("vfs_busy: unexpected lock failure");
170	return (0);
171}
172
173/*
174 * Free a busy filesystem.
175 */
176void
177vfs_unbusy(mp, p)
178	struct mount *mp;
179	struct proc *p;
180{
181
182	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
183}
184
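/*
 * Illustrative sketch (not part of the original file): callers that
 * walk the mount list bracket their work with vfs_busy()/vfs_unbusy(),
 * as printlockedvnodes() and sysctl_vnode() below do; "mp" and "p"
 * are assumed to come from the caller.
 *
 *	if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p) == 0) {
 *		... examine mp->mnt_vnodelist ...
 *		vfs_unbusy(mp, p);
 *	}
 */
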
185/*
186 * Lookup a filesystem type, and if found allocate and initialize
187 * a mount structure for it.
188 *
189 * Devname is usually updated by mount(8) after booting.
190 */
191int
192vfs_rootmountalloc(fstypename, devname, mpp)
193	char *fstypename;
194	char *devname;
195	struct mount **mpp;
196{
197	struct proc *p = curproc;	/* XXX */
198	struct vfsconf *vfsp;
199	struct mount *mp;
200
201	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
202		if (!strcmp(vfsp->vfc_name, fstypename))
203			break;
204	if (vfsp == NULL)
205		return (ENODEV);
206	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
207	bzero((char *)mp, (u_long)sizeof(struct mount));
208	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
209	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
210	LIST_INIT(&mp->mnt_vnodelist);
211	mp->mnt_vfc = vfsp;
212	mp->mnt_op = vfsp->vfc_vfsops;
213	mp->mnt_flag = MNT_RDONLY;
214	mp->mnt_vnodecovered = NULLVP;
215	vfsp->vfc_refcount++;
216	mp->mnt_stat.f_type = vfsp->vfc_typenum;
217	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
218	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
219	mp->mnt_stat.f_mntonname[0] = '/';
220	mp->mnt_stat.f_mntonname[1] = 0;
221	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
222	*mpp = mp;
223	return (0);
224}
225
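/*
 * Illustrative sketch: a filesystem's mountroot routine can use this
 * to obtain a pre-busied, read-only mount structure before doing the
 * real mount; the fstype name and placeholder device name are
 * assumptions about the caller.
 *
 *	if ((error = vfs_rootmountalloc("ufs", "root_device", &mp)))
 *		return (error);
 */
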
226/*
227 * Find an appropriate filesystem to use for the root. If a filesystem
228 * has not been preselected, walk through the list of known filesystems
229 * trying those that have mountroot routines until one works or
230 * we have tried them all.
231 */
232#ifdef notdef	/* XXX JH */
233int
234lite2_vfs_mountroot(void)
235{
236	struct vfsconf *vfsp;
237	extern int (*lite2_mountroot)(void);
238	int error;
239
240	if (lite2_mountroot != NULL)
241		return ((*lite2_mountroot)());
242	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
243		if (vfsp->vfc_mountroot == NULL)
244			continue;
245		if ((error = (*vfsp->vfc_mountroot)()) == 0)
246			return (0);
247		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
248	}
249	return (ENODEV);
250}
251#endif
252
253/*
254 * Lookup a mount point by filesystem identifier.
255 */
256struct mount *
257vfs_getvfs(fsid)
258	fsid_t *fsid;
259{
260	register struct mount *mp;
261
262	simple_lock(&mountlist_slock);
263	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
264	    mp = mp->mnt_list.cqe_next) {
265		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
266		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
267			simple_unlock(&mountlist_slock);
268			return (mp);
269		}
270	}
271	simple_unlock(&mountlist_slock);
272	return ((struct mount *) 0);
273}
274
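/*
 * Illustrative sketch: the NFS server resolves the fsid stored in a
 * file handle back to its mount point this way ("fhp" is assumed to
 * point at a struct fhandle).
 *
 *	mp = vfs_getvfs(&fhp->fh_fsid);
 *	if (mp == NULL)
 *		return (ESTALE);
 */
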
275/*
276 * Get a new unique fsid
277 */
278void
279vfs_getnewfsid(mp)
280	struct mount *mp;
281{
282	static u_short xxxfs_mntid;
283
284	fsid_t tfsid;
285	int mtype;
286
287	simple_lock(&mntid_slock);
288	mtype = mp->mnt_vfc->vfc_typenum;
289	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
290	mp->mnt_stat.f_fsid.val[1] = mtype;
291	if (xxxfs_mntid == 0)
292		++xxxfs_mntid;
293	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
294	tfsid.val[1] = mtype;
295	if (mountlist.cqh_first != (void *)&mountlist) {
296		while (vfs_getvfs(&tfsid)) {
297			tfsid.val[0]++;
298			xxxfs_mntid++;
299		}
300	}
301	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
302	simple_unlock(&mntid_slock);
303}
304
305/*
306 * Set vnode attributes to VNOVAL
307 */
308void
309vattr_null(vap)
310	register struct vattr *vap;
311{
312
313	vap->va_type = VNON;
314	vap->va_size = VNOVAL;
315	vap->va_bytes = VNOVAL;
316	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
317	    vap->va_fsid = vap->va_fileid =
318	    vap->va_blocksize = vap->va_rdev =
319	    vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
320	    vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
321	    vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
322	    vap->va_flags = vap->va_gen = VNOVAL;
323	vap->va_vaflags = 0;
324}
325
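/*
 * Illustrative sketch: a caller that wants VOP_SETATTR() to change a
 * single attribute clears the rest to VNOVAL first; VATTR_NULL() is
 * assumed to be the usual wrapper macro for vattr_null(), and
 * "newsize" is a hypothetical value.
 *
 *	struct vattr va;
 *
 *	VATTR_NULL(&va);
 *	va.va_size = newsize;
 *	error = VOP_SETATTR(vp, &va, cred, p);
 */
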
326/*
327 * Routines having to do with the management of the vnode table.
328 */
329extern vop_t **dead_vnodeop_p;
330
331/*
332 * Return the next vnode from the free list.
333 */
334int
335getnewvnode(tag, mp, vops, vpp)
336	enum vtagtype tag;
337	struct mount *mp;
338	vop_t **vops;
339	struct vnode **vpp;
340{
341	struct proc *p = curproc;	/* XXX */
342	struct vnode *vp;
343
344	/*
345	 * We take the least recently used vnode from the freelist
346	 * if we can get it, it has no cached pages, and no
347	 * namecache entries refer to it.
348	 * Otherwise we allocate a new vnode.
349	 */
350
351	simple_lock(&vnode_free_list_slock);
352
353	if (freevnodes >= desiredvnodes) {
354		TAILQ_FOREACH(vp, &vnode_free_list, v_freelist) {
355			if (!simple_lock_try(&vp->v_interlock))
356				continue;
357			if (vp->v_usecount)
358				panic("free vnode isn't");
359
360			if (vp->v_object && vp->v_object->resident_page_count) {
361				/* Don't recycle if it's caching some pages */
362				simple_unlock(&vp->v_interlock);
363				continue;
364			} else if (LIST_FIRST(&vp->v_cache_src)) {
365				/* Don't recycle if active in the namecache */
366				simple_unlock(&vp->v_interlock);
367				continue;
368			} else {
369				break;
370			}
371		}
372	} else {
373		vp = NULL;
374	}
375
376	if (vp) {
377		vp->v_flag |= VDOOMED;
378		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
379		freevnodes--;
380		simple_unlock(&vnode_free_list_slock);
381		cache_purge(vp);
382		vp->v_lease = NULL;
383		if (vp->v_type != VBAD)
384			vgonel(vp, p);
385		else {
386			simple_unlock(&vp->v_interlock);
387		}
388
389#ifdef DIAGNOSTIC
390		{
391			int s;
392
393			if (vp->v_data)
394				panic("cleaned vnode isn't");
395			s = splbio();
396			if (vp->v_numoutput)
397				panic("Clean vnode has pending I/O's");
398			splx(s);
399		}
400#endif
401		vp->v_flag = 0;
402		vp->v_lastr = 0;
403		vp->v_lastw = 0;
404		vp->v_lasta = 0;
405		vp->v_cstart = 0;
406		vp->v_clen = 0;
407		vp->v_socket = 0;
408		vp->v_writecount = 0;	/* XXX */
409	} else {
410		simple_unlock(&vnode_free_list_slock);
411		vp = (struct vnode *) malloc((u_long) sizeof *vp,
412		    M_VNODE, M_WAITOK);
413		bzero((char *) vp, sizeof *vp);
414		vp->v_dd = vp;
415		cache_purge(vp);
416		LIST_INIT(&vp->v_cache_src);
417		TAILQ_INIT(&vp->v_cache_dst);
418		numvnodes++;
419	}
420
421	vp->v_type = VNON;
422	vp->v_tag = tag;
423	vp->v_op = vops;
424	insmntque(vp, mp);
425	*vpp = vp;
426	vp->v_usecount = 1;
427	vp->v_data = 0;
428	return (0);
429}
430
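/*
 * Illustrative sketch: a filesystem obtains a fresh vnode and then
 * attaches its own per-file data; the tag, vnodeop vector and the
 * inode pointer "ip" below are hypothetical examples.
 *
 *	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
 *	if (error)
 *		return (error);
 *	vp->v_data = ip;
 */
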
431/*
432 * Move a vnode from one mount queue to another.
433 */
434void
435insmntque(vp, mp)
436	register struct vnode *vp;
437	register struct mount *mp;
438{
439
440	simple_lock(&mntvnode_slock);
441	/*
442	 * Delete from old mount point vnode list, if on one.
443	 */
444	if (vp->v_mount != NULL)
445		LIST_REMOVE(vp, v_mntvnodes);
446	/*
447	 * Insert into list of vnodes for the new mount point, if available.
448	 */
449	if ((vp->v_mount = mp) == NULL) {
450		simple_unlock(&mntvnode_slock);
451		return;
452	}
453	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
454	simple_unlock(&mntvnode_slock);
455}
456
457/*
458 * Update outstanding I/O count and do wakeup if requested.
459 */
460void
461vwakeup(bp)
462	register struct buf *bp;
463{
464	register struct vnode *vp;
465
466	bp->b_flags &= ~B_WRITEINPROG;
467	if ((vp = bp->b_vp)) {
468		vp->v_numoutput--;
469		if (vp->v_numoutput < 0)
470			panic("vwakeup: neg numoutput");
471		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
472			vp->v_flag &= ~VBWAIT;
473			wakeup((caddr_t) &vp->v_numoutput);
474		}
475	}
476}
477
478/*
479 * Flush out and invalidate all buffers associated with a vnode.
480 * Called with the underlying object locked.
481 */
482int
483vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
484	register struct vnode *vp;
485	int flags;
486	struct ucred *cred;
487	struct proc *p;
488	int slpflag, slptimeo;
489{
490	register struct buf *bp;
491	struct buf *nbp, *blist;
492	int s, error;
493	vm_object_t object;
494
495	if (flags & V_SAVE) {
496		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
497			return (error);
498		if (vp->v_dirtyblkhd.lh_first != NULL)
499			panic("vinvalbuf: dirty bufs");
500	}
501
502	s = splbio();
503	for (;;) {
504		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
505			while (blist && blist->b_lblkno < 0)
506				blist = blist->b_vnbufs.le_next;
507		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
508		    (flags & V_SAVEMETA))
509			while (blist && blist->b_lblkno < 0)
510				blist = blist->b_vnbufs.le_next;
511		if (!blist)
512			break;
513
514		for (bp = blist; bp; bp = nbp) {
515			nbp = bp->b_vnbufs.le_next;
516			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
517				continue;
518			if (bp->b_flags & B_BUSY) {
519				bp->b_flags |= B_WANTED;
520				error = tsleep((caddr_t) bp,
521				    slpflag | (PRIBIO + 1), "vinvalbuf",
522				    slptimeo);
523				if (error) {
524					splx(s);
525					return (error);
526				}
527				break;
528			}
529			bremfree(bp);
530			bp->b_flags |= B_BUSY;
531			/*
532			 * XXX Since there are no node locks for NFS, I
533			 * believe there is a slight chance that a delayed
534			 * write will occur while sleeping just above, so
535			 * check for it.
536			 */
537			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
538				(void) VOP_BWRITE(bp);
539				break;
540			}
541			bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
542			brelse(bp);
543		}
544	}
545
546	while (vp->v_numoutput > 0) {
547		vp->v_flag |= VBWAIT;
548		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
549	}
550
551	splx(s);
552
553	/*
554	 * Destroy the copy in the VM cache, too.
555	 */
556	object = vp->v_object;
557	if (object != NULL) {
558		vm_object_page_remove(object, 0, object->size,
559		    (flags & V_SAVE) ? TRUE : FALSE);
560	}
561	if (!(flags & V_SAVEMETA) &&
562	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
563		panic("vinvalbuf: flush failed");
564	return (0);
565}
566
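/*
 * Illustrative sketch: vclean() below flushes and saves dirty buffers
 * before reclaiming a vnode; a caller that wants everything discarded
 * instead passes no V_SAVE flag.
 *
 *	error = vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
 * or
 *	error = vinvalbuf(vp, 0, cred, p, 0, 0);
 */
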
567/*
568 * Associate a buffer with a vnode.
569 */
570void
571bgetvp(vp, bp)
572	register struct vnode *vp;
573	register struct buf *bp;
574{
575	int s;
576
577	if (bp->b_vp)
578		panic("bgetvp: not free");
579	vhold(vp);
580	bp->b_vp = vp;
581	if (vp->v_type == VBLK || vp->v_type == VCHR)
582		bp->b_dev = vp->v_rdev;
583	else
584		bp->b_dev = NODEV;
585	/*
586	 * Insert onto list for new vnode.
587	 */
588	s = splbio();
589	bufinsvn(bp, &vp->v_cleanblkhd);
590	splx(s);
591}
592
593/*
594 * Disassociate a buffer from a vnode.
595 */
596void
597brelvp(bp)
598	register struct buf *bp;
599{
600	struct vnode *vp;
601	int s;
602
603	if (bp->b_vp == (struct vnode *) 0)
604		panic("brelvp: NULL");
605	/*
606	 * Delete from old vnode list, if on one.
607	 */
608	s = splbio();
609	if (bp->b_vnbufs.le_next != NOLIST)
610		bufremvn(bp);
611	splx(s);
612
613	vp = bp->b_vp;
614	bp->b_vp = (struct vnode *) 0;
615	vdrop(vp);
616}
617
618/*
619 * Associate a p-buffer with a vnode.
620 */
621void
622pbgetvp(vp, bp)
623	register struct vnode *vp;
624	register struct buf *bp;
625{
626#if defined(DIAGNOSTIC)
627	if (bp->b_vp)
628		panic("pbgetvp: not free");
629#endif
630	bp->b_vp = vp;
631	if (vp->v_type == VBLK || vp->v_type == VCHR)
632		bp->b_dev = vp->v_rdev;
633	else
634		bp->b_dev = NODEV;
635}
636
637/*
638 * Disassociate a p-buffer from a vnode.
639 */
640void
641pbrelvp(bp)
642	register struct buf *bp;
643{
644	struct vnode *vp;
645
646#if defined(DIAGNOSTIC)
647	if (bp->b_vp == (struct vnode *) 0)
648		panic("pbrelvp: NULL");
649#endif
650
651	bp->b_vp = (struct vnode *) 0;
652}
653
654/*
655 * Reassign a buffer from one vnode to another.
656 * Used to assign file specific control information
657 * (indirect blocks) to the vnode to which they belong.
658 */
659void
660reassignbuf(bp, newvp)
661	register struct buf *bp;
662	register struct vnode *newvp;
663{
664	int s;
665
666	if (newvp == NULL) {
667		printf("reassignbuf: NULL");
668		return;
669	}
670
671	s = splbio();
672	/*
673	 * Delete from old vnode list, if on one.
674	 */
675	if (bp->b_vnbufs.le_next != NOLIST) {
676		bufremvn(bp);
677		vdrop(bp->b_vp);
678	}
679	/*
680	 * If dirty, put on list of dirty buffers; otherwise insert onto list
681	 * of clean buffers.
682	 */
683	if (bp->b_flags & B_DELWRI) {
684		struct buf *tbp;
685
686		tbp = newvp->v_dirtyblkhd.lh_first;
687		if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
688			bufinsvn(bp, &newvp->v_dirtyblkhd);
689		} else {
690			while (tbp->b_vnbufs.le_next &&
691				(tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
692				tbp = tbp->b_vnbufs.le_next;
693			}
694			LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
695		}
696	} else {
697		bufinsvn(bp, &newvp->v_cleanblkhd);
698	}
699	bp->b_vp = newvp;
700	vhold(bp->b_vp);
701	splx(s);
702}
703
704#ifndef DEVFS_ROOT
705/*
706 * Create a vnode for a block device.
707 * Used for mounting the root file system.
708 */
709int
710bdevvp(dev, vpp)
711	dev_t dev;
712	struct vnode **vpp;
713{
714	register struct vnode *vp;
715	struct vnode *nvp;
716	int error;
717
718	if (dev == NODEV)
719		return (0);
720	error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
721	if (error) {
722		*vpp = 0;
723		return (error);
724	}
725	vp = nvp;
726	vp->v_type = VBLK;
727	if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
728		vput(vp);
729		vp = nvp;
730	}
731	*vpp = vp;
732	return (0);
733}
734#endif /* !DEVFS_ROOT */
735
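/*
 * Illustrative sketch: a mountroot routine typically obtains the vnode
 * for the root device this way; rootdev and rootvp are the usual
 * kernel globals, assumed here.
 *
 *	if ((error = bdevvp(rootdev, &rootvp)))
 *		return (error);
 */
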
736/*
737 * Check to see if the new vnode represents a special device
738 * for which we already have a vnode (either because of
739 * bdevvp() or because of a different vnode representing
740 * the same block device). If such an alias exists, deallocate
741 * the existing contents and return the aliased vnode. The
742 * caller is responsible for filling it with its new contents.
743 */
744struct vnode *
745checkalias(nvp, nvp_rdev, mp)
746	register struct vnode *nvp;
747	dev_t nvp_rdev;
748	struct mount *mp;
749{
750	struct proc *p = curproc;	/* XXX */
751	struct vnode *vp;
752	struct vnode **vpp;
753
754	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
755		return (NULLVP);
756
757	vpp = &speclisth[SPECHASH(nvp_rdev)];
758loop:
759	simple_lock(&spechash_slock);
760	for (vp = *vpp; vp; vp = vp->v_specnext) {
761		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
762			continue;
763		/*
764		 * Alias, but not in use, so flush it out.
765		 */
766		simple_lock(&vp->v_interlock);
767		if (vp->v_usecount == 0) {
768			simple_unlock(&spechash_slock);
769			vgonel(vp, p);
770			goto loop;
771		}
772		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
773			simple_unlock(&spechash_slock);
774			goto loop;
775		}
776		break;
777	}
778	if (vp == NULL || vp->v_tag != VT_NON) {
779		MALLOC(nvp->v_specinfo, struct specinfo *,
780		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
781		nvp->v_rdev = nvp_rdev;
782		nvp->v_hashchain = vpp;
783		nvp->v_specnext = *vpp;
784		nvp->v_specflags = 0;
785		simple_unlock(&spechash_slock);
786		*vpp = nvp;
787		if (vp != NULLVP) {
788			nvp->v_flag |= VALIASED;
789			vp->v_flag |= VALIASED;
790			vput(vp);
791		}
792		return (NULLVP);
793	}
794	simple_unlock(&spechash_slock);
795	VOP_UNLOCK(vp, 0, p);
796	simple_lock(&vp->v_interlock);
797	vclean(vp, 0, p);
798	vp->v_op = nvp->v_op;
799	vp->v_tag = nvp->v_tag;
800	nvp->v_type = VNON;
801	insmntque(vp, mp);
802	return (vp);
803}
804
805/*
806 * Grab a particular vnode from the free list, increment its
807 * reference count and lock it. The vnode lock bit is set if the
808 * vnode is being eliminated in vgone. The process is awakened
809 * when the transition is completed, and an error returned to
810 * indicate that the vnode is no longer usable (possibly having
811 * been changed to a new file system type).
812 */
813int
814vget(vp, flags, p)
815	register struct vnode *vp;
816	int flags;
817	struct proc *p;
818{
819	int error;
820
821	/*
822	 * If the vnode is in the process of being cleaned out for
823	 * another use, we wait for the cleaning to finish and then
824	 * return failure. Cleaning is determined by checking that
825	 * the VXLOCK flag is set.
826	 */
827	if ((flags & LK_INTERLOCK) == 0) {
828		simple_lock(&vp->v_interlock);
829	}
830	if (vp->v_flag & VXLOCK) {
831		vp->v_flag |= VXWANT;
832		simple_unlock(&vp->v_interlock);
833		tsleep((caddr_t)vp, PINOD, "vget", 0);
834		return (ENOENT);
835	}
836	vp->v_usecount++;
837	if (VSHOULDBUSY(vp))
838		vbusy(vp);
839	/*
840	 * Create the VM object, if needed
841	 */
842	if ((vp->v_type == VREG) &&
843		((vp->v_object == NULL) ||
844			(vp->v_object->flags & OBJ_VFS_REF) == 0 ||
845			(vp->v_object->flags & OBJ_DEAD))) {
846		/*
847		 * XXX vfs_object_create probably needs the interlock.
848		 */
849		simple_unlock(&vp->v_interlock);
850		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
851		simple_lock(&vp->v_interlock);
852	}
853	if (flags & LK_TYPE_MASK) {
854		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)))
855			vrele(vp);
856		return (error);
857	}
858	simple_unlock(&vp->v_interlock);
859	return (0);
860}
861
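/*
 * Illustrative sketch: a caller pulling a vnode out of a cache pairs
 * vget() with vput() (or vrele() if no lock was requested).
 *
 *	if (vget(vp, LK_EXCLUSIVE, p) == 0) {
 *		... use the referenced, locked vnode ...
 *		vput(vp);
 *	}
 */
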
862/*
863 * Stubs to use when there is no locking to be done on the underlying object.
864 * A minimal shared lock is necessary to ensure that the underlying object
865 * is not revoked while an operation is in progress. So, an active shared
866 * count is maintained in an auxiliary vnode lock structure.
867 */
868int
869vop_sharedlock(ap)
870	struct vop_lock_args /* {
871		struct vnode *a_vp;
872		int a_flags;
873		struct proc *a_p;
874	} */ *ap;
875{
876	/*
877	 * This code cannot be used until all the non-locking filesystems
878	 * (notably NFS) are converted to properly lock and release nodes.
879	 * Also, certain vnode operations change the locking state within
880	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
881	 * and symlink). Ideally these operations should not change the
882	 * lock state, but should be changed to let the caller of the
883	 * function unlock them. Otherwise all intermediate vnode layers
884	 * (such as union, umapfs, etc) must catch these functions to do
885	 * the necessary locking at their layer. Note that the inactive
886	 * and lookup operations also change their lock state, but this
887	 * cannot be avoided, so these two operations will always need
888	 * to be handled in intermediate layers.
889	 */
890	struct vnode *vp = ap->a_vp;
891	int vnflags, flags = ap->a_flags;
892
893	if (vp->v_vnlock == NULL) {
894		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
895			return (0);
896		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
897		    M_VNODE, M_WAITOK);
898		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
899	}
900	switch (flags & LK_TYPE_MASK) {
901	case LK_DRAIN:
902		vnflags = LK_DRAIN;
903		break;
904	case LK_EXCLUSIVE:
905#ifdef DEBUG_VFS_LOCKS
906		/*
907		 * Normally, we use shared locks here, but that confuses
908		 * the locking assertions.
909		 */
910		vnflags = LK_EXCLUSIVE;
911		break;
912#endif
913	case LK_SHARED:
914		vnflags = LK_SHARED;
915		break;
916	case LK_UPGRADE:
917	case LK_EXCLUPGRADE:
918	case LK_DOWNGRADE:
919		return (0);
920	case LK_RELEASE:
921	default:
922		panic("vop_sharedlock: bad operation %d", flags & LK_TYPE_MASK);
923	}
924	if (flags & LK_INTERLOCK)
925		vnflags |= LK_INTERLOCK;
926	return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
927}
928
929/*
930 * Stubs to use when there is no locking to be done on the underlying object.
931 * A minimal shared lock is necessary to ensure that the underlying object
932 * is not revoked while an operation is in progress. So, an active shared
933 * count is maintained in an auxiliary vnode lock structure.
934 */
935int
936vop_nolock(ap)
937	struct vop_lock_args /* {
938		struct vnode *a_vp;
939		int a_flags;
940		struct proc *a_p;
941	} */ *ap;
942{
943#ifdef notyet
944	/*
945	 * This code cannot be used until all the non-locking filesystems
946	 * (notably NFS) are converted to properly lock and release nodes.
947	 * Also, certain vnode operations change the locking state within
948	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
949	 * and symlink). Ideally these operations should not change the
950	 * lock state, but should be changed to let the caller of the
951	 * function unlock them. Otherwise all intermediate vnode layers
952	 * (such as union, umapfs, etc) must catch these functions to do
953	 * the necessary locking at their layer. Note that the inactive
954	 * and lookup operations also change their lock state, but this
955	 * cannot be avoided, so these two operations will always need
956	 * to be handled in intermediate layers.
957	 */
958	struct vnode *vp = ap->a_vp;
959	int vnflags, flags = ap->a_flags;
960
961	if (vp->v_vnlock == NULL) {
962		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
963			return (0);
964		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
965		    M_VNODE, M_WAITOK);
966		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
967	}
968	switch (flags & LK_TYPE_MASK) {
969	case LK_DRAIN:
970		vnflags = LK_DRAIN;
971		break;
972	case LK_EXCLUSIVE:
973	case LK_SHARED:
974		vnflags = LK_SHARED;
975		break;
976	case LK_UPGRADE:
977	case LK_EXCLUPGRADE:
978	case LK_DOWNGRADE:
979		return (0);
980	case LK_RELEASE:
981	default:
982		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
983	}
984	if (flags & LK_INTERLOCK)
985		vnflags |= LK_INTERLOCK;
986	return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
987#else /* for now */
988	/*
989	 * Since we are not using the lock manager, we must clear
990	 * the interlock here.
991	 */
992	if (ap->a_flags & LK_INTERLOCK) {
993		simple_unlock(&ap->a_vp->v_interlock);
994	}
995	return (0);
996#endif
997}
998
999/*
1000 * Do the inverse of vop_nolock, handling the interlock in a compatible way.
1001 */
1002int
1003vop_nounlock(ap)
1004	struct vop_unlock_args /* {
1005		struct vnode *a_vp;
1006		int a_flags;
1007		struct proc *a_p;
1008	} */ *ap;
1009{
1010	struct vnode *vp = ap->a_vp;
1011
1012	if (vp->v_vnlock == NULL) {
1013		if (ap->a_flags & LK_INTERLOCK)
1014			simple_unlock(&ap->a_vp->v_interlock);
1015		return (0);
1016	}
1017	return (lockmgr(vp->v_vnlock, LK_RELEASE | ap->a_flags,
1018		&ap->a_vp->v_interlock, ap->a_p));
1019}
1020
1021/*
1022 * Return whether or not the node is locked.
1023 */
1024int
1025vop_noislocked(ap)
1026	struct vop_islocked_args /* {
1027		struct vnode *a_vp;
1028	} */ *ap;
1029{
1030	struct vnode *vp = ap->a_vp;
1031
1032	if (vp->v_vnlock == NULL)
1033		return (0);
1034	return (lockstatus(vp->v_vnlock));
1035}
1036
1037/* #ifdef DIAGNOSTIC */
1038/*
1039 * Vnode reference, just increment the count
1040 */
1041void
1042vref(vp)
1043	struct vnode *vp;
1044{
1045	simple_lock(&vp->v_interlock);
1046	if (vp->v_usecount <= 0)
1047		panic("vref used where vget required");
1048
1049	vp->v_usecount++;
1050
1051	if ((vp->v_type == VREG) &&
1052		((vp->v_object == NULL) ||
1053			((vp->v_object->flags & OBJ_VFS_REF) == 0) ||
1054			(vp->v_object->flags & OBJ_DEAD))) {
1055		/*
1056		 * We need to lock the VP during the time that
1057		 * the object is created.  This is necessary to
1058		 * keep the system from re-entrantly doing it
1059		 * multiple times.
1060		 * XXX vfs_object_create probably needs the interlock?
1061		 */
1062		simple_unlock(&vp->v_interlock);
1063		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
1064		return;
1065	}
1066	simple_unlock(&vp->v_interlock);
1067}
1068
1069/*
1070 * Vnode put/release.
1071 * If count drops to zero, call inactive routine and return to freelist.
1072 */
1073static void
1074vputrele(vp, put)
1075	struct vnode *vp;
1076	int put;
1077{
1078	struct proc *p = curproc;	/* XXX */
1079
1080#ifdef DIAGNOSTIC
1081	if (vp == NULL)
1082		panic("vputrele: null vp");
1083#endif
1084	simple_lock(&vp->v_interlock);
1085
1086	if ((vp->v_usecount == 2) &&
1087		vp->v_object &&
1088		(vp->v_object->flags & OBJ_VFS_REF)) {
1089		vp->v_usecount--;
1090		vp->v_object->flags &= ~OBJ_VFS_REF;
1091		if (put) {
1092			VOP_UNLOCK(vp, LK_INTERLOCK, p);
1093		} else {
1094			simple_unlock(&vp->v_interlock);
1095		}
1096		vm_object_deallocate(vp->v_object);
1097		return;
1098	}
1099
1100	if (vp->v_usecount > 1) {
1101		vp->v_usecount--;
1102		if (put) {
1103			VOP_UNLOCK(vp, LK_INTERLOCK, p);
1104		} else {
1105			simple_unlock(&vp->v_interlock);
1106		}
1107		return;
1108	}
1109
1110	if (vp->v_usecount < 1) {
1111#ifdef DIAGNOSTIC
1112		vprint("vputrele: negative ref count", vp);
1113#endif
1114		panic("vputrele: negative ref cnt");
1115	}
1116
1117	vp->v_usecount--;
1118	if (VSHOULDFREE(vp))
1119		vfree(vp);
1120	/*
1121	 * If we are doing a vput, the node is already locked, and we must
1122	 * call VOP_INACTIVE with the node locked.  So, in the case of
1123	 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
1124	 */
1125	if (put) {
1126		simple_unlock(&vp->v_interlock);
1127		VOP_INACTIVE(vp, p);
1128	} else if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1129		VOP_INACTIVE(vp, p);
1130	}
1131}
1132
1133/*
1134 * vput(), just unlock and vrele()
1135 */
1136void
1137vput(vp)
1138	struct vnode *vp;
1139{
1140	vputrele(vp, 1);
1141}
1142
1143void
1144vrele(vp)
1145	struct vnode *vp;
1146{
1147	vputrele(vp, 0);
1148}
1149
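/*
 * Illustrative sketch: vref() and vrele() bracket a short-term extra
 * reference on a vnode that is already known to be active.
 *
 *	vref(vp);
 *	... hand vp to something that may sleep ...
 *	vrele(vp);
 */
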
1150/*
1151 * Somebody doesn't want the vnode recycled.
1152 */
1153void
1154vhold(vp)
1155	register struct vnode *vp;
1156{
1157
1158	simple_lock(&vp->v_interlock);
1159	vp->v_holdcnt++;
1160	if (VSHOULDBUSY(vp))
1161		vbusy(vp);
1162	simple_unlock(&vp->v_interlock);
1163}
1164
1165/*
1166 * One less who cares about this vnode.
1167 */
1168void
1169vdrop(vp)
1170	register struct vnode *vp;
1171{
1172
1173	simple_lock(&vp->v_interlock);
1174	if (vp->v_holdcnt <= 0)
1175		panic("holdrele: holdcnt");
1176	vp->v_holdcnt--;
1177	if (VSHOULDFREE(vp))
1178		vfree(vp);
1179	simple_unlock(&vp->v_interlock);
1180}
1181
1182/*
1183 * Remove any vnodes in the vnode table belonging to mount point mp.
1184 *
1185 * If MNT_NOFORCE is specified, there should not be any active ones,
1186 * return error if any are found (nb: this is a user error, not a
1187 * system error). If MNT_FORCE is specified, detach any active vnodes
1188 * that are found.
1189 */
1190#ifdef DIAGNOSTIC
1191static int busyprt = 0;		/* print out busy vnodes */
1192SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
1193#endif
1194
1195int
1196vflush(mp, skipvp, flags)
1197	struct mount *mp;
1198	struct vnode *skipvp;
1199	int flags;
1200{
1201	struct proc *p = curproc;	/* XXX */
1202	struct vnode *vp, *nvp;
1203	int busy = 0;
1204
1205	simple_lock(&mntvnode_slock);
1206loop:
1207	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1208		/*
1209		 * Make sure this vnode wasn't reclaimed in getnewvnode().
1210		 * Start over if it has (it won't be on the list anymore).
1211		 */
1212		if (vp->v_mount != mp)
1213			goto loop;
1214		nvp = vp->v_mntvnodes.le_next;
1215		/*
1216		 * Skip over a selected vnode.
1217		 */
1218		if (vp == skipvp)
1219			continue;
1220
1221		simple_lock(&vp->v_interlock);
1222		/*
1223		 * Skip over vnodes marked VSYSTEM.
1224		 */
1225		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1226			simple_unlock(&vp->v_interlock);
1227			continue;
1228		}
1229		/*
1230		 * If WRITECLOSE is set, only flush out regular file vnodes
1231		 * open for writing.
1232		 */
1233		if ((flags & WRITECLOSE) &&
1234		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
1235			simple_unlock(&vp->v_interlock);
1236			continue;
1237		}
1238
1239		/*
1240		 * With v_usecount == 0, all we need to do is clear out the
1241		 * vnode data structures and we are done.
1242		 */
1243		if (vp->v_usecount == 0) {
1244			simple_unlock(&mntvnode_slock);
1245			vgonel(vp, p);
1246			simple_lock(&mntvnode_slock);
1247			continue;
1248		}
1249
1250		/*
1251		 * If FORCECLOSE is set, forcibly close the vnode. For block
1252		 * or character devices, revert to an anonymous device. For
1253		 * all other files, just kill them.
1254		 */
1255		if (flags & FORCECLOSE) {
1256			simple_unlock(&mntvnode_slock);
1257			if (vp->v_type != VBLK && vp->v_type != VCHR) {
1258				vgonel(vp, p);
1259			} else {
1260				vclean(vp, 0, p);
1261				vp->v_op = spec_vnodeop_p;
1262				insmntque(vp, (struct mount *) 0);
1263			}
1264			simple_lock(&mntvnode_slock);
1265			continue;
1266		}
1267#ifdef DIAGNOSTIC
1268		if (busyprt)
1269			vprint("vflush: busy vnode", vp);
1270#endif
1271		simple_unlock(&vp->v_interlock);
1272		busy++;
1273	}
1274	simple_unlock(&mntvnode_slock);
1275	if (busy)
1276		return (EBUSY);
1277	return (0);
1278}
1279
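/*
 * Illustrative sketch: a filesystem's unmount routine usually calls
 * vflush() with FORCECLOSE only for forced unmounts; the flag handling
 * here is an assumption about the caller.
 *
 *	flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
 *	error = vflush(mp, NULLVP, flags);
 */
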
1280/*
1281 * Disassociate the underlying file system from a vnode.
1282 */
1283static void
1284vclean(struct vnode *vp, int flags, struct proc *p)
1285{
1286	int active, irefed;
1287	vm_object_t object;
1288
1289	/*
1290	 * Check to see if the vnode is in use. If so we have to reference it
1291	 * before we clean it out so that its count cannot fall to zero and
1292	 * generate a race against ourselves to recycle it.
1293	 */
1294	if ((active = vp->v_usecount))
1295		vp->v_usecount++;
1296	/*
1297	 * Prevent the vnode from being recycled or brought into use while we
1298	 * clean it out.
1299	 */
1300	if (vp->v_flag & VXLOCK)
1301		panic("vclean: deadlock");
1302	vp->v_flag |= VXLOCK;
1303	/*
1304	 * Even if the count is zero, the VOP_INACTIVE routine may still
1305	 * have the object locked while it cleans it out. The VOP_LOCK
1306	 * ensures that the VOP_INACTIVE routine is done with its work.
1307	 * For active vnodes, it ensures that no other activity can
1308	 * occur while the underlying object is being cleaned out.
1309	 */
1310	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1311
1312	object = vp->v_object;
1313	irefed = 0;
1314	if (object && ((object->flags & OBJ_DEAD) == 0)) {
1315		if (object->ref_count == 0) {
1316			vm_object_reference(object);
1317			irefed = 1;
1318		}
1319		++object->ref_count;
1320		pager_cache(object, FALSE);
1321	}
1322
1323	/*
1324	 * Clean out any buffers associated with the vnode.
1325	 */
1326	if (flags & DOCLOSE)
1327		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1328
1329	if (irefed) {
1330		vm_object_deallocate(object);
1331	}
1332
1333	/*
1334	 * If purging an active vnode, it must be closed and
1335	 * deactivated before being reclaimed. Note that the
1336	 * VOP_INACTIVE will unlock the vnode.
1337	 */
1338	if (active) {
1339		if (flags & DOCLOSE)
1340			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1341		VOP_INACTIVE(vp, p);
1342	} else {
1343		/*
1344		 * Any other processes trying to obtain this lock must first
1345		 * wait for VXLOCK to clear, then call the new lock operation.
1346		 */
1347		VOP_UNLOCK(vp, 0, p);
1348	}
1349	/*
1350	 * Reclaim the vnode.
1351	 */
1352	if (VOP_RECLAIM(vp, p))
1353		panic("vclean: cannot reclaim");
1354	if (active)
1355		vrele(vp);
1356	cache_purge(vp);
1357	if (vp->v_vnlock) {
1358#ifdef DIAGNOSTIC
1359		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1360			vprint("vclean: lock not drained", vp);
1361#endif
1362		FREE(vp->v_vnlock, M_VNODE);
1363		vp->v_vnlock = NULL;
1364	}
1365
1366	/*
1367	 * Done with purge, notify sleepers of the grim news.
1368	 */
1369	vp->v_op = dead_vnodeop_p;
1370	vp->v_tag = VT_NON;
1371	vp->v_flag &= ~VXLOCK;
1372	if (vp->v_flag & VXWANT) {
1373		vp->v_flag &= ~VXWANT;
1374		wakeup((caddr_t) vp);
1375	}
1376}
1377
1378/*
1379 * Eliminate all activity associated with the requested vnode
1380 * and with all vnodes aliased to the requested vnode.
1381 */
1382int
1383vop_revoke(ap)
1384	struct vop_revoke_args /* {
1385		struct vnode *a_vp;
1386		int a_flags;
1387	} */ *ap;
1388{
1389	struct vnode *vp, *vq;
1390	struct proc *p = curproc;	/* XXX */
1391
1392#ifdef DIAGNOSTIC
1393	if ((ap->a_flags & REVOKEALL) == 0)
1394		panic("vop_revoke");
1395#endif
1396
1397	vp = ap->a_vp;
1398	simple_lock(&vp->v_interlock);
1399
1400	if (vp->v_flag & VALIASED) {
1401		/*
1402		 * If a vgone (or vclean) is already in progress,
1403		 * wait until it is done and return.
1404		 */
1405		if (vp->v_flag & VXLOCK) {
1406			vp->v_flag |= VXWANT;
1407			simple_unlock(&vp->v_interlock);
1408			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1409			return (0);
1410		}
1411		/*
1412		 * Ensure that vp will not be vgone'd while we
1413		 * are eliminating its aliases.
1414		 */
1415		vp->v_flag |= VXLOCK;
1416		simple_unlock(&vp->v_interlock);
1417		while (vp->v_flag & VALIASED) {
1418			simple_lock(&spechash_slock);
1419			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1420				if (vq->v_rdev != vp->v_rdev ||
1421				    vq->v_type != vp->v_type || vp == vq)
1422					continue;
1423				simple_unlock(&spechash_slock);
1424				vgone(vq);
1425				break;
1426			}
1427			if (vq == NULLVP) {
1428				simple_unlock(&spechash_slock);
1429			}
1430		}
1431		/*
1432		 * Remove the lock so that vgone below will
1433		 * really eliminate the vnode after which time
1434		 * vgone will awaken any sleepers.
1435		 */
1436		simple_lock(&vp->v_interlock);
1437		vp->v_flag &= ~VXLOCK;
1438	}
1439	vgonel(vp, p);
1440	return (0);
1441}
1442
1443/*
1444 * Recycle an unused vnode to the front of the free list.
1445 * Release the passed interlock if the vnode will be recycled.
1446 */
1447int
1448vrecycle(vp, inter_lkp, p)
1449	struct vnode *vp;
1450	struct simplelock *inter_lkp;
1451	struct proc *p;
1452{
1453
1454	simple_lock(&vp->v_interlock);
1455	if (vp->v_usecount == 0) {
1456		if (inter_lkp) {
1457			simple_unlock(inter_lkp);
1458		}
1459		vgonel(vp, p);
1460		return (1);
1461	}
1462	simple_unlock(&vp->v_interlock);
1463	return (0);
1464}
1465
1466/*
1467 * Eliminate all activity associated with a vnode
1468 * in preparation for reuse.
1469 */
1470void
1471vgone(vp)
1472	register struct vnode *vp;
1473{
1474	struct proc *p = curproc;	/* XXX */
1475
1476	simple_lock(&vp->v_interlock);
1477	vgonel(vp, p);
1478}
1479
1480/*
1481 * vgone, with the vp interlock held.
1482 */
1483static void
1484vgonel(vp, p)
1485	struct vnode *vp;
1486	struct proc *p;
1487{
1488	struct vnode *vq;
1489	struct vnode *vx;
1490
1491	/*
1492	 * If a vgone (or vclean) is already in progress,
1493	 * wait until it is done and return.
1494	 */
1495	if (vp->v_flag & VXLOCK) {
1496		vp->v_flag |= VXWANT;
1497		simple_unlock(&vp->v_interlock);
1498		tsleep((caddr_t)vp, PINOD, "vgone", 0);
1499		return;
1500	}
1501
1502	if (vp->v_object) {
1503		vp->v_object->flags |= OBJ_VNODE_GONE;
1504	}
1505
1506	/*
1507	 * Clean out the filesystem specific data.
1508	 */
1509	vclean(vp, DOCLOSE, p);
1510	/*
1511	 * Delete from old mount point vnode list, if on one.
1512	 */
1513	if (vp->v_mount != NULL)
1514		insmntque(vp, (struct mount *)0);
1515	/*
1516	 * If special device, remove it from special device alias list
1517	 * if it is on one.
1518	 */
1519	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1520		simple_lock(&spechash_slock);
1521		if (*vp->v_hashchain == vp) {
1522			*vp->v_hashchain = vp->v_specnext;
1523		} else {
1524			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1525				if (vq->v_specnext != vp)
1526					continue;
1527				vq->v_specnext = vp->v_specnext;
1528				break;
1529			}
1530			if (vq == NULL)
1531				panic("missing bdev");
1532		}
1533		if (vp->v_flag & VALIASED) {
1534			vx = NULL;
1535			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1536				if (vq->v_rdev != vp->v_rdev ||
1537				    vq->v_type != vp->v_type)
1538					continue;
1539				if (vx)
1540					break;
1541				vx = vq;
1542			}
1543			if (vx == NULL)
1544				panic("missing alias");
1545			if (vq == NULL)
1546				vx->v_flag &= ~VALIASED;
1547			vp->v_flag &= ~VALIASED;
1548		}
1549		simple_unlock(&spechash_slock);
1550		FREE(vp->v_specinfo, M_VNODE);
1551		vp->v_specinfo = NULL;
1552	}
1553
1554	/*
1555	 * If it is on the freelist and not already at the head,
1556	 * move it to the head of the list. The test of the back
1557	 * pointer and the reference count of zero is because
1558	 * it will be removed from the free list by getnewvnode,
1559	 * but will not have its reference count incremented until
1560	 * after calling vgone. If the reference count were
1561	 * incremented first, vgone would (incorrectly) try to
1562	 * close the previous instance of the underlying object.
1563	 */
1564	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
1565		simple_lock(&vnode_free_list_slock);
1566		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1567		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1568		simple_unlock(&vnode_free_list_slock);
1569	}
1570
1571	vp->v_type = VBAD;
1572}
1573
1574/*
1575 * Lookup a vnode by device number.
1576 */
1577int
1578vfinddev(dev, type, vpp)
1579	dev_t dev;
1580	enum vtype type;
1581	struct vnode **vpp;
1582{
1583	register struct vnode *vp;
1584	int rc = 0;
1585
1586	simple_lock(&spechash_slock);
1587	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1588		if (dev != vp->v_rdev || type != vp->v_type)
1589			continue;
1590		*vpp = vp;
1591		rc = 1;
1592		break;
1593	}
1594	simple_unlock(&spechash_slock);
1595	return (rc);
1596}
1597
1598/*
1599 * Calculate the total number of references to a special device.
1600 */
1601int
1602vcount(vp)
1603	register struct vnode *vp;
1604{
1605	struct vnode *vq, *vnext;
1606	int count;
1607
1608loop:
1609	if ((vp->v_flag & VALIASED) == 0)
1610		return (vp->v_usecount);
1611	simple_lock(&spechash_slock);
1612	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1613		vnext = vq->v_specnext;
1614		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1615			continue;
1616		/*
1617		 * Alias, but not in use, so flush it out.
1618		 */
1619		if (vq->v_usecount == 0 && vq != vp) {
1620			simple_unlock(&spechash_slock);
1621			vgone(vq);
1622			goto loop;
1623		}
1624		count += vq->v_usecount;
1625	}
1626	simple_unlock(&spechash_slock);
1627	return (count);
1628}
1629
1630/*
1631 * Return true for select/poll.
1632 */
1633int
1634vop_nopoll(ap)
1635	struct vop_poll_args /* {
1636		struct vnode *a_vp;
1637		int  a_events;
1638		struct ucred *a_cred;
1639		struct proc *a_p;
1640	} */ *ap;
1641{
1642
1643	/*
1644	 * Just return what we were asked for.
1645	 */
1646	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1647}
1648
1649/*
1650 * Print out a description of a vnode.
1651 */
1652static char *typename[] =
1653{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
1654
1655void
1656vprint(label, vp)
1657	char *label;
1658	register struct vnode *vp;
1659{
1660	char buf[64];
1661
1662	if (label != NULL)
1663		printf("%s: %x: ", label, vp);
1664	else
1665		printf("%x: ", vp);
1666	printf("type %s, usecount %d, writecount %d, refcount %ld,",
1667	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1668	    vp->v_holdcnt);
1669	buf[0] = '\0';
1670	if (vp->v_flag & VROOT)
1671		strcat(buf, "|VROOT");
1672	if (vp->v_flag & VTEXT)
1673		strcat(buf, "|VTEXT");
1674	if (vp->v_flag & VSYSTEM)
1675		strcat(buf, "|VSYSTEM");
1676	if (vp->v_flag & VXLOCK)
1677		strcat(buf, "|VXLOCK");
1678	if (vp->v_flag & VXWANT)
1679		strcat(buf, "|VXWANT");
1680	if (vp->v_flag & VBWAIT)
1681		strcat(buf, "|VBWAIT");
1682	if (vp->v_flag & VALIASED)
1683		strcat(buf, "|VALIASED");
1684	if (vp->v_flag & VDOOMED)
1685		strcat(buf, "|VDOOMED");
1686	if (vp->v_flag & VFREE)
1687		strcat(buf, "|VFREE");
1688	if (buf[0] != '\0')
1689		printf(" flags (%s)", &buf[1]);
1690	if (vp->v_data == NULL) {
1691		printf("\n");
1692	} else {
1693		printf("\n\t");
1694		VOP_PRINT(vp);
1695	}
1696}
1697
1698#ifdef DDB
1699/*
1700 * List all of the locked vnodes in the system.
1701 * Called when debugging the kernel.
1702 */
1703void
1704printlockedvnodes()
1705{
1706	struct proc *p = curproc;	/* XXX */
1707	struct mount *mp, *nmp;
1708	struct vnode *vp;
1709
1710	printf("Locked vnodes\n");
1711	simple_lock(&mountlist_slock);
1712	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1713		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1714			nmp = mp->mnt_list.cqe_next;
1715			continue;
1716		}
1717		for (vp = mp->mnt_vnodelist.lh_first;
1718		     vp != NULL;
1719		     vp = vp->v_mntvnodes.le_next) {
1720			if (VOP_ISLOCKED(vp))
1721				vprint((char *)0, vp);
1722		}
1723		simple_lock(&mountlist_slock);
1724		nmp = mp->mnt_list.cqe_next;
1725		vfs_unbusy(mp, p);
1726	}
1727	simple_unlock(&mountlist_slock);
1728}
1729#endif
1730
1731/*
1732 * Top level filesystem related information gathering.
1733 */
1734static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);
1735
1736static int
1737vfs_sysctl SYSCTL_HANDLER_ARGS
1738{
1739	int *name = (int *)arg1 - 1;	/* XXX */
1740	u_int namelen = arg2 + 1;	/* XXX */
1741	struct vfsconf *vfsp;
1742
1743#ifndef NO_COMPAT_PRELITE2
1744	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
1745	if (namelen == 1)
1746		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
1747#endif
1748
1749#ifdef notyet
1750	/* all sysctl names at this level are at least name and field */
1751	if (namelen < 2)
1752		return (ENOTDIR);		/* overloaded */
1753	if (name[0] != VFS_GENERIC) {
1754		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1755			if (vfsp->vfc_typenum == name[0])
1756				break;
1757		if (vfsp == NULL)
1758			return (EOPNOTSUPP);
1759		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1760		    oldp, oldlenp, newp, newlen, p));
1761	}
1762#endif
1763	switch (name[1]) {
1764	case VFS_MAXTYPENUM:
1765		if (namelen != 2)
1766			return (ENOTDIR);
1767		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
1768	case VFS_CONF:
1769		if (namelen != 3)
1770			return (ENOTDIR);	/* overloaded */
1771		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1772			if (vfsp->vfc_typenum == name[2])
1773				break;
1774		if (vfsp == NULL)
1775			return (EOPNOTSUPP);
1776		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
1777	}
1778	return (EOPNOTSUPP);
1779}
1780
1781SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
1782	"Generic filesystem");
1783
1784#ifndef NO_COMPAT_PRELITE2
1785
1786static int
1787sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
1788{
1789	int error;
1790	struct vfsconf *vfsp;
1791	struct ovfsconf ovfs;
1792
1793	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
1794		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
1795		strcpy(ovfs.vfc_name, vfsp->vfc_name);
1796		ovfs.vfc_index = vfsp->vfc_typenum;
1797		ovfs.vfc_refcount = vfsp->vfc_refcount;
1798		ovfs.vfc_flags = vfsp->vfc_flags;
1799		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
1800		if (error)
1801			return error;
1802	}
1803	return 0;
1804}
1805
1806#endif /* !NO_COMPAT_PRELITE2 */
1807
1808int kinfo_vdebug = 1;
1809int kinfo_vgetfailed;
1810
1811#define KINFO_VNODESLOP	10
1812/*
1813 * Dump vnode list (via sysctl).
1814 * Copyout address of vnode followed by vnode.
1815 */
1816/* ARGSUSED */
1817static int
1818sysctl_vnode SYSCTL_HANDLER_ARGS
1819{
1820	struct proc *p = curproc;	/* XXX */
1821	struct mount *mp, *nmp;
1822	struct vnode *nvp, *vp;
1823	int error;
1824
1825#define VPTRSZ	sizeof (struct vnode *)
1826#define VNODESZ	sizeof (struct vnode)
1827
1828	req->lock = 0;
1829	if (!req->oldptr) /* Make an estimate */
1830		return (SYSCTL_OUT(req, 0,
1831			(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
1832
1833	simple_lock(&mountlist_slock);
1834	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1835		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1836			nmp = mp->mnt_list.cqe_next;
1837			continue;
1838		}
1839again:
1840		simple_lock(&mntvnode_slock);
1841		for (vp = mp->mnt_vnodelist.lh_first;
1842		     vp != NULL;
1843		     vp = nvp) {
1844			/*
1845			 * Check that the vp is still associated with
1846			 * this filesystem.  RACE: could have been
1847			 * recycled onto the same filesystem.
1848			 */
1849			if (vp->v_mount != mp) {
1850				simple_unlock(&mntvnode_slock);
1851				if (kinfo_vdebug)
1852					printf("kinfo: vp changed\n");
1853				goto again;
1854			}
1855			nvp = vp->v_mntvnodes.le_next;
1856			simple_unlock(&mntvnode_slock);
1857			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
1858			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
1859				return (error);
1860			simple_lock(&mntvnode_slock);
1861		}
1862		simple_unlock(&mntvnode_slock);
1863		simple_lock(&mountlist_slock);
1864		nmp = mp->mnt_list.cqe_next;
1865		vfs_unbusy(mp, p);
1866	}
1867	simple_unlock(&mountlist_slock);
1868
1869	return (0);
1870}
1871
1872/*
1873 * XXX
1874 * Exporting the vnode list on large systems causes them to crash.
1875 * Exporting the vnode list on medium systems causes sysctl to coredump.
1876 */
1877#if 0
1878SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
1879	0, 0, sysctl_vnode, "S,vnode", "");
1880#endif
1881
1882/*
1883 * Check to see if a filesystem is mounted on a block device.
1884 */
1885int
1886vfs_mountedon(vp)
1887	struct vnode *vp;
1888{
1889	struct vnode *vq;
1890	int error = 0;
1891
1892	if (vp->v_specflags & SI_MOUNTEDON)
1893		return (EBUSY);
1894	if (vp->v_flag & VALIASED) {
1895		simple_lock(&spechash_slock);
1896		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1897			if (vq->v_rdev != vp->v_rdev ||
1898			    vq->v_type != vp->v_type)
1899				continue;
1900			if (vq->v_specflags & SI_MOUNTEDON) {
1901				error = EBUSY;
1902				break;
1903			}
1904		}
1905		simple_unlock(&spechash_slock);
1906	}
1907	return (error);
1908}
1909
1910/*
1911 * Unmount all filesystems. The list is traversed in reverse order
1912 * of mounting to avoid dependencies.
1913 */
1914void
1915vfs_unmountall()
1916{
1917	struct mount *mp, *nmp;
1918	struct proc *p = initproc;	/* XXX XXX should this be proc0? */
1919	int error;
1920
1921	/*
1922	 * Since this only runs when rebooting, it is not interlocked.
1923	 */
1924	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
1925		nmp = mp->mnt_list.cqe_prev;
1926		error = dounmount(mp, MNT_FORCE, p);
1927		if (error) {
1928			printf("unmount of %s failed (",
1929			    mp->mnt_stat.f_mntonname);
1930			if (error == EBUSY)
1931				printf("BUSY)\n");
1932			else
1933				printf("%d)\n", error);
1934		}
1935	}
1936}
1937
1938/*
1939 * Build hash lists of net addresses and hang them off the mount point.
1940 * Called by ufs_mount() to set up the lists of export addresses.
1941 */
1942static int
1943vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
1944	struct export_args *argp)
1945{
1946	register struct netcred *np;
1947	register struct radix_node_head *rnh;
1948	register int i;
1949	struct radix_node *rn;
1950	struct sockaddr *saddr, *smask = 0;
1951	struct domain *dom;
1952	int error;
1953
1954	if (argp->ex_addrlen == 0) {
1955		if (mp->mnt_flag & MNT_DEFEXPORTED)
1956			return (EPERM);
1957		np = &nep->ne_defexported;
1958		np->netc_exflags = argp->ex_flags;
1959		np->netc_anon = argp->ex_anon;
1960		np->netc_anon.cr_ref = 1;
1961		mp->mnt_flag |= MNT_DEFEXPORTED;
1962		return (0);
1963	}
1964	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1965	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
1966	bzero((caddr_t) np, i);
1967	saddr = (struct sockaddr *) (np + 1);
1968	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
1969		goto out;
1970	if (saddr->sa_len > argp->ex_addrlen)
1971		saddr->sa_len = argp->ex_addrlen;
1972	if (argp->ex_masklen) {
1973		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
1974		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
1975		if (error)
1976			goto out;
1977		if (smask->sa_len > argp->ex_masklen)
1978			smask->sa_len = argp->ex_masklen;
1979	}
1980	i = saddr->sa_family;
1981	if ((rnh = nep->ne_rtable[i]) == 0) {
1982		/*
1983		 * Seems silly to initialize every AF when most are not used;
1984		 * do so on demand here.
1985		 */
1986		for (dom = domains; dom; dom = dom->dom_next)
1987			if (dom->dom_family == i && dom->dom_rtattach) {
1988				dom->dom_rtattach((void **) &nep->ne_rtable[i],
1989				    dom->dom_rtoffset);
1990				break;
1991			}
1992		if ((rnh = nep->ne_rtable[i]) == 0) {
1993			error = ENOBUFS;
1994			goto out;
1995		}
1996	}
1997	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
1998	    np->netc_rnodes);
1999	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
2000		error = EPERM;
2001		goto out;
2002	}
2003	np->netc_exflags = argp->ex_flags;
2004	np->netc_anon = argp->ex_anon;
2005	np->netc_anon.cr_ref = 1;
2006	return (0);
2007out:
2008	free(np, M_NETADDR);
2009	return (error);
2010}
2011
2012/* ARGSUSED */
2013static int
2014vfs_free_netcred(struct radix_node *rn, void *w)
2015{
2016	register struct radix_node_head *rnh = (struct radix_node_head *) w;
2017
2018	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
2019	free((caddr_t) rn, M_NETADDR);
2020	return (0);
2021}
2022
2023/*
2024 * Free the net address hash lists that are hanging off the mount points.
2025 */
2026static void
2027vfs_free_addrlist(struct netexport *nep)
2028{
2029	register int i;
2030	register struct radix_node_head *rnh;
2031
2032	for (i = 0; i <= AF_MAX; i++)
2033		if ((rnh = nep->ne_rtable[i])) {
2034			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
2035			    (caddr_t) rnh);
2036			free((caddr_t) rnh, M_RTABLE);
2037			nep->ne_rtable[i] = 0;
2038		}
2039}
2040
2041int
2042vfs_export(mp, nep, argp)
2043	struct mount *mp;
2044	struct netexport *nep;
2045	struct export_args *argp;
2046{
2047	int error;
2048
2049	if (argp->ex_flags & MNT_DELEXPORT) {
2050		if (mp->mnt_flag & MNT_EXPUBLIC) {
2051			vfs_setpublicfs(NULL, NULL, NULL);
2052			mp->mnt_flag &= ~MNT_EXPUBLIC;
2053		}
2054		vfs_free_addrlist(nep);
2055		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2056	}
2057	if (argp->ex_flags & MNT_EXPORTED) {
2058		if (argp->ex_flags & MNT_EXPUBLIC) {
2059			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2060				return (error);
2061			mp->mnt_flag |= MNT_EXPUBLIC;
2062		}
2063		if ((error = vfs_hang_addrlist(mp, nep, argp)))
2064			return (error);
2065		mp->mnt_flag |= MNT_EXPORTED;
2066	}
2067	return (0);
2068}
2069
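/*
 * Illustrative sketch: a filesystem's mount routine passes its
 * per-mount netexport structure and the user-supplied export_args
 * through here when updating exports; "ump->um_export" and
 * "args.export" are filesystem-specific assumptions.
 *
 *	error = vfs_export(mp, &ump->um_export, &args.export);
 */
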
2070
2071/*
2072 * Set the publicly exported filesystem (WebNFS). Currently, only
2073 * one public filesystem is possible in the spec (RFC 2054 and 2055)
2074 */
2075int
2076vfs_setpublicfs(mp, nep, argp)
2077	struct mount *mp;
2078	struct netexport *nep;
2079	struct export_args *argp;
2080{
2081	int error;
2082	struct vnode *rvp;
2083	char *cp;
2084
2085	/*
2086	 * mp == NULL -> invalidate the current info, the FS is
2087	 * no longer exported. May be called from either vfs_export
2088	 * or unmount, so check if it hasn't already been done.
2089	 */
2090	if (mp == NULL) {
2091		if (nfs_pub.np_valid) {
2092			nfs_pub.np_valid = 0;
2093			if (nfs_pub.np_index != NULL) {
2094				FREE(nfs_pub.np_index, M_TEMP);
2095				nfs_pub.np_index = NULL;
2096			}
2097		}
2098		return (0);
2099	}
2100
2101	/*
2102	 * Only one allowed at a time.
2103	 */
2104	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2105		return (EBUSY);
2106
2107	/*
2108	 * Get real filehandle for root of exported FS.
2109	 */
2110	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
2111	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
2112
2113	if ((error = VFS_ROOT(mp, &rvp)))
2114		return (error);
2115
2116	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
2117		return (error);
2118
2119	vput(rvp);
2120
2121	/*
2122	 * If an indexfile was specified, pull it in.
2123	 */
2124	if (argp->ex_indexfile != NULL) {
2125		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2126		    M_WAITOK);
2127		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2128		    MAXNAMLEN, (size_t *)0);
2129		if (!error) {
2130			/*
2131			 * Check for illegal filenames.
2132			 */
2133			for (cp = nfs_pub.np_index; *cp; cp++) {
2134				if (*cp == '/') {
2135					error = EINVAL;
2136					break;
2137				}
2138			}
2139		}
2140		if (error) {
2141			FREE(nfs_pub.np_index, M_TEMP);
2142			return (error);
2143		}
2144	}
2145
2146	nfs_pub.np_mount = mp;
2147	nfs_pub.np_valid = 1;
2148	return (0);
2149}
2150
2151struct netcred *
2152vfs_export_lookup(mp, nep, nam)
2153	register struct mount *mp;
2154	struct netexport *nep;
2155	struct sockaddr *nam;
2156{
2157	register struct netcred *np;
2158	register struct radix_node_head *rnh;
2159	struct sockaddr *saddr;
2160
2161	np = NULL;
2162	if (mp->mnt_flag & MNT_EXPORTED) {
2163		/*
2164		 * Lookup in the export list first.
2165		 */
2166		if (nam != NULL) {
2167			saddr = nam;
2168			rnh = nep->ne_rtable[saddr->sa_family];
2169			if (rnh != NULL) {
2170				np = (struct netcred *)
2171					(*rnh->rnh_matchaddr)((caddr_t)saddr,
2172							      rnh);
2173				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2174					np = NULL;
2175			}
2176		}
2177		/*
2178		 * If no address match, use the default if it exists.
2179		 */
2180		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2181			np = &nep->ne_defexported;
2182	}
2183	return (np);
2184}
2185
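/*
 * Illustrative sketch: a filesystem's fhtovp routine consults the
 * export list to decide whether the client address "nam" may access
 * the filesystem; the structure names below are assumptions.
 *
 *	np = vfs_export_lookup(mp, &ump->um_export, nam);
 *	if (np == NULL)
 *		return (EACCES);
 *	*exflagsp = np->netc_exflags;
 *	*credanonp = &np->netc_anon;
 */
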
2186/*
2187 * Perform msync on all vnodes under a mount point.
2188 * The mount point must be locked.
2189 */
2190void
2191vfs_msync(struct mount *mp, int flags) {
2192	struct vnode *vp, *nvp;
2193loop:
2194	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
2195
2196		if (vp->v_mount != mp)
2197			goto loop;
2198		nvp = vp->v_mntvnodes.le_next;
2199		if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))
2200			continue;
2201		if (vp->v_object &&
2202		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
2203			vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE);
2204		}
2205	}
2206}
2207
2208/*
2209 * Create the VM object needed for VMIO and mmap support.  This
2210 * is done for all VREG files in the system.  Some filesystems might
2211 * afford the additional metadata buffering capability of the
2212 * VMIO code by making the device node be VMIO mode also.
2213 */
2214int
2215vfs_object_create(vp, p, cred, waslocked)
2216	struct vnode *vp;
2217	struct proc *p;
2218	struct ucred *cred;
2219	int waslocked;
2220{
2221	struct vattr vat;
2222	vm_object_t object;
2223	int error = 0;
2224
2225retry:
2226	if ((object = vp->v_object) == NULL) {
2227		if (vp->v_type == VREG) {
2228			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
2229				goto retn;
2230			(void) vnode_pager_alloc(vp,
2231				OFF_TO_IDX(round_page(vat.va_size)), 0, 0);
2232		} else {
2233			/*
2234			 * This simply allocates the biggest object possible
2235			 * for a VBLK vnode.  This should be fixed, but doesn't
2236			 * cause any problems (yet).
2237			 */
2238			(void) vnode_pager_alloc(vp, INT_MAX, 0, 0);
2239		}
2240		vp->v_object->flags |= OBJ_VFS_REF;
2241	} else {
2242		if (object->flags & OBJ_DEAD) {
2243			if (waslocked)
2244				VOP_UNLOCK(vp, 0, p);
2245			tsleep(object, PVM, "vodead", 0);
2246			if (waslocked)
2247				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2248			goto retry;
2249		}
2250		if ((object->flags & OBJ_VFS_REF) == 0) {
2251			object->flags |= OBJ_VFS_REF;
2252			vm_object_reference(object);
2253		}
2254	}
2255	if (vp->v_object)
2256		vp->v_flag |= VVMIO;
2257
2258retn:
2259	return error;
2260}
2261
2262void
2263vfree(vp)
2264	struct vnode *vp;
2265{
2266	simple_lock(&vnode_free_list_slock);
2267	if (vp->v_flag & VAGE) {
2268		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
2269	} else {
2270		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
2271	}
2272	freevnodes++;
2273	simple_unlock(&vnode_free_list_slock);
2274	vp->v_flag &= ~VAGE;
2275	vp->v_flag |= VFREE;
2276}
2277
2278void
2279vbusy(vp)
2280	struct vnode *vp;
2281{
2282	simple_lock(&vnode_free_list_slock);
2283	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
2284	freevnodes--;
2285	simple_unlock(&vnode_free_list_slock);
2286	vp->v_flag &= ~VFREE;
2287}
2288