1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
39 * $Id: vfs_subr.c,v 1.160 1998/08/12 20:17:42 bde Exp $
40 */
41
42/*
43 * External virtual filesystem routines
44 */
45#include "opt_ddb.h"
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/kernel.h>
50#include <sys/proc.h>
51#include <sys/malloc.h>
52#include <sys/mount.h>
53#include <sys/socket.h>
54#include <sys/vnode.h>
55#include <sys/stat.h>
56#include <sys/buf.h>
57#include <sys/domain.h>
58#include <sys/dirent.h>
59#include <sys/vmmeter.h>
60
61#include <machine/limits.h>
62
63#include <vm/vm.h>
64#include <vm/vm_object.h>
65#include <vm/vm_extern.h>
66#include <vm/pmap.h>
67#include <vm/vm_map.h>
68#include <vm/vm_pager.h>
69#include <vm/vnode_pager.h>
70#include <vm/vm_zone.h>
71#include <sys/sysctl.h>
72
73#include <miscfs/specfs/specdev.h>
74
75static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
76
77static void	insmntque __P((struct vnode *vp, struct mount *mp));
78#ifdef DDB
79static void	printlockedvnodes __P((void));
80#endif
81static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
82static void	vfree __P((struct vnode *));
83static void	vgonel __P((struct vnode *vp, struct proc *p));
84static unsigned long	numvnodes;
85SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
86
87enum vtype iftovt_tab[16] = {
88	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
89	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
90};
91int vttoif_tab[9] = {
92	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
93	S_IFSOCK, S_IFIFO, S_IFMT,
94};
95
96/*
97 * Insq/Remq for the vnode usage lists.
98 */
99#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
100#define	bufremvn(bp) {							\
101	LIST_REMOVE(bp, b_vnbufs);					\
102	(bp)->b_vnbufs.le_next = NOLIST;				\
103}
104
105static TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct tobefreelist vnode_tobefree_list;	/* vnodes queued to be freed */
107
108static u_long wantfreevnodes = 25;
109SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
110static u_long freevnodes = 0;
111SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
112
113int vfs_ioopt = 0;
114#ifdef ENABLE_VFS_IOOPT
115SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
116#endif
117
118struct mntlist mountlist;	/* mounted filesystem list */
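/*
 * Simple locks interlocking the mount list, mount id generation, the
 * per-mount vnode lists, the vnode free list, and the special device
 * hash chains.
 */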
119struct simplelock mountlist_slock;
120static struct simplelock mntid_slock;
121struct simplelock mntvnode_slock;
122static struct simplelock vnode_free_list_slock;
123static struct simplelock spechash_slock;
124struct nfs_public nfs_pub;	/* publicly exported FS */
125static vm_zone_t vnode_zone;
126
127/*
128 * The workitem queue.
129 */
130#define SYNCER_MAXDELAY		32
131int syncer_maxdelay =		SYNCER_MAXDELAY;	/* maximum delay time */
132time_t syncdelay =		30;
133int rushjob;				/* number of slots to run ASAP */
134
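/*
 * The syncer_workitem_pending array (sized via hashinit) acts as a timer
 * wheel of vnode lists; syncer_delayno indexes the slot to be processed
 * in the current second.
 */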
135static int syncer_delayno = 0;
136static long syncer_mask;
137LIST_HEAD(synclist, vnode);
138static struct synclist *syncer_workitem_pending;
139
140int desiredvnodes;
141SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");
142
143static void	vfs_free_addrlist __P((struct netexport *nep));
144static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
145static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
146				       struct export_args *argp));
147
148/*
149 * Initialize the vnode management data structures.
150 */
151void
152vntblinit()
153{
154
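	/*
	 * Scale the desired number of vnodes with both the maximum number
	 * of processes and the amount of physical memory (one vnode per
	 * four pages).
	 */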
155	desiredvnodes = maxproc + cnt.v_page_count / 4;
156	simple_lock_init(&mntvnode_slock);
157	simple_lock_init(&mntid_slock);
158	simple_lock_init(&spechash_slock);
159	TAILQ_INIT(&vnode_free_list);
160	TAILQ_INIT(&vnode_tobefree_list);
161	simple_lock_init(&vnode_free_list_slock);
162	CIRCLEQ_INIT(&mountlist);
163	vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
164	/*
165	 * Initialize the filesystem syncer.
166	 */
167	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
168		&syncer_mask);
169	syncer_maxdelay = syncer_mask + 1;
170}
171
172/*
173 * Mark a mount point as busy. Used to synchronize access and to delay
174 * unmounting. Interlock is not released on failure.
175 */
176int
177vfs_busy(mp, flags, interlkp, p)
178	struct mount *mp;
179	int flags;
180	struct simplelock *interlkp;
181	struct proc *p;
182{
183	int lkflags;
184
185	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
186		if (flags & LK_NOWAIT)
187			return (ENOENT);
188		mp->mnt_kern_flag |= MNTK_MWAIT;
189		if (interlkp) {
190			simple_unlock(interlkp);
191		}
192		/*
193		 * Since all busy locks are shared except the exclusive
194		 * lock granted when unmounting, the only place that a
195		 * wakeup needs to be done is at the release of the
196		 * exclusive lock at the end of dounmount.
197		 */
198		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
199		if (interlkp) {
200			simple_lock(interlkp);
201		}
202		return (ENOENT);
203	}
204	lkflags = LK_SHARED | LK_NOPAUSE;
205	if (interlkp)
206		lkflags |= LK_INTERLOCK;
207	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
208		panic("vfs_busy: unexpected lock failure");
209	return (0);
210}
211
212/*
213 * Free a busy filesystem.
214 */
215void
216vfs_unbusy(mp, p)
217	struct mount *mp;
218	struct proc *p;
219{
220
221	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
222}
223
224/*
225 * Lookup a filesystem type, and if found allocate and initialize
226 * a mount structure for it.
227 *
228 * Devname is usually updated by mount(8) after booting.
229 */
230int
231vfs_rootmountalloc(fstypename, devname, mpp)
232	char *fstypename;
233	char *devname;
234	struct mount **mpp;
235{
236	struct proc *p = curproc;	/* XXX */
237	struct vfsconf *vfsp;
238	struct mount *mp;
239
240	if (fstypename == NULL)
241		return (ENODEV);
242	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
243		if (!strcmp(vfsp->vfc_name, fstypename))
244			break;
245	if (vfsp == NULL)
246		return (ENODEV);
247	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
248	bzero((char *)mp, (u_long)sizeof(struct mount));
249	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
250	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
251	LIST_INIT(&mp->mnt_vnodelist);
252	mp->mnt_vfc = vfsp;
253	mp->mnt_op = vfsp->vfc_vfsops;
254	mp->mnt_flag = MNT_RDONLY;
255	mp->mnt_vnodecovered = NULLVP;
256	vfsp->vfc_refcount++;
257	mp->mnt_stat.f_type = vfsp->vfc_typenum;
258	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
259	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
260	mp->mnt_stat.f_mntonname[0] = '/';
261	mp->mnt_stat.f_mntonname[1] = 0;
262	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
263	*mpp = mp;
264	return (0);
265}
266
267/*
268 * Find an appropriate filesystem to use for the root. If a filesystem
269 * has not been preselected, walk through the list of known filesystems
270 * trying those that have mountroot routines, and try them until one
271 * works or we have tried them all.
272 */
273#ifdef notdef	/* XXX JH */
274int
275lite2_vfs_mountroot()
276{
277	struct vfsconf *vfsp;
278	extern int (*lite2_mountroot) __P((void));
279	int error;
280
281	if (lite2_mountroot != NULL)
282		return ((*lite2_mountroot)());
283	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
284		if (vfsp->vfc_mountroot == NULL)
285			continue;
286		if ((error = (*vfsp->vfc_mountroot)()) == 0)
287			return (0);
288		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
289	}
290	return (ENODEV);
291}
292#endif
293
294/*
295 * Lookup a mount point by filesystem identifier.
296 */
297struct mount *
298vfs_getvfs(fsid)
299	fsid_t *fsid;
300{
301	register struct mount *mp;
302
303	simple_lock(&mountlist_slock);
304	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
305	    mp = mp->mnt_list.cqe_next) {
306		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
307		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
308			simple_unlock(&mountlist_slock);
309			return (mp);
310	    }
311	}
312	simple_unlock(&mountlist_slock);
313	return ((struct mount *) 0);
314}
315
316/*
317 * Get a new unique fsid
318 */
319void
320vfs_getnewfsid(mp)
321	struct mount *mp;
322{
323	static u_short xxxfs_mntid;
324
325	fsid_t tfsid;
326	int mtype;
327
328	simple_lock(&mntid_slock);
329	mtype = mp->mnt_vfc->vfc_typenum;
330	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
331	mp->mnt_stat.f_fsid.val[1] = mtype;
332	if (xxxfs_mntid == 0)
333		++xxxfs_mntid;
334	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
335	tfsid.val[1] = mtype;
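	/*
	 * If any filesystems are mounted, probe for an fsid that is not
	 * already in use, bumping the candidate id until vfs_getvfs()
	 * no longer finds a match.
	 */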
336	if (mountlist.cqh_first != (void *)&mountlist) {
337		while (vfs_getvfs(&tfsid)) {
338			tfsid.val[0]++;
339			xxxfs_mntid++;
340		}
341	}
342	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
343	simple_unlock(&mntid_slock);
344}
345
346/*
347 * Set vnode attributes to VNOVAL
348 */
349void
350vattr_null(vap)
351	register struct vattr *vap;
352{
353
354	vap->va_type = VNON;
355	vap->va_size = VNOVAL;
356	vap->va_bytes = VNOVAL;
357	vap->va_mode = VNOVAL;
358	vap->va_nlink = VNOVAL;
359	vap->va_uid = VNOVAL;
360	vap->va_gid = VNOVAL;
361	vap->va_fsid = VNOVAL;
362	vap->va_fileid = VNOVAL;
363	vap->va_blocksize = VNOVAL;
364	vap->va_rdev = VNOVAL;
365	vap->va_atime.tv_sec = VNOVAL;
366	vap->va_atime.tv_nsec = VNOVAL;
367	vap->va_mtime.tv_sec = VNOVAL;
368	vap->va_mtime.tv_nsec = VNOVAL;
369	vap->va_ctime.tv_sec = VNOVAL;
370	vap->va_ctime.tv_nsec = VNOVAL;
371	vap->va_flags = VNOVAL;
372	vap->va_gen = VNOVAL;
373	vap->va_vaflags = 0;
374}
375
376/*
377 * Routines having to do with the management of the vnode table.
378 */
379extern vop_t **dead_vnodeop_p;
380
381/*
382 * Return the next vnode from the free list.
383 */
384int
385getnewvnode(tag, mp, vops, vpp)
386	enum vtagtype tag;
387	struct mount *mp;
388	vop_t **vops;
389	struct vnode **vpp;
390{
391	int s;
392	struct proc *p = curproc;	/* XXX */
393	struct vnode *vp, *tvp, *nvp;
394	vm_object_t object;
395	TAILQ_HEAD(freelst, vnode) vnode_tmp_list;
396
397	/*
	 * We take the least recently used vnode from the freelist
	 * if we can get it, it has no cached pages, and no namecache
	 * entries refer to it.  Otherwise we allocate a new vnode.
402	 */
403
404	s = splbio();
405	simple_lock(&vnode_free_list_slock);
406	TAILQ_INIT(&vnode_tmp_list);
407
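	/*
	 * Move any vnodes parked on the to-be-freed list onto the real
	 * free list.  Aged (VAGE) vnodes are inserted at the front so
	 * they are considered for reuse first.
	 */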
408	for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) {
409		nvp = TAILQ_NEXT(vp, v_freelist);
410		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
411		if (vp->v_flag & VAGE) {
412			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
413		} else {
414			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
415		}
416		vp->v_flag &= ~(VTBFREE|VAGE);
417		vp->v_flag |= VFREE;
418		if (vp->v_usecount)
419			panic("tobe free vnode isn't");
420		freevnodes++;
421	}
422
423	if (wantfreevnodes && freevnodes < wantfreevnodes) {
424		vp = NULL;
425	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
426		/*
427		 * XXX: this is only here to be backwards compatible
428		 */
429		vp = NULL;
430	} else {
431		for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) {
432
433			nvp = TAILQ_NEXT(vp, v_freelist);
434
435			if (!simple_lock_try(&vp->v_interlock))
436				continue;
437			if (vp->v_usecount)
438				panic("free vnode isn't");
439
440			object = vp->v_object;
441			if (object && (object->resident_page_count || object->ref_count)) {
				printf("object inconsistent state: RPC: %d, RC: %d\n",
443					object->resident_page_count, object->ref_count);
444				/* Don't recycle if it's caching some pages */
445				TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
446				TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist);
447				continue;
448			} else if (LIST_FIRST(&vp->v_cache_src)) {
449				/* Don't recycle if active in the namecache */
450				simple_unlock(&vp->v_interlock);
451				continue;
452			} else {
453				break;
454			}
455		}
456	}
457
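	/*
	 * Return the vnodes we skipped above (those still holding cached
	 * pages) to the tail of the free list.
	 */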
458	for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) {
459		nvp = TAILQ_NEXT(tvp, v_freelist);
460		TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist);
461		TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist);
462		simple_unlock(&tvp->v_interlock);
463	}
464
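	/*
	 * If a reusable vnode was found, strip its old identity below;
	 * otherwise allocate a fresh one from the vnode zone.
	 */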
465	if (vp) {
466		vp->v_flag |= VDOOMED;
467		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
468		freevnodes--;
469		simple_unlock(&vnode_free_list_slock);
470		cache_purge(vp);
471		vp->v_lease = NULL;
472		if (vp->v_type != VBAD) {
473			vgonel(vp, p);
474		} else {
475			simple_unlock(&vp->v_interlock);
476		}
477
478#ifdef DIAGNOSTIC
479		{
480			int s;
481
482			if (vp->v_data)
483				panic("cleaned vnode isn't");
484			s = splbio();
485			if (vp->v_numoutput)
486				panic("Clean vnode has pending I/O's");
487			splx(s);
488		}
489#endif
490		vp->v_flag = 0;
491		vp->v_lastr = 0;
492		vp->v_lastw = 0;
493		vp->v_lasta = 0;
494		vp->v_cstart = 0;
495		vp->v_clen = 0;
496		vp->v_socket = 0;
497		vp->v_writecount = 0;	/* XXX */
498		vp->v_maxio = 0;
499	} else {
500		simple_unlock(&vnode_free_list_slock);
501		vp = (struct vnode *) zalloc(vnode_zone);
502		bzero((char *) vp, sizeof *vp);
503		simple_lock_init(&vp->v_interlock);
504		vp->v_dd = vp;
505		cache_purge(vp);
506		LIST_INIT(&vp->v_cache_src);
507		TAILQ_INIT(&vp->v_cache_dst);
508		numvnodes++;
509	}
510
511	vp->v_type = VNON;
512	vp->v_tag = tag;
513	vp->v_op = vops;
514	insmntque(vp, mp);
515	*vpp = vp;
516	vp->v_usecount = 1;
517	vp->v_data = 0;
518	splx(s);
519
520	vfs_object_create(vp, p, p->p_ucred, TRUE);
521	return (0);
522}
523
524/*
525 * Move a vnode from one mount queue to another.
526 */
527static void
528insmntque(vp, mp)
529	register struct vnode *vp;
530	register struct mount *mp;
531{
532
533	simple_lock(&mntvnode_slock);
534	/*
535	 * Delete from old mount point vnode list, if on one.
536	 */
537	if (vp->v_mount != NULL)
538		LIST_REMOVE(vp, v_mntvnodes);
539	/*
540	 * Insert into list of vnodes for the new mount point, if available.
541	 */
542	if ((vp->v_mount = mp) == NULL) {
543		simple_unlock(&mntvnode_slock);
544		return;
545	}
546	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
547	simple_unlock(&mntvnode_slock);
548}
549
550/*
551 * Update outstanding I/O count and do wakeup if requested.
552 */
553void
554vwakeup(bp)
555	register struct buf *bp;
556{
557	register struct vnode *vp;
558
559	bp->b_flags &= ~B_WRITEINPROG;
560	if ((vp = bp->b_vp)) {
561		vp->v_numoutput--;
562		if (vp->v_numoutput < 0)
563			panic("vwakeup: neg numoutput");
564		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
565			vp->v_flag &= ~VBWAIT;
566			wakeup((caddr_t) &vp->v_numoutput);
567		}
568	}
569}
570
571/*
572 * Flush out and invalidate all buffers associated with a vnode.
573 * Called with the underlying object locked.
574 */
575int
576vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
577	register struct vnode *vp;
578	int flags;
579	struct ucred *cred;
580	struct proc *p;
581	int slpflag, slptimeo;
582{
583	register struct buf *bp;
584	struct buf *nbp, *blist;
585	int s, error;
586	vm_object_t object;
587
588	if (flags & V_SAVE) {
589		s = splbio();
590		while (vp->v_numoutput) {
591			vp->v_flag |= VBWAIT;
592			tsleep((caddr_t)&vp->v_numoutput,
593				slpflag | (PRIBIO + 1),
594				"vinvlbuf", slptimeo);
595		}
596		if (vp->v_dirtyblkhd.lh_first != NULL) {
597			splx(s);
598			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
599				return (error);
600			s = splbio();
601			if (vp->v_numoutput > 0 ||
602			    vp->v_dirtyblkhd.lh_first != NULL)
603				panic("vinvalbuf: dirty bufs");
604		}
605		splx(s);
606  	}
607	s = splbio();
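	/*
	 * Walk the clean and dirty buffer lists, invalidating every buffer
	 * found (with V_SAVEMETA, metadata buffers -- those with negative
	 * logical block numbers -- are left alone).
	 */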
608	for (;;) {
609		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
610			while (blist && blist->b_lblkno < 0)
611				blist = blist->b_vnbufs.le_next;
612		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
613		    (flags & V_SAVEMETA))
614			while (blist && blist->b_lblkno < 0)
615				blist = blist->b_vnbufs.le_next;
616		if (!blist)
617			break;
618
619		for (bp = blist; bp; bp = nbp) {
620			nbp = bp->b_vnbufs.le_next;
621			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
622				continue;
623			if (bp->b_flags & B_BUSY) {
624				bp->b_flags |= B_WANTED;
625				error = tsleep((caddr_t) bp,
626				    slpflag | (PRIBIO + 4), "vinvalbuf",
627				    slptimeo);
628				if (error) {
629					splx(s);
630					return (error);
631				}
632				break;
633			}
634			/*
635			 * XXX Since there are no node locks for NFS, I
636			 * believe there is a slight chance that a delayed
637			 * write will occur while sleeping just above, so
638			 * check for it.  Note that vfs_bio_awrite expects
639			 * buffers to reside on a queue, while VOP_BWRITE and
640			 * brelse do not.
641			 */
642			if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
643				(flags & V_SAVE)) {
644
645				if (bp->b_vp == vp) {
646					if (bp->b_flags & B_CLUSTEROK) {
647						vfs_bio_awrite(bp);
648					} else {
649						bremfree(bp);
650						bp->b_flags |= (B_BUSY | B_ASYNC);
651						VOP_BWRITE(bp);
652					}
653				} else {
654					bremfree(bp);
655					bp->b_flags |= B_BUSY;
656					(void) VOP_BWRITE(bp);
657				}
658				break;
659			}
660			bremfree(bp);
661			bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF | B_BUSY);
662			bp->b_flags &= ~B_ASYNC;
663			brelse(bp);
664		}
665	}
666
667	while (vp->v_numoutput > 0) {
668		vp->v_flag |= VBWAIT;
669		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
670	}
671
672	splx(s);
673
674	/*
675	 * Destroy the copy in the VM cache, too.
676	 */
677	simple_lock(&vp->v_interlock);
678	object = vp->v_object;
679	if (object != NULL) {
680		if (flags & V_SAVEMETA)
681			vm_object_page_remove(object, 0, object->size,
682				(flags & V_SAVE) ? TRUE : FALSE);
683		else
684			vm_object_page_remove(object, 0, 0,
685				(flags & V_SAVE) ? TRUE : FALSE);
686	}
687	simple_unlock(&vp->v_interlock);
688
689	if (!(flags & V_SAVEMETA) &&
690	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
691		panic("vinvalbuf: flush failed");
692	return (0);
693}
694
695/*
696 * Truncate a file's buffer and pages to a specified length.  This
697 * is in lieu of the old vinvalbuf mechanism, which performed unneeded
698 * sync activity.
699 */
700int
701vtruncbuf(vp, cred, p, length, blksize)
702	register struct vnode *vp;
703	struct ucred *cred;
704	struct proc *p;
705	off_t length;
706	int blksize;
707{
708	register struct buf *bp;
709	struct buf *nbp, *blist;
710	int s, error, anyfreed;
711	vm_object_t object;
712	int trunclbn;
713
714	/*
715	 * Round up to the *next* lbn.
716	 */
717	trunclbn = (length + blksize - 1) / blksize;
718
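	/*
	 * Repeatedly sweep the clean and dirty buffer lists, discarding
	 * any buffer at or beyond the truncation point, until a complete
	 * pass frees nothing.
	 */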
719	s = splbio();
720restart:
721	anyfreed = 1;
722	for (;anyfreed;) {
723		anyfreed = 0;
724		for ( bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
725
726			nbp = LIST_NEXT(bp, b_vnbufs);
727
728			if (bp->b_lblkno >= trunclbn) {
729				if (bp->b_flags & B_BUSY) {
730					bp->b_flags |= B_WANTED;
731					tsleep(bp, PRIBIO + 4, "vtrb1", 0);
732					goto restart;
733				} else {
734					bremfree(bp);
735					bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF);
736					bp->b_flags &= ~B_ASYNC;
737					brelse(bp);
738					anyfreed = 1;
739				}
740				if (nbp &&
741					((LIST_NEXT(nbp, b_vnbufs) == NOLIST) ||
742					 (nbp->b_vp != vp) ||
743					 (nbp->b_flags & B_DELWRI))) {
744					goto restart;
745				}
746			}
747		}
748
749		for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
750
751			nbp = LIST_NEXT(bp, b_vnbufs);
752
753			if (bp->b_lblkno >= trunclbn) {
754				if (bp->b_flags & B_BUSY) {
755					bp->b_flags |= B_WANTED;
756					tsleep(bp, PRIBIO + 4, "vtrb2", 0);
757					goto restart;
758				} else {
759					bremfree(bp);
760					bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF);
761					bp->b_flags &= ~B_ASYNC;
762					brelse(bp);
763					anyfreed = 1;
764				}
765				if (nbp &&
766					((LIST_NEXT(nbp, b_vnbufs) == NOLIST) ||
767					 (nbp->b_vp != vp) ||
768					 (nbp->b_flags & B_DELWRI) == 0)) {
769					goto restart;
770				}
771			}
772		}
773	}
774
775	if (length > 0) {
776restartsync:
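		/*
		 * Write out any remaining dirty metadata buffers (negative
		 * logical block numbers); only buffers at or past the new
		 * end of file were discarded above.
		 */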
777		for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
778
779			nbp = LIST_NEXT(bp, b_vnbufs);
780
781			if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
782				if (bp->b_flags & B_BUSY) {
783					bp->b_flags |= B_WANTED;
784					tsleep(bp, PRIBIO, "vtrb3", 0);
785				} else {
786					bremfree(bp);
787					bp->b_flags |= B_BUSY;
788					if (bp->b_vp == vp) {
789						bp->b_flags |= B_ASYNC;
790					} else {
791						bp->b_flags &= ~B_ASYNC;
792					}
793					VOP_BWRITE(bp);
794				}
795				goto restartsync;
796			}
797
798		}
799	}
800
801	while (vp->v_numoutput > 0) {
802		vp->v_flag |= VBWAIT;
803		tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
804	}
805
806	splx(s);
807
808	vnode_pager_setsize(vp, length);
809
810	return (0);
811}
812
813/*
814 * Associate a buffer with a vnode.
815 */
816void
817bgetvp(vp, bp)
818	register struct vnode *vp;
819	register struct buf *bp;
820{
821	int s;
822
823#if defined(DIAGNOSTIC)
824	if (bp->b_vp)
825		panic("bgetvp: not free");
826#endif
827	vhold(vp);
828	bp->b_vp = vp;
829	if (vp->v_type == VBLK || vp->v_type == VCHR)
830		bp->b_dev = vp->v_rdev;
831	else
832		bp->b_dev = NODEV;
833	/*
834	 * Insert onto list for new vnode.
835	 */
836	s = splbio();
837	bufinsvn(bp, &vp->v_cleanblkhd);
838	splx(s);
839}
840
841/*
842 * Disassociate a buffer from a vnode.
843 */
844void
845brelvp(bp)
846	register struct buf *bp;
847{
848	struct vnode *vp;
849	int s;
850
851#if defined(DIAGNOSTIC)
852	if (bp->b_vp == (struct vnode *) 0)
853		panic("brelvp: NULL");
854#endif
855
856	/*
857	 * Delete from old vnode list, if on one.
858	 */
859	vp = bp->b_vp;
860	s = splbio();
861	if (bp->b_vnbufs.le_next != NOLIST)
862		bufremvn(bp);
863	if ((vp->v_flag & VONWORKLST) && (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)) {
864		vp->v_flag &= ~VONWORKLST;
865		LIST_REMOVE(vp, v_synclist);
866	}
867	splx(s);
868	bp->b_vp = (struct vnode *) 0;
869	vdrop(vp);
870}
871
872/*
873 * The workitem queue.
874 *
875 * It is useful to delay writes of file data and filesystem metadata
876 * for tens of seconds so that quickly created and deleted files need
877 * not waste disk bandwidth being created and removed. To realize this,
878 * we append vnodes to a "workitem" queue. When running with a soft
879 * updates implementation, most pending metadata dependencies should
 * not wait for more than a few seconds.  Thus, writes to mounted-on
 * block devices (filesystem metadata) are delayed only about half the
 * time that file data is delayed.  Similarly, directory updates are
 * more critical, so they are delayed only about a third of the time
 * that file data is delayed.  Thus, there are SYNCER_MAXDELAY queues
 * that are processed round-robin at a rate of one each second (driven
 * off the filesystem syncer process).  The
886 * syncer_delayno variable indicates the next queue that is to be processed.
887 * Items that need to be processed soon are placed in this queue:
888 *
889 *	syncer_workitem_pending[syncer_delayno]
890 *
891 * A delay of fifteen seconds is done by placing the request fifteen
892 * entries later in the queue:
893 *
894 *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
895 *
896 */
897
898/*
899 * Add an item to the syncer work queue.
900 */
901void
902vn_syncer_add_to_worklist(vp, delay)
903	struct vnode *vp;
904	int delay;
905{
906	int s, slot;
907
908	s = splbio();
909
910	if (vp->v_flag & VONWORKLST) {
911		LIST_REMOVE(vp, v_synclist);
912	}
913
914	if (delay > syncer_maxdelay - 2)
915		delay = syncer_maxdelay - 2;
916	slot = (syncer_delayno + delay) & syncer_mask;
917
918	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
919	vp->v_flag |= VONWORKLST;
920	splx(s);
921}
922
923static void sched_sync __P((void));
924static struct	proc *updateproc;
925static struct kproc_desc up_kp = {
926	"syncer",
927	sched_sync,
928	&updateproc
929};
930SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
931
932/*
933 * System filesystem synchronizer daemon.
934 */
935void
936sched_sync(void)
937{
938	struct synclist *slp;
939	struct vnode *vp;
940	long starttime;
941	int s;
942	struct proc *p = updateproc;
943
944	for (;;) {
945		starttime = time_second;
946
947		/*
948		 * Push files whose dirty time has expired.
949		 */
950		s = splbio();
951		slp = &syncer_workitem_pending[syncer_delayno];
952		syncer_delayno += 1;
953		if (syncer_delayno == syncer_maxdelay)
954			syncer_delayno = 0;
955		splx(s);
956
957		while ((vp = LIST_FIRST(slp)) != NULL) {
958			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
959			(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
960			VOP_UNLOCK(vp, 0, p);
961			if (LIST_FIRST(slp) == vp) {
962				if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
963				    vp->v_type != VBLK)
964					panic("sched_sync: fsync failed");
965				/*
966				 * Move ourselves to the back of the sync list.
967				 */
968				LIST_REMOVE(vp, v_synclist);
969				vn_syncer_add_to_worklist(vp, syncdelay);
970			}
971		}
972
973		/*
974		 * Do soft update processing.
975		 */
976		if (bioops.io_sync)
977			(*bioops.io_sync)(NULL);
978
979		/*
980		 * The variable rushjob allows the kernel to speed up the
981		 * processing of the filesystem syncer process. A rushjob
982		 * value of N tells the filesystem syncer to process the next
983		 * N seconds worth of work on its queue ASAP. Currently rushjob
984		 * is used by the soft update code to speed up the filesystem
985		 * syncer process when the incore state is getting so far
986		 * ahead of the disk that the kernel memory pool is being
987		 * threatened with exhaustion.
988		 */
989		if (rushjob > 0) {
990			rushjob -= 1;
991			continue;
992		}
993		/*
994		 * If it has taken us less than a second to process the
995		 * current work, then wait. Otherwise start right over
996		 * again. We can still lose time if any single round
997		 * takes more than two seconds, but it does not really
998		 * matter as we are just trying to generally pace the
999		 * filesystem activity.
1000		 */
1001		if (time_second == starttime)
1002			tsleep(&lbolt, PPAUSE, "syncer", 0);
1003	}
1004}
1005
1006/*
1007 * Associate a p-buffer with a vnode.
1008 */
1009void
1010pbgetvp(vp, bp)
1011	register struct vnode *vp;
1012	register struct buf *bp;
1013{
1014#if defined(DIAGNOSTIC)
1015	if (bp->b_vp)
1016		panic("pbgetvp: not free");
1017#endif
1018	bp->b_vp = vp;
1019	if (vp->v_type == VBLK || vp->v_type == VCHR)
1020		bp->b_dev = vp->v_rdev;
1021	else
1022		bp->b_dev = NODEV;
1023}
1024
1025/*
1026 * Disassociate a p-buffer from a vnode.
1027 */
1028void
1029pbrelvp(bp)
1030	register struct buf *bp;
1031{
1032
1033#if defined(DIAGNOSTIC)
1034	if (bp->b_vp == (struct vnode *) 0)
1035		panic("pbrelvp: NULL");
1036#endif
1037
1038	bp->b_vp = (struct vnode *) 0;
1039}
1040
1041/*
1042 * Reassign a buffer from one vnode to another.
1043 * Used to assign file specific control information
1044 * (indirect blocks) to the vnode to which they belong.
1045 */
1046void
1047reassignbuf(bp, newvp)
1048	register struct buf *bp;
1049	register struct vnode *newvp;
1050{
1051	struct buflists *listheadp;
1052	int delay;
1053	int s;
1054
1055	if (newvp == NULL) {
1056		printf("reassignbuf: NULL");
1057		return;
1058	}
1059
1060	s = splbio();
1061	/*
1062	 * Delete from old vnode list, if on one.
1063	 */
1064	if (bp->b_vnbufs.le_next != NOLIST) {
1065		bufremvn(bp);
1066		vdrop(bp->b_vp);
1067	}
1068	/*
1069	 * If dirty, put on list of dirty buffers; otherwise insert onto list
1070	 * of clean buffers.
1071	 */
1072	if (bp->b_flags & B_DELWRI) {
1073		struct buf *tbp;
1074
1075		listheadp = &newvp->v_dirtyblkhd;
1076		if ((newvp->v_flag & VONWORKLST) == 0) {
1077			switch (newvp->v_type) {
1078			case VDIR:
1079				delay = syncdelay / 3;
1080				break;
1081			case VBLK:
1082				if (newvp->v_specmountpoint != NULL) {
1083					delay = syncdelay / 2;
1084					break;
1085				}
1086				/* fall through */
1087			default:
1088				delay = syncdelay;
1089			}
1090			vn_syncer_add_to_worklist(newvp, delay);
1091		}
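		/*
		 * Insert the buffer into the dirty list, keeping the list
		 * sorted by logical block number.
		 */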
1092		tbp = listheadp->lh_first;
1093		if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
1094			bufinsvn(bp, listheadp);
1095		} else {
1096			while (tbp->b_vnbufs.le_next &&
1097			    (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
1098				tbp = tbp->b_vnbufs.le_next;
1099			}
1100			LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
1101		}
1102	} else {
1103		bufinsvn(bp, &newvp->v_cleanblkhd);
1104		if ((newvp->v_flag & VONWORKLST) &&
1105			LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
1106			newvp->v_flag &= ~VONWORKLST;
1107			LIST_REMOVE(newvp, v_synclist);
1108		}
1109	}
1110	bp->b_vp = newvp;
1111	vhold(bp->b_vp);
1112	splx(s);
1113}
1114
1115#ifndef SLICE
1116/*
1117 * Create a vnode for a block device.
1118 * Used for mounting the root file system.
1119 */
1120int
1121bdevvp(dev, vpp)
1122	dev_t dev;
1123	struct vnode **vpp;
1124{
1125	register struct vnode *vp;
1126	struct vnode *nvp;
1127	int error;
1128
1129	if (dev == NODEV)
1130		return (0);
1131	error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
1132	if (error) {
1133		*vpp = 0;
1134		return (error);
1135	}
1136	vp = nvp;
1137	vp->v_type = VBLK;
1138	if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
1139		vput(vp);
1140		vp = nvp;
1141	}
1142	*vpp = vp;
1143	return (0);
1144}
1145#endif	/* !SLICE */
1146
1147/*
1148 * Check to see if the new vnode represents a special device
1149 * for which we already have a vnode (either because of
1150 * bdevvp() or because of a different vnode representing
1151 * the same block device). If such an alias exists, deallocate
1152 * the existing contents and return the aliased vnode. The
1153 * caller is responsible for filling it with its new contents.
1154 */
1155struct vnode *
1156checkalias(nvp, nvp_rdev, mp)
1157	register struct vnode *nvp;
1158	dev_t nvp_rdev;
1159	struct mount *mp;
1160{
1161	struct proc *p = curproc;	/* XXX */
1162	struct vnode *vp;
1163	struct vnode **vpp;
1164
1165	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
1166		return (NULLVP);
1167
1168	vpp = &speclisth[SPECHASH(nvp_rdev)];
1169loop:
1170	simple_lock(&spechash_slock);
1171	for (vp = *vpp; vp; vp = vp->v_specnext) {
1172		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
1173			continue;
1174		/*
1175		 * Alias, but not in use, so flush it out.
1176		 * Only alias active device nodes.
1177		 * Not sure why we don't re-use this like we do below.
1178		 */
1179		simple_lock(&vp->v_interlock);
1180		if (vp->v_usecount == 0) {
1181			simple_unlock(&spechash_slock);
1182			vgonel(vp, p);
1183			goto loop;
1184		}
1185		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
1186			/*
			 * It disappeared, and we may have slept.
1188			 * Restart from the beginning
1189			 */
1190			simple_unlock(&spechash_slock);
1191			goto loop;
1192		}
1193		break;
1194	}
1195	/*
1196	 * It would be a lot clearer what is going on here if
1197	 * this had been expressed as:
	 * if ( vp && (vp->v_tag == VT_NON))
1199	 * and the clauses had been swapped.
1200	 */
1201	if (vp == NULL || vp->v_tag != VT_NON) {
1202		/*
1203		 * Put the new vnode into the hash chain.
1204		 * and if there was an alias, connect them.
1205		 */
1206		MALLOC(nvp->v_specinfo, struct specinfo *,
1207		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
1208		nvp->v_rdev = nvp_rdev;
1209		nvp->v_hashchain = vpp;
1210		nvp->v_specnext = *vpp;
1211		nvp->v_specmountpoint = NULL;
1212		simple_unlock(&spechash_slock);
1213		*vpp = nvp;
1214		if (vp != NULLVP) {
1215			nvp->v_flag |= VALIASED;
1216			vp->v_flag |= VALIASED;
1217			vput(vp);
1218		}
1219		return (NULLVP);
1220	}
1221	/*
	 * if ( vp && (vp->v_tag == VT_NON))
	 * We have a vnode alias, but it is trashed.
	 * Make it look like it was newly allocated (by getnewvnode()).
1225	 * The caller should use this instead.
1226	 */
1227	simple_unlock(&spechash_slock);
1228	VOP_UNLOCK(vp, 0, p);
1229	simple_lock(&vp->v_interlock);
1230	vclean(vp, 0, p);
1231	vp->v_op = nvp->v_op;
1232	vp->v_tag = nvp->v_tag;
1233	nvp->v_type = VNON;
1234	insmntque(vp, mp);
1235	return (vp);
1236}
1237
1238/*
1239 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
1242 * when the transition is completed, and an error returned to
1243 * indicate that the vnode is no longer usable (possibly having
1244 * been changed to a new file system type).
1245 */
1246int
1247vget(vp, flags, p)
1248	register struct vnode *vp;
1249	int flags;
1250	struct proc *p;
1251{
1252	int error;
1253
1254	/*
1255	 * If the vnode is in the process of being cleaned out for
1256	 * another use, we wait for the cleaning to finish and then
1257	 * return failure. Cleaning is determined by checking that
1258	 * the VXLOCK flag is set.
1259	 */
1260	if ((flags & LK_INTERLOCK) == 0) {
1261		simple_lock(&vp->v_interlock);
1262	}
1263	if (vp->v_flag & VXLOCK) {
1264		vp->v_flag |= VXWANT;
1265		simple_unlock(&vp->v_interlock);
1266		tsleep((caddr_t)vp, PINOD, "vget", 0);
1267		return (ENOENT);
1268	}
1269
1270	vp->v_usecount++;
1271
1272	if (VSHOULDBUSY(vp))
1273		vbusy(vp);
1274	if (flags & LK_TYPE_MASK) {
1275		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
1276			/*
1277			 * must expand vrele here because we do not want
1278			 * to call VOP_INACTIVE if the reference count
1279			 * drops back to zero since it was never really
1280			 * active. We must remove it from the free list
1281			 * before sleeping so that multiple processes do
1282			 * not try to recycle it.
1283			 */
1284			simple_lock(&vp->v_interlock);
1285			vp->v_usecount--;
1286			if (VSHOULDFREE(vp))
1287				vfree(vp);
1288			simple_unlock(&vp->v_interlock);
1289		}
1290		return (error);
1291	}
1292	simple_unlock(&vp->v_interlock);
1293	return (0);
1294}
1295
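/*
 * Increment the use count on a vnode.
 */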
1296void
1297vref(struct vnode *vp)
1298{
1299	simple_lock(&vp->v_interlock);
1300	vp->v_usecount++;
1301	simple_unlock(&vp->v_interlock);
1302}
1303
1304/*
1305 * Vnode put/release.
1306 * If count drops to zero, call inactive routine and return to freelist.
1307 */
1308void
1309vrele(vp)
1310	struct vnode *vp;
1311{
1312	struct proc *p = curproc;	/* XXX */
1313
1314#ifdef DIAGNOSTIC
1315	if (vp == NULL)
1316		panic("vrele: null vp");
1317#endif
1318	simple_lock(&vp->v_interlock);
1319
1320	if (vp->v_usecount > 1) {
1321
1322		vp->v_usecount--;
1323		simple_unlock(&vp->v_interlock);
1324
1325		return;
1326	}
1327
1328	if (vp->v_usecount == 1) {
1329
1330		vp->v_usecount--;
1331
1332		if (VSHOULDFREE(vp))
1333			vfree(vp);
1334	/*
1335	 * If we are doing a vput, the node is already locked, and we must
1336	 * call VOP_INACTIVE with the node locked.  So, in the case of
1337	 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
1338	 */
1339		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1340			VOP_INACTIVE(vp, p);
1341		}
1342
1343	} else {
1344#ifdef DIAGNOSTIC
1345		vprint("vrele: negative ref count", vp);
1346		simple_unlock(&vp->v_interlock);
1347#endif
1348		panic("vrele: negative ref cnt");
1349	}
1350}
1351
1352void
1353vput(vp)
1354	struct vnode *vp;
1355{
1356	struct proc *p = curproc;	/* XXX */
1357
1358#ifdef DIAGNOSTIC
1359	if (vp == NULL)
1360		panic("vput: null vp");
1361#endif
1362
1363	simple_lock(&vp->v_interlock);
1364
1365	if (vp->v_usecount > 1) {
1366
1367		vp->v_usecount--;
1368		VOP_UNLOCK(vp, LK_INTERLOCK, p);
1369		return;
1370
1371	}
1372
1373	if (vp->v_usecount == 1) {
1374
1375		vp->v_usecount--;
1376		if (VSHOULDFREE(vp))
1377			vfree(vp);
1378	/*
1379	 * If we are doing a vput, the node is already locked, and we must
1380	 * call VOP_INACTIVE with the node locked.  So, in the case of
1381	 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
1382	 */
1383		simple_unlock(&vp->v_interlock);
1384		VOP_INACTIVE(vp, p);
1385
1386	} else {
1387#ifdef DIAGNOSTIC
1388		vprint("vput: negative ref count", vp);
1389#endif
1390		panic("vput: negative ref cnt");
1391	}
1392}
1393
1394/*
1395 * Somebody doesn't want the vnode recycled.
1396 */
1397void
1398vhold(vp)
1399	register struct vnode *vp;
1400{
1401	int s;
1402
1403  	s = splbio();
1404	vp->v_holdcnt++;
1405	if (VSHOULDBUSY(vp))
1406		vbusy(vp);
1407	splx(s);
1408}
1409
1410/*
1411 * One less who cares about this vnode.
1412 */
1413void
1414vdrop(vp)
1415	register struct vnode *vp;
1416{
1417	int s;
1418
1419	s = splbio();
1420	if (vp->v_holdcnt <= 0)
1421		panic("vdrop: holdcnt");
1422	vp->v_holdcnt--;
1423	if (VSHOULDFREE(vp))
1424		vfree(vp);
1425	splx(s);
1426}
1427
1428/*
1429 * Remove any vnodes in the vnode table belonging to mount point mp.
1430 *
1431 * If MNT_NOFORCE is specified, there should not be any active ones,
1432 * return error if any are found (nb: this is a user error, not a
1433 * system error). If MNT_FORCE is specified, detach any active vnodes
1434 * that are found.
1435 */
1436#ifdef DIAGNOSTIC
1437static int busyprt = 0;		/* print out busy vnodes */
1438SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
1439#endif
1440
1441int
1442vflush(mp, skipvp, flags)
1443	struct mount *mp;
1444	struct vnode *skipvp;
1445	int flags;
1446{
1447	struct proc *p = curproc;	/* XXX */
1448	struct vnode *vp, *nvp;
1449	int busy = 0;
1450
1451	simple_lock(&mntvnode_slock);
1452loop:
1453	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1454		/*
1455		 * Make sure this vnode wasn't reclaimed in getnewvnode().
1456		 * Start over if it has (it won't be on the list anymore).
1457		 */
1458		if (vp->v_mount != mp)
1459			goto loop;
1460		nvp = vp->v_mntvnodes.le_next;
1461		/*
1462		 * Skip over a selected vnode.
1463		 */
1464		if (vp == skipvp)
1465			continue;
1466
1467		simple_lock(&vp->v_interlock);
1468		/*
		 * Skip over vnodes marked VSYSTEM.
1470		 */
1471		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1472			simple_unlock(&vp->v_interlock);
1473			continue;
1474		}
1475		/*
1476		 * If WRITECLOSE is set, only flush out regular file vnodes
1477		 * open for writing.
1478		 */
1479		if ((flags & WRITECLOSE) &&
1480		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
1481			simple_unlock(&vp->v_interlock);
1482			continue;
1483		}
1484
1485		/*
1486		 * With v_usecount == 0, all we need to do is clear out the
1487		 * vnode data structures and we are done.
1488		 */
1489		if (vp->v_usecount == 0) {
1490			simple_unlock(&mntvnode_slock);
1491			vgonel(vp, p);
1492			simple_lock(&mntvnode_slock);
1493			continue;
1494		}
1495
1496		/*
1497		 * If FORCECLOSE is set, forcibly close the vnode. For block
1498		 * or character devices, revert to an anonymous device. For
1499		 * all other files, just kill them.
1500		 */
1501		if (flags & FORCECLOSE) {
1502			simple_unlock(&mntvnode_slock);
1503			if (vp->v_type != VBLK && vp->v_type != VCHR) {
1504				vgonel(vp, p);
1505			} else {
1506				vclean(vp, 0, p);
1507				vp->v_op = spec_vnodeop_p;
1508				insmntque(vp, (struct mount *) 0);
1509			}
1510			simple_lock(&mntvnode_slock);
1511			continue;
1512		}
1513#ifdef DIAGNOSTIC
1514		if (busyprt)
1515			vprint("vflush: busy vnode", vp);
1516#endif
1517		simple_unlock(&vp->v_interlock);
1518		busy++;
1519	}
1520	simple_unlock(&mntvnode_slock);
1521	if (busy)
1522		return (EBUSY);
1523	return (0);
1524}
1525
1526/*
1527 * Disassociate the underlying file system from a vnode.
1528 */
1529static void
1530vclean(vp, flags, p)
1531	struct vnode *vp;
1532	int flags;
1533	struct proc *p;
1534{
1535	int active;
1536	vm_object_t obj;
1537
1538	/*
1539	 * Check to see if the vnode is in use. If so we have to reference it
1540	 * before we clean it out so that its count cannot fall to zero and
1541	 * generate a race against ourselves to recycle it.
1542	 */
1543	if ((active = vp->v_usecount))
1544		vp->v_usecount++;
1545
1546	/*
1547	 * Prevent the vnode from being recycled or brought into use while we
1548	 * clean it out.
1549	 */
1550	if (vp->v_flag & VXLOCK)
1551		panic("vclean: deadlock");
1552	vp->v_flag |= VXLOCK;
1553	/*
1554	 * Even if the count is zero, the VOP_INACTIVE routine may still
1555	 * have the object locked while it cleans it out. The VOP_LOCK
1556	 * ensures that the VOP_INACTIVE routine is done with its work.
1557	 * For active vnodes, it ensures that no other activity can
1558	 * occur while the underlying object is being cleaned out.
1559	 */
1560	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1561
1562	/*
1563	 * Clean out any buffers associated with the vnode.
1564	 */
1565	vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	if ((obj = vp->v_object) != NULL) {
1567		if (obj->ref_count == 0) {
1568			/*
1569			 * This is a normal way of shutting down the object/vnode
1570			 * association.
1571			 */
1572			vm_object_terminate(obj);
1573		} else {
1574			/*
1575			 * Woe to the process that tries to page now :-).
1576			 */
1577			vm_pager_deallocate(obj);
1578		}
1579	}
1580
1581	/*
1582	 * If purging an active vnode, it must be closed and
1583	 * deactivated before being reclaimed. Note that the
1584	 * VOP_INACTIVE will unlock the vnode.
1585	 */
1586	if (active) {
1587		if (flags & DOCLOSE)
1588			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1589		VOP_INACTIVE(vp, p);
1590	} else {
1591		/*
1592		 * Any other processes trying to obtain this lock must first
1593		 * wait for VXLOCK to clear, then call the new lock operation.
1594		 */
1595		VOP_UNLOCK(vp, 0, p);
1596	}
1597	/*
1598	 * Reclaim the vnode.
1599	 */
1600	if (VOP_RECLAIM(vp, p))
1601		panic("vclean: cannot reclaim");
1602
1603	if (active)
1604		vrele(vp);
1605
1606	cache_purge(vp);
1607	if (vp->v_vnlock) {
1608#if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */
1609#ifdef DIAGNOSTIC
1610		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1611			vprint("vclean: lock not drained", vp);
1612#endif
1613#endif
1614		FREE(vp->v_vnlock, M_VNODE);
1615		vp->v_vnlock = NULL;
1616	}
1617
1618	if (VSHOULDFREE(vp))
1619		vfree(vp);
1620
1621	/*
1622	 * Done with purge, notify sleepers of the grim news.
1623	 */
1624	vp->v_op = dead_vnodeop_p;
1625	vn_pollgone(vp);
1626	vp->v_tag = VT_NON;
1627	vp->v_flag &= ~VXLOCK;
1628	if (vp->v_flag & VXWANT) {
1629		vp->v_flag &= ~VXWANT;
1630		wakeup((caddr_t) vp);
1631	}
1632}
1633
1634/*
1635 * Eliminate all activity associated with the requested vnode
1636 * and with all vnodes aliased to the requested vnode.
1637 */
1638int
1639vop_revoke(ap)
1640	struct vop_revoke_args /* {
1641		struct vnode *a_vp;
1642		int a_flags;
1643	} */ *ap;
1644{
1645	struct vnode *vp, *vq;
1646	struct proc *p = curproc;	/* XXX */
1647
1648#ifdef DIAGNOSTIC
1649	if ((ap->a_flags & REVOKEALL) == 0)
1650		panic("vop_revoke");
1651#endif
1652
1653	vp = ap->a_vp;
1654	simple_lock(&vp->v_interlock);
1655
1656	if (vp->v_flag & VALIASED) {
1657		/*
1658		 * If a vgone (or vclean) is already in progress,
1659		 * wait until it is done and return.
1660		 */
1661		if (vp->v_flag & VXLOCK) {
1662			vp->v_flag |= VXWANT;
1663			simple_unlock(&vp->v_interlock);
1664			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1665			return (0);
1666		}
1667		/*
1668		 * Ensure that vp will not be vgone'd while we
1669		 * are eliminating its aliases.
1670		 */
1671		vp->v_flag |= VXLOCK;
1672		simple_unlock(&vp->v_interlock);
1673		while (vp->v_flag & VALIASED) {
1674			simple_lock(&spechash_slock);
1675			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1676				if (vq->v_rdev != vp->v_rdev ||
1677				    vq->v_type != vp->v_type || vp == vq)
1678					continue;
1679				simple_unlock(&spechash_slock);
1680				vgone(vq);
1681				break;
1682			}
1683			if (vq == NULLVP) {
1684				simple_unlock(&spechash_slock);
1685			}
1686		}
1687		/*
1688		 * Remove the lock so that vgone below will
1689		 * really eliminate the vnode after which time
1690		 * vgone will awaken any sleepers.
1691		 */
1692		simple_lock(&vp->v_interlock);
1693		vp->v_flag &= ~VXLOCK;
1694		if (vp->v_flag & VXWANT) {
1695			vp->v_flag &= ~VXWANT;
1696			wakeup(vp);
1697		}
1698	}
1699	vgonel(vp, p);
1700	return (0);
1701}
1702
1703/*
1704 * Recycle an unused vnode to the front of the free list.
1705 * Release the passed interlock if the vnode will be recycled.
1706 */
1707int
1708vrecycle(vp, inter_lkp, p)
1709	struct vnode *vp;
1710	struct simplelock *inter_lkp;
1711	struct proc *p;
1712{
1713
1714	simple_lock(&vp->v_interlock);
1715	if (vp->v_usecount == 0) {
1716		if (inter_lkp) {
1717			simple_unlock(inter_lkp);
1718		}
1719		vgonel(vp, p);
1720		return (1);
1721	}
1722	simple_unlock(&vp->v_interlock);
1723	return (0);
1724}
1725
1726/*
1727 * Eliminate all activity associated with a vnode
1728 * in preparation for reuse.
1729 */
1730void
1731vgone(vp)
1732	register struct vnode *vp;
1733{
1734	struct proc *p = curproc;	/* XXX */
1735
1736	simple_lock(&vp->v_interlock);
1737	vgonel(vp, p);
1738}
1739
1740/*
1741 * vgone, with the vp interlock held.
1742 */
1743static void
1744vgonel(vp, p)
1745	struct vnode *vp;
1746	struct proc *p;
1747{
1748	int s;
1749	struct vnode *vq;
1750	struct vnode *vx;
1751
1752	/*
1753	 * If a vgone (or vclean) is already in progress,
1754	 * wait until it is done and return.
1755	 */
1756	if (vp->v_flag & VXLOCK) {
1757		vp->v_flag |= VXWANT;
1758		simple_unlock(&vp->v_interlock);
1759		tsleep((caddr_t)vp, PINOD, "vgone", 0);
1760		return;
1761	}
1762
1763	/*
1764	 * Clean out the filesystem specific data.
1765	 */
1766	vclean(vp, DOCLOSE, p);
1767	simple_lock(&vp->v_interlock);
1768
1769	/*
1770	 * Delete from old mount point vnode list, if on one.
1771	 */
1772	if (vp->v_mount != NULL)
1773		insmntque(vp, (struct mount *)0);
1774	/*
1775	 * If special device, remove it from special device alias list
1776	 * if it is on one.
1777	 */
1778	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1779		simple_lock(&spechash_slock);
1780		if (*vp->v_hashchain == vp) {
1781			*vp->v_hashchain = vp->v_specnext;
1782		} else {
1783			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1784				if (vq->v_specnext != vp)
1785					continue;
1786				vq->v_specnext = vp->v_specnext;
1787				break;
1788			}
1789			if (vq == NULL)
1790				panic("missing bdev");
1791		}
1792		if (vp->v_flag & VALIASED) {
1793			vx = NULL;
1794			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1795				if (vq->v_rdev != vp->v_rdev ||
1796				    vq->v_type != vp->v_type)
1797					continue;
1798				if (vx)
1799					break;
1800				vx = vq;
1801			}
1802			if (vx == NULL)
1803				panic("missing alias");
1804			if (vq == NULL)
1805				vx->v_flag &= ~VALIASED;
1806			vp->v_flag &= ~VALIASED;
1807		}
1808		simple_unlock(&spechash_slock);
1809		FREE(vp->v_specinfo, M_VNODE);
1810		vp->v_specinfo = NULL;
1811	}
1812
1813	/*
1814	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the VDOOMED
	 * flag and the reference count of zero is because
1817	 * it will be removed from the free list by getnewvnode,
1818	 * but will not have its reference count incremented until
1819	 * after calling vgone. If the reference count were
1820	 * incremented first, vgone would (incorrectly) try to
1821	 * close the previous instance of the underlying object.
1822	 */
1823	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
1824		s = splbio();
1825		simple_lock(&vnode_free_list_slock);
1826		if (vp->v_flag & VFREE) {
1827			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1828		} else if (vp->v_flag & VTBFREE) {
1829			TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
1830			vp->v_flag &= ~VTBFREE;
1831			freevnodes++;
1832		} else
1833			freevnodes++;
1834		vp->v_flag |= VFREE;
1835		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1836		simple_unlock(&vnode_free_list_slock);
1837		splx(s);
1838	}
1839
1840	vp->v_type = VBAD;
1841	simple_unlock(&vp->v_interlock);
1842}
1843
1844/*
1845 * Lookup a vnode by device number.
1846 */
1847int
1848vfinddev(dev, type, vpp)
1849	dev_t dev;
1850	enum vtype type;
1851	struct vnode **vpp;
1852{
1853	register struct vnode *vp;
1854	int rc = 0;
1855
1856	simple_lock(&spechash_slock);
1857	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1858		if (dev != vp->v_rdev || type != vp->v_type)
1859			continue;
1860		*vpp = vp;
1861		rc = 1;
1862		break;
1863	}
1864	simple_unlock(&spechash_slock);
1865	return (rc);
1866}
1867
1868/*
1869 * Calculate the total number of references to a special device.
1870 */
1871int
1872vcount(vp)
1873	register struct vnode *vp;
1874{
1875	struct vnode *vq, *vnext;
1876	int count;
1877
1878loop:
1879	if ((vp->v_flag & VALIASED) == 0)
1880		return (vp->v_usecount);
1881	simple_lock(&spechash_slock);
1882	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1883		vnext = vq->v_specnext;
1884		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1885			continue;
1886		/*
1887		 * Alias, but not in use, so flush it out.
1888		 */
1889		if (vq->v_usecount == 0 && vq != vp) {
1890			simple_unlock(&spechash_slock);
1891			vgone(vq);
1892			goto loop;
1893		}
1894		count += vq->v_usecount;
1895	}
1896	simple_unlock(&spechash_slock);
1897	return (count);
1898}
1899/*
1900 * Print out a description of a vnode.
1901 */
1902static char *typename[] =
1903{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
1904
1905void
1906vprint(label, vp)
1907	char *label;
1908	register struct vnode *vp;
1909{
1910	char buf[64];
1911
1912	if (label != NULL)
1913		printf("%s: %p: ", label, (void *)vp);
1914	else
1915		printf("%p: ", (void *)vp);
1916	printf("type %s, usecount %d, writecount %d, refcount %d,",
1917	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1918	    vp->v_holdcnt);
1919	buf[0] = '\0';
1920	if (vp->v_flag & VROOT)
1921		strcat(buf, "|VROOT");
1922	if (vp->v_flag & VTEXT)
1923		strcat(buf, "|VTEXT");
1924	if (vp->v_flag & VSYSTEM)
1925		strcat(buf, "|VSYSTEM");
1926	if (vp->v_flag & VXLOCK)
1927		strcat(buf, "|VXLOCK");
1928	if (vp->v_flag & VXWANT)
1929		strcat(buf, "|VXWANT");
1930	if (vp->v_flag & VBWAIT)
1931		strcat(buf, "|VBWAIT");
1932	if (vp->v_flag & VALIASED)
1933		strcat(buf, "|VALIASED");
1934	if (vp->v_flag & VDOOMED)
1935		strcat(buf, "|VDOOMED");
1936	if (vp->v_flag & VFREE)
1937		strcat(buf, "|VFREE");
1938	if (vp->v_flag & VOBJBUF)
1939		strcat(buf, "|VOBJBUF");
1940	if (buf[0] != '\0')
1941		printf(" flags (%s)", &buf[1]);
1942	if (vp->v_data == NULL) {
1943		printf("\n");
1944	} else {
1945		printf("\n\t");
1946		VOP_PRINT(vp);
1947	}
1948}
1949
1950#ifdef DDB
1951/*
1952 * List all of the locked vnodes in the system.
1953 * Called when debugging the kernel.
1954 */
1955static void
1956printlockedvnodes()
1957{
1958	struct proc *p = curproc;	/* XXX */
1959	struct mount *mp, *nmp;
1960	struct vnode *vp;
1961
1962	printf("Locked vnodes\n");
1963	simple_lock(&mountlist_slock);
1964	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1965		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1966			nmp = mp->mnt_list.cqe_next;
1967			continue;
1968		}
1969		for (vp = mp->mnt_vnodelist.lh_first;
1970		     vp != NULL;
1971		     vp = vp->v_mntvnodes.le_next) {
1972			if (VOP_ISLOCKED(vp))
1973				vprint((char *)0, vp);
1974		}
1975		simple_lock(&mountlist_slock);
1976		nmp = mp->mnt_list.cqe_next;
1977		vfs_unbusy(mp, p);
1978	}
1979	simple_unlock(&mountlist_slock);
1980}
1981#endif
1982
1983/*
1984 * Top level filesystem related information gathering.
1985 */
1986static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);
1987
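/*
 * Handler for the vfs.generic sysctl subtree (registered below via
 * SYSCTL_NODE).
 */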
1988static int
1989vfs_sysctl SYSCTL_HANDLER_ARGS
1990{
1991	int *name = (int *)arg1 - 1;	/* XXX */
1992	u_int namelen = arg2 + 1;	/* XXX */
1993	struct vfsconf *vfsp;
1994
1995#if 1 || defined(COMPAT_PRELITE2)
1996	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
1997	if (namelen == 1)
1998		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
1999#endif
2000
2001#ifdef notyet
2002	/* all sysctl names at this level are at least name and field */
2003	if (namelen < 2)
2004		return (ENOTDIR);		/* overloaded */
2005	if (name[0] != VFS_GENERIC) {
2006		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2007			if (vfsp->vfc_typenum == name[0])
2008				break;
2009		if (vfsp == NULL)
2010			return (EOPNOTSUPP);
2011		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
2012		    oldp, oldlenp, newp, newlen, p));
2013	}
2014#endif
2015	switch (name[1]) {
2016	case VFS_MAXTYPENUM:
2017		if (namelen != 2)
2018			return (ENOTDIR);
2019		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
2020	case VFS_CONF:
2021		if (namelen != 3)
2022			return (ENOTDIR);	/* overloaded */
2023		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2024			if (vfsp->vfc_typenum == name[2])
2025				break;
2026		if (vfsp == NULL)
2027			return (EOPNOTSUPP);
2028		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
2029	}
2030	return (EOPNOTSUPP);
2031}
2032
2033SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
2034	"Generic filesystem");
2035
2036#if 1 || defined(COMPAT_PRELITE2)
2037
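/*
 * Export the list of configured filesystems in the old (pre-Lite2)
 * ovfsconf format.
 */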
2038static int
2039sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
2040{
2041	int error;
2042	struct vfsconf *vfsp;
2043	struct ovfsconf ovfs;
2044
2045	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
2046		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
2047		strcpy(ovfs.vfc_name, vfsp->vfc_name);
2048		ovfs.vfc_index = vfsp->vfc_typenum;
2049		ovfs.vfc_refcount = vfsp->vfc_refcount;
2050		ovfs.vfc_flags = vfsp->vfc_flags;
2051		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
2052		if (error)
2053			return error;
2054	}
2055	return 0;
2056}
2057
2058#endif /* 1 || COMPAT_PRELITE2 */
2059
2060static volatile int kinfo_vdebug = 1;
2061
2062#if 0
2063#define KINFO_VNODESLOP	10
2064/*
2065 * Dump vnode list (via sysctl).
2066 * Copyout address of vnode followed by vnode.
2067 */
2068/* ARGSUSED */
2069static int
2070sysctl_vnode SYSCTL_HANDLER_ARGS
2071{
2072	struct proc *p = curproc;	/* XXX */
2073	struct mount *mp, *nmp;
2074	struct vnode *nvp, *vp;
2075	int error;
2076
2077#define VPTRSZ	sizeof (struct vnode *)
2078#define VNODESZ	sizeof (struct vnode)
2079
2080	req->lock = 0;
2081	if (!req->oldptr) /* Make an estimate */
2082		return (SYSCTL_OUT(req, 0,
2083			(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
2084
2085	simple_lock(&mountlist_slock);
2086	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2087		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
2088			nmp = mp->mnt_list.cqe_next;
2089			continue;
2090		}
2091again:
2092		simple_lock(&mntvnode_slock);
2093		for (vp = mp->mnt_vnodelist.lh_first;
2094		     vp != NULL;
2095		     vp = nvp) {
2096			/*
2097			 * Check that the vp is still associated with
2098			 * this filesystem.  RACE: could have been
2099			 * recycled onto the same filesystem.
2100			 */
2101			if (vp->v_mount != mp) {
2102				simple_unlock(&mntvnode_slock);
2103				if (kinfo_vdebug)
2104					printf("kinfo: vp changed\n");
2105				goto again;
2106			}
2107			nvp = vp->v_mntvnodes.le_next;
2108			simple_unlock(&mntvnode_slock);
2109			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
2110			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
2111				return (error);
2112			simple_lock(&mntvnode_slock);
2113		}
2114		simple_unlock(&mntvnode_slock);
2115		simple_lock(&mountlist_slock);
2116		nmp = mp->mnt_list.cqe_next;
2117		vfs_unbusy(mp, p);
2118	}
2119	simple_unlock(&mountlist_slock);
2120
2121	return (0);
2122}
2123#endif
2124
2125/*
2126 * XXX
2127 * Exporting the vnode list on large systems causes them to crash.
2128 * Exporting the vnode list on medium systems causes sysctl to coredump.
2129 */
2130#if 0
2131SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
2132	0, 0, sysctl_vnode, "S,vnode", "");
2133#endif
2134
2135/*
2136 * Check to see if a filesystem is mounted on a block device.
2137 */
2138int
2139vfs_mountedon(vp)
2140	struct vnode *vp;
2141{
2142	struct vnode *vq;
2143	int error = 0;
2144
2145	if (vp->v_specmountpoint != NULL)
2146		return (EBUSY);
2147	if (vp->v_flag & VALIASED) {
2148		simple_lock(&spechash_slock);
2149		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2150			if (vq->v_rdev != vp->v_rdev ||
2151			    vq->v_type != vp->v_type)
2152				continue;
2153			if (vq->v_specmountpoint != NULL) {
2154				error = EBUSY;
2155				break;
2156			}
2157		}
2158		simple_unlock(&spechash_slock);
2159	}
2160	return (error);
2161}
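
/*
 * Example (sketch; "devvp" is simply the device vnode being mounted):
 * a disk filesystem's mount routine would normally use this to refuse
 * a device that already has a filesystem mounted on it, along the
 * lines of:
 *
 *	if ((error = vfs_mountedon(devvp)) != 0)
 *		return (error);
 */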
2162
2163/*
2164 * Unmount all filesystems. The list is traversed in reverse order
2165 * of mounting to avoid dependencies.
2166 */
2167void
2168vfs_unmountall()
2169{
2170	struct mount *mp, *nmp;
2171	struct proc *p;
2172	int error;
2173
2174	if (curproc != NULL)
2175		p = curproc;
2176	else
2177		p = initproc;	/* XXX XXX should this be proc0? */
2178	/*
2179	 * Since this only runs when rebooting, it is not interlocked.
2180	 */
2181	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2182		nmp = mp->mnt_list.cqe_prev;
2183		error = dounmount(mp, MNT_FORCE, p);
2184		if (error) {
2185			printf("unmount of %s failed (",
2186			    mp->mnt_stat.f_mntonname);
2187			if (error == EBUSY)
2188				printf("BUSY)\n");
2189			else
2190				printf("%d)\n", error);
2191		}
2192	}
2193}
2194
2195/*
2196 * Build hash lists of net addresses and hang them off the mount point.
2197 * Called by ufs_mount() to set up the lists of export addresses.
2198 */
2199static int
2200vfs_hang_addrlist(mp, nep, argp)
2201	struct mount *mp;
2202	struct netexport *nep;
2203	struct export_args *argp;
2204{
2205	register struct netcred *np;
2206	register struct radix_node_head *rnh;
2207	register int i;
2208	struct radix_node *rn;
2209	struct sockaddr *saddr, *smask = 0;
2210	struct domain *dom;
2211	int error;
2212
2213	if (argp->ex_addrlen == 0) {
2214		if (mp->mnt_flag & MNT_DEFEXPORTED)
2215			return (EPERM);
2216		np = &nep->ne_defexported;
2217		np->netc_exflags = argp->ex_flags;
2218		np->netc_anon = argp->ex_anon;
2219		np->netc_anon.cr_ref = 1;
2220		mp->mnt_flag |= MNT_DEFEXPORTED;
2221		return (0);
2222	}
2223	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2224	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
2225	bzero((caddr_t) np, i);
2226	saddr = (struct sockaddr *) (np + 1);
2227	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
2228		goto out;
2229	if (saddr->sa_len > argp->ex_addrlen)
2230		saddr->sa_len = argp->ex_addrlen;
2231	if (argp->ex_masklen) {
2232		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
2233		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
2234		if (error)
2235			goto out;
2236		if (smask->sa_len > argp->ex_masklen)
2237			smask->sa_len = argp->ex_masklen;
2238	}
2239	i = saddr->sa_family;
2240	if ((rnh = nep->ne_rtable[i]) == 0) {
2241		/*
2242		 * It seems silly to initialize every AF when most are not
2243		 * used, so do so on demand here.
2244		 */
2245		for (dom = domains; dom; dom = dom->dom_next)
2246			if (dom->dom_family == i && dom->dom_rtattach) {
2247				dom->dom_rtattach((void **) &nep->ne_rtable[i],
2248				    dom->dom_rtoffset);
2249				break;
2250			}
2251		if ((rnh = nep->ne_rtable[i]) == 0) {
2252			error = ENOBUFS;
2253			goto out;
2254		}
2255	}
2256	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
2257	    np->netc_rnodes);
2258	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
2259		error = EPERM;
2260		goto out;
2261	}
2262	np->netc_exflags = argp->ex_flags;
2263	np->netc_anon = argp->ex_anon;
2264	np->netc_anon.cr_ref = 1;
2265	return (0);
2266out:
2267	free(np, M_NETADDR);
2268	return (error);
2269}
2270
2271/* ARGSUSED */
2272static int
2273vfs_free_netcred(rn, w)
2274	struct radix_node *rn;
2275	void *w;
2276{
2277	register struct radix_node_head *rnh = (struct radix_node_head *) w;
2278
2279	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
2280	free((caddr_t) rn, M_NETADDR);
2281	return (0);
2282}
2283
2284/*
2285 * Free the net address hash lists that are hanging off the mount points.
2286 */
2287static void
2288vfs_free_addrlist(nep)
2289	struct netexport *nep;
2290{
2291	register int i;
2292	register struct radix_node_head *rnh;
2293
2294	for (i = 0; i <= AF_MAX; i++)
2295		if ((rnh = nep->ne_rtable[i])) {
2296			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
2297			    (caddr_t) rnh);
2298			free((caddr_t) rnh, M_RTABLE);
2299			nep->ne_rtable[i] = 0;
2300		}
2301}
2302
2303int
2304vfs_export(mp, nep, argp)
2305	struct mount *mp;
2306	struct netexport *nep;
2307	struct export_args *argp;
2308{
2309	int error;
2310
2311	if (argp->ex_flags & MNT_DELEXPORT) {
2312		if (mp->mnt_flag & MNT_EXPUBLIC) {
2313			vfs_setpublicfs(NULL, NULL, NULL);
2314			mp->mnt_flag &= ~MNT_EXPUBLIC;
2315		}
2316		vfs_free_addrlist(nep);
2317		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2318	}
2319	if (argp->ex_flags & MNT_EXPORTED) {
2320		if (argp->ex_flags & MNT_EXPUBLIC) {
2321			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2322				return (error);
2323			mp->mnt_flag |= MNT_EXPUBLIC;
2324		}
2325		if ((error = vfs_hang_addrlist(mp, nep, argp)))
2326			return (error);
2327		mp->mnt_flag |= MNT_EXPORTED;
2328	}
2329	return (0);
2330}
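
/*
 * Example (sketch; the "ump"/"um_export" names follow the UFS layout
 * and are illustrative only): a filesystem's VFS_MOUNT routine
 * typically treats a mount update with no special device named as an
 * export request and hands its per-mount netexport structure to
 * vfs_export():
 *
 *	if (args.fspec == 0)
 *		return (vfs_export(mp, &ump->um_export, &args.export));
 */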
2331
2332
2333/*
2334 * Set the publicly exported filesystem (WebNFS). Currently, only
2335 * one public filesystem is allowed by the spec (RFC 2054 and RFC 2055).
2336 */
2337int
2338vfs_setpublicfs(mp, nep, argp)
2339	struct mount *mp;
2340	struct netexport *nep;
2341	struct export_args *argp;
2342{
2343	int error;
2344	struct vnode *rvp;
2345	char *cp;
2346
2347	/*
2348	 * mp == NULL -> invalidate the current info; the FS is
2349	 * no longer exported.  May be called from either vfs_export
2350	 * or unmount, so check whether it has already been done.
2351	 */
2352	if (mp == NULL) {
2353		if (nfs_pub.np_valid) {
2354			nfs_pub.np_valid = 0;
2355			if (nfs_pub.np_index != NULL) {
2356				FREE(nfs_pub.np_index, M_TEMP);
2357				nfs_pub.np_index = NULL;
2358			}
2359		}
2360		return (0);
2361	}
2362
2363	/*
2364	 * Only one allowed at a time.
2365	 */
2366	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2367		return (EBUSY);
2368
2369	/*
2370	 * Get real filehandle for root of exported FS.
2371	 */
2372	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
2373	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
2374
2375	if ((error = VFS_ROOT(mp, &rvp)))
2376		return (error);
2377
2378	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
2379		return (error);
2380
2381	vput(rvp);
2382
2383	/*
2384	 * If an indexfile was specified, pull it in.
2385	 */
2386	if (argp->ex_indexfile != NULL) {
2387		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2388		    M_WAITOK);
2389		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2390		    MAXNAMLEN, (size_t *)0);
2391		if (!error) {
2392			/*
2393			 * Check for illegal filenames.
2394			 */
2395			for (cp = nfs_pub.np_index; *cp; cp++) {
2396				if (*cp == '/') {
2397					error = EINVAL;
2398					break;
2399				}
2400			}
2401		}
2402		if (error) {
2403			FREE(nfs_pub.np_index, M_TEMP);
2404			return (error);
2405		}
2406	}
2407
2408	nfs_pub.np_mount = mp;
2409	nfs_pub.np_valid = 1;
2410	return (0);
2411}
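
/*
 * Example (sketch; "args" and "indexfilename" are made-up names): a
 * public WebNFS export is requested through vfs_export() above by
 * setting MNT_EXPUBLIC along with MNT_EXPORTED in the export flags;
 * an index file may be named as well (the string is copied in from
 * user space by the code above):
 *
 *	args.ex_flags |= MNT_EXPORTED | MNT_EXPUBLIC;
 *	args.ex_indexfile = indexfilename;
 */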
2412
2413struct netcred *
2414vfs_export_lookup(mp, nep, nam)
2415	register struct mount *mp;
2416	struct netexport *nep;
2417	struct sockaddr *nam;
2418{
2419	register struct netcred *np;
2420	register struct radix_node_head *rnh;
2421	struct sockaddr *saddr;
2422
2423	np = NULL;
2424	if (mp->mnt_flag & MNT_EXPORTED) {
2425		/*
2426		 * Lookup in the export list first.
2427		 * Look up the address in the export list first.
2428		if (nam != NULL) {
2429			saddr = nam;
2430			rnh = nep->ne_rtable[saddr->sa_family];
2431			if (rnh != NULL) {
2432				np = (struct netcred *)
2433					(*rnh->rnh_matchaddr)((caddr_t)saddr,
2434							      rnh);
2435				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2436					np = NULL;
2437			}
2438		}
2439		/*
2440		 * If no address match, use the default if it exists.
2441		 */
2442		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2443			np = &nep->ne_defexported;
2444	}
2445	return (np);
2446}
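
/*
 * Example (sketch; "ump" and the exflagsp/credanonp out-parameters
 * follow the VFS_FHTOVP convention and are illustrative): an NFS
 * export check in a filesystem's fhtovp routine would look roughly
 * like:
 *
 *	np = vfs_export_lookup(mp, &ump->um_export, nam);
 *	if (np == NULL)
 *		return (EACCES);
 *	*exflagsp = np->netc_exflags;
 *	*credanonp = &np->netc_anon;
 */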
2447
2448/*
2449 * Perform msync on all vnodes under a mount point.
2450 * The mount point must be locked.
2451 */
2452void
2453vfs_msync(struct mount *mp, int flags) {
2454	struct vnode *vp, *nvp;
2455	struct vm_object *obj;
2456	int anyio, tries;
2457
2458	tries = 5;
2459loop:
2460	anyio = 0;
2461	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
2462
2463		nvp = vp->v_mntvnodes.le_next;
2464
2465		if (vp->v_mount != mp) {
2466			goto loop;
2467		}
2468
2469		if (vp->v_flag & VXLOCK)	/* XXX: what if MNT_WAIT? */
2470			continue;
2471
2472		if (flags != MNT_WAIT) {
2473			obj = vp->v_object;
2474			if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0)
2475				continue;
2476			if (VOP_ISLOCKED(vp))
2477				continue;
2478		}
2479
2480		simple_lock(&vp->v_interlock);
2481		if (vp->v_object &&
2482		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
2483			if (!vget(vp,
2484				LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
2485				if (vp->v_object) {
2486					vm_object_page_clean(vp->v_object, 0, 0, TRUE);
2487					anyio = 1;
2488				}
2489				vput(vp);
2490			}
2491		} else {
2492			simple_unlock(&vp->v_interlock);
2493		}
2494	}
2495	if (anyio && (--tries > 0))
2496		goto loop;
2497}
2498
2499/*
2500 * Create the VM object needed for VMIO and mmap support.  This
2501 * is done for all VREG files in the system.  Some filesystems might
2502 * take advantage of the additional metadata buffering provided by
2503 * the VMIO code by putting the device node into VMIO mode as well.
2504 *
2505 * If !waslocked, the vnode interlock must be held on entry.
2506 */
2507int
2508vfs_object_create(vp, p, cred, waslocked)
2509	struct vnode *vp;
2510	struct proc *p;
2511	struct ucred *cred;
2512	int waslocked;
2513{
2514	struct vattr vat;
2515	vm_object_t object;
2516	int error = 0;
2517
2518	if ((vp->v_type != VREG) && (vp->v_type != VBLK)) {
2519		if (!waslocked)
2520			simple_unlock(&vp->v_interlock);
2521		return 0;
2522	}
2523
2524	if (!waslocked)
2525		vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, p);
2526
2527retry:
2528	if ((object = vp->v_object) == NULL) {
2529		if (vp->v_type == VREG) {
2530			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
2531				goto retn;
2532			object = vnode_pager_alloc(vp,
2533				OFF_TO_IDX(round_page(vat.va_size)), 0, 0);
2534		} else if (major(vp->v_rdev) < nblkdev) {
2535			/*
2536			 * This simply allocates the biggest object possible
2537			 * for a VBLK vnode.  This should be fixed, but doesn't
2538			 * cause any problems (yet).
2539			 */
2540			object = vnode_pager_alloc(vp, INT_MAX, 0, 0);
2541		}
2542		object->ref_count--;
2543		vp->v_usecount--;
2544	} else {
2545		if (object->flags & OBJ_DEAD) {
2546			VOP_UNLOCK(vp, 0, p);
2547			tsleep(object, PVM, "vodead", 0);
2548			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2549			goto retry;
2550		}
2551	}
2552
2553	if (vp->v_object) {
2554		vp->v_flag |= VOBJBUF;
2555	}
2556
2557retn:
2558	if (!waslocked) {
2559		simple_lock(&vp->v_interlock);
2560		VOP_UNLOCK(vp, LK_INTERLOCK, p);
2561	}
2562
2563	return error;
2564}
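
/*
 * Example (sketch; the surrounding open-path code is hypothetical): a
 * caller that already holds the vnode lock and wants VMIO backing for
 * a regular file would invoke this with waslocked set, e.g.:
 *
 *	if (vp->v_type == VREG &&
 *	    (error = vfs_object_create(vp, p, cred, 1)) != 0)
 *		return (error);
 */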
2565
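/*
 * Put a vnode on the free list (removing it from the to-be-freed
 * list if necessary) and mark it VFREE.
 */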
2566static void
2567vfree(vp)
2568	struct vnode *vp;
2569{
2570	int s;
2571
2572	s = splbio();
2573	simple_lock(&vnode_free_list_slock);
2574	if (vp->v_flag & VTBFREE) {
2575		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
2576		vp->v_flag &= ~VTBFREE;
2577	}
2578	if (vp->v_flag & VAGE) {
2579		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
2580	} else {
2581		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
2582	}
2583	freevnodes++;
2584	simple_unlock(&vnode_free_list_slock);
2585	vp->v_flag &= ~VAGE;
2586	vp->v_flag |= VFREE;
2587	splx(s);
2588}
2589
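/*
 * Opposite of vfree(): take a vnode off the free (or to-be-freed)
 * list because it is being made active again, and clear its
 * VFREE/VAGE state.
 */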
2590void
2591vbusy(vp)
2592	struct vnode *vp;
2593{
2594	int s;
2595
2596	s = splbio();
2597	simple_lock(&vnode_free_list_slock);
2598	if (vp->v_flag & VTBFREE) {
2599		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
2600		vp->v_flag &= ~VTBFREE;
2601	} else {
2602		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
2603		freevnodes--;
2604	}
2605	simple_unlock(&vnode_free_list_slock);
2606	vp->v_flag &= ~(VFREE|VAGE);
2607	splx(s);
2608}
2609
2610/*
2611 * Record a process's interest in events which might happen to
2612 * a vnode.  Because poll uses the historic select-style interface
2613 * internally, this routine serves as both the ``check for any
2614 * pending events'' and the ``record my interest in future events''
2615 * functions.  (These are done together, while the lock is held,
2616 * to avoid race conditions.)
2617 */
2618int
2619vn_pollrecord(vp, p, events)
2620	struct vnode *vp;
2621	struct proc *p;
2622	short events;
2623{
2624	simple_lock(&vp->v_pollinfo.vpi_lock);
2625	if (vp->v_pollinfo.vpi_revents & events) {
2626		/*
2627		 * This leaves events we are not interested
2628		 * in available for the other process, which
2629		 * presumably had requested them
2630		 * (otherwise they would never have been
2631		 * recorded).
2632		 */
2633		events &= vp->v_pollinfo.vpi_revents;
2634		vp->v_pollinfo.vpi_revents &= ~events;
2635
2636		simple_unlock(&vp->v_pollinfo.vpi_lock);
2637		return events;
2638	}
2639	vp->v_pollinfo.vpi_events |= events;
2640	selrecord(p, &vp->v_pollinfo.vpi_selinfo);
2641	simple_unlock(&vp->v_pollinfo.vpi_lock);
2642	return 0;
2643}
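
/*
 * Example (sketch only, never compiled; the function and its name are
 * made up for illustration): a filesystem with no special notion of
 * poll readiness can implement VOP_POLL by deferring to
 * vn_pollrecord().  The argument layout below follows the
 * VOP_POLL(vp, events, cred, p) interface.
 */
#if 0
static int
example_poll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{

	return (vn_pollrecord(ap->a_vp, ap->a_p, ap->a_events));
}
#endif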
2644
2645/*
2646 * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
2647 * it is possible for us to miss an event due to race conditions, but
2648 * that condition is expected to be rare, so for the moment it is the
2649 * preferred interface.
2650 */
2651void
2652vn_pollevent(vp, events)
2653	struct vnode *vp;
2654	short events;
2655{
2656	simple_lock(&vp->v_pollinfo.vpi_lock);
2657	if (vp->v_pollinfo.vpi_events & events) {
2658		/*
2659		 * We clear vpi_events so that we don't
2660		 * call selwakeup() twice if two events are
2661		 * posted before the polling process(es) is
2662		 * awakened.  This also ensures that we take at
2663		 * most one selwakeup() if the polling process
2664		 * is no longer interested.  However, it does
2665		 * mean that only one event can be noticed at
2666		 * a time.  (Perhaps we should only clear those
2667		 * event bits which we note?) XXX
2668		 */
2669		vp->v_pollinfo.vpi_events = 0;	/* &= ~events ??? */
2670		vp->v_pollinfo.vpi_revents |= events;
2671		selwakeup(&vp->v_pollinfo.vpi_selinfo);
2672	}
2673	simple_unlock(&vp->v_pollinfo.vpi_lock);
2674}
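
/*
 * Example (sketch): filesystem code that has just made data available
 * would typically report it through the VN_POLLEVENT macro, which is
 * expected to check vpi_events cheaply before calling in here, e.g.:
 *
 *	VN_POLLEVENT(vp, POLLIN | POLLRDNORM);
 */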
2675
2676/*
2677 * Wake up anyone polling on vp because it is being revoked.
2678 * This depends on dead_poll() returning POLLHUP for correct
2679 * behavior.
2680 */
2681void
2682vn_pollgone(vp)
2683	struct vnode *vp;
2684{
2685	simple_lock(&vp->v_pollinfo.vpi_lock);
2686	if (vp->v_pollinfo.vpi_events) {
2687		vp->v_pollinfo.vpi_events = 0;
2688		selwakeup(&vp->v_pollinfo.vpi_selinfo);
2689	}
2690	simple_unlock(&vp->v_pollinfo.vpi_lock);
2691}
2692
2693
2694
2695/*
2696 * Routine to create and manage a filesystem syncer vnode.
2697 */
2698#define sync_close ((int (*) __P((struct  vop_close_args *)))nullop)
2699int	sync_fsync __P((struct  vop_fsync_args *));
2700int	sync_inactive __P((struct  vop_inactive_args *));
2701int	sync_reclaim  __P((struct  vop_reclaim_args *));
2702#define sync_lock ((int (*) __P((struct  vop_lock_args *)))vop_nolock)
2703#define sync_unlock ((int (*) __P((struct  vop_unlock_args *)))vop_nounlock)
2704int	sync_print __P((struct vop_print_args *));
2705#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)
2706
2707vop_t **sync_vnodeop_p;
2708struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
2709	{ &vop_default_desc,	(vop_t *) vop_eopnotsupp },
2710	{ &vop_close_desc,	(vop_t *) sync_close },		/* close */
2711	{ &vop_fsync_desc,	(vop_t *) sync_fsync },		/* fsync */
2712	{ &vop_inactive_desc,	(vop_t *) sync_inactive },	/* inactive */
2713	{ &vop_reclaim_desc,	(vop_t *) sync_reclaim },	/* reclaim */
2714	{ &vop_lock_desc,	(vop_t *) sync_lock },		/* lock */
2715	{ &vop_unlock_desc,	(vop_t *) sync_unlock },	/* unlock */
2716	{ &vop_print_desc,	(vop_t *) sync_print },		/* print */
2717	{ &vop_islocked_desc,	(vop_t *) sync_islocked },	/* islocked */
2718	{ NULL, NULL }
2719};
2720struct vnodeopv_desc sync_vnodeop_opv_desc =
2721	{ &sync_vnodeop_p, sync_vnodeop_entries };
2722
2723VNODEOP_SET(sync_vnodeop_opv_desc);
2724
2725/*
2726 * Create a new filesystem syncer vnode for the specified mount point.
2727 */
2728int
2729vfs_allocate_syncvnode(mp)
2730	struct mount *mp;
2731{
2732	struct vnode *vp;
2733	static long start, incr, next;
2734	int error;
2735
2736	/* Allocate a new vnode */
2737	if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
2738		mp->mnt_syncer = NULL;
2739		return (error);
2740	}
2741	vp->v_type = VNON;
2742	/*
2743	 * Place the vnode onto the syncer worklist. We attempt to
2744	 * scatter them about on the list so that they will go off
2745	 * at evenly distributed times even if all the filesystems
2746	 * are mounted at once.
2747	 */
2748	next += incr;
2749	if (next == 0 || next > syncer_maxdelay) {
2750		start /= 2;
2751		incr /= 2;
2752		if (start == 0) {
2753			start = syncer_maxdelay / 2;
2754			incr = syncer_maxdelay;
2755		}
2756		next = start;
2757	}
2758	vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
2759	mp->mnt_syncer = vp;
2760	return (0);
2761}
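
/*
 * Example (sketch): the mount path is the usual caller; a writable
 * mount that does not yet have a syncer vnode would do roughly:
 *
 *	if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
 *		error = vfs_allocate_syncvnode(mp);
 */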
2762
2763/*
2764 * Do a lazy sync of the filesystem.
2765 */
2766int
2767sync_fsync(ap)
2768	struct vop_fsync_args /* {
2769		struct vnode *a_vp;
2770		struct ucred *a_cred;
2771		int a_waitfor;
2772		struct proc *a_p;
2773	} */ *ap;
2774{
2775	struct vnode *syncvp = ap->a_vp;
2776	struct mount *mp = syncvp->v_mount;
2777	struct proc *p = ap->a_p;
2778	int asyncflag;
2779
2780	/*
2781	 * We only need to do something if this is a lazy evaluation.
2782	 */
2783	if (ap->a_waitfor != MNT_LAZY)
2784		return (0);
2785
2786	/*
2787	 * Move ourselves to the back of the sync list.
2788	 */
2789	vn_syncer_add_to_worklist(syncvp, syncdelay);
2790
2791	/*
2792	 * Walk the list of vnodes pushing all that are dirty and
2793	 * not already on the sync list.
2794	 */
2795	simple_lock(&mountlist_slock);
2796	if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) {
2797		simple_unlock(&mountlist_slock);
2798		return (0);
2799	}
2800	asyncflag = mp->mnt_flag & MNT_ASYNC;
2801	mp->mnt_flag &= ~MNT_ASYNC;
2802	vfs_msync(mp, MNT_NOWAIT);
2803	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
2804	if (asyncflag)
2805		mp->mnt_flag |= MNT_ASYNC;
2806	vfs_unbusy(mp, p);
2807	return (0);
2808}
2809
2810/*
2811 * The syncer vnode is no longer referenced.
2812 */
2813int
2814sync_inactive(ap)
2815	struct vop_inactive_args /* {
2816		struct vnode *a_vp;
2817		struct proc *a_p;
2818	} */ *ap;
2819{
2820
2821	vgone(ap->a_vp);
2822	return (0);
2823}
2824
2825/*
2826 * The syncer vnode is no longer needed and is being decommissioned.
2827 */
2828int
2829sync_reclaim(ap)
2830	struct vop_reclaim_args /* {
2831		struct vnode *a_vp;
2832	} */ *ap;
2833{
2834	struct vnode *vp = ap->a_vp;
2835
2836	vp->v_mount->mnt_syncer = NULL;
2837	if (vp->v_flag & VONWORKLST) {
2838		LIST_REMOVE(vp, v_synclist);
2839		vp->v_flag &= ~VONWORKLST;
2840	}
2841
2842	return (0);
2843}
2844
2845/*
2846 * Print out a syncer vnode.
2847 */
2848int
2849sync_print(ap)
2850	struct vop_print_args /* {
2851		struct vnode *a_vp;
2852	} */ *ap;
2853{
2854	struct vnode *vp = ap->a_vp;
2855
2856	printf("syncer vnode");
2857	if (vp->v_vnlock != NULL)
2858		lockmgr_printinfo(vp->v_vnlock);
2859	printf("\n");
2860	return (0);
2861}
2862