/*	$OpenBSD: vfs_subr.c,v 1.131 2006/07/08 20:01:13 thib Exp $	*/
/*	$NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>

#include <uvm/uvm_extern.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};

int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
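
/*
 * Example (illustrative only): these tables back the IFTOVT() and
 * VTTOIF() macros in <sys/vnode.h>, which convert between an inode's
 * mode bits and the in-core vnode type:
 *
 *	enum vtype vt = IFTOVT(S_IFDIR | 0755);	(vt == VDIR)
 *	int mode = VTTOIF(VREG);		(mode == S_IFREG)
 */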

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
int suid_clear = 1;		/* 1 => clear SUID / SGID on owner change */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	LIST_NEXT(bp, b_vnbufs) = NOLIST;				\
}

struct freelst vnode_hold_list;	/* list of vnodes referencing buffers */
struct freelst vnode_free_list;	/* vnode free list */

struct mntlist mountlist;	/* mounted filesystem list */
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
struct simplelock spechash_slock;

void	vclean(struct vnode *, int, struct proc *);

void insmntque(struct vnode *, struct mount *);
int getdevvp(dev_t, struct vnode **, enum vtype);

int vfs_hang_addrlist(struct mount *, struct netexport *,
				  struct export_args *);
int vfs_free_netcred(struct radix_node *, void *);
void vfs_free_addrlist(struct netexport *);
void vputonfreelist(struct vnode *);

int vflush_vnode(struct vnode *, void *);

#ifdef DEBUG
void printlockedvnodes(void);
#endif

#define VN_KNOTE(vp, b) \
	KNOTE((struct klist *)&vp->v_selectinfo.vsi_selinfo.si_note, (b))

struct pool vnode_pool;

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit(void)
{

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodes",
	    &pool_allocator_nointr);
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_hold_list);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting.
 *
 * The default behaviour is to attempt a READ lock; if an unmount is in
 * progress, wait for it to finish and then return failure.
 */
int
vfs_busy(struct mount *mp, int flags)
{
	int rwflags = 0;

	/* new mountpoints need their lock initialised */
	if (mp->mnt_lock.rwl_name == NULL)
		rw_init(&mp->mnt_lock, "vfslock");

	if (flags & VB_WRITE)
		rwflags |= RW_WRITE;
	else
		rwflags |= RW_READ;

	if (flags & VB_WAIT)
		rwflags |= RW_SLEEPFAIL;
	else
		rwflags |= RW_NOSLEEP;

	if (rw_enter(&mp->mnt_lock, rwflags))
		return (EBUSY);

	return (0);
}

/*
 * Free a busy file system
 */
void
vfs_unbusy(struct mount *mp)
{
	rw_exit(&mp->mnt_lock);
}
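
/*
 * Example (illustrative sketch): callers that walk the mount list
 * bracket their work with vfs_busy()/vfs_unbusy(), skipping mount
 * points being unmounted, as printlockedvnodes() and sysctl_vnode()
 * do below:
 *
 *	if (vfs_busy(mp, VB_READ|VB_NOWAIT) == 0) {
 *		... inspect mp ...
 *		vfs_unbusy(mp);
 *	}
 */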

int
vfs_isbusy(struct mount *mp)
{
	if (RWLOCK_OWNER(&mp->mnt_lock) > 0)
		return (1);
	else
		return (0);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(char *fstypename, char *devname, struct mount **mpp)
{
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	(void) vfs_busy(mp, VB_READ|VB_NOWAIT);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
int
vfs_mountroot(void)
{
	struct vfsconf *vfsp;
	int error;

	if (mountroot != NULL)
		return ((*mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid_t *fsid)
{
	struct mount *mp;

	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			return (mp);
		}
	}

	return ((struct mount *)0);
}


/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(struct mount *mp)
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 * Note that this is no longer used for ffs which
 * now has an on-disk filesystem id.
 */
long
makefstype(char *type)
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}
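
/*
 * Worked example (for illustration): makefstype("ffs") folds the
 * bytes 'f' (0x66), 'f', 's' (0x73) through rv = (rv << 2) ^ c,
 * giving 0x66 -> 0x1fe -> 0x78b.
 */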

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(struct vattr *vap)
{

	vap->va_type = VNON;
	/* XXX These next two used to be one line, but for a GCC bug. */
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
		vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
		vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)(void *);
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
    struct vnode **vpp)
{
	struct proc *p = curproc;
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int s;

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a vnode from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reluctant to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all their
	 * referencing buffers.
	 */
	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	simple_lock(&vnode_free_list_slock);
	s = splbio();
	if ((numvnodes < desiredvnodes) ||
	    ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) &&
	    ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
		splx(s);
		simple_unlock(&vnode_free_list_slock);
		vp = pool_get(&vnode_pool, PR_WAITOK);
		bzero((char *)vp, sizeof *vp);
		simple_lock_init(&vp->v_interlock);
		numvnodes++;
	} else {
		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
		    vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock)) {
				if (VOP_ISLOCKED(vp) == 0)
					break;
				else
					simple_unlock(&vp->v_interlock);
			}
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULL) {
			splx(s);
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}

#ifdef DIAGNOSTIC
		if (vp->v_usecount) {
			vprint("free vnode", vp);
			panic("free vnode isn't");
		}
#endif

		TAILQ_REMOVE(listhd, vp, v_freelist);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);

		simple_unlock(&vnode_free_list_slock);
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data) {
			vprint("cleaned vnode", vp);
			panic("cleaned vnode isn't");
		}
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(struct vnode *vp, struct mount *mp)
{
	simple_lock(&mntvnode_slock);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);

	simple_unlock(&mntvnode_slock);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev_t dev, struct vnode **vpp)
{
	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for console handling.
 */
int
cdevvp(dev_t dev, struct vnode **vpp)
{
	return (getdevvp(dev, vpp, VCHR));
}

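/*
 * Example (illustrative sketch): a filesystem's mountroot routine
 * typically obtains a vnode for the root device with
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("can't setup rootvp");
 */
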
/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console.
 */
int
getdevvp(dev_t dev, struct vnode **vpp, enum vtype type)
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp)
{
	struct proc *p = curproc;
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		simple_lock(&vp->v_interlock);
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}

	/*
	 * Common case is actually in the if statement
	 */
	if (vp == NULL || !(vp->v_tag == VT_NON && vp->v_type == VBLK)) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		nvp->v_speclockf = NULL;
		bzero(nvp->v_specbitmap, sizeof(nvp->v_specbitmap));
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}

	/*
	 * This code is the uncommon case. It is called in case
	 * we found an alias that was VT_NON && vtype of VBLK
	 * This means we found a block device that was created
	 * using bdevvp.
	 * An example of such a vnode is the root partition device vnode
	 * created in ffs_mountroot.
	 *
	 * The vnodes created by bdevvp should not be aliased (why?).
	 */

	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. If the vnode lock bit is set,
 * the vnode is being eliminated in vgone. In that case, we
 * cannot grab it, so the process is awakened when the
 * transition is completed, and an error code is returned to
 * indicate that the vnode is no longer usable, possibly
 * having been changed to a new file system type.
 */
int
vget(struct vnode *vp, int flags, struct proc *p)
{
	int error, s, onfreelist;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
		flags |= LK_INTERLOCK;
	}

	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return (EBUSY);
		}

		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD | PNORELOCK, "vget", 0, &vp->v_interlock);
		return (ENOENT);
	}

	onfreelist = vp->v_bioflag & VBIOONFREELIST;
	if (vp->v_usecount == 0 && onfreelist) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);
	}

	vp->v_usecount++;
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags, p)) != 0) {
			vp->v_usecount--;
			if (vp->v_usecount == 0 && onfreelist)
				vputonfreelist(vp);

			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}

	simple_unlock(&vp->v_interlock);

	return (0);
}
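
/*
 * Example (illustrative sketch): the usual get/use/release pattern
 * built from these primitives; vget() fails if the vnode is being
 * recycled, and vput() unlocks and drops the reference, as in
 * checkalias() above:
 *
 *	if (vget(vp, LK_EXCLUSIVE, p) == 0) {
 *		... use the locked vnode ...
 *		vput(vp);
 *	}
 */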


#ifdef DIAGNOSTIC
/*
 * Vnode reference.
 */
void
vref(struct vnode *vp)
{
	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0)
		panic("vref used where vget required");
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

void
vputonfreelist(struct vnode *vp)
{
	int s;
	struct freelst *lst;

	s = splbio();
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0)
		panic("Use count is not zero!");

	if (vp->v_bioflag & VBIOONFREELIST) {
		vprint("vnode already on free list: ", vp);
		panic("vnode already on free list");
	}
#endif

	vp->v_bioflag |= VBIOONFREELIST;

	if (vp->v_holdcnt > 0)
		lst = &vnode_hold_list;
	else
		lst = &vnode_free_list;

	if (vp->v_type == VBAD)
		TAILQ_INSERT_HEAD(lst, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(lst, vp, v_freelist);

	splx(s);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(struct vnode *vp)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);

#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0, p);
		return;
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vput: bad writecount", vp);
		panic("vput: v_writecount != 0");
	}
#endif
	simple_unlock(&vp->v_interlock);

	VOP_INACTIVE(vp, p);

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount == 0 && !(vp->v_bioflag & VBIOONFREELIST))
		vputonfreelist(vp);

	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode release - use for active VNODES.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(struct vnode *vp)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vrele: bad writecount", vp);
		panic("vrele: v_writecount != 0");
	}
#endif

	if (vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p)) {
#ifdef DIAGNOSTIC
		vprint("vrele: cannot lock", vp);
#endif
		return;
	}

	VOP_INACTIVE(vp, p);

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount == 0 && !(vp->v_bioflag & VBIOONFREELIST))
		vputonfreelist(vp);

	simple_unlock(&vp->v_interlock);
}

void vhold(struct vnode *vp);

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(struct vnode *vp)
{
	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}
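
/*
 * Note (editorial): the hold taken here is paired with the v_holdcnt
 * decrement in brelvp() below; bgetvp() calls vhold() when a buffer
 * is attached to the vnode, which is what keeps vnodes with buffers
 * on vnode_hold_list rather than vnode_free_list.
 */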

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vfs_mount_foreach_vnode(struct mount *mp,
    int (*func)(struct vnode *, void *), void *arg) {
	struct vnode *vp, *nvp;
	int error = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = LIST_NEXT(vp, v_mntvnodes);
		simple_lock(&vp->v_interlock);
		simple_unlock(&mntvnode_slock);

		error = func(vp, arg);

		simple_lock(&mntvnode_slock);

		if (error != 0)
			break;
	}
	simple_unlock(&mntvnode_slock);

	return (error);
}

struct vflush_args {
	struct vnode *skipvp;
	int busy;
	int flags;
};

int
vflush_vnode(struct vnode *vp, void *arg) {
	struct vflush_args *va = arg;
	struct proc *p = curproc;

	if (vp == va->skipvp) {
		simple_unlock(&vp->v_interlock);
		return (0);
	}

	if ((va->flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
		simple_unlock(&vp->v_interlock);
		return (0);
	}

	/*
	 * If WRITECLOSE is set, only flush out regular file
	 * vnodes open for writing.
	 */
	if ((va->flags & WRITECLOSE) &&
	    (vp->v_writecount == 0 || vp->v_type != VREG)) {
		simple_unlock(&vp->v_interlock);
		return (0);
	}

	/*
	 * With v_usecount == 0, all we need to do is clear
	 * out the vnode data structures and we are done.
	 */
	if (vp->v_usecount == 0) {
		vgonel(vp, p);
		return (0);
	}

	/*
	 * If FORCECLOSE is set, forcibly close the vnode.
	 * For block or character devices, revert to an
	 * anonymous device. For all other files, just kill them.
	 */
	if (va->flags & FORCECLOSE) {
		if (vp->v_type != VBLK && vp->v_type != VCHR) {
			vgonel(vp, p);
		} else {
			vclean(vp, 0, p);
			vp->v_op = spec_vnodeop_p;
			insmntque(vp, (struct mount *)0);
		}
		return (0);
	}

#ifdef DEBUG
	if (busyprt)
		vprint("vflush: busy vnode", vp);
#endif
	simple_unlock(&vp->v_interlock);
	va->busy++;
	return (0);
}

int
vflush(struct mount *mp, struct vnode *skipvp, int flags)
{
	struct vflush_args va;
	va.skipvp = skipvp;
	va.busy = 0;
	va.flags = flags;

	vfs_mount_foreach_vnode(mp, vflush_vnode, &va);

	if (va.busy)
		return (EBUSY);
	return (0);
}
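
/*
 * Example (illustrative sketch): a filesystem's unmount routine
 * typically flushes its vnodes with something like
 *
 *	if ((error = vflush(mp, NULLVP, flags)) != 0)
 *		return (error);
 *
 * passing SKIPSYSTEM or FORCECLOSE in flags as appropriate.
 */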

/*
 * Disassociate the underlying file system from a vnode.
 * The vnode interlock is held on entry.
 */
void
vclean(struct vnode *vp, int flags, struct proc *p)
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0)
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * Clean out any VM data associated with the vnode.
	 */
	uvm_vnp_terminate(vp);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}

	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active) {
		simple_lock(&vp->v_interlock);

		vp->v_usecount--;
		if (vp->v_usecount == 0) {
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean");
			vputonfreelist(vp);
		}

		simple_unlock(&vp->v_interlock);
	}
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	simple_lock(&vp->v_selectinfo.vsi_lock);
	VN_KNOTE(vp, NOTE_REVOKE);
	simple_unlock(&vp->v_selectinfo.vsi_lock);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
#ifdef VFSDEBUG
	vp->v_flag &= ~VLOCKSWORK;
#endif
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup(vp);
	}
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct proc *p)
{
	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(struct vnode *vp)
{
	struct proc *p = curproc;

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(struct vnode *vp, struct proc *p)
{
	struct vnode *vq;
	struct vnode *vx;
	struct mount *mp;
	int flags;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);

		/*
		 * If we have a mount point associated with the vnode, we
		 * must flush it out now, so as not to leave a dangling
		 * zombie mount point lying around in the VFS.
		 */
		mp = vp->v_specmountpoint;
		if (mp != NULL) {
			if (!vfs_busy(mp, VB_WRITE|VB_WAIT)) {
				flags = MNT_FORCE | MNT_DOOMED;
				dounmount(mp, flags, p, NULL);
			}
		}

		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	vp->v_type = VBAD;

	/*
	 * If the vnode is on a free list, move it to the head of
	 * vnode_free_list so it is recycled first, unless we were
	 * called from getnewvnode and it is not on any free list.
	 */
	if (vp->v_usecount == 0 &&
	    (vp->v_bioflag & VBIOONFREELIST)) {
		int s;

		simple_lock(&vnode_free_list_slock);
		s = splbio();

		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean");

		if (TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		splx(s);
		simple_unlock(&vnode_free_list_slock);
	}
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev_t dev, enum vtype type, struct vnode **vpp)
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(int maj, int minl, int minh, enum vtype type)
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(struct vnode *vp)
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

#ifdef DIAGNOSTIC
/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(char *label, struct vnode *vp)
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("%p, type %s, use %u, write %u, hold %u,",
		vp, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strlcat(buf, "|VROOT", sizeof buf);
	if (vp->v_flag & VTEXT)
		strlcat(buf, "|VTEXT", sizeof buf);
	if (vp->v_flag & VSYSTEM)
		strlcat(buf, "|VSYSTEM", sizeof buf);
	if (vp->v_flag & VXLOCK)
		strlcat(buf, "|VXLOCK", sizeof buf);
	if (vp->v_flag & VXWANT)
		strlcat(buf, "|VXWANT", sizeof buf);
	if (vp->v_bioflag & VBIOWAIT)
		strlcat(buf, "|VBIOWAIT", sizeof buf);
	if (vp->v_bioflag & VBIOONFREELIST)
		strlcat(buf, "|VBIOONFREELIST", sizeof buf);
	if (vp->v_bioflag & VBIOONSYNCLIST)
		strlcat(buf, "|VBIOONSYNCLIST", sizeof buf);
	if (vp->v_flag & VALIASED)
		strlcat(buf, "|VALIASED", sizeof buf);
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}
#endif /* DIAGNOSTIC */

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes(void)
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");

	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		if (vfs_busy(mp, VB_READ|VB_NOWAIT)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen, struct proc *p)
{
	struct vfsconf *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;

		if (vfsp == NULL)
			return (EOPNOTSUPP);

		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));

	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */

		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;

		if (vfsp == NULL)
			return (EOPNOTSUPP);

		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
		    sizeof(struct vfsconf)));
	}

	return (EOPNOTSUPP);
}
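
/*
 * Example (illustrative, userland view): the VFS_GENERIC names above
 * are reached from sysctl(3) with a mib such as
 *
 *	int mib[3] = { CTL_VFS, VFS_GENERIC, VFS_MAXTYPENUM };
 *	int maxtypenum;
 *	size_t len = sizeof(maxtypenum);
 *	sysctl(mib, 3, &maxtypenum, &len, NULL, 0);
 */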

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(char *where, size_t *sizep, struct proc *p)
{
	struct mount *mp, *nmp;
	struct vnode *vp, *nvp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * sizeof(struct e_vnode);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		if (vfs_busy(mp, VB_READ|VB_NOWAIT)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		savebp = bp;
again:
		for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL;
		    vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = LIST_NEXT(vp, v_mntvnodes);
			if (bp + sizeof(struct e_vnode) > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				vfs_unbusy(mp);
				return (ENOMEM);
			}
			if ((error = copyout(&vp,
			    &((struct e_vnode *)bp)->vptr,
			    sizeof(struct vnode *))) ||
			   (error = copyout(vp,
			    &((struct e_vnode *)bp)->vnode,
			    sizeof(struct vnode)))) {
				vfs_unbusy(mp);
				return (error);
			}
			bp += sizeof(struct e_vnode);
			simple_lock(&mntvnode_slock);
		}

		simple_unlock(&mntvnode_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}

	*sizep = bp - where;

	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(struct vnode *vp)
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
int
vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
    struct export_args *argp)
{
	struct netcred *np;
	struct radix_node_head *rnh;
	int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN ||
	    argp->ex_addrlen < 0 || argp->ex_masklen < 0)
		return (EINVAL);
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero(np, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if (i < 0 || i > AF_MAX) {
		error = EINVAL;
		goto out;
	}
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * It seems silly to initialize every AF when most are
		 * not used; do so on demand here.
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
int
vfs_free_netcred(struct radix_node *rn, void *w)
{
	struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh, NULL);
	free(rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
void
vfs_free_addrlist(struct netexport *nep)
{
	int i;
	struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i]) != NULL) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
			free(rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(struct mount *mp, struct netexport *nep, struct export_args *argp)
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(struct mount *mp, struct netexport *nep, struct mbuf *nam)
{
	struct netcred *np;
	struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list.
 */
int
vaccess(mode_t file_mode, uid_t uid, gid_t gid, mode_t acc_mode,
    struct ucred *cred)
{
	mode_t mask;

	/* User id 0 always gets access. */
	if (cred->cr_uid == 0)
		return 0;

	mask = 0;

	/* Otherwise, check the owner. */
	if (cred->cr_uid == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check the groups. */
	if (cred->cr_gid == gid || groupmember(gid, cred)) {
		if (acc_mode & VEXEC)
			mask |= S_IXGRP;
		if (acc_mode & VREAD)
			mask |= S_IRGRP;
		if (acc_mode & VWRITE)
			mask |= S_IWGRP;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check everyone else. */
	if (acc_mode & VEXEC)
		mask |= S_IXOTH;
	if (acc_mode & VREAD)
		mask |= S_IROTH;
	if (acc_mode & VWRITE)
		mask |= S_IWOTH;
	return (file_mode & mask) == mask ? 0 : EACCES;
}
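
/*
 * Worked example (for illustration): with file_mode 0640, a caller
 * whose cr_uid matches uid and who requests VREAD gets 0 (S_IRUSR is
 * set), while an unrelated caller requesting VWRITE falls through to
 * the "everyone else" check and gets EACCES (S_IWOTH is clear).
 */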

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall(void)
{
	struct mount *mp, *nmp;
	int allerror, error, again = 1;

 retry:
	allerror = 0;
	for (mp = CIRCLEQ_LAST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		nmp = CIRCLEQ_PREV(mp, mnt_list);
		if ((vfs_busy(mp, VB_WRITE|VB_NOWAIT)) != 0)
			continue;
		if ((error = dounmount(mp, MNT_FORCE, curproc, NULL)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}

	if (allerror) {
		printf("WARNING: some file systems would not unmount\n");
		if (again) {
			printf("retrying\n");
			again = 0;
			goto retry;
		}
	}
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown(void)
{
#ifdef ACCOUNTING
	extern void acct_shutdown(void);

	acct_shutdown();
#endif

	/* XXX Should suspend scheduling. */
	(void) spl0();

	printf("syncing disks... ");

	if (panicstr == 0) {
		/* Sync before unmount, in case we hang on something. */
		sys_sync(&proc0, (void *)0, (register_t *)0);

		/* Unmount file systems. */
		vfs_unmountall();
	}

	if (vfs_syncwait(1))
		printf("giving up\n");
	else
		printf("done\n");
}

/*
 * Perform the sync() operation and wait for buffers to flush.
 * Assumptions: called with the scheduler disabled and physical I/O
 * enabled; for now called at spl0().  XXX
 */
int
vfs_syncwait(int verbose)
{
	struct buf *bp;
	int iter, nbusy, dcount, s;
	struct proc *p;

	p = curproc ? curproc : &proc0;
	sys_sync(p, (void *)0, (register_t *)0);

	/* Wait for sync to finish. */
	dcount = 10000;
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_flags & B_DELWRI) {
				s = splbio();
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				nbusy++;
				bawrite(bp);
				if (dcount-- <= 0) {
					if (verbose)
						printf("softdep ");
					return 1;
				}
			}
		}
		if (nbusy == 0)
			break;
		if (verbose)
			printf("%d ", nbusy);
		DELAY(40000 * iter);
	}

	return nbusy;
}

/*
 * POSIX file system related system variables.
 */
int
fs_posix_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen, struct proc *p)
{
	/* all sysctl names at this level are terminal */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case FS_POSIX_SETUID:
		if (newp && securelevel > 0)
			return (EPERM);
		return (sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

/*
 * File system related system variables.
 */
int
fs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen, struct proc *p)
{
	sysctlfn *fn;

	switch (name[0]) {
	case FS_POSIX:
		fn = fs_posix_sysctl;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
}


/*
 * Routines dealing with vnodes and buffers
 */

/*
 * Wait for all outstanding I/Os to complete
 *
 * Manipulates v_numoutput. Must be called at splbio()
 */
int
vwaitforio(struct vnode *vp, int slpflag, char *wmesg, int timeo)
{
	int error = 0;

	splassert(IPL_BIO);

	while (vp->v_numoutput) {
		vp->v_bioflag |= VBIOWAIT;
		error = tsleep(&vp->v_numoutput,
		    slpflag | (PRIBIO + 1), wmesg, timeo);
		if (error)
			break;
	}

	return (error);
}
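
/*
 * Example (illustrative sketch): callers raise to splbio() around
 * the wait, as vinvalbuf() does below:
 *
 *	s = splbio();
 *	vwaitforio(vp, 0, "vinvalbuf", 0);
 *	splx(s);
 */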

/*
 * Update outstanding I/O count and do wakeup if requested.
 *
 * Manipulates v_numoutput. Must be called at splbio()
 */
void
vwakeup(struct vnode *vp)
{
	splassert(IPL_BIO);

	if (vp != NULL) {
		if (vp->v_numoutput-- == 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_bioflag & VBIOWAIT) && vp->v_numoutput == 0) {
			vp->v_bioflag &= ~VBIOWAIT;
			wakeup(&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct proc *p,
    int slpflag, int slptimeo)
{
	struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

#ifdef	VFSDEBUG
	if ((vp->v_flag & VLOCKSWORK) && !VOP_ISLOCKED(vp))
		panic("vinvalbuf(): vp isn't locked");
#endif

	if (flags & V_SAVE) {
		s = splbio();
		vwaitforio(vp, 0, "vinvalbuf", 0);
		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    !LIST_EMPTY(&vp->v_dirtyblkhd))
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
loop:
	s = splbio();
	for (;;) {
		if ((blist = LIST_FIRST(&vp->v_cleanblkhd)) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = LIST_NEXT(blist, b_vnbufs);
		if (blist == NULL &&
		    (blist = LIST_FIRST(&vp->v_dirtyblkhd)) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = LIST_NEXT(blist, b_vnbufs);
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = LIST_NEXT(bp, b_vnbufs);
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep(bp, slpflag | (PRIBIO + 1),
				    "vinvalbuf", slptimeo);
				if (error) {
					splx(s);
					return (error);
				}
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				splx(s);
				(void) VOP_BWRITE(bp);
				goto loop;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (!LIST_EMPTY(&vp->v_dirtyblkhd) || !LIST_EMPTY(&vp->v_cleanblkhd)))
		panic("vinvalbuf: flush failed");
	splx(s);
	return (0);
}
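
/*
 * Example (illustrative): vclean() above discards a vnode's buffers,
 * writing back the dirty ones first, with
 *
 *	vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
 *
 * while passing 0 instead of V_SAVE throws dirty data away.
 */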

void
vflushbuf(struct vnode *vp, int sync)
{
	struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd);
	    bp != LIST_END(&vp->v_dirtyblkhd); bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	vwaitforio(vp, 0, "vflushbuf", 0);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		splx(s);
#ifdef DIAGNOSTIC
		vprint("vflushbuf: dirty", vp);
#endif
		goto loop;
	}
	splx(s);
}

/*
 * Associate a buffer with a vnode.
 *
 * Manipulates buffer vnode queues. Must be called at splbio().
 */
void
bgetvp(struct vnode *vp, struct buf *bp)
{
	splassert(IPL_BIO);

	if (bp->b_vp)
		panic("bgetvp: not free");
	vhold(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 *
 * Manipulates vnode buffer queues. Must be called at splbio().
 */
void
brelvp(struct buf *bp)
{
	struct vnode *vp;

	splassert(IPL_BIO);

	if ((vp = bp->b_vp) == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);
	if ((vp->v_bioflag & VBIOONSYNCLIST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_bioflag &= ~VBIOONSYNCLIST;
		LIST_REMOVE(vp, v_synclist);
	}
	bp->b_vp = (struct vnode *) 0;

	simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
	if (vp->v_holdcnt == 0)
		panic("brelvp: holdcnt");
#endif
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.
	 */
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Replaces the current vnode associated with the buffer, if any,
 * with a new vnode.
 *
 * If an output I/O is pending on the buffer, the old vnode
 * I/O count is adjusted.
 *
 * Ignores vnode buffer queues. Must be called at splbio().
 */
void
buf_replacevnode(struct buf *bp, struct vnode *newvp)
{
	struct vnode *oldvp = bp->b_vp;

	splassert(IPL_BIO);

	if (oldvp)
		brelvp(bp);

	if ((bp->b_flags & (B_READ | B_DONE)) == 0) {
		newvp->v_numoutput++;	/* put it on swapdev */
		vwakeup(oldvp);
	}

	bgetvp(newvp, bp);
	bufremvn(bp);
}

/*
 * Used to assign buffers to the appropriate clean or dirty list on
 * the vnode and to add newly dirty vnodes to the appropriate
 * filesystem syncer list.
 *
 * Manipulates vnode buffer queues. Must be called at splbio().
 */
void
reassignbuf(struct buf *bp)
{
	struct buflists *listheadp;
	int delay;
	struct vnode *vp = bp->b_vp;

	splassert(IPL_BIO);

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &vp->v_cleanblkhd;
		if ((vp->v_bioflag & VBIOONSYNCLIST) &&
		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
			vp->v_bioflag &= ~VBIOONSYNCLIST;
			LIST_REMOVE(vp, v_synclist);
		}
	} else {
		listheadp = &vp->v_dirtyblkhd;
		if ((vp->v_bioflag & VBIOONSYNCLIST) == 0) {
			switch (vp->v_type) {
			case VDIR:
				delay = syncdelay / 2;
				break;
			case VBLK:
				if (vp->v_specmountpoint != NULL) {
					delay = syncdelay / 3;
					break;
				}
				/* fall through */
			default:
				delay = syncdelay;
			}
			vn_syncer_add_to_worklist(vp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

int
vfs_register(struct vfsconf *vfs)
{
	struct vfsconf *vfsp;
	struct vfsconf **vfspp;

#ifdef DIAGNOSTIC
	/* Paranoia? */
	if (vfs->vfc_refcount != 0)
		printf("vfs_register called with vfc_refcount > 0\n");
#endif

	/* Check if filesystem already known */
	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next)
		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
			return (EEXIST);

	if (vfs->vfc_typenum > maxvfsconf)
		maxvfsconf = vfs->vfc_typenum;

	vfs->vfc_next = NULL;

	/* Add to the end of the list */
	*vfspp = vfs;

	/* Call vfs_init() */
	if (vfs->vfc_vfsops->vfs_init)
		(*(vfs->vfc_vfsops->vfs_init))(vfs);

	return 0;
}
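
/*
 * Example (illustrative sketch; myfs_vfsconf is a hypothetical name):
 * a filesystem becomes known to the kernel by registering a filled-in
 * vfsconf:
 *
 *	static struct vfsconf myfs_vfsconf = { ... };
 *	vfs_register(&myfs_vfsconf);
 */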

int
vfs_unregister(struct vfsconf *vfs)
{
	struct vfsconf *vfsp;
	struct vfsconf **vfspp;
	int maxtypenum;

	/* Find our vfsconf struct */
	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next) {
		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
			break;
	}

	if (!vfsp)			/* Not found */
		return (ENOENT);

	if (vfsp->vfc_refcount)		/* In use */
		return (EBUSY);

	/* Remove from list and free */
	*vfspp = vfsp->vfc_next;

	maxtypenum = 0;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (vfsp->vfc_typenum > maxtypenum)
			maxtypenum = vfsp->vfc_typenum;

	maxvfsconf = maxtypenum;
	return 0;
}

/*
 * Check if vnode represents a disk device
 */
int
vn_isdisk(struct vnode *vp, int *errp)
{
	if (vp->v_type != VBLK && vp->v_type != VCHR)
		return (0);

	return (1);
}