/*	$OpenBSD: vfs_subr.c,v 1.123 2006/04/19 11:55:55 pedro Exp $	*/
/*	$NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>

#include <uvm/uvm_extern.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};

int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
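
/*
 * Illustrative note (not part of the original source): these two
 * tables back the IFTOVT() and VTTOIF() macros in <sys/vnode.h>,
 * which map the S_IFMT bits of a file mode to a vnode type and back.
 * A minimal sketch, assuming the standard macro definitions:
 *
 *	enum vtype vt = iftovt_tab[(S_IFDIR & S_IFMT) >> 12];	(vt == VDIR)
 *	int ifmt = vttoif_tab[(int)VDIR];		(ifmt == S_IFDIR)
 */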

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
int suid_clear = 1;		/* 1 => clear SUID / SGID on owner change */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	LIST_NEXT(bp, b_vnbufs) = NOLIST;				\
}

struct freelst vnode_hold_list;	/* list of vnodes referencing buffers */
struct freelst vnode_free_list;	/* vnode free list */

struct mntlist mountlist;	/* mounted filesystem list */
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
struct simplelock spechash_slock;

void	vclean(struct vnode *, int, struct proc *);

void insmntque(struct vnode *, struct mount *);
int getdevvp(dev_t, struct vnode **, enum vtype);

int vfs_hang_addrlist(struct mount *, struct netexport *,
				  struct export_args *);
int vfs_free_netcred(struct radix_node *, void *);
void vfs_free_addrlist(struct netexport *);
void vputonfreelist(struct vnode *);

int vflush_vnode(struct vnode *, void *);

#ifdef DEBUG
void printlockedvnodes(void);
#endif

#define VN_KNOTE(vp, b) \
	KNOTE((struct klist *)&vp->v_selectinfo.vsi_selinfo.si_note, (b))

struct pool vnode_pool;

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit(void)
{

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodes",
	    &pool_allocator_nointr);
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_hold_list);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 *
 * historical behavior:
 *  - LK_NOWAIT means that we should just ignore the mount point if it's
 *     being unmounted.
 *  - no flags means that we should sleep on the mountpoint and then
 *     fail.
 */
int
vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp)
{
	int lkflags;

	switch (flags) {
	case LK_NOWAIT:
		lkflags = LK_SHARED|LK_NOWAIT;
		break;
	case 0:
		lkflags = LK_SHARED;
		break;
	default:
		lkflags = flags;
	}

	/*
	 * Always sleepfail. We will only sleep for an exclusive lock
	 * and the exclusive lock will only be acquired when unmounting.
	 */
	lkflags |= LK_SLEEPFAIL;

	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		return (ENOENT);
	return (0);
}
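
/*
 * Illustrative sketch (not part of the original source): the common
 * caller pattern, as used by printlockedvnodes() and sysctl_vnode()
 * below -- skip a mount point that is being unmounted instead of
 * sleeping on it:
 *
 *	if (vfs_busy(mp, LK_NOWAIT, NULL) == 0) {
 *		(work on the mount point)
 *		vfs_unbusy(mp);
 *	}
 */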

/*
 * Free a busy file system
 */
void
vfs_unbusy(struct mount *mp)
{
	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}

int
vfs_isbusy(struct mount *mp)
{
	return (lockstatus(&mp->mnt_lock));
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(char *fstypename, char *devname, struct mount **mpp)
{
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, NULL);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
int
vfs_mountroot(void)
{
	struct vfsconf *vfsp;
	int error;

	if (mountroot != NULL)
		return ((*mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid_t *fsid)
{
	struct mount *mp;

	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			return (mp);
		}
	}

	return ((struct mount *)0);
}


/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(struct mount *mp)
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 * Note that this is no longer used for ffs which
 * now has an on-disk filesystem id.
 */
long
makefstype(char *type)
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}
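
/*
 * Worked example (illustrative only): for the string "nfs", rv
 * starts at 0 and evolves as
 *
 *	'n' (110): rv = (0 << 2)   ^ 110 = 110
 *	'f' (102): rv = (110 << 2) ^ 102 = 478
 *	's' (115): rv = (478 << 2) ^ 115 = 1803
 *
 * so makefstype("nfs") returns 1803.
 */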

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(struct vattr *vap)
{

	vap->va_type = VNON;
	/* XXX These next two used to be one line, but for a GCC bug. */
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
		vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
		vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)(void *);
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
    struct vnode **vpp)
{
	struct proc *p = curproc;
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int s;

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a vnode from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reluctant to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */
	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	simple_lock(&vnode_free_list_slock);
	s = splbio();
	if ((numvnodes < desiredvnodes) ||
	    ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) &&
	    ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
		splx(s);
		simple_unlock(&vnode_free_list_slock);
		vp = pool_get(&vnode_pool, PR_WAITOK);
		bzero((char *)vp, sizeof *vp);
		simple_lock_init(&vp->v_interlock);
		numvnodes++;
	} else {
		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
		    vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock)) {
				if (VOP_ISLOCKED(vp) == 0)
					break;
				else
					simple_unlock(&vp->v_interlock);
			}
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULL) {
			splx(s);
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}

#ifdef DIAGNOSTIC
		if (vp->v_usecount) {
			vprint("free vnode", vp);
			panic("free vnode isn't");
		}
#endif

		TAILQ_REMOVE(listhd, vp, v_freelist);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);

		simple_unlock(&vnode_free_list_slock);
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data) {
			vprint("cleaned vnode", vp);
			panic("cleaned vnode isn't");
		}
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(struct vnode *vp, struct mount *mp)
{
	simple_lock(&mntvnode_slock);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);

	simple_unlock(&mntvnode_slock);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev_t dev, struct vnode **vpp)
{
	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for console handling.
 */
int
cdevvp(dev_t dev, struct vnode **vpp)
{
	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console.
 */
int
getdevvp(dev_t dev, struct vnode **vpp, enum vtype type)
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp)
{
	struct proc *p = curproc;
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		simple_lock(&vp->v_interlock);
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}

	/*
	 * Common case is actually in the if statement
	 */
	if (vp == NULL || !(vp->v_tag == VT_NON && vp->v_type == VBLK)) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		nvp->v_speclockf = NULL;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}

	/*
	 * This code handles the uncommon case: we found an alias whose
	 * tag is VT_NON and whose type is VBLK, i.e. a block device
	 * vnode that was created using bdevvp. An example of such a
	 * vnode is the root partition device vnode created in
	 * ffs_mountroot.
	 *
	 * The vnodes created by bdevvp should not be aliased (why?).
	 */

	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. If the vnode lock bit is set,
 * the vnode is being eliminated in vgone. In that case, we
 * cannot grab it, so the process is awakened when the
 * transition is completed, and an error code is returned to
 * indicate that the vnode is no longer usable, possibly
 * having been changed to a new file system type.
 */
int
vget(struct vnode *vp, int flags, struct proc *p)
{
	int error, s, onfreelist;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
		flags |= LK_INTERLOCK;
	}

	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return (EBUSY);
		}

		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD | PNORELOCK, "vget", 0, &vp->v_interlock);
		return (ENOENT);
	}

	onfreelist = vp->v_bioflag & VBIOONFREELIST;
	if (vp->v_usecount == 0 && onfreelist) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);
	}

	vp->v_usecount++;
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags, p)) != 0) {
			vp->v_usecount--;
			if (vp->v_usecount == 0 && onfreelist)
				vputonfreelist(vp);

			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}

	simple_unlock(&vp->v_interlock);

	return (0);
}
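
/*
 * Illustrative sketch (not part of the original source): a typical
 * "look up, then get and lock" caller, modeled on the retry loop in
 * checkalias() above -- a failing vget() means the vnode was being
 * reclaimed, so the lookup must be restarted:
 *
 *	if (vget(vp, LK_EXCLUSIVE, p))
 *		goto loop;	(vnode died; look it up again)
 *	(use the locked, referenced vnode)
 *	vput(vp);
 */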


#ifdef DIAGNOSTIC
/*
 * Vnode reference.
 */
void
vref(struct vnode *vp)
{
	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0)
		panic("vref used where vget required");
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

void
vputonfreelist(struct vnode *vp)
{
	int s;
	struct freelst *lst;

	s = splbio();
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0)
		panic("Use count is not zero!");

	if (vp->v_bioflag & VBIOONFREELIST) {
		vprint("vnode already on free list: ", vp);
		panic("vnode already on free list");
	}
#endif

	vp->v_bioflag |= VBIOONFREELIST;

	if (vp->v_holdcnt > 0)
		lst = &vnode_hold_list;
	else
		lst = &vnode_free_list;

	if (vp->v_type == VBAD)
		TAILQ_INSERT_HEAD(lst, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(lst, vp, v_freelist);

	splx(s);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(struct vnode *vp)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);

#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0, p);
		return;
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vput: bad writecount", vp);
		panic("vput: v_writecount != 0");
	}
#endif
	simple_unlock(&vp->v_interlock);

	VOP_INACTIVE(vp, p);

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount == 0 && !(vp->v_bioflag & VBIOONFREELIST))
		vputonfreelist(vp);

	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode release - use for active VNODES.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(struct vnode *vp)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vrele: bad writecount", vp);
		panic("vrele: v_writecount != 0");
	}
#endif

	if (vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p)) {
#ifdef DIAGNOSTIC
		vprint("vrele: cannot lock", vp);
#endif
		return;
	}

	VOP_INACTIVE(vp, p);

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount == 0 && !(vp->v_bioflag & VBIOONFREELIST))
		vputonfreelist(vp);

	simple_unlock(&vp->v_interlock);
}
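
/*
 * Illustrative note (not part of the original source): vput() and
 * vrele() are the two ways of dropping the reference taken by
 * vget().  Use vput() while the vnode is still locked and vrele()
 * once it has already been unlocked:
 *
 *	vget(vp, LK_EXCLUSIVE, p);  ...  vput(vp);
 *	vget(vp, 0, p);             ...  vrele(vp);
 */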

void vhold(struct vnode *vp);

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(struct vnode *vp)
{
	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vfs_mount_foreach_vnode(struct mount *mp,
    int (*func)(struct vnode *, void *), void *arg) {
	struct vnode *vp, *nvp;
	int error = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = LIST_NEXT(vp, v_mntvnodes);
		simple_lock(&vp->v_interlock);
		simple_unlock(&mntvnode_slock);

		error = func(vp, arg);

		simple_lock(&mntvnode_slock);

		if (error != 0)
			break;
	}
	simple_unlock(&mntvnode_slock);

	return (error);
}

struct vflush_args {
	struct vnode *skipvp;
	int busy;
	int flags;
};

int
vflush_vnode(struct vnode *vp, void *arg) {
	struct vflush_args *va = arg;
	struct proc *p = curproc;

	if (vp == va->skipvp) {
		simple_unlock(&vp->v_interlock);
		return (0);
	}

	if ((va->flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
		simple_unlock(&vp->v_interlock);
		return (0);
	}

	/*
	 * If WRITECLOSE is set, only flush out regular file
	 * vnodes open for writing.
	 */
	if ((va->flags & WRITECLOSE) &&
	    (vp->v_writecount == 0 || vp->v_type != VREG)) {
		simple_unlock(&vp->v_interlock);
		return (0);
	}

	/*
	 * With v_usecount == 0, all we need to do is clear
	 * out the vnode data structures and we are done.
	 */
	if (vp->v_usecount == 0) {
		vgonel(vp, p);
		return (0);
	}

	/*
	 * If FORCECLOSE is set, forcibly close the vnode.
	 * For block or character devices, revert to an
	 * anonymous device. For all other files, just kill them.
	 */
	if (va->flags & FORCECLOSE) {
		if (vp->v_type != VBLK && vp->v_type != VCHR) {
			vgonel(vp, p);
		} else {
			vclean(vp, 0, p);
			vp->v_op = spec_vnodeop_p;
			insmntque(vp, (struct mount *)0);
		}
		return (0);
	}

#ifdef DEBUG
	if (busyprt)
		vprint("vflush: busy vnode", vp);
#endif
	simple_unlock(&vp->v_interlock);
	va->busy++;
	return (0);
}

int
vflush(struct mount *mp, struct vnode *skipvp, int flags)
{
	struct vflush_args va;
	va.skipvp = skipvp;
	va.busy = 0;
	va.flags = flags;

	vfs_mount_foreach_vnode(mp, vflush_vnode, &va);

	if (va.busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 * The vnode interlock is held on entry.
 */
void
vclean(struct vnode *vp, int flags, struct proc *p)
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0)
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * Clean out any VM data associated with the vnode.
	 */
	uvm_vnp_terminate(vp);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}

	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active) {
		simple_lock(&vp->v_interlock);

		vp->v_usecount--;
		if (vp->v_usecount == 0) {
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean");
			vputonfreelist(vp);
		}

		simple_unlock(&vp->v_interlock);
	}
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	simple_lock(&vp->v_selectinfo.vsi_lock);
	VN_KNOTE(vp, NOTE_REVOKE);
	simple_unlock(&vp->v_selectinfo.vsi_lock);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
#ifdef VFSDEBUG
	vp->v_flag &= ~VLOCKSWORK;
#endif
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup(vp);
	}
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct proc *p)
{
	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(struct vnode *vp)
{
	struct proc *p = curproc;

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(struct vnode *vp, struct proc *p)
{
	struct vnode *vq;
	struct vnode *vx;
	struct mount *mp;
	int flags;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);

		/*
		 * If we have a mount point associated with the vnode, we must
		 * flush it out now, so as not to leave a dangling zombie mount
		 * point lying around in VFS.
		 */
		mp = vp->v_specmountpoint;
		if (mp != NULL) {
			if (!vfs_busy(mp, LK_EXCLUSIVE, NULL)) {
				flags = MNT_FORCE | MNT_DOOMED;
				dounmount(mp, flags, p, NULL);
			}
		}

		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.
	 */
	vp->v_type = VBAD;

	/*
	 * Move onto the free list, unless we were called from
	 * getnewvnode and we're not on any free list
	 */
	if (vp->v_usecount == 0 &&
	    (vp->v_bioflag & VBIOONFREELIST)) {
		int s;

		simple_lock(&vnode_free_list_slock);
		s = splbio();

		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean");

		if (TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		splx(s);
		simple_unlock(&vnode_free_list_slock);
	}
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev_t dev, enum vtype type, struct vnode **vpp)
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(int maj, int minl, int minh, enum vtype type)
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(struct vnode *vp)
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

#ifdef DIAGNOSTIC
/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(char *label, struct vnode *vp)
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %u, writecount %u, holdcount %u,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strlcat(buf, "|VROOT", sizeof buf);
	if (vp->v_flag & VTEXT)
		strlcat(buf, "|VTEXT", sizeof buf);
	if (vp->v_flag & VSYSTEM)
		strlcat(buf, "|VSYSTEM", sizeof buf);
	if (vp->v_flag & VXLOCK)
		strlcat(buf, "|VXLOCK", sizeof buf);
	if (vp->v_flag & VXWANT)
		strlcat(buf, "|VXWANT", sizeof buf);
	if (vp->v_bioflag & VBIOWAIT)
		strlcat(buf, "|VBIOWAIT", sizeof buf);
	if (vp->v_bioflag & VBIOONFREELIST)
		strlcat(buf, "|VBIOONFREELIST", sizeof buf);
	if (vp->v_bioflag & VBIOONSYNCLIST)
		strlcat(buf, "|VBIOONSYNCLIST", sizeof buf);
	if (vp->v_flag & VALIASED)
		strlcat(buf, "|VALIASED", sizeof buf);
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}
#endif /* DIAGNOSTIC */

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes(void)
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");

	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, NULL)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen, struct proc *p)
{
	struct vfsconf *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
		    sizeof(struct vfsconf)));
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(char *where, size_t *sizep, struct proc *p)
{
	struct mount *mp, *nmp;
	struct vnode *vp, *nvp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * sizeof(struct e_vnode);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, NULL)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		savebp = bp;
again:
		for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL;
		    vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = LIST_NEXT(vp, v_mntvnodes);
			if (bp + sizeof(struct e_vnode) > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				vfs_unbusy(mp);
				return (ENOMEM);
			}
			if ((error = copyout(&vp,
			    &((struct e_vnode *)bp)->vptr,
			    sizeof(struct vnode *))) ||
			   (error = copyout(vp,
			    &((struct e_vnode *)bp)->vnode,
			    sizeof(struct vnode)))) {
				vfs_unbusy(mp);
				return (error);
			}
			bp += sizeof(struct e_vnode);
			simple_lock(&mntvnode_slock);
		}

		simple_unlock(&mntvnode_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}

	*sizep = bp - where;

	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(struct vnode *vp)
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
int
vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
    struct export_args *argp)
{
	struct netcred *np;
	struct radix_node_head *rnh;
	int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN ||
	    argp->ex_addrlen < 0 || argp->ex_masklen < 0)
		return (EINVAL);
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero(np, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if (i < 0 || i > AF_MAX) {
		error = EINVAL;
		goto out;
	}
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * It seems silly to initialize every AF when most are
		 * not used; do so on demand here.
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
int
vfs_free_netcred(struct radix_node *rn, void *w)
{
	struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh, NULL);
	free(rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
void
vfs_free_addrlist(struct netexport *nep)
{
	int i;
	struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i]) != NULL) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
			free(rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(struct mount *mp, struct netexport *nep, struct export_args *argp)
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(struct mount *mp, struct netexport *nep, struct mbuf *nam)
{
	struct netcred *np;
	struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list
 */
int
vaccess(mode_t file_mode, uid_t uid, gid_t gid, mode_t acc_mode,
    struct ucred *cred)
{
	mode_t mask;

	/* User id 0 always gets access. */
	if (cred->cr_uid == 0)
		return 0;

	mask = 0;

	/* Otherwise, check the owner. */
	if (cred->cr_uid == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check the groups. */
	if (cred->cr_gid == gid || groupmember(gid, cred)) {
		if (acc_mode & VEXEC)
			mask |= S_IXGRP;
		if (acc_mode & VREAD)
			mask |= S_IRGRP;
		if (acc_mode & VWRITE)
			mask |= S_IWGRP;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check everyone else. */
	if (acc_mode & VEXEC)
		mask |= S_IXOTH;
	if (acc_mode & VREAD)
		mask |= S_IROTH;
	if (acc_mode & VWRITE)
		mask |= S_IWOTH;
	return (file_mode & mask) == mask ? 0 : EACCES;
}
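
/*
 * Worked example (illustrative only): for a file with mode 0644
 * owned by uid 100 / gid 10, a caller with cr_uid 200 who is not a
 * member of group 10 falls through to the "everyone else" branch.
 * Asking for VWRITE yields mask = S_IWOTH = 0002, and
 * (0644 & 0002) != 0002, so EACCES is returned; asking for VREAD
 * yields mask = S_IROTH = 0004, (0644 & 0004) == 0004, and access
 * is granted.
 */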

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall(void)
{
	struct mount *mp, *nmp;
	int allerror, error, again = 1;

 retry:
	allerror = 0;
	for (mp = CIRCLEQ_LAST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		nmp = CIRCLEQ_PREV(mp, mnt_list);
		if ((vfs_busy(mp, LK_EXCLUSIVE|LK_NOWAIT, NULL)) != 0)
			continue;
		if ((error = dounmount(mp, MNT_FORCE, curproc, NULL)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}

	if (allerror) {
		printf("WARNING: some file systems would not unmount\n");
		if (again) {
			printf("retrying\n");
			again = 0;
			goto retry;
		}
	}
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown(void)
{
#ifdef ACCOUNTING
	extern void acct_shutdown(void);

	acct_shutdown();
#endif

	/* XXX Should suspend scheduling. */
	(void) spl0();

	printf("syncing disks... ");

	if (panicstr == 0) {
		/* Sync before unmount, in case we hang on something. */
		sys_sync(&proc0, (void *)0, (register_t *)0);

		/* Unmount file systems. */
		vfs_unmountall();
	}

	if (vfs_syncwait(1))
		printf("giving up\n");
	else
		printf("done\n");
}

/*
 * Perform the sync() operation and wait for buffers to flush.
 * Assumptions: called with the scheduler disabled and physical I/O
 * enabled; for now called at spl0().  XXX
 */
int
vfs_syncwait(int verbose)
{
	struct buf *bp;
	int iter, nbusy, dcount, s;
	struct proc *p;

	p = curproc ? curproc : &proc0;
	sys_sync(p, (void *)0, (register_t *)0);

	/* Wait for sync to finish. */
	dcount = 10000;
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_flags & B_DELWRI) {
				s = splbio();
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				nbusy++;
				bawrite(bp);
				if (dcount-- <= 0) {
					if (verbose)
						printf("softdep ");
					return 1;
				}
			}
		}
		if (nbusy == 0)
			break;
		if (verbose)
			printf("%d ", nbusy);
		DELAY(40000 * iter);
	}

	return nbusy;
}

/*
 * posix file system related system variables.
 */
int
fs_posix_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen, struct proc *p)
{
	/* all sysctl names at this level are terminal */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case FS_POSIX_SETUID:
		if (newp && securelevel > 0)
			return (EPERM);
		return (sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

/*
 * file system related system variables.
 */
int
fs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen, struct proc *p)
{
	sysctlfn *fn;

	switch (name[0]) {
	case FS_POSIX:
		fn = fs_posix_sysctl;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
}


/*
 * Routines dealing with vnodes and buffers
 */

/*
 * Wait for all outstanding I/Os to complete
 *
 * Manipulates v_numoutput. Must be called at splbio()
 */
int
vwaitforio(struct vnode *vp, int slpflag, char *wmesg, int timeo)
{
	int error = 0;

	splassert(IPL_BIO);

	while (vp->v_numoutput) {
		vp->v_bioflag |= VBIOWAIT;
		error = tsleep(&vp->v_numoutput,
		    slpflag | (PRIBIO + 1), wmesg, timeo);
		if (error)
			break;
	}

	return (error);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 *
 * Manipulates v_numoutput. Must be called at splbio()
 */
void
vwakeup(struct vnode *vp)
{
	splassert(IPL_BIO);

	if (vp != NULL) {
		if (vp->v_numoutput-- == 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_bioflag & VBIOWAIT) && vp->v_numoutput == 0) {
			vp->v_bioflag &= ~VBIOWAIT;
			wakeup(&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct proc *p,
    int slpflag, int slptimeo)
{
	struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		s = splbio();
		vwaitforio(vp, 0, "vinvalbuf", 0);
		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    !LIST_EMPTY(&vp->v_dirtyblkhd))
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
loop:
	s = splbio();
	for (;;) {
		if ((blist = LIST_FIRST(&vp->v_cleanblkhd)) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = LIST_NEXT(blist, b_vnbufs);
		if (blist == NULL &&
		    (blist = LIST_FIRST(&vp->v_dirtyblkhd)) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = LIST_NEXT(blist, b_vnbufs);
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = LIST_NEXT(bp, b_vnbufs);
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep(bp, slpflag | (PRIBIO + 1),
				    "vinvalbuf", slptimeo);
				if (error) {
					splx(s);
					return (error);
				}
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				splx(s);
				(void) VOP_BWRITE(bp);
				goto loop;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (!LIST_EMPTY(&vp->v_dirtyblkhd) || !LIST_EMPTY(&vp->v_cleanblkhd)))
		panic("vinvalbuf: flush failed");
	splx(s);
	return (0);
}

void
vflushbuf(struct vnode *vp, int sync)
{
	struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd);
	    bp != LIST_END(&vp->v_dirtyblkhd); bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	vwaitforio(vp, 0, "vflushbuf", 0);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		splx(s);
#ifdef DIAGNOSTIC
		vprint("vflushbuf: dirty", vp);
#endif
		goto loop;
	}
	splx(s);
}

/*
 * Associate a buffer with a vnode.
 *
 * Manipulates buffer vnode queues. Must be called at splbio().
 */
void
bgetvp(struct vnode *vp, struct buf *bp)
{
	splassert(IPL_BIO);

	if (bp->b_vp)
		panic("bgetvp: not free");
	vhold(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 *
 * Manipulates vnode buffer queues. Must be called at splbio().
 */
void
brelvp(struct buf *bp)
{
	struct vnode *vp;

	splassert(IPL_BIO);

	if ((vp = bp->b_vp) == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);
	if ((vp->v_bioflag & VBIOONSYNCLIST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_bioflag &= ~VBIOONSYNCLIST;
		LIST_REMOVE(vp, v_synclist);
	}
	bp->b_vp = (struct vnode *) 0;

	simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
	if (vp->v_holdcnt == 0)
		panic("brelvp: holdcnt");
#endif
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.
	 */
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Replaces the current vnode associated with the buffer, if any,
 * with a new vnode.
 *
 * If an output I/O is pending on the buffer, the old vnode
 * I/O count is adjusted.
 *
 * Ignores vnode buffer queues. Must be called at splbio().
 */
void
buf_replacevnode(struct buf *bp, struct vnode *newvp)
{
	struct vnode *oldvp = bp->b_vp;

	splassert(IPL_BIO);

	if (oldvp)
		brelvp(bp);

	if ((bp->b_flags & (B_READ | B_DONE)) == 0) {
		newvp->v_numoutput++;	/* put it on swapdev */
		vwakeup(oldvp);
	}

	bgetvp(newvp, bp);
	bufremvn(bp);
}

/*
 * Used to assign buffers to the appropriate clean or dirty list on
 * the vnode and to add newly dirty vnodes to the appropriate
 * filesystem syncer list.
 *
 * Manipulates vnode buffer queues. Must be called at splbio().
 */
void
reassignbuf(struct buf *bp)
{
	struct buflists *listheadp;
	int delay;
	struct vnode *vp = bp->b_vp;

	splassert(IPL_BIO);

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &vp->v_cleanblkhd;
		if ((vp->v_bioflag & VBIOONSYNCLIST) &&
		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
			vp->v_bioflag &= ~VBIOONSYNCLIST;
			LIST_REMOVE(vp, v_synclist);
		}
	} else {
		listheadp = &vp->v_dirtyblkhd;
		if ((vp->v_bioflag & VBIOONSYNCLIST) == 0) {
			switch (vp->v_type) {
			case VDIR:
				delay = syncdelay / 2;
				break;
			case VBLK:
				if (vp->v_specmountpoint != NULL) {
					delay = syncdelay / 3;
					break;
				}
				/* fall through */
			default:
				delay = syncdelay;
			}
			vn_syncer_add_to_worklist(vp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

int
vfs_register(struct vfsconf *vfs)
{
	struct vfsconf *vfsp;
	struct vfsconf **vfspp;

#ifdef DIAGNOSTIC
	/* Paranoia? */
	if (vfs->vfc_refcount != 0)
		printf("vfs_register called with vfc_refcount > 0\n");
#endif

	/* Check if filesystem already known */
	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next)
		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
			return (EEXIST);

	if (vfs->vfc_typenum > maxvfsconf)
		maxvfsconf = vfs->vfc_typenum;

	vfs->vfc_next = NULL;

	/* Add to the end of the list */
	*vfspp = vfs;

	/* Call vfs_init() */
	if (vfs->vfc_vfsops->vfs_init)
		(*(vfs->vfc_vfsops->vfs_init))(vfs);

	return 0;
}
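
/*
 * Illustrative sketch (not part of the original source): a
 * hypothetical filesystem "myfs" would register itself by filling
 * in a vfsconf and calling vfs_register(), e.g. from LKM attach
 * code.  The positional initializer below assumes the member order
 * vfc_vfsops, vfc_name, vfc_typenum, vfc_refcount, vfc_flags,
 * vfc_mountroot, vfc_next; the type number 30 is an arbitrary
 * example:
 *
 *	static struct vfsconf myfs_vfsconf = {
 *		&myfs_vfsops, "myfs", 30, 0, 0, NULL, NULL
 *	};
 *
 *	error = vfs_register(&myfs_vfsconf);
 *
 * A duplicate name is rejected with EEXIST; on success the
 * filesystem's vfs_init() hook is called.
 */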

int
vfs_unregister(struct vfsconf *vfs)
{
	struct vfsconf *vfsp;
	struct vfsconf **vfspp;
	int maxtypenum;

	/* Find our vfsconf struct */
	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next) {
		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
			break;
	}

	if (!vfsp)			/* Not found */
		return (ENOENT);

	if (vfsp->vfc_refcount)		/* In use */
		return (EBUSY);

	/* Remove from list and free */
	*vfspp = vfsp->vfc_next;

	maxtypenum = 0;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (vfsp->vfc_typenum > maxtypenum)
			maxtypenum = vfsp->vfc_typenum;

	maxvfsconf = maxtypenum;
	return 0;
}

/*
 * Check if vnode represents a disk device
 */
int
vn_isdisk(struct vnode *vp, int *errp)
{
	if (vp->v_type != VBLK && vp->v_type != VCHR)
		return (0);

	return (1);
}
2258