vfs_subr.c revision 1.67
1/*	$OpenBSD: vfs_subr.c,v 1.67 2001/09/19 22:52:41 csapuntz Exp $	*/
2/*	$NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $	*/
3
4/*
5 * Copyright (c) 1989, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *	This product includes software developed by the University of
24 *	California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
42 */
43
44/*
45 * External virtual filesystem routines
46 */
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/proc.h>
51#include <sys/mount.h>
52#include <sys/time.h>
53#include <sys/fcntl.h>
54#include <sys/kernel.h>
55#include <sys/vnode.h>
56#include <sys/stat.h>
57#include <sys/namei.h>
58#include <sys/ucred.h>
59#include <sys/buf.h>
60#include <sys/errno.h>
61#include <sys/malloc.h>
62#include <sys/domain.h>
63#include <sys/mbuf.h>
64#include <sys/syscallargs.h>
65#include <sys/pool.h>
66
67#include <vm/vm.h>
68#include <sys/sysctl.h>
69
70#include <miscfs/specfs/specdev.h>
71
72#include <uvm/uvm_extern.h>
73
74enum vtype iftovt_tab[16] = {
75	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
76	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
77};
78int	vttoif_tab[9] = {
79	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
80	S_IFSOCK, S_IFIFO, S_IFMT,
81};
82
int doforce = 1;		/* nonzero: forcible unmounting is permitted */
int prtactive = 0;		/* nonzero: report reclaim of active vnodes */
int suid_clear = 1;		/* nonzero: owner change clears SUID / SGID */
86
/*
 * Insq/Remq for the vnode usage lists.
 *
 * bufinsvn(bp, dp) links buffer bp at the head of list dp through its
 * b_vnbufs entry.  bufremvn(bp) unlinks bp and then stores NOLIST in
 * its forward link so the buffer is recognizably off-list.
 *
 * bufremvn() is wrapped in do { } while (0) so that "bufremvn(bp);"
 * is a single statement and stays correct in an unbraced if/else.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) do {						\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
} while (0)
95
96struct freelst vnode_hold_list;   /* list of vnodes referencing buffers */
97struct freelst vnode_free_list;   /* vnode free list */
98
99struct mntlist mountlist;			/* mounted filesystem list */
100struct simplelock mountlist_slock;
101static struct simplelock mntid_slock;
102struct simplelock mntvnode_slock;
103struct simplelock vnode_free_list_slock;
104struct simplelock spechash_slock;
105
106void	vclean __P((struct vnode *, int, struct proc *));
107
108void insmntque __P((struct vnode *, struct mount *));
109int getdevvp __P((dev_t, struct vnode **, enum vtype));
110
111int vfs_hang_addrlist __P((struct mount *, struct netexport *,
112				  struct export_args *));
113int vfs_free_netcred __P((struct radix_node *, void *));
114void vfs_free_addrlist __P((struct netexport *));
115static __inline__ void vputonfreelist __P((struct vnode *));
116
117#ifdef DEBUG
118void printlockedvnodes __P((void));
119#endif
120
/*
 * Invoke KNOTE() with hint b on the si_note list embedded in the select
 * information of vnode vp.  The vnode argument is parenthesized so that
 * any pointer-valued expression (not just a plain identifier) expands
 * correctly.
 */
#define VN_KNOTE(vp, b) \
	KNOTE((struct klist *)&(vp)->v_selectinfo.vsi_selinfo.si_note, (b))
123
124struct pool vnode_pool;
125
126/*
127 * Initialize the vnode management data structures.
128 */
129void
130vntblinit()
131{
132
133	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodes",
134		0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);
135	simple_lock_init(&mntvnode_slock);
136	simple_lock_init(&mntid_slock);
137	simple_lock_init(&spechash_slock);
138	TAILQ_INIT(&vnode_hold_list);
139	TAILQ_INIT(&vnode_free_list);
140	simple_lock_init(&vnode_free_list_slock);
141	CIRCLEQ_INIT(&mountlist);
142	simple_lock_init(&mountlist_slock);
143	/*
144	 * Initialize the filesystem syncer.
145	 */
146	vn_initialize_syncerd();
147}
148
149
150/*
151 * Mark a mount point as busy. Used to synchronize access and to delay
152 * unmounting. Interlock is not released on failure.
153 */
154
155int
156vfs_busy(mp, flags, interlkp, p)
157	struct mount *mp;
158	int flags;
159	struct simplelock *interlkp;
160	struct proc *p;
161{
162	int lkflags;
163
164	if (mp->mnt_flag & MNT_UNMOUNT) {
165		if (flags & LK_NOWAIT)
166			return (ENOENT);
167		mp->mnt_flag |= MNT_MWAIT;
168		if (interlkp)
169			simple_unlock(interlkp);
170		/*
171		 * Since all busy locks are shared except the exclusive
172		 * lock granted when unmounting, the only place that a
173		 * wakeup needs to be done is at the release of the
174		 * exclusive lock at the end of dounmount.
175		 */
176 		sleep((caddr_t)mp, PVFS);
177		if (interlkp)
178			simple_lock(interlkp);
179		return (ENOENT);
180	}
181	lkflags = LK_SHARED;
182	if (interlkp)
183		lkflags |= LK_INTERLOCK;
184	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
185		panic("vfs_busy: unexpected lock failure");
186	return (0);
187}
188
189
190/*
191 * Free a busy file system
192 */
193void
194vfs_unbusy(mp, p)
195	struct mount *mp;
196	struct proc *p;
197{
198	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
199}
200
201/*
202 * Lookup a filesystem type, and if found allocate and initialize
203 * a mount structure for it.
204 *
205 * Devname is usually updated by mount(8) after booting.
206 */
207
208int
209vfs_rootmountalloc(fstypename, devname, mpp)
210	char *fstypename;
211	char *devname;
212	struct mount **mpp;
213{
214	struct proc *p = curproc;	/* XXX */
215	struct vfsconf *vfsp;
216	struct mount *mp;
217
218	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
219		if (!strcmp(vfsp->vfc_name, fstypename))
220			break;
221	if (vfsp == NULL)
222		return (ENODEV);
223	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
224	bzero((char *)mp, (u_long)sizeof(struct mount));
225	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
226	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
227	LIST_INIT(&mp->mnt_vnodelist);
228	mp->mnt_vfc = vfsp;
229	mp->mnt_op = vfsp->vfc_vfsops;
230	mp->mnt_flag = MNT_RDONLY;
231	mp->mnt_vnodecovered = NULLVP;
232	vfsp->vfc_refcount++;
233	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
234	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
235	mp->mnt_stat.f_mntonname[0] = '/';
236	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
237	*mpp = mp;
238 	return (0);
239 }
240
241/*
242 * Find an appropriate filesystem to use for the root. If a filesystem
243 * has not been preselected, walk through the list of known filesystems
244 * trying those that have mountroot routines, and try them until one
245 * works or we have tried them all.
246  */
247int
248vfs_mountroot()
249{
250	struct vfsconf *vfsp;
251	extern int (*mountroot)(void);
252	int error;
253
254	if (mountroot != NULL)
255		return ((*mountroot)());
256	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
257		if (vfsp->vfc_mountroot == NULL)
258			continue;
259		if ((error = (*vfsp->vfc_mountroot)()) == 0)
260			return (0);
261		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
262 	}
263	return (ENODEV);
264}
265
266/*
267 * Lookup a mount point by filesystem identifier.
268 */
269struct mount *
270vfs_getvfs(fsid)
271	fsid_t *fsid;
272{
273	register struct mount *mp;
274
275	simple_lock(&mountlist_slock);
276	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
277		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
278		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
279			simple_unlock(&mountlist_slock);
280			return (mp);
281		}
282	}
283	simple_unlock(&mountlist_slock);
284	return ((struct mount *)0);
285}
286
287
288/*
289 * Get a new unique fsid
290 */
291void
292vfs_getnewfsid(mp)
293	struct mount *mp;
294{
295	static u_short xxxfs_mntid;
296
297	fsid_t tfsid;
298	int mtype;
299
300	simple_lock(&mntid_slock);
301	mtype = mp->mnt_vfc->vfc_typenum;
302	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
303	mp->mnt_stat.f_fsid.val[1] = mtype;
304	if (xxxfs_mntid == 0)
305		++xxxfs_mntid;
306	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
307	tfsid.val[1] = mtype;
308	if (!CIRCLEQ_EMPTY(&mountlist)) {
309		while (vfs_getvfs(&tfsid)) {
310			tfsid.val[0]++;
311			xxxfs_mntid++;
312		}
313	}
314	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
315	simple_unlock(&mntid_slock);
316}
317
/*
 * Make a 'unique' number from a mount type name.
 * Note that this is no longer used for ffs which
 * now has an on-disk filesystem id.
 *
 * The value is built by shifting the running result left two bits and
 * XOR-ing in each character of the NUL-terminated name in turn; an
 * empty name yields 0.
 *
 * The accumulation is done in an unsigned long so that long names,
 * whose bits shift off the top, wrap in a well-defined way instead of
 * overflowing a signed long.  The bit pattern produced is the same as
 * the signed computation gave for names that did not overflow.
 */
long
makefstype(type)
	char *type;
{
	unsigned long rv = 0;

	for (; *type != '\0'; type++)
		rv = (rv << 2) ^ (unsigned long)*type;
	return ((long)rv);
}
335
336/*
337 * Set vnode attributes to VNOVAL
338 */
339void
340vattr_null(vap)
341	register struct vattr *vap;
342{
343
344	vap->va_type = VNON;
345	/* XXX These next two used to be one line, but for a GCC bug. */
346	vap->va_size = VNOVAL;
347	vap->va_bytes = VNOVAL;
348	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
349		vap->va_fsid = vap->va_fileid =
350		vap->va_blocksize = vap->va_rdev =
351		vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
352		vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
353		vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
354		vap->va_flags = vap->va_gen = VNOVAL;
355	vap->va_vaflags = 0;
356}
357
358/*
359 * Routines having to do with the management of the vnode table.
360 */
361extern int (**dead_vnodeop_p) __P((void *));
362long numvnodes;
363
364/*
365 * Return the next vnode from the free list.
366 */
367int
368getnewvnode(tag, mp, vops, vpp)
369	enum vtagtype tag;
370	struct mount *mp;
371	int (**vops) __P((void *));
372	struct vnode **vpp;
373{
374	struct proc *p = curproc;			/* XXX */
375	struct freelst *listhd;
376	static int toggle;
377	struct vnode *vp;
378	int s;
379
380	/*
381	 * We must choose whether to allocate a new vnode or recycle an
382	 * existing one. The criterion for allocating a new one is that
383	 * the total number of vnodes is less than the number desired or
384	 * there are no vnodes on either free list. Generally we only
385	 * want to recycle vnodes that have no buffers associated with
386	 * them, so we look first on the vnode_free_list. If it is empty,
387	 * we next consider vnodes with referencing buffers on the
388	 * vnode_hold_list. The toggle ensures that half the time we
389	 * will use a buffer from the vnode_hold_list, and half the time
390	 * we will allocate a new one unless the list has grown to twice
391	 * the desired size. We are reticent to recycle vnodes from the
392	 * vnode_hold_list because we will lose the identity of all its
393	 * referencing buffers.
394	 */
395	toggle ^= 1;
396	if (numvnodes > 2 * desiredvnodes)
397		toggle = 0;
398
399	simple_lock(&vnode_free_list_slock);
400	s = splbio();
401	if ((numvnodes < desiredvnodes) ||
402	    ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) &&
403	    ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
404		splx(s);
405		simple_unlock(&vnode_free_list_slock);
406		vp = pool_get(&vnode_pool, PR_WAITOK);
407		bzero((char *)vp, sizeof *vp);
408		numvnodes++;
409	} else {
410		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
411		    vp = TAILQ_NEXT(vp, v_freelist)) {
412			if (simple_lock_try(&vp->v_interlock))
413				break;
414		}
415		/*
416		 * Unless this is a bad time of the month, at most
417		 * the first NCPUS items on the free list are
418		 * locked, so this is close enough to being empty.
419		 */
420		if (vp == NULLVP) {
421			splx(s);
422			simple_unlock(&vnode_free_list_slock);
423			tablefull("vnode");
424			*vpp = 0;
425			return (ENFILE);
426		}
427		if (vp->v_usecount) {
428			vprint("free vnode", vp);
429			panic("free vnode isn't");
430		}
431
432		TAILQ_REMOVE(listhd, vp, v_freelist);
433		vp->v_bioflag &= ~VBIOONFREELIST;
434		splx(s);
435
436		simple_unlock(&vnode_free_list_slock);
437		if (vp->v_type != VBAD)
438			vgonel(vp, p);
439		else
440			simple_unlock(&vp->v_interlock);
441#ifdef DIAGNOSTIC
442		if (vp->v_data) {
443			vprint("cleaned vnode", vp);
444			panic("cleaned vnode isn't");
445		}
446		s = splbio();
447		if (vp->v_numoutput)
448			panic("Clean vnode has pending I/O's");
449		splx(s);
450#endif
451		vp->v_flag = 0;
452		vp->v_bioflag = 0;
453		vp->v_socket = 0;
454	}
455	vp->v_type = VNON;
456	cache_purge(vp);
457	vp->v_tag = tag;
458	vp->v_op = vops;
459	insmntque(vp, mp);
460	*vpp = vp;
461	vp->v_usecount = 1;
462	vp->v_data = 0;
463	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
464	return (0);
465}
466
467/*
468 * Move a vnode from one mount queue to another.
469 */
470void
471insmntque(vp, mp)
472	register struct vnode *vp;
473	register struct mount *mp;
474{
475	simple_lock(&mntvnode_slock);
476	/*
477	 * Delete from old mount point vnode list, if on one.
478	 */
479
480	if (vp->v_mount != NULL)
481		LIST_REMOVE(vp, v_mntvnodes);
482	/*
483	 * Insert into list of vnodes for the new mount point, if available.
484	 */
485	if ((vp->v_mount = mp) != NULL)
486		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
487	simple_unlock(&mntvnode_slock);
488}
489
490
491/*
492 * Create a vnode for a block device.
493 * Used for root filesystem, argdev, and swap areas.
494 * Also used for memory file system special devices.
495 */
496int
497bdevvp(dev, vpp)
498	dev_t dev;
499	struct vnode **vpp;
500{
501
502	return (getdevvp(dev, vpp, VBLK));
503}
504
505/*
506 * Create a vnode for a character device.
507 * Used for kernfs and some console handling.
508 */
509int
510cdevvp(dev, vpp)
511	dev_t dev;
512	struct vnode **vpp;
513{
514
515	return (getdevvp(dev, vpp, VCHR));
516}
517
518/*
519 * Create a vnode for a device.
520 * Used by bdevvp (block device) for root file system etc.,
521 * and by cdevvp (character device) for console and kernfs.
522 */
523int
524getdevvp(dev, vpp, type)
525	dev_t dev;
526	struct vnode **vpp;
527	enum vtype type;
528{
529	register struct vnode *vp;
530	struct vnode *nvp;
531	int error;
532
533	if (dev == NODEV) {
534		*vpp = NULLVP;
535		return (0);
536	}
537	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
538	if (error) {
539		*vpp = NULLVP;
540		return (error);
541	}
542	vp = nvp;
543	vp->v_type = type;
544	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
545		vput(vp);
546		vp = nvp;
547	}
548	*vpp = vp;
549	return (0);
550}
551
552/*
553 * Check to see if the new vnode represents a special device
554 * for which we already have a vnode (either because of
555 * bdevvp() or because of a different vnode representing
556 * the same block device). If such an alias exists, deallocate
557 * the existing contents and return the aliased vnode. The
558 * caller is responsible for filling it with its new contents.
559 */
560struct vnode *
561checkalias(nvp, nvp_rdev, mp)
562	register struct vnode *nvp;
563	dev_t nvp_rdev;
564	struct mount *mp;
565{
566	struct proc *p = curproc;
567	register struct vnode *vp;
568	struct vnode **vpp;
569
570	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
571		return (NULLVP);
572
573	vpp = &speclisth[SPECHASH(nvp_rdev)];
574loop:
575	simple_lock(&spechash_slock);
576	for (vp = *vpp; vp; vp = vp->v_specnext) {
577		simple_lock(&vp->v_interlock);
578		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) {
579			simple_unlock(&vp->v_interlock);
580			continue;
581		}
582		/*
583		 * Alias, but not in use, so flush it out.
584		 */
585		if (vp->v_usecount == 0) {
586			simple_unlock(&spechash_slock);
587			vgonel(vp, p);
588			goto loop;
589		}
590		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
591			simple_unlock(&spechash_slock);
592			goto loop;
593		}
594		break;
595	}
596
597	/*
598	 * Common case is actually in the if statement
599	 */
600	if (vp == NULL || !(vp->v_tag == VT_NON && vp->v_type == VBLK)) {
601		MALLOC(nvp->v_specinfo, struct specinfo *,
602			sizeof(struct specinfo), M_VNODE, M_WAITOK);
603		nvp->v_rdev = nvp_rdev;
604		nvp->v_hashchain = vpp;
605		nvp->v_specnext = *vpp;
606		nvp->v_specmountpoint = NULL;
607		nvp->v_speclockf = NULL;
608		simple_unlock(&spechash_slock);
609		*vpp = nvp;
610		if (vp != NULLVP) {
611			nvp->v_flag |= VALIASED;
612			vp->v_flag |= VALIASED;
613			vput(vp);
614		}
615		return (NULLVP);
616	}
617
618	/*
619	 * This code is the uncommon case. It is called in case
620	 * we found an alias that was VT_NON && vtype of VBLK
621	 * This means we found a block device that was created
622	 * using bdevvp.
623	 * An example of such a vnode is the root partition device vnode
624	 * created in ffs_mountroot.
625	 *
626	 * The vnodes created by bdevvp should not be aliased (why?).
627	 */
628
629	simple_unlock(&spechash_slock);
630	VOP_UNLOCK(vp, 0, p);
631	simple_lock(&vp->v_interlock);
632	vclean(vp, 0, p);
633	vp->v_op = nvp->v_op;
634	vp->v_tag = nvp->v_tag;
635	nvp->v_type = VNON;
636	insmntque(vp, mp);
637	return (vp);
638}
639
640/*
641 * Grab a particular vnode from the free list, increment its
642 * reference count and lock it. The vnode lock bit is set the
643 * vnode is being eliminated in vgone. The process is awakened
644 * when the transition is completed, and an error returned to
645 * indicate that the vnode is no longer usable (possibly having
646 * been changed to a new file system type).
647 */
648int
649vget(vp, flags, p)
650	struct vnode *vp;
651	int flags;
652	struct proc *p;
653{
654	int error;
655	int s;
656	/*
657	 * If the vnode is in the process of being cleaned out for
658	 * another use, we wait for the cleaning to finish and then
659	 * return failure. Cleaning is determined by checking that
660	 * the VXLOCK flag is set.
661	 */
662	if ((flags & LK_INTERLOCK) == 0) {
663		simple_lock(&vp->v_interlock);
664		flags |= LK_INTERLOCK;
665	}
666	if (vp->v_flag & VXLOCK) {
667 		vp->v_flag |= VXWANT;
668		simple_unlock(&vp->v_interlock);
669		tsleep((caddr_t)vp, PINOD, "vget", 0);
670		return (ENOENT);
671 	}
672	if (vp->v_usecount == 0 &&
673	    (vp->v_bioflag & VBIOONFREELIST)) {
674		s = splbio();
675		simple_lock(&vnode_free_list_slock);
676		if (vp->v_holdcnt > 0)
677			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
678		else
679			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
680		simple_unlock(&vnode_free_list_slock);
681		vp->v_bioflag &= ~VBIOONFREELIST;
682		splx(s);
683	}
684 	vp->v_usecount++;
685	if (flags & LK_TYPE_MASK) {
686		if ((error = vn_lock(vp, flags, p)) != 0) {
687			vp->v_usecount--;
688			if (vp->v_usecount == 0)
689				vputonfreelist(vp);
690
691			simple_unlock(&vp->v_interlock);
692		}
693		return (error);
694	}
695	simple_unlock(&vp->v_interlock);
696	return (0);
697}
698
699
#ifdef DIAGNOSTIC
/*
 * Vnode reference: add one to the use count of a vnode that is already
 * referenced.  Panics if the use count is zero, since vget() is what
 * must be used in that situation.  This function form is compiled for
 * DIAGNOSTIC kernels only.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0)
		panic("vref used where vget required");
	vp->v_usecount += 1;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */
715
716static __inline__ void
717vputonfreelist(vp)
718	struct vnode *vp;
719{
720	int s;
721	struct freelst *lst;
722
723	s = splbio();
724#ifdef DIAGNOSTIC
725	if (vp->v_usecount != 0)
726		panic("Use count is not zero!");
727
728	if (vp->v_bioflag & VBIOONFREELIST) {
729		vprint("vnode already on free list: ", vp);
730		panic("vnode already on free list");
731	}
732#endif
733
734	vp->v_bioflag |= VBIOONFREELIST;
735
736	if (vp->v_holdcnt > 0)
737		lst = &vnode_hold_list;
738	else
739		lst = &vnode_free_list;
740
741	if (vp->v_type == VBAD)
742		TAILQ_INSERT_HEAD(lst, vp, v_freelist);
743	else
744		TAILQ_INSERT_TAIL(lst, vp, v_freelist);
745
746	splx(s);
747}
748
749/*
750 * vput(), just unlock and vrele()
751 */
752void
753vput(vp)
754	register struct vnode *vp;
755{
756	struct proc *p = curproc;	/* XXX */
757
758#ifdef DIAGNOSTIC
759	if (vp == NULL)
760		panic("vput: null vp");
761#endif
762	simple_lock(&vp->v_interlock);
763
764#ifdef DIAGNOSTIC
765	if (vp->v_usecount == 0) {
766		vprint("vput: bad ref count", vp);
767		panic("vput: ref cnt");
768	}
769#endif
770	vp->v_usecount--;
771	if (vp->v_usecount > 0) {
772		simple_unlock(&vp->v_interlock);
773		VOP_UNLOCK(vp, 0, p);
774		return;
775	}
776
777#ifdef DIAGNOSTIC
778	if (vp->v_writecount != 0) {
779		vprint("vput: bad writecount", vp);
780		panic("vput: v_writecount != 0");
781	}
782#endif
783	vputonfreelist(vp);
784
785	simple_unlock(&vp->v_interlock);
786
787	VOP_INACTIVE(vp, p);
788}
789
790/*
791 * Vnode release - use for active VNODES.
792 * If count drops to zero, call inactive routine and return to freelist.
793 */
794void
795vrele(vp)
796	register struct vnode *vp;
797{
798	struct proc *p = curproc;	/* XXX */
799
800#ifdef DIAGNOSTIC
801	if (vp == NULL)
802		panic("vrele: null vp");
803#endif
804	simple_lock(&vp->v_interlock);
805#ifdef DIAGNOSTIC
806	if (vp->v_usecount == 0) {
807		vprint("vrele: bad ref count", vp);
808		panic("vrele: ref cnt");
809	}
810#endif
811	vp->v_usecount--;
812	if (vp->v_usecount > 0) {
813		simple_unlock(&vp->v_interlock);
814		return;
815	}
816
817#ifdef DIAGNOSTIC
818	if (vp->v_writecount != 0) {
819		vprint("vrele: bad writecount", vp);
820		panic("vrele: v_writecount != 0");
821	}
822#endif
823	vputonfreelist(vp);
824
825	if (vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p) == 0)
826		VOP_INACTIVE(vp, p);
827}
828
829void vhold __P((struct vnode *vp));
830
831/*
832 * Page or buffer structure gets a reference.
833 */
834void
835vhold(vp)
836	register struct vnode *vp;
837{
838
839	/*
840	 * If it is on the freelist and the hold count is currently
841	 * zero, move it to the hold list.
842	 */
843  	simple_lock(&vp->v_interlock);
844	if ((vp->v_bioflag & VBIOONFREELIST) &&
845	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
846		simple_lock(&vnode_free_list_slock);
847		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
848		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
849		simple_unlock(&vnode_free_list_slock);
850	}
851	vp->v_holdcnt++;
852	simple_unlock(&vp->v_interlock);
853}
854
855/*
856 * Remove any vnodes in the vnode table belonging to mount point mp.
857 *
858 * If MNT_NOFORCE is specified, there should not be any active ones,
859 * return error if any are found (nb: this is a user error, not a
860 * system error). If MNT_FORCE is specified, detach any active vnodes
861 * that are found.
862 */
863#ifdef DEBUG
864int busyprt = 0;	/* print out busy vnodes */
865struct ctldebug debug1 = { "busyprt", &busyprt };
866#endif
867
868int
869vflush(mp, skipvp, flags)
870	struct mount *mp;
871	struct vnode *skipvp;
872	int flags;
873{
874	struct proc *p = curproc;
875	register struct vnode *vp, *nvp;
876	int busy = 0;
877
878	simple_lock(&mntvnode_slock);
879loop:
880	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
881		if (vp->v_mount != mp)
882			goto loop;
883		nvp = vp->v_mntvnodes.le_next;
884		/*
885		 * Skip over a selected vnode.
886		 */
887		if (vp == skipvp)
888			continue;
889
890		simple_lock(&vp->v_interlock);
891		/*
892		 * Skip over a vnodes marked VSYSTEM.
893		 */
894		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
895			simple_unlock(&vp->v_interlock);
896			continue;
897		}
898		/*
899		 * If WRITECLOSE is set, only flush out regular file
900		 * vnodes open for writing.
901		 */
902		if ((flags & WRITECLOSE) &&
903		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
904			simple_unlock(&vp->v_interlock);
905			continue;
906		}
907		/*
908		 * With v_usecount == 0, all we need to do is clear
909		 * out the vnode data structures and we are done.
910		 */
911		if (vp->v_usecount == 0) {
912			simple_unlock(&mntvnode_slock);
913			vgonel(vp, p);
914			simple_lock(&mntvnode_slock);
915			continue;
916		}
917		/*
918		 * If FORCECLOSE is set, forcibly close the vnode.
919		 * For block or character devices, revert to an
920		 * anonymous device. For all other files, just kill them.
921		 */
922		if (flags & FORCECLOSE) {
923			simple_unlock(&mntvnode_slock);
924			if (vp->v_type != VBLK && vp->v_type != VCHR) {
925				vgonel(vp, p);
926			} else {
927				vclean(vp, 0, p);
928				vp->v_op = spec_vnodeop_p;
929				insmntque(vp, (struct mount *)0);
930			}
931			simple_lock(&mntvnode_slock);
932			continue;
933		}
934#ifdef DEBUG
935		if (busyprt)
936			vprint("vflush: busy vnode", vp);
937#endif
938		simple_unlock(&vp->v_interlock);
939		busy++;
940	}
941	simple_unlock(&mntvnode_slock);
942	if (busy)
943		return (EBUSY);
944	return (0);
945}
946
947/*
948 * Disassociate the underlying file system from a vnode.
949 * The vnode interlock is held on entry.
950 */
951void
952vclean(vp, flags, p)
953	register struct vnode *vp;
954	int flags;
955	struct proc *p;
956{
957	int active;
958
959	/*
960	 * Check to see if the vnode is in use.
961	 * If so we have to reference it before we clean it out
962	 * so that its count cannot fall to zero and generate a
963	 * race against ourselves to recycle it.
964	 */
965	if ((active = vp->v_usecount) != 0)
966		vp->v_usecount++;
967
968	/*
969	 * Prevent the vnode from being recycled or
970	 * brought into use while we clean it out.
971	 */
972	if (vp->v_flag & VXLOCK)
973		panic("vclean: deadlock");
974	vp->v_flag |= VXLOCK;
975	/*
976	 * Even if the count is zero, the VOP_INACTIVE routine may still
977	 * have the object locked while it cleans it out. The VOP_LOCK
978	 * ensures that the VOP_INACTIVE routine is done with its work.
979	 * For active vnodes, it ensures that no other activity can
980	 * occur while the underlying object is being cleaned out.
981	 */
982	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
983
984	/*
985	 * clean out any VM data associated with the vnode.
986	 */
987	uvm_vnp_terminate(vp);
988	/*
989	 * Clean out any buffers associated with the vnode.
990	 */
991	if (flags & DOCLOSE)
992		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
993	/*
994	 * If purging an active vnode, it must be closed and
995	 * deactivated before being reclaimed. Note that the
996	 * VOP_INACTIVE will unlock the vnode
997	 */
998	if (active) {
999		if (flags & DOCLOSE)
1000			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
1001		VOP_INACTIVE(vp, p);
1002	} else {
1003		/*
1004		 * Any other processes trying to obtain this lock must first
1005		 * wait for VXLOCK to clear, then call the new lock operation.
1006		 */
1007		VOP_UNLOCK(vp, 0, p);
1008	}
1009
1010	/*
1011	 * Reclaim the vnode.
1012	 */
1013	if (VOP_RECLAIM(vp, p))
1014		panic("vclean: cannot reclaim");
1015	if (active) {
1016		simple_lock(&vp->v_interlock);
1017
1018		vp->v_usecount--;
1019		if (vp->v_usecount == 0) {
1020			if (vp->v_holdcnt > 0)
1021				panic("vclean: not clean");
1022			vputonfreelist(vp);
1023		}
1024
1025		simple_unlock(&vp->v_interlock);
1026	}
1027	cache_purge(vp);
1028	if (vp->v_vnlock) {
1029		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1030			vprint("vclean: lock not drained", vp);
1031		FREE(vp->v_vnlock, M_VNODE);
1032		vp->v_vnlock = NULL;
1033	}
1034
1035	/*
1036	 * Done with purge, notify sleepers of the grim news.
1037	 */
1038	vp->v_op = dead_vnodeop_p;
1039	simple_lock(&vp->v_selectinfo.vsi_lock);
1040	VN_KNOTE(vp, NOTE_REVOKE);
1041	simple_unlock(&vp->v_selectinfo.vsi_lock);
1042	vp->v_tag = VT_NON;
1043	vp->v_flag &= ~VXLOCK;
1044#ifdef DIAGNOSTIC
1045	vp->v_flag &= ~VLOCKSWORK;
1046#endif
1047	if (vp->v_flag & VXWANT) {
1048		vp->v_flag &= ~VXWANT;
1049		wakeup((caddr_t)vp);
1050	}
1051}
1052
1053
1054
1055/*
1056 * Recycle an unused vnode to the front of the free list.
1057 * Release the passed interlock if the vnode will be recycled.
1058 */
1059int
1060vrecycle(vp, inter_lkp, p)
1061	struct vnode *vp;
1062	struct simplelock *inter_lkp;
1063	struct proc *p;
1064{
1065
1066	simple_lock(&vp->v_interlock);
1067	if (vp->v_usecount == 0) {
1068		if (inter_lkp)
1069			simple_unlock(inter_lkp);
1070		vgonel(vp, p);
1071		return (1);
1072	}
1073	simple_unlock(&vp->v_interlock);
1074	return (0);
1075}
1076
1077
1078/*
1079 * Eliminate all activity associated with a vnode
1080 * in preparation for reuse.
1081 */
1082void
1083vgone(vp)
1084	register struct vnode *vp;
1085{
1086	struct proc *p = curproc;
1087
1088	simple_lock (&vp->v_interlock);
1089	vgonel(vp, p);
1090}
1091
1092/*
1093 * vgone, with the vp interlock held.
1094 */
1095void
1096vgonel(vp, p)
1097	struct vnode *vp;
1098	struct proc *p;
1099{
1100	register struct vnode *vq;
1101	struct vnode *vx;
1102
1103	/*
1104	 * If a vgone (or vclean) is already in progress,
1105	 * wait until it is done and return.
1106	 */
1107	if (vp->v_flag & VXLOCK) {
1108		vp->v_flag |= VXWANT;
1109		simple_unlock(&vp->v_interlock);
1110		tsleep((caddr_t)vp, PINOD, "vgone", 0);
1111		return;
1112	}
1113	/*
1114	 * Clean out the filesystem specific data.
1115	 */
1116	vclean(vp, DOCLOSE, p);
1117	/*
1118	 * Delete from old mount point vnode list, if on one.
1119	 */
1120	if (vp->v_mount != NULL)
1121		insmntque(vp, (struct mount *)0);
1122	/*
1123	 * If special device, remove it from special device alias list
1124	 * if it is on one.
1125	 */
1126	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1127		simple_lock(&spechash_slock);
1128		if (*vp->v_hashchain == vp) {
1129			*vp->v_hashchain = vp->v_specnext;
1130		} else {
1131			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1132				if (vq->v_specnext != vp)
1133					continue;
1134				vq->v_specnext = vp->v_specnext;
1135				break;
1136			}
1137			if (vq == NULL)
1138				panic("missing bdev");
1139		}
1140		if (vp->v_flag & VALIASED) {
1141			vx = NULL;
1142			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1143				if (vq->v_rdev != vp->v_rdev ||
1144				    vq->v_type != vp->v_type)
1145					continue;
1146				if (vx)
1147					break;
1148				vx = vq;
1149			}
1150			if (vx == NULL)
1151				panic("missing alias");
1152			if (vq == NULL)
1153				vx->v_flag &= ~VALIASED;
1154			vp->v_flag &= ~VALIASED;
1155		}
1156		simple_unlock(&spechash_slock);
1157		FREE(vp->v_specinfo, M_VNODE);
1158		vp->v_specinfo = NULL;
1159	}
1160	/*
1161	 * If it is on the freelist and not already at the head,
1162	 * move it to the head of the list.
1163	 */
1164	vp->v_type = VBAD;
1165
1166	/*
1167	 * Move onto the free list, unless we were called from
1168	 * getnewvnode and we're not on any free list
1169	 */
1170	if (vp->v_usecount == 0 &&
1171	    (vp->v_bioflag & VBIOONFREELIST)) {
1172		int s;
1173
1174		simple_lock(&vnode_free_list_slock);
1175		s = splbio();
1176
1177		if (vp->v_holdcnt > 0)
1178			panic("vgonel: not clean");
1179
1180		if (TAILQ_FIRST(&vnode_free_list) != vp) {
1181			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1182			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1183		}
1184		splx(s);
1185		simple_unlock(&vnode_free_list_slock);
1186	}
1187}
1188
1189/*
1190 * Lookup a vnode by device number.
1191 */
1192int
1193vfinddev(dev, type, vpp)
1194	dev_t dev;
1195	enum vtype type;
1196	struct vnode **vpp;
1197{
1198	register struct vnode *vp;
1199	int rc =0;
1200
1201	simple_lock(&spechash_slock);
1202	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1203		if (dev != vp->v_rdev || type != vp->v_type)
1204			continue;
1205		*vpp = vp;
1206		rc = 1;
1207		break;
1208	}
1209	simple_unlock(&spechash_slock);
1210	return (rc);
1211}
1212
1213/*
1214 * Revoke all the vnodes corresponding to the specified minor number
1215 * range (endpoints inclusive) of the specified major.
1216 */
1217void
1218vdevgone(maj, minl, minh, type)
1219	int maj, minl, minh;
1220	enum vtype type;
1221{
1222	struct vnode *vp;
1223	int mn;
1224
1225	for (mn = minl; mn <= minh; mn++)
1226		if (vfinddev(makedev(maj, mn), type, &vp))
1227			VOP_REVOKE(vp, REVOKEALL);
1228}
1229
1230/*
1231 * Calculate the total number of references to a special device.
1232 */
1233int
1234vcount(vp)
1235	struct vnode *vp;
1236{
1237	struct vnode *vq, *vnext;
1238	int count;
1239
1240loop:
1241	if ((vp->v_flag & VALIASED) == 0)
1242		return (vp->v_usecount);
1243	simple_lock(&spechash_slock);
1244	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1245		vnext = vq->v_specnext;
1246		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1247			continue;
1248		/*
1249		 * Alias, but not in use, so flush it out.
1250		 */
1251		if (vq->v_usecount == 0 && vq != vp) {
1252			simple_unlock(&spechash_slock);
1253			vgone(vq);
1254			goto loop;
1255		}
1256		count += vq->v_usecount;
1257	}
1258	simple_unlock(&spechash_slock);
1259	return (count);
1260}
1261
1262/*
1263 * Print out a description of a vnode.
1264 */
1265static char *typename[] =
1266   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1267
1268void
1269vprint(label, vp)
1270	char *label;
1271	register struct vnode *vp;
1272{
1273	char buf[64];
1274
1275	if (label != NULL)
1276		printf("%s: ", label);
1277	printf("type %s, usecount %u, writecount %u, holdcount %u,",
1278		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1279		vp->v_holdcnt);
1280	buf[0] = '\0';
1281	if (vp->v_flag & VROOT)
1282		strcat(buf, "|VROOT");
1283	if (vp->v_flag & VTEXT)
1284		strcat(buf, "|VTEXT");
1285	if (vp->v_flag & VSYSTEM)
1286		strcat(buf, "|VSYSTEM");
1287	if (vp->v_flag & VXLOCK)
1288		strcat(buf, "|VXLOCK");
1289	if (vp->v_flag & VXWANT)
1290		strcat(buf, "|VXWANT");
1291	if (vp->v_bioflag & VBIOWAIT)
1292		strcat(buf, "| VBIOWAIT");
1293	if (vp->v_flag & VALIASED)
1294		strcat(buf, "|VALIASED");
1295	if (buf[0] != '\0')
1296		printf(" flags (%s)", &buf[1]);
1297	if (vp->v_data == NULL) {
1298		printf("\n");
1299	} else {
1300		printf("\n\t");
1301		VOP_PRINT(vp);
1302	}
1303}
1304
1305#ifdef DEBUG
1306/*
1307 * List all of the locked vnodes in the system.
1308 * Called when debugging the kernel.
1309 */
1310void
1311printlockedvnodes()
1312{
1313	struct proc *p = curproc;
1314	register struct mount *mp, *nmp;
1315	register struct vnode *vp;
1316
1317	printf("Locked vnodes\n");
1318	simple_lock(&mountlist_slock);
1319	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
1320	    mp = nmp) {
1321		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1322			nmp = CIRCLEQ_NEXT(mp, mnt_list);
1323			continue;
1324		}
1325		for (vp = mp->mnt_vnodelist.lh_first; vp;
1326		    vp = vp->v_mntvnodes.le_next) {
1327			if (VOP_ISLOCKED(vp))
1328				vprint((char *)0, vp);
1329		}
1330		simple_lock(&mountlist_slock);
1331		nmp = CIRCLEQ_NEXT(mp, mnt_list);
1332		vfs_unbusy(mp, p);
1333 	}
1334	simple_unlock(&mountlist_slock);
1335
1336}
1337#endif
1338
1339/*
1340 * Top level filesystem related information gathering.
1341 */
1342int
1343vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1344	int *name;
1345	u_int namelen;
1346	void *oldp;
1347	size_t *oldlenp;
1348	void *newp;
1349	size_t newlen;
1350	struct proc *p;
1351{
1352	struct vfsconf *vfsp;
1353
1354	/* all sysctl names at this level are at least name and field */
1355	if (namelen < 2)
1356		return (ENOTDIR);		/* overloaded */
1357	if (name[0] != VFS_GENERIC) {
1358		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1359			if (vfsp->vfc_typenum == name[0])
1360				break;
1361		if (vfsp == NULL)
1362			return (EOPNOTSUPP);
1363		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1364		    oldp, oldlenp, newp, newlen, p));
1365	}
1366	switch (name[1]) {
1367	case VFS_MAXTYPENUM:
1368		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
1369	case VFS_CONF:
1370		if (namelen < 3)
1371			return (ENOTDIR);	/* overloaded */
1372		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1373			if (vfsp->vfc_typenum == name[2])
1374				break;
1375		if (vfsp == NULL)
1376			return (EOPNOTSUPP);
1377		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
1378		    sizeof(struct vfsconf)));
1379	}
1380	return (EOPNOTSUPP);
1381}
1382
1383
1384int kinfo_vdebug = 1;
1385int kinfo_vgetfailed;
1386#define KINFO_VNODESLOP	10
1387/*
1388 * Dump vnode list (via sysctl).
1389 * Copyout address of vnode followed by vnode.
1390 */
1391/* ARGSUSED */
1392int
1393sysctl_vnode(where, sizep, p)
1394	char *where;
1395	size_t *sizep;
1396	struct proc *p;
1397{
1398	register struct mount *mp, *nmp;
1399	struct vnode *vp, *nvp;
1400	register char *bp = where, *savebp;
1401	char *ewhere;
1402	int error;
1403
1404#define VPTRSZ	sizeof (struct vnode *)
1405#define VNODESZ	sizeof (struct vnode)
1406	if (where == NULL) {
1407		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
1408		return (0);
1409	}
1410	ewhere = where + *sizep;
1411
1412	simple_lock(&mountlist_slock);
1413	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
1414	    mp = nmp) {
1415		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1416			nmp = CIRCLEQ_NEXT(mp, mnt_list);
1417			continue;
1418		}
1419		savebp = bp;
1420again:
1421		for (vp = mp->mnt_vnodelist.lh_first; vp != NULL;
1422		    vp = nvp) {
1423			/*
1424			 * Check that the vp is still associated with
1425			 * this filesystem.  RACE: could have been
1426			 * recycled onto the same filesystem.
1427			 */
1428			if (vp->v_mount != mp) {
1429				simple_unlock(&mntvnode_slock);
1430				if (kinfo_vdebug)
1431					printf("kinfo: vp changed\n");
1432				bp = savebp;
1433				goto again;
1434			}
1435			nvp = vp->v_mntvnodes.le_next;
1436			if (bp + VPTRSZ + VNODESZ > ewhere) {
1437				simple_unlock(&mntvnode_slock);
1438				*sizep = bp - where;
1439				return (ENOMEM);
1440			}
1441			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
1442			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
1443				return (error);
1444			bp += VPTRSZ + VNODESZ;
1445			simple_lock(&mntvnode_slock);
1446		}
1447
1448		simple_unlock(&mntvnode_slock);
1449		simple_lock(&mountlist_slock);
1450		nmp = CIRCLEQ_NEXT(mp, mnt_list);
1451		vfs_unbusy(mp, p);
1452	}
1453
1454	simple_unlock(&mountlist_slock);
1455
1456	*sizep = bp - where;
1457	return (0);
1458}
1459
1460/*
1461 * Check to see if a filesystem is mounted on a block device.
1462 */
1463int
1464vfs_mountedon(vp)
1465	register struct vnode *vp;
1466{
1467	register struct vnode *vq;
1468	int error = 0;
1469
1470 	if (vp->v_specmountpoint != NULL)
1471		return (EBUSY);
1472	if (vp->v_flag & VALIASED) {
1473		simple_lock(&spechash_slock);
1474		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1475			if (vq->v_rdev != vp->v_rdev ||
1476			    vq->v_type != vp->v_type)
1477				continue;
1478			if (vq->v_specmountpoint != NULL) {
1479				error = EBUSY;
1480				break;
1481			}
1482 		}
1483		simple_unlock(&spechash_slock);
1484	}
1485	return (error);
1486}
1487
1488/*
1489 * Build hash lists of net addresses and hang them off the mount point.
1490 * Called by ufs_mount() to set up the lists of export addresses.
1491 */
1492int
1493vfs_hang_addrlist(mp, nep, argp)
1494	struct mount *mp;
1495	struct netexport *nep;
1496	struct export_args *argp;
1497{
1498	register struct netcred *np;
1499	register struct radix_node_head *rnh;
1500	register int i;
1501	struct radix_node *rn;
1502	struct sockaddr *saddr, *smask = 0;
1503	struct domain *dom;
1504	int error;
1505
1506	if (argp->ex_addrlen == 0) {
1507		if (mp->mnt_flag & MNT_DEFEXPORTED)
1508			return (EPERM);
1509		np = &nep->ne_defexported;
1510		np->netc_exflags = argp->ex_flags;
1511		np->netc_anon = argp->ex_anon;
1512		np->netc_anon.cr_ref = 1;
1513		mp->mnt_flag |= MNT_DEFEXPORTED;
1514		return (0);
1515	}
1516	if (argp->ex_addrlen > MLEN)
1517		return (EINVAL);
1518	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1519	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
1520	bzero((caddr_t)np, i);
1521	saddr = (struct sockaddr *)(np + 1);
1522	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
1523	if (error)
1524		goto out;
1525	if (saddr->sa_len > argp->ex_addrlen)
1526		saddr->sa_len = argp->ex_addrlen;
1527	if (argp->ex_masklen) {
1528		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
1529		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
1530		if (error)
1531			goto out;
1532		if (smask->sa_len > argp->ex_masklen)
1533			smask->sa_len = argp->ex_masklen;
1534	}
1535	i = saddr->sa_family;
1536	if ((rnh = nep->ne_rtable[i]) == 0) {
1537		/*
1538		 * Seems silly to initialize every AF when most are not
1539		 * used, do so on demand here
1540		 */
1541		for (dom = domains; dom; dom = dom->dom_next)
1542			if (dom->dom_family == i && dom->dom_rtattach) {
1543				dom->dom_rtattach((void **)&nep->ne_rtable[i],
1544					dom->dom_rtoffset);
1545				break;
1546			}
1547		if ((rnh = nep->ne_rtable[i]) == 0) {
1548			error = ENOBUFS;
1549			goto out;
1550		}
1551	}
1552	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
1553		np->netc_rnodes);
1554	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
1555		error = EPERM;
1556		goto out;
1557	}
1558	np->netc_exflags = argp->ex_flags;
1559	np->netc_anon = argp->ex_anon;
1560	np->netc_anon.cr_ref = 1;
1561	return (0);
1562out:
1563	free(np, M_NETADDR);
1564	return (error);
1565}
1566
1567/* ARGSUSED */
1568int
1569vfs_free_netcred(rn, w)
1570	struct radix_node *rn;
1571	void *w;
1572{
1573	register struct radix_node_head *rnh = (struct radix_node_head *)w;
1574
1575	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
1576	free((caddr_t)rn, M_NETADDR);
1577	return (0);
1578}
1579
1580/*
1581 * Free the net address hash lists that are hanging off the mount points.
1582 */
1583void
1584vfs_free_addrlist(nep)
1585	struct netexport *nep;
1586{
1587	register int i;
1588	register struct radix_node_head *rnh;
1589
1590	for (i = 0; i <= AF_MAX; i++)
1591		if ((rnh = nep->ne_rtable[i]) != NULL) {
1592			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
1593			free((caddr_t)rnh, M_RTABLE);
1594			nep->ne_rtable[i] = 0;
1595		}
1596}
1597
1598int
1599vfs_export(mp, nep, argp)
1600	struct mount *mp;
1601	struct netexport *nep;
1602	struct export_args *argp;
1603{
1604	int error;
1605
1606	if (argp->ex_flags & MNT_DELEXPORT) {
1607		vfs_free_addrlist(nep);
1608		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
1609	}
1610	if (argp->ex_flags & MNT_EXPORTED) {
1611		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
1612			return (error);
1613		mp->mnt_flag |= MNT_EXPORTED;
1614	}
1615	return (0);
1616}
1617
1618struct netcred *
1619vfs_export_lookup(mp, nep, nam)
1620	register struct mount *mp;
1621	struct netexport *nep;
1622	struct mbuf *nam;
1623{
1624	register struct netcred *np;
1625	register struct radix_node_head *rnh;
1626	struct sockaddr *saddr;
1627
1628	np = NULL;
1629	if (mp->mnt_flag & MNT_EXPORTED) {
1630		/*
1631		 * Lookup in the export list first.
1632		 */
1633		if (nam != NULL) {
1634			saddr = mtod(nam, struct sockaddr *);
1635			rnh = nep->ne_rtable[saddr->sa_family];
1636			if (rnh != NULL) {
1637				np = (struct netcred *)
1638					(*rnh->rnh_matchaddr)((caddr_t)saddr,
1639					    rnh);
1640				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
1641					np = NULL;
1642			}
1643		}
1644		/*
1645		 * If no address match, use the default if it exists.
1646		 */
1647		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
1648			np = &nep->ne_defexported;
1649	}
1650	return (np);
1651}
1652
1653/*
1654 * Do the usual access checking.
1655 * file_mode, uid and gid are from the vnode in question,
1656 * while acc_mode and cred are from the VOP_ACCESS parameter list
1657 */
1658int
1659vaccess(file_mode, uid, gid, acc_mode, cred)
1660	mode_t file_mode;
1661	uid_t uid;
1662	gid_t gid;
1663	mode_t acc_mode;
1664	struct ucred *cred;
1665{
1666	mode_t mask;
1667
1668	/* User id 0 always gets access. */
1669	if (cred->cr_uid == 0)
1670		return 0;
1671
1672	mask = 0;
1673
1674	/* Otherwise, check the owner. */
1675	if (cred->cr_uid == uid) {
1676		if (acc_mode & VEXEC)
1677			mask |= S_IXUSR;
1678		if (acc_mode & VREAD)
1679			mask |= S_IRUSR;
1680		if (acc_mode & VWRITE)
1681			mask |= S_IWUSR;
1682		return (file_mode & mask) == mask ? 0 : EACCES;
1683	}
1684
1685	/* Otherwise, check the groups. */
1686	if (cred->cr_gid == gid || groupmember(gid, cred)) {
1687		if (acc_mode & VEXEC)
1688			mask |= S_IXGRP;
1689		if (acc_mode & VREAD)
1690			mask |= S_IRGRP;
1691		if (acc_mode & VWRITE)
1692			mask |= S_IWGRP;
1693		return (file_mode & mask) == mask ? 0 : EACCES;
1694	}
1695
1696	/* Otherwise, check everyone else. */
1697	if (acc_mode & VEXEC)
1698		mask |= S_IXOTH;
1699	if (acc_mode & VREAD)
1700		mask |= S_IROTH;
1701	if (acc_mode & VWRITE)
1702		mask |= S_IWOTH;
1703	return (file_mode & mask) == mask ? 0 : EACCES;
1704}
1705
1706/*
1707 * Unmount all file systems.
1708 * We traverse the list in reverse order under the assumption that doing so
1709 * will avoid needing to worry about dependencies.
1710 */
1711void
1712vfs_unmountall()
1713{
1714	register struct mount *mp, *nmp;
1715	int allerror, error, again = 1;
1716
1717 retry:
1718	allerror = 0;
1719	for (mp = CIRCLEQ_LAST(&mountlist); mp != CIRCLEQ_END(&mountlist);
1720	    mp = nmp) {
1721		nmp = CIRCLEQ_PREV(mp, mnt_list);
1722		if ((error = dounmount(mp, MNT_FORCE, curproc)) != 0) {
1723			printf("unmount of %s failed with error %d\n",
1724			    mp->mnt_stat.f_mntonname, error);
1725			allerror = 1;
1726		}
1727	}
1728
1729	if (allerror) {
1730		printf("WARNING: some file systems would not unmount\n");
1731		if (again) {
1732			printf("retrying\n");
1733			again = 0;
1734			goto retry;
1735		}
1736	}
1737}
1738
1739/*
1740 * Sync and unmount file systems before shutting down.
1741 */
1742void
1743vfs_shutdown()
1744{
1745	/* XXX Should suspend scheduling. */
1746	(void) spl0();
1747
1748	printf("syncing disks... ");
1749
1750	if (panicstr == 0) {
1751		/* Sync before unmount, in case we hang on something. */
1752		sys_sync(&proc0, (void *)0, (register_t *)0);
1753
1754		/* Unmount file systems. */
1755		vfs_unmountall();
1756	}
1757
1758	if (vfs_syncwait(1))
1759		printf("giving up\n");
1760	else
1761		printf("done\n");
1762}
1763
1764/*
1765 * perform sync() operation and wait for buffers to flush.
1766 * assumtions: called w/ scheduler disabled and physical io enabled
1767 * for now called at spl0() XXX
1768 */
1769int
1770vfs_syncwait(verbose)
1771	int verbose;
1772{
1773	register struct buf *bp;
1774	int iter, nbusy, dcount, s;
1775	struct proc *p;
1776
1777	p = curproc? curproc : &proc0;
1778	sys_sync(p, (void *)0, (register_t *)0);
1779
1780	/* Wait for sync to finish. */
1781	dcount = 10000;
1782	for (iter = 0; iter < 20; iter++) {
1783		nbusy = 0;
1784		for (bp = &buf[nbuf]; --bp >= buf; ) {
1785			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
1786				nbusy++;
1787			/*
1788			 * With soft updates, some buffers that are
1789			 * written will be remarked as dirty until other
1790			 * buffers are written.
1791			 */
1792			if (bp->b_flags & B_DELWRI) {
1793				s = splbio();
1794				bremfree(bp);
1795				bp->b_flags |= B_BUSY;
1796				splx(s);
1797				nbusy++;
1798				bawrite(bp);
1799				if (dcount-- <= 0) {
1800					if (verbose)
1801						printf("softdep ");
1802					return 1;
1803				}
1804			}
1805		}
1806		if (nbusy == 0)
1807			break;
1808		if (verbose)
1809			printf("%d ", nbusy);
1810		DELAY(40000 * iter);
1811	}
1812
1813	return nbusy;
1814}
1815
1816/*
1817 * posix file system related system variables.
1818 */
1819int
1820fs_posix_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1821	int *name;
1822	u_int namelen;
1823	void *oldp;
1824	size_t *oldlenp;
1825	void *newp;
1826	size_t newlen;
1827	struct proc *p;
1828{
1829	/* all sysctl names at this level are terminal */
1830	if (namelen != 1)
1831		return (ENOTDIR);
1832
1833	switch (name[0]) {
1834	case FS_POSIX_SETUID:
1835		if (newp && securelevel > 0)
1836			return (EPERM);
1837		return(sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
1838	default:
1839		return (EOPNOTSUPP);
1840	}
1841	/* NOTREACHED */
1842}
1843
1844/*
1845 * file system related system variables.
1846 */
1847int
1848fs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1849	int *name;
1850	u_int namelen;
1851	void *oldp;
1852	size_t *oldlenp;
1853	void *newp;
1854	size_t newlen;
1855	struct proc *p;
1856{
1857	sysctlfn *fn;
1858
1859	switch (name[0]) {
1860	case FS_POSIX:
1861		fn = fs_posix_sysctl;
1862		break;
1863	default:
1864		return (EOPNOTSUPP);
1865	}
1866	return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
1867}
1868
1869
1870/*
1871 * Routines dealing with vnodes and buffers
1872 */
1873
1874/*
1875 * Wait for all outstanding I/Os to complete
1876 *
1877 * Manipulates v_numoutput. Must be called at splbio()
1878 */
1879int
1880vwaitforio(vp, slpflag, wmesg, timeo)
1881	struct vnode *vp;
1882	int slpflag, timeo;
1883	char *wmesg;
1884{
1885	int error = 0;
1886
1887	while (vp->v_numoutput) {
1888		vp->v_bioflag |= VBIOWAIT;
1889		error = tsleep((caddr_t)&vp->v_numoutput,
1890		    slpflag | (PRIBIO + 1), wmesg, timeo);
1891		if (error)
1892			break;
1893	}
1894
1895	return (error);
1896}
1897
1898
1899/*
1900 * Update outstanding I/O count and do wakeup if requested.
1901 *
1902 * Manipulates v_numoutput. Must be called at splbio()
1903 */
1904void
1905vwakeup(vp)
1906	struct vnode *vp;
1907{
1908	if (vp != NULL) {
1909		if (vp->v_numoutput-- == 0)
1910			panic("vwakeup: neg numoutput");
1911		if ((vp->v_bioflag & VBIOWAIT) && vp->v_numoutput == 0) {
1912			vp->v_bioflag &= ~VBIOWAIT;
1913			wakeup((caddr_t)&vp->v_numoutput);
1914		}
1915	}
1916}
1917
1918/*
1919 * Flush out and invalidate all buffers associated with a vnode.
1920 * Called with the underlying object locked.
1921 */
1922int
1923vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
1924	register struct vnode *vp;
1925	int flags;
1926	struct ucred *cred;
1927	struct proc *p;
1928	int slpflag, slptimeo;
1929{
1930	register struct buf *bp;
1931	struct buf *nbp, *blist;
1932	int s, error;
1933
1934	if (flags & V_SAVE) {
1935		s = splbio();
1936		vwaitforio(vp, 0, "vinvalbuf", 0);
1937		if (vp->v_dirtyblkhd.lh_first != NULL) {
1938			splx(s);
1939			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
1940				return (error);
1941			s = splbio();
1942			if (vp->v_numoutput > 0 ||
1943			    vp->v_dirtyblkhd.lh_first != NULL)
1944				panic("vinvalbuf: dirty bufs");
1945		}
1946		splx(s);
1947	}
1948loop:
1949	s = splbio();
1950	for (;;) {
1951		if ((blist = vp->v_cleanblkhd.lh_first) &&
1952		    (flags & V_SAVEMETA))
1953			while (blist && blist->b_lblkno < 0)
1954				blist = blist->b_vnbufs.le_next;
1955		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
1956		    (flags & V_SAVEMETA))
1957			while (blist && blist->b_lblkno < 0)
1958				blist = blist->b_vnbufs.le_next;
1959		if (!blist)
1960			break;
1961
1962		for (bp = blist; bp; bp = nbp) {
1963			nbp = bp->b_vnbufs.le_next;
1964			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
1965				continue;
1966			if (bp->b_flags & B_BUSY) {
1967				bp->b_flags |= B_WANTED;
1968				error = tsleep((caddr_t)bp,
1969					slpflag | (PRIBIO + 1), "vinvalbuf",
1970					slptimeo);
1971				if (error) {
1972					splx(s);
1973					return (error);
1974				}
1975				break;
1976			}
1977			bremfree(bp);
1978			bp->b_flags |= B_BUSY;
1979			/*
1980			 * XXX Since there are no node locks for NFS, I believe
1981			 * there is a slight chance that a delayed write will
1982			 * occur while sleeping just above, so check for it.
1983			 */
1984			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
1985				splx(s);
1986				(void) VOP_BWRITE(bp);
1987				goto loop;
1988			}
1989			bp->b_flags |= B_INVAL;
1990			brelse(bp);
1991		}
1992	}
1993	if (!(flags & V_SAVEMETA) &&
1994	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
1995		panic("vinvalbuf: flush failed");
1996	splx(s);
1997	return (0);
1998}
1999
2000void
2001vflushbuf(vp, sync)
2002	register struct vnode *vp;
2003	int sync;
2004{
2005	register struct buf *bp, *nbp;
2006	int s;
2007
2008loop:
2009	s = splbio();
2010	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
2011		nbp = bp->b_vnbufs.le_next;
2012		if ((bp->b_flags & B_BUSY))
2013			continue;
2014		if ((bp->b_flags & B_DELWRI) == 0)
2015			panic("vflushbuf: not dirty");
2016		bremfree(bp);
2017		bp->b_flags |= B_BUSY;
2018		splx(s);
2019		/*
2020		 * Wait for I/O associated with indirect blocks to complete,
2021		 * since there is no way to quickly wait for them below.
2022		 */
2023		if (bp->b_vp == vp || sync == 0)
2024			(void) bawrite(bp);
2025		else
2026			(void) bwrite(bp);
2027		goto loop;
2028	}
2029	if (sync == 0) {
2030		splx(s);
2031		return;
2032	}
2033	vwaitforio(vp, 0, "vflushbuf", 0);
2034	if (vp->v_dirtyblkhd.lh_first != NULL) {
2035		splx(s);
2036		vprint("vflushbuf: dirty", vp);
2037		goto loop;
2038	}
2039	splx(s);
2040}
2041
2042/*
2043 * Associate a buffer with a vnode.
2044 *
2045 * Manipulates buffer vnode queues. Must be called at splbio().
2046 */
2047void
2048bgetvp(vp, bp)
2049	register struct vnode *vp;
2050	register struct buf *bp;
2051{
2052
2053	if (bp->b_vp)
2054		panic("bgetvp: not free");
2055	vhold(vp);
2056	bp->b_vp = vp;
2057	if (vp->v_type == VBLK || vp->v_type == VCHR)
2058		bp->b_dev = vp->v_rdev;
2059	else
2060		bp->b_dev = NODEV;
2061	/*
2062	 * Insert onto list for new vnode.
2063	 */
2064	bufinsvn(bp, &vp->v_cleanblkhd);
2065}
2066
2067/*
2068 * Disassociate a buffer from a vnode.
2069 *
2070 * Manipulates vnode buffer queues. Must be called at splbio().
2071 */
2072void
2073brelvp(bp)
2074	register struct buf *bp;
2075{
2076	struct vnode *vp;
2077
2078	if ((vp = bp->b_vp) == (struct vnode *) 0)
2079		panic("brelvp: NULL");
2080	/*
2081	 * Delete from old vnode list, if on one.
2082	 */
2083	if (bp->b_vnbufs.le_next != NOLIST)
2084		bufremvn(bp);
2085	if ((vp->v_bioflag & VBIOONSYNCLIST) &&
2086	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
2087		vp->v_bioflag &= ~VBIOONSYNCLIST;
2088		LIST_REMOVE(vp, v_synclist);
2089	}
2090	bp->b_vp = (struct vnode *) 0;
2091
2092	simple_lock(&vp->v_interlock);
2093#ifdef DIAGNOSTIC
2094	if (vp->v_holdcnt == 0)
2095		panic("brelvp: holdcnt");
2096#endif
2097	vp->v_holdcnt--;
2098
2099	/*
2100	 * If it is on the holdlist and the hold count drops to
2101	 * zero, move it to the free list.
2102	 */
2103	if ((vp->v_bioflag & VBIOONFREELIST) &&
2104	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
2105		simple_lock(&vnode_free_list_slock);
2106		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
2107		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
2108		simple_unlock(&vnode_free_list_slock);
2109	}
2110	simple_unlock(&vp->v_interlock);
2111}
2112
2113/*
2114 * Replaces the current vnode associated with the buffer, if any
2115 * with a new vnode.
2116 *
2117 * If an output I/O is pending on the buffer, the old vnode is
2118 * I/O count is adjusted.
2119 *
2120 * Ignores vnode buffer queues. Must be called at splbio().
2121 */
2122void
2123buf_replacevnode(bp, newvp)
2124	struct buf *bp;
2125	struct vnode *newvp;
2126{
2127	struct vnode *oldvp = bp->b_vp;
2128
2129	if (oldvp)
2130		brelvp(bp);
2131
2132	if ((bp->b_flags & (B_READ | B_DONE)) == 0) {
2133		newvp->v_numoutput++;	/* put it on swapdev */
2134		vwakeup(oldvp);
2135	}
2136
2137	bgetvp(newvp, bp);
2138	bufremvn(bp);
2139}
2140
2141/*
2142 * Used to assign buffers to the appropriate clean or dirty list on
2143 * the vnode and to add newly dirty vnodes to the appropriate
2144 * filesystem syncer list.
2145 *
2146 * Manipulates vnode buffer queues. Must be called at splbio().
2147 */
2148void
2149reassignbuf(bp)
2150	register struct buf *bp;
2151{
2152	struct buflists *listheadp;
2153	int delay;
2154	struct vnode *vp = bp->b_vp;
2155
2156	if (vp == NULL) {
2157		printf("reassignbuf: NULL");
2158		return;
2159	}
2160	/*
2161	 * Delete from old vnode list, if on one.
2162	 */
2163	if (bp->b_vnbufs.le_next != NOLIST)
2164		bufremvn(bp);
2165	/*
2166	 * If dirty, put on list of dirty buffers;
2167	 * otherwise insert onto list of clean buffers.
2168	 */
2169	if ((bp->b_flags & B_DELWRI) == 0) {
2170		listheadp = &vp->v_cleanblkhd;
2171		if ((vp->v_bioflag & VBIOONSYNCLIST) &&
2172		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
2173			vp->v_bioflag &= ~VBIOONSYNCLIST;
2174			LIST_REMOVE(vp, v_synclist);
2175		}
2176	} else {
2177		listheadp = &vp->v_dirtyblkhd;
2178		if ((vp->v_bioflag & VBIOONSYNCLIST) == 0) {
2179			switch (vp->v_type) {
2180			case VDIR:
2181				delay = syncdelay / 2;
2182				break;
2183			case VBLK:
2184				if (vp->v_specmountpoint != NULL) {
2185					delay = syncdelay / 3;
2186					break;
2187				}
2188				/* fall through */
2189			default:
2190				delay = syncdelay;
2191			}
2192			vn_syncer_add_to_worklist(vp, delay);
2193		}
2194	}
2195	bufinsvn(bp, listheadp);
2196}
2197
2198int
2199vfs_register(vfs)
2200	struct vfsconf *vfs;
2201{
2202	struct vfsconf *vfsp;
2203	struct vfsconf **vfspp;
2204
2205#ifdef DIAGNOSTIC
2206	/* Paranoia? */
2207	if (vfs->vfc_refcount != 0)
2208		printf("vfs_register called with vfc_refcount > 0\n");
2209#endif
2210
2211	/* Check if filesystem already known */
2212	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
2213	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next)
2214		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
2215			return (EEXIST);
2216
2217	if (vfs->vfc_typenum > maxvfsconf)
2218		maxvfsconf = vfs->vfc_typenum;
2219
2220	vfs->vfc_next = NULL;
2221
2222	/* Add to the end of the list */
2223	*vfspp = vfs;
2224
2225	/* Call vfs_init() */
2226	if (vfs->vfc_vfsops->vfs_init)
2227		(*(vfs->vfc_vfsops->vfs_init))(vfs);
2228
2229	return 0;
2230}
2231
2232int
2233vfs_unregister(vfs)
2234	struct vfsconf *vfs;
2235{
2236	struct vfsconf *vfsp;
2237	struct vfsconf **vfspp;
2238	int maxtypenum;
2239
2240	/* Find our vfsconf struct */
2241	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
2242	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next) {
2243		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
2244			break;
2245	}
2246
2247	if (!vfsp)			/* Not found */
2248		return (ENOENT);
2249
2250	if (vfsp->vfc_refcount)		/* In use */
2251		return (EBUSY);
2252
2253	/* Remove from list and free */
2254	*vfspp = vfsp->vfc_next;
2255
2256	maxtypenum = 0;
2257
2258	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2259		if (vfsp->vfc_typenum > maxtypenum)
2260			maxtypenum = vfsp->vfc_typenum;
2261
2262	maxvfsconf = maxtypenum;
2263	return 0;
2264}
2265
2266/*
2267 * Check if vnode represents a disk device
2268 */
2269int
2270vn_isdisk(vp, errp)
2271	struct vnode *vp;
2272	int *errp;
2273{
2274	if (vp->v_type != VBLK && vp->v_type != VCHR)
2275		return (0);
2276
2277	return (1);
2278}
2279