vfs_subr.c revision 1.99
/*	$OpenBSD: vfs_subr.c,v 1.99 2004/05/27 08:25:53 tedu Exp $	*/
/*	$NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>

#include <uvm/uvm_extern.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
int suid_clear = 1;		/* 1 => clear SUID / SGID on owner change */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}

struct freelst vnode_hold_list;   /* list of vnodes referencing buffers */
struct freelst vnode_free_list;   /* vnode free list */

struct mntlist mountlist;			/* mounted filesystem list */
struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
struct simplelock spechash_slock;

void	vclean(struct vnode *, int, struct proc *);

void insmntque(struct vnode *, struct mount *);
int getdevvp(dev_t, struct vnode **, enum vtype);

int vfs_hang_addrlist(struct mount *, struct netexport *,
				  struct export_args *);
int vfs_free_netcred(struct radix_node *, void *);
void vfs_free_addrlist(struct netexport *);
static __inline__ void vputonfreelist(struct vnode *);

int vflush_vnode(struct vnode *, void *);

#ifdef DEBUG
void printlockedvnodes(void);
#endif

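/*
 * Post an event note to the vnode's klist (kernel event listeners).
 */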
#define VN_KNOTE(vp, b) \
	KNOTE((struct klist *)&vp->v_selectinfo.vsi_selinfo.si_note, (b))

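/* Memory pool that vnodes are allocated from; set up in vntblinit(). */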
struct pool vnode_pool;

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodes",
	    &pool_allocator_nointr);
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_hold_list);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
	simple_lock_init(&mountlist_slock);
	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 *
 * historical behavior:
 *  - LK_NOWAIT means that we should just ignore the mount point if it's
 *     being unmounted.
 *  - no flags means that we should sleep on the mountpoint and then
 *     fail.
 */

int
vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp,
    struct proc *p)
{
	int lkflags;

	switch (flags) {
	case LK_NOWAIT:
		lkflags = LK_SHARED|LK_NOWAIT;
		break;
	case 0:
		lkflags = LK_SHARED;
		break;
	default:
		lkflags = flags;
	}

	/*
	 * Always sleepfail. We will only sleep for an exclusive lock
	 * and the exclusive lock will only be acquired when unmounting.
	 */
	lkflags |= LK_SLEEPFAIL;

	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		return (ENOENT);
	return (0);
}


/*
 * Free a busy file system
 */
void
vfs_unbusy(struct mount *mp, struct proc *p)
{
	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}

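/*
 * Return non-zero if the file system is busy, i.e. its lock is held.
 */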
int
vfs_isbusy(struct mount *mp)
{
	return (lockstatus(&mp->mnt_lock));
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */

int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines until one works or we have
 * tried them all.
 */
int
vfs_mountroot()
{
	struct vfsconf *vfsp;
	int error;

	if (mountroot != NULL)
		return ((*mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}


/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name; for example,
 * makefstype("ffs") yields 0x78b. Note that this is no longer used
 * for ffs, which now has an on-disk filesystem id.
 */
long
makefstype(type)
	char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	/* XXX These next two used to be one line, but for a GCC bug. */
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
		vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
		vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)(void *);
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)(void *);
	struct vnode **vpp;
{
	struct proc *p = curproc;			/* XXX */
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int s;

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reluctant to recycle vnodes from the
	 * vnode_hold_list because we would lose the identity of all their
	 * referencing buffers.
	 */
	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	simple_lock(&vnode_free_list_slock);
	s = splbio();
	if ((numvnodes < desiredvnodes) ||
	    ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) &&
	    ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
		splx(s);
		simple_unlock(&vnode_free_list_slock);
		vp = pool_get(&vnode_pool, PR_WAITOK);
		bzero((char *)vp, sizeof *vp);
		simple_lock_init(&vp->v_interlock);
		numvnodes++;
	} else {
		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
		    vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock)) {
				if ((vp->v_flag & VLAYER) == 0)
					break;
				if (VOP_ISLOCKED(vp) == 0)
					break;
				else
					simple_unlock(&vp->v_interlock);
			}
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULL) {
			splx(s);
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount) {
			vprint("free vnode", vp);
			panic("free vnode isn't");
		}

		TAILQ_REMOVE(listhd, vp, v_freelist);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);

		simple_unlock(&vnode_free_list_slock);
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data) {
			vprint("cleaned vnode", vp);
			panic("cleaned vnode isn't");
		}
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_vnlock = NULL;
	lockinit(&vp->v_lock, PVFS, "v_lock", 0, 0);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{
	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */

	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}


/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		simple_lock(&vp->v_interlock);
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}

	/*
	 * The common case is handled in the if statement below.
	 */
	if (vp == NULL || !(vp->v_tag == VT_NON && vp->v_type == VBLK)) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		nvp->v_speclockf = NULL;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}

	/*
	 * This code handles the uncommon case: we found an alias whose
	 * tag is VT_NON and whose type is VBLK, which means we found a
	 * block device that was created using bdevvp.
	 * An example of such a vnode is the root partition device vnode
	 * created in ffs_mountroot.
	 *
	 * The vnodes created by bdevvp should not be aliased (why?).
	 */

	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_vnlock = NULL;
	lockinit(&vp->v_lock, PVFS, "v_lock", 0, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set while
 * the vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;
	int s;
	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
		flags |= LK_INTERLOCK;
	}
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep(vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	if (vp->v_usecount == 0 &&
	    (vp->v_bioflag & VBIOONFREELIST)) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);
	}
	vp->v_usecount++;
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags, p)) != 0) {
			vp->v_usecount--;
			if (vp->v_usecount == 0)
				vputonfreelist(vp);

			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}


#ifdef DIAGNOSTIC
/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0)
		panic("vref used where vget required");
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

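/*
 * Move an unreferenced vnode to the appropriate free list: the hold
 * list if it still has buffers attached (v_holdcnt > 0), otherwise the
 * regular free list. Bad vnodes are inserted at the head so that they
 * are recycled first. Called with the vnode interlock held.
 */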
static __inline__ void
vputonfreelist(vp)
	struct vnode *vp;
{
	int s;
	struct freelst *lst;

	s = splbio();
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0)
		panic("Use count is not zero!");

	if (vp->v_bioflag & VBIOONFREELIST) {
		vprint("vnode already on free list: ", vp);
		panic("vnode already on free list");
	}
#endif

	vp->v_bioflag |= VBIOONFREELIST;

	if (vp->v_holdcnt > 0)
		lst = &vnode_hold_list;
	else
		lst = &vnode_free_list;

	if (vp->v_type == VBAD)
		TAILQ_INSERT_HEAD(lst, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(lst, vp, v_freelist);

	splx(s);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);

#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0, p);
		return;
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vput: bad writecount", vp);
		panic("vput: v_writecount != 0");
	}
#endif
	vputonfreelist(vp);

	simple_unlock(&vp->v_interlock);

	VOP_INACTIVE(vp, p);
}

/*
 * Vnode release - use for active VNODES.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vrele: bad writecount", vp);
		panic("vrele: v_writecount != 0");
	}
#endif
	vputonfreelist(vp);

	if (vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p) == 0)
		VOP_INACTIVE(vp, p);
}

void vhold(struct vnode *vp);

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

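/*
 * Call "func" for every vnode on the given mount point. The vnode
 * interlock is held when "func" is entered and "func" is responsible
 * for releasing it. Iteration stops early if "func" returns non-zero.
 */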
int
vfs_mount_foreach_vnode(struct mount *mp,
    int (*func)(struct vnode *, void *), void *arg)
{
	struct vnode *vp, *nvp;
	int error = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		simple_lock(&vp->v_interlock);
		simple_unlock(&mntvnode_slock);

		error = func(vp, arg);

		simple_lock(&mntvnode_slock);

		if (error != 0)
			break;
	}
	simple_unlock(&mntvnode_slock);

	return (error);
}


struct vflush_args {
	struct vnode *skipvp;
	int busy;
	int flags;
};

int
vflush_vnode(struct vnode *vp, void *arg)
{
	struct vflush_args *va = arg;
	struct proc *p = curproc;

	if (vp == va->skipvp) {
		simple_unlock(&vp->v_interlock);
		return (0);
	}

	if ((va->flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
		simple_unlock(&vp->v_interlock);
		return (0);
	}

	/*
	 * If WRITECLOSE is set, only flush out regular file
	 * vnodes open for writing.
	 */
	if ((va->flags & WRITECLOSE) &&
	    (vp->v_writecount == 0 || vp->v_type != VREG)) {
		simple_unlock(&vp->v_interlock);
		return (0);
	}

	/*
	 * With v_usecount == 0, all we need to do is clear
	 * out the vnode data structures and we are done.
	 */
	if (vp->v_usecount == 0) {
		vgonel(vp, p);
		return (0);
	}

	/*
	 * If FORCECLOSE is set, forcibly close the vnode.
	 * For block or character devices, revert to an
	 * anonymous device. For all other files, just kill them.
	 */
	if (va->flags & FORCECLOSE) {
		if (vp->v_type != VBLK && vp->v_type != VCHR) {
			vgonel(vp, p);
		} else {
			vclean(vp, 0, p);
			vp->v_op = spec_vnodeop_p;
			insmntque(vp, (struct mount *)0);
		}
		return (0);
	}

#ifdef DEBUG
	if (busyprt)
		vprint("vflush: busy vnode", vp);
#endif
	simple_unlock(&vp->v_interlock);
	va->busy++;
	return (0);
}

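/*
 * Remove any vnodes in the vnode table belonging to mount point mp,
 * as described in the comment preceding the busyprt declaration above.
 */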
int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct vflush_args va;
	va.skipvp = skipvp;
	va.busy = 0;
	va.flags = flags;

	vfs_mount_foreach_vnode(mp, vflush_vnode, &va);

	if (va.busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 * The vnode interlock is held on entry.
 */
void
vclean(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0)
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * clean out any VM data associated with the vnode.
	 */
	uvm_vnp_terminate(vp);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}

	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active) {
		simple_lock(&vp->v_interlock);

		vp->v_usecount--;
		if (vp->v_usecount == 0) {
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean");
			vputonfreelist(vp);
		}

		simple_unlock(&vp->v_interlock);
	}
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	simple_lock(&vp->v_selectinfo.vsi_lock);
	VN_KNOTE(vp, NOTE_REVOKE);
	simple_unlock(&vp->v_selectinfo.vsi_lock);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
#ifdef DIAGNOSTIC
	vp->v_flag &= ~VLOCKSWORK;
#endif
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup(vp);
	}
}



/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}


/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;

	simple_lock (&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep(vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * The vnode is no longer usable; mark it bad.
	 */
	vp->v_type = VBAD;

	/*
	 * Move onto the free list, unless we were called from
	 * getnewvnode and we're not on any free list
	 */
	if (vp->v_usecount == 0 &&
	    (vp->v_bioflag & VBIOONFREELIST)) {
		int s;

		simple_lock(&vnode_free_list_slock);
		s = splbio();

		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean");

		if (TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		splx(s);
		simple_unlock(&vnode_free_list_slock);
	}
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %u, writecount %u, holdcount %u,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strlcat(buf, "|VROOT", sizeof buf);
	if (vp->v_flag & VTEXT)
		strlcat(buf, "|VTEXT", sizeof buf);
	if (vp->v_flag & VSYSTEM)
		strlcat(buf, "|VSYSTEM", sizeof buf);
	if (vp->v_flag & VXLOCK)
		strlcat(buf, "|VXLOCK", sizeof buf);
	if (vp->v_flag & VXWANT)
		strlcat(buf, "|VXWANT", sizeof buf);
	if (vp->v_bioflag & VBIOWAIT)
1328		strlcat(buf, "| VBIOWAIT", sizeof buf);
	if (vp->v_flag & VALIASED)
		strlcat(buf, "|VALIASED", sizeof buf);
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct proc *p = curproc;
	register struct mount *mp, *nmp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first; vp;
		    vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	struct vfsconf *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
		    sizeof(struct vfsconf)));
	}
	return (EOPNOTSUPP);
}


int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	register struct mount *mp, *nmp;
	struct vnode *vp, *nvp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * sizeof(struct e_vnode);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		savebp = bp;
again:
		for (vp = mp->mnt_vnodelist.lh_first; vp != NULL;
		    vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + sizeof(struct e_vnode) > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				vfs_unbusy(mp, p);
				return (ENOMEM);
			}
			if ((error = copyout(&vp,
			    &((struct e_vnode *)bp)->vptr,
			    sizeof(struct vnode *))) ||
			   (error = copyout(vp,
			    &((struct e_vnode *)bp)->vnode,
			    sizeof(struct vnode)))) {
				vfs_unbusy(mp, p);
				return (error);
			}
			bp += sizeof(struct e_vnode);
			simple_lock(&mntvnode_slock);
		}

		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, p);
	}

	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	if (argp->ex_addrlen > MLEN)
		return (EINVAL);
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero(np, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if (i < 0 || i > AF_MAX) {
		error = EINVAL;
		goto out;
	}
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh, NULL);
	free(rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i]) != NULL) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
			free(rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

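/*
 * Update the export information for a mount point: tear down the
 * existing export list if MNT_DELEXPORT is requested and (re)build
 * it if MNT_EXPORTED is requested.
 */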
int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

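/*
 * Look up the export credentials matching the client address in "nam",
 * falling back to the default export entry if one exists.
 */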
struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list
 */
int
vaccess(file_mode, uid, gid, acc_mode, cred)
	mode_t file_mode;
	uid_t uid;
	gid_t gid;
	mode_t acc_mode;
	struct ucred *cred;
{
	mode_t mask;

	/* User id 0 always gets access. */
	if (cred->cr_uid == 0)
		return 0;

	mask = 0;

	/* Otherwise, check the owner. */
	if (cred->cr_uid == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check the groups. */
	if (cred->cr_gid == gid || groupmember(gid, cred)) {
		if (acc_mode & VEXEC)
			mask |= S_IXGRP;
		if (acc_mode & VREAD)
			mask |= S_IRGRP;
		if (acc_mode & VWRITE)
			mask |= S_IWGRP;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check everyone else. */
	if (acc_mode & VEXEC)
		mask |= S_IXOTH;
	if (acc_mode & VREAD)
		mask |= S_IROTH;
	if (acc_mode & VWRITE)
		mask |= S_IWOTH;
	return (file_mode & mask) == mask ? 0 : EACCES;
}
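
/*
 * Example with hypothetical values: for a file with mode 0640 owned by
 * uid 100 and gid 10, vaccess() grants VREAD and VWRITE to a cred with
 * cr_uid 100, grants VREAD but denies VWRITE to a member of group 10,
 * and denies both to everyone else (unless cr_uid is 0, which always
 * succeeds).
 */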

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall(void)
{
	struct mount *mp, *nmp;
	int allerror, error, again = 1;
	struct proc *p = curproc;

 retry:
	allerror = 0;
	for (mp = CIRCLEQ_LAST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		nmp = CIRCLEQ_PREV(mp, mnt_list);
		if ((vfs_busy(mp, LK_EXCLUSIVE|LK_NOWAIT, NULL, p)) != 0)
			continue;
		if ((error = dounmount(mp, MNT_FORCE, curproc, NULL)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}

	if (allerror) {
		printf("WARNING: some file systems would not unmount\n");
		if (again) {
			printf("retrying\n");
			again = 0;
			goto retry;
		}
	}
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	extern void acct_shutdown(void);

	/* XXX Should suspend scheduling. */
	(void) spl0();

	printf("syncing disks... ");

	acct_shutdown();

	if (panicstr == 0) {
		/* Sync before unmount, in case we hang on something. */
		sys_sync(&proc0, (void *)0, (register_t *)0);

		/* Unmount file systems. */
		vfs_unmountall();
	}

	if (vfs_syncwait(1))
		printf("giving up\n");
	else
		printf("done\n");
}

/*
 * Perform the sync() operation and wait for buffers to flush.
 * Assumptions: called with the scheduler disabled and physical I/O
 * enabled; for now called at spl0(). XXX
 */
int
vfs_syncwait(verbose)
	int verbose;
{
	register struct buf *bp;
	int iter, nbusy, dcount, s;
	struct proc *p;

	p = curproc ? curproc : &proc0;
	sys_sync(p, (void *)0, (register_t *)0);

	/* Wait for sync to finish. */
	dcount = 10000;
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_flags & B_DELWRI) {
				s = splbio();
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				nbusy++;
				bawrite(bp);
				if (dcount-- <= 0) {
					if (verbose)
						printf("softdep ");
					return 1;
				}
			}
		}
		if (nbusy == 0)
			break;
		if (verbose)
			printf("%d ", nbusy);
		DELAY(40000 * iter);
	}

	return nbusy;
}

/*
 * posix file system related system variables.
 */
int
fs_posix_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	/* all sysctl names at this level are terminal */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case FS_POSIX_SETUID:
		if (newp && securelevel > 0)
			return (EPERM);
		return (sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

/*
 * file system related system variables.
 */
int
fs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	sysctlfn *fn;

	switch (name[0]) {
	case FS_POSIX:
		fn = fs_posix_sysctl;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
}


/*
 * Routines dealing with vnodes and buffers
 */

/*
 * Wait for all outstanding I/Os to complete
 *
 * Manipulates v_numoutput. Must be called at splbio()
 */
int
vwaitforio(vp, slpflag, wmesg, timeo)
	struct vnode *vp;
	int slpflag, timeo;
	char *wmesg;
{
	int error = 0;

	splassert(IPL_BIO);

	while (vp->v_numoutput) {
		vp->v_bioflag |= VBIOWAIT;
		error = tsleep(&vp->v_numoutput,
		    slpflag | (PRIBIO + 1), wmesg, timeo);
		if (error)
			break;
	}

	return (error);
}


/*
 * Update outstanding I/O count and do wakeup if requested.
 *
 * Manipulates v_numoutput. Must be called at splbio()
 */
void
vwakeup(vp)
	struct vnode *vp;
{
	splassert(IPL_BIO);

	if (vp != NULL) {
		if (vp->v_numoutput-- == 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_bioflag & VBIOWAIT) && vp->v_numoutput == 0) {
			vp->v_bioflag &= ~VBIOWAIT;
			wakeup(&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		s = splbio();
		vwaitforio(vp, 0, "vinvalbuf", 0);
		if (vp->v_dirtyblkhd.lh_first != NULL) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    vp->v_dirtyblkhd.lh_first != NULL)
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
loop:
	s = splbio();
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep(bp, slpflag | (PRIBIO + 1),
				    "vinvalbuf", slptimeo);
				if (error) {
					splx(s);
					return (error);
				}
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				splx(s);
				(void) VOP_BWRITE(bp);
				goto loop;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	splx(s);
	return (0);
}

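/*
 * Write out all dirty buffers associated with a vnode. If "sync" is
 * set, wait for the I/O to drain and repeat until no dirty buffers
 * remain.
 */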
void
vflushbuf(vp, sync)
	register struct vnode *vp;
	int sync;
{
	register struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	vwaitforio(vp, 0, "vflushbuf", 0);
	if (vp->v_dirtyblkhd.lh_first != NULL) {
		splx(s);
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
	splx(s);
}

/*
 * Associate a buffer with a vnode.
 *
 * Manipulates buffer vnode queues. Must be called at splbio().
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	splassert(IPL_BIO);

	if (bp->b_vp)
		panic("bgetvp: not free");
	vhold(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 *
 * Manipulates vnode buffer queues. Must be called at splbio().
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	splassert(IPL_BIO);

	if ((vp = bp->b_vp) == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	if ((vp->v_bioflag & VBIOONSYNCLIST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_bioflag &= ~VBIOONSYNCLIST;
		LIST_REMOVE(vp, v_synclist);
	}
	bp->b_vp = (struct vnode *) 0;

	simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
	if (vp->v_holdcnt == 0)
		panic("brelvp: holdcnt");
#endif
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.
	 */
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Replace the current vnode associated with the buffer, if any,
 * with a new vnode.
 *
 * If an output I/O is pending on the buffer, the old vnode's
 * I/O count is adjusted.
 *
 * Ignores vnode buffer queues. Must be called at splbio().
 */
void
buf_replacevnode(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct vnode *oldvp = bp->b_vp;

	splassert(IPL_BIO);

	if (oldvp)
		brelvp(bp);

	if ((bp->b_flags & (B_READ | B_DONE)) == 0) {
		newvp->v_numoutput++;	/* put it on swapdev */
		vwakeup(oldvp);
	}

	bgetvp(newvp, bp);
	bufremvn(bp);
}

/*
 * Used to assign buffers to the appropriate clean or dirty list on
 * the vnode and to add newly dirty vnodes to the appropriate
 * filesystem syncer list.
 *
 * Manipulates vnode buffer queues. Must be called at splbio().
 */
void
reassignbuf(bp)
	struct buf *bp;
{
	struct buflists *listheadp;
	int delay;
	struct vnode *vp = bp->b_vp;

	splassert(IPL_BIO);

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &vp->v_cleanblkhd;
		if ((vp->v_bioflag & VBIOONSYNCLIST) &&
		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
			vp->v_bioflag &= ~VBIOONSYNCLIST;
			LIST_REMOVE(vp, v_synclist);
		}
	} else {
		listheadp = &vp->v_dirtyblkhd;
		if ((vp->v_bioflag & VBIOONSYNCLIST) == 0) {
			switch (vp->v_type) {
			case VDIR:
				delay = syncdelay / 2;
				break;
			case VBLK:
				if (vp->v_specmountpoint != NULL) {
					delay = syncdelay / 3;
					break;
				}
				/* fall through */
			default:
				delay = syncdelay;
			}
			vn_syncer_add_to_worklist(vp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

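/*
 * Register a filesystem type: link it into the vfsconf list, update
 * maxvfsconf and call the filesystem's vfs_init() routine, if any.
 */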
int
vfs_register(vfs)
	struct vfsconf *vfs;
{
	struct vfsconf *vfsp;
	struct vfsconf **vfspp;

#ifdef DIAGNOSTIC
	/* Paranoia? */
	if (vfs->vfc_refcount != 0)
		printf("vfs_register called with vfc_refcount > 0\n");
#endif

	/* Check if filesystem already known */
	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next)
		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
			return (EEXIST);

	if (vfs->vfc_typenum > maxvfsconf)
		maxvfsconf = vfs->vfc_typenum;

	vfs->vfc_next = NULL;

	/* Add to the end of the list */
	*vfspp = vfs;

	/* Call vfs_init() */
	if (vfs->vfc_vfsops->vfs_init)
		(*(vfs->vfc_vfsops->vfs_init))(vfs);

	return 0;
}

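/*
 * Unregister a filesystem type, provided it is no longer in use, and
 * recompute maxvfsconf from the remaining entries.
 */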
int
vfs_unregister(vfs)
	struct vfsconf *vfs;
{
	struct vfsconf *vfsp;
	struct vfsconf **vfspp;
	int maxtypenum;

	/* Find our vfsconf struct */
	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next) {
		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
			break;
	}

	if (!vfsp)			/* Not found */
		return (ENOENT);

	if (vfsp->vfc_refcount)		/* In use */
		return (EBUSY);

	/* Remove from list and free */
	*vfspp = vfsp->vfc_next;

	maxtypenum = 0;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (vfsp->vfc_typenum > maxtypenum)
			maxtypenum = vfsp->vfc_typenum;

	maxvfsconf = maxtypenum;
	return 0;
}

/*
 * Check if the vnode represents a disk device.
 * (The errp argument is accepted but currently never set.)
 */
int
vn_isdisk(vp, errp)
	struct vnode *vp;
	int *errp;
{
	if (vp->v_type != VBLK && vp->v_type != VCHR)
		return (0);

	return (1);
}
