/*	$OpenBSD: vfs_subr.c,v 1.111 2005/05/01 12:28:18 pedro Exp $	*/
/*	$NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>

#include <uvm/uvm_extern.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
int suid_clear = 1;		/* 1 => clear SUID / SGID on owner change */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	LIST_NEXT(bp, b_vnbufs) = NOLIST;				\
}

struct freelst vnode_hold_list;	/* list of vnodes referencing buffers */
struct freelst vnode_free_list;	/* vnode free list */

struct mntlist mountlist;	/* mounted filesystem list */
struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
struct simplelock spechash_slock;

void	vclean(struct vnode *, int, struct proc *);

void insmntque(struct vnode *, struct mount *);
int getdevvp(dev_t, struct vnode **, enum vtype);

int vfs_hang_addrlist(struct mount *, struct netexport *,
				  struct export_args *);
int vfs_free_netcred(struct radix_node *, void *);
void vfs_free_addrlist(struct netexport *);
void vputonfreelist(struct vnode *);

int vflush_vnode(struct vnode *, void *);

#ifdef DEBUG
void printlockedvnodes(void);
#endif

#define VN_KNOTE(vp, b) \
	KNOTE((struct klist *)&vp->v_selectinfo.vsi_selinfo.si_note, (b))

struct pool vnode_pool;

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodes",
	    &pool_allocator_nointr);
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_hold_list);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
	simple_lock_init(&mountlist_slock);
	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 *
 * historical behavior:
 *  - LK_NOWAIT means that we should just ignore the mount point if it's
 *     being unmounted.
 *  - no flags means that we should sleep on the mountpoint and then
 *     fail.
 */
int
vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp,
    struct proc *p)
{
	int lkflags;

	switch (flags) {
	case LK_NOWAIT:
		lkflags = LK_SHARED|LK_NOWAIT;
		break;
	case 0:
		lkflags = LK_SHARED;
		break;
	default:
		lkflags = flags;
	}

	/*
	 * Always sleepfail. We will only sleep for an exclusive lock
	 * and the exclusive lock will only be acquired when unmounting.
	 */
	lkflags |= LK_SLEEPFAIL;

	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		return (ENOENT);
	return (0);
}
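
/*
 * A typical traversal pattern (used by printlockedvnodes() below and by
 * sysctl_vnode()): try-busy each mount point, skip it if it is being
 * unmounted, and release it when done.
 *
 *	if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p) == 0) {
 *		... examine mp ...
 *		vfs_unbusy(mp, p);
 *	}
 */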

/*
 * Free a busy file system
 */
void
vfs_unbusy(struct mount *mp, struct proc *p)
{
	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}

int
vfs_isbusy(struct mount *mp)
{
	return (lockstatus(&mp->mnt_lock));
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
int
vfs_mountroot()
{
	struct vfsconf *vfsp;
	int error;

	if (mountroot != NULL)
		return ((*mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}


/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 * Note that this is no longer used for ffs which
 * now has an on-disk filesystem id.
 */
long
makefstype(type)
	char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}
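
/*
 * Worked example: makefstype("ffs") folds the name two bits at a time,
 * ((('f' << 2) ^ 'f') << 2) ^ 's' == 0x78b.
 */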

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	/* XXX These next two used to be one line, but for a GCC bug. */
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
		vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
		vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)(void *);
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)(void *);
	struct vnode **vpp;
{
	struct proc *p = curproc;			/* XXX */
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int s;

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a vnode from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reluctant to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */
	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	simple_lock(&vnode_free_list_slock);
	s = splbio();
	if ((numvnodes < desiredvnodes) ||
	    ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) &&
	    ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
		splx(s);
		simple_unlock(&vnode_free_list_slock);
		vp = pool_get(&vnode_pool, PR_WAITOK);
		bzero((char *)vp, sizeof *vp);
		simple_lock_init(&vp->v_interlock);
		numvnodes++;
	} else {
		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
		    vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock)) {
				if ((vp->v_flag & VLAYER) == 0)
					break;
				if (VOP_ISLOCKED(vp) == 0)
					break;
				else
					simple_unlock(&vp->v_interlock);
			}
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULL) {
			splx(s);
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount) {
			vprint("free vnode", vp);
			panic("free vnode isn't");
		}

		TAILQ_REMOVE(listhd, vp, v_freelist);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);

		simple_unlock(&vnode_free_list_slock);
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data) {
			vprint("cleaned vnode", vp);
			panic("cleaned vnode isn't");
		}
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_vnlock = NULL;
	lockinit(&vp->v_lock, PVFS, "v_lock", 0, 0);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
	return (0);
}
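
/*
 * A minimal in-tree example of this interface is getdevvp() below,
 * which allocates an anonymous device vnode:
 *
 *	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
 *
 * On success the new vnode is returned with v_usecount == 1 and no
 * vnode lock held.
 */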

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{
	simple_lock(&mntvnode_slock);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);

	simple_unlock(&mntvnode_slock);
}


/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		simple_lock(&vp->v_interlock);
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}

	/*
	 * Common case is actually in the if statement
	 */
	if (vp == NULL || !(vp->v_tag == VT_NON && vp->v_type == VBLK)) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		nvp->v_speclockf = NULL;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}

	/*
	 * This code is the uncommon case. It is called in case
	 * we found an alias that was VT_NON && vtype of VBLK
	 * This means we found a block device that was created
	 * using bdevvp.
	 * An example of such a vnode is the root partition device vnode
	 * created in ffs_mountroot.
	 *
	 * The vnodes created by bdevvp should not be aliased (why?).
	 */

	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_vnlock = NULL;
	lockinit(&vp->v_lock, PVFS, "v_lock", 0, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. If the vnode lock bit is set,
 * the vnode is being eliminated in vgone. In that case, we
 * cannot grab it, so the process is awakened when the
 * transition is completed, and an error code is returned to
 * indicate that the vnode is no longer usable, possibly
 * having been changed to a new file system type.
 */
int
vget(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error, s, onfreelist;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
		flags |= LK_INTERLOCK;
	}

	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return (EBUSY);
		}

		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD | PNORELOCK, "vget", 0, &vp->v_interlock);
		return (ENOENT);
	}

	onfreelist = vp->v_bioflag & VBIOONFREELIST;
	if (vp->v_usecount == 0 && onfreelist) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);
	}

	vp->v_usecount++;
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags, p)) != 0) {
			vp->v_usecount--;
			if (vp->v_usecount == 0 && onfreelist)
				vputonfreelist(vp);

			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}

	simple_unlock(&vp->v_interlock);

	return (0);
}
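
/*
 * A successful vget() is normally undone with vput() when a lock type
 * was requested, or with vrele() otherwise; checkalias() above shows
 * the locked form:
 *
 *	if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
 *		... use vp ...
 *		vput(vp);
 *	}
 */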


#ifdef DIAGNOSTIC
/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0)
		panic("vref used where vget required");
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

void
vputonfreelist(struct vnode *vp)
{
	int s;
	struct freelst *lst;

	s = splbio();
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0)
		panic("Use count is not zero!");

	if (vp->v_bioflag & VBIOONFREELIST) {
		vprint("vnode already on free list: ", vp);
		panic("vnode already on free list");
	}
#endif

	vp->v_bioflag |= VBIOONFREELIST;

	if (vp->v_holdcnt > 0)
		lst = &vnode_hold_list;
	else
		lst = &vnode_free_list;

	if (vp->v_type == VBAD)
		TAILQ_INSERT_HEAD(lst, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(lst, vp, v_freelist);

	splx(s);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);

#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0, p);
		return;
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vput: bad writecount", vp);
		panic("vput: v_writecount != 0");
	}
#endif
	simple_unlock(&vp->v_interlock);

	VOP_INACTIVE(vp, p);

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount == 0)
		vputonfreelist(vp);

	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode release - use for active VNODES.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vrele: bad writecount", vp);
		panic("vrele: v_writecount != 0");
	}
#endif
	if (vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p)) {
		vprint("vrele: cannot lock", vp);
		return;
	}

	VOP_INACTIVE(vp, p);

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount == 0)
		vputonfreelist(vp);

	simple_unlock(&vp->v_interlock);
}

void vhold(struct vnode *vp);

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vfs_mount_foreach_vnode(struct mount *mp,
    int (*func)(struct vnode *, void *), void *arg) {
	struct vnode *vp, *nvp;
	int error = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = LIST_NEXT(vp, v_mntvnodes);
		simple_lock(&vp->v_interlock);
		simple_unlock(&mntvnode_slock);

		error = func(vp, arg);

		simple_lock(&mntvnode_slock);

		if (error != 0)
			break;
	}
	simple_unlock(&mntvnode_slock);

	return (error);
}
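
/*
 * The callback is entered with vp->v_interlock held and mntvnode_slock
 * dropped, and it is responsible for releasing the interlock on every
 * path (directly, or indirectly via vgonel() and friends).
 * vflush_vnode() below is the canonical example.
 */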

struct vflush_args {
	struct vnode *skipvp;
	int busy;
	int flags;
};

int
vflush_vnode(struct vnode *vp, void *arg) {
	struct vflush_args *va = arg;
	struct proc *p = curproc;

	if (vp == va->skipvp) {
		simple_unlock(&vp->v_interlock);
		return (0);
	}

	if ((va->flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
		simple_unlock(&vp->v_interlock);
		return (0);
	}

	/*
	 * If WRITECLOSE is set, only flush out regular file
	 * vnodes open for writing.
	 */
	if ((va->flags & WRITECLOSE) &&
	    (vp->v_writecount == 0 || vp->v_type != VREG)) {
		simple_unlock(&vp->v_interlock);
		return (0);
	}

	/*
	 * With v_usecount == 0, all we need to do is clear
	 * out the vnode data structures and we are done.
	 */
	if (vp->v_usecount == 0) {
		vgonel(vp, p);
		return (0);
	}

	/*
	 * If FORCECLOSE is set, forcibly close the vnode.
	 * For block or character devices, revert to an
	 * anonymous device. For all other files, just kill them.
	 */
	if (va->flags & FORCECLOSE) {
		if (vp->v_type != VBLK && vp->v_type != VCHR) {
			vgonel(vp, p);
		} else {
			vclean(vp, 0, p);
			vp->v_op = spec_vnodeop_p;
			insmntque(vp, (struct mount *)0);
		}
		return (0);
	}

#ifdef DEBUG
	if (busyprt)
		vprint("vflush: busy vnode", vp);
#endif
	simple_unlock(&vp->v_interlock);
	va->busy++;
	return (0);
}

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct vflush_args va;
	va.skipvp = skipvp;
	va.busy = 0;
	va.flags = flags;

	vfs_mount_foreach_vnode(mp, vflush_vnode, &va);

	if (va.busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 * The vnode interlock is held on entry.
 */
void
vclean(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0)
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * Clean out any VM data associated with the vnode.
	 */
	uvm_vnp_terminate(vp);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}

	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active) {
		simple_lock(&vp->v_interlock);

		vp->v_usecount--;
		if (vp->v_usecount == 0) {
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean");
			vputonfreelist(vp);
		}

		simple_unlock(&vp->v_interlock);
	}
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	simple_lock(&vp->v_selectinfo.vsi_lock);
	VN_KNOTE(vp, NOTE_REVOKE);
	simple_unlock(&vp->v_selectinfo.vsi_lock);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
#ifdef DIAGNOSTIC
	vp->v_flag &= ~VLOCKSWORK;
#endif
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup(vp);
	}
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}


/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.
	 */
	vp->v_type = VBAD;

	/*
	 * Move onto the free list, unless we were called from
	 * getnewvnode and we're not on any free list
	 */
	if (vp->v_usecount == 0 &&
	    (vp->v_bioflag & VBIOONFREELIST)) {
		int s;

		simple_lock(&vnode_free_list_slock);
		s = splbio();

		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean");

		if (TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		splx(s);
		simple_unlock(&vnode_free_list_slock);
	}
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %u, writecount %u, holdcount %u,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strlcat(buf, "|VROOT", sizeof buf);
	if (vp->v_flag & VTEXT)
		strlcat(buf, "|VTEXT", sizeof buf);
	if (vp->v_flag & VSYSTEM)
		strlcat(buf, "|VSYSTEM", sizeof buf);
	if (vp->v_flag & VXLOCK)
		strlcat(buf, "|VXLOCK", sizeof buf);
	if (vp->v_flag & VXWANT)
		strlcat(buf, "|VXWANT", sizeof buf);
	if (vp->v_bioflag & VBIOWAIT)
		strlcat(buf, "|VBIOWAIT", sizeof buf);
	if (vp->v_bioflag & VBIOONFREELIST)
		strlcat(buf, "|VBIOONFREELIST", sizeof buf);
	if (vp->v_bioflag & VBIOONSYNCLIST)
		strlcat(buf, "|VBIOONSYNCLIST", sizeof buf);
	if (vp->v_flag & VALIASED)
		strlcat(buf, "|VALIASED", sizeof buf);
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct proc *p = curproc;
	register struct mount *mp, *nmp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	struct vfsconf *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
		    sizeof(struct vfsconf)));
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	register struct mount *mp, *nmp;
	struct vnode *vp, *nvp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * sizeof(struct e_vnode);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		savebp = bp;
again:
		for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL;
		    vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = LIST_NEXT(vp, v_mntvnodes);
			if (bp + sizeof(struct e_vnode) > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				vfs_unbusy(mp, p);
				return (ENOMEM);
			}
			if ((error = copyout(&vp,
			    &((struct e_vnode *)bp)->vptr,
			    sizeof(struct vnode *))) ||
			   (error = copyout(vp,
			    &((struct e_vnode *)bp)->vnode,
			    sizeof(struct vnode)))) {
				vfs_unbusy(mp, p);
				return (error);
			}
			bp += sizeof(struct e_vnode);
			simple_lock(&mntvnode_slock);
		}

		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, p);
	}

	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}
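
/*
 * Userland consumes this through the kern.vnode (KERN_VNODE) sysctl;
 * each record is a struct e_vnode, i.e. the kernel address of a vnode
 * followed by a copy of the vnode itself.
 */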

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN ||
	    argp->ex_addrlen < 0 || argp->ex_masklen < 0)
		return (EINVAL);
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero(np, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if (i < 0 || i > AF_MAX) {
		error = EINVAL;
		goto out;
	}
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh, NULL);
	free(rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i]) != NULL) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
			free(rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list
 */
int
vaccess(file_mode, uid, gid, acc_mode, cred)
	mode_t file_mode;
	uid_t uid;
	gid_t gid;
	mode_t acc_mode;
	struct ucred *cred;
{
	mode_t mask;

	/* User id 0 always gets access. */
	if (cred->cr_uid == 0)
		return 0;

	mask = 0;

	/* Otherwise, check the owner. */
	if (cred->cr_uid == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check the groups. */
	if (cred->cr_gid == gid || groupmember(gid, cred)) {
		if (acc_mode & VEXEC)
			mask |= S_IXGRP;
		if (acc_mode & VREAD)
			mask |= S_IRGRP;
		if (acc_mode & VWRITE)
			mask |= S_IWGRP;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check everyone else. */
	if (acc_mode & VEXEC)
		mask |= S_IXOTH;
	if (acc_mode & VREAD)
		mask |= S_IROTH;
	if (acc_mode & VWRITE)
		mask |= S_IWOTH;
	return (file_mode & mask) == mask ? 0 : EACCES;
}
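
/*
 * Worked example: a VWRITE request on a file with mode 0644 by a group
 * member who is not the owner sets mask to S_IWGRP; since
 * (0644 & S_IWGRP) != S_IWGRP, vaccess() returns EACCES.
 */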

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall(void)
{
	struct mount *mp, *nmp;
	int allerror, error, again = 1;
	struct proc *p = curproc;

 retry:
	allerror = 0;
	for (mp = CIRCLEQ_LAST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		nmp = CIRCLEQ_PREV(mp, mnt_list);
		if ((vfs_busy(mp, LK_EXCLUSIVE|LK_NOWAIT, NULL, p)) != 0)
			continue;
		if ((error = dounmount(mp, MNT_FORCE, curproc, NULL)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}

	if (allerror) {
		printf("WARNING: some file systems would not unmount\n");
		if (again) {
			printf("retrying\n");
			again = 0;
			goto retry;
		}
	}
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
#ifdef ACCOUNTING
	extern void acct_shutdown(void);

	acct_shutdown();
#endif

	/* XXX Should suspend scheduling. */
	(void) spl0();

	printf("syncing disks... ");

	if (panicstr == 0) {
		/* Sync before unmount, in case we hang on something. */
		sys_sync(&proc0, (void *)0, (register_t *)0);

		/* Unmount file systems. */
		vfs_unmountall();
	}

	if (vfs_syncwait(1))
		printf("giving up\n");
	else
		printf("done\n");
}

/*
 * Perform the sync() operation and wait for buffers to flush.
 * Assumptions: called with the scheduler disabled and physical I/O
 * enabled; for now called at spl0(). XXX
 */
int
vfs_syncwait(verbose)
	int verbose;
{
	register struct buf *bp;
	int iter, nbusy, dcount, s;
	struct proc *p;

	p = curproc? curproc : &proc0;
	sys_sync(p, (void *)0, (register_t *)0);

	/* Wait for sync to finish. */
	dcount = 10000;
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_flags & B_DELWRI) {
				s = splbio();
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				nbusy++;
				bawrite(bp);
				if (dcount-- <= 0) {
					if (verbose)
						printf("softdep ");
					return 1;
				}
			}
		}
		if (nbusy == 0)
			break;
		if (verbose)
			printf("%d ", nbusy);
		DELAY(40000 * iter);
	}

	return nbusy;
}

/*
 * posix file system related system variables.
 */
int
fs_posix_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	/* all sysctl names at this level are terminal */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case FS_POSIX_SETUID:
		if (newp && securelevel > 0)
			return (EPERM);
		return (sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}
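
/*
 * The only knob exported here is suid_clear, visible from userland as
 * the fs.posix.setuid sysctl variable; it can only be changed while
 * securelevel <= 0, e.g. "sysctl fs.posix.setuid=0" from
 * /etc/sysctl.conf at boot.
 */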

/*
 * file system related system variables.
 */
int
fs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	sysctlfn *fn;

	switch (name[0]) {
	case FS_POSIX:
		fn = fs_posix_sysctl;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
}


/*
 * Routines dealing with vnodes and buffers
 */

/*
 * Wait for all outstanding I/Os to complete
 *
 * Manipulates v_numoutput. Must be called at splbio()
 */
int
vwaitforio(vp, slpflag, wmesg, timeo)
	struct vnode *vp;
	int slpflag, timeo;
	char *wmesg;
{
	int error = 0;

	splassert(IPL_BIO);

	while (vp->v_numoutput) {
		vp->v_bioflag |= VBIOWAIT;
		error = tsleep(&vp->v_numoutput,
		    slpflag | (PRIBIO + 1), wmesg, timeo);
		if (error)
			break;
	}

	return (error);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 *
 * Manipulates v_numoutput. Must be called at splbio()
 */
void
vwakeup(vp)
	struct vnode *vp;
{
	splassert(IPL_BIO);

	if (vp != NULL) {
		if (vp->v_numoutput-- == 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_bioflag & VBIOWAIT) && vp->v_numoutput == 0) {
			vp->v_bioflag &= ~VBIOWAIT;
			wakeup(&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		s = splbio();
		vwaitforio(vp, 0, "vinvalbuf", 0);
		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    !LIST_EMPTY(&vp->v_dirtyblkhd))
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
loop:
	s = splbio();
	for (;;) {
		if ((blist = LIST_FIRST(&vp->v_cleanblkhd)) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = LIST_NEXT(blist, b_vnbufs);
		if (blist == NULL &&
		    (blist = LIST_FIRST(&vp->v_dirtyblkhd)) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = LIST_NEXT(blist, b_vnbufs);
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = LIST_NEXT(bp, b_vnbufs);
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep(bp, slpflag | (PRIBIO + 1),
				    "vinvalbuf", slptimeo);
				if (error) {
					splx(s);
					return (error);
				}
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				splx(s);
				(void) VOP_BWRITE(bp);
				goto loop;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (!LIST_EMPTY(&vp->v_dirtyblkhd) || !LIST_EMPTY(&vp->v_cleanblkhd)))
		panic("vinvalbuf: flush failed");
	splx(s);
	return (0);
}

void
vflushbuf(vp, sync)
	register struct vnode *vp;
	int sync;
{
	register struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd);
	    bp != LIST_END(&vp->v_dirtyblkhd); bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	vwaitforio(vp, 0, "vflushbuf", 0);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		splx(s);
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
	splx(s);
}

/*
 * Associate a buffer with a vnode.
 *
 * Manipulates buffer vnode queues. Must be called at splbio().
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	splassert(IPL_BIO);

	if (bp->b_vp)
		panic("bgetvp: not free");
	vhold(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 *
 * Manipulates vnode buffer queues. Must be called at splbio().
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	splassert(IPL_BIO);

	if ((vp = bp->b_vp) == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);
	if ((vp->v_bioflag & VBIOONSYNCLIST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_bioflag &= ~VBIOONSYNCLIST;
		LIST_REMOVE(vp, v_synclist);
	}
	bp->b_vp = (struct vnode *) 0;

	simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
	if (vp->v_holdcnt == 0)
		panic("brelvp: holdcnt");
#endif
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.
	 */
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Replaces the current vnode associated with the buffer, if any,
 * with a new vnode.
 *
 * If an output I/O is pending on the buffer, the old vnode
 * I/O count is adjusted.
 *
 * Ignores vnode buffer queues. Must be called at splbio().
 */
void
buf_replacevnode(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct vnode *oldvp = bp->b_vp;

	splassert(IPL_BIO);

	if (oldvp)
		brelvp(bp);

	if ((bp->b_flags & (B_READ | B_DONE)) == 0) {
		newvp->v_numoutput++;	/* put it on swapdev */
		vwakeup(oldvp);
	}

	bgetvp(newvp, bp);
	bufremvn(bp);
}

/*
 * Used to assign buffers to the appropriate clean or dirty list on
 * the vnode and to add newly dirty vnodes to the appropriate
 * filesystem syncer list.
 *
 * Manipulates vnode buffer queues. Must be called at splbio().
 */
void
reassignbuf(bp)
	struct buf *bp;
{
	struct buflists *listheadp;
	int delay;
	struct vnode *vp = bp->b_vp;

	splassert(IPL_BIO);

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &vp->v_cleanblkhd;
		if ((vp->v_bioflag & VBIOONSYNCLIST) &&
		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
			vp->v_bioflag &= ~VBIOONSYNCLIST;
			LIST_REMOVE(vp, v_synclist);
		}
	} else {
		listheadp = &vp->v_dirtyblkhd;
		if ((vp->v_bioflag & VBIOONSYNCLIST) == 0) {
			switch (vp->v_type) {
			case VDIR:
				delay = syncdelay / 2;
				break;
			case VBLK:
				if (vp->v_specmountpoint != NULL) {
					delay = syncdelay / 3;
					break;
				}
				/* fall through */
			default:
				delay = syncdelay;
			}
			vn_syncer_add_to_worklist(vp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

int
vfs_register(vfs)
	struct vfsconf *vfs;
{
	struct vfsconf *vfsp;
	struct vfsconf **vfspp;

#ifdef DIAGNOSTIC
	/* Paranoia? */
	if (vfs->vfc_refcount != 0)
		printf("vfs_register called with vfc_refcount > 0\n");
#endif

	/* Check if filesystem already known */
	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next)
		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
			return (EEXIST);

	if (vfs->vfc_typenum > maxvfsconf)
		maxvfsconf = vfs->vfc_typenum;

	vfs->vfc_next = NULL;

	/* Add to the end of the list */
	*vfspp = vfs;

	/* Call vfs_init() */
	if (vfs->vfc_vfsops->vfs_init)
		(*(vfs->vfc_vfsops->vfs_init))(vfs);

	return 0;
}

int
vfs_unregister(vfs)
	struct vfsconf *vfs;
{
	struct vfsconf *vfsp;
	struct vfsconf **vfspp;
	int maxtypenum;

	/* Find our vfsconf struct */
	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next) {
		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
			break;
	}

	if (!vfsp)			/* Not found */
		return (ENOENT);

	if (vfsp->vfc_refcount)		/* In use */
		return (EBUSY);

	/* Remove from list and free */
	*vfspp = vfsp->vfc_next;

	maxtypenum = 0;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (vfsp->vfc_typenum > maxtypenum)
			maxtypenum = vfsp->vfc_typenum;

	maxvfsconf = maxtypenum;
	return 0;
}

/*
 * Check if vnode represents a disk device
 */
int
vn_isdisk(vp, errp)
	struct vnode *vp;
	int *errp;
{
	if (vp->v_type != VBLK && vp->v_type != VCHR)
		return (0);

	return (1);
}
2364