/*	$OpenBSD: vfs_subr.c,v 1.63 2001/06/27 04:49:48 art Exp $	*/
/*	$NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

#include <uvm/uvm_extern.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
int suid_clear = 1;		/* 1 => clear SUID / SGID on owner change */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}

struct freelst vnode_hold_list;   /* list of vnodes referencing buffers */
struct freelst vnode_free_list;   /* vnode free list */

struct mntlist mountlist;			/* mounted filesystem list */
struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
struct simplelock spechash_slock;

void	vclean __P((struct vnode *, int, struct proc *));

void insmntque __P((struct vnode *, struct mount *));
int getdevvp __P((dev_t, struct vnode **, enum vtype));

int vfs_hang_addrlist __P((struct mount *, struct netexport *,
				  struct export_args *));
int vfs_free_netcred __P((struct radix_node *, void *));
void vfs_free_addrlist __P((struct netexport *));
static __inline__ void vputonfreelist __P((struct vnode *));

#ifdef DEBUG
void printlockedvnodes __P((void));
#endif

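/*
 * Post an event to the knotes attached to the vnode.
 */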
#define VN_KNOTE(vp, b) \
	KNOTE((struct klist *)&vp->v_selectinfo.vsi_selinfo.si_note, (b))

struct pool vnode_pool;

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodes",
		0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_hold_list);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
	simple_lock_init(&mountlist_slock);
	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}


/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */

int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_flag & MNT_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_flag |= MNT_MWAIT;
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
 		sleep((caddr_t)mp, PVFS);
		if (interlkp)
			simple_lock(interlkp);
		return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}


/*
 * Free a busy file system
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{
	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */

int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
 	return (0);
 }

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
int
vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*mountroot)(void);
	int error;

	if (mountroot != NULL)
		return ((*mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
 	}
	return (ENODEV);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}


/*
 * Get a new unique fsid
 */
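/*
 * The fsid major is the filesystem type number biased past the real
 * block device majors (nblkdev), and the minor is a per-call sequence
 * number that is bumped until vfs_getvfs() finds no collision.
 */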
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 * Note that this is no longer used for ffs which
 * now has an on-disk filesystem id.
 */
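/*
 * For example, makefstype("ffs") folds the characters as
 * ((('f' << 2) ^ 'f') << 2) ^ 's' == 0x78b.
 */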
long
makefstype(type)
	char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	/* XXX These next two used to be one line, but for a GCC bug. */
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
		vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
		vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	struct proc *p = curproc;			/* XXX */
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int s;

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */
	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	simple_lock(&vnode_free_list_slock);
	s = splbio();
	if ((numvnodes < desiredvnodes) ||
	    ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) &&
	    ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
		splx(s);
		simple_unlock(&vnode_free_list_slock);
		vp = pool_get(&vnode_pool, PR_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
		    vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock))
				break;
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			splx(s);
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount) {
			vprint("free vnode", vp);
			panic("free vnode isn't");
		}

		TAILQ_REMOVE(listhd, vp, v_freelist);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);

		simple_unlock(&vnode_free_list_slock);
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data) {
			vprint("cleaned vnode", vp);
			panic("cleaned vnode isn't");
		}
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_bioflag = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{
	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */

	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}


/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		simple_lock(&vp->v_interlock);
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}

	/*
	 * Common case is actually in the if statement
	 */
	if (vp == NULL || !(vp->v_tag == VT_NON && vp->v_type == VBLK)) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		nvp->v_speclockf = NULL;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}

	/*
	 * This code is the uncommon case. It is called in case
	 * we found an alias that was VT_NON && vtype of VBLK
	 * This means we found a block device that was created
	 * using bdevvp.
	 * An example of such a vnode is the root partition device vnode
 * created in ffs_mountroot.
 *
 * The vnodes created by bdevvp should not be aliased (why?).
 */

	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;
	int s;
	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
		flags |= LK_INTERLOCK;
	}
	if (vp->v_flag & VXLOCK) {
 		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
 	}
	if (vp->v_usecount == 0 &&
	    (vp->v_bioflag & VBIOONFREELIST)) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);
	}
 	vp->v_usecount++;
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags, p)) != 0) {
			vp->v_usecount--;
			if (vp->v_usecount == 0)
				vputonfreelist(vp);

			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}


#ifdef DIAGNOSTIC
/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0)
		panic("vref used where vget required");
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

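/*
 * Put the given vnode back on the appropriate free list: the hold
 * list if buffers still reference it, the free list otherwise.
 * VBAD vnodes go to the head so they are recycled first.
 * The use count must be zero and the vnode interlock must be held.
 */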
static __inline__ void
vputonfreelist(vp)
	struct vnode *vp;
{
	int s;
	struct freelst *lst;

	s = splbio();
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0)
		panic("Use count is not zero!");

	if (vp->v_bioflag & VBIOONFREELIST) {
		vprint("vnode already on free list: ", vp);
		panic("vnode already on free list");
	}
#endif

	vp->v_bioflag |= VBIOONFREELIST;

	if (vp->v_holdcnt > 0)
		lst = &vnode_hold_list;
	else
		lst = &vnode_free_list;

	if (vp->v_type == VBAD)
		TAILQ_INSERT_HEAD(lst, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(lst, vp, v_freelist);

	splx(s);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);

#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0, p);
		return;
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vput: bad writecount", vp);
		panic("vput: v_writecount != 0");
	}
#endif
	vputonfreelist(vp);

	simple_unlock(&vp->v_interlock);

	VOP_INACTIVE(vp, p);
}

/*
 * Vnode release - use for active VNODES.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vrele: bad writecount", vp);
		panic("vrele: v_writecount != 0");
	}
#endif
	vputonfreelist(vp);

	if (vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p) == 0)
		VOP_INACTIVE(vp, p);
}

void vhold __P((struct vnode *vp));

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.
	 */
  	simple_lock(&vp->v_interlock);
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;
	register struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 * The vnode interlock is held on entry.
 */
void
vclean(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0)
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * clean out any VM data associated with the vnode.
	 */
	uvm_vnp_terminate(vp);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}

	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active) {
		simple_lock(&vp->v_interlock);

		vp->v_usecount--;
		if (vp->v_usecount == 0) {
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean");
			vputonfreelist(vp);
		}

		simple_unlock(&vp->v_interlock);
	}
	cache_purge(vp);
	if (vp->v_vnlock) {
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	simple_lock(&vp->v_selectinfo.vsi_lock);
	VN_KNOTE(vp, NOTE_REVOKE);
	simple_unlock(&vp->v_selectinfo.vsi_lock);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
#ifdef DIAGNOSTIC
	vp->v_flag &= ~VLOCKSWORK;
#endif
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}



/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}


/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.
	 */
	vp->v_type = VBAD;

	/*
	 * Move onto the free list, unless we were called from
	 * getnewvnode and we're not on any free list
	 */
	if (vp->v_usecount == 0 &&
	    (vp->v_bioflag & VBIOONFREELIST)) {
		int s;

		simple_lock(&vnode_free_list_slock);
		s = splbio();

		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean");

		if (TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		splx(s);
		simple_unlock(&vnode_free_list_slock);
	}
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %u, writecount %u, holdcount %u,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_bioflag & VBIOWAIT)
		strcat(buf, "|VBIOWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct proc *p = curproc;
	register struct mount *mp, *nmp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first; vp;
		    vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, p);
 	}
	simple_unlock(&mountlist_slock);

}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	struct vfsconf *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
		    sizeof(struct vfsconf)));
	}
	return (EOPNOTSUPP);
}


int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	register struct mount *mp, *nmp;
	struct vnode *vp, *nvp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first; vp != NULL;
		    vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			simple_unlock(&mntvnode_slock);
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				*sizep = bp - where;
				return (ENOMEM);
			}
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}

		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, p);
	}

	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	int error = 0;

 	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
 		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i]) != NULL) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

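/*
 * Update a filesystem's export data in response to a mount(2)
 * export request: delete, add, or flag the default export as asked.
 */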
int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

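/*
 * Look up the export credentials for the client address in nam,
 * falling back to the default export if one is defined.
 */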
struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list.
 */
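/*
 * A typical call from a filesystem's VOP_ACCESS routine looks like
 * the following sketch (the inode field names are illustrative and
 * vary between filesystems):
 *
 *	return (vaccess(ip->i_mode & ALLPERMS, ip->i_uid, ip->i_gid,
 *	    ap->a_mode, ap->a_cred));
 */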
int
vaccess(file_mode, uid, gid, acc_mode, cred)
	mode_t file_mode;
	uid_t uid;
	gid_t gid;
	mode_t acc_mode;
	struct ucred *cred;
{
	mode_t mask;

	/* User id 0 always gets access. */
	if (cred->cr_uid == 0)
		return 0;

	mask = 0;

	/* Otherwise, check the owner. */
	if (cred->cr_uid == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check the groups. */
	if (cred->cr_gid == gid || groupmember(gid, cred)) {
		if (acc_mode & VEXEC)
			mask |= S_IXGRP;
		if (acc_mode & VREAD)
			mask |= S_IRGRP;
		if (acc_mode & VWRITE)
			mask |= S_IWGRP;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check everyone else. */
	if (acc_mode & VEXEC)
		mask |= S_IXOTH;
	if (acc_mode & VREAD)
		mask |= S_IROTH;
	if (acc_mode & VWRITE)
		mask |= S_IWOTH;
	return (file_mode & mask) == mask ? 0 : EACCES;
}

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall()
{
	register struct mount *mp, *nmp;
	int allerror, error, again = 1;

 retry:
	allerror = 0;
	for (mp = CIRCLEQ_LAST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		nmp = CIRCLEQ_PREV(mp, mnt_list);
		if ((error = dounmount(mp, MNT_FORCE, curproc)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}

	if (allerror) {
		printf("WARNING: some file systems would not unmount\n");
		if (again) {
			printf("retrying\n");
			again = 0;
			goto retry;
		}
	}
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	/* XXX Should suspend scheduling. */
	(void) spl0();

	printf("syncing disks... ");

	if (panicstr == 0) {
		/* Sync before unmount, in case we hang on something. */
		sys_sync(&proc0, (void *)0, (register_t *)0);

		/* Unmount file systems. */
		vfs_unmountall();
	}

	if (vfs_syncwait(1))
		printf("giving up\n");
	else
		printf("done\n");
}

/*
 * Perform a sync() operation and wait for buffers to flush.
 * Assumptions: called w/ scheduler disabled and physical io enabled
 * for now called at spl0() XXX
 */
int
vfs_syncwait(verbose)
	int verbose;
{
	register struct buf *bp;
	int iter, nbusy, dcount, s;
	struct proc *p;

	p = curproc ? curproc : &proc0;
	sys_sync(p, (void *)0, (register_t *)0);

	/* Wait for sync to finish. */
	dcount = 10000;
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_flags & B_DELWRI) {
				s = splbio();
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				nbusy++;
				bawrite(bp);
				if (dcount-- <= 0) {
					if (verbose)
						printf("softdep ");
					return 1;
				}
			}
		}
		if (nbusy == 0)
			break;
		if (verbose)
			printf("%d ", nbusy);
		DELAY(40000 * iter);
	}

	return nbusy;
}

/*
 * POSIX file system related system variables.
 */
int
fs_posix_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	/* all sysctl names at this level are terminal */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case FS_POSIX_SETUID:
		if (newp && securelevel > 0)
			return (EPERM);
		return(sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

/*
 * File system related system variables.
 */
int
fs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	sysctlfn *fn;

	switch (name[0]) {
	case FS_POSIX:
		fn = fs_posix_sysctl;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
}


/*
 * Routines dealing with vnodes and buffers
 */

/*
 * Wait for all outstanding I/Os to complete
 *
 * Manipulates v_numoutput. Must be called at splbio()
 */
int
vwaitforio(vp, slpflag, wmesg, timeo)
	struct vnode *vp;
	int slpflag, timeo;
	char *wmesg;
{
	int error = 0;

	while (vp->v_numoutput) {
		vp->v_bioflag |= VBIOWAIT;
		error = tsleep((caddr_t)&vp->v_numoutput,
		    slpflag | (PRIBIO + 1), wmesg, timeo);
		if (error)
			break;
	}

	return (error);
}


/*
 * Update outstanding I/O count and do wakeup if requested.
 *
 * Manipulates v_numoutput. Must be called at splbio()
 */
void
vwakeup(vp)
	struct vnode *vp;
{
	if (vp != NULL) {
		if (vp->v_numoutput-- == 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_bioflag & VBIOWAIT) && vp->v_numoutput == 0) {
			vp->v_bioflag &= ~VBIOWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		s = splbio();
		vwaitforio(vp, 0, "vinvalbuf", 0);
		if (vp->v_dirtyblkhd.lh_first != NULL) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    vp->v_dirtyblkhd.lh_first != NULL)
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
loop:
	s = splbio();
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
					slpflag | (PRIBIO + 1), "vinvalbuf",
					slptimeo);
				if (error) {
					splx(s);
					return (error);
				}
				break;
			}
			bp->b_flags |= B_BUSY | B_VFLUSH;
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				splx(s);
				(void) VOP_BWRITE(bp);
				goto loop;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	splx(s);
	return (0);
}

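/*
 * Start output on all dirty buffers of a vnode; if sync is set,
 * wait for the vnode's dirty list to drain completely.
 */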
void
vflushbuf(vp, sync)
	register struct vnode *vp;
	int sync;
{
	register struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bp->b_flags |= B_BUSY | B_VFLUSH;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	vwaitforio(vp, 0, "vflushbuf", 0);
	if (vp->v_dirtyblkhd.lh_first != NULL) {
		splx(s);
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
	splx(s);
}

/*
 * Associate a buffer with a vnode.
 *
 * Manipulates buffer vnode queues. Must be called at splbio().
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	vhold(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 *
 * Manipulates vnode buffer queues. Must be called at splbio().
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if ((vp = bp->b_vp) == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	if ((vp->v_bioflag & VBIOONSYNCLIST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_bioflag &= ~VBIOONSYNCLIST;
		LIST_REMOVE(vp, v_synclist);
	}
	bp->b_vp = (struct vnode *) 0;

	simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
	if (vp->v_holdcnt == 0)
		panic("brelvp: holdcnt");
#endif
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.
	 */
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Replaces the current vnode associated with the buffer, if any,
 * with a new vnode.
 *
 * If an output I/O is pending on the buffer, the old vnode's
 * I/O count is adjusted.
 *
 * Ignores vnode buffer queues. Must be called at splbio().
 */
void
buf_replacevnode(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct vnode *oldvp = bp->b_vp;

	if (oldvp)
		brelvp(bp);

	if ((bp->b_flags & (B_READ | B_DONE)) == 0) {
		newvp->v_numoutput++;	/* put it on swapdev */
		vwakeup(oldvp);
	}

	bgetvp(newvp, bp);
	bufremvn(bp);
}

/*
 * Used to assign buffers to the appropriate clean or dirty list on
 * the vnode and to add newly dirty vnodes to the appropriate
 * filesystem syncer list.
 *
 * Manipulates vnode buffer queues. Must be called at splbio().
 */
void
reassignbuf(bp)
	register struct buf *bp;
{
	struct buflists *listheadp;
	int delay;
	struct vnode *vp = bp->b_vp;

	if (vp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &vp->v_cleanblkhd;
		if ((vp->v_bioflag & VBIOONSYNCLIST) &&
		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
			vp->v_bioflag &= ~VBIOONSYNCLIST;
			LIST_REMOVE(vp, v_synclist);
		}
	} else {
		listheadp = &vp->v_dirtyblkhd;
		if ((vp->v_bioflag & VBIOONSYNCLIST) == 0) {
			switch (vp->v_type) {
			case VDIR:
				delay = syncdelay / 2;
				break;
			case VBLK:
				if (vp->v_specmountpoint != NULL) {
					delay = syncdelay / 3;
					break;
				}
				/* fall through */
			default:
				delay = syncdelay;
			}
			vn_syncer_add_to_worklist(vp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

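/*
 * Register a filesystem type in the vfsconf list, rejecting
 * duplicates, and run its vfs_init routine.
 */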
int
vfs_register(vfs)
	struct vfsconf *vfs;
{
	struct vfsconf *vfsp;
	struct vfsconf **vfspp;

#ifdef DIAGNOSTIC
	/* Paranoia? */
	if (vfs->vfc_refcount != 0)
		printf("vfs_register called with vfc_refcount > 0\n");
#endif

	/* Check if filesystem already known */
	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next)
		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
			return (EEXIST);

	if (vfs->vfc_typenum > maxvfsconf)
		maxvfsconf = vfs->vfc_typenum;

	vfs->vfc_next = NULL;

	/* Add to the end of the list */
	*vfspp = vfs;

	/* Call vfs_init() */
	if (vfs->vfc_vfsops->vfs_init)
		(*(vfs->vfc_vfsops->vfs_init))(vfs);

	return 0;
}

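/*
 * Remove a filesystem type from the vfsconf list if it is not in
 * use, then recompute maxvfsconf.
 */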
int
vfs_unregister(vfs)
	struct vfsconf *vfs;
{
	struct vfsconf *vfsp;
	struct vfsconf **vfspp;
	int maxtypenum;

	/* Find our vfsconf struct */
	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next) {
		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
			break;
	}

	if (!vfsp)			/* Not found */
		return (ENOENT);

	if (vfsp->vfc_refcount)		/* In use */
		return (EBUSY);

	/* Remove from list and free */
	*vfspp = vfsp->vfc_next;

	maxtypenum = 0;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (vfsp->vfc_typenum > maxtypenum)
			maxtypenum = vfsp->vfc_typenum;

	maxvfsconf = maxtypenum;
	return 0;
}

/*
 * Check if vnode represents a disk device
 */
int
vn_isdisk(vp, errp)
	struct vnode *vp;
	int *errp;
{
	if (vp->v_type != VBLK && vp->v_type != VCHR)
		return (0);

	return (1);
}