vfs_subr.c revision 1.6
1/*	$NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $	*/
2
3/*
4 * Copyright (c) 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 *    must display the following acknowledgement:
22 *	This product includes software developed by the University of
23 *	California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 *    may be used to endorse or promote products derived from this software
26 *    without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
41 */
42
43/*
44 * External virtual filesystem routines
45 */
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/proc.h>
50#include <sys/mount.h>
51#include <sys/time.h>
52#include <sys/fcntl.h>
53#include <sys/vnode.h>
54#include <sys/stat.h>
55#include <sys/namei.h>
56#include <sys/ucred.h>
57#include <sys/buf.h>
58#include <sys/errno.h>
59#include <sys/malloc.h>
60#include <sys/domain.h>
61#include <sys/mbuf.h>
62#include <sys/syscallargs.h>
63
64#include <vm/vm.h>
65#include <sys/sysctl.h>
66
67#include <miscfs/specfs/specdev.h>
68
/*
 * Table mapping file-type bits from a file mode to a vnode type
 * (presumably indexed by (mode & S_IFMT) >> 12 — confirm at call sites).
 */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
/* Inverse table: vnode type to the corresponding S_IFMT mode bits. */
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
77
int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
int suid_clear = 1;		/* 1 => clear SUID / SGID on owner change */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
/* Unlink bp from its vnode buffer list and mark it off-list with NOLIST. */
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */
92
/*
 * Forward declarations for routines defined in this file.
 */
int vfs_lock __P((struct mount *));
void vfs_unlock __P((struct mount *));
struct mount *getvfs __P((fsid_t *));
long makefstype __P((char *));
void vattr_null __P((struct vattr *));
int getnewvnode __P((enum vtagtype, struct mount *, int (**)(void *),
		     struct vnode **));
void insmntque __P((struct vnode *, struct mount *));
int vinvalbuf __P((struct vnode *, int, struct ucred *, struct proc *, int,
		   int));
void vflushbuf __P((struct vnode *, int));
void brelvp __P((struct buf *));
int bdevvp __P((dev_t, struct vnode **));
int cdevvp __P((dev_t, struct vnode **));
int getdevvp __P((dev_t, struct vnode **, enum vtype));
struct vnode *checkalias __P((struct vnode *, dev_t, struct mount *));
int vget __P((struct vnode *, int));
void vref __P((struct vnode *));
void vput __P((struct vnode *));
void vrele __P((struct vnode *));
void vhold __P((struct vnode *));
void holdrele __P((struct vnode *));
int vflush __P((struct mount *, struct vnode *, int));
void vgoneall __P((struct vnode *));
void vgone __P((struct vnode *));
int vcount __P((struct vnode *));
void vprint __P((char *, struct vnode *));
int vfs_mountedon __P((struct vnode *));
int vfs_export __P((struct mount *, struct netexport *, struct export_args *));
struct netcred *vfs_export_lookup __P((struct mount *, struct netexport *,
				       struct mbuf *));
int vaccess __P((mode_t, uid_t, gid_t, mode_t, struct ucred *));
void vfs_unmountall __P((void));
void vfs_shutdown __P((void));

/* Helpers private to the NFS export-list management below. */
static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
				  struct export_args *));
static int vfs_free_netcred __P((struct radix_node *, void *));
static void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void printlockedvnodes __P((void));
#endif
136
137/*
138 * Initialize the vnode management data structures.
139 */
void
vntblinit()
{

	/* Start with an empty vnode free list and no mounted filesystems. */
	TAILQ_INIT(&vnode_free_list);
	CIRCLEQ_INIT(&mountlist);
}
147
148/*
149 * Lock a filesystem.
150 * Used to prevent access to it while mounting and unmounting.
151 */
int
vfs_lock(mp)
	register struct mount *mp;
{

	/*
	 * Sleep until the current holder drops MNT_MLOCK; setting
	 * MNT_MWAIT asks vfs_unlock() to issue a wakeup on mp.
	 */
	while (mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		tsleep((caddr_t)mp, PVFS, "vfslock", 0);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);		/* always succeeds */
}
164
165/*
166 * Unlock a locked filesystem.
167 * Panic if filesystem is not locked.
168 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	/* Wake anyone who blocked in vfs_lock() on this mount. */
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}
182
183/*
184 * Mark a mount point as busy.
185 * Used to synchronize access and to delay unmounting.
186 */
int
vfs_busy(mp)
	register struct mount *mp;
{

	/* Wait for any current holder of the busy flag. */
	while(mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0);
	}
	/* If an unmount started while we slept, refuse to busy it. */
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}
201
202/*
203 * Free a busy filesystem.
204 * Panic if filesystem is not busy.
205 */
void
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	/* Wake anyone who blocked in vfs_busy() on this mount. */
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}
219
220/*
221 * Lookup a mount point by filesystem identifier.
222 */
223struct mount *
224getvfs(fsid)
225	fsid_t *fsid;
226{
227	register struct mount *mp;
228
229	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
230	     mp = mp->mnt_list.cqe_next)
231		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
232		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
233			return (mp);
234	return ((struct mount *)0);
235}
236
237/*
238 * Get a new unique fsid
239 */
void
getnewfsid(mp, mtype)
	struct mount *mp;
	int mtype;
{
	static u_short xxxfs_mntid;	/* monotonically bumped to vary minors */

	fsid_t tfsid;

	/* Provisional fsid: a synthetic device number plus the fs type. */
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + 11, 0);	/* XXX */
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	/*
	 * Bump the candidate until no mounted filesystem already uses it
	 * (only worth checking when the mount list is non-empty).
	 */
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}
263
264/*
265 * Make a 'unique' number from a mount type name.
266 */
long
makefstype(type)
	char *type;
{
	long hash;

	/*
	 * Fold the characters of the filesystem type name into a long:
	 * shift the accumulator left two bits, then XOR in each byte.
	 * Not collision-free, merely "unique enough" for an fs tag.
	 */
	hash = 0;
	while (*type != '\0') {
		hash <<= 2;
		hash ^= *type++;
	}
	return hash;
}
279
280/*
281 * Set vnode attributes to VNOVAL
282 */
283void
284vattr_null(vap)
285	register struct vattr *vap;
286{
287
288	vap->va_type = VNON;
289	/* XXX These next two used to be one line, but for a GCC bug. */
290	vap->va_size = VNOVAL;
291	vap->va_bytes = VNOVAL;
292	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
293		vap->va_fsid = vap->va_fileid =
294		vap->va_blocksize = vap->va_rdev =
295		vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
296		vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
297		vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
298		vap->va_flags = vap->va_gen = VNOVAL;
299	vap->va_vaflags = 0;
300}
301
302/*
303 * Routines having to do with the management of the vnode table.
304 */
305extern int (**dead_vnodeop_p) __P((void *));
306long numvnodes;
307
308/*
309 * Return the next vnode from the free list.
310 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	register struct vnode *vp;
#ifdef DIAGNOSTIC
	int s;
#endif

	/*
	 * Allocate a fresh vnode while the pool is below desiredvnodes,
	 * or (up to twice that) while the free list is empty; otherwise
	 * recycle the head of the free list.
	 */
	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		if ((vp = vnode_free_list.tqh_first) == NULL) {
			/* Pool at its hard limit and nothing is free. */
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount) {
			vprint("free vnode", vp);
			panic("free vnode isn't");
		}
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgone(vp);
#ifdef DIAGNOSTIC
		if (vp->v_data) {
			vprint("cleaned vnode", vp);
			panic("cleaned vnode isn't");
		}
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		/* Scrub per-vnode state left by the previous owner. */
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	/* (Re)initialize the vnode and hang it off mp's vnode list. */
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;	/* caller holds the sole reference */
	vp->v_data = 0;
	return (0);
}
376
377/*
378 * Move a vnode from one mount queue to another.
379 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 * A NULL mp simply detaches the vnode from any mount.
	 */
	if ((vp->v_mount = mp) == NULL)
		return;
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}
398
399/*
400 * Update outstanding I/O count and do wakeup if requested.
401 */
402void
403vwakeup(bp)
404	register struct buf *bp;
405{
406	register struct vnode *vp;
407
408	bp->b_flags &= ~B_WRITEINPROG;
409	if ((vp = bp->b_vp) != NULL) {
410		if (--vp->v_numoutput < 0)
411			panic("vwakeup: neg numoutput");
412		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
413			vp->v_flag &= ~VBWAIT;
414			wakeup((caddr_t)&vp->v_numoutput);
415		}
416	}
417}
418
419/*
420 * Flush out and invalidate all buffers associated with a vnode.
421 * Called with the underlying object locked.
422 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	/* V_SAVE: write dirty data out first instead of discarding it. */
	if (flags & V_SAVE) {
		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		/*
		 * Pick a starting point: the clean list, then the dirty
		 * list.  With V_SAVEMETA, skip leading metadata buffers
		 * (negative logical block numbers), which are preserved.
		 */
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				/* Busy elsewhere: sleep, then rescan. */
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
					slpflag | (PRIBIO + 1), "vinvalbuf",
					slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			/* Invalidate and release the buffer. */
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}
488
/*
 * Push out all dirty buffers on a vnode; if sync is set, wait for the
 * writes to drain and retry until the dirty list stays empty.
 */
void
vflushbuf(vp, sync)
	register struct vnode *vp;
	int sync;
{
	register struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		/* The list may have changed while unlocked; rescan. */
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	/* Sleep until vwakeup() reports all writes on this vnode done. */
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (vp->v_dirtyblkhd.lh_first != NULL) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}
532
533/*
534 * Associate a buffer with a vnode.
535 */
536void
537bgetvp(vp, bp)
538	register struct vnode *vp;
539	register struct buf *bp;
540{
541
542	if (bp->b_vp)
543		panic("bgetvp: not free");
544	VHOLD(vp);
545	bp->b_vp = vp;
546	if (vp->v_type == VBLK || vp->v_type == VCHR)
547		bp->b_dev = vp->v_rdev;
548	else
549		bp->b_dev = NODEV;
550	/*
551	 * Insert onto list for new vnode.
552	 */
553	bufinsvn(bp, &vp->v_cleanblkhd);
554}
555
556/*
557 * Disassociate a buffer from a vnode.
558 */
559void
560brelvp(bp)
561	register struct buf *bp;
562{
563	struct vnode *vp;
564
565	if (bp->b_vp == (struct vnode *) 0)
566		panic("brelvp: NULL");
567	/*
568	 * Delete from old vnode list, if on one.
569	 */
570	if (bp->b_vnbufs.le_next != NOLIST)
571		bufremvn(bp);
572	vp = bp->b_vp;
573	bp->b_vp = (struct vnode *) 0;
574	HOLDRELE(vp);
575}
576
577/*
578 * Reassign a buffer from one vnode to another.
579 * Used to assign file specific control information
580 * (indirect blocks) to the vnode to which they belong.
581 */
582void
583reassignbuf(bp, newvp)
584	register struct buf *bp;
585	register struct vnode *newvp;
586{
587	register struct buflists *listheadp;
588
589	if (newvp == NULL) {
590		printf("reassignbuf: NULL");
591		return;
592	}
593	/*
594	 * Delete from old vnode list, if on one.
595	 */
596	if (bp->b_vnbufs.le_next != NOLIST)
597		bufremvn(bp);
598	/*
599	 * If dirty, put on list of dirty buffers;
600	 * otherwise insert onto list of clean buffers.
601	 */
602	if (bp->b_flags & B_DELWRI)
603		listheadp = &newvp->v_dirtyblkhd;
604	else
605		listheadp = &newvp->v_cleanblkhd;
606	bufinsvn(bp, listheadp);
607}
608
609/*
610 * Create a vnode for a block device.
611 * Used for root filesystem, argdev, and swap areas.
612 * Also used for memory file system special devices.
613 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	/* Convenience wrapper: make a device vnode of type VBLK. */
	return (getdevvp(dev, vpp, VBLK));
}
622
623/*
624 * Create a vnode for a character device.
625 * Used for kernfs and some console handling.
626 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	/* Convenience wrapper: make a device vnode of type VCHR. */
	return (getdevvp(dev, vpp, VCHR));
}
635
636/*
637 * Create a vnode for a device.
638 * Used by bdevvp (block device) for root file system etc.,
639 * and by cdevvp (character device) for console and kernfs.
640 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	/* NOTE(review): NODEV returns success without setting *vpp —
	 * callers apparently tolerate this; confirm before changing. */
	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	/* If an alias for this device already exists, use that vnode. */
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
667
668/*
669 * Check to see if the new vnode represents a special device
670 * for which we already have a vnode (either because of
671 * bdevvp() or because of a different vnode representing
672 * the same block device). If such an alias exists, deallocate
673 * the existing contents and return the aliased vnode. The
674 * caller is responsible for filling it with its new contents.
675 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	/* Only block and character device vnodes can be aliased. */
	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			/* vgone may have altered the chain; rescan. */
			goto loop;
		}
		if (vget(vp, 1))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		/*
		 * No reusable alias: give nvp its specinfo, link it onto
		 * the hash chain, and mark both vnodes VALIASED if an
		 * active alias exists.
		 */
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		nvp->v_speclockf = NULL;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/*
	 * Reuse the existing unclaimed (VT_NON) block-device vnode:
	 * clean it out and return it in place of nvp, which is neutered.
	 */
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}
728
729/*
730 * Grab a particular vnode from the free list, increment its
731 * reference count and lock it. The vnode lock bit is set the
732 * vnode is being eliminated in vgone. The process is awakened
733 * when the transition is completed, and an error returned to
734 * indicate that the vnode is no longer usable (possibly having
735 * been changed to a new file system type).
736 */
int
vget(vp, lockflag)
	register struct vnode *vp;
	int lockflag;
{

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined either by checking
	 * that the VXLOCK flag is set, or that the use count is
	 * zero with the back pointer set to show that it has been
	 * removed from the free list by getnewvnode. The VXLOCK
	 * flag may not have been set yet because vclean is blocked in
	 * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
	 */
	if ((vp->v_flag & VXLOCK) ||
	    (vp->v_usecount == 0 &&
	     vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
		vp->v_flag |= VXWANT;
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (1);	/* vnode unusable; caller must retry/fail */
	}
	/* First reference pulls it off the free list. */
	if (vp->v_usecount == 0)
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	vp->v_usecount++;
	if (lockflag)
		VOP_LOCK(vp);
	return (0);
}
767
768/*
769 * Vnode reference, just increment the count
770 */
void
vref(vp)
	struct vnode *vp;
{

	/*
	 * Only legal on a vnode that already has a reference; a vnode
	 * sitting on the free list must go through vget() instead.
	 */
	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
}
780
781/*
782 * vput(), just unlock and vrele()
783 */
void
vput(vp)
	register struct vnode *vp;
{

	/* Drop the vnode lock, then drop the reference. */
	VOP_UNLOCK(vp);
	vrele(vp);
}
792
793/*
794 * Vnode release.
795 * If count drops to zero, call inactive routine and return to freelist.
796 */
void
vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	/* A nonzero count here means a release-too-many (went negative). */
	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	VOP_INACTIVE(vp);
}
821
822/*
823 * Page or buffer structure gets a reference.
824 */
void
vhold(vp)
	register struct vnode *vp;
{

	/* Bump the hold count (paired with holdrele()). */
	vp->v_holdcnt++;
}
832
833/*
834 * Page or buffer structure frees a reference.
835 */
void
holdrele(vp)
	register struct vnode *vp;
{

	/* Release a hold taken with vhold(); underflow is fatal. */
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}
845
846/*
847 * Remove any vnodes in the vnode table belonging to mount point mp.
848 *
849 * If MNT_NOFORCE is specified, there should not be any active ones,
850 * return error if any are found (nb: this is a user error, not a
851 * system error). If MNT_FORCE is specified, detach any active vnodes
852 * that are found.
853 */
854#ifdef DEBUG
855int busyprt = 0;	/* print out busy vnodes */
856struct ctldebug debug1 = { "busyprt", &busyprt };
857#endif
858
int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/* Restart if vp migrated to another mount while we worked. */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		/* Active vnode we may not touch: count it as busy. */
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}
925
926/*
927 * Disassociate the underlying file system from a vnode.
928 */
void
vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0)
		VREF(vp);
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * Any other processes trying to obtain this lock must first
	 * wait for VXLOCK to clear, then call the new lock operation.
	 */
	VOP_UNLOCK(vp);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);	/* drop the reference taken above */

	/*
	 * Done with purge, notify sleepers of the grim news.
	 * The vnode now answers only to the dead-filesystem ops.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}
997
998/*
999 * Eliminate all activity associated with  the requested vnode
1000 * and with all vnodes aliased to the requested vnode.
1001 */
void
vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			tsleep((caddr_t)vp, PINOD, "vgoneall", 0);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			/* Kill one alias per pass; vgone edits the chain. */
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}
1041
1042/*
1043 * Eliminate all activity associated with a vnode
1044 * in preparation for reuse.
1045 */
void
vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		/* Unlink vp from its hash chain. */
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * If only one alias remains after vp is gone,
			 * that survivor is no longer VALIASED.
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
	    vnode_free_list.tqh_first != vp) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	}
	vp->v_type = VBAD;
}
1126
1127/*
1128 * Lookup a vnode by device number.
1129 */
1130int
1131vfinddev(dev, type, vpp)
1132	dev_t dev;
1133	enum vtype type;
1134	struct vnode **vpp;
1135{
1136	register struct vnode *vp;
1137
1138	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1139		if (dev != vp->v_rdev || type != vp->v_type)
1140			continue;
1141		*vpp = vp;
1142		return (1);
1143	}
1144	return (0);
1145}
1146
1147/*
1148 * Calculate the total number of references to a special device.
1149 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	/* Unaliased device: its own use count is the answer. */
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	/* Sum use counts across every alias of this device. */
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			/* vgone changed the chain; start over. */
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}
1175
1176/*
1177 * Print out a description of a vnode.
1178 */
1179static char *typename[] =
1180   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1181
void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %ld,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	/*
	 * Build a "|FLAG|FLAG..." string; the leading '|' is skipped
	 * by printing from &buf[1] below.
	 */
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		/* Let the filesystem print its private data too. */
		printf("\n\t");
		VOP_PRINT(vp);
	}
}
1218
#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	/* Visit every vnode on every mounted file system. */
	mp = mountlist.cqh_first;
	while (mp != (void *)&mountlist) {
		vp = mp->mnt_vnodelist.lh_first;
		while (vp != NULL) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
			vp = vp->v_mntvnodes.le_next;
		}
		mp = mp->mnt_list.cqe_next;
	}
}
#endif
1241
1242int kinfo_vdebug = 1;
1243int kinfo_vgetfailed;
1244#define KINFO_VNODESLOP	10
1245/*
1246 * Dump vnode list (via sysctl).
1247 * Copyout address of vnode followed by vnode.
1248 */
1249/* ARGSUSED */
1250int
1251sysctl_vnode(where, sizep)
1252	char *where;
1253	size_t *sizep;
1254{
1255	register struct mount *mp, *nmp;
1256	struct vnode *vp;
1257	register char *bp = where, *savebp;
1258	char *ewhere;
1259	int error;
1260
1261#define VPTRSZ	sizeof (struct vnode *)
1262#define VNODESZ	sizeof (struct vnode)
1263	if (where == NULL) {
1264		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
1265		return (0);
1266	}
1267	ewhere = where + *sizep;
1268
1269	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1270		nmp = mp->mnt_list.cqe_next;
1271		if (vfs_busy(mp))
1272			continue;
1273		savebp = bp;
1274again:
1275		for (vp = mp->mnt_vnodelist.lh_first;
1276		     vp != NULL;
1277		     vp = vp->v_mntvnodes.le_next) {
1278			/*
1279			 * Check that the vp is still associated with
1280			 * this filesystem.  RACE: could have been
1281			 * recycled onto the same filesystem.
1282			 */
1283			if (vp->v_mount != mp) {
1284				if (kinfo_vdebug)
1285					printf("kinfo: vp changed\n");
1286				bp = savebp;
1287				goto again;
1288			}
1289			if (bp + VPTRSZ + VNODESZ > ewhere) {
1290				*sizep = bp - where;
1291				return (ENOMEM);
1292			}
1293			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
1294			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
1295				return (error);
1296			bp += VPTRSZ + VNODESZ;
1297		}
1298		vfs_unbusy(mp);
1299	}
1300
1301	*sizep = bp - where;
1302	return (0);
1303}
1304
1305/*
1306 * Check to see if a filesystem is mounted on a block device.
1307 */
1308int
1309vfs_mountedon(vp)
1310	register struct vnode *vp;
1311{
1312	register struct vnode *vq;
1313
1314	if (vp->v_specflags & SI_MOUNTEDON)
1315		return (EBUSY);
1316	if (vp->v_flag & VALIASED) {
1317		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1318			if (vq->v_rdev != vp->v_rdev ||
1319			    vq->v_type != vp->v_type)
1320				continue;
1321			if (vq->v_specflags & SI_MOUNTEDON)
1322				return (EBUSY);
1323		}
1324	}
1325	return (0);
1326}
1327
1328/*
1329 * Build hash lists of net addresses and hang them off the mount point.
1330 * Called by ufs_mount() to set up the lists of export addresses.
1331 */
1332static int
1333vfs_hang_addrlist(mp, nep, argp)
1334	struct mount *mp;
1335	struct netexport *nep;
1336	struct export_args *argp;
1337{
1338	register struct netcred *np;
1339	register struct radix_node_head *rnh;
1340	register int i;
1341	struct radix_node *rn;
1342	struct sockaddr *saddr, *smask = 0;
1343	struct domain *dom;
1344	int error;
1345
1346	if (argp->ex_addrlen == 0) {
1347		if (mp->mnt_flag & MNT_DEFEXPORTED)
1348			return (EPERM);
1349		np = &nep->ne_defexported;
1350		np->netc_exflags = argp->ex_flags;
1351		np->netc_anon = argp->ex_anon;
1352		np->netc_anon.cr_ref = 1;
1353		mp->mnt_flag |= MNT_DEFEXPORTED;
1354		return (0);
1355	}
1356	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1357	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
1358	bzero((caddr_t)np, i);
1359	saddr = (struct sockaddr *)(np + 1);
1360	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
1361	if (error)
1362		goto out;
1363	if (saddr->sa_len > argp->ex_addrlen)
1364		saddr->sa_len = argp->ex_addrlen;
1365	if (argp->ex_masklen) {
1366		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
1367		error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen);
1368		if (error)
1369			goto out;
1370		if (smask->sa_len > argp->ex_masklen)
1371			smask->sa_len = argp->ex_masklen;
1372	}
1373	i = saddr->sa_family;
1374	if ((rnh = nep->ne_rtable[i]) == 0) {
1375		/*
1376		 * Seems silly to initialize every AF when most are not
1377		 * used, do so on demand here
1378		 */
1379		for (dom = domains; dom; dom = dom->dom_next)
1380			if (dom->dom_family == i && dom->dom_rtattach) {
1381				dom->dom_rtattach((void **)&nep->ne_rtable[i],
1382					dom->dom_rtoffset);
1383				break;
1384			}
1385		if ((rnh = nep->ne_rtable[i]) == 0) {
1386			error = ENOBUFS;
1387			goto out;
1388		}
1389	}
1390	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
1391		np->netc_rnodes);
1392	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
1393		error = EPERM;
1394		goto out;
1395	}
1396	np->netc_exflags = argp->ex_flags;
1397	np->netc_anon = argp->ex_anon;
1398	np->netc_anon.cr_ref = 1;
1399	return (0);
1400out:
1401	free(np, M_NETADDR);
1402	return (error);
1403}
1404
1405/* ARGSUSED */
1406static int
1407vfs_free_netcred(rn, w)
1408	struct radix_node *rn;
1409	void *w;
1410{
1411	register struct radix_node_head *rnh = (struct radix_node_head *)w;
1412
1413	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
1414	free((caddr_t)rn, M_NETADDR);
1415	return (0);
1416}
1417
1418/*
1419 * Free the net address hash lists that are hanging off the mount points.
1420 */
1421static void
1422vfs_free_addrlist(nep)
1423	struct netexport *nep;
1424{
1425	register int i;
1426	register struct radix_node_head *rnh;
1427
1428	for (i = 0; i <= AF_MAX; i++)
1429		if ((rnh = nep->ne_rtable[i]) != NULL) {
1430			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
1431			free((caddr_t)rnh, M_RTABLE);
1432			nep->ne_rtable[i] = 0;
1433		}
1434}
1435
1436int
1437vfs_export(mp, nep, argp)
1438	struct mount *mp;
1439	struct netexport *nep;
1440	struct export_args *argp;
1441{
1442	int error;
1443
1444	if (argp->ex_flags & MNT_DELEXPORT) {
1445		vfs_free_addrlist(nep);
1446		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
1447	}
1448	if (argp->ex_flags & MNT_EXPORTED) {
1449		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
1450			return (error);
1451		mp->mnt_flag |= MNT_EXPORTED;
1452	}
1453	return (0);
1454}
1455
1456struct netcred *
1457vfs_export_lookup(mp, nep, nam)
1458	register struct mount *mp;
1459	struct netexport *nep;
1460	struct mbuf *nam;
1461{
1462	register struct netcred *np;
1463	register struct radix_node_head *rnh;
1464	struct sockaddr *saddr;
1465
1466	np = NULL;
1467	if (mp->mnt_flag & MNT_EXPORTED) {
1468		/*
1469		 * Lookup in the export list first.
1470		 */
1471		if (nam != NULL) {
1472			saddr = mtod(nam, struct sockaddr *);
1473			rnh = nep->ne_rtable[saddr->sa_family];
1474			if (rnh != NULL) {
1475				np = (struct netcred *)
1476					(*rnh->rnh_matchaddr)((caddr_t)saddr,
1477							      rnh);
1478				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
1479					np = NULL;
1480			}
1481		}
1482		/*
1483		 * If no address match, use the default if it exists.
1484		 */
1485		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
1486			np = &nep->ne_defexported;
1487	}
1488	return (np);
1489}
1490
1491/*
1492 * Do the usual access checking.
1493 * file_mode, uid and gid are from the vnode in question,
1494 * while acc_mode and cred are from the VOP_ACCESS parameter list
1495 */
1496int
1497vaccess(file_mode, uid, gid, acc_mode, cred)
1498	mode_t file_mode;
1499	uid_t uid;
1500	gid_t gid;
1501	mode_t acc_mode;
1502	struct ucred *cred;
1503{
1504	mode_t mask;
1505
1506	/* User id 0 always gets access. */
1507	if (cred->cr_uid == 0)
1508		return 0;
1509
1510	mask = 0;
1511
1512	/* Otherwise, check the owner. */
1513	if (cred->cr_uid == uid) {
1514		if (acc_mode & VEXEC)
1515			mask |= S_IXUSR;
1516		if (acc_mode & VREAD)
1517			mask |= S_IRUSR;
1518		if (acc_mode & VWRITE)
1519			mask |= S_IWUSR;
1520		return (file_mode & mask) == mask ? 0 : EACCES;
1521	}
1522
1523	/* Otherwise, check the groups. */
1524	if (cred->cr_gid == gid || groupmember(gid, cred)) {
1525		if (acc_mode & VEXEC)
1526			mask |= S_IXGRP;
1527		if (acc_mode & VREAD)
1528			mask |= S_IRGRP;
1529		if (acc_mode & VWRITE)
1530			mask |= S_IWGRP;
1531		return (file_mode & mask) == mask ? 0 : EACCES;
1532	}
1533
1534	/* Otherwise, check everyone else. */
1535	if (acc_mode & VEXEC)
1536		mask |= S_IXOTH;
1537	if (acc_mode & VREAD)
1538		mask |= S_IROTH;
1539	if (acc_mode & VWRITE)
1540		mask |= S_IWOTH;
1541	return (file_mode & mask) == mask ? 0 : EACCES;
1542}
1543
1544/*
1545 * Unmount all file systems.
1546 * We traverse the list in reverse order under the assumption that doing so
1547 * will avoid needing to worry about dependencies.
1548 */
1549void
1550vfs_unmountall()
1551{
1552	register struct mount *mp, *nmp;
1553	int allerror, error;
1554
1555	for (allerror = 0,
1556	     mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
1557		nmp = mp->mnt_list.cqe_prev;
1558		if ((error = dounmount(mp, MNT_FORCE, &proc0)) != 0) {
1559			printf("unmount of %s failed with error %d\n",
1560			    mp->mnt_stat.f_mntonname, error);
1561			allerror = 1;
1562		}
1563	}
1564	if (allerror)
1565		printf("WARNING: some file systems would not unmount\n");
1566}
1567
1568/*
1569 * Sync and unmount file systems before shutting down.
1570 */
1571void
1572vfs_shutdown()
1573{
1574	register struct buf *bp;
1575	int iter, nbusy;
1576
1577	/* XXX Should suspend scheduling. */
1578	(void) spl0();
1579
1580	printf("syncing disks... ");
1581
1582	if (panicstr == 0) {
1583		/* Release inodes held by texts before update. */
1584		vnode_pager_umount(NULL);
1585#ifdef notdef
1586		vnshutdown();
1587#endif
1588
1589		/* Sync before unmount, in case we hang on something. */
1590		sys_sync(&proc0, (void *)0, (register_t *)0);
1591
1592		/* Unmount file systems. */
1593		vfs_unmountall();
1594	}
1595
1596	/* Sync again after unmount, just in case. */
1597	sys_sync(&proc0, (void *)0, (register_t *)0);
1598
1599	/* Wait for sync to finish. */
1600	for (iter = 0; iter < 20; iter++) {
1601		nbusy = 0;
1602		for (bp = &buf[nbuf]; --bp >= buf; )
1603			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
1604				nbusy++;
1605		if (nbusy == 0)
1606			break;
1607		printf("%d ", nbusy);
1608		DELAY(40000 * iter);
1609	}
1610	if (nbusy)
1611		printf("giving up\n");
1612	else
1613		printf("done\n");
1614}
1615
1616/*
1617 * posix file system related system variables.
1618 */
1619int
1620fs_posix_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1621	int *name;
1622	u_int namelen;
1623	void *oldp;
1624	size_t *oldlenp;
1625	void *newp;
1626	size_t newlen;
1627	struct proc *p;
1628{
1629	/* all sysctl names at this level are terminal */
1630	if (namelen != 1)
1631		return (ENOTDIR);
1632
1633	switch (name[0]) {
1634	case FS_POSIX_SETUID:
1635		if (newp && securelevel > 0)
1636			return (EPERM);
1637		return(sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
1638	default:
1639		return (EOPNOTSUPP);
1640	}
1641	/* NOTREACHED */
1642}
1643
1644/*
1645 * file system related system variables.
1646 */
1647int
1648fs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1649	int *name;
1650	u_int namelen;
1651	void *oldp;
1652	size_t *oldlenp;
1653	void *newp;
1654	size_t newlen;
1655	struct proc *p;
1656{
1657	sysctlfn *fn;
1658
1659	switch (name[0]) {
1660	case FS_POSIX:
1661		fn = fs_posix_sysctl;
1662		break;
1663	default:
1664		return (EOPNOTSUPP);
1665	}
1666	return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
1667}
1668