vfs_export.c (27892) vfs_export.c (28270)
1/*
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
39 * $Id: vfs_subr.c,v 1.89 1997/07/17 07:17:31 dfr Exp $
39 * $Id: vfs_subr.c,v 1.90 1997/08/04 07:43:28 dyson Exp $
40 */
41
42/*
43 * External virtual filesystem routines
44 */
45#include "opt_ddb.h"
46#include "opt_devfs.h"
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/kernel.h>
51#include <sys/file.h>
52#include <sys/proc.h>
53#include <sys/mount.h>
54#include <sys/time.h>
55#include <sys/vnode.h>
56#include <sys/stat.h>
57#include <sys/namei.h>
58#include <sys/ucred.h>
59#include <sys/buf.h>
60#include <sys/errno.h>
61#include <sys/malloc.h>
62#include <sys/domain.h>
63#include <sys/mbuf.h>
64#include <sys/dirent.h>
65
66#include <vm/vm.h>
67#include <vm/vm_param.h>
68#include <vm/vm_object.h>
69#include <vm/vm_extern.h>
70#include <vm/vm_pager.h>
71#include <vm/vnode_pager.h>
72#include <sys/sysctl.h>
73
74#include <miscfs/specfs/specdev.h>
75
76#ifdef DDB
77extern void printlockedvnodes __P((void));
78#endif
79static void vclean __P((struct vnode *vp, int flags, struct proc *p));
80static void vgonel __P((struct vnode *vp, struct proc *p));
81unsigned long numvnodes;
82SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
83static void vputrele __P((struct vnode *vp, int put));
84
85enum vtype iftovt_tab[16] = {
86 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
87 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
88};
89int vttoif_tab[9] = {
90 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
91 S_IFSOCK, S_IFIFO, S_IFMT,
92};
93
94/*
95 * Insq/Remq for the vnode usage lists.
96 */
97#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
98#define bufremvn(bp) { \
99 LIST_REMOVE(bp, b_vnbufs); \
100 (bp)->b_vnbufs.le_next = NOLIST; \
101}
102TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
103static u_long freevnodes = 0;
104
105struct mntlist mountlist; /* mounted filesystem list */
106struct simplelock mountlist_slock;
107static struct simplelock mntid_slock;
108struct simplelock mntvnode_slock;
109struct simplelock vnode_free_list_slock;
110static struct simplelock spechash_slock;
111struct nfs_public nfs_pub; /* publicly exported FS */
112
113int desiredvnodes;
114SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");
115
116static void vfs_free_addrlist __P((struct netexport *nep));
117static int vfs_free_netcred __P((struct radix_node *rn, void *w));
118static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
119 struct export_args *argp));
120
121/*
122 * Initialize the vnode management data structures.
123 */
124void
125vntblinit()
126{
127
128 desiredvnodes = maxproc + vm_object_cache_max;
129 simple_lock_init(&mntvnode_slock);
130 simple_lock_init(&mntid_slock);
131 simple_lock_init(&spechash_slock);
132 TAILQ_INIT(&vnode_free_list);
133 simple_lock_init(&vnode_free_list_slock);
134 CIRCLEQ_INIT(&mountlist);
135}
136
137/*
138 * Mark a mount point as busy. Used to synchronize access and to delay
139 * unmounting. Interlock is not released on failure.
140 */
141int
142vfs_busy(mp, flags, interlkp, p)
143 struct mount *mp;
144 int flags;
145 struct simplelock *interlkp;
146 struct proc *p;
147{
148 int lkflags;
149
150 if (mp->mnt_flag & MNT_UNMOUNT) {
151 if (flags & LK_NOWAIT)
152 return (ENOENT);
153 mp->mnt_flag |= MNT_MWAIT;
154 if (interlkp) {
155 simple_unlock(interlkp);
156 }
157 /*
158 * Since all busy locks are shared except the exclusive
159 * lock granted when unmounting, the only place that a
160 * wakeup needs to be done is at the release of the
161 * exclusive lock at the end of dounmount.
162 */
163 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
164 if (interlkp) {
165 simple_lock(interlkp);
166 }
167 return (ENOENT);
168 }
169 lkflags = LK_SHARED;
170 if (interlkp)
171 lkflags |= LK_INTERLOCK;
172 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
173 panic("vfs_busy: unexpected lock failure");
174 return (0);
175}
176
177/*
178 * Free a busy filesystem.
179 */
180void
181vfs_unbusy(mp, p)
182 struct mount *mp;
183 struct proc *p;
184{
185
186 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
187}
188
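/*
 * Illustrative sketch of the usual vfs_busy()/vfs_unbusy() pattern for
 * walking the mount list, modeled on printlockedvnodes() and sysctl_vnode()
 * below.  The function name is hypothetical.  On success vfs_busy() has
 * dropped mountlist_slock, so it must be retaken before advancing.
 */
#ifdef notdef
static void
vfs_mountlist_walk_example(p)
	struct proc *p;
{
	struct mount *mp, *nmp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			/* Being unmounted; the interlock is still held. */
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		/* ... operate on mp; it cannot be unmounted here ... */
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif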
189/*
190 * Lookup a filesystem type, and if found allocate and initialize
191 * a mount structure for it.
192 *
193 * Devname is usually updated by mount(8) after booting.
194 */
195int
196vfs_rootmountalloc(fstypename, devname, mpp)
197 char *fstypename;
198 char *devname;
199 struct mount **mpp;
200{
201 struct proc *p = curproc; /* XXX */
202 struct vfsconf *vfsp;
203 struct mount *mp;
204
205 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
206 if (!strcmp(vfsp->vfc_name, fstypename))
207 break;
208 if (vfsp == NULL)
209 return (ENODEV);
210 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
211 bzero((char *)mp, (u_long)sizeof(struct mount));
212 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
213 (void)vfs_busy(mp, LK_NOWAIT, 0, p);
214 LIST_INIT(&mp->mnt_vnodelist);
215 mp->mnt_vfc = vfsp;
216 mp->mnt_op = vfsp->vfc_vfsops;
217 mp->mnt_flag = MNT_RDONLY;
218 mp->mnt_vnodecovered = NULLVP;
219 vfsp->vfc_refcount++;
220 mp->mnt_stat.f_type = vfsp->vfc_typenum;
221 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
222 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
223 mp->mnt_stat.f_mntonname[0] = '/';
224 mp->mnt_stat.f_mntonname[1] = 0;
225 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
226 *mpp = mp;
227 return (0);
228}
229
230/*
231 * Find an appropriate filesystem to use for the root. If a filesystem
232 * has not been preselected, walk through the list of known filesystems
233 * trying those that have mountroot routines, and try them until one
234 * works or we have tried them all.
235 */
236#ifdef notdef /* XXX JH */
237int
238lite2_vfs_mountroot(void)
239{
240 struct vfsconf *vfsp;
241 extern int (*lite2_mountroot)(void);
242 int error;
243
244 if (lite2_mountroot != NULL)
245 return ((*lite2_mountroot)());
246 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
247 if (vfsp->vfc_mountroot == NULL)
248 continue;
249 if ((error = (*vfsp->vfc_mountroot)()) == 0)
250 return (0);
251 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
252 }
253 return (ENODEV);
254}
255#endif
256
257/*
258 * Lookup a mount point by filesystem identifier.
259 */
260struct mount *
261vfs_getvfs(fsid)
262 fsid_t *fsid;
263{
264 register struct mount *mp;
265
266 simple_lock(&mountlist_slock);
267 for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
268 mp = mp->mnt_list.cqe_next) {
269 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
270 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
271 simple_unlock(&mountlist_slock);
272 return (mp);
273 }
274 }
275 simple_unlock(&mountlist_slock);
276 return ((struct mount *) 0);
277}
278
279/*
280 * Get a new unique fsid
281 */
282void
283vfs_getnewfsid(mp)
284 struct mount *mp;
285{
286 static u_short xxxfs_mntid;
287
288 fsid_t tfsid;
289 int mtype;
290
291 simple_lock(&mntid_slock);
292 mtype = mp->mnt_vfc->vfc_typenum;
293 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
294 mp->mnt_stat.f_fsid.val[1] = mtype;
295 if (xxxfs_mntid == 0)
296 ++xxxfs_mntid;
297 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
298 tfsid.val[1] = mtype;
299 if (mountlist.cqh_first != (void *)&mountlist) {
300 while (vfs_getvfs(&tfsid)) {
301 tfsid.val[0]++;
302 xxxfs_mntid++;
303 }
304 }
305 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
306 simple_unlock(&mntid_slock);
307}
308
309/*
310 * Set vnode attributes to VNOVAL
311 */
312void
313vattr_null(vap)
314 register struct vattr *vap;
315{
316
317 vap->va_type = VNON;
318 vap->va_size = VNOVAL;
319 vap->va_bytes = VNOVAL;
320 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
321 vap->va_fsid = vap->va_fileid =
322 vap->va_blocksize = vap->va_rdev =
323 vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
324 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
325 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
326 vap->va_flags = vap->va_gen = VNOVAL;
327 vap->va_vaflags = 0;
328}
329
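/*
 * Illustrative sketch of how vattr_null() is meant to be used: clear the
 * whole vattr to VNOVAL, then fill in only the fields to be changed so
 * that VOP_SETATTR() leaves everything else alone.  Assumes vp is locked;
 * the function name is hypothetical.
 */
#ifdef notdef
static int
vattr_null_example(vp, cred, p)
	struct vnode *vp;
	struct ucred *cred;
	struct proc *p;
{
	struct vattr vat;

	vattr_null(&vat);		/* every field becomes VNOVAL */
	vat.va_size = 0;		/* change only the size (truncate) */
	return (VOP_SETATTR(vp, &vat, cred, p));
}
#endif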
330/*
331 * Routines having to do with the management of the vnode table.
332 */
333extern vop_t **dead_vnodeop_p;
334
335/*
336 * Return the next vnode from the free list.
337 */
338int
339getnewvnode(tag, mp, vops, vpp)
340 enum vtagtype tag;
341 struct mount *mp;
342 vop_t **vops;
343 struct vnode **vpp;
344{
345 struct proc *p = curproc; /* XXX */
346 struct vnode *vp;
347
348 /*
349 * We take the least recently used vnode from the freelist
350 * if we can get it and it has no cached pages, and no
351 * namecache entries are relative to it.
352 * Otherwise we allocate a new vnode
353 */
354
355 simple_lock(&vnode_free_list_slock);
356
357 if (freevnodes >= desiredvnodes) {
358 TAILQ_FOREACH(vp, &vnode_free_list, v_freelist) {
359 if (!simple_lock_try(&vp->v_interlock))
360 continue;
361 if (vp->v_usecount)
362 panic("free vnode isn't");
363
364 if (vp->v_object && vp->v_object->resident_page_count) {
365 /* Don't recycle if it's caching some pages */
366 simple_unlock(&vp->v_interlock);
367 continue;
368 } else if (LIST_FIRST(&vp->v_cache_src)) {
369 /* Don't recycle if active in the namecache */
370 simple_unlock(&vp->v_interlock);
371 continue;
372 } else {
373 break;
374 }
375 }
376 } else {
377 vp = NULL;
378 }
379
380 if (vp) {
381 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
382 freevnodes--;
383 /* see comment on why 0xdeadb is set at end of vgone (below) */
384 vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb;
385 simple_unlock(&vnode_free_list_slock);
386 vp->v_lease = NULL;
387 if (vp->v_type != VBAD)
388 vgonel(vp, p);
389 else {
390 simple_unlock(&vp->v_interlock);
391 }
392
393#ifdef DIAGNOSTIC
394 {
395 int s;
396
397 if (vp->v_data)
398 panic("cleaned vnode isn't");
399 s = splbio();
400 if (vp->v_numoutput)
401 panic("Clean vnode has pending I/O's");
402 splx(s);
403 }
404#endif
405 vp->v_flag = 0;
406 vp->v_lastr = 0;
407 vp->v_lastw = 0;
408 vp->v_lasta = 0;
409 vp->v_cstart = 0;
410 vp->v_clen = 0;
411 vp->v_socket = 0;
412 vp->v_writecount = 0; /* XXX */
413 } else {
414 simple_unlock(&vnode_free_list_slock);
415 vp = (struct vnode *) malloc((u_long) sizeof *vp,
416 M_VNODE, M_WAITOK);
417 bzero((char *) vp, sizeof *vp);
418 vp->v_dd = vp;
419 LIST_INIT(&vp->v_cache_src);
420 TAILQ_INIT(&vp->v_cache_dst);
421 numvnodes++;
422 }
423
424 vp->v_type = VNON;
425 cache_purge(vp);
426 vp->v_tag = tag;
427 vp->v_op = vops;
428 insmntque(vp, mp);
429 *vpp = vp;
430 vp->v_usecount = 1;
431 vp->v_data = 0;
432 return (0);
433}
434
435/*
436 * Move a vnode from one mount queue to another.
437 */
438void
439insmntque(vp, mp)
440 register struct vnode *vp;
441 register struct mount *mp;
442{
443
444 simple_lock(&mntvnode_slock);
445 /*
446 * Delete from old mount point vnode list, if on one.
447 */
448 if (vp->v_mount != NULL)
449 LIST_REMOVE(vp, v_mntvnodes);
450 /*
451 * Insert into list of vnodes for the new mount point, if available.
452 */
453 if ((vp->v_mount = mp) == NULL) {
454 simple_unlock(&mntvnode_slock);
455 return;
456 }
457 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
458 simple_unlock(&mntvnode_slock);
459}
460
461/*
462 * Update outstanding I/O count and do wakeup if requested.
463 */
464void
465vwakeup(bp)
466 register struct buf *bp;
467{
468 register struct vnode *vp;
469
470 bp->b_flags &= ~B_WRITEINPROG;
471 if ((vp = bp->b_vp)) {
472 vp->v_numoutput--;
473 if (vp->v_numoutput < 0)
474 panic("vwakeup: neg numoutput");
475 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
476 vp->v_flag &= ~VBWAIT;
477 wakeup((caddr_t) &vp->v_numoutput);
478 }
479 }
480}
481
482/*
483 * Flush out and invalidate all buffers associated with a vnode.
484 * Called with the underlying object locked.
485 */
486int
487vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
488 register struct vnode *vp;
489 int flags;
490 struct ucred *cred;
491 struct proc *p;
492 int slpflag, slptimeo;
493{
494 register struct buf *bp;
495 struct buf *nbp, *blist;
496 int s, error;
497 vm_object_t object;
498
499 if (flags & V_SAVE) {
500 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
501 return (error);
502 if (vp->v_dirtyblkhd.lh_first != NULL)
503 panic("vinvalbuf: dirty bufs");
504 }
505
506 s = splbio();
507 for (;;) {
508 if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
509 while (blist && blist->b_lblkno < 0)
510 blist = blist->b_vnbufs.le_next;
511 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
512 (flags & V_SAVEMETA))
513 while (blist && blist->b_lblkno < 0)
514 blist = blist->b_vnbufs.le_next;
515 if (!blist)
516 break;
517
518 for (bp = blist; bp; bp = nbp) {
519 nbp = bp->b_vnbufs.le_next;
520 if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
521 continue;
522 if (bp->b_flags & B_BUSY) {
523 bp->b_flags |= B_WANTED;
524 error = tsleep((caddr_t) bp,
525 slpflag | (PRIBIO + 1), "vinvalbuf",
526 slptimeo);
527 if (error) {
528 splx(s);
529 return (error);
530 }
531 break;
532 }
533 bremfree(bp);
534 bp->b_flags |= B_BUSY;
535 /*
536 * XXX Since there are no node locks for NFS, I
537 * believe there is a slight chance that a delayed
538 * write will occur while sleeping just above, so
539 * check for it.
540 */
541 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
542 (void) VOP_BWRITE(bp);
543 break;
544 }
545 bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
546 brelse(bp);
547 }
548 }
549
550 while (vp->v_numoutput > 0) {
551 vp->v_flag |= VBWAIT;
552 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
553 }
554
555 splx(s);
556
557 /*
558 * Destroy the copy in the VM cache, too.
559 */
560 object = vp->v_object;
561 if (object != NULL) {
562 vm_object_page_remove(object, 0, object->size,
563 (flags & V_SAVE) ? TRUE : FALSE);
564 }
565 if (!(flags & V_SAVEMETA) &&
566 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
567 panic("vinvalbuf: flush failed");
568 return (0);
569}
570
571/*
572 * Associate a buffer with a vnode.
573 */
574void
575bgetvp(vp, bp)
576 register struct vnode *vp;
577 register struct buf *bp;
578{
579 int s;
580
581 if (bp->b_vp)
582 panic("bgetvp: not free");
583 VHOLD(vp);
584 bp->b_vp = vp;
585 if (vp->v_type == VBLK || vp->v_type == VCHR)
586 bp->b_dev = vp->v_rdev;
587 else
588 bp->b_dev = NODEV;
589 /*
590 * Insert onto list for new vnode.
591 */
592 s = splbio();
593 bufinsvn(bp, &vp->v_cleanblkhd);
594 splx(s);
595}
596
597/*
598 * Disassociate a buffer from a vnode.
599 */
600void
601brelvp(bp)
602 register struct buf *bp;
603{
604 struct vnode *vp;
605 int s;
606
607 if (bp->b_vp == (struct vnode *) 0)
608 panic("brelvp: NULL");
609 /*
610 * Delete from old vnode list, if on one.
611 */
612 s = splbio();
613 if (bp->b_vnbufs.le_next != NOLIST)
614 bufremvn(bp);
615 splx(s);
616
617 vp = bp->b_vp;
618 bp->b_vp = (struct vnode *) 0;
619 HOLDRELE(vp);
620}
621
622/*
623 * Associate a p-buffer with a vnode.
624 */
625void
626pbgetvp(vp, bp)
627 register struct vnode *vp;
628 register struct buf *bp;
629{
630#if defined(DIAGNOSTIC)
631 if (bp->b_vp)
632 panic("pbgetvp: not free");
633#endif
634 bp->b_vp = vp;
635 if (vp->v_type == VBLK || vp->v_type == VCHR)
636 bp->b_dev = vp->v_rdev;
637 else
638 bp->b_dev = NODEV;
639}
640
641/*
642 * Disassociate a p-buffer from a vnode.
643 */
644void
645pbrelvp(bp)
646 register struct buf *bp;
647{
648 struct vnode *vp;
649
650#if defined(DIAGNOSTIC)
651 if (bp->b_vp == (struct vnode *) 0)
652 panic("pbrelvp: NULL");
653#endif
654
655 bp->b_vp = (struct vnode *) 0;
656}
657
658/*
659 * Reassign a buffer from one vnode to another.
660 * Used to assign file specific control information
661 * (indirect blocks) to the vnode to which they belong.
662 */
663void
664reassignbuf(bp, newvp)
665 register struct buf *bp;
666 register struct vnode *newvp;
667{
668 int s;
669
670 if (newvp == NULL) {
671 printf("reassignbuf: NULL");
672 return;
673 }
674
675 s = splbio();
676 /*
677 * Delete from old vnode list, if on one.
678 */
679 if (bp->b_vnbufs.le_next != NOLIST)
680 bufremvn(bp);
681 /*
682 * If dirty, put on list of dirty buffers; otherwise insert onto list
683 * of clean buffers.
684 */
685 if (bp->b_flags & B_DELWRI) {
686 struct buf *tbp;
687
688 tbp = newvp->v_dirtyblkhd.lh_first;
689 if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
690 bufinsvn(bp, &newvp->v_dirtyblkhd);
691 } else {
692 while (tbp->b_vnbufs.le_next &&
693 (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
694 tbp = tbp->b_vnbufs.le_next;
695 }
696 LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
697 }
698 } else {
699 bufinsvn(bp, &newvp->v_cleanblkhd);
700 }
701 splx(s);
702}
703
704#ifndef DEVFS_ROOT
705/*
706 * Create a vnode for a block device.
707 * Used for root filesystem, argdev, and swap areas.
708 * Also used for memory file system special devices.
709 */
710int
711bdevvp(dev, vpp)
712 dev_t dev;
713 struct vnode **vpp;
714{
715 register struct vnode *vp;
716 struct vnode *nvp;
717 int error;
718
719 if (dev == NODEV)
720 return (0);
721 error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
722 if (error) {
723 *vpp = 0;
724 return (error);
725 }
726 vp = nvp;
727 vp->v_type = VBLK;
728 if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
729 vput(vp);
730 vp = nvp;
731 }
732 *vpp = vp;
733 return (0);
734}
735#endif /* !DEVFS_ROOT */
736
737/*
738 * Check to see if the new vnode represents a special device
739 * for which we already have a vnode (either because of
740 * bdevvp() or because of a different vnode representing
741 * the same block device). If such an alias exists, deallocate
742 * the existing contents and return the aliased vnode. The
743 * caller is responsible for filling it with its new contents.
744 */
745struct vnode *
746checkalias(nvp, nvp_rdev, mp)
747 register struct vnode *nvp;
748 dev_t nvp_rdev;
749 struct mount *mp;
750{
751 struct proc *p = curproc; /* XXX */
752 struct vnode *vp;
753 struct vnode **vpp;
754
755 if (nvp->v_type != VBLK && nvp->v_type != VCHR)
756 return (NULLVP);
757
758 vpp = &speclisth[SPECHASH(nvp_rdev)];
759loop:
760 simple_lock(&spechash_slock);
761 for (vp = *vpp; vp; vp = vp->v_specnext) {
762 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
763 continue;
764 /*
765 * Alias, but not in use, so flush it out.
766 */
767 simple_lock(&vp->v_interlock);
768 if (vp->v_usecount == 0) {
769 simple_unlock(&spechash_slock);
770 vgonel(vp, p);
771 goto loop;
772 }
773 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
774 simple_unlock(&spechash_slock);
775 goto loop;
776 }
777 break;
778 }
779 if (vp == NULL || vp->v_tag != VT_NON) {
780 MALLOC(nvp->v_specinfo, struct specinfo *,
781 sizeof(struct specinfo), M_VNODE, M_WAITOK);
782 nvp->v_rdev = nvp_rdev;
783 nvp->v_hashchain = vpp;
784 nvp->v_specnext = *vpp;
785 nvp->v_specflags = 0;
786 simple_unlock(&spechash_slock);
787 *vpp = nvp;
788 if (vp != NULLVP) {
789 nvp->v_flag |= VALIASED;
790 vp->v_flag |= VALIASED;
791 vput(vp);
792 }
793 return (NULLVP);
794 }
795 simple_unlock(&spechash_slock);
796 VOP_UNLOCK(vp, 0, p);
797 simple_lock(&vp->v_interlock);
798 vclean(vp, 0, p);
799 vp->v_op = nvp->v_op;
800 vp->v_tag = nvp->v_tag;
801 nvp->v_type = VNON;
802 insmntque(vp, mp);
803 return (vp);
804}
805
806/*
807 * Grab a particular vnode from the free list, increment its
 808 * reference count and lock it. If the vnode lock bit is set, the
809 * vnode is being eliminated in vgone. The process is awakened
810 * when the transition is completed, and an error returned to
811 * indicate that the vnode is no longer usable (possibly having
812 * been changed to a new file system type).
813 */
814int
815vget(vp, flags, p)
816 register struct vnode *vp;
817 int flags;
818 struct proc *p;
819{
820 int error;
821
822 /*
823 * If the vnode is in the process of being cleaned out for
824 * another use, we wait for the cleaning to finish and then
825 * return failure. Cleaning is determined by checking that
826 * the VXLOCK flag is set.
827 */
828 if ((flags & LK_INTERLOCK) == 0) {
829 simple_lock(&vp->v_interlock);
830 }
831 if (vp->v_flag & VXLOCK) {
832 vp->v_flag |= VXWANT;
833 simple_unlock(&vp->v_interlock);
834 tsleep((caddr_t)vp, PINOD, "vget", 0);
835 return (ENOENT);
836 }
837 if (vp->v_usecount == 0) {
838 simple_lock(&vnode_free_list_slock);
839 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
840 simple_unlock(&vnode_free_list_slock);
841 freevnodes--;
842 }
843 vp->v_usecount++;
844 /*
845 * Create the VM object, if needed
846 */
847 if ((vp->v_type == VREG) &&
848 ((vp->v_object == NULL) ||
849 (vp->v_object->flags & OBJ_VFS_REF) == 0 ||
850 (vp->v_object->flags & OBJ_DEAD))) {
851 /*
852 * XXX vfs_object_create probably needs the interlock.
853 */
854 simple_unlock(&vp->v_interlock);
855 vfs_object_create(vp, curproc, curproc->p_ucred, 0);
856 simple_lock(&vp->v_interlock);
857 }
858 if (flags & LK_TYPE_MASK) {
 859 		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0)
860 vrele(vp);
861 return (error);
862 }
863 simple_unlock(&vp->v_interlock);
864 return (0);
865}
866
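/*
 * Illustrative sketch of the reference/lock discipline around vget():
 * a transient user takes a locked, referenced vnode with vget() and drops
 * both with vput(); vrele() is the unlocked counterpart.  The function
 * name is hypothetical.
 */
#ifdef notdef
static int
vget_example(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int error;

	if ((error = vget(vp, LK_EXCLUSIVE, p)) != 0)
		return (error);		/* vnode was being cleaned out */
	/* ... use the locked, referenced vnode ... */
	vput(vp);			/* unlock and drop the reference */
	return (0);
}
#endif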
867/*
868 * Stubs to use when there is no locking to be done on the underlying object.
869 * A minimal shared lock is necessary to ensure that the underlying object
870 * is not revoked while an operation is in progress. So, an active shared
 871 * count is maintained in an auxiliary vnode lock structure.
872 */
873int
874vop_sharedlock(ap)
875 struct vop_lock_args /* {
876 struct vnode *a_vp;
877 int a_flags;
878 struct proc *a_p;
879 } */ *ap;
880{
881 /*
882 * This code cannot be used until all the non-locking filesystems
883 * (notably NFS) are converted to properly lock and release nodes.
884 * Also, certain vnode operations change the locking state within
885 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
886 * and symlink). Ideally these operations should not change the
887 * lock state, but should be changed to let the caller of the
888 * function unlock them. Otherwise all intermediate vnode layers
889 * (such as union, umapfs, etc) must catch these functions to do
890 * the necessary locking at their layer. Note that the inactive
891 * and lookup operations also change their lock state, but this
892 * cannot be avoided, so these two operations will always need
893 * to be handled in intermediate layers.
894 */
895 struct vnode *vp = ap->a_vp;
896 int vnflags, flags = ap->a_flags;
897
898 if (vp->v_vnlock == NULL) {
899 if ((flags & LK_TYPE_MASK) == LK_DRAIN)
900 return (0);
901 MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
902 M_VNODE, M_WAITOK);
903 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
904 }
905 switch (flags & LK_TYPE_MASK) {
906 case LK_DRAIN:
907 vnflags = LK_DRAIN;
908 break;
909 case LK_EXCLUSIVE:
910#ifdef DEBUG_VFS_LOCKS
911 /*
912 * Normally, we use shared locks here, but that confuses
913 * the locking assertions.
914 */
915 vnflags = LK_EXCLUSIVE;
916 break;
917#endif
918 case LK_SHARED:
919 vnflags = LK_SHARED;
920 break;
921 case LK_UPGRADE:
922 case LK_EXCLUPGRADE:
923 case LK_DOWNGRADE:
924 return (0);
925 case LK_RELEASE:
926 default:
 927 		panic("vop_sharedlock: bad operation %d", flags & LK_TYPE_MASK);
928 }
929 if (flags & LK_INTERLOCK)
930 vnflags |= LK_INTERLOCK;
931 return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
932}
933
934/*
935 * Stubs to use when there is no locking to be done on the underlying object.
936 * A minimal shared lock is necessary to ensure that the underlying object
937 * is not revoked while an operation is in progress. So, an active shared
 938 * count is maintained in an auxiliary vnode lock structure.
939 */
940int
941vop_nolock(ap)
942 struct vop_lock_args /* {
943 struct vnode *a_vp;
944 int a_flags;
945 struct proc *a_p;
946 } */ *ap;
947{
948#ifdef notyet
949 /*
950 * This code cannot be used until all the non-locking filesystems
951 * (notably NFS) are converted to properly lock and release nodes.
952 * Also, certain vnode operations change the locking state within
953 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
954 * and symlink). Ideally these operations should not change the
955 * lock state, but should be changed to let the caller of the
956 * function unlock them. Otherwise all intermediate vnode layers
957 * (such as union, umapfs, etc) must catch these functions to do
958 * the necessary locking at their layer. Note that the inactive
959 * and lookup operations also change their lock state, but this
960 * cannot be avoided, so these two operations will always need
961 * to be handled in intermediate layers.
962 */
963 struct vnode *vp = ap->a_vp;
964 int vnflags, flags = ap->a_flags;
965
966 if (vp->v_vnlock == NULL) {
967 if ((flags & LK_TYPE_MASK) == LK_DRAIN)
968 return (0);
969 MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
970 M_VNODE, M_WAITOK);
971 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
972 }
973 switch (flags & LK_TYPE_MASK) {
974 case LK_DRAIN:
975 vnflags = LK_DRAIN;
976 break;
977 case LK_EXCLUSIVE:
978 case LK_SHARED:
979 vnflags = LK_SHARED;
980 break;
981 case LK_UPGRADE:
982 case LK_EXCLUPGRADE:
983 case LK_DOWNGRADE:
984 return (0);
985 case LK_RELEASE:
986 default:
987 panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
988 }
989 if (flags & LK_INTERLOCK)
990 vnflags |= LK_INTERLOCK;
991 return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
992#else /* for now */
993 /*
994 * Since we are not using the lock manager, we must clear
995 * the interlock here.
996 */
997 if (ap->a_flags & LK_INTERLOCK) {
998 simple_unlock(&ap->a_vp->v_interlock);
999 }
1000 return (0);
1001#endif
1002}
1003
1004/*
1005 * Do the inverse of vop_nolock, handling the interlock in a compatible way.
1006 */
1007int
1008vop_nounlock(ap)
1009 struct vop_unlock_args /* {
1010 struct vnode *a_vp;
1011 int a_flags;
1012 struct proc *a_p;
1013 } */ *ap;
1014{
1015 struct vnode *vp = ap->a_vp;
1016
1017 if (vp->v_vnlock == NULL) {
1018 if (ap->a_flags & LK_INTERLOCK)
1019 simple_unlock(&ap->a_vp->v_interlock);
1020 return (0);
1021 }
1022 return (lockmgr(vp->v_vnlock, LK_RELEASE | ap->a_flags,
1023 &ap->a_vp->v_interlock, ap->a_p));
1024}
1025
1026/*
 1027 * Return whether or not the node is locked.
1028 */
1029int
1030vop_noislocked(ap)
1031 struct vop_islocked_args /* {
1032 struct vnode *a_vp;
1033 } */ *ap;
1034{
1035 struct vnode *vp = ap->a_vp;
1036
1037 if (vp->v_vnlock == NULL)
1038 return (0);
1039 return (lockstatus(vp->v_vnlock));
1040}
1041
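/*
 * Illustrative sketch of how a non-locking filesystem would point the lock
 * entries of its vnode operation table at the stubs above.  The descriptor
 * names are assumed to follow the usual vnode_if naming (vop_lock_desc and
 * friends); check the generated vnode_if.h for the exact symbols.
 */
#ifdef notdef
	{ &vop_lock_desc,	(vop_t *) vop_nolock },
	{ &vop_unlock_desc,	(vop_t *) vop_nounlock },
	{ &vop_islocked_desc,	(vop_t *) vop_noislocked },
#endif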
1042/* #ifdef DIAGNOSTIC */
1043/*
1044 * Vnode reference, just increment the count
1045 */
1046void
1047vref(vp)
1048 struct vnode *vp;
1049{
1050 simple_lock(&vp->v_interlock);
1051 if (vp->v_usecount <= 0)
1052 panic("vref used where vget required");
1053
1054 vp->v_usecount++;
1055
1056 if ((vp->v_type == VREG) &&
1057 ((vp->v_object == NULL) ||
1058 ((vp->v_object->flags & OBJ_VFS_REF) == 0) ||
1059 (vp->v_object->flags & OBJ_DEAD))) {
1060 /*
 1061 * We need to lock the VP during the time that
1062 * the object is created. This is necessary to
1063 * keep the system from re-entrantly doing it
1064 * multiple times.
1065 * XXX vfs_object_create probably needs the interlock?
1066 */
1067 simple_unlock(&vp->v_interlock);
1068 vfs_object_create(vp, curproc, curproc->p_ucred, 0);
1069 return;
1070 }
1071 simple_unlock(&vp->v_interlock);
1072}
1073
1074/*
1075 * Vnode put/release.
1076 * If count drops to zero, call inactive routine and return to freelist.
1077 */
1078static void
1079vputrele(vp, put)
1080 struct vnode *vp;
1081 int put;
1082{
1083 struct proc *p = curproc; /* XXX */
1084
1085#ifdef DIAGNOSTIC
1086 if (vp == NULL)
1087 panic("vputrele: null vp");
1088#endif
1089 simple_lock(&vp->v_interlock);
1090 vp->v_usecount--;
1091
1092 if ((vp->v_usecount == 1) &&
1093 vp->v_object &&
1094 (vp->v_object->flags & OBJ_VFS_REF)) {
1095 vp->v_object->flags &= ~OBJ_VFS_REF;
1096 if (put) {
1097 VOP_UNLOCK(vp, LK_INTERLOCK, p);
1098 } else {
1099 simple_unlock(&vp->v_interlock);
1100 }
1101 vm_object_deallocate(vp->v_object);
1102 return;
1103 }
1104
1105 if (vp->v_usecount > 0) {
1106 if (put) {
1107 VOP_UNLOCK(vp, LK_INTERLOCK, p);
1108 } else {
1109 simple_unlock(&vp->v_interlock);
1110 }
1111 return;
1112 }
1113
1114 if (vp->v_usecount < 0) {
1115#ifdef DIAGNOSTIC
1116 vprint("vputrele: negative ref count", vp);
1117#endif
1118 panic("vputrele: negative ref cnt");
1119 }
1120 simple_lock(&vnode_free_list_slock);
1121 if (vp->v_flag & VAGE) {
1122 vp->v_flag &= ~VAGE;
1123 if(vp->v_tag != VT_TFS)
1124 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1125 } else {
1126 if(vp->v_tag != VT_TFS)
1127 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1128 }
1129 freevnodes++;
1130 simple_unlock(&vnode_free_list_slock);
1131
1132 /*
1133 * If we are doing a vput, the node is already locked, and we must
1134 * call VOP_INACTIVE with the node locked. So, in the case of
1135 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
1136 */
1137 if (put) {
1138 simple_unlock(&vp->v_interlock);
1139 VOP_INACTIVE(vp, p);
1140 } else if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1141 VOP_INACTIVE(vp, p);
1142 }
1143}
1144
1145/*
1146 * vput(), just unlock and vrele()
1147 */
1148void
1149vput(vp)
1150 struct vnode *vp;
1151{
1152 vputrele(vp, 1);
1153}
1154
1155void
1156vrele(vp)
1157 struct vnode *vp;
1158{
1159 vputrele(vp, 0);
1160}
1161
1162#ifdef DIAGNOSTIC
1163/*
1164 * Page or buffer structure gets a reference.
1165 */
1166void
1167vhold(vp)
1168 register struct vnode *vp;
1169{
1170
1171 simple_lock(&vp->v_interlock);
1172 vp->v_holdcnt++;
1173 simple_unlock(&vp->v_interlock);
1174}
1175
1176/*
1177 * Page or buffer structure frees a reference.
1178 */
1179void
1180holdrele(vp)
1181 register struct vnode *vp;
1182{
1183
1184 simple_lock(&vp->v_interlock);
1185 if (vp->v_holdcnt <= 0)
1186 panic("holdrele: holdcnt");
1187 vp->v_holdcnt--;
1188 simple_unlock(&vp->v_interlock);
1189}
1190#endif /* DIAGNOSTIC */
1191
1192/*
1193 * Remove any vnodes in the vnode table belonging to mount point mp.
1194 *
1195 * If MNT_NOFORCE is specified, there should not be any active ones,
1196 * return error if any are found (nb: this is a user error, not a
1197 * system error). If MNT_FORCE is specified, detach any active vnodes
1198 * that are found.
1199 */
1200#ifdef DIAGNOSTIC
1201static int busyprt = 0; /* print out busy vnodes */
1202SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
1203#endif
1204
1205int
1206vflush(mp, skipvp, flags)
1207 struct mount *mp;
1208 struct vnode *skipvp;
1209 int flags;
1210{
1211 struct proc *p = curproc; /* XXX */
1212 struct vnode *vp, *nvp;
1213 int busy = 0;
1214
1215 simple_lock(&mntvnode_slock);
1216loop:
1217 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1218 /*
1219 * Make sure this vnode wasn't reclaimed in getnewvnode().
1220 * Start over if it has (it won't be on the list anymore).
1221 */
1222 if (vp->v_mount != mp)
1223 goto loop;
1224 nvp = vp->v_mntvnodes.le_next;
1225 /*
1226 * Skip over a selected vnode.
1227 */
1228 if (vp == skipvp)
1229 continue;
1230
1231 simple_lock(&vp->v_interlock);
1232 /*
 1233 * Skip over vnodes marked VSYSTEM.
1234 */
1235 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1236 simple_unlock(&vp->v_interlock);
1237 continue;
1238 }
1239 /*
1240 * If WRITECLOSE is set, only flush out regular file vnodes
1241 * open for writing.
1242 */
1243 if ((flags & WRITECLOSE) &&
1244 (vp->v_writecount == 0 || vp->v_type != VREG)) {
1245 simple_unlock(&vp->v_interlock);
1246 continue;
1247 }
1248
1249 /*
1250 * With v_usecount == 0, all we need to do is clear out the
1251 * vnode data structures and we are done.
1252 */
1253 if (vp->v_usecount == 0) {
1254 simple_unlock(&mntvnode_slock);
1255 vgonel(vp, p);
1256 simple_lock(&mntvnode_slock);
1257 continue;
1258 }
1259
1260 /*
1261 * If FORCECLOSE is set, forcibly close the vnode. For block
1262 * or character devices, revert to an anonymous device. For
1263 * all other files, just kill them.
1264 */
1265 if (flags & FORCECLOSE) {
1266 simple_unlock(&mntvnode_slock);
1267 if (vp->v_type != VBLK && vp->v_type != VCHR) {
1268 vgonel(vp, p);
1269 } else {
1270 vclean(vp, 0, p);
1271 vp->v_op = spec_vnodeop_p;
1272 insmntque(vp, (struct mount *) 0);
1273 }
1274 simple_lock(&mntvnode_slock);
1275 continue;
1276 }
1277#ifdef DIAGNOSTIC
1278 if (busyprt)
1279 vprint("vflush: busy vnode", vp);
1280#endif
1281 simple_unlock(&vp->v_interlock);
1282 busy++;
1283 }
1284 simple_unlock(&mntvnode_slock);
1285 if (busy)
1286 return (EBUSY);
1287 return (0);
1288}
1289
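/*
 * Illustrative sketch of the expected vflush() call from a filesystem
 * unmount routine: FORCECLOSE only for forced unmounts, and no vnode
 * exempted from the flush.  The function name is hypothetical.
 */
#ifdef notdef
static int
vflush_example(mp, mntflags)
	struct mount *mp;
	int mntflags;
{
	int flags = 0;

	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;
	return (vflush(mp, NULLVP, flags));	/* EBUSY if vnodes stay busy */
}
#endif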
1290/*
1291 * Disassociate the underlying file system from a vnode.
1292 */
1293static void
1294vclean(struct vnode *vp, int flags, struct proc *p)
1295{
1296 int active, irefed;
1297 vm_object_t object;
1298
1299 /*
1300 * Check to see if the vnode is in use. If so we have to reference it
1301 * before we clean it out so that its count cannot fall to zero and
1302 * generate a race against ourselves to recycle it.
1303 */
1304 if ((active = vp->v_usecount))
1305 vp->v_usecount++;
1306 /*
1307 * Prevent the vnode from being recycled or brought into use while we
1308 * clean it out.
1309 */
1310 if (vp->v_flag & VXLOCK)
1311 panic("vclean: deadlock");
1312 vp->v_flag |= VXLOCK;
1313 /*
1314 * Even if the count is zero, the VOP_INACTIVE routine may still
1315 * have the object locked while it cleans it out. The VOP_LOCK
1316 * ensures that the VOP_INACTIVE routine is done with its work.
1317 * For active vnodes, it ensures that no other activity can
1318 * occur while the underlying object is being cleaned out.
1319 */
1320 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1321
1322 object = vp->v_object;
1323 irefed = 0;
1324 if (object && ((object->flags & OBJ_DEAD) == 0)) {
1325 if (object->ref_count == 0) {
1326 vm_object_reference(object);
1327 irefed = 1;
1328 }
1329 ++object->ref_count;
1330 pager_cache(object, FALSE);
1331 }
1332
1333 /*
1334 * Clean out any buffers associated with the vnode.
1335 */
1336 if (flags & DOCLOSE)
1337 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1338
1339 if (irefed) {
1340 vm_object_deallocate(object);
1341 }
1342
1343 /*
1344 * If purging an active vnode, it must be closed and
1345 * deactivated before being reclaimed. Note that the
1346 * VOP_INACTIVE will unlock the vnode.
1347 */
1348 if (active) {
1349 if (flags & DOCLOSE)
1350 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1351 VOP_INACTIVE(vp, p);
1352 } else {
1353 /*
1354 * Any other processes trying to obtain this lock must first
1355 * wait for VXLOCK to clear, then call the new lock operation.
1356 */
1357 VOP_UNLOCK(vp, 0, p);
1358 }
1359 /*
1360 * Reclaim the vnode.
1361 */
1362 if (VOP_RECLAIM(vp, p))
1363 panic("vclean: cannot reclaim");
1364 if (active)
1365 vrele(vp);
1366 cache_purge(vp);
1367 if (vp->v_vnlock) {
1368#ifdef DIAGNOSTIC
1369 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1370 vprint("vclean: lock not drained", vp);
1371#endif
1372 FREE(vp->v_vnlock, M_VNODE);
1373 vp->v_vnlock = NULL;
1374 }
1375
1376 /*
1377 * Done with purge, notify sleepers of the grim news.
1378 */
1379 vp->v_op = dead_vnodeop_p;
1380 vp->v_tag = VT_NON;
1381 vp->v_flag &= ~VXLOCK;
1382 if (vp->v_flag & VXWANT) {
1383 vp->v_flag &= ~VXWANT;
1384 wakeup((caddr_t) vp);
1385 }
1386}
1387
1388/*
1389 * Eliminate all activity associated with the requested vnode
1390 * and with all vnodes aliased to the requested vnode.
1391 */
1392int
1393vop_revoke(ap)
1394 struct vop_revoke_args /* {
1395 struct vnode *a_vp;
1396 int a_flags;
1397 } */ *ap;
1398{
1399 struct vnode *vp, *vq;
1400 struct proc *p = curproc; /* XXX */
1401
1402#ifdef DIAGNOSTIC
1403 if ((ap->a_flags & REVOKEALL) == 0)
1404 panic("vop_revoke");
1405#endif
1406
1407 vp = ap->a_vp;
1408 simple_lock(&vp->v_interlock);
1409
1410 if (vp->v_flag & VALIASED) {
1411 /*
1412 * If a vgone (or vclean) is already in progress,
1413 * wait until it is done and return.
1414 */
1415 if (vp->v_flag & VXLOCK) {
1416 vp->v_flag |= VXWANT;
1417 simple_unlock(&vp->v_interlock);
1418 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1419 return (0);
1420 }
1421 /*
1422 * Ensure that vp will not be vgone'd while we
1423 * are eliminating its aliases.
1424 */
1425 vp->v_flag |= VXLOCK;
1426 simple_unlock(&vp->v_interlock);
1427 while (vp->v_flag & VALIASED) {
1428 simple_lock(&spechash_slock);
1429 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1430 if (vq->v_rdev != vp->v_rdev ||
1431 vq->v_type != vp->v_type || vp == vq)
1432 continue;
1433 simple_unlock(&spechash_slock);
1434 vgone(vq);
1435 break;
1436 }
1437 if (vq == NULLVP) {
1438 simple_unlock(&spechash_slock);
1439 }
1440 }
1441 /*
1442 * Remove the lock so that vgone below will
1443 * really eliminate the vnode after which time
1444 * vgone will awaken any sleepers.
1445 */
1446 simple_lock(&vp->v_interlock);
1447 vp->v_flag &= ~VXLOCK;
1448 }
1449 vgonel(vp, p);
1450 return (0);
1451}
1452
1453/*
1454 * Recycle an unused vnode to the front of the free list.
1455 * Release the passed interlock if the vnode will be recycled.
1456 */
1457int
1458vrecycle(vp, inter_lkp, p)
1459 struct vnode *vp;
1460 struct simplelock *inter_lkp;
1461 struct proc *p;
1462{
1463
1464 simple_lock(&vp->v_interlock);
1465 if (vp->v_usecount == 0) {
1466 if (inter_lkp) {
1467 simple_unlock(inter_lkp);
1468 }
1469 vgonel(vp, p);
1470 return (1);
1471 }
1472 simple_unlock(&vp->v_interlock);
1473 return (0);
1474}
1475
1476/*
1477 * Eliminate all activity associated with a vnode
1478 * in preparation for reuse.
1479 */
1480void
1481vgone(vp)
1482 register struct vnode *vp;
1483{
1484 struct proc *p = curproc; /* XXX */
1485
1486 simple_lock(&vp->v_interlock);
1487 vgonel(vp, p);
1488}
1489
1490/*
1491 * vgone, with the vp interlock held.
1492 */
1493static void
1494vgonel(vp, p)
1495 struct vnode *vp;
1496 struct proc *p;
1497{
1498 struct vnode *vq;
1499 struct vnode *vx;
1500
1501 /*
1502 * If a vgone (or vclean) is already in progress,
1503 * wait until it is done and return.
1504 */
1505 if (vp->v_flag & VXLOCK) {
1506 vp->v_flag |= VXWANT;
1507 simple_unlock(&vp->v_interlock);
1508 tsleep((caddr_t)vp, PINOD, "vgone", 0);
1509 return;
1510 }
1511
1512 if (vp->v_object) {
1513 vp->v_object->flags |= OBJ_VNODE_GONE;
1514 }
1515
1516 /*
1517 * Clean out the filesystem specific data.
1518 */
1519 vclean(vp, DOCLOSE, p);
1520 /*
1521 * Delete from old mount point vnode list, if on one.
1522 */
1523 if (vp->v_mount != NULL)
1524 insmntque(vp, (struct mount *)0);
1525 /*
1526 * If special device, remove it from special device alias list
1527 * if it is on one.
1528 */
1529 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1530 simple_lock(&spechash_slock);
1531 if (*vp->v_hashchain == vp) {
1532 *vp->v_hashchain = vp->v_specnext;
1533 } else {
1534 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1535 if (vq->v_specnext != vp)
1536 continue;
1537 vq->v_specnext = vp->v_specnext;
1538 break;
1539 }
1540 if (vq == NULL)
1541 panic("missing bdev");
1542 }
1543 if (vp->v_flag & VALIASED) {
1544 vx = NULL;
1545 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1546 if (vq->v_rdev != vp->v_rdev ||
1547 vq->v_type != vp->v_type)
1548 continue;
1549 if (vx)
1550 break;
1551 vx = vq;
1552 }
1553 if (vx == NULL)
1554 panic("missing alias");
1555 if (vq == NULL)
1556 vx->v_flag &= ~VALIASED;
1557 vp->v_flag &= ~VALIASED;
1558 }
1559 simple_unlock(&spechash_slock);
1560 FREE(vp->v_specinfo, M_VNODE);
1561 vp->v_specinfo = NULL;
1562 }
1563
1564 /*
1565 * If it is on the freelist and not already at the head,
1566 * move it to the head of the list. The test of the back
1567 * pointer and the reference count of zero is because
1568 * it will be removed from the free list by getnewvnode,
1569 * but will not have its reference count incremented until
1570 * after calling vgone. If the reference count were
1571 * incremented first, vgone would (incorrectly) try to
1572 * close the previous instance of the underlying object.
1573 * So, the back pointer is explicitly set to `0xdeadb' in
1574 * getnewvnode after removing it from the freelist to ensure
1575 * that we do not try to move it here.
1576 */
1577 if (vp->v_usecount == 0) {
1578 simple_lock(&vnode_free_list_slock);
1579 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1580 vnode_free_list.tqh_first != vp) {
1581 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1582 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1583 }
1584 simple_unlock(&vnode_free_list_slock);
1585 }
1586
1587 vp->v_type = VBAD;
1588}
1589
1590/*
1591 * Lookup a vnode by device number.
1592 */
1593int
1594vfinddev(dev, type, vpp)
1595 dev_t dev;
1596 enum vtype type;
1597 struct vnode **vpp;
1598{
1599 register struct vnode *vp;
1600 int rc = 0;
1601
1602 simple_lock(&spechash_slock);
1603 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1604 if (dev != vp->v_rdev || type != vp->v_type)
1605 continue;
1606 *vpp = vp;
1607 rc = 1;
1608 break;
1609 }
1610 simple_unlock(&spechash_slock);
1611 return (rc);
1612}
1613
1614/*
1615 * Calculate the total number of references to a special device.
1616 */
1617int
1618vcount(vp)
1619 register struct vnode *vp;
1620{
1621 struct vnode *vq, *vnext;
1622 int count;
1623
1624loop:
1625 if ((vp->v_flag & VALIASED) == 0)
1626 return (vp->v_usecount);
1627 simple_lock(&spechash_slock);
1628 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1629 vnext = vq->v_specnext;
1630 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1631 continue;
1632 /*
1633 * Alias, but not in use, so flush it out.
1634 */
1635 if (vq->v_usecount == 0 && vq != vp) {
1636 simple_unlock(&spechash_slock);
1637 vgone(vq);
1638 goto loop;
1639 }
1640 count += vq->v_usecount;
1641 }
1642 simple_unlock(&spechash_slock);
1643 return (count);
1644}
1645
1646/*
1647 * Print out a description of a vnode.
1648 */
1649static char *typename[] =
1650{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
1651
1652void
1653vprint(label, vp)
1654 char *label;
1655 register struct vnode *vp;
1656{
1657 char buf[64];
1658
1659 if (label != NULL)
1660 printf("%s: %x: ", label, vp);
1661 else
1662 printf("%x: ", vp);
1663 printf("type %s, usecount %d, writecount %d, refcount %ld,",
1664 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1665 vp->v_holdcnt);
1666 buf[0] = '\0';
1667 if (vp->v_flag & VROOT)
1668 strcat(buf, "|VROOT");
1669 if (vp->v_flag & VTEXT)
1670 strcat(buf, "|VTEXT");
1671 if (vp->v_flag & VSYSTEM)
1672 strcat(buf, "|VSYSTEM");
1673 if (vp->v_flag & VXLOCK)
1674 strcat(buf, "|VXLOCK");
1675 if (vp->v_flag & VXWANT)
1676 strcat(buf, "|VXWANT");
1677 if (vp->v_flag & VBWAIT)
1678 strcat(buf, "|VBWAIT");
1679 if (vp->v_flag & VALIASED)
1680 strcat(buf, "|VALIASED");
1681 if (buf[0] != '\0')
1682 printf(" flags (%s)", &buf[1]);
1683 if (vp->v_data == NULL) {
1684 printf("\n");
1685 } else {
1686 printf("\n\t");
1687 VOP_PRINT(vp);
1688 }
1689}
1690
1691#ifdef DDB
1692/*
1693 * List all of the locked vnodes in the system.
1694 * Called when debugging the kernel.
1695 */
1696void
1697printlockedvnodes()
1698{
1699 struct proc *p = curproc; /* XXX */
1700 struct mount *mp, *nmp;
1701 struct vnode *vp;
1702
1703 printf("Locked vnodes\n");
1704 simple_lock(&mountlist_slock);
1705 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1706 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1707 nmp = mp->mnt_list.cqe_next;
1708 continue;
1709 }
1710 for (vp = mp->mnt_vnodelist.lh_first;
1711 vp != NULL;
1712 vp = vp->v_mntvnodes.le_next) {
1713 if (VOP_ISLOCKED(vp))
1714 vprint((char *)0, vp);
1715 }
1716 simple_lock(&mountlist_slock);
1717 nmp = mp->mnt_list.cqe_next;
1718 vfs_unbusy(mp, p);
1719 }
1720 simple_unlock(&mountlist_slock);
1721}
1722#endif
1723
1724/*
1725 * Top level filesystem related information gathering.
1726 */
1727static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);
1728
1729static int
1730vfs_sysctl SYSCTL_HANDLER_ARGS
1731{
1732 int *name = (int *)arg1 - 1; /* XXX */
1733 u_int namelen = arg2 + 1; /* XXX */
1734 struct vfsconf *vfsp;
1735
1736#ifndef NO_COMPAT_PRELITE2
1737 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
1738 if (namelen == 1)
1739 return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
1740#endif
1741
1742#ifdef notyet
1743 /* all sysctl names at this level are at least name and field */
1744 if (namelen < 2)
1745 return (ENOTDIR); /* overloaded */
1746 if (name[0] != VFS_GENERIC) {
1747 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1748 if (vfsp->vfc_typenum == name[0])
1749 break;
1750 if (vfsp == NULL)
1751 return (EOPNOTSUPP);
1752 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1753 oldp, oldlenp, newp, newlen, p));
1754 }
1755#endif
1756 switch (name[1]) {
1757 case VFS_MAXTYPENUM:
1758 if (namelen != 2)
1759 return (ENOTDIR);
1760 return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
1761 case VFS_CONF:
1762 if (namelen != 3)
1763 return (ENOTDIR); /* overloaded */
1764 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1765 if (vfsp->vfc_typenum == name[2])
1766 break;
1767 if (vfsp == NULL)
1768 return (EOPNOTSUPP);
1769 return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
1770 }
1771 return (EOPNOTSUPP);
1772}
1773
1774SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
1775 "Generic filesystem");
1776
1777#ifndef NO_COMPAT_PRELITE2
1778
1779static int
1780sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
1781{
1782 int error;
1783 struct vfsconf *vfsp;
1784 struct ovfsconf ovfs;
1785
1786 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
1787 ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */
1788 strcpy(ovfs.vfc_name, vfsp->vfc_name);
1789 ovfs.vfc_index = vfsp->vfc_typenum;
1790 ovfs.vfc_refcount = vfsp->vfc_refcount;
1791 ovfs.vfc_flags = vfsp->vfc_flags;
1792 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
1793 if (error)
1794 return error;
1795 }
1796 return 0;
1797}
1798
1799#endif /* !NO_COMPAT_PRELITE2 */
1800
1801int kinfo_vdebug = 1;
1802int kinfo_vgetfailed;
1803
1804#define KINFO_VNODESLOP 10
1805/*
1806 * Dump vnode list (via sysctl).
1807 * Copyout address of vnode followed by vnode.
1808 */
1809/* ARGSUSED */
1810static int
1811sysctl_vnode SYSCTL_HANDLER_ARGS
1812{
1813 struct proc *p = curproc; /* XXX */
1814 struct mount *mp, *nmp;
1815 struct vnode *nvp, *vp;
1816 int error;
1817
1818#define VPTRSZ sizeof (struct vnode *)
1819#define VNODESZ sizeof (struct vnode)
1820
1821 req->lock = 0;
1822 if (!req->oldptr) /* Make an estimate */
1823 return (SYSCTL_OUT(req, 0,
1824 (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
1825
1826 simple_lock(&mountlist_slock);
1827 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1828 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1829 nmp = mp->mnt_list.cqe_next;
1830 continue;
1831 }
1832again:
1833 simple_lock(&mntvnode_slock);
1834 for (vp = mp->mnt_vnodelist.lh_first;
1835 vp != NULL;
1836 vp = nvp) {
1837 /*
1838 * Check that the vp is still associated with
1839 * this filesystem. RACE: could have been
1840 * recycled onto the same filesystem.
1841 */
1842 if (vp->v_mount != mp) {
1843 simple_unlock(&mntvnode_slock);
1844 if (kinfo_vdebug)
1845 printf("kinfo: vp changed\n");
1846 goto again;
1847 }
1848 nvp = vp->v_mntvnodes.le_next;
1849 simple_unlock(&mntvnode_slock);
1850 if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
1851 (error = SYSCTL_OUT(req, vp, VNODESZ)))
1852 return (error);
1853 simple_lock(&mntvnode_slock);
1854 }
1855 simple_unlock(&mntvnode_slock);
1856 simple_lock(&mountlist_slock);
1857 nmp = mp->mnt_list.cqe_next;
1858 vfs_unbusy(mp, p);
1859 }
1860 simple_unlock(&mountlist_slock);
1861
1862 return (0);
1863}
1864
1865/*
1866 * XXX
1867 * Exporting the vnode list on large systems causes them to crash.
1868 * Exporting the vnode list on medium systems causes sysctl to coredump.
1869 */
1870#if 0
1871SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
1872 0, 0, sysctl_vnode, "S,vnode", "");
1873#endif
1874
1875/*
1876 * Check to see if a filesystem is mounted on a block device.
1877 */
1878int
1879vfs_mountedon(vp)
1880 struct vnode *vp;
1881{
1882 struct vnode *vq;
1883 int error = 0;
1884
1885 if (vp->v_specflags & SI_MOUNTEDON)
1886 return (EBUSY);
1887 if (vp->v_flag & VALIASED) {
1888 simple_lock(&spechash_slock);
1889 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1890 if (vq->v_rdev != vp->v_rdev ||
1891 vq->v_type != vp->v_type)
1892 continue;
1893 if (vq->v_specflags & SI_MOUNTEDON) {
1894 error = EBUSY;
1895 break;
1896 }
1897 }
1898 simple_unlock(&spechash_slock);
1899 }
1900 return (error);
1901}
1902
1903/*
1904 * Unmount all filesystems. The list is traversed in reverse order
1905 * of mounting to avoid dependencies.
1906 */
1907void
1908vfs_unmountall()
1909{
1910 struct mount *mp, *nmp;
1911 struct proc *p = initproc; /* XXX XXX should this be proc0? */
1912 int error;
1913
1914 /*
40 */
41
42/*
43 * External virtual filesystem routines
44 */
45#include "opt_ddb.h"
46#include "opt_devfs.h"
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/kernel.h>
51#include <sys/file.h>
52#include <sys/proc.h>
53#include <sys/mount.h>
54#include <sys/time.h>
55#include <sys/vnode.h>
56#include <sys/stat.h>
57#include <sys/namei.h>
58#include <sys/ucred.h>
59#include <sys/buf.h>
60#include <sys/errno.h>
61#include <sys/malloc.h>
62#include <sys/domain.h>
63#include <sys/mbuf.h>
64#include <sys/dirent.h>
65
66#include <vm/vm.h>
67#include <vm/vm_param.h>
68#include <vm/vm_object.h>
69#include <vm/vm_extern.h>
70#include <vm/vm_pager.h>
71#include <vm/vnode_pager.h>
72#include <sys/sysctl.h>
73
74#include <miscfs/specfs/specdev.h>
75
76#ifdef DDB
77extern void printlockedvnodes __P((void));
78#endif
79static void vclean __P((struct vnode *vp, int flags, struct proc *p));
80static void vgonel __P((struct vnode *vp, struct proc *p));
81unsigned long numvnodes;
82SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
83static void vputrele __P((struct vnode *vp, int put));
84
85enum vtype iftovt_tab[16] = {
86 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
87 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
88};
89int vttoif_tab[9] = {
90 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
91 S_IFSOCK, S_IFIFO, S_IFMT,
92};
93
94/*
95 * Insq/Remq for the vnode usage lists.
96 */
97#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
98#define bufremvn(bp) { \
99 LIST_REMOVE(bp, b_vnbufs); \
100 (bp)->b_vnbufs.le_next = NOLIST; \
101}
102TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
103static u_long freevnodes = 0;
104
105struct mntlist mountlist; /* mounted filesystem list */
106struct simplelock mountlist_slock;
107static struct simplelock mntid_slock;
108struct simplelock mntvnode_slock;
109struct simplelock vnode_free_list_slock;
110static struct simplelock spechash_slock;
111struct nfs_public nfs_pub; /* publicly exported FS */
112
113int desiredvnodes;
114SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");
115
116static void vfs_free_addrlist __P((struct netexport *nep));
117static int vfs_free_netcred __P((struct radix_node *rn, void *w));
118static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
119 struct export_args *argp));
120
121/*
122 * Initialize the vnode management data structures.
123 */
124void
125vntblinit()
126{
127
128 desiredvnodes = maxproc + vm_object_cache_max;
129 simple_lock_init(&mntvnode_slock);
130 simple_lock_init(&mntid_slock);
131 simple_lock_init(&spechash_slock);
132 TAILQ_INIT(&vnode_free_list);
133 simple_lock_init(&vnode_free_list_slock);
134 CIRCLEQ_INIT(&mountlist);
135}
136
137/*
138 * Mark a mount point as busy. Used to synchronize access and to delay
139 * unmounting. Interlock is not released on failure.
140 */
141int
142vfs_busy(mp, flags, interlkp, p)
143 struct mount *mp;
144 int flags;
145 struct simplelock *interlkp;
146 struct proc *p;
147{
148 int lkflags;
149
150 if (mp->mnt_flag & MNT_UNMOUNT) {
151 if (flags & LK_NOWAIT)
152 return (ENOENT);
153 mp->mnt_flag |= MNT_MWAIT;
154 if (interlkp) {
155 simple_unlock(interlkp);
156 }
157 /*
158 * Since all busy locks are shared except the exclusive
159 * lock granted when unmounting, the only place that a
160 * wakeup needs to be done is at the release of the
161 * exclusive lock at the end of dounmount.
162 */
163 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
164 if (interlkp) {
165 simple_lock(interlkp);
166 }
167 return (ENOENT);
168 }
169 lkflags = LK_SHARED;
170 if (interlkp)
171 lkflags |= LK_INTERLOCK;
172 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
173 panic("vfs_busy: unexpected lock failure");
174 return (0);
175}
176
177/*
178 * Free a busy filesystem.
179 */
180void
181vfs_unbusy(mp, p)
182 struct mount *mp;
183 struct proc *p;
184{
185
186 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
187}
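
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * the usual pattern for walking the mount list under vfs_busy()/vfs_unbusy(),
 * as done by printlockedvnodes() and sysctl_vnode() below.  The per-mount
 * work itself is elided.
 */
#ifdef notdef
static void
example_foreach_mount(struct proc *p)
{
	struct mount *mp, *nmp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		/* ... operate on the busied mount point here ... */
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif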
188
189/*
190 * Lookup a filesystem type, and if found allocate and initialize
191 * a mount structure for it.
192 *
193 * Devname is usually updated by mount(8) after booting.
194 */
195int
196vfs_rootmountalloc(fstypename, devname, mpp)
197 char *fstypename;
198 char *devname;
199 struct mount **mpp;
200{
201 struct proc *p = curproc; /* XXX */
202 struct vfsconf *vfsp;
203 struct mount *mp;
204
205 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
206 if (!strcmp(vfsp->vfc_name, fstypename))
207 break;
208 if (vfsp == NULL)
209 return (ENODEV);
210 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
211 bzero((char *)mp, (u_long)sizeof(struct mount));
212 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
213 (void)vfs_busy(mp, LK_NOWAIT, 0, p);
214 LIST_INIT(&mp->mnt_vnodelist);
215 mp->mnt_vfc = vfsp;
216 mp->mnt_op = vfsp->vfc_vfsops;
217 mp->mnt_flag = MNT_RDONLY;
218 mp->mnt_vnodecovered = NULLVP;
219 vfsp->vfc_refcount++;
220 mp->mnt_stat.f_type = vfsp->vfc_typenum;
221 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
222 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
223 mp->mnt_stat.f_mntonname[0] = '/';
224 mp->mnt_stat.f_mntonname[1] = 0;
225 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
226 *mpp = mp;
227 return (0);
228}
229
230/*
231 * Find an appropriate filesystem to use for the root. If a filesystem
232 * has not been preselected, walk through the list of known filesystems
233 * trying those that have mountroot routines, and try them until one
234 * works or we have tried them all.
235 */
236#ifdef notdef /* XXX JH */
237int
238lite2_vfs_mountroot(void)
239{
240 struct vfsconf *vfsp;
241 extern int (*lite2_mountroot)(void);
242 int error;
243
244 if (lite2_mountroot != NULL)
245 return ((*lite2_mountroot)());
246 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
247 if (vfsp->vfc_mountroot == NULL)
248 continue;
249 if ((error = (*vfsp->vfc_mountroot)()) == 0)
250 return (0);
251 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
252 }
253 return (ENODEV);
254}
255#endif
256
257/*
258 * Lookup a mount point by filesystem identifier.
259 */
260struct mount *
261vfs_getvfs(fsid)
262 fsid_t *fsid;
263{
264 register struct mount *mp;
265
266 simple_lock(&mountlist_slock);
267 for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
268 mp = mp->mnt_list.cqe_next) {
269 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
270 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
271 simple_unlock(&mountlist_slock);
272 return (mp);
273 }
274 }
275 simple_unlock(&mountlist_slock);
276 return ((struct mount *) 0);
277}
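
/*
 * Illustrative sketch (editor's addition, hypothetical caller): a typical
 * consumer of vfs_getvfs() maps the fsid embedded in a file handle back to
 * its mount point before translating the rest of the handle.
 */
#ifdef notdef
static struct mount *
example_fhtomount(fhandle_t *fhp)
{
	/* Returns NULL when no filesystem with that fsid is mounted. */
	return (vfs_getvfs(&fhp->fh_fsid));
}
#endif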
278
279/*
280 * Get a new unique fsid
281 */
282void
283vfs_getnewfsid(mp)
284 struct mount *mp;
285{
286 static u_short xxxfs_mntid;
287
288 fsid_t tfsid;
289 int mtype;
290
291 simple_lock(&mntid_slock);
292 mtype = mp->mnt_vfc->vfc_typenum;
293 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
294 mp->mnt_stat.f_fsid.val[1] = mtype;
295 if (xxxfs_mntid == 0)
296 ++xxxfs_mntid;
297 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
298 tfsid.val[1] = mtype;
299 if (mountlist.cqh_first != (void *)&mountlist) {
300 while (vfs_getvfs(&tfsid)) {
301 tfsid.val[0]++;
302 xxxfs_mntid++;
303 }
304 }
305 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
306 simple_unlock(&mntid_slock);
307}
308
309/*
310 * Set vnode attributes to VNOVAL
311 */
312void
313vattr_null(vap)
314 register struct vattr *vap;
315{
316
317 vap->va_type = VNON;
318 vap->va_size = VNOVAL;
319 vap->va_bytes = VNOVAL;
320 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
321 vap->va_fsid = vap->va_fileid =
322 vap->va_blocksize = vap->va_rdev =
323 vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
324 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
325 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
326 vap->va_flags = vap->va_gen = VNOVAL;
327 vap->va_vaflags = 0;
328}
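
/*
 * Illustrative sketch (editor's addition): callers building a VOP_SETATTR
 * request start from vattr_null() so that only the fields they explicitly
 * fill in are acted upon; everything left at VNOVAL means "do not change".
 */
#ifdef notdef
static int
example_truncate(struct vnode *vp, struct ucred *cred, struct proc *p)
{
	struct vattr va;

	vattr_null(&va);	/* every field becomes VNOVAL / unchanged */
	va.va_size = 0;		/* ...except the one we care about */
	return (VOP_SETATTR(vp, &va, cred, p));
}
#endif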
329
330/*
331 * Routines having to do with the management of the vnode table.
332 */
333extern vop_t **dead_vnodeop_p;
334
335/*
336 * Return the next vnode from the free list.
337 */
338int
339getnewvnode(tag, mp, vops, vpp)
340 enum vtagtype tag;
341 struct mount *mp;
342 vop_t **vops;
343 struct vnode **vpp;
344{
345 struct proc *p = curproc; /* XXX */
346 struct vnode *vp;
347
348 /*
349 * We take the least recently used vnode from the freelist
350 * if we can get it, it has no cached pages, and no
351 * namecache entries refer to it.
352 * Otherwise we allocate a new vnode.
353 */
354
355 simple_lock(&vnode_free_list_slock);
356
357 if (freevnodes >= desiredvnodes) {
358 TAILQ_FOREACH(vp, &vnode_free_list, v_freelist) {
359 if (!simple_lock_try(&vp->v_interlock))
360 continue;
361 if (vp->v_usecount)
362 panic("free vnode isn't");
363
364 if (vp->v_object && vp->v_object->resident_page_count) {
365 /* Don't recycle if it's caching some pages */
366 simple_unlock(&vp->v_interlock);
367 continue;
368 } else if (LIST_FIRST(&vp->v_cache_src)) {
369 /* Don't recycle if active in the namecache */
370 simple_unlock(&vp->v_interlock);
371 continue;
372 } else {
373 break;
374 }
375 }
376 } else {
377 vp = NULL;
378 }
379
380 if (vp) {
381 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
382 freevnodes--;
383 /* see comment on why 0xdeadb is set at end of vgone (below) */
384 vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb;
385 simple_unlock(&vnode_free_list_slock);
386 vp->v_lease = NULL;
387 if (vp->v_type != VBAD)
388 vgonel(vp, p);
389 else {
390 simple_unlock(&vp->v_interlock);
391 }
392
393#ifdef DIAGNOSTIC
394 {
395 int s;
396
397 if (vp->v_data)
398 panic("cleaned vnode isn't");
399 s = splbio();
400 if (vp->v_numoutput)
401 panic("Clean vnode has pending I/O's");
402 splx(s);
403 }
404#endif
405 vp->v_flag = 0;
406 vp->v_lastr = 0;
407 vp->v_lastw = 0;
408 vp->v_lasta = 0;
409 vp->v_cstart = 0;
410 vp->v_clen = 0;
411 vp->v_socket = 0;
412 vp->v_writecount = 0; /* XXX */
413 } else {
414 simple_unlock(&vnode_free_list_slock);
415 vp = (struct vnode *) malloc((u_long) sizeof *vp,
416 M_VNODE, M_WAITOK);
417 bzero((char *) vp, sizeof *vp);
418 vp->v_dd = vp;
419 LIST_INIT(&vp->v_cache_src);
420 TAILQ_INIT(&vp->v_cache_dst);
421 numvnodes++;
422 }
423
424 vp->v_type = VNON;
425 cache_purge(vp);
426 vp->v_tag = tag;
427 vp->v_op = vops;
428 insmntque(vp, mp);
429 *vpp = vp;
430 vp->v_usecount = 1;
431 vp->v_data = 0;
432 return (0);
433}
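
/*
 * Illustrative sketch (editor's addition, hypothetical helper): how a
 * filesystem typically obtains a fresh vnode and attaches its private
 * per-node data.  The vnode comes back with v_usecount == 1 and
 * v_data == 0, already on the mount point's vnode list.
 */
#ifdef notdef
static int
example_getvnode(enum vtagtype tag, struct mount *mp, vop_t **vops,
	void *fsdata, struct vnode **vpp)
{
	struct vnode *vp;
	int error;

	error = getnewvnode(tag, mp, vops, &vp);
	if (error)
		return (error);
	vp->v_data = fsdata;	/* hang the fs-private node off the vnode */
	*vpp = vp;
	return (0);
}
#endif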
434
435/*
436 * Move a vnode from one mount queue to another.
437 */
438void
439insmntque(vp, mp)
440 register struct vnode *vp;
441 register struct mount *mp;
442{
443
444 simple_lock(&mntvnode_slock);
445 /*
446 * Delete from old mount point vnode list, if on one.
447 */
448 if (vp->v_mount != NULL)
449 LIST_REMOVE(vp, v_mntvnodes);
450 /*
451 * Insert into list of vnodes for the new mount point, if available.
452 */
453 if ((vp->v_mount = mp) == NULL) {
454 simple_unlock(&mntvnode_slock);
455 return;
456 }
457 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
458 simple_unlock(&mntvnode_slock);
459}
460
461/*
462 * Update outstanding I/O count and do wakeup if requested.
463 */
464void
465vwakeup(bp)
466 register struct buf *bp;
467{
468 register struct vnode *vp;
469
470 bp->b_flags &= ~B_WRITEINPROG;
471 if ((vp = bp->b_vp)) {
472 vp->v_numoutput--;
473 if (vp->v_numoutput < 0)
474 panic("vwakeup: neg numoutput");
475 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
476 vp->v_flag &= ~VBWAIT;
477 wakeup((caddr_t) &vp->v_numoutput);
478 }
479 }
480}
481
482/*
483 * Flush out and invalidate all buffers associated with a vnode.
484 * Called with the underlying object locked.
485 */
486int
487vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
488 register struct vnode *vp;
489 int flags;
490 struct ucred *cred;
491 struct proc *p;
492 int slpflag, slptimeo;
493{
494 register struct buf *bp;
495 struct buf *nbp, *blist;
496 int s, error;
497 vm_object_t object;
498
499 if (flags & V_SAVE) {
500 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
501 return (error);
502 if (vp->v_dirtyblkhd.lh_first != NULL)
503 panic("vinvalbuf: dirty bufs");
504 }
505
506 s = splbio();
507 for (;;) {
508 if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
509 while (blist && blist->b_lblkno < 0)
510 blist = blist->b_vnbufs.le_next;
511 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
512 (flags & V_SAVEMETA))
513 while (blist && blist->b_lblkno < 0)
514 blist = blist->b_vnbufs.le_next;
515 if (!blist)
516 break;
517
518 for (bp = blist; bp; bp = nbp) {
519 nbp = bp->b_vnbufs.le_next;
520 if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
521 continue;
522 if (bp->b_flags & B_BUSY) {
523 bp->b_flags |= B_WANTED;
524 error = tsleep((caddr_t) bp,
525 slpflag | (PRIBIO + 1), "vinvalbuf",
526 slptimeo);
527 if (error) {
528 splx(s);
529 return (error);
530 }
531 break;
532 }
533 bremfree(bp);
534 bp->b_flags |= B_BUSY;
535 /*
536 * XXX Since there are no node locks for NFS, I
537 * believe there is a slight chance that a delayed
538 * write will occur while sleeping just above, so
539 * check for it.
540 */
541 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
542 (void) VOP_BWRITE(bp);
543 break;
544 }
545 bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
546 brelse(bp);
547 }
548 }
549
550 while (vp->v_numoutput > 0) {
551 vp->v_flag |= VBWAIT;
552 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
553 }
554
555 splx(s);
556
557 /*
558 * Destroy the copy in the VM cache, too.
559 */
560 object = vp->v_object;
561 if (object != NULL) {
562 vm_object_page_remove(object, 0, object->size,
563 (flags & V_SAVE) ? TRUE : FALSE);
564 }
565 if (!(flags & V_SAVEMETA) &&
566 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
567 panic("vinvalbuf: flush failed");
568 return (0);
569}
570
571/*
572 * Associate a buffer with a vnode.
573 */
574void
575bgetvp(vp, bp)
576 register struct vnode *vp;
577 register struct buf *bp;
578{
579 int s;
580
581 if (bp->b_vp)
582 panic("bgetvp: not free");
583 VHOLD(vp);
584 bp->b_vp = vp;
585 if (vp->v_type == VBLK || vp->v_type == VCHR)
586 bp->b_dev = vp->v_rdev;
587 else
588 bp->b_dev = NODEV;
589 /*
590 * Insert onto list for new vnode.
591 */
592 s = splbio();
593 bufinsvn(bp, &vp->v_cleanblkhd);
594 splx(s);
595}
596
597/*
598 * Disassociate a buffer from a vnode.
599 */
600void
601brelvp(bp)
602 register struct buf *bp;
603{
604 struct vnode *vp;
605 int s;
606
607 if (bp->b_vp == (struct vnode *) 0)
608 panic("brelvp: NULL");
609 /*
610 * Delete from old vnode list, if on one.
611 */
612 s = splbio();
613 if (bp->b_vnbufs.le_next != NOLIST)
614 bufremvn(bp);
615 splx(s);
616
617 vp = bp->b_vp;
618 bp->b_vp = (struct vnode *) 0;
619 HOLDRELE(vp);
620}
621
622/*
623 * Associate a p-buffer with a vnode.
624 */
625void
626pbgetvp(vp, bp)
627 register struct vnode *vp;
628 register struct buf *bp;
629{
630#if defined(DIAGNOSTIC)
631 if (bp->b_vp)
632 panic("pbgetvp: not free");
633#endif
634 bp->b_vp = vp;
635 if (vp->v_type == VBLK || vp->v_type == VCHR)
636 bp->b_dev = vp->v_rdev;
637 else
638 bp->b_dev = NODEV;
639}
640
641/*
642 * Disassociate a p-buffer from a vnode.
643 */
644void
645pbrelvp(bp)
646 register struct buf *bp;
647{
648 struct vnode *vp;
649
650#if defined(DIAGNOSTIC)
651 if (bp->b_vp == (struct vnode *) 0)
652 panic("pbrelvp: NULL");
653#endif
654
655 bp->b_vp = (struct vnode *) 0;
656}
657
658/*
659 * Reassign a buffer from one vnode to another.
660 * Used to assign file specific control information
661 * (indirect blocks) to the vnode to which they belong.
662 */
663void
664reassignbuf(bp, newvp)
665 register struct buf *bp;
666 register struct vnode *newvp;
667{
668 int s;
669
670 if (newvp == NULL) {
671 printf("reassignbuf: NULL");
672 return;
673 }
674
675 s = splbio();
676 /*
677 * Delete from old vnode list, if on one.
678 */
679 if (bp->b_vnbufs.le_next != NOLIST)
680 bufremvn(bp);
681 /*
682 * If dirty, put on list of dirty buffers; otherwise insert onto list
683 * of clean buffers.
684 */
685 if (bp->b_flags & B_DELWRI) {
686 struct buf *tbp;
687
688 tbp = newvp->v_dirtyblkhd.lh_first;
689 if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
690 bufinsvn(bp, &newvp->v_dirtyblkhd);
691 } else {
692 while (tbp->b_vnbufs.le_next &&
693 (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
694 tbp = tbp->b_vnbufs.le_next;
695 }
696 LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
697 }
698 } else {
699 bufinsvn(bp, &newvp->v_cleanblkhd);
700 }
701 splx(s);
702}
703
704#ifndef DEVFS_ROOT
705/*
706 * Create a vnode for a block device.
707 * Used for root filesystem, argdev, and swap areas.
708 * Also used for memory file system special devices.
709 */
710int
711bdevvp(dev, vpp)
712 dev_t dev;
713 struct vnode **vpp;
714{
715 register struct vnode *vp;
716 struct vnode *nvp;
717 int error;
718
719 if (dev == NODEV)
720 return (0);
721 error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
722 if (error) {
723 *vpp = 0;
724 return (error);
725 }
726 vp = nvp;
727 vp->v_type = VBLK;
728 if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
729 vput(vp);
730 vp = nvp;
731 }
732 *vpp = vp;
733 return (0);
734}
735#endif /* !DEVFS_ROOT */
736
737/*
738 * Check to see if the new vnode represents a special device
739 * for which we already have a vnode (either because of
740 * bdevvp() or because of a different vnode representing
741 * the same block device). If such an alias exists, deallocate
742 * the existing contents and return the aliased vnode. The
743 * caller is responsible for filling it with its new contents.
744 */
745struct vnode *
746checkalias(nvp, nvp_rdev, mp)
747 register struct vnode *nvp;
748 dev_t nvp_rdev;
749 struct mount *mp;
750{
751 struct proc *p = curproc; /* XXX */
752 struct vnode *vp;
753 struct vnode **vpp;
754
755 if (nvp->v_type != VBLK && nvp->v_type != VCHR)
756 return (NULLVP);
757
758 vpp = &speclisth[SPECHASH(nvp_rdev)];
759loop:
760 simple_lock(&spechash_slock);
761 for (vp = *vpp; vp; vp = vp->v_specnext) {
762 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
763 continue;
764 /*
765 * Alias, but not in use, so flush it out.
766 */
767 simple_lock(&vp->v_interlock);
768 if (vp->v_usecount == 0) {
769 simple_unlock(&spechash_slock);
770 vgonel(vp, p);
771 goto loop;
772 }
773 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
774 simple_unlock(&spechash_slock);
775 goto loop;
776 }
777 break;
778 }
779 if (vp == NULL || vp->v_tag != VT_NON) {
780 MALLOC(nvp->v_specinfo, struct specinfo *,
781 sizeof(struct specinfo), M_VNODE, M_WAITOK);
782 nvp->v_rdev = nvp_rdev;
783 nvp->v_hashchain = vpp;
784 nvp->v_specnext = *vpp;
785 nvp->v_specflags = 0;
786 simple_unlock(&spechash_slock);
787 *vpp = nvp;
788 if (vp != NULLVP) {
789 nvp->v_flag |= VALIASED;
790 vp->v_flag |= VALIASED;
791 vput(vp);
792 }
793 return (NULLVP);
794 }
795 simple_unlock(&spechash_slock);
796 VOP_UNLOCK(vp, 0, p);
797 simple_lock(&vp->v_interlock);
798 vclean(vp, 0, p);
799 vp->v_op = nvp->v_op;
800 vp->v_tag = nvp->v_tag;
801 nvp->v_type = VNON;
802 insmntque(vp, mp);
803 return (vp);
804}
805
806/*
807 * Grab a particular vnode from the free list, increment its
808 * reference count and lock it. The vnode lock bit is set the
809 * vnode is being eliminated in vgone. The process is awakened
810 * when the transition is completed, and an error returned to
811 * indicate that the vnode is no longer usable (possibly having
812 * been changed to a new file system type).
813 */
814int
815vget(vp, flags, p)
816 register struct vnode *vp;
817 int flags;
818 struct proc *p;
819{
820 int error;
821
822 /*
823 * If the vnode is in the process of being cleaned out for
824 * another use, we wait for the cleaning to finish and then
825 * return failure. Cleaning is determined by checking that
826 * the VXLOCK flag is set.
827 */
828 if ((flags & LK_INTERLOCK) == 0) {
829 simple_lock(&vp->v_interlock);
830 }
831 if (vp->v_flag & VXLOCK) {
832 vp->v_flag |= VXWANT;
833 simple_unlock(&vp->v_interlock);
834 tsleep((caddr_t)vp, PINOD, "vget", 0);
835 return (ENOENT);
836 }
837 if (vp->v_usecount == 0) {
838 simple_lock(&vnode_free_list_slock);
839 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
840 simple_unlock(&vnode_free_list_slock);
841 freevnodes--;
842 }
843 vp->v_usecount++;
844 /*
845 * Create the VM object, if needed
846 */
847 if ((vp->v_type == VREG) &&
848 ((vp->v_object == NULL) ||
849 (vp->v_object->flags & OBJ_VFS_REF) == 0 ||
850 (vp->v_object->flags & OBJ_DEAD))) {
851 /*
852 * XXX vfs_object_create probably needs the interlock.
853 */
854 simple_unlock(&vp->v_interlock);
855 vfs_object_create(vp, curproc, curproc->p_ucred, 0);
856 simple_lock(&vp->v_interlock);
857 }
858 if (flags & LK_TYPE_MASK) {
859		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0)
860 vrele(vp);
861 return (error);
862 }
863 simple_unlock(&vp->v_interlock);
864 return (0);
865}
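
/*
 * Illustrative sketch (editor's addition): the common get/use/put cycle
 * around vget().  Passing a lock type such as LK_EXCLUSIVE also locks the
 * vnode, so the matching release is vput() rather than vrele().
 */
#ifdef notdef
static int
example_use_vnode(struct vnode *vp, struct proc *p)
{
	int error;

	if ((error = vget(vp, LK_EXCLUSIVE, p)) != 0)
		return (error);	/* e.g. ENOENT if it is being cleaned out */
	/* ... operate on the referenced, locked vnode ... */
	vput(vp);		/* unlock and drop the reference */
	return (0);
}
#endif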
866
867/*
868 * Stubs to use when there is no locking to be done on the underlying object.
869 * A minimal shared lock is necessary to ensure that the underlying object
870 * is not revoked while an operation is in progress. So, an active shared
871 * count is maintained in an auxiliary vnode lock structure.
872 */
873int
874vop_sharedlock(ap)
875 struct vop_lock_args /* {
876 struct vnode *a_vp;
877 int a_flags;
878 struct proc *a_p;
879 } */ *ap;
880{
881 /*
882 * This code cannot be used until all the non-locking filesystems
883 * (notably NFS) are converted to properly lock and release nodes.
884 * Also, certain vnode operations change the locking state within
885 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
886 * and symlink). Ideally these operations should not change the
887 * lock state, but should be changed to let the caller of the
888 * function unlock them. Otherwise all intermediate vnode layers
889 * (such as union, umapfs, etc) must catch these functions to do
890 * the necessary locking at their layer. Note that the inactive
891 * and lookup operations also change their lock state, but this
892 * cannot be avoided, so these two operations will always need
893 * to be handled in intermediate layers.
894 */
895 struct vnode *vp = ap->a_vp;
896 int vnflags, flags = ap->a_flags;
897
898 if (vp->v_vnlock == NULL) {
899 if ((flags & LK_TYPE_MASK) == LK_DRAIN)
900 return (0);
901 MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
902 M_VNODE, M_WAITOK);
903 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
904 }
905 switch (flags & LK_TYPE_MASK) {
906 case LK_DRAIN:
907 vnflags = LK_DRAIN;
908 break;
909 case LK_EXCLUSIVE:
910#ifdef DEBUG_VFS_LOCKS
911 /*
912 * Normally, we use shared locks here, but that confuses
913 * the locking assertions.
914 */
915 vnflags = LK_EXCLUSIVE;
916 break;
917#endif
918 case LK_SHARED:
919 vnflags = LK_SHARED;
920 break;
921 case LK_UPGRADE:
922 case LK_EXCLUPGRADE:
923 case LK_DOWNGRADE:
924 return (0);
925 case LK_RELEASE:
926 default:
927		panic("vop_sharedlock: bad operation %d", flags & LK_TYPE_MASK);
928 }
929 if (flags & LK_INTERLOCK)
930 vnflags |= LK_INTERLOCK;
931 return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
932}
933
934/*
935 * Stubs to use when there is no locking to be done on the underlying object.
936 * A minimal shared lock is necessary to ensure that the underlying object
937 * is not revoked while an operation is in progress. So, an active shared
938 * count is maintained in an auxiliary vnode lock structure.
939 */
940int
941vop_nolock(ap)
942 struct vop_lock_args /* {
943 struct vnode *a_vp;
944 int a_flags;
945 struct proc *a_p;
946 } */ *ap;
947{
948#ifdef notyet
949 /*
950 * This code cannot be used until all the non-locking filesystems
951 * (notably NFS) are converted to properly lock and release nodes.
952 * Also, certain vnode operations change the locking state within
953 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
954 * and symlink). Ideally these operations should not change the
955 * lock state, but should be changed to let the caller of the
956 * function unlock them. Otherwise all intermediate vnode layers
957 * (such as union, umapfs, etc) must catch these functions to do
958 * the necessary locking at their layer. Note that the inactive
959 * and lookup operations also change their lock state, but this
960 * cannot be avoided, so these two operations will always need
961 * to be handled in intermediate layers.
962 */
963 struct vnode *vp = ap->a_vp;
964 int vnflags, flags = ap->a_flags;
965
966 if (vp->v_vnlock == NULL) {
967 if ((flags & LK_TYPE_MASK) == LK_DRAIN)
968 return (0);
969 MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
970 M_VNODE, M_WAITOK);
971 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
972 }
973 switch (flags & LK_TYPE_MASK) {
974 case LK_DRAIN:
975 vnflags = LK_DRAIN;
976 break;
977 case LK_EXCLUSIVE:
978 case LK_SHARED:
979 vnflags = LK_SHARED;
980 break;
981 case LK_UPGRADE:
982 case LK_EXCLUPGRADE:
983 case LK_DOWNGRADE:
984 return (0);
985 case LK_RELEASE:
986 default:
987 panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
988 }
989 if (flags & LK_INTERLOCK)
990 vnflags |= LK_INTERLOCK;
991 return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
992#else /* for now */
993 /*
994 * Since we are not using the lock manager, we must clear
995 * the interlock here.
996 */
997 if (ap->a_flags & LK_INTERLOCK) {
998 simple_unlock(&ap->a_vp->v_interlock);
999 }
1000 return (0);
1001#endif
1002}
1003
1004/*
1005 * Do the inverse of vop_nolock, handling the interlock in a compatible way.
1006 */
1007int
1008vop_nounlock(ap)
1009 struct vop_unlock_args /* {
1010 struct vnode *a_vp;
1011 int a_flags;
1012 struct proc *a_p;
1013 } */ *ap;
1014{
1015 struct vnode *vp = ap->a_vp;
1016
1017 if (vp->v_vnlock == NULL) {
1018 if (ap->a_flags & LK_INTERLOCK)
1019 simple_unlock(&ap->a_vp->v_interlock);
1020 return (0);
1021 }
1022 return (lockmgr(vp->v_vnlock, LK_RELEASE | ap->a_flags,
1023 &ap->a_vp->v_interlock, ap->a_p));
1024}
1025
1026/*
1027 * Return whether or not the node is locked.
1028 */
1029int
1030vop_noislocked(ap)
1031 struct vop_islocked_args /* {
1032 struct vnode *a_vp;
1033 } */ *ap;
1034{
1035 struct vnode *vp = ap->a_vp;
1036
1037 if (vp->v_vnlock == NULL)
1038 return (0);
1039 return (lockstatus(vp->v_vnlock));
1040}
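
/*
 * Illustrative sketch (editor's addition; the descriptor names are assumed
 * from the vnode_if tables used elsewhere in the tree): a filesystem that
 * does no locking of its own points its lock-related operations at the
 * stubs above in its vnodeopv_entry_desc table.
 */
#ifdef notdef
	{ &vop_lock_desc,	(vop_t *) vop_nolock },
	{ &vop_unlock_desc,	(vop_t *) vop_nounlock },
	{ &vop_islocked_desc,	(vop_t *) vop_noislocked },
#endif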
1041
1042/* #ifdef DIAGNOSTIC */
1043/*
1044 * Vnode reference, just increment the count
1045 */
1046void
1047vref(vp)
1048 struct vnode *vp;
1049{
1050 simple_lock(&vp->v_interlock);
1051 if (vp->v_usecount <= 0)
1052 panic("vref used where vget required");
1053
1054 vp->v_usecount++;
1055
1056 if ((vp->v_type == VREG) &&
1057 ((vp->v_object == NULL) ||
1058 ((vp->v_object->flags & OBJ_VFS_REF) == 0) ||
1059 (vp->v_object->flags & OBJ_DEAD))) {
1060 /*
1061		 * We need to lock the VP during the time that
1062 * the object is created. This is necessary to
1063 * keep the system from re-entrantly doing it
1064 * multiple times.
1065 * XXX vfs_object_create probably needs the interlock?
1066 */
1067 simple_unlock(&vp->v_interlock);
1068 vfs_object_create(vp, curproc, curproc->p_ucred, 0);
1069 return;
1070 }
1071 simple_unlock(&vp->v_interlock);
1072}
1073
1074/*
1075 * Vnode put/release.
1076 * If count drops to zero, call inactive routine and return to freelist.
1077 */
1078static void
1079vputrele(vp, put)
1080 struct vnode *vp;
1081 int put;
1082{
1083 struct proc *p = curproc; /* XXX */
1084
1085#ifdef DIAGNOSTIC
1086 if (vp == NULL)
1087 panic("vputrele: null vp");
1088#endif
1089 simple_lock(&vp->v_interlock);
1090 vp->v_usecount--;
1091
1092 if ((vp->v_usecount == 1) &&
1093 vp->v_object &&
1094 (vp->v_object->flags & OBJ_VFS_REF)) {
1095 vp->v_object->flags &= ~OBJ_VFS_REF;
1096 if (put) {
1097 VOP_UNLOCK(vp, LK_INTERLOCK, p);
1098 } else {
1099 simple_unlock(&vp->v_interlock);
1100 }
1101 vm_object_deallocate(vp->v_object);
1102 return;
1103 }
1104
1105 if (vp->v_usecount > 0) {
1106 if (put) {
1107 VOP_UNLOCK(vp, LK_INTERLOCK, p);
1108 } else {
1109 simple_unlock(&vp->v_interlock);
1110 }
1111 return;
1112 }
1113
1114 if (vp->v_usecount < 0) {
1115#ifdef DIAGNOSTIC
1116 vprint("vputrele: negative ref count", vp);
1117#endif
1118 panic("vputrele: negative ref cnt");
1119 }
1120 simple_lock(&vnode_free_list_slock);
1121 if (vp->v_flag & VAGE) {
1122 vp->v_flag &= ~VAGE;
1123 if(vp->v_tag != VT_TFS)
1124 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1125 } else {
1126 if(vp->v_tag != VT_TFS)
1127 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1128 }
1129 freevnodes++;
1130 simple_unlock(&vnode_free_list_slock);
1131
1132 /*
1133 * If we are doing a vput, the node is already locked, and we must
1134 * call VOP_INACTIVE with the node locked. So, in the case of
1135 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
1136 */
1137 if (put) {
1138 simple_unlock(&vp->v_interlock);
1139 VOP_INACTIVE(vp, p);
1140 } else if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1141 VOP_INACTIVE(vp, p);
1142 }
1143}
1144
1145/*
1146 * vput(), just unlock and vrele()
1147 */
1148void
1149vput(vp)
1150 struct vnode *vp;
1151{
1152 vputrele(vp, 1);
1153}
1154
1155void
1156vrele(vp)
1157 struct vnode *vp;
1158{
1159 vputrele(vp, 0);
1160}
1161
1162#ifdef DIAGNOSTIC
1163/*
1164 * Page or buffer structure gets a reference.
1165 */
1166void
1167vhold(vp)
1168 register struct vnode *vp;
1169{
1170
1171 simple_lock(&vp->v_interlock);
1172 vp->v_holdcnt++;
1173 simple_unlock(&vp->v_interlock);
1174}
1175
1176/*
1177 * Page or buffer structure frees a reference.
1178 */
1179void
1180holdrele(vp)
1181 register struct vnode *vp;
1182{
1183
1184 simple_lock(&vp->v_interlock);
1185 if (vp->v_holdcnt <= 0)
1186 panic("holdrele: holdcnt");
1187 vp->v_holdcnt--;
1188 simple_unlock(&vp->v_interlock);
1189}
1190#endif /* DIAGNOSTIC */
1191
1192/*
1193 * Remove any vnodes in the vnode table belonging to mount point mp.
1194 *
1195 * If MNT_NOFORCE is specified, there should not be any active ones,
1196 * return error if any are found (nb: this is a user error, not a
1197 * system error). If MNT_FORCE is specified, detach any active vnodes
1198 * that are found.
1199 */
1200#ifdef DIAGNOSTIC
1201static int busyprt = 0; /* print out busy vnodes */
1202SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
1203#endif
1204
1205int
1206vflush(mp, skipvp, flags)
1207 struct mount *mp;
1208 struct vnode *skipvp;
1209 int flags;
1210{
1211 struct proc *p = curproc; /* XXX */
1212 struct vnode *vp, *nvp;
1213 int busy = 0;
1214
1215 simple_lock(&mntvnode_slock);
1216loop:
1217 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1218 /*
1219 * Make sure this vnode wasn't reclaimed in getnewvnode().
1220 * Start over if it has (it won't be on the list anymore).
1221 */
1222 if (vp->v_mount != mp)
1223 goto loop;
1224 nvp = vp->v_mntvnodes.le_next;
1225 /*
1226 * Skip over a selected vnode.
1227 */
1228 if (vp == skipvp)
1229 continue;
1230
1231 simple_lock(&vp->v_interlock);
1232 /*
1233		 * Skip over vnodes marked VSYSTEM.
1234 */
1235 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1236 simple_unlock(&vp->v_interlock);
1237 continue;
1238 }
1239 /*
1240 * If WRITECLOSE is set, only flush out regular file vnodes
1241 * open for writing.
1242 */
1243 if ((flags & WRITECLOSE) &&
1244 (vp->v_writecount == 0 || vp->v_type != VREG)) {
1245 simple_unlock(&vp->v_interlock);
1246 continue;
1247 }
1248
1249 /*
1250 * With v_usecount == 0, all we need to do is clear out the
1251 * vnode data structures and we are done.
1252 */
1253 if (vp->v_usecount == 0) {
1254 simple_unlock(&mntvnode_slock);
1255 vgonel(vp, p);
1256 simple_lock(&mntvnode_slock);
1257 continue;
1258 }
1259
1260 /*
1261 * If FORCECLOSE is set, forcibly close the vnode. For block
1262 * or character devices, revert to an anonymous device. For
1263 * all other files, just kill them.
1264 */
1265 if (flags & FORCECLOSE) {
1266 simple_unlock(&mntvnode_slock);
1267 if (vp->v_type != VBLK && vp->v_type != VCHR) {
1268 vgonel(vp, p);
1269 } else {
1270 vclean(vp, 0, p);
1271 vp->v_op = spec_vnodeop_p;
1272 insmntque(vp, (struct mount *) 0);
1273 }
1274 simple_lock(&mntvnode_slock);
1275 continue;
1276 }
1277#ifdef DIAGNOSTIC
1278 if (busyprt)
1279 vprint("vflush: busy vnode", vp);
1280#endif
1281 simple_unlock(&vp->v_interlock);
1282 busy++;
1283 }
1284 simple_unlock(&mntvnode_slock);
1285 if (busy)
1286 return (EBUSY);
1287 return (0);
1288}
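
/*
 * Illustrative sketch (editor's addition, hypothetical unmount helper):
 * an unmounting filesystem typically skips VSYSTEM vnodes and only adds
 * FORCECLOSE when a forced unmount was requested.
 */
#ifdef notdef
static int
example_flushfiles(struct mount *mp, int mntflags)
{
	int flags = SKIPSYSTEM;

	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;
	return (vflush(mp, NULLVP, flags));
}
#endif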
1289
1290/*
1291 * Disassociate the underlying file system from a vnode.
1292 */
1293static void
1294vclean(struct vnode *vp, int flags, struct proc *p)
1295{
1296 int active, irefed;
1297 vm_object_t object;
1298
1299 /*
1300 * Check to see if the vnode is in use. If so we have to reference it
1301 * before we clean it out so that its count cannot fall to zero and
1302 * generate a race against ourselves to recycle it.
1303 */
1304 if ((active = vp->v_usecount))
1305 vp->v_usecount++;
1306 /*
1307 * Prevent the vnode from being recycled or brought into use while we
1308 * clean it out.
1309 */
1310 if (vp->v_flag & VXLOCK)
1311 panic("vclean: deadlock");
1312 vp->v_flag |= VXLOCK;
1313 /*
1314 * Even if the count is zero, the VOP_INACTIVE routine may still
1315 * have the object locked while it cleans it out. The VOP_LOCK
1316 * ensures that the VOP_INACTIVE routine is done with its work.
1317 * For active vnodes, it ensures that no other activity can
1318 * occur while the underlying object is being cleaned out.
1319 */
1320 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1321
1322 object = vp->v_object;
1323 irefed = 0;
1324 if (object && ((object->flags & OBJ_DEAD) == 0)) {
1325 if (object->ref_count == 0) {
1326 vm_object_reference(object);
1327 irefed = 1;
1328 }
1329 ++object->ref_count;
1330 pager_cache(object, FALSE);
1331 }
1332
1333 /*
1334 * Clean out any buffers associated with the vnode.
1335 */
1336 if (flags & DOCLOSE)
1337 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1338
1339 if (irefed) {
1340 vm_object_deallocate(object);
1341 }
1342
1343 /*
1344 * If purging an active vnode, it must be closed and
1345 * deactivated before being reclaimed. Note that the
1346 * VOP_INACTIVE will unlock the vnode.
1347 */
1348 if (active) {
1349 if (flags & DOCLOSE)
1350 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1351 VOP_INACTIVE(vp, p);
1352 } else {
1353 /*
1354 * Any other processes trying to obtain this lock must first
1355 * wait for VXLOCK to clear, then call the new lock operation.
1356 */
1357 VOP_UNLOCK(vp, 0, p);
1358 }
1359 /*
1360 * Reclaim the vnode.
1361 */
1362 if (VOP_RECLAIM(vp, p))
1363 panic("vclean: cannot reclaim");
1364 if (active)
1365 vrele(vp);
1366 cache_purge(vp);
1367 if (vp->v_vnlock) {
1368#ifdef DIAGNOSTIC
1369 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1370 vprint("vclean: lock not drained", vp);
1371#endif
1372 FREE(vp->v_vnlock, M_VNODE);
1373 vp->v_vnlock = NULL;
1374 }
1375
1376 /*
1377 * Done with purge, notify sleepers of the grim news.
1378 */
1379 vp->v_op = dead_vnodeop_p;
1380 vp->v_tag = VT_NON;
1381 vp->v_flag &= ~VXLOCK;
1382 if (vp->v_flag & VXWANT) {
1383 vp->v_flag &= ~VXWANT;
1384 wakeup((caddr_t) vp);
1385 }
1386}
1387
1388/*
1389 * Eliminate all activity associated with the requested vnode
1390 * and with all vnodes aliased to the requested vnode.
1391 */
1392int
1393vop_revoke(ap)
1394 struct vop_revoke_args /* {
1395 struct vnode *a_vp;
1396 int a_flags;
1397 } */ *ap;
1398{
1399 struct vnode *vp, *vq;
1400 struct proc *p = curproc; /* XXX */
1401
1402#ifdef DIAGNOSTIC
1403 if ((ap->a_flags & REVOKEALL) == 0)
1404 panic("vop_revoke");
1405#endif
1406
1407 vp = ap->a_vp;
1408 simple_lock(&vp->v_interlock);
1409
1410 if (vp->v_flag & VALIASED) {
1411 /*
1412 * If a vgone (or vclean) is already in progress,
1413 * wait until it is done and return.
1414 */
1415 if (vp->v_flag & VXLOCK) {
1416 vp->v_flag |= VXWANT;
1417 simple_unlock(&vp->v_interlock);
1418 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1419 return (0);
1420 }
1421 /*
1422 * Ensure that vp will not be vgone'd while we
1423 * are eliminating its aliases.
1424 */
1425 vp->v_flag |= VXLOCK;
1426 simple_unlock(&vp->v_interlock);
1427 while (vp->v_flag & VALIASED) {
1428 simple_lock(&spechash_slock);
1429 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1430 if (vq->v_rdev != vp->v_rdev ||
1431 vq->v_type != vp->v_type || vp == vq)
1432 continue;
1433 simple_unlock(&spechash_slock);
1434 vgone(vq);
1435 break;
1436 }
1437 if (vq == NULLVP) {
1438 simple_unlock(&spechash_slock);
1439 }
1440 }
1441 /*
1442 * Remove the lock so that vgone below will
1443 * really eliminate the vnode after which time
1444 * vgone will awaken any sleepers.
1445 */
1446 simple_lock(&vp->v_interlock);
1447 vp->v_flag &= ~VXLOCK;
1448 }
1449 vgonel(vp, p);
1450 return (0);
1451}
1452
1453/*
1454 * Recycle an unused vnode to the front of the free list.
1455 * Release the passed interlock if the vnode will be recycled.
1456 */
1457int
1458vrecycle(vp, inter_lkp, p)
1459 struct vnode *vp;
1460 struct simplelock *inter_lkp;
1461 struct proc *p;
1462{
1463
1464 simple_lock(&vp->v_interlock);
1465 if (vp->v_usecount == 0) {
1466 if (inter_lkp) {
1467 simple_unlock(inter_lkp);
1468 }
1469 vgonel(vp, p);
1470 return (1);
1471 }
1472 simple_unlock(&vp->v_interlock);
1473 return (0);
1474}
1475
1476/*
1477 * Eliminate all activity associated with a vnode
1478 * in preparation for reuse.
1479 */
1480void
1481vgone(vp)
1482 register struct vnode *vp;
1483{
1484 struct proc *p = curproc; /* XXX */
1485
1486 simple_lock(&vp->v_interlock);
1487 vgonel(vp, p);
1488}
1489
1490/*
1491 * vgone, with the vp interlock held.
1492 */
1493static void
1494vgonel(vp, p)
1495 struct vnode *vp;
1496 struct proc *p;
1497{
1498 struct vnode *vq;
1499 struct vnode *vx;
1500
1501 /*
1502 * If a vgone (or vclean) is already in progress,
1503 * wait until it is done and return.
1504 */
1505 if (vp->v_flag & VXLOCK) {
1506 vp->v_flag |= VXWANT;
1507 simple_unlock(&vp->v_interlock);
1508 tsleep((caddr_t)vp, PINOD, "vgone", 0);
1509 return;
1510 }
1511
1512 if (vp->v_object) {
1513 vp->v_object->flags |= OBJ_VNODE_GONE;
1514 }
1515
1516 /*
1517 * Clean out the filesystem specific data.
1518 */
1519 vclean(vp, DOCLOSE, p);
1520 /*
1521 * Delete from old mount point vnode list, if on one.
1522 */
1523 if (vp->v_mount != NULL)
1524 insmntque(vp, (struct mount *)0);
1525 /*
1526 * If special device, remove it from special device alias list
1527 * if it is on one.
1528 */
1529 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1530 simple_lock(&spechash_slock);
1531 if (*vp->v_hashchain == vp) {
1532 *vp->v_hashchain = vp->v_specnext;
1533 } else {
1534 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1535 if (vq->v_specnext != vp)
1536 continue;
1537 vq->v_specnext = vp->v_specnext;
1538 break;
1539 }
1540 if (vq == NULL)
1541 panic("missing bdev");
1542 }
1543 if (vp->v_flag & VALIASED) {
1544 vx = NULL;
1545 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1546 if (vq->v_rdev != vp->v_rdev ||
1547 vq->v_type != vp->v_type)
1548 continue;
1549 if (vx)
1550 break;
1551 vx = vq;
1552 }
1553 if (vx == NULL)
1554 panic("missing alias");
1555 if (vq == NULL)
1556 vx->v_flag &= ~VALIASED;
1557 vp->v_flag &= ~VALIASED;
1558 }
1559 simple_unlock(&spechash_slock);
1560 FREE(vp->v_specinfo, M_VNODE);
1561 vp->v_specinfo = NULL;
1562 }
1563
1564 /*
1565 * If it is on the freelist and not already at the head,
1566 * move it to the head of the list. The test of the back
1567 * pointer and the reference count of zero is because
1568 * it will be removed from the free list by getnewvnode,
1569 * but will not have its reference count incremented until
1570 * after calling vgone. If the reference count were
1571 * incremented first, vgone would (incorrectly) try to
1572 * close the previous instance of the underlying object.
1573 * So, the back pointer is explicitly set to `0xdeadb' in
1574 * getnewvnode after removing it from the freelist to ensure
1575 * that we do not try to move it here.
1576 */
1577 if (vp->v_usecount == 0) {
1578 simple_lock(&vnode_free_list_slock);
1579 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1580 vnode_free_list.tqh_first != vp) {
1581 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1582 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1583 }
1584 simple_unlock(&vnode_free_list_slock);
1585 }
1586
1587 vp->v_type = VBAD;
1588}
1589
1590/*
1591 * Lookup a vnode by device number.
1592 */
1593int
1594vfinddev(dev, type, vpp)
1595 dev_t dev;
1596 enum vtype type;
1597 struct vnode **vpp;
1598{
1599 register struct vnode *vp;
1600 int rc = 0;
1601
1602 simple_lock(&spechash_slock);
1603 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1604 if (dev != vp->v_rdev || type != vp->v_type)
1605 continue;
1606 *vpp = vp;
1607 rc = 1;
1608 break;
1609 }
1610 simple_unlock(&spechash_slock);
1611 return (rc);
1612}
1613
1614/*
1615 * Calculate the total number of references to a special device.
1616 */
1617int
1618vcount(vp)
1619 register struct vnode *vp;
1620{
1621 struct vnode *vq, *vnext;
1622 int count;
1623
1624loop:
1625 if ((vp->v_flag & VALIASED) == 0)
1626 return (vp->v_usecount);
1627 simple_lock(&spechash_slock);
1628 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1629 vnext = vq->v_specnext;
1630 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1631 continue;
1632 /*
1633 * Alias, but not in use, so flush it out.
1634 */
1635 if (vq->v_usecount == 0 && vq != vp) {
1636 simple_unlock(&spechash_slock);
1637 vgone(vq);
1638 goto loop;
1639 }
1640 count += vq->v_usecount;
1641 }
1642 simple_unlock(&spechash_slock);
1643 return (count);
1644}
1645
1646/*
1647 * Print out a description of a vnode.
1648 */
1649static char *typename[] =
1650{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
1651
1652void
1653vprint(label, vp)
1654 char *label;
1655 register struct vnode *vp;
1656{
1657 char buf[64];
1658
1659 if (label != NULL)
1660 printf("%s: %x: ", label, vp);
1661 else
1662 printf("%x: ", vp);
1663 printf("type %s, usecount %d, writecount %d, refcount %ld,",
1664 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1665 vp->v_holdcnt);
1666 buf[0] = '\0';
1667 if (vp->v_flag & VROOT)
1668 strcat(buf, "|VROOT");
1669 if (vp->v_flag & VTEXT)
1670 strcat(buf, "|VTEXT");
1671 if (vp->v_flag & VSYSTEM)
1672 strcat(buf, "|VSYSTEM");
1673 if (vp->v_flag & VXLOCK)
1674 strcat(buf, "|VXLOCK");
1675 if (vp->v_flag & VXWANT)
1676 strcat(buf, "|VXWANT");
1677 if (vp->v_flag & VBWAIT)
1678 strcat(buf, "|VBWAIT");
1679 if (vp->v_flag & VALIASED)
1680 strcat(buf, "|VALIASED");
1681 if (buf[0] != '\0')
1682 printf(" flags (%s)", &buf[1]);
1683 if (vp->v_data == NULL) {
1684 printf("\n");
1685 } else {
1686 printf("\n\t");
1687 VOP_PRINT(vp);
1688 }
1689}
1690
1691#ifdef DDB
1692/*
1693 * List all of the locked vnodes in the system.
1694 * Called when debugging the kernel.
1695 */
1696void
1697printlockedvnodes()
1698{
1699 struct proc *p = curproc; /* XXX */
1700 struct mount *mp, *nmp;
1701 struct vnode *vp;
1702
1703 printf("Locked vnodes\n");
1704 simple_lock(&mountlist_slock);
1705 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1706 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1707 nmp = mp->mnt_list.cqe_next;
1708 continue;
1709 }
1710 for (vp = mp->mnt_vnodelist.lh_first;
1711 vp != NULL;
1712 vp = vp->v_mntvnodes.le_next) {
1713 if (VOP_ISLOCKED(vp))
1714 vprint((char *)0, vp);
1715 }
1716 simple_lock(&mountlist_slock);
1717 nmp = mp->mnt_list.cqe_next;
1718 vfs_unbusy(mp, p);
1719 }
1720 simple_unlock(&mountlist_slock);
1721}
1722#endif
1723
1724/*
1725 * Top level filesystem related information gathering.
1726 */
1727static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);
1728
1729static int
1730vfs_sysctl SYSCTL_HANDLER_ARGS
1731{
1732 int *name = (int *)arg1 - 1; /* XXX */
1733 u_int namelen = arg2 + 1; /* XXX */
1734 struct vfsconf *vfsp;
1735
1736#ifndef NO_COMPAT_PRELITE2
1737 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
1738 if (namelen == 1)
1739 return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
1740#endif
1741
1742#ifdef notyet
1743 /* all sysctl names at this level are at least name and field */
1744 if (namelen < 2)
1745 return (ENOTDIR); /* overloaded */
1746 if (name[0] != VFS_GENERIC) {
1747 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1748 if (vfsp->vfc_typenum == name[0])
1749 break;
1750 if (vfsp == NULL)
1751 return (EOPNOTSUPP);
1752 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1753 oldp, oldlenp, newp, newlen, p));
1754 }
1755#endif
1756 switch (name[1]) {
1757 case VFS_MAXTYPENUM:
1758 if (namelen != 2)
1759 return (ENOTDIR);
1760 return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
1761 case VFS_CONF:
1762 if (namelen != 3)
1763 return (ENOTDIR); /* overloaded */
1764 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1765 if (vfsp->vfc_typenum == name[2])
1766 break;
1767 if (vfsp == NULL)
1768 return (EOPNOTSUPP);
1769 return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
1770 }
1771 return (EOPNOTSUPP);
1772}
1773
1774SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
1775 "Generic filesystem");
1776
1777#ifndef NO_COMPAT_PRELITE2
1778
1779static int
1780sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
1781{
1782 int error;
1783 struct vfsconf *vfsp;
1784 struct ovfsconf ovfs;
1785
1786 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
1787 ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */
1788 strcpy(ovfs.vfc_name, vfsp->vfc_name);
1789 ovfs.vfc_index = vfsp->vfc_typenum;
1790 ovfs.vfc_refcount = vfsp->vfc_refcount;
1791 ovfs.vfc_flags = vfsp->vfc_flags;
1792 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
1793 if (error)
1794 return error;
1795 }
1796 return 0;
1797}
1798
1799#endif /* !NO_COMPAT_PRELITE2 */
1800
1801int kinfo_vdebug = 1;
1802int kinfo_vgetfailed;
1803
1804#define KINFO_VNODESLOP 10
1805/*
1806 * Dump vnode list (via sysctl).
1807 * Copyout address of vnode followed by vnode.
1808 */
1809/* ARGSUSED */
1810static int
1811sysctl_vnode SYSCTL_HANDLER_ARGS
1812{
1813 struct proc *p = curproc; /* XXX */
1814 struct mount *mp, *nmp;
1815 struct vnode *nvp, *vp;
1816 int error;
1817
1818#define VPTRSZ sizeof (struct vnode *)
1819#define VNODESZ sizeof (struct vnode)
1820
1821 req->lock = 0;
1822 if (!req->oldptr) /* Make an estimate */
1823 return (SYSCTL_OUT(req, 0,
1824 (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
1825
1826 simple_lock(&mountlist_slock);
1827 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1828 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1829 nmp = mp->mnt_list.cqe_next;
1830 continue;
1831 }
1832again:
1833 simple_lock(&mntvnode_slock);
1834 for (vp = mp->mnt_vnodelist.lh_first;
1835 vp != NULL;
1836 vp = nvp) {
1837 /*
1838 * Check that the vp is still associated with
1839 * this filesystem. RACE: could have been
1840 * recycled onto the same filesystem.
1841 */
1842 if (vp->v_mount != mp) {
1843 simple_unlock(&mntvnode_slock);
1844 if (kinfo_vdebug)
1845 printf("kinfo: vp changed\n");
1846 goto again;
1847 }
1848 nvp = vp->v_mntvnodes.le_next;
1849 simple_unlock(&mntvnode_slock);
1850 if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
1851 (error = SYSCTL_OUT(req, vp, VNODESZ)))
1852 return (error);
1853 simple_lock(&mntvnode_slock);
1854 }
1855 simple_unlock(&mntvnode_slock);
1856 simple_lock(&mountlist_slock);
1857 nmp = mp->mnt_list.cqe_next;
1858 vfs_unbusy(mp, p);
1859 }
1860 simple_unlock(&mountlist_slock);
1861
1862 return (0);
1863}
1864
1865/*
1866 * XXX
1867 * Exporting the vnode list on large systems causes them to crash.
1868 * Exporting the vnode list on medium systems causes sysctl to coredump.
1869 */
1870#if 0
1871SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
1872 0, 0, sysctl_vnode, "S,vnode", "");
1873#endif
1874
1875/*
1876 * Check to see if a filesystem is mounted on a block device.
1877 */
1878int
1879vfs_mountedon(vp)
1880 struct vnode *vp;
1881{
1882 struct vnode *vq;
1883 int error = 0;
1884
1885 if (vp->v_specflags & SI_MOUNTEDON)
1886 return (EBUSY);
1887 if (vp->v_flag & VALIASED) {
1888 simple_lock(&spechash_slock);
1889 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1890 if (vq->v_rdev != vp->v_rdev ||
1891 vq->v_type != vp->v_type)
1892 continue;
1893 if (vq->v_specflags & SI_MOUNTEDON) {
1894 error = EBUSY;
1895 break;
1896 }
1897 }
1898 simple_unlock(&spechash_slock);
1899 }
1900 return (error);
1901}
1902
1903/*
1904 * Unmount all filesystems. The list is traversed in reverse order
1905 * of mounting to avoid dependencies.
1906 */
1907void
1908vfs_unmountall()
1909{
1910 struct mount *mp, *nmp;
1911 struct proc *p = initproc; /* XXX XXX should this be proc0? */
1912 int error;
1913
1914 /*
1915 * Since this only runs when rebooting, it is not interlocked.
1916 */
1917 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
1918 nmp = mp->mnt_list.cqe_prev;
1919 error = dounmount(mp, MNT_FORCE, p);
1920 if (error) {
1921 printf("unmount of %s failed (",
1922 mp->mnt_stat.f_mntonname);
1923 if (error == EBUSY)
1924 printf("BUSY)\n");
1925 else
1926 printf("%d)\n", error);
1927 }
1928 }
1929}
1930
1931/*
1932 * Build hash lists of net addresses and hang them off the mount point.
1933 * Called by vfs_export() (below) to set up the lists of export addresses.
1934 */
1935static int
1936vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
1937 struct export_args *argp)
1938{
1939 register struct netcred *np;
1940 register struct radix_node_head *rnh;
1941 register int i;
1942 struct radix_node *rn;
1943 struct sockaddr *saddr, *smask = 0;
1944 struct domain *dom;
1945 int error;
1946
1947 if (argp->ex_addrlen == 0) {
1948 if (mp->mnt_flag & MNT_DEFEXPORTED)
1949 return (EPERM);
1950 np = &nep->ne_defexported;
1951 np->netc_exflags = argp->ex_flags;
1952 np->netc_anon = argp->ex_anon;
1953 np->netc_anon.cr_ref = 1;
1954 mp->mnt_flag |= MNT_DEFEXPORTED;
1955 return (0);
1956 }
1957 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1958 np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
1959 bzero((caddr_t) np, i);
1960 saddr = (struct sockaddr *) (np + 1);
1961 if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
1962 goto out;
1963 if (saddr->sa_len > argp->ex_addrlen)
1964 saddr->sa_len = argp->ex_addrlen;
1965 if (argp->ex_masklen) {
1966 smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
1967 error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
1968 if (error)
1969 goto out;
1970 if (smask->sa_len > argp->ex_masklen)
1971 smask->sa_len = argp->ex_masklen;
1972 }
1973 i = saddr->sa_family;
1974 if ((rnh = nep->ne_rtable[i]) == 0) {
1975 /*
1976		 * Seems silly to initialize every AF when most are not used;
1977		 * do so on demand here.
1978 */
1979 for (dom = domains; dom; dom = dom->dom_next)
1980 if (dom->dom_family == i && dom->dom_rtattach) {
1981 dom->dom_rtattach((void **) &nep->ne_rtable[i],
1982 dom->dom_rtoffset);
1983 break;
1984 }
1985 if ((rnh = nep->ne_rtable[i]) == 0) {
1986 error = ENOBUFS;
1987 goto out;
1988 }
1989 }
1990 rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
1991 np->netc_rnodes);
1992 if (rn == 0 || np != (struct netcred *) rn) { /* already exists */
1993 error = EPERM;
1994 goto out;
1995 }
1996 np->netc_exflags = argp->ex_flags;
1997 np->netc_anon = argp->ex_anon;
1998 np->netc_anon.cr_ref = 1;
1999 return (0);
2000out:
2001 free(np, M_NETADDR);
2002 return (error);
2003}
2004
2005/* ARGSUSED */
2006static int
2007vfs_free_netcred(struct radix_node *rn, void *w)
2008{
2009 register struct radix_node_head *rnh = (struct radix_node_head *) w;
2010
2011 (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
2012 free((caddr_t) rn, M_NETADDR);
2013 return (0);
2014}
2015
2016/*
2017 * Free the net address hash lists that are hanging off the mount points.
2018 */
2019static void
2020vfs_free_addrlist(struct netexport *nep)
2021{
2022 register int i;
2023 register struct radix_node_head *rnh;
2024
2025 for (i = 0; i <= AF_MAX; i++)
2026 if ((rnh = nep->ne_rtable[i])) {
2027 (*rnh->rnh_walktree) (rnh, vfs_free_netcred,
2028 (caddr_t) rnh);
2029 free((caddr_t) rnh, M_RTABLE);
2030 nep->ne_rtable[i] = 0;
2031 }
2032}
2033
2034int
2035vfs_export(mp, nep, argp)
2036 struct mount *mp;
2037 struct netexport *nep;
2038 struct export_args *argp;
2039{
2040 int error;
2041
2042 if (argp->ex_flags & MNT_DELEXPORT) {
2043 if (mp->mnt_flag & MNT_EXPUBLIC) {
2044 vfs_setpublicfs(NULL, NULL, NULL);
2045 mp->mnt_flag &= ~MNT_EXPUBLIC;
2046 }
2047 vfs_free_addrlist(nep);
2048 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2049 }
2050 if (argp->ex_flags & MNT_EXPORTED) {
2051 if (argp->ex_flags & MNT_EXPUBLIC) {
2052 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2053 return (error);
2054 mp->mnt_flag |= MNT_EXPUBLIC;
2055 }
2056 if ((error = vfs_hang_addrlist(mp, nep, argp)))
2057 return (error);
2058 mp->mnt_flag |= MNT_EXPORTED;
2059 }
2060 return (0);
2061}
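
/*
 * Illustrative sketch (editor's addition, hypothetical names): a
 * filesystem's mount routine forwards the export_args it copied in from
 * userland to vfs_export() on an MNT_UPDATE mount; "ump->um_export" and
 * "args.export" stand in for that filesystem's private storage and
 * mount arguments.
 */
#ifdef notdef
	if (mp->mnt_flag & MNT_UPDATE)
		return (vfs_export(mp, &ump->um_export, &args.export));
#endif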
2062
2063
2064/*
2065 * Set the publicly exported filesystem (WebNFS). Currently, only
2066 * one public filesystem is possible in the spec (RFC 2054 and 2055)
2067 */
2068int
2069vfs_setpublicfs(mp, nep, argp)
2070 struct mount *mp;
2071 struct netexport *nep;
2072 struct export_args *argp;
2073{
2074 int error;
2075 struct vnode *rvp;
2076 char *cp;
2077
2078 /*
2079 * mp == NULL -> invalidate the current info, the FS is
2080 * no longer exported. May be called from either vfs_export
2081 * or unmount, so check if it hasn't already been done.
2082 */
2083 if (mp == NULL) {
2084 if (nfs_pub.np_valid) {
2085 nfs_pub.np_valid = 0;
2086 if (nfs_pub.np_index != NULL) {
2087 FREE(nfs_pub.np_index, M_TEMP);
2088 nfs_pub.np_index = NULL;
2089 }
2090 }
2091 return (0);
2092 }
2093
2094 /*
2095 * Only one allowed at a time.
2096 */
2097 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2098 return (EBUSY);
2099
2100 /*
2101 * Get real filehandle for root of exported FS.
2102 */
2103 bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
2104 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
2105
2106 if ((error = VFS_ROOT(mp, &rvp)))
2107 return (error);
2108
2109 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
2110 return (error);
2111
2112 vput(rvp);
2113
2114 /*
2115 * If an indexfile was specified, pull it in.
2116 */
2117 if (argp->ex_indexfile != NULL) {
2118 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2119 M_WAITOK);
2120 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2121 MAXNAMLEN, (size_t *)0);
2122 if (!error) {
2123 /*
2124 * Check for illegal filenames.
2125 */
2126 for (cp = nfs_pub.np_index; *cp; cp++) {
2127 if (*cp == '/') {
2128 error = EINVAL;
2129 break;
2130 }
2131 }
2132 }
2133 if (error) {
2134 FREE(nfs_pub.np_index, M_TEMP);
2135 return (error);
2136 }
2137 }
2138
2139 nfs_pub.np_mount = mp;
2140 nfs_pub.np_valid = 1;
2141 return (0);
2142}
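/*
 * Illustrative sketch, not part of the original file: a caller asking
 * for the WebNFS public export would pass export_args along these
 * lines (names and values are hypothetical; ex_indexfile is a
 * user-space string that copyinstr() above pulls into
 * nfs_pub.np_index):
 *
 *	struct export_args ea;
 *
 *	bzero(&ea, sizeof(ea));
 *	ea.ex_flags = MNT_EXPORTED | MNT_EXPUBLIC;
 *	ea.ex_indexfile = index_path;
 *
 * Only one public filesystem may exist at a time; a request for a
 * second, different mount fails with EBUSY.
 */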
2143
2144struct netcred *
2145vfs_export_lookup(mp, nep, nam)
2146 register struct mount *mp;
2147 struct netexport *nep;
2148 struct mbuf *nam;
2148 struct sockaddr *nam;
2149{
2150 register struct netcred *np;
2151 register struct radix_node_head *rnh;
2152 struct sockaddr *saddr;
2153
2154 np = NULL;
2155 if (mp->mnt_flag & MNT_EXPORTED) {
2156 /*
2157 * Lookup in the export list first.
2158 */
2159 if (nam != NULL) {
2160 saddr = mtod(nam, struct sockaddr *);
2160 saddr = nam;
2161 rnh = nep->ne_rtable[saddr->sa_family];
2162 if (rnh != NULL) {
2163 np = (struct netcred *)
2164 (*rnh->rnh_matchaddr)((caddr_t)saddr,
2165 rnh);
2166 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2167 np = NULL;
2168 }
2169 }
2170 /*
2171 * If no address match, use the default if it exists.
2172 */
2173 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2174 np = &nep->ne_defexported;
2175 }
2176 return (np);
2177}
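/*
 * Illustrative sketch, not part of the original file: vfs_export_lookup()
 * is intended for the NFS request path, which must map a client's socket
 * address to the credentials and flags it was exported with.  A
 * hypothetical caller might look like this ("credanonp" and "exflagsp"
 * are illustrative output parameters):
 *
 *	struct netcred *np;
 *
 *	np = vfs_export_lookup(mp, nep, nam);
 *	if (np == NULL)
 *		return (EACCES);
 *	*exflagsp = np->netc_exflags;
 *	*credanonp = &np->netc_anon;
 *
 * A NULL return means neither a matching address entry nor a default
 * export exists, so the request should be refused.
 */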
2178
2179/*
2180 * Perform msync on all vnodes under a mount point.
2181 * The mount point must be locked.
2182 */
2183void
2184vfs_msync(struct mount *mp, int flags) {
2185 struct vnode *vp, *nvp;
2186loop:
2187 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
2188
2189 if (vp->v_mount != mp)
2190 goto loop;
2191 nvp = vp->v_mntvnodes.le_next;
2192 if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))
2193 continue;
2194 if (vp->v_object &&
2195 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
2196 vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE);
2197 }
2198 }
2199}
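/*
 * Illustrative sketch, not part of the original file: the sync and
 * unmount paths are the expected callers; a non-blocking pass that
 * pushes dirty mmap'd pages without waiting on locked vnodes would be:
 *
 *	vfs_msync(mp, MNT_NOWAIT);
 *
 * Any flags value other than MNT_WAIT causes locked vnodes to be
 * skipped rather than waited for.
 */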
2200
2201/*
2202 * Create the VM object needed for VMIO and mmap support. This
2203 * is done for all VREG files in the system. Some filesystems might
2204 * take advantage of the additional metadata buffering capability of
2205 * the VMIO code by making the device node VMIO mode as well.
2206 */
2207int
2208vfs_object_create(vp, p, cred, waslocked)
2209 struct vnode *vp;
2210 struct proc *p;
2211 struct ucred *cred;
2212 int waslocked;
2213{
2214 struct vattr vat;
2215 vm_object_t object;
2216 int error = 0;
2217
2218retry:
2219 if ((object = vp->v_object) == NULL) {
2220 if (vp->v_type == VREG) {
2221 if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
2222 goto retn;
2223 (void) vnode_pager_alloc(vp,
2224 OFF_TO_IDX(round_page(vat.va_size)), 0, 0);
2225 } else {
2226 /*
2227 * This simply allocates the biggest object possible
2228 * for a VBLK vnode. This should be fixed, but doesn't
2229 * cause any problems (yet).
2230 */
2231 (void) vnode_pager_alloc(vp, INT_MAX, 0, 0);
2232 }
2233 vp->v_object->flags |= OBJ_VFS_REF;
2234 } else {
2235 if (object->flags & OBJ_DEAD) {
2236 if (waslocked)
2237 VOP_UNLOCK(vp, 0, p);
2238 tsleep(object, PVM, "vodead", 0);
2239 if (waslocked)
2240 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2241 goto retry;
2242 }
2243 if ((object->flags & OBJ_VFS_REF) == 0) {
2244 object->flags |= OBJ_VFS_REF;
2245 vm_object_reference(object);
2246 }
2247 }
2248 if (vp->v_object)
2249 vp->v_flag |= VVMIO;
2250
2251retn:
2252 return error;
2253}
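/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * call site opening a regular file might attach the VM object like
 * this, with the vnode already locked by the caller (hence
 * waslocked = 1):
 *
 *	if (vp->v_type == VREG &&
 *	    (error = vfs_object_create(vp, p, p->p_ucred, 1)) != 0)
 *		return (error);
 *
 * The waslocked argument only matters when the routine must sleep on
 * an OBJ_DEAD object: it then drops and re-takes the vnode lock
 * around the tsleep().
 */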
2254
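/*
 * Note added for clarity (the original has no comment here): vtouch()
 * gives an unreferenced vnode another pass through the free list.  If
 * the vnode has no users and is currently on the free list, it is
 * moved to the tail so it will be reclaimed later than vnodes at the
 * head.
 */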
2255void
2256vtouch(vp)
2257 struct vnode *vp;
2258{
2259 simple_lock(&vp->v_interlock);
2260 if (vp->v_usecount) {
2261 simple_unlock(&vp->v_interlock);
2262 return;
2263 }
2264 if (simple_lock_try(&vnode_free_list_slock)) {
2265 if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) {
2266 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
2267 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
2268 }
2269 simple_unlock(&vnode_free_list_slock);
2270 }
2271 simple_unlock(&vp->v_interlock);
2272}