vfs_export.c revision 41995
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 * $Id: vfs_subr.c,v 1.175 1998/12/21 23:38:33 eivind Exp $
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/domain.h>
#include <sys/dirent.h>
#include <sys/vmmeter.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <vm/vm_zone.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");

static void	insmntque __P((struct vnode *vp, struct mount *mp));
static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
static void	vfree __P((struct vnode *));
static void	vgonel __P((struct vnode *vp, struct proc *p));
static unsigned long	numvnodes;
SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

static TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct tobefreelist vnode_tobefree_list;	/* vnode free list */

static u_long wantfreevnodes = 25;
SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
static u_long freevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");

int vfs_ioopt = 0;
#ifdef ENABLE_VFS_IOOPT
SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
#endif

struct mntlist mountlist;	/* mounted filesystem list */
struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
int nfs_mount_type = -1;
static struct simplelock vnode_free_list_slock;
static struct simplelock spechash_slock;
struct nfs_public nfs_pub;	/* publicly exported FS */
static vm_zone_t vnode_zone;

/*
 * The workitem queue.
 */
#define SYNCER_MAXDELAY		32
static int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
time_t syncdelay = 30;
int rushjob;			/* number of slots to run ASAP */

static int syncer_delayno = 0;
static long syncer_mask;
LIST_HEAD(synclist, vnode);
static struct synclist *syncer_workitem_pending;

int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");

static void	vfs_free_addrlist __P((struct netexport *nep));
static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
				       struct export_args *argp));

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	desiredvnodes = maxproc + cnt.v_page_count / 4;
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&vnode_tobefree_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
	vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
	/*
	 * Initialize the filesystem syncer.
	 */
	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
	    &syncer_mask);
	syncer_maxdelay = syncer_mask + 1;
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_kern_flag |= MNTK_MWAIT;
		if (interlkp) {
			simple_unlock(interlkp);
		}
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		if (interlkp) {
			simple_lock(interlkp);
		}
		return (ENOENT);
	}
	lkflags = LK_SHARED | LK_NOPAUSE;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}
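
/*
 * Usage sketch (cf. the DDB and sysctl code later in this file): the
 * canonical pattern for traversing the mount list is
 *
 *	simple_lock(&mountlist_slock);
 *	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
 *		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
 *			nmp = mp->mnt_list.cqe_next;
 *			continue;
 *		}
 *		...work on mp...
 *		simple_lock(&mountlist_slock);
 *		nmp = mp->mnt_list.cqe_next;
 *		vfs_unbusy(mp, p);
 *	}
 *	simple_unlock(&mountlist_slock);
 *
 * On success vfs_busy() consumes the interlock passed via LK_INTERLOCK,
 * so the caller re-takes it before stepping to the next mount.
 */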

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	if (fstypename == NULL)
		return (ENODEV);
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = 0;
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
#ifdef notdef	/* XXX JH */
int
lite2_vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*lite2_mountroot) __P((void));
	int error;

	if (lite2_mountroot != NULL)
		return ((*lite2_mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}
#endif

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *) 0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_atime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec = VNOVAL;
	vap->va_mtime.tv_sec = VNOVAL;
	vap->va_mtime.tv_nsec = VNOVAL;
	vap->va_ctime.tv_sec = VNOVAL;
	vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern vop_t **dead_vnodeop_p;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	vop_t **vops;
	struct vnode **vpp;
{
	int s;
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *tvp, *nvp;
	vm_object_t object;
	TAILQ_HEAD(freelst, vnode) vnode_tmp_list;

	/*
	 * We take the least recently used vnode from the freelist
	 * if we can get it and it has no cached pages, and no
	 * namecache entries are relative to it.
	 * Otherwise we allocate a new vnode.
	 */

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	TAILQ_INIT(&vnode_tmp_list);

	for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) {
		nvp = TAILQ_NEXT(vp, v_freelist);
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		if (vp->v_flag & VAGE) {
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		} else {
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		}
		vp->v_flag &= ~(VTBFREE|VAGE);
		vp->v_flag |= VFREE;
		if (vp->v_usecount)
			panic("tobe free vnode isn't");
		freevnodes++;
	}

	if (wantfreevnodes && freevnodes < wantfreevnodes) {
		vp = NULL;
	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
		/*
		 * XXX: this is only here to be backwards compatible
		 */
		vp = NULL;
	} else {
		for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) {
			nvp = TAILQ_NEXT(vp, v_freelist);
			if (!simple_lock_try(&vp->v_interlock))
				continue;
			if (vp->v_usecount)
				panic("free vnode isn't");

			object = vp->v_object;
			if (object && (object->resident_page_count || object->ref_count)) {
				printf("object inconsistent state: RPC: %d, RC: %d\n",
				    object->resident_page_count, object->ref_count);
				/* Don't recycle if it's caching some pages */
				TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
				TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist);
				continue;
			} else if (LIST_FIRST(&vp->v_cache_src)) {
				/* Don't recycle if active in the namecache */
				simple_unlock(&vp->v_interlock);
				continue;
			} else {
				break;
			}
		}
	}

	for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) {
		nvp = TAILQ_NEXT(tvp, v_freelist);
		TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist);
		simple_unlock(&tvp->v_interlock);
	}

	if (vp) {
		vp->v_flag |= VDOOMED;
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
		simple_unlock(&vnode_free_list_slock);
		cache_purge(vp);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD) {
			vgonel(vp, p);
		} else {
			simple_unlock(&vp->v_interlock);
		}

#ifdef DIAGNOSTIC
		{
			int s;

			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
		}
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
		vp->v_maxio = 0;
	} else {
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *) zalloc(vnode_zone);
		bzero((char *) vp, sizeof *vp);
		simple_lock_init(&vp->v_interlock);
		vp->v_dd = vp;
		cache_purge(vp);
		LIST_INIT(&vp->v_cache_src);
		TAILQ_INIT(&vp->v_cache_dst);
		numvnodes++;
	}

	TAILQ_INIT(&vp->v_cleanblkhd);
	TAILQ_INIT(&vp->v_dirtyblkhd);
	vp->v_type = VNON;
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	splx(s);

	vfs_object_create(vp, p, p->p_ucred, TRUE);
	return (0);
}
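
/*
 * A note on the free-list policy above: with the default
 * wantfreevnodes == 25, getnewvnode() recycles from the free list only
 * once at least that many vnodes are free; with wantfreevnodes set to
 * 0 it keeps allocating fresh vnodes until numvnodes exceeds
 * desiredvnodes (the kern.maxvnodes sysctl), the historical behaviour.
 */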

/*
 * Move a vnode from one mount queue to another.
 */
static void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL) {
		simple_unlock(&mntvnode_slock);
		return;
	}
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp)) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t) &vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if (flags & V_SAVE) {
		s = splbio();
		while (vp->v_numoutput) {
			vp->v_flag |= VBWAIT;
			error = tsleep((caddr_t)&vp->v_numoutput,
			    slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
		}
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    !TAILQ_EMPTY(&vp->v_dirtyblkhd))
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
	s = splbio();
	for (;;) {
		blist = TAILQ_FIRST(&vp->v_cleanblkhd);
		if (!blist)
			blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t) bp,
				    slpflag | (PRIBIO + 4), "vinvalbuf",
				    slptimeo);
				if (error) {
					splx(s);
					return (error);
				}
				break;
			}
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.  Note that vfs_bio_awrite expects
			 * buffers to reside on a queue, while VOP_BWRITE and
			 * brelse do not.
			 */
			if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
			    (flags & V_SAVE)) {

				if (bp->b_vp == vp) {
					if (bp->b_flags & B_CLUSTEROK) {
						vfs_bio_awrite(bp);
					} else {
						bremfree(bp);
						bp->b_flags |= (B_BUSY | B_ASYNC);
						VOP_BWRITE(bp);
					}
				} else {
					bremfree(bp);
					bp->b_flags |= B_BUSY;
					(void) VOP_BWRITE(bp);
				}
				break;
			}
			bremfree(bp);
			bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF | B_BUSY);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}

	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	simple_lock(&vp->v_interlock);
	object = vp->v_object;
	if (object != NULL) {
		vm_object_page_remove(object, 0, 0,
		    (flags & V_SAVE) ?
		    TRUE : FALSE);
	}
	simple_unlock(&vp->v_interlock);

	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Truncate a file's buffer and pages to a specified length.  This
 * is in lieu of the old vinvalbuf mechanism, which performed unneeded
 * sync activity.
 */
int
vtruncbuf(vp, cred, p, length, blksize)
	register struct vnode *vp;
	struct ucred *cred;
	struct proc *p;
	off_t length;
	int blksize;
{
	register struct buf *bp;
	struct buf *nbp;
	int s, anyfreed;
	int trunclbn;

	/*
	 * Round up to the *next* lbn.
	 */
	trunclbn = (length + blksize - 1) / blksize;

	s = splbio();
restart:
	anyfreed = 1;
	for (;anyfreed;) {
		anyfreed = 0;
		for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (bp->b_flags & B_BUSY) {
					bp->b_flags |= B_WANTED;
					tsleep(bp, PRIBIO + 4, "vtrb1", 0);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0)||
				    (nbp->b_vp != vp) ||
				    (nbp->b_flags & B_DELWRI))) {
					goto restart;
				}
			}
		}

		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (bp->b_flags & B_BUSY) {
					bp->b_flags |= B_WANTED;
					tsleep(bp, PRIBIO + 4, "vtrb2", 0);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0)||
				    (nbp->b_vp != vp) ||
				    (nbp->b_flags & B_DELWRI) == 0)) {
					goto restart;
				}
			}
		}
	}

	if (length > 0) {
restartsync:
		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
				if (bp->b_flags & B_BUSY) {
					bp->b_flags |= B_WANTED;
					tsleep(bp, PRIBIO, "vtrb3", 0);
				} else {
					bremfree(bp);
					bp->b_flags |= B_BUSY;
					if (bp->b_vp == vp) {
						bp->b_flags |= B_ASYNC;
					} else {
						bp->b_flags &= ~B_ASYNC;
					}
					VOP_BWRITE(bp);
				}
				goto restartsync;
			}

		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
	}

	splx(s);

	vnode_pager_setsize(vp, length);

	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

#if defined(DIAGNOSTIC)
	if (bp->b_vp)
		panic("bgetvp: not free");
#endif
	vhold(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bp->b_xflags |= B_VNCLEAN;
	bp->b_xflags &= ~B_VNDIRTY;
	TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
	splx(s);
}
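
/*
 * Note: bgetvp() above and brelvp() below bracket a buffer's life on a
 * vnode's buffer lists: bgetvp() vhold()s the vnode and queues the
 * buffer on v_cleanblkhd, reassignbuf() may later move it to
 * v_dirtyblkhd, and brelvp() dequeues it and vdrop()s the vnode.
 */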

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	struct buflists *listheadp;
	int s;

#if defined(DIAGNOSTIC)
	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
#endif

	/*
	 * Delete from old vnode list, if on one.
	 */
	vp = bp->b_vp;
	s = splbio();
	if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
		if (bp->b_xflags & B_VNDIRTY)
			listheadp = &vp->v_dirtyblkhd;
		else
			listheadp = &vp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
	}
	if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	splx(s);
	bp->b_vp = (struct vnode *) 0;
	vdrop(vp);
}

/*
 * The workitem queue.
 *
 * It is useful to delay writes of file data and filesystem metadata
 * for tens of seconds so that quickly created and deleted files need
 * not waste disk bandwidth being created and removed. To realize this,
 * we append vnodes to a "workitem" queue. When running with a soft
 * updates implementation, most pending metadata dependencies should
 * not wait for more than a few seconds. Thus, vnodes for mounted-on
 * block devices are delayed only about half the time that file data
 * is delayed. Similarly, directory updates are more critical, so they
 * are delayed only about a third of the time that file data is
 * delayed. Thus, there are SYNCER_MAXDELAY queues that are processed
 * round-robin at a rate of one each second (driven off the filesystem
 * syncer process). The syncer_delayno variable indicates the next
 * queue that is to be processed. Items that need to be processed soon
 * are placed in this queue:
 *
 *	syncer_workitem_pending[syncer_delayno]
 *
 * A delay of fifteen seconds is done by placing the request fifteen
 * entries later in the queue:
 *
 *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
 *
 */
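
/*
 * Worked example: SYNCER_MAXDELAY is 32, so hashinit() sizes the wheel
 * to 32 buckets and syncer_mask is 31.  A vnode asking for a 15 second
 * delay while syncer_delayno is, say, 20 lands in bucket
 * (20 + 15) & 31 == 3; the syncer, advancing one bucket per second and
 * wrapping at 32, reaches bucket 3 fifteen seconds later.
 */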

/*
 * Add an item to the syncer work queue.
 */
void
vn_syncer_add_to_worklist(vp, delay)
	struct vnode *vp;
	int delay;
{
	int s, slot;

	s = splbio();

	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
	}

	if (delay > syncer_maxdelay - 2)
		delay = syncer_maxdelay - 2;
	slot = (syncer_delayno + delay) & syncer_mask;

	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
	vp->v_flag |= VONWORKLST;
	splx(s);
}

static void sched_sync __P((void));
static struct proc *updateproc;
static struct kproc_desc up_kp = {
	"syncer",
	sched_sync,
	&updateproc
};
SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)

/*
 * System filesystem synchronizer daemon.
 */
void
sched_sync(void)
{
	struct synclist *slp;
	struct vnode *vp;
	long starttime;
	int s;
	struct proc *p = updateproc;

	for (;;) {
		starttime = time_second;

		/*
		 * Push files whose dirty time has expired.
		 */
		s = splbio();
		slp = &syncer_workitem_pending[syncer_delayno];
		syncer_delayno += 1;
		if (syncer_delayno == syncer_maxdelay)
			syncer_delayno = 0;
		splx(s);

		while ((vp = LIST_FIRST(slp)) != NULL) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
			(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
			VOP_UNLOCK(vp, 0, p);
			if (LIST_FIRST(slp) == vp) {
				if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
				    vp->v_type != VBLK)
					panic("sched_sync: fsync failed");
				/*
				 * Move ourselves to the back of the sync list.
				 */
				LIST_REMOVE(vp, v_synclist);
				vn_syncer_add_to_worklist(vp, syncdelay);
			}
		}

		/*
		 * Do soft update processing.
		 */
		if (bioops.io_sync)
			(*bioops.io_sync)(NULL);

		/*
		 * The variable rushjob allows the kernel to speed up the
		 * processing of the filesystem syncer process. A rushjob
		 * value of N tells the filesystem syncer to process the next
		 * N seconds worth of work on its queue ASAP. Currently rushjob
		 * is used by the soft update code to speed up the filesystem
		 * syncer process when the incore state is getting so far
		 * ahead of the disk that the kernel memory pool is being
		 * threatened with exhaustion.
		 */
		if (rushjob > 0) {
			rushjob -= 1;
			continue;
		}
		/*
		 * If it has taken us less than a second to process the
		 * current work, then wait. Otherwise start right over
		 * again. We can still lose time if any single round
		 * takes more than two seconds, but it does not really
		 * matter as we are just trying to generally pace the
		 * filesystem activity.
		 */
		if (time_second == starttime)
			tsleep(&lbolt, PPAUSE, "syncer", 0);
	}
}

/*
 * Associate a p-buffer with a vnode.
 */
void
pbgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
#if defined(DIAGNOSTIC)
	if (bp->b_vp)
		panic("pbgetvp: not free");
#endif
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
}

/*
 * Disassociate a p-buffer from a vnode.
 */
void
pbrelvp(bp)
	register struct buf *bp;
{

#if defined(DIAGNOSTIC)
	if (bp->b_vp == (struct vnode *) 0)
		panic("pbrelvp: NULL");
#endif

	bp->b_vp = (struct vnode *) 0;
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	struct buflists *listheadp;
	struct vnode *oldvp;
	int delay;
	int s;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}

	s = splbio();
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
		oldvp = bp->b_vp;
		if (bp->b_xflags & B_VNDIRTY)
			listheadp = &oldvp->v_dirtyblkhd;
		else
			listheadp = &oldvp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
		vdrop(oldvp);
	}
	/*
	 * If dirty, put on list of dirty buffers; otherwise insert onto list
	 * of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI) {
		struct buf *tbp;

		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = syncdelay / 3;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = syncdelay / 2;
					break;
				}
				/* fall through */
			default:
				delay = syncdelay;
			}
			vn_syncer_add_to_worklist(newvp, delay);
		}
		bp->b_xflags |= B_VNDIRTY;
		tbp = TAILQ_FIRST(listheadp);
		if (tbp == NULL ||
		    (bp->b_lblkno >= 0 && tbp->b_lblkno > bp->b_lblkno)) {
			TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
		} else {
			if (bp->b_lblkno >= 0) {
				struct buf *ttbp;
				while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) &&
				    (ttbp->b_lblkno < bp->b_lblkno)) {
					tbp = ttbp;
				}
				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
			} else {
				TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs);
			}
		}
	} else {
		bp->b_xflags |= B_VNCLEAN;
		TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
		if ((newvp->v_flag & VONWORKLST) &&
		    TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	}
	bp->b_vp = newvp;
	vhold(bp->b_vp);
	splx(s);
}

/*
 * Create a vnode for a block device.
 * Used for mounting the root file system.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	/* XXX 255 is for mfs. */
	if (dev == NODEV || (major(dev) != 255 && (major(dev) >= nblkdev ||
	    bdevsw[major(dev)] == NULL))) {
		*vpp = NULLVP;
		return (ENXIO);
	}
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if ((nvp = checkalias(vp, dev, (struct mount *)0)) != NULL) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 * Only alias active device nodes.
		 * Not sure why we don't re-use this like we do below.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			/*
			 * It disappeared, and we may have slept.
			 * Restart from the beginning.
			 */
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	/*
	 * It would be a lot clearer what is going on here if
	 * this had been expressed as:
	 *	if ( vp && (vp->v_tag == VT_NULL))
	 * and the clauses had been swapped.
	 */
	if (vp == NULL || vp->v_tag != VT_NON) {
		/*
		 * Put the new vnode into the hash chain,
		 * and if there was an alias, connect them.
		 */
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/*
	 * if ( vp && (vp->v_tag == VT_NULL))
	 * We have a vnode alias, but it is trashed.
	 * Make it look like it's newly allocated (by getnewvnode()).
	 * The caller should use this instead.
	 */
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
	}
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}

	vp->v_usecount++;

	if (VSHOULDBUSY(vp))
		vbusy(vp);
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active. We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (VSHOULDFREE(vp))
				vfree(vp);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

void
vref(struct vnode *vp)
{
	simple_lock(&vp->v_interlock);
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}
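
/*
 * Reference-count entry points, in brief:
 *
 *	vref(vp)	add a reference; vnode need not be locked
 *	vrele(vp)	drop a reference; vnode unlocked on entry
 *	vput(vp)	drop a reference; vnode locked on entry
 *
 * vput() is roughly equivalent to unlocking and then vrele(), except
 * that when the last reference is dropped it calls VOP_INACTIVE with
 * the lock it already holds, where vrele() must first acquire it.
 */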

/*
 * Vnode put/release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		simple_unlock(&vp->v_interlock);

		return;
	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * If we are doing a vput, the node is already locked, and we must
		 * call VOP_INACTIVE with the node locked.  So, in the case of
		 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
		 */
		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
			VOP_INACTIVE(vp, p);
		}

	} else {
#ifdef DIAGNOSTIC
		vprint("vrele: negative ref count", vp);
		simple_unlock(&vp->v_interlock);
#endif
		panic("vrele: negative ref cnt");
	}
}

void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		VOP_UNLOCK(vp, LK_INTERLOCK, p);
		return;

	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * If we are doing a vput, the node is already locked, and we must
		 * call VOP_INACTIVE with the node locked.  So, in the case of
		 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
		 */
		simple_unlock(&vp->v_interlock);
		VOP_INACTIVE(vp, p);

	} else {
#ifdef DIAGNOSTIC
		vprint("vput: negative ref count", vp);
#endif
		panic("vput: negative ref cnt");
	}
}

/*
 * Somebody doesn't want the vnode recycled.
 */
void
vhold(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	vp->v_holdcnt++;
	if (VSHOULDBUSY(vp))
		vbusy(vp);
	splx(s);
}

/*
 * One less who cares about this vnode.
 */
void
vdrop(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	if (vp->v_holdcnt <= 0)
		panic("vdrop: holdcnt");
	vp->v_holdcnt--;
	if (VSHOULDFREE(vp))
		vfree(vp);
	splx(s);
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
static int busyprt = 0;	/* print out busy vnodes */
SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}

		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}

		/*
		 * If FORCECLOSE is set, forcibly close the vnode. For block
		 * or character devices, revert to an anonymous device. For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;
	vm_object_t obj;

	/*
	 * Check to see if the vnode is in use. If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * Clean out any buffers associated with the vnode.
	 */
	vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	if ((obj = vp->v_object) != NULL) {
		if (obj->ref_count == 0) {
			/*
			 * This is a normal way of shutting down the object/vnode
			 * association.
			 */
			vm_object_terminate(obj);
		} else {
			/*
			 * Woe to the process that tries to page now :-).
			 */
			vm_pager_deallocate(obj);
		}
	}

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");

	if (active)
		vrele(vp);

	cache_purge(vp);
	if (vp->v_vnlock) {
#if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */
#ifdef DIAGNOSTIC
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
#endif
#endif
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	if (VSHOULDFREE(vp))
		vfree(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vn_pollgone(vp);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t) vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("vop_revoke");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP) {
				simple_unlock(&spechash_slock);
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
		if (vp->v_flag & VXWANT) {
			vp->v_flag &= ~VXWANT;
			wakeup(vp);
		}
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp) {
			simple_unlock(inter_lkp);
		}
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}
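
/*
 * Overview of the reclamation path: vgone() takes the interlock and
 * calls vgonel(), which uses vclean(DOCLOSE) to flush buffers, close
 * and deactivate an active vnode, and VOP_RECLAIM it; vgonel() then
 * unhooks the vnode from its mount point and any special device alias
 * chain, and leaves it VBAD at the head of the free list for reuse.
 */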

/*
 * vgone, with the vp interlock held.
 */
static void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int s;
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	simple_lock(&vp->v_interlock);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 */
	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_flag & VFREE) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		} else if (vp->v_flag & VTBFREE) {
			TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
			vp->v_flag &= ~VTBFREE;
			freevnodes++;
		} else
			freevnodes++;
		vp->v_flag |= VFREE;
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		splx(s);
	}

	vp->v_type = VBAD;
	simple_unlock(&vp->v_interlock);
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}
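
/*
 * Example of an alias chain: bdevvp() creates a VBLK vnode for the
 * root device, and a filesystem may later create its own vnode for
 * the same dev_t; checkalias() links both through v_specnext and
 * marks them VALIASED.  vcount() below sums v_usecount across such a
 * chain to get the true reference count of the device.
 */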

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[96];

	if (label != NULL)
		printf("%s: %p: ", label, (void *)vp);
	else
		printf("%p: ", (void *)vp);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (vp->v_flag & VDOOMED)
		strcat(buf, "|VDOOMED");
	if (vp->v_flag & VFREE)
		strcat(buf, "|VFREE");
	if (vp->v_flag & VOBJBUF)
		strcat(buf, "|VOBJBUF");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
#include <ddb/ddb.h>
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);
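
/*
 * vfs_sysctl below handles the vfs.generic subtree.  From userland the
 * supported queries look like { CTL_VFS, VFS_GENERIC, VFS_MAXTYPENUM }
 * and { CTL_VFS, VFS_GENERIC, VFS_CONF, <typenum> }, the latter
 * returning the struct vfsconf for one filesystem type; a bare
 * one-component name is routed to sysctl_ovfs_conf() for pre-Lite2
 * compatibility.
 */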
static int
vfs_sysctl SYSCTL_HANDLER_ARGS
{
	int *name = (int *)arg1 - 1;	/* XXX */
	u_int namelen = arg2 + 1;	/* XXX */
	struct vfsconf *vfsp;

#if 1 || defined(COMPAT_PRELITE2)
	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
	if (namelen == 1)
		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
#endif

#ifdef notyet
	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);	/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
#endif
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		if (namelen != 2)
			return (ENOTDIR);
		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
	case VFS_CONF:
		if (namelen != 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
	}
	return (EOPNOTSUPP);
}

SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
    "Generic filesystem");

#if 1 || defined(COMPAT_PRELITE2)

static int
sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
{
	int error;
	struct vfsconf *vfsp;
	struct ovfsconf ovfs;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
		strcpy(ovfs.vfc_name, vfsp->vfc_name);
		ovfs.vfc_index = vfsp->vfc_typenum;
		ovfs.vfc_refcount = vfsp->vfc_refcount;
		ovfs.vfc_flags = vfsp->vfc_flags;
		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
		if (error)
			return error;
	}
	return 0;
}

#endif /* 1 || COMPAT_PRELITE2 */

#if 0
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
static int
sysctl_vnode SYSCTL_HANDLER_ARGS
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)

	req->lock = 0;
	if (!req->oldptr) /* Make an estimate */
		return (SYSCTL_OUT(req, 0,
		    (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			simple_unlock(&mntvnode_slock);
			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
				return (error);
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	return (0);
}
#endif

/*
 * XXX
 * Exporting the vnode list on large systems causes them to crash.
 * Exporting the vnode list on medium systems causes sysctl to coredump.
 */
#if 0
SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, 0, sysctl_vnode, "S,vnode", "");
#endif

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;
	struct proc *p;
	int error;

	if (curproc != NULL)
		p = curproc;
	else
		p = initproc;	/* XXX XXX should this be proc0? */
	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		error = dounmount(mp, MNT_FORCE, p);
		if (error) {
			printf("unmount of %s failed (",
			    mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t) np, i);
	saddr = (struct sockaddr *) (np + 1);
	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not used,
		 * do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **) &nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;

/*
 * Unmount all filesystems.  The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;
	struct proc *p;
	int error;

	if (curproc != NULL)
		p = curproc;
	else
		p = initproc;	/* XXX XXX should this be proc0? */
	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		error = dounmount(mp, MNT_FORCE, p);
		if (error) {
			printf("unmount of %s failed (",
			    mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t) np, i);
	saddr = (struct sockaddr *) (np + 1);
	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used; do so on demand here.
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **) &nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *) w;

	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t) rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i])) {
			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
			    (caddr_t) rnh);
			free((caddr_t) rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}
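
/*
 * Illustrative fragment (not compiled): a filesystem's mount-update
 * path typically forwards the export_args from mount(2) to vfs_export()
 * above.  "args" is the filesystem's mount argument structure and
 * "ump->um_export" a per-mount struct netexport; both names are
 * hypothetical here.
 */
#if 0
	if ((mp->mnt_flag & MNT_UPDATE) && args.fspec == 0)
		return (vfs_export(mp, &ump->um_export, &args.export));
#endif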

/*
 * Set the publicly exported filesystem (WebNFS).  Currently, only
 * one public filesystem is possible in the spec (RFC 2054 and 2055).
 */
int
vfs_setpublicfs(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;
	struct vnode *rvp;
	char *cp;

	/*
	 * mp == NULL -> invalidate the current info; the FS is
	 * no longer exported.  May be called from either vfs_export
	 * or unmount, so check if it hasn't already been done.
	 */
	if (mp == NULL) {
		if (nfs_pub.np_valid) {
			nfs_pub.np_valid = 0;
			if (nfs_pub.np_index != NULL) {
				FREE(nfs_pub.np_index, M_TEMP);
				nfs_pub.np_index = NULL;
			}
		}
		return (0);
	}

	/*
	 * Only one allowed at a time.
	 */
	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
		return (EBUSY);

	/*
	 * Get real filehandle for root of exported FS.
	 */
	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

	if ((error = VFS_ROOT(mp, &rvp)))
		return (error);

	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
		return (error);

	vput(rvp);

	/*
	 * If an indexfile was specified, pull it in.
	 */
	if (argp->ex_indexfile != NULL) {
		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
		    M_WAITOK);
		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
		    MAXNAMLEN, (size_t *)0);
		if (!error) {
			/*
			 * Check for illegal filenames.
			 */
			for (cp = nfs_pub.np_index; *cp; cp++) {
				if (*cp == '/') {
					error = EINVAL;
					break;
				}
			}
		}
		if (error) {
			FREE(nfs_pub.np_index, M_TEMP);
			return (error);
		}
	}

	nfs_pub.np_mount = mp;
	nfs_pub.np_valid = 1;
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct sockaddr *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = nam;
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr)((caddr_t)saddr,
				    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}
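
/*
 * Illustrative sketch (not compiled): an NFS-style server maps the
 * client's address to the credentials recorded at export time.  A NULL
 * return from vfs_export_lookup() means the client may not access this
 * mount.  "fs_check_client" is a hypothetical name.
 */
#if 0
static int
fs_check_client(mp, nep, nam)
	struct mount *mp;
	struct netexport *nep;
	struct sockaddr *nam;
{
	struct netcred *np;

	np = vfs_export_lookup(mp, nep, nam);
	if (np == NULL)
		return (EACCES);
	/*
	 * np->netc_exflags carries the MNT_EX* flags and np->netc_anon
	 * the credentials to use for anonymous requests.
	 */
	return (0);
}
#endif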

/*
 * Perform msync on all vnodes under a mount point.
 * The mount point must be locked.
 */
void
vfs_msync(struct mount *mp, int flags)
{
	struct vnode *vp, *nvp;
	struct vm_object *obj;
	int anyio, tries;

	tries = 5;
loop:
	anyio = 0;
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {

		nvp = vp->v_mntvnodes.le_next;

		if (vp->v_mount != mp) {
			goto loop;
		}

		if (vp->v_flag & VXLOCK)	/* XXX: what if MNT_WAIT? */
			continue;

		if (flags != MNT_WAIT) {
			obj = vp->v_object;
			if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0)
				continue;
			if (VOP_ISLOCKED(vp))
				continue;
		}

		simple_lock(&vp->v_interlock);
		if (vp->v_object &&
		    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
			if (!vget(vp,
			    LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ,
			    curproc)) {
				if (vp->v_object) {
					vm_object_page_clean(vp->v_object,
					    0, 0,
					    flags == MNT_WAIT ? OBJPC_SYNC : 0);
					anyio = 1;
				}
				vput(vp);
			}
		} else {
			simple_unlock(&vp->v_interlock);
		}
	}
	if (anyio && (--tries > 0))
		goto loop;
}

/*
 * Create the VM object needed for VMIO and mmap support.  This
 * is done for all VREG files in the system.  Some filesystems can
 * take advantage of the additional metadata buffering capability
 * of the VMIO code by making the device node VMIO mode as well.
 *
 * If !waslocked, must be called with interlock.
 */
int
vfs_object_create(vp, p, cred, waslocked)
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	int waslocked;
{
	struct vattr vat;
	vm_object_t object;
	int error = 0;

	if ((vp->v_type != VREG) && (vp->v_type != VBLK)) {
		if (!waslocked)
			simple_unlock(&vp->v_interlock);
		return (0);
	}

	if (!waslocked)
		vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, p);

retry:
	if ((object = vp->v_object) == NULL) {
		if (vp->v_type == VREG) {
			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
				goto retn;
			object = vnode_pager_alloc(vp, vat.va_size, 0, 0);
		} else if (major(vp->v_rdev) < nblkdev &&
		    bdevsw[major(vp->v_rdev)] != NULL) {
			/*
			 * This simply allocates the biggest object possible
			 * for a VBLK vnode.  This should be fixed, but doesn't
			 * cause any problems (yet).
			 */
			object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX),
			    0, 0);
		}
		/*
		 * A VBLK vnode with a bogus major number allocates nothing
		 * above; guard the dereference instead of tripping over a
		 * NULL object.
		 */
		if (object != NULL) {
			object->ref_count--;
			vp->v_usecount--;
		}
	} else {
		if (object->flags & OBJ_DEAD) {
			VOP_UNLOCK(vp, 0, p);
			tsleep(object, PVM, "vodead", 0);
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
			goto retry;
		}
	}

	if (vp->v_object) {
		vp->v_flag |= VOBJBUF;
	}

retn:
	if (!waslocked) {
		simple_lock(&vp->v_interlock);
		VOP_UNLOCK(vp, LK_INTERLOCK, p);
	}

	return (error);
}

static void
vfree(vp)
	struct vnode *vp;
{
	int s;

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	if (vp->v_flag & VTBFREE) {
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		vp->v_flag &= ~VTBFREE;
	}
	if (vp->v_flag & VAGE) {
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	} else {
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	}
	freevnodes++;
	simple_unlock(&vnode_free_list_slock);
	vp->v_flag &= ~VAGE;
	vp->v_flag |= VFREE;
	splx(s);
}

void
vbusy(vp)
	struct vnode *vp;
{
	int s;

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	if (vp->v_flag & VTBFREE) {
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		vp->v_flag &= ~VTBFREE;
	} else {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
	}
	simple_unlock(&vnode_free_list_slock);
	vp->v_flag &= ~(VFREE|VAGE);
	splx(s);
}
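
/*
 * Illustrative sketch (not compiled): vfree() and vbusy() above keep
 * the VFREE/VTBFREE flags in step with free-list membership, so a
 * vnode must never carry both flags at once.  A DIAGNOSTIC-style check
 * could look like this; "vfreelist_check" is a hypothetical name.
 */
#if 0
static void
vfreelist_check(vp)
	struct vnode *vp;
{
	if ((vp->v_flag & (VFREE | VTBFREE)) == (VFREE | VTBFREE))
		panic("vfreelist_check: vnode on free and tobefree lists");
}
#endif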

/*
 * Record a process's interest in events which might happen to
 * a vnode.  Because poll uses the historic select-style interface
 * internally, this routine serves as both the ``check for any
 * pending events'' and the ``record my interest in future events''
 * functions.  (These are done together, while the lock is held,
 * to avoid race conditions.)
 */
int
vn_pollrecord(vp, p, events)
	struct vnode *vp;
	struct proc *p;
	short events;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_revents & events) {
		/*
		 * This leaves events we are not interested
		 * in available for the other process which
		 * presumably had requested them
		 * (otherwise they would never have been
		 * recorded).
		 */
		events &= vp->v_pollinfo.vpi_revents;
		vp->v_pollinfo.vpi_revents &= ~events;

		simple_unlock(&vp->v_pollinfo.vpi_lock);
		return (events);
	}
	vp->v_pollinfo.vpi_events |= events;
	selrecord(p, &vp->v_pollinfo.vpi_selinfo);
	simple_unlock(&vp->v_pollinfo.vpi_lock);
	return (0);
}

/*
 * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
 * it is possible for us to miss an event due to race conditions, but
 * that condition is expected to be rare, so for the moment it is the
 * preferred interface.
 */
void
vn_pollevent(vp, events)
	struct vnode *vp;
	short events;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_events & events) {
		/*
		 * We clear vpi_events so that we don't
		 * call selwakeup() twice if two events are
		 * posted before the polling process(es) is
		 * awakened.  This also ensures that we take at
		 * most one selwakeup() if the polling process
		 * is no longer interested.  However, it does
		 * mean that only one event can be noticed at
		 * a time.  (Perhaps we should only clear those
		 * event bits which we note?) XXX
		 */
		vp->v_pollinfo.vpi_events = 0;	/* &= ~events ??? */
		vp->v_pollinfo.vpi_revents |= events;
		selwakeup(&vp->v_pollinfo.vpi_selinfo);
	}
	simple_unlock(&vp->v_pollinfo.vpi_lock);
}

/*
 * Wake up anyone polling on vp because it is being revoked.
 * This depends on dead_poll() returning POLLHUP for correct
 * behavior.
 */
void
vn_pollgone(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_events) {
		vp->v_pollinfo.vpi_events = 0;
		selwakeup(&vp->v_pollinfo.vpi_selinfo);
	}
	simple_unlock(&vp->v_pollinfo.vpi_lock);
}
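
/*
 * Illustrative sketch (not compiled): a filesystem's VOP_POLL routine
 * pairs with the producer side.  The poller records its interest via
 * vn_pollrecord(); whoever generates the event calls vn_pollevent().
 * "fs_poll" and "fs_data_arrived" are hypothetical names.
 */
#if 0
static int
fs_poll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	/* Report pending events and record interest in future ones. */
	return (vn_pollrecord(ap->a_vp, ap->a_p, ap->a_events));
}

static void
fs_data_arrived(vp)
	struct vnode *vp;
{
	/* Wake up and notify any recorded pollers. */
	vn_pollevent(vp, POLLIN | POLLRDNORM);
}
#endif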

/*
 * Routine to create and manage a filesystem syncer vnode.
 */
#define sync_close ((int (*) __P((struct vop_close_args *)))nullop)
int	sync_fsync __P((struct vop_fsync_args *));
int	sync_inactive __P((struct vop_inactive_args *));
int	sync_reclaim __P((struct vop_reclaim_args *));
#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock)
#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock)
int	sync_print __P((struct vop_print_args *));
#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)

static vop_t **sync_vnodeop_p;
static struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
	{ &vop_default_desc,	(vop_t *) vop_eopnotsupp },
	{ &vop_close_desc,	(vop_t *) sync_close },		/* close */
	{ &vop_fsync_desc,	(vop_t *) sync_fsync },		/* fsync */
	{ &vop_inactive_desc,	(vop_t *) sync_inactive },	/* inactive */
	{ &vop_reclaim_desc,	(vop_t *) sync_reclaim },	/* reclaim */
	{ &vop_lock_desc,	(vop_t *) sync_lock },		/* lock */
	{ &vop_unlock_desc,	(vop_t *) sync_unlock },	/* unlock */
	{ &vop_print_desc,	(vop_t *) sync_print },		/* print */
	{ &vop_islocked_desc,	(vop_t *) sync_islocked },	/* islocked */
	{ NULL, NULL }
};
static struct vnodeopv_desc sync_vnodeop_opv_desc =
	{ &sync_vnodeop_p, sync_vnodeop_entries };

VNODEOP_SET(sync_vnodeop_opv_desc);

/*
 * Create a new filesystem syncer vnode for the specified mount point.
 */
int
vfs_allocate_syncvnode(mp)
	struct mount *mp;
{
	struct vnode *vp;
	static long start, incr, next;
	int error;

	/* Allocate a new vnode */
	if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
		mp->mnt_syncer = NULL;
		return (error);
	}
	vp->v_type = VNON;
	/*
	 * Place the vnode onto the syncer worklist.  We attempt to
	 * scatter them about on the list so that they will go off
	 * at evenly distributed times even if all the filesystems
	 * are mounted at once.
	 */
	next += incr;
	if (next == 0 || next > syncer_maxdelay) {
		start /= 2;
		incr /= 2;
		if (start == 0) {
			start = syncer_maxdelay / 2;
			incr = syncer_maxdelay;
		}
		next = start;
	}
	vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
	mp->mnt_syncer = vp;
	return (0);
}

/*
 * Do a lazy sync of the filesystem.
 */
static int
sync_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *syncvp = ap->a_vp;
	struct mount *mp = syncvp->v_mount;
	struct proc *p = ap->a_p;
	int asyncflag;

	/*
	 * We only need to do something if this is a lazy evaluation.
	 */
	if (ap->a_waitfor != MNT_LAZY)
		return (0);

	/*
	 * Move ourselves to the back of the sync list.
	 */
	vn_syncer_add_to_worklist(syncvp, syncdelay);

	/*
	 * Walk the list of vnodes pushing all that are dirty and
	 * not already on the sync list.
	 */
	simple_lock(&mountlist_slock);
	if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) {
		simple_unlock(&mountlist_slock);
		return (0);
	}
	asyncflag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	vfs_msync(mp, MNT_NOWAIT);
	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
	if (asyncflag)
		mp->mnt_flag |= MNT_ASYNC;
	vfs_unbusy(mp, p);
	return (0);
}

/*
 * The syncer vnode is no longer referenced.
 */
static int
sync_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
	} */ *ap;
{

	vgone(ap->a_vp);
	return (0);
}

/*
 * The syncer vnode is no longer needed and is being decommissioned.
 */
static int
sync_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	vp->v_mount->mnt_syncer = NULL;
	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
		vp->v_flag &= ~VONWORKLST;
	}

	return (0);
}

/*
 * Print out a syncer vnode.
 */
static int
sync_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	printf("syncer vnode");
	if (vp->v_vnlock != NULL)
		lockmgr_printinfo(vp->v_vnlock);
	printf("\n");
	return (0);
}
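
/*
 * Example (userland simulation, not part of this file): the
 * start/incr/next dance in vfs_allocate_syncvnode() above spreads
 * successive syncer vnodes across the delay range in a bit-reversal-
 * like pattern: 16, 8, 24, 4, 12, 20, 28, 2, ... for a
 * syncer_maxdelay of 32.  This standalone program reproduces the
 * sequence of assigned slots.
 */
#if 0
#include <stdio.h>

int
main()
{
	static long start, incr, next;
	long syncer_maxdelay = 32, syncdelay = 30;
	int i;

	for (i = 0; i < 16; i++) {
		/* Identical logic to vfs_allocate_syncvnode(). */
		next += incr;
		if (next == 0 || next > syncer_maxdelay) {
			start /= 2;
			incr /= 2;
			if (start == 0) {
				start = syncer_maxdelay / 2;
				incr = syncer_maxdelay;
			}
			next = start;
		}
		printf("vnode %2d -> slot %ld\n", i,
		    syncdelay > 0 ? next % syncdelay : 0);
	}
	return (0);
}
#endif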