vfs_export.c revision 47640
1272343Sngie/* 2272343Sngie * Copyright (c) 1989, 1993 3272343Sngie * The Regents of the University of California. All rights reserved. 4272343Sngie * (c) UNIX System Laboratories, Inc. 5272343Sngie * All or some portions of this file are derived from material licensed 6272343Sngie * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 39 * $Id: vfs_subr.c,v 1.199 1999/05/24 00:34:10 jb Exp $ 40 */ 41 42/* 43 * External virtual filesystem routines 44 */ 45#include "opt_ddb.h" 46 47#include <sys/param.h> 48#include <sys/systm.h> 49#include <sys/conf.h> 50#include <sys/fcntl.h> 51#include <sys/kernel.h> 52#include <sys/proc.h> 53#include <sys/malloc.h> 54#include <sys/mount.h> 55#include <sys/socket.h> 56#include <sys/vnode.h> 57#include <sys/stat.h> 58#include <sys/buf.h> 59#include <sys/domain.h> 60#include <sys/dirent.h> 61#include <sys/vmmeter.h> 62 63#include <machine/limits.h> 64 65#include <vm/vm.h> 66#include <vm/vm_param.h> 67#include <vm/vm_prot.h> 68#include <vm/vm_object.h> 69#include <vm/vm_extern.h> 70#include <vm/pmap.h> 71#include <vm/vm_map.h> 72#include <vm/vm_page.h> 73#include <vm/vm_pager.h> 74#include <vm/vnode_pager.h> 75#include <vm/vm_zone.h> 76#include <sys/sysctl.h> 77 78#include <miscfs/specfs/specdev.h> 79 80static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 81 82static void insmntque __P((struct vnode *vp, struct mount *mp)); 83static void vclean __P((struct vnode *vp, int flags, struct proc *p)); 84static void vfree __P((struct vnode *)); 85static void vgonel __P((struct vnode *vp, struct proc *p)); 86static unsigned long numvnodes; 87SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); 88 89enum vtype iftovt_tab[16] = { 90 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 91 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 92}; 93int vttoif_tab[9] = { 94 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 95 S_IFSOCK, S_IFIFO, S_IFMT, 96}; 97 98static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ 99struct tobefreelist vnode_tobefree_list; /* vnode free list */ 100 101static u_long wantfreevnodes = 25; 102SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); 103static u_long freevnodes = 0; 104SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); 105 106int vfs_ioopt = 0; 107#ifdef ENABLE_VFS_IOOPT 108SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); 109#endif 110 111struct mntlist mountlist; /* mounted filesystem list */ 112struct simplelock mountlist_slock; 113struct simplelock mntvnode_slock; 114int nfs_mount_type = -1; 115#ifndef NULL_SIMPLELOCKS 116static struct simplelock mntid_slock; 117static struct simplelock vnode_free_list_slock; 118static struct simplelock spechash_slock; 119#endif 120struct nfs_public nfs_pub; /* publicly exported FS */ 121static vm_zone_t vnode_zone; 122 123/* 124 * The workitem queue. 125 */ 126#define SYNCER_MAXDELAY 32 127static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ 128time_t syncdelay = 30; 129int rushjob; /* number of slots to run ASAP */ 130 131static int syncer_delayno = 0; 132static long syncer_mask; 133LIST_HEAD(synclist, vnode); 134static struct synclist *syncer_workitem_pending; 135 136int desiredvnodes; 137SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, 138 &desiredvnodes, 0, "Maximum number of vnodes"); 139 140static void vfs_free_addrlist __P((struct netexport *nep)); 141static int vfs_free_netcred __P((struct radix_node *rn, void *w)); 142static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, 143 struct export_args *argp)); 144 145/* 146 * Initialize the vnode management data structures. 147 */ 148void 149vntblinit() 150{ 151 152 desiredvnodes = maxproc + cnt.v_page_count / 4; 153 simple_lock_init(&mntvnode_slock); 154 simple_lock_init(&mntid_slock); 155 simple_lock_init(&spechash_slock); 156 TAILQ_INIT(&vnode_free_list); 157 TAILQ_INIT(&vnode_tobefree_list); 158 simple_lock_init(&vnode_free_list_slock); 159 CIRCLEQ_INIT(&mountlist); 160 vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); 161 /* 162 * Initialize the filesystem syncer. 163 */ 164 syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, 165 &syncer_mask); 166 syncer_maxdelay = syncer_mask + 1; 167} 168 169/* 170 * Mark a mount point as busy. Used to synchronize access and to delay 171 * unmounting. Interlock is not released on failure. 172 */ 173int 174vfs_busy(mp, flags, interlkp, p) 175 struct mount *mp; 176 int flags; 177 struct simplelock *interlkp; 178 struct proc *p; 179{ 180 int lkflags; 181 182 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 183 if (flags & LK_NOWAIT) 184 return (ENOENT); 185 mp->mnt_kern_flag |= MNTK_MWAIT; 186 if (interlkp) { 187 simple_unlock(interlkp); 188 } 189 /* 190 * Since all busy locks are shared except the exclusive 191 * lock granted when unmounting, the only place that a 192 * wakeup needs to be done is at the release of the 193 * exclusive lock at the end of dounmount. 194 */ 195 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); 196 if (interlkp) { 197 simple_lock(interlkp); 198 } 199 return (ENOENT); 200 } 201 lkflags = LK_SHARED | LK_NOPAUSE; 202 if (interlkp) 203 lkflags |= LK_INTERLOCK; 204 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) 205 panic("vfs_busy: unexpected lock failure"); 206 return (0); 207} 208 209/* 210 * Free a busy filesystem. 211 */ 212void 213vfs_unbusy(mp, p) 214 struct mount *mp; 215 struct proc *p; 216{ 217 218 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); 219} 220 221/* 222 * Lookup a filesystem type, and if found allocate and initialize 223 * a mount structure for it. 224 * 225 * Devname is usually updated by mount(8) after booting. 226 */ 227int 228vfs_rootmountalloc(fstypename, devname, mpp) 229 char *fstypename; 230 char *devname; 231 struct mount **mpp; 232{ 233 struct proc *p = curproc; /* XXX */ 234 struct vfsconf *vfsp; 235 struct mount *mp; 236 237 if (fstypename == NULL) 238 return (ENODEV); 239 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 240 if (!strcmp(vfsp->vfc_name, fstypename)) 241 break; 242 if (vfsp == NULL) 243 return (ENODEV); 244 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 245 bzero((char *)mp, (u_long)sizeof(struct mount)); 246 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); 247 (void)vfs_busy(mp, LK_NOWAIT, 0, p); 248 LIST_INIT(&mp->mnt_vnodelist); 249 mp->mnt_vfc = vfsp; 250 mp->mnt_op = vfsp->vfc_vfsops; 251 mp->mnt_flag = MNT_RDONLY; 252 mp->mnt_vnodecovered = NULLVP; 253 vfsp->vfc_refcount++; 254 mp->mnt_stat.f_type = vfsp->vfc_typenum; 255 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 256 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 257 mp->mnt_stat.f_mntonname[0] = '/'; 258 mp->mnt_stat.f_mntonname[1] = 0; 259 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 260 *mpp = mp; 261 return (0); 262} 263 264/* 265 * Find an appropriate filesystem to use for the root. If a filesystem 266 * has not been preselected, walk through the list of known filesystems 267 * trying those that have mountroot routines, and try them until one 268 * works or we have tried them all. 269 */ 270#ifdef notdef /* XXX JH */ 271int 272lite2_vfs_mountroot() 273{ 274 struct vfsconf *vfsp; 275 extern int (*lite2_mountroot) __P((void)); 276 int error; 277 278 if (lite2_mountroot != NULL) 279 return ((*lite2_mountroot)()); 280 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 281 if (vfsp->vfc_mountroot == NULL) 282 continue; 283 if ((error = (*vfsp->vfc_mountroot)()) == 0) 284 return (0); 285 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); 286 } 287 return (ENODEV); 288} 289#endif 290 291/* 292 * Lookup a mount point by filesystem identifier. 293 */ 294struct mount * 295vfs_getvfs(fsid) 296 fsid_t *fsid; 297{ 298 register struct mount *mp; 299 300 simple_lock(&mountlist_slock); 301 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 302 mp = mp->mnt_list.cqe_next) { 303 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 304 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { 305 simple_unlock(&mountlist_slock); 306 return (mp); 307 } 308 } 309 simple_unlock(&mountlist_slock); 310 return ((struct mount *) 0); 311} 312 313/* 314 * Get a new unique fsid 315 */ 316void 317vfs_getnewfsid(mp) 318 struct mount *mp; 319{ 320 static u_short xxxfs_mntid; 321 322 fsid_t tfsid; 323 int mtype; 324 325 simple_lock(&mntid_slock); 326 mtype = mp->mnt_vfc->vfc_typenum; 327 mp->mnt_stat.f_fsid.val[0] = (256 + mtype) * 256; 328 mp->mnt_stat.f_fsid.val[1] = mtype; 329 if (xxxfs_mntid == 0) 330 ++xxxfs_mntid; 331 tfsid.val[0] = (256 + mtype) * 256 | xxxfs_mntid; 332 tfsid.val[1] = mtype; 333 if (mountlist.cqh_first != (void *)&mountlist) { 334 while (vfs_getvfs(&tfsid)) { 335 tfsid.val[0]++; 336 xxxfs_mntid++; 337 } 338 } 339 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 340 simple_unlock(&mntid_slock); 341} 342 343/* 344 * Set vnode attributes to VNOVAL 345 */ 346void 347vattr_null(vap) 348 register struct vattr *vap; 349{ 350 351 vap->va_type = VNON; 352 vap->va_size = VNOVAL; 353 vap->va_bytes = VNOVAL; 354 vap->va_mode = VNOVAL; 355 vap->va_nlink = VNOVAL; 356 vap->va_uid = VNOVAL; 357 vap->va_gid = VNOVAL; 358 vap->va_fsid = VNOVAL; 359 vap->va_fileid = VNOVAL; 360 vap->va_blocksize = VNOVAL; 361 vap->va_rdev = VNOVAL; 362 vap->va_atime.tv_sec = VNOVAL; 363 vap->va_atime.tv_nsec = VNOVAL; 364 vap->va_mtime.tv_sec = VNOVAL; 365 vap->va_mtime.tv_nsec = VNOVAL; 366 vap->va_ctime.tv_sec = VNOVAL; 367 vap->va_ctime.tv_nsec = VNOVAL; 368 vap->va_flags = VNOVAL; 369 vap->va_gen = VNOVAL; 370 vap->va_vaflags = 0; 371} 372 373/* 374 * Routines having to do with the management of the vnode table. 375 */ 376extern vop_t **dead_vnodeop_p; 377 378/* 379 * Return the next vnode from the free list. 380 */ 381int 382getnewvnode(tag, mp, vops, vpp) 383 enum vtagtype tag; 384 struct mount *mp; 385 vop_t **vops; 386 struct vnode **vpp; 387{ 388 int s; 389 struct proc *p = curproc; /* XXX */ 390 struct vnode *vp, *tvp, *nvp; 391 vm_object_t object; 392 TAILQ_HEAD(freelst, vnode) vnode_tmp_list; 393 394 /* 395 * We take the least recently used vnode from the freelist 396 * if we can get it and it has no cached pages, and no 397 * namecache entries are relative to it. 398 * Otherwise we allocate a new vnode 399 */ 400 401 s = splbio(); 402 simple_lock(&vnode_free_list_slock); 403 TAILQ_INIT(&vnode_tmp_list); 404 405 for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) { 406 nvp = TAILQ_NEXT(vp, v_freelist); 407 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 408 if (vp->v_flag & VAGE) { 409 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 410 } else { 411 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 412 } 413 vp->v_flag &= ~(VTBFREE|VAGE); 414 vp->v_flag |= VFREE; 415 if (vp->v_usecount) 416 panic("tobe free vnode isn't"); 417 freevnodes++; 418 } 419 420 if (wantfreevnodes && freevnodes < wantfreevnodes) { 421 vp = NULL; 422 } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { 423 /* 424 * XXX: this is only here to be backwards compatible 425 */ 426 vp = NULL; 427 } else { 428 for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) { 429 nvp = TAILQ_NEXT(vp, v_freelist); 430 if (!simple_lock_try(&vp->v_interlock)) 431 continue; 432 if (vp->v_usecount) 433 panic("free vnode isn't"); 434 435 object = vp->v_object; 436 if (object && (object->resident_page_count || object->ref_count)) { 437 printf("object inconsistant state: RPC: %d, RC: %d\n", 438 object->resident_page_count, object->ref_count); 439 /* Don't recycle if it's caching some pages */ 440 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 441 TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); 442 continue; 443 } else if (LIST_FIRST(&vp->v_cache_src)) { 444 /* Don't recycle if active in the namecache */ 445 simple_unlock(&vp->v_interlock); 446 continue; 447 } else { 448 break; 449 } 450 } 451 } 452 453 for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) { 454 nvp = TAILQ_NEXT(tvp, v_freelist); 455 TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); 456 TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); 457 simple_unlock(&tvp->v_interlock); 458 } 459 460 if (vp) { 461 vp->v_flag |= VDOOMED; 462 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 463 freevnodes--; 464 simple_unlock(&vnode_free_list_slock); 465 cache_purge(vp); 466 vp->v_lease = NULL; 467 if (vp->v_type != VBAD) { 468 vgonel(vp, p); 469 } else { 470 simple_unlock(&vp->v_interlock); 471 } 472 473#ifdef INVARIANTS 474 { 475 int s; 476 477 if (vp->v_data) 478 panic("cleaned vnode isn't"); 479 s = splbio(); 480 if (vp->v_numoutput) 481 panic("Clean vnode has pending I/O's"); 482 splx(s); 483 } 484#endif 485 vp->v_flag = 0; 486 vp->v_lastr = 0; 487 vp->v_lastw = 0; 488 vp->v_lasta = 0; 489 vp->v_cstart = 0; 490 vp->v_clen = 0; 491 vp->v_socket = 0; 492 vp->v_writecount = 0; /* XXX */ 493 vp->v_maxio = 0; 494 } else { 495 simple_unlock(&vnode_free_list_slock); 496 vp = (struct vnode *) zalloc(vnode_zone); 497 bzero((char *) vp, sizeof *vp); 498 simple_lock_init(&vp->v_interlock); 499 vp->v_dd = vp; 500 cache_purge(vp); 501 LIST_INIT(&vp->v_cache_src); 502 TAILQ_INIT(&vp->v_cache_dst); 503 numvnodes++; 504 } 505 506 TAILQ_INIT(&vp->v_cleanblkhd); 507 TAILQ_INIT(&vp->v_dirtyblkhd); 508 vp->v_type = VNON; 509 vp->v_tag = tag; 510 vp->v_op = vops; 511 insmntque(vp, mp); 512 *vpp = vp; 513 vp->v_usecount = 1; 514 vp->v_data = 0; 515 splx(s); 516 517 vfs_object_create(vp, p, p->p_ucred); 518 return (0); 519} 520 521/* 522 * Move a vnode from one mount queue to another. 523 */ 524static void 525insmntque(vp, mp) 526 register struct vnode *vp; 527 register struct mount *mp; 528{ 529 530 simple_lock(&mntvnode_slock); 531 /* 532 * Delete from old mount point vnode list, if on one. 533 */ 534 if (vp->v_mount != NULL) 535 LIST_REMOVE(vp, v_mntvnodes); 536 /* 537 * Insert into list of vnodes for the new mount point, if available. 538 */ 539 if ((vp->v_mount = mp) == NULL) { 540 simple_unlock(&mntvnode_slock); 541 return; 542 } 543 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 544 simple_unlock(&mntvnode_slock); 545} 546 547/* 548 * Update outstanding I/O count and do wakeup if requested. 549 */ 550void 551vwakeup(bp) 552 register struct buf *bp; 553{ 554 register struct vnode *vp; 555 556 bp->b_flags &= ~B_WRITEINPROG; 557 if ((vp = bp->b_vp)) { 558 vp->v_numoutput--; 559 if (vp->v_numoutput < 0) 560 panic("vwakeup: neg numoutput"); 561 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { 562 vp->v_flag &= ~VBWAIT; 563 wakeup((caddr_t) &vp->v_numoutput); 564 } 565 } 566} 567 568/* 569 * Flush out and invalidate all buffers associated with a vnode. 570 * Called with the underlying object locked. 571 */ 572int 573vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 574 register struct vnode *vp; 575 int flags; 576 struct ucred *cred; 577 struct proc *p; 578 int slpflag, slptimeo; 579{ 580 register struct buf *bp; 581 struct buf *nbp, *blist; 582 int s, error; 583 vm_object_t object; 584 585 if (flags & V_SAVE) { 586 s = splbio(); 587 while (vp->v_numoutput) { 588 vp->v_flag |= VBWAIT; 589 error = tsleep((caddr_t)&vp->v_numoutput, 590 slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo); 591 if (error) { 592 splx(s); 593 return (error); 594 } 595 } 596 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { 597 splx(s); 598 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) 599 return (error); 600 s = splbio(); 601 if (vp->v_numoutput > 0 || 602 !TAILQ_EMPTY(&vp->v_dirtyblkhd)) 603 panic("vinvalbuf: dirty bufs"); 604 } 605 splx(s); 606 } 607 s = splbio(); 608 for (;;) { 609 blist = TAILQ_FIRST(&vp->v_cleanblkhd); 610 if (!blist) 611 blist = TAILQ_FIRST(&vp->v_dirtyblkhd); 612 if (!blist) 613 break; 614 615 for (bp = blist; bp; bp = nbp) { 616 nbp = TAILQ_NEXT(bp, b_vnbufs); 617 if (bp->b_flags & B_BUSY) { 618 bp->b_flags |= B_WANTED; 619 error = tsleep((caddr_t) bp, 620 slpflag | (PRIBIO + 4), "vinvalbuf", 621 slptimeo); 622 if (error) { 623 splx(s); 624 return (error); 625 } 626 break; 627 } 628 /* 629 * XXX Since there are no node locks for NFS, I 630 * believe there is a slight chance that a delayed 631 * write will occur while sleeping just above, so 632 * check for it. Note that vfs_bio_awrite expects 633 * buffers to reside on a queue, while VOP_BWRITE and 634 * brelse do not. 635 */ 636 if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && 637 (flags & V_SAVE)) { 638 639 if (bp->b_vp == vp) { 640 if (bp->b_flags & B_CLUSTEROK) { 641 vfs_bio_awrite(bp); 642 } else { 643 bremfree(bp); 644 bp->b_flags |= (B_BUSY | B_ASYNC); 645 VOP_BWRITE(bp); 646 } 647 } else { 648 bremfree(bp); 649 bp->b_flags |= B_BUSY; 650 (void) VOP_BWRITE(bp); 651 } 652 break; 653 } 654 bremfree(bp); 655 bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF | B_BUSY); 656 bp->b_flags &= ~B_ASYNC; 657 brelse(bp); 658 } 659 } 660 661 while (vp->v_numoutput > 0) { 662 vp->v_flag |= VBWAIT; 663 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); 664 } 665 666 splx(s); 667 668 /* 669 * Destroy the copy in the VM cache, too. 670 */ 671 simple_lock(&vp->v_interlock); 672 object = vp->v_object; 673 if (object != NULL) { 674 vm_object_page_remove(object, 0, 0, 675 (flags & V_SAVE) ? TRUE : FALSE); 676 } 677 simple_unlock(&vp->v_interlock); 678 679 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd)) 680 panic("vinvalbuf: flush failed"); 681 return (0); 682} 683 684/* 685 * Truncate a file's buffer and pages to a specified length. This 686 * is in lieu of the old vinvalbuf mechanism, which performed unneeded 687 * sync activity. 688 */ 689int 690vtruncbuf(vp, cred, p, length, blksize) 691 register struct vnode *vp; 692 struct ucred *cred; 693 struct proc *p; 694 off_t length; 695 int blksize; 696{ 697 register struct buf *bp; 698 struct buf *nbp; 699 int s, anyfreed; 700 int trunclbn; 701 702 /* 703 * Round up to the *next* lbn. 704 */ 705 trunclbn = (length + blksize - 1) / blksize; 706 707 s = splbio(); 708restart: 709 anyfreed = 1; 710 for (;anyfreed;) { 711 anyfreed = 0; 712 for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 713 nbp = TAILQ_NEXT(bp, b_vnbufs); 714 if (bp->b_lblkno >= trunclbn) { 715 if (bp->b_flags & B_BUSY) { 716 bp->b_flags |= B_WANTED; 717 tsleep(bp, PRIBIO + 4, "vtrb1", 0); 718 goto restart; 719 } else { 720 bremfree(bp); 721 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF); 722 bp->b_flags &= ~B_ASYNC; 723 brelse(bp); 724 anyfreed = 1; 725 } 726 if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0)|| 727 (nbp->b_vp != vp) || 728 (nbp->b_flags & B_DELWRI))) { 729 goto restart; 730 } 731 } 732 } 733 734 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 735 nbp = TAILQ_NEXT(bp, b_vnbufs); 736 if (bp->b_lblkno >= trunclbn) { 737 if (bp->b_flags & B_BUSY) { 738 bp->b_flags |= B_WANTED; 739 tsleep(bp, PRIBIO + 4, "vtrb2", 0); 740 goto restart; 741 } else { 742 bremfree(bp); 743 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF); 744 bp->b_flags &= ~B_ASYNC; 745 brelse(bp); 746 anyfreed = 1; 747 } 748 if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0)|| 749 (nbp->b_vp != vp) || 750 (nbp->b_flags & B_DELWRI) == 0)) { 751 goto restart; 752 } 753 } 754 } 755 } 756 757 if (length > 0) { 758restartsync: 759 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 760 nbp = TAILQ_NEXT(bp, b_vnbufs); 761 if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) { 762 if (bp->b_flags & B_BUSY) { 763 bp->b_flags |= B_WANTED; 764 tsleep(bp, PRIBIO, "vtrb3", 0); 765 } else { 766 bremfree(bp); 767 bp->b_flags |= B_BUSY; 768 if (bp->b_vp == vp) { 769 bp->b_flags |= B_ASYNC; 770 } else { 771 bp->b_flags &= ~B_ASYNC; 772 } 773 VOP_BWRITE(bp); 774 } 775 goto restartsync; 776 } 777 778 } 779 } 780 781 while (vp->v_numoutput > 0) { 782 vp->v_flag |= VBWAIT; 783 tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0); 784 } 785 786 splx(s); 787 788 vnode_pager_setsize(vp, length); 789 790 return (0); 791} 792 793/* 794 * Associate a buffer with a vnode. 795 */ 796void 797bgetvp(vp, bp) 798 register struct vnode *vp; 799 register struct buf *bp; 800{ 801 int s; 802 803 KASSERT(bp->b_vp == NULL, ("bgetvp: not free")); 804 805 vhold(vp); 806 bp->b_vp = vp; 807 if (vp->v_type == VBLK || vp->v_type == VCHR) 808 bp->b_dev = vp->v_rdev; 809 else 810 bp->b_dev = NODEV; 811 /* 812 * Insert onto list for new vnode. 813 */ 814 s = splbio(); 815 bp->b_xflags |= B_VNCLEAN; 816 bp->b_xflags &= ~B_VNDIRTY; 817 TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs); 818 splx(s); 819} 820 821/* 822 * Disassociate a buffer from a vnode. 823 */ 824void 825brelvp(bp) 826 register struct buf *bp; 827{ 828 struct vnode *vp; 829 struct buflists *listheadp; 830 int s; 831 832 KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); 833 834 /* 835 * Delete from old vnode list, if on one. 836 */ 837 vp = bp->b_vp; 838 s = splbio(); 839 if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) { 840 if (bp->b_xflags & B_VNDIRTY) 841 listheadp = &vp->v_dirtyblkhd; 842 else 843 listheadp = &vp->v_cleanblkhd; 844 TAILQ_REMOVE(listheadp, bp, b_vnbufs); 845 bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN); 846 } 847 if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) { 848 vp->v_flag &= ~VONWORKLST; 849 LIST_REMOVE(vp, v_synclist); 850 } 851 splx(s); 852 bp->b_vp = (struct vnode *) 0; 853 vdrop(vp); 854} 855 856/* 857 * The workitem queue. 858 * 859 * It is useful to delay writes of file data and filesystem metadata 860 * for tens of seconds so that quickly created and deleted files need 861 * not waste disk bandwidth being created and removed. To realize this, 862 * we append vnodes to a "workitem" queue. When running with a soft 863 * updates implementation, most pending metadata dependencies should 864 * not wait for more than a few seconds. Thus, mounted on block devices 865 * are delayed only about a half the time that file data is delayed. 866 * Similarly, directory updates are more critical, so are only delayed 867 * about a third the time that file data is delayed. Thus, there are 868 * SYNCER_MAXDELAY queues that are processed round-robin at a rate of 869 * one each second (driven off the filesystem syner process). The 870 * syncer_delayno variable indicates the next queue that is to be processed. 871 * Items that need to be processed soon are placed in this queue: 872 * 873 * syncer_workitem_pending[syncer_delayno] 874 * 875 * A delay of fifteen seconds is done by placing the request fifteen 876 * entries later in the queue: 877 * 878 * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] 879 * 880 */ 881 882/* 883 * Add an item to the syncer work queue. 884 */ 885static void 886vn_syncer_add_to_worklist(struct vnode *vp, int delay) 887{ 888 int s, slot; 889 890 s = splbio(); 891 892 if (vp->v_flag & VONWORKLST) { 893 LIST_REMOVE(vp, v_synclist); 894 } 895 896 if (delay > syncer_maxdelay - 2) 897 delay = syncer_maxdelay - 2; 898 slot = (syncer_delayno + delay) & syncer_mask; 899 900 LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); 901 vp->v_flag |= VONWORKLST; 902 splx(s); 903} 904 905struct proc *updateproc; 906static void sched_sync __P((void)); 907static const struct kproc_desc up_kp = { 908 "syncer", 909 sched_sync, 910 &updateproc 911}; 912SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) 913 914/* 915 * System filesystem synchronizer daemon. 916 */ 917void 918sched_sync(void) 919{ 920 struct synclist *slp; 921 struct vnode *vp; 922 long starttime; 923 int s; 924 struct proc *p = updateproc; 925 926 for (;;) { 927 starttime = time_second; 928 929 /* 930 * Push files whose dirty time has expired. Be careful 931 * of interrupt race on slp queue. 932 */ 933 s = splbio(); 934 slp = &syncer_workitem_pending[syncer_delayno]; 935 syncer_delayno += 1; 936 if (syncer_delayno == syncer_maxdelay) 937 syncer_delayno = 0; 938 splx(s); 939 940 while ((vp = LIST_FIRST(slp)) != NULL) { 941 if (VOP_ISLOCKED(vp) == 0) { 942 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 943 (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); 944 VOP_UNLOCK(vp, 0, p); 945 } 946 s = splbio(); 947 if (LIST_FIRST(slp) == vp) { 948 /* 949 * Note: v_tag VT_VFS vps can remain on the 950 * worklist too with no dirty blocks, but 951 * since sync_fsync() moves it to a different 952 * slot we are safe. 953 */ 954 if (TAILQ_EMPTY(&vp->v_dirtyblkhd) && 955 vp->v_type != VBLK) 956 panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag); 957 /* 958 * Put us back on the worklist. The worklist 959 * routine will remove us from our current 960 * position and then add us back in at a later 961 * position. 962 */ 963 vn_syncer_add_to_worklist(vp, syncdelay); 964 } 965 splx(s); 966 } 967 968 /* 969 * Do soft update processing. 970 */ 971 if (bioops.io_sync) 972 (*bioops.io_sync)(NULL); 973 974 /* 975 * The variable rushjob allows the kernel to speed up the 976 * processing of the filesystem syncer process. A rushjob 977 * value of N tells the filesystem syncer to process the next 978 * N seconds worth of work on its queue ASAP. Currently rushjob 979 * is used by the soft update code to speed up the filesystem 980 * syncer process when the incore state is getting so far 981 * ahead of the disk that the kernel memory pool is being 982 * threatened with exhaustion. 983 */ 984 if (rushjob > 0) { 985 rushjob -= 1; 986 continue; 987 } 988 /* 989 * If it has taken us less than a second to process the 990 * current work, then wait. Otherwise start right over 991 * again. We can still lose time if any single round 992 * takes more than two seconds, but it does not really 993 * matter as we are just trying to generally pace the 994 * filesystem activity. 995 */ 996 if (time_second == starttime) 997 tsleep(&lbolt, PPAUSE, "syncer", 0); 998 } 999} 1000 1001/* 1002 * Associate a p-buffer with a vnode. 1003 * 1004 * Also sets B_PAGING flag to indicate that vnode is not fully associated 1005 * with the buffer. i.e. the bp has not been linked into the vnode or 1006 * ref-counted. 1007 */ 1008void 1009pbgetvp(vp, bp) 1010 register struct vnode *vp; 1011 register struct buf *bp; 1012{ 1013 1014 KASSERT(bp->b_vp == NULL, ("pbgetvp: not free")); 1015 1016 bp->b_vp = vp; 1017 bp->b_flags |= B_PAGING; 1018 if (vp->v_type == VBLK || vp->v_type == VCHR) 1019 bp->b_dev = vp->v_rdev; 1020 else 1021 bp->b_dev = NODEV; 1022} 1023 1024/* 1025 * Disassociate a p-buffer from a vnode. 1026 */ 1027void 1028pbrelvp(bp) 1029 register struct buf *bp; 1030{ 1031 1032 KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL")); 1033 1034#if !defined(MAX_PERF) 1035 /* XXX REMOVE ME */ 1036 if (bp->b_vnbufs.tqe_next != NULL) { 1037 panic( 1038 "relpbuf(): b_vp was probably reassignbuf()d %p %x", 1039 bp, 1040 (int)bp->b_flags 1041 ); 1042 } 1043#endif 1044 bp->b_vp = (struct vnode *) 0; 1045 bp->b_flags &= ~B_PAGING; 1046} 1047 1048void 1049pbreassignbuf(bp, newvp) 1050 struct buf *bp; 1051 struct vnode *newvp; 1052{ 1053#if !defined(MAX_PERF) 1054 if ((bp->b_flags & B_PAGING) == 0) { 1055 panic( 1056 "pbreassignbuf() on non phys bp %p", 1057 bp 1058 ); 1059 } 1060#endif 1061 bp->b_vp = newvp; 1062} 1063 1064/* 1065 * Reassign a buffer from one vnode to another. 1066 * Used to assign file specific control information 1067 * (indirect blocks) to the vnode to which they belong. 1068 */ 1069void 1070reassignbuf(bp, newvp) 1071 register struct buf *bp; 1072 register struct vnode *newvp; 1073{ 1074 struct buflists *listheadp; 1075 int delay; 1076 int s; 1077 1078 if (newvp == NULL) { 1079 printf("reassignbuf: NULL"); 1080 return; 1081 } 1082 1083#if !defined(MAX_PERF) 1084 /* 1085 * B_PAGING flagged buffers cannot be reassigned because their vp 1086 * is not fully linked in. 1087 */ 1088 if (bp->b_flags & B_PAGING) 1089 panic("cannot reassign paging buffer"); 1090#endif 1091 1092 s = splbio(); 1093 /* 1094 * Delete from old vnode list, if on one. 1095 */ 1096 if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) { 1097 if (bp->b_xflags & B_VNDIRTY) 1098 listheadp = &bp->b_vp->v_dirtyblkhd; 1099 else 1100 listheadp = &bp->b_vp->v_cleanblkhd; 1101 TAILQ_REMOVE(listheadp, bp, b_vnbufs); 1102 bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN); 1103 if (bp->b_vp != newvp) { 1104 vdrop(bp->b_vp); 1105 bp->b_vp = NULL; /* for clarification */ 1106 } 1107 } 1108 /* 1109 * If dirty, put on list of dirty buffers; otherwise insert onto list 1110 * of clean buffers. 1111 */ 1112 if (bp->b_flags & B_DELWRI) { 1113 struct buf *tbp; 1114 1115 listheadp = &newvp->v_dirtyblkhd; 1116 if ((newvp->v_flag & VONWORKLST) == 0) { 1117 switch (newvp->v_type) { 1118 case VDIR: 1119 delay = syncdelay / 2; 1120 break; 1121 case VBLK: 1122 if (newvp->v_specmountpoint != NULL) { 1123 delay = syncdelay / 3; 1124 break; 1125 } 1126 /* fall through */ 1127 default: 1128 delay = syncdelay; 1129 } 1130 vn_syncer_add_to_worklist(newvp, delay); 1131 } 1132 bp->b_xflags |= B_VNDIRTY; 1133 tbp = TAILQ_FIRST(listheadp); 1134 if (tbp == NULL || 1135 (bp->b_lblkno >= 0 && tbp->b_lblkno > bp->b_lblkno)) { 1136 TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); 1137 } else { 1138 if (bp->b_lblkno >= 0) { 1139 struct buf *ttbp; 1140 while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) && 1141 (ttbp->b_lblkno < bp->b_lblkno)) { 1142 tbp = ttbp; 1143 } 1144 TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); 1145 } else { 1146 TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs); 1147 } 1148 } 1149 } else { 1150 bp->b_xflags |= B_VNCLEAN; 1151 TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs); 1152 if ((newvp->v_flag & VONWORKLST) && 1153 TAILQ_EMPTY(&newvp->v_dirtyblkhd)) { 1154 newvp->v_flag &= ~VONWORKLST; 1155 LIST_REMOVE(newvp, v_synclist); 1156 } 1157 } 1158 if (bp->b_vp != newvp) { 1159 bp->b_vp = newvp; 1160 vhold(bp->b_vp); 1161 } 1162 splx(s); 1163} 1164 1165/* 1166 * Create a vnode for a block device. 1167 * Used for mounting the root file system. 1168 */ 1169int 1170bdevvp(dev, vpp) 1171 dev_t dev; 1172 struct vnode **vpp; 1173{ 1174 register struct vnode *vp; 1175 struct vnode *nvp; 1176 int error; 1177 1178 if (dev == NODEV) { 1179 *vpp = NULLVP; 1180 return (ENXIO); 1181 } 1182 error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); 1183 if (error) { 1184 *vpp = NULLVP; 1185 return (error); 1186 } 1187 vp = nvp; 1188 vp->v_type = VBLK; 1189 if ((nvp = checkalias(vp, dev2udev(dev), (struct mount *)0)) != NULL) { 1190 vput(vp); 1191 vp = nvp; 1192 } 1193 *vpp = vp; 1194 return (0); 1195} 1196 1197/* 1198 * Check to see if the new vnode represents a special device 1199 * for which we already have a vnode (either because of 1200 * bdevvp() or because of a different vnode representing 1201 * the same block device). If such an alias exists, deallocate 1202 * the existing contents and return the aliased vnode. The 1203 * caller is responsible for filling it with its new contents. 1204 */ 1205struct vnode * 1206checkalias(nvp, nvp_rdev, mp) 1207 register struct vnode *nvp; 1208 udev_t nvp_rdev; 1209 struct mount *mp; 1210{ 1211 struct proc *p = curproc; /* XXX */ 1212 struct vnode *vp; 1213 struct vnode **vpp; 1214 dev_t dev; 1215 1216 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 1217 return (NULLVP); 1218 1219 dev = udev2dev(nvp_rdev, 2); 1220 1221 vpp = &speclisth[SPECHASH(dev)]; 1222loop: 1223 simple_lock(&spechash_slock); 1224 for (vp = *vpp; vp; vp = vp->v_specnext) { 1225 if (dev != vp->v_rdev || nvp->v_type != vp->v_type) 1226 continue; 1227 /* 1228 * Alias, but not in use, so flush it out. 1229 * Only alias active device nodes. 1230 * Not sure why we don't re-use this like we do below. 1231 */ 1232 simple_lock(&vp->v_interlock); 1233 if (vp->v_usecount == 0) { 1234 simple_unlock(&spechash_slock); 1235 vgonel(vp, p); 1236 goto loop; 1237 } 1238 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { 1239 /* 1240 * It dissappeared, and we may have slept. 1241 * Restart from the beginning 1242 */ 1243 simple_unlock(&spechash_slock); 1244 goto loop; 1245 } 1246 break; 1247 } 1248 /* 1249 * It would be a lot clearer what is going on here if 1250 * this had been expressed as: 1251 * if ( vp && (vp->v_tag == VT_NULL)) 1252 * and the clauses had been swapped. 1253 */ 1254 if (vp == NULL || vp->v_tag != VT_NON) { 1255 struct specinfo *sinfo; 1256 1257 /* 1258 * Put the new vnode into the hash chain. 1259 * and if there was an alias, connect them. 1260 */ 1261 MALLOC(sinfo, struct specinfo *, 1262 sizeof(struct specinfo), M_VNODE, M_WAITOK); 1263 bzero(sinfo, sizeof(struct specinfo)); 1264 nvp->v_specinfo = sinfo; 1265 sinfo->si_rdev = dev; 1266 sinfo->si_hashchain = vpp; 1267 sinfo->si_specnext = *vpp; 1268 sinfo->si_bsize_phys = DEV_BSIZE; 1269 sinfo->si_bsize_best = BLKDEV_IOSIZE; 1270 sinfo->si_bsize_max = MAXBSIZE; 1271 1272 /* 1273 * Ask the device to fix up specinfo. Typically the 1274 * si_bsize_* parameters may need fixing up. 1275 */ 1276 1277 if (nvp->v_type == VBLK) { 1278 if (bdevsw(dev) && bdevsw(dev)->d_parms) 1279 (*bdevsw(dev)->d_parms)(dev, sinfo, DPARM_GET); 1280 } else if (nvp->v_type == VCHR) { 1281 if (devsw(dev) && devsw(dev)->d_parms) 1282 (*devsw(dev)->d_parms)(dev, sinfo, DPARM_GET); 1283 } 1284 1285 simple_unlock(&spechash_slock); 1286 *vpp = nvp; 1287 if (vp != NULLVP) { 1288 nvp->v_flag |= VALIASED; 1289 vp->v_flag |= VALIASED; 1290 vput(vp); 1291 } 1292 return (NULLVP); 1293 } 1294 /* 1295 * if ( vp && (vp->v_tag == VT_NULL)) 1296 * We have a vnode alias, but it is a trashed. 1297 * Make it look like it's newley allocated. (by getnewvnode()) 1298 * The caller should use this instead. 1299 */ 1300 simple_unlock(&spechash_slock); 1301 VOP_UNLOCK(vp, 0, p); 1302 simple_lock(&vp->v_interlock); 1303 vclean(vp, 0, p); 1304 vp->v_op = nvp->v_op; 1305 vp->v_tag = nvp->v_tag; 1306 nvp->v_type = VNON; 1307 insmntque(vp, mp); 1308 return (vp); 1309} 1310 1311/* 1312 * Grab a particular vnode from the free list, increment its 1313 * reference count and lock it. The vnode lock bit is set the 1314 * vnode is being eliminated in vgone. The process is awakened 1315 * when the transition is completed, and an error returned to 1316 * indicate that the vnode is no longer usable (possibly having 1317 * been changed to a new file system type). 1318 */ 1319int 1320vget(vp, flags, p) 1321 register struct vnode *vp; 1322 int flags; 1323 struct proc *p; 1324{ 1325 int error; 1326 1327 /* 1328 * If the vnode is in the process of being cleaned out for 1329 * another use, we wait for the cleaning to finish and then 1330 * return failure. Cleaning is determined by checking that 1331 * the VXLOCK flag is set. 1332 */ 1333 if ((flags & LK_INTERLOCK) == 0) { 1334 simple_lock(&vp->v_interlock); 1335 } 1336 if (vp->v_flag & VXLOCK) { 1337 vp->v_flag |= VXWANT; 1338 simple_unlock(&vp->v_interlock); 1339 tsleep((caddr_t)vp, PINOD, "vget", 0); 1340 return (ENOENT); 1341 } 1342 1343 vp->v_usecount++; 1344 1345 if (VSHOULDBUSY(vp)) 1346 vbusy(vp); 1347 if (flags & LK_TYPE_MASK) { 1348 if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { 1349 /* 1350 * must expand vrele here because we do not want 1351 * to call VOP_INACTIVE if the reference count 1352 * drops back to zero since it was never really 1353 * active. We must remove it from the free list 1354 * before sleeping so that multiple processes do 1355 * not try to recycle it. 1356 */ 1357 simple_lock(&vp->v_interlock); 1358 vp->v_usecount--; 1359 if (VSHOULDFREE(vp)) 1360 vfree(vp); 1361 simple_unlock(&vp->v_interlock); 1362 } 1363 return (error); 1364 } 1365 simple_unlock(&vp->v_interlock); 1366 return (0); 1367} 1368 1369void 1370vref(struct vnode *vp) 1371{ 1372 simple_lock(&vp->v_interlock); 1373 vp->v_usecount++; 1374 simple_unlock(&vp->v_interlock); 1375} 1376 1377/* 1378 * Vnode put/release. 1379 * If count drops to zero, call inactive routine and return to freelist. 1380 */ 1381void 1382vrele(vp) 1383 struct vnode *vp; 1384{ 1385 struct proc *p = curproc; /* XXX */ 1386 1387 KASSERT(vp != NULL, ("vrele: null vp")); 1388 1389 simple_lock(&vp->v_interlock); 1390 1391 if (vp->v_usecount > 1) { 1392 1393 vp->v_usecount--; 1394 simple_unlock(&vp->v_interlock); 1395 1396 return; 1397 } 1398 1399 if (vp->v_usecount == 1) { 1400 1401 vp->v_usecount--; 1402 if (VSHOULDFREE(vp)) 1403 vfree(vp); 1404 /* 1405 * If we are doing a vput, the node is already locked, and we must 1406 * call VOP_INACTIVE with the node locked. So, in the case of 1407 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1408 */ 1409 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { 1410 VOP_INACTIVE(vp, p); 1411 } 1412 1413 } else { 1414#ifdef DIAGNOSTIC 1415 vprint("vrele: negative ref count", vp); 1416 simple_unlock(&vp->v_interlock); 1417#endif 1418 panic("vrele: negative ref cnt"); 1419 } 1420} 1421 1422void 1423vput(vp) 1424 struct vnode *vp; 1425{ 1426 struct proc *p = curproc; /* XXX */ 1427 1428 KASSERT(vp != NULL, ("vput: null vp")); 1429 1430 simple_lock(&vp->v_interlock); 1431 1432 if (vp->v_usecount > 1) { 1433 1434 vp->v_usecount--; 1435 VOP_UNLOCK(vp, LK_INTERLOCK, p); 1436 return; 1437 1438 } 1439 1440 if (vp->v_usecount == 1) { 1441 1442 vp->v_usecount--; 1443 if (VSHOULDFREE(vp)) 1444 vfree(vp); 1445 /* 1446 * If we are doing a vput, the node is already locked, and we must 1447 * call VOP_INACTIVE with the node locked. So, in the case of 1448 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1449 */ 1450 simple_unlock(&vp->v_interlock); 1451 VOP_INACTIVE(vp, p); 1452 1453 } else { 1454#ifdef DIAGNOSTIC 1455 vprint("vput: negative ref count", vp); 1456#endif 1457 panic("vput: negative ref cnt"); 1458 } 1459} 1460 1461/* 1462 * Somebody doesn't want the vnode recycled. 1463 */ 1464void 1465vhold(vp) 1466 register struct vnode *vp; 1467{ 1468 int s; 1469 1470 s = splbio(); 1471 vp->v_holdcnt++; 1472 if (VSHOULDBUSY(vp)) 1473 vbusy(vp); 1474 splx(s); 1475} 1476 1477/* 1478 * One less who cares about this vnode. 1479 */ 1480void 1481vdrop(vp) 1482 register struct vnode *vp; 1483{ 1484 int s; 1485 1486 s = splbio(); 1487 if (vp->v_holdcnt <= 0) 1488 panic("vdrop: holdcnt"); 1489 vp->v_holdcnt--; 1490 if (VSHOULDFREE(vp)) 1491 vfree(vp); 1492 splx(s); 1493} 1494 1495/* 1496 * Remove any vnodes in the vnode table belonging to mount point mp. 1497 * 1498 * If MNT_NOFORCE is specified, there should not be any active ones, 1499 * return error if any are found (nb: this is a user error, not a 1500 * system error). If MNT_FORCE is specified, detach any active vnodes 1501 * that are found. 1502 */ 1503#ifdef DIAGNOSTIC 1504static int busyprt = 0; /* print out busy vnodes */ 1505SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); 1506#endif 1507 1508int 1509vflush(mp, skipvp, flags) 1510 struct mount *mp; 1511 struct vnode *skipvp; 1512 int flags; 1513{ 1514 struct proc *p = curproc; /* XXX */ 1515 struct vnode *vp, *nvp; 1516 int busy = 0; 1517 1518 simple_lock(&mntvnode_slock); 1519loop: 1520 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { 1521 /* 1522 * Make sure this vnode wasn't reclaimed in getnewvnode(). 1523 * Start over if it has (it won't be on the list anymore). 1524 */ 1525 if (vp->v_mount != mp) 1526 goto loop; 1527 nvp = vp->v_mntvnodes.le_next; 1528 /* 1529 * Skip over a selected vnode. 1530 */ 1531 if (vp == skipvp) 1532 continue; 1533 1534 simple_lock(&vp->v_interlock); 1535 /* 1536 * Skip over a vnodes marked VSYSTEM. 1537 */ 1538 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { 1539 simple_unlock(&vp->v_interlock); 1540 continue; 1541 } 1542 /* 1543 * If WRITECLOSE is set, only flush out regular file vnodes 1544 * open for writing. 1545 */ 1546 if ((flags & WRITECLOSE) && 1547 (vp->v_writecount == 0 || vp->v_type != VREG)) { 1548 simple_unlock(&vp->v_interlock); 1549 continue; 1550 } 1551 1552 /* 1553 * With v_usecount == 0, all we need to do is clear out the 1554 * vnode data structures and we are done. 1555 */ 1556 if (vp->v_usecount == 0) { 1557 simple_unlock(&mntvnode_slock); 1558 vgonel(vp, p); 1559 simple_lock(&mntvnode_slock); 1560 continue; 1561 } 1562 1563 /* 1564 * If FORCECLOSE is set, forcibly close the vnode. For block 1565 * or character devices, revert to an anonymous device. For 1566 * all other files, just kill them. 1567 */ 1568 if (flags & FORCECLOSE) { 1569 simple_unlock(&mntvnode_slock); 1570 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1571 vgonel(vp, p); 1572 } else { 1573 vclean(vp, 0, p); 1574 vp->v_op = spec_vnodeop_p; 1575 insmntque(vp, (struct mount *) 0); 1576 } 1577 simple_lock(&mntvnode_slock); 1578 continue; 1579 } 1580#ifdef DIAGNOSTIC 1581 if (busyprt) 1582 vprint("vflush: busy vnode", vp); 1583#endif 1584 simple_unlock(&vp->v_interlock); 1585 busy++; 1586 } 1587 simple_unlock(&mntvnode_slock); 1588 if (busy) 1589 return (EBUSY); 1590 return (0); 1591} 1592 1593/* 1594 * Disassociate the underlying file system from a vnode. 1595 */ 1596static void 1597vclean(vp, flags, p) 1598 struct vnode *vp; 1599 int flags; 1600 struct proc *p; 1601{ 1602 int active; 1603 vm_object_t obj; 1604 1605 /* 1606 * Check to see if the vnode is in use. If so we have to reference it 1607 * before we clean it out so that its count cannot fall to zero and 1608 * generate a race against ourselves to recycle it. 1609 */ 1610 if ((active = vp->v_usecount)) 1611 vp->v_usecount++; 1612 1613 /* 1614 * Prevent the vnode from being recycled or brought into use while we 1615 * clean it out. 1616 */ 1617 if (vp->v_flag & VXLOCK) 1618 panic("vclean: deadlock"); 1619 vp->v_flag |= VXLOCK; 1620 /* 1621 * Even if the count is zero, the VOP_INACTIVE routine may still 1622 * have the object locked while it cleans it out. The VOP_LOCK 1623 * ensures that the VOP_INACTIVE routine is done with its work. 1624 * For active vnodes, it ensures that no other activity can 1625 * occur while the underlying object is being cleaned out. 1626 */ 1627 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); 1628 1629 /* 1630 * Clean out any buffers associated with the vnode. 1631 */ 1632 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); 1633 if ((obj = vp->v_object) != NULL) { 1634 if (obj->ref_count == 0) { 1635 /* 1636 * This is a normal way of shutting down the object/vnode 1637 * association. 1638 */ 1639 vm_object_terminate(obj); 1640 } else { 1641 /* 1642 * Woe to the process that tries to page now :-). 1643 */ 1644 vm_pager_deallocate(obj); 1645 } 1646 } 1647 1648 /* 1649 * If purging an active vnode, it must be closed and 1650 * deactivated before being reclaimed. Note that the 1651 * VOP_INACTIVE will unlock the vnode. 1652 */ 1653 if (active) { 1654 if (flags & DOCLOSE) 1655 VOP_CLOSE(vp, FNONBLOCK, NOCRED, p); 1656 VOP_INACTIVE(vp, p); 1657 } else { 1658 /* 1659 * Any other processes trying to obtain this lock must first 1660 * wait for VXLOCK to clear, then call the new lock operation. 1661 */ 1662 VOP_UNLOCK(vp, 0, p); 1663 } 1664 /* 1665 * Reclaim the vnode. 1666 */ 1667 if (VOP_RECLAIM(vp, p)) 1668 panic("vclean: cannot reclaim"); 1669 1670 if (active) 1671 vrele(vp); 1672 1673 cache_purge(vp); 1674 if (vp->v_vnlock) { 1675#if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */ 1676#ifdef DIAGNOSTIC 1677 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) 1678 vprint("vclean: lock not drained", vp); 1679#endif 1680#endif 1681 FREE(vp->v_vnlock, M_VNODE); 1682 vp->v_vnlock = NULL; 1683 } 1684 1685 if (VSHOULDFREE(vp)) 1686 vfree(vp); 1687 1688 /* 1689 * Done with purge, notify sleepers of the grim news. 1690 */ 1691 vp->v_op = dead_vnodeop_p; 1692 vn_pollgone(vp); 1693 vp->v_tag = VT_NON; 1694 vp->v_flag &= ~VXLOCK; 1695 if (vp->v_flag & VXWANT) { 1696 vp->v_flag &= ~VXWANT; 1697 wakeup((caddr_t) vp); 1698 } 1699} 1700 1701/* 1702 * Eliminate all activity associated with the requested vnode 1703 * and with all vnodes aliased to the requested vnode. 1704 */ 1705int 1706vop_revoke(ap) 1707 struct vop_revoke_args /* { 1708 struct vnode *a_vp; 1709 int a_flags; 1710 } */ *ap; 1711{ 1712 struct vnode *vp, *vq; 1713 struct proc *p = curproc; /* XXX */ 1714 1715 KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke")); 1716 1717 vp = ap->a_vp; 1718 simple_lock(&vp->v_interlock); 1719 1720 if (vp->v_flag & VALIASED) { 1721 /* 1722 * If a vgone (or vclean) is already in progress, 1723 * wait until it is done and return. 1724 */ 1725 if (vp->v_flag & VXLOCK) { 1726 vp->v_flag |= VXWANT; 1727 simple_unlock(&vp->v_interlock); 1728 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); 1729 return (0); 1730 } 1731 /* 1732 * Ensure that vp will not be vgone'd while we 1733 * are eliminating its aliases. 1734 */ 1735 vp->v_flag |= VXLOCK; 1736 simple_unlock(&vp->v_interlock); 1737 while (vp->v_flag & VALIASED) { 1738 simple_lock(&spechash_slock); 1739 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1740 if (vq->v_rdev != vp->v_rdev || 1741 vq->v_type != vp->v_type || vp == vq) 1742 continue; 1743 simple_unlock(&spechash_slock); 1744 vgone(vq); 1745 break; 1746 } 1747 if (vq == NULLVP) { 1748 simple_unlock(&spechash_slock); 1749 } 1750 } 1751 /* 1752 * Remove the lock so that vgone below will 1753 * really eliminate the vnode after which time 1754 * vgone will awaken any sleepers. 1755 */ 1756 simple_lock(&vp->v_interlock); 1757 vp->v_flag &= ~VXLOCK; 1758 if (vp->v_flag & VXWANT) { 1759 vp->v_flag &= ~VXWANT; 1760 wakeup(vp); 1761 } 1762 } 1763 vgonel(vp, p); 1764 return (0); 1765} 1766 1767/* 1768 * Recycle an unused vnode to the front of the free list. 1769 * Release the passed interlock if the vnode will be recycled. 1770 */ 1771int 1772vrecycle(vp, inter_lkp, p) 1773 struct vnode *vp; 1774 struct simplelock *inter_lkp; 1775 struct proc *p; 1776{ 1777 1778 simple_lock(&vp->v_interlock); 1779 if (vp->v_usecount == 0) { 1780 if (inter_lkp) { 1781 simple_unlock(inter_lkp); 1782 } 1783 vgonel(vp, p); 1784 return (1); 1785 } 1786 simple_unlock(&vp->v_interlock); 1787 return (0); 1788} 1789 1790/* 1791 * Eliminate all activity associated with a vnode 1792 * in preparation for reuse. 1793 */ 1794void 1795vgone(vp) 1796 register struct vnode *vp; 1797{ 1798 struct proc *p = curproc; /* XXX */ 1799 1800 simple_lock(&vp->v_interlock); 1801 vgonel(vp, p); 1802} 1803 1804/* 1805 * vgone, with the vp interlock held. 1806 */ 1807static void 1808vgonel(vp, p) 1809 struct vnode *vp; 1810 struct proc *p; 1811{ 1812 int s; 1813 struct vnode *vq; 1814 struct vnode *vx; 1815 1816 /* 1817 * If a vgone (or vclean) is already in progress, 1818 * wait until it is done and return. 1819 */ 1820 if (vp->v_flag & VXLOCK) { 1821 vp->v_flag |= VXWANT; 1822 simple_unlock(&vp->v_interlock); 1823 tsleep((caddr_t)vp, PINOD, "vgone", 0); 1824 return; 1825 } 1826 1827 /* 1828 * Clean out the filesystem specific data. 1829 */ 1830 vclean(vp, DOCLOSE, p); 1831 simple_lock(&vp->v_interlock); 1832 1833 /* 1834 * Delete from old mount point vnode list, if on one. 1835 */ 1836 if (vp->v_mount != NULL) 1837 insmntque(vp, (struct mount *)0); 1838 /* 1839 * If special device, remove it from special device alias list 1840 * if it is on one. 1841 */ 1842 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { 1843 simple_lock(&spechash_slock); 1844 if (*vp->v_hashchain == vp) { 1845 *vp->v_hashchain = vp->v_specnext; 1846 } else { 1847 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1848 if (vq->v_specnext != vp) 1849 continue; 1850 vq->v_specnext = vp->v_specnext; 1851 break; 1852 } 1853 if (vq == NULL) 1854 panic("missing bdev"); 1855 } 1856 if (vp->v_flag & VALIASED) { 1857 vx = NULL; 1858 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1859 if (vq->v_rdev != vp->v_rdev || 1860 vq->v_type != vp->v_type) 1861 continue; 1862 if (vx) 1863 break; 1864 vx = vq; 1865 } 1866 if (vx == NULL) 1867 panic("missing alias"); 1868 if (vq == NULL) 1869 vx->v_flag &= ~VALIASED; 1870 vp->v_flag &= ~VALIASED; 1871 } 1872 simple_unlock(&spechash_slock); 1873 FREE(vp->v_specinfo, M_VNODE); 1874 vp->v_specinfo = NULL; 1875 } 1876 1877 /* 1878 * If it is on the freelist and not already at the head, 1879 * move it to the head of the list. The test of the back 1880 * pointer and the reference count of zero is because 1881 * it will be removed from the free list by getnewvnode, 1882 * but will not have its reference count incremented until 1883 * after calling vgone. If the reference count were 1884 * incremented first, vgone would (incorrectly) try to 1885 * close the previous instance of the underlying object. 1886 */ 1887 if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { 1888 s = splbio(); 1889 simple_lock(&vnode_free_list_slock); 1890 if (vp->v_flag & VFREE) { 1891 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1892 } else if (vp->v_flag & VTBFREE) { 1893 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 1894 vp->v_flag &= ~VTBFREE; 1895 freevnodes++; 1896 } else 1897 freevnodes++; 1898 vp->v_flag |= VFREE; 1899 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1900 simple_unlock(&vnode_free_list_slock); 1901 splx(s); 1902 } 1903 1904 vp->v_type = VBAD; 1905 simple_unlock(&vp->v_interlock); 1906} 1907 1908/* 1909 * Lookup a vnode by device number. 1910 */ 1911int 1912vfinddev(dev, type, vpp) 1913 dev_t dev; 1914 enum vtype type; 1915 struct vnode **vpp; 1916{ 1917 register struct vnode *vp; 1918 int rc = 0; 1919 1920 simple_lock(&spechash_slock); 1921 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1922 if (dev != vp->v_rdev || type != vp->v_type) 1923 continue; 1924 *vpp = vp; 1925 rc = 1; 1926 break; 1927 } 1928 simple_unlock(&spechash_slock); 1929 return (rc); 1930} 1931 1932/* 1933 * Calculate the total number of references to a special device. 1934 */ 1935int 1936vcount(vp) 1937 register struct vnode *vp; 1938{ 1939 struct vnode *vq, *vnext; 1940 int count; 1941 1942loop: 1943 if ((vp->v_flag & VALIASED) == 0) 1944 return (vp->v_usecount); 1945 simple_lock(&spechash_slock); 1946 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { 1947 vnext = vq->v_specnext; 1948 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 1949 continue; 1950 /* 1951 * Alias, but not in use, so flush it out. 1952 */ 1953 if (vq->v_usecount == 0 && vq != vp) { 1954 simple_unlock(&spechash_slock); 1955 vgone(vq); 1956 goto loop; 1957 } 1958 count += vq->v_usecount; 1959 } 1960 simple_unlock(&spechash_slock); 1961 return (count); 1962} 1963/* 1964 * Print out a description of a vnode. 1965 */ 1966static char *typename[] = 1967{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; 1968 1969void 1970vprint(label, vp) 1971 char *label; 1972 register struct vnode *vp; 1973{ 1974 char buf[96]; 1975 1976 if (label != NULL) 1977 printf("%s: %p: ", label, (void *)vp); 1978 else 1979 printf("%p: ", (void *)vp); 1980 printf("type %s, usecount %d, writecount %d, refcount %d,", 1981 typename[vp->v_type], vp->v_usecount, vp->v_writecount, 1982 vp->v_holdcnt); 1983 buf[0] = '\0'; 1984 if (vp->v_flag & VROOT) 1985 strcat(buf, "|VROOT"); 1986 if (vp->v_flag & VTEXT) 1987 strcat(buf, "|VTEXT"); 1988 if (vp->v_flag & VSYSTEM) 1989 strcat(buf, "|VSYSTEM"); 1990 if (vp->v_flag & VXLOCK) 1991 strcat(buf, "|VXLOCK"); 1992 if (vp->v_flag & VXWANT) 1993 strcat(buf, "|VXWANT"); 1994 if (vp->v_flag & VBWAIT) 1995 strcat(buf, "|VBWAIT"); 1996 if (vp->v_flag & VALIASED) 1997 strcat(buf, "|VALIASED"); 1998 if (vp->v_flag & VDOOMED) 1999 strcat(buf, "|VDOOMED"); 2000 if (vp->v_flag & VFREE) 2001 strcat(buf, "|VFREE"); 2002 if (vp->v_flag & VOBJBUF) 2003 strcat(buf, "|VOBJBUF"); 2004 if (buf[0] != '\0') 2005 printf(" flags (%s)", &buf[1]); 2006 if (vp->v_data == NULL) { 2007 printf("\n"); 2008 } else { 2009 printf("\n\t"); 2010 VOP_PRINT(vp); 2011 } 2012} 2013 2014#ifdef DDB 2015#include <ddb/ddb.h> 2016/* 2017 * List all of the locked vnodes in the system. 2018 * Called when debugging the kernel. 2019 */ 2020DB_SHOW_COMMAND(lockedvnodes, lockedvnodes) 2021{ 2022 struct proc *p = curproc; /* XXX */ 2023 struct mount *mp, *nmp; 2024 struct vnode *vp; 2025 2026 printf("Locked vnodes\n"); 2027 simple_lock(&mountlist_slock); 2028 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 2029 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 2030 nmp = mp->mnt_list.cqe_next; 2031 continue; 2032 } 2033 for (vp = mp->mnt_vnodelist.lh_first; 2034 vp != NULL; 2035 vp = vp->v_mntvnodes.le_next) { 2036 if (VOP_ISLOCKED(vp)) 2037 vprint((char *)0, vp); 2038 } 2039 simple_lock(&mountlist_slock); 2040 nmp = mp->mnt_list.cqe_next; 2041 vfs_unbusy(mp, p); 2042 } 2043 simple_unlock(&mountlist_slock); 2044} 2045#endif 2046 2047/* 2048 * Top level filesystem related information gathering. 2049 */ 2050static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS); 2051 2052static int 2053vfs_sysctl SYSCTL_HANDLER_ARGS 2054{ 2055 int *name = (int *)arg1 - 1; /* XXX */ 2056 u_int namelen = arg2 + 1; /* XXX */ 2057 struct vfsconf *vfsp; 2058 2059#if 1 || defined(COMPAT_PRELITE2) 2060 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */ 2061 if (namelen == 1) 2062 return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); 2063#endif 2064 2065#ifdef notyet 2066 /* all sysctl names at this level are at least name and field */ 2067 if (namelen < 2) 2068 return (ENOTDIR); /* overloaded */ 2069 if (name[0] != VFS_GENERIC) { 2070 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2071 if (vfsp->vfc_typenum == name[0]) 2072 break; 2073 if (vfsp == NULL) 2074 return (EOPNOTSUPP); 2075 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 2076 oldp, oldlenp, newp, newlen, p)); 2077 } 2078#endif 2079 switch (name[1]) { 2080 case VFS_MAXTYPENUM: 2081 if (namelen != 2) 2082 return (ENOTDIR); 2083 return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); 2084 case VFS_CONF: 2085 if (namelen != 3) 2086 return (ENOTDIR); /* overloaded */ 2087 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2088 if (vfsp->vfc_typenum == name[2]) 2089 break; 2090 if (vfsp == NULL) 2091 return (EOPNOTSUPP); 2092 return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); 2093 } 2094 return (EOPNOTSUPP); 2095} 2096 2097SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, 2098 "Generic filesystem"); 2099 2100#if 1 || defined(COMPAT_PRELITE2) 2101 2102static int 2103sysctl_ovfs_conf SYSCTL_HANDLER_ARGS 2104{ 2105 int error; 2106 struct vfsconf *vfsp; 2107 struct ovfsconf ovfs; 2108 2109 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 2110 ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ 2111 strcpy(ovfs.vfc_name, vfsp->vfc_name); 2112 ovfs.vfc_index = vfsp->vfc_typenum; 2113 ovfs.vfc_refcount = vfsp->vfc_refcount; 2114 ovfs.vfc_flags = vfsp->vfc_flags; 2115 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); 2116 if (error) 2117 return error; 2118 } 2119 return 0; 2120} 2121 2122#endif /* 1 || COMPAT_PRELITE2 */ 2123 2124#if 0 2125#define KINFO_VNODESLOP 10 2126/* 2127 * Dump vnode list (via sysctl). 2128 * Copyout address of vnode followed by vnode. 2129 */ 2130/* ARGSUSED */ 2131static int 2132sysctl_vnode SYSCTL_HANDLER_ARGS 2133{ 2134 struct proc *p = curproc; /* XXX */ 2135 struct mount *mp, *nmp; 2136 struct vnode *nvp, *vp; 2137 int error; 2138 2139#define VPTRSZ sizeof (struct vnode *) 2140#define VNODESZ sizeof (struct vnode) 2141 2142 req->lock = 0; 2143 if (!req->oldptr) /* Make an estimate */ 2144 return (SYSCTL_OUT(req, 0, 2145 (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); 2146 2147 simple_lock(&mountlist_slock); 2148 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 2149 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 2150 nmp = mp->mnt_list.cqe_next; 2151 continue; 2152 } 2153again: 2154 simple_lock(&mntvnode_slock); 2155 for (vp = mp->mnt_vnodelist.lh_first; 2156 vp != NULL; 2157 vp = nvp) { 2158 /* 2159 * Check that the vp is still associated with 2160 * this filesystem. RACE: could have been 2161 * recycled onto the same filesystem. 2162 */ 2163 if (vp->v_mount != mp) { 2164 simple_unlock(&mntvnode_slock); 2165 goto again; 2166 } 2167 nvp = vp->v_mntvnodes.le_next; 2168 simple_unlock(&mntvnode_slock); 2169 if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || 2170 (error = SYSCTL_OUT(req, vp, VNODESZ))) 2171 return (error); 2172 simple_lock(&mntvnode_slock); 2173 } 2174 simple_unlock(&mntvnode_slock); 2175 simple_lock(&mountlist_slock); 2176 nmp = mp->mnt_list.cqe_next; 2177 vfs_unbusy(mp, p); 2178 } 2179 simple_unlock(&mountlist_slock); 2180 2181 return (0); 2182} 2183#endif 2184 2185/* 2186 * XXX 2187 * Exporting the vnode list on large systems causes them to crash. 2188 * Exporting the vnode list on medium systems causes sysctl to coredump. 2189 */ 2190#if 0 2191SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 2192 0, 0, sysctl_vnode, "S,vnode", ""); 2193#endif 2194 2195/* 2196 * Check to see if a filesystem is mounted on a block device. 2197 */ 2198int 2199vfs_mountedon(vp) 2200 struct vnode *vp; 2201{ 2202 struct vnode *vq; 2203 int error = 0; 2204 2205 if (vp->v_specmountpoint != NULL) 2206 return (EBUSY); 2207 if (vp->v_flag & VALIASED) { 2208 simple_lock(&spechash_slock); 2209 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 2210 if (vq->v_rdev != vp->v_rdev || 2211 vq->v_type != vp->v_type) 2212 continue; 2213 if (vq->v_specmountpoint != NULL) { 2214 error = EBUSY; 2215 break; 2216 } 2217 } 2218 simple_unlock(&spechash_slock); 2219 } 2220 return (error); 2221} 2222 2223/* 2224 * Unmount all filesystems. The list is traversed in reverse order 2225 * of mounting to avoid dependencies. 2226 */ 2227void 2228vfs_unmountall() 2229{ 2230 struct mount *mp, *nmp; 2231 struct proc *p; 2232 int error; 2233 2234 if (curproc != NULL) 2235 p = curproc; 2236 else 2237 p = initproc; /* XXX XXX should this be proc0? */ 2238 /* 2239 * Since this only runs when rebooting, it is not interlocked. 2240 */ 2241 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 2242 nmp = mp->mnt_list.cqe_prev; 2243 error = dounmount(mp, MNT_FORCE, p); 2244 if (error) { 2245 printf("unmount of %s failed (", 2246 mp->mnt_stat.f_mntonname); 2247 if (error == EBUSY) 2248 printf("BUSY)\n"); 2249 else 2250 printf("%d)\n", error); 2251 } 2252 } 2253} 2254 2255/* 2256 * Build hash lists of net addresses and hang them off the mount point. 2257 * Called by ufs_mount() to set up the lists of export addresses. 2258 */ 2259static int 2260vfs_hang_addrlist(mp, nep, argp) 2261 struct mount *mp; 2262 struct netexport *nep; 2263 struct export_args *argp; 2264{ 2265 register struct netcred *np; 2266 register struct radix_node_head *rnh; 2267 register int i; 2268 struct radix_node *rn; 2269 struct sockaddr *saddr, *smask = 0; 2270 struct domain *dom; 2271 int error; 2272 2273 if (argp->ex_addrlen == 0) { 2274 if (mp->mnt_flag & MNT_DEFEXPORTED) 2275 return (EPERM); 2276 np = &nep->ne_defexported; 2277 np->netc_exflags = argp->ex_flags; 2278 np->netc_anon = argp->ex_anon; 2279 np->netc_anon.cr_ref = 1; 2280 mp->mnt_flag |= MNT_DEFEXPORTED; 2281 return (0); 2282 } 2283 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 2284 np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); 2285 bzero((caddr_t) np, i); 2286 saddr = (struct sockaddr *) (np + 1); 2287 if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) 2288 goto out; 2289 if (saddr->sa_len > argp->ex_addrlen) 2290 saddr->sa_len = argp->ex_addrlen; 2291 if (argp->ex_masklen) { 2292 smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); 2293 error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); 2294 if (error) 2295 goto out; 2296 if (smask->sa_len > argp->ex_masklen) 2297 smask->sa_len = argp->ex_masklen; 2298 } 2299 i = saddr->sa_family; 2300 if ((rnh = nep->ne_rtable[i]) == 0) { 2301 /* 2302 * Seems silly to initialize every AF when most are not used, 2303 * do so on demand here 2304 */ 2305 for (dom = domains; dom; dom = dom->dom_next) 2306 if (dom->dom_family == i && dom->dom_rtattach) { 2307 dom->dom_rtattach((void **) &nep->ne_rtable[i], 2308 dom->dom_rtoffset); 2309 break; 2310 } 2311 if ((rnh = nep->ne_rtable[i]) == 0) { 2312 error = ENOBUFS; 2313 goto out; 2314 } 2315 } 2316 rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, 2317 np->netc_rnodes); 2318 if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ 2319 error = EPERM; 2320 goto out; 2321 } 2322 np->netc_exflags = argp->ex_flags; 2323 np->netc_anon = argp->ex_anon; 2324 np->netc_anon.cr_ref = 1; 2325 return (0); 2326out: 2327 free(np, M_NETADDR); 2328 return (error); 2329} 2330 2331/* ARGSUSED */ 2332static int 2333vfs_free_netcred(rn, w) 2334 struct radix_node *rn; 2335 void *w; 2336{ 2337 register struct radix_node_head *rnh = (struct radix_node_head *) w; 2338 2339 (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); 2340 free((caddr_t) rn, M_NETADDR); 2341 return (0); 2342} 2343 2344/* 2345 * Free the net address hash lists that are hanging off the mount points. 2346 */ 2347static void 2348vfs_free_addrlist(nep) 2349 struct netexport *nep; 2350{ 2351 register int i; 2352 register struct radix_node_head *rnh; 2353 2354 for (i = 0; i <= AF_MAX; i++) 2355 if ((rnh = nep->ne_rtable[i])) { 2356 (*rnh->rnh_walktree) (rnh, vfs_free_netcred, 2357 (caddr_t) rnh); 2358 free((caddr_t) rnh, M_RTABLE); 2359 nep->ne_rtable[i] = 0; 2360 } 2361} 2362 2363int 2364vfs_export(mp, nep, argp) 2365 struct mount *mp; 2366 struct netexport *nep; 2367 struct export_args *argp; 2368{ 2369 int error; 2370 2371 if (argp->ex_flags & MNT_DELEXPORT) { 2372 if (mp->mnt_flag & MNT_EXPUBLIC) { 2373 vfs_setpublicfs(NULL, NULL, NULL); 2374 mp->mnt_flag &= ~MNT_EXPUBLIC; 2375 } 2376 vfs_free_addrlist(nep); 2377 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 2378 } 2379 if (argp->ex_flags & MNT_EXPORTED) { 2380 if (argp->ex_flags & MNT_EXPUBLIC) { 2381 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 2382 return (error); 2383 mp->mnt_flag |= MNT_EXPUBLIC; 2384 } 2385 if ((error = vfs_hang_addrlist(mp, nep, argp))) 2386 return (error); 2387 mp->mnt_flag |= MNT_EXPORTED; 2388 } 2389 return (0); 2390} 2391 2392 2393/* 2394 * Set the publicly exported filesystem (WebNFS). Currently, only 2395 * one public filesystem is possible in the spec (RFC 2054 and 2055) 2396 */ 2397int 2398vfs_setpublicfs(mp, nep, argp) 2399 struct mount *mp; 2400 struct netexport *nep; 2401 struct export_args *argp; 2402{ 2403 int error; 2404 struct vnode *rvp; 2405 char *cp; 2406 2407 /* 2408 * mp == NULL -> invalidate the current info, the FS is 2409 * no longer exported. May be called from either vfs_export 2410 * or unmount, so check if it hasn't already been done. 2411 */ 2412 if (mp == NULL) { 2413 if (nfs_pub.np_valid) { 2414 nfs_pub.np_valid = 0; 2415 if (nfs_pub.np_index != NULL) { 2416 FREE(nfs_pub.np_index, M_TEMP); 2417 nfs_pub.np_index = NULL; 2418 } 2419 } 2420 return (0); 2421 } 2422 2423 /* 2424 * Only one allowed at a time. 2425 */ 2426 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2427 return (EBUSY); 2428 2429 /* 2430 * Get real filehandle for root of exported FS. 2431 */ 2432 bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); 2433 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2434 2435 if ((error = VFS_ROOT(mp, &rvp))) 2436 return (error); 2437 2438 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 2439 return (error); 2440 2441 vput(rvp); 2442 2443 /* 2444 * If an indexfile was specified, pull it in. 2445 */ 2446 if (argp->ex_indexfile != NULL) { 2447 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2448 M_WAITOK); 2449 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2450 MAXNAMLEN, (size_t *)0); 2451 if (!error) { 2452 /* 2453 * Check for illegal filenames. 2454 */ 2455 for (cp = nfs_pub.np_index; *cp; cp++) { 2456 if (*cp == '/') { 2457 error = EINVAL; 2458 break; 2459 } 2460 } 2461 } 2462 if (error) { 2463 FREE(nfs_pub.np_index, M_TEMP); 2464 return (error); 2465 } 2466 } 2467 2468 nfs_pub.np_mount = mp; 2469 nfs_pub.np_valid = 1; 2470 return (0); 2471} 2472 2473struct netcred * 2474vfs_export_lookup(mp, nep, nam) 2475 register struct mount *mp; 2476 struct netexport *nep; 2477 struct sockaddr *nam; 2478{ 2479 register struct netcred *np; 2480 register struct radix_node_head *rnh; 2481 struct sockaddr *saddr; 2482 2483 np = NULL; 2484 if (mp->mnt_flag & MNT_EXPORTED) { 2485 /* 2486 * Lookup in the export list first. 2487 */ 2488 if (nam != NULL) { 2489 saddr = nam; 2490 rnh = nep->ne_rtable[saddr->sa_family]; 2491 if (rnh != NULL) { 2492 np = (struct netcred *) 2493 (*rnh->rnh_matchaddr)((caddr_t)saddr, 2494 rnh); 2495 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2496 np = NULL; 2497 } 2498 } 2499 /* 2500 * If no address match, use the default if it exists. 2501 */ 2502 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2503 np = &nep->ne_defexported; 2504 } 2505 return (np); 2506} 2507 2508/* 2509 * perform msync on all vnodes under a mount point 2510 * the mount point must be locked. 2511 */ 2512void 2513vfs_msync(struct mount *mp, int flags) { 2514 struct vnode *vp, *nvp; 2515 struct vm_object *obj; 2516 int anyio, tries; 2517 2518 tries = 5; 2519loop: 2520 anyio = 0; 2521 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 2522 2523 nvp = vp->v_mntvnodes.le_next; 2524 2525 if (vp->v_mount != mp) { 2526 goto loop; 2527 } 2528 2529 if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? */ 2530 continue; 2531 2532 if (flags != MNT_WAIT) { 2533 obj = vp->v_object; 2534 if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0) 2535 continue; 2536 if (VOP_ISLOCKED(vp)) 2537 continue; 2538 } 2539 2540 simple_lock(&vp->v_interlock); 2541 if (vp->v_object && 2542 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 2543 if (!vget(vp, 2544 LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { 2545 if (vp->v_object) { 2546 vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : 0); 2547 anyio = 1; 2548 } 2549 vput(vp); 2550 } 2551 } else { 2552 simple_unlock(&vp->v_interlock); 2553 } 2554 } 2555 if (anyio && (--tries > 0)) 2556 goto loop; 2557} 2558 2559/* 2560 * Create the VM object needed for VMIO and mmap support. This 2561 * is done for all VREG files in the system. Some filesystems might 2562 * afford the additional metadata buffering capability of the 2563 * VMIO code by making the device node be VMIO mode also. 2564 * 2565 * vp must be locked when vfs_object_create is called. 2566 */ 2567int 2568vfs_object_create(vp, p, cred) 2569 struct vnode *vp; 2570 struct proc *p; 2571 struct ucred *cred; 2572{ 2573 struct vattr vat; 2574 vm_object_t object; 2575 int error = 0; 2576 2577 if ((vp->v_type != VREG) && (vp->v_type != VBLK)) 2578 return 0; 2579 2580retry: 2581 if ((object = vp->v_object) == NULL) { 2582 if (vp->v_type == VREG) { 2583 if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) 2584 goto retn; 2585 object = vnode_pager_alloc(vp, vat.va_size, 0, 0); 2586 } else if (bdevsw(vp->v_rdev) != NULL) { 2587 /* 2588 * This simply allocates the biggest object possible 2589 * for a VBLK vnode. This should be fixed, but doesn't 2590 * cause any problems (yet). 2591 */ 2592 object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0); 2593 } else { 2594 goto retn; 2595 } 2596 /* 2597 * Dereference the reference we just created. This assumes 2598 * that the object is associated with the vp. 2599 */ 2600 object->ref_count--; 2601 vp->v_usecount--; 2602 } else { 2603 if (object->flags & OBJ_DEAD) { 2604 VOP_UNLOCK(vp, 0, p); 2605 tsleep(object, PVM, "vodead", 0); 2606 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 2607 goto retry; 2608 } 2609 } 2610 2611 KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object")); 2612 vp->v_flag |= VOBJBUF; 2613 2614retn: 2615 return error; 2616} 2617 2618static void 2619vfree(vp) 2620 struct vnode *vp; 2621{ 2622 int s; 2623 2624 s = splbio(); 2625 simple_lock(&vnode_free_list_slock); 2626 if (vp->v_flag & VTBFREE) { 2627 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2628 vp->v_flag &= ~VTBFREE; 2629 } 2630 if (vp->v_flag & VAGE) { 2631 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 2632 } else { 2633 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 2634 } 2635 freevnodes++; 2636 simple_unlock(&vnode_free_list_slock); 2637 vp->v_flag &= ~VAGE; 2638 vp->v_flag |= VFREE; 2639 splx(s); 2640} 2641 2642void 2643vbusy(vp) 2644 struct vnode *vp; 2645{ 2646 int s; 2647 2648 s = splbio(); 2649 simple_lock(&vnode_free_list_slock); 2650 if (vp->v_flag & VTBFREE) { 2651 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2652 vp->v_flag &= ~VTBFREE; 2653 } else { 2654 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 2655 freevnodes--; 2656 } 2657 simple_unlock(&vnode_free_list_slock); 2658 vp->v_flag &= ~(VFREE|VAGE); 2659 splx(s); 2660} 2661 2662/* 2663 * Record a process's interest in events which might happen to 2664 * a vnode. Because poll uses the historic select-style interface 2665 * internally, this routine serves as both the ``check for any 2666 * pending events'' and the ``record my interest in future events'' 2667 * functions. (These are done together, while the lock is held, 2668 * to avoid race conditions.) 2669 */ 2670int 2671vn_pollrecord(vp, p, events) 2672 struct vnode *vp; 2673 struct proc *p; 2674 short events; 2675{ 2676 simple_lock(&vp->v_pollinfo.vpi_lock); 2677 if (vp->v_pollinfo.vpi_revents & events) { 2678 /* 2679 * This leaves events we are not interested 2680 * in available for the other process which 2681 * which presumably had requested them 2682 * (otherwise they would never have been 2683 * recorded). 2684 */ 2685 events &= vp->v_pollinfo.vpi_revents; 2686 vp->v_pollinfo.vpi_revents &= ~events; 2687 2688 simple_unlock(&vp->v_pollinfo.vpi_lock); 2689 return events; 2690 } 2691 vp->v_pollinfo.vpi_events |= events; 2692 selrecord(p, &vp->v_pollinfo.vpi_selinfo); 2693 simple_unlock(&vp->v_pollinfo.vpi_lock); 2694 return 0; 2695} 2696 2697/* 2698 * Note the occurrence of an event. If the VN_POLLEVENT macro is used, 2699 * it is possible for us to miss an event due to race conditions, but 2700 * that condition is expected to be rare, so for the moment it is the 2701 * preferred interface. 2702 */ 2703void 2704vn_pollevent(vp, events) 2705 struct vnode *vp; 2706 short events; 2707{ 2708 simple_lock(&vp->v_pollinfo.vpi_lock); 2709 if (vp->v_pollinfo.vpi_events & events) { 2710 /* 2711 * We clear vpi_events so that we don't 2712 * call selwakeup() twice if two events are 2713 * posted before the polling process(es) is 2714 * awakened. This also ensures that we take at 2715 * most one selwakeup() if the polling process 2716 * is no longer interested. However, it does 2717 * mean that only one event can be noticed at 2718 * a time. (Perhaps we should only clear those 2719 * event bits which we note?) XXX 2720 */ 2721 vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */ 2722 vp->v_pollinfo.vpi_revents |= events; 2723 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2724 } 2725 simple_unlock(&vp->v_pollinfo.vpi_lock); 2726} 2727 2728/* 2729 * Wake up anyone polling on vp because it is being revoked. 2730 * This depends on dead_poll() returning POLLHUP for correct 2731 * behavior. 2732 */ 2733void 2734vn_pollgone(vp) 2735 struct vnode *vp; 2736{ 2737 simple_lock(&vp->v_pollinfo.vpi_lock); 2738 if (vp->v_pollinfo.vpi_events) { 2739 vp->v_pollinfo.vpi_events = 0; 2740 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2741 } 2742 simple_unlock(&vp->v_pollinfo.vpi_lock); 2743} 2744 2745 2746 2747/* 2748 * Routine to create and manage a filesystem syncer vnode. 2749 */ 2750#define sync_close ((int (*) __P((struct vop_close_args *)))nullop) 2751static int sync_fsync __P((struct vop_fsync_args *)); 2752static int sync_inactive __P((struct vop_inactive_args *)); 2753static int sync_reclaim __P((struct vop_reclaim_args *)); 2754#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) 2755#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) 2756static int sync_print __P((struct vop_print_args *)); 2757#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked) 2758 2759static vop_t **sync_vnodeop_p; 2760static struct vnodeopv_entry_desc sync_vnodeop_entries[] = { 2761 { &vop_default_desc, (vop_t *) vop_eopnotsupp }, 2762 { &vop_close_desc, (vop_t *) sync_close }, /* close */ 2763 { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ 2764 { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ 2765 { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ 2766 { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ 2767 { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ 2768 { &vop_print_desc, (vop_t *) sync_print }, /* print */ 2769 { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ 2770 { NULL, NULL } 2771}; 2772static struct vnodeopv_desc sync_vnodeop_opv_desc = 2773 { &sync_vnodeop_p, sync_vnodeop_entries }; 2774 2775VNODEOP_SET(sync_vnodeop_opv_desc); 2776 2777/* 2778 * Create a new filesystem syncer vnode for the specified mount point. 2779 */ 2780int 2781vfs_allocate_syncvnode(mp) 2782 struct mount *mp; 2783{ 2784 struct vnode *vp; 2785 static long start, incr, next; 2786 int error; 2787 2788 /* Allocate a new vnode */ 2789 if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { 2790 mp->mnt_syncer = NULL; 2791 return (error); 2792 } 2793 vp->v_type = VNON; 2794 /* 2795 * Place the vnode onto the syncer worklist. We attempt to 2796 * scatter them about on the list so that they will go off 2797 * at evenly distributed times even if all the filesystems 2798 * are mounted at once. 2799 */ 2800 next += incr; 2801 if (next == 0 || next > syncer_maxdelay) { 2802 start /= 2; 2803 incr /= 2; 2804 if (start == 0) { 2805 start = syncer_maxdelay / 2; 2806 incr = syncer_maxdelay; 2807 } 2808 next = start; 2809 } 2810 vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); 2811 mp->mnt_syncer = vp; 2812 return (0); 2813} 2814 2815/* 2816 * Do a lazy sync of the filesystem. 2817 */ 2818static int 2819sync_fsync(ap) 2820 struct vop_fsync_args /* { 2821 struct vnode *a_vp; 2822 struct ucred *a_cred; 2823 int a_waitfor; 2824 struct proc *a_p; 2825 } */ *ap; 2826{ 2827 struct vnode *syncvp = ap->a_vp; 2828 struct mount *mp = syncvp->v_mount; 2829 struct proc *p = ap->a_p; 2830 int asyncflag; 2831 2832 /* 2833 * We only need to do something if this is a lazy evaluation. 2834 */ 2835 if (ap->a_waitfor != MNT_LAZY) 2836 return (0); 2837 2838 /* 2839 * Move ourselves to the back of the sync list. 2840 */ 2841 vn_syncer_add_to_worklist(syncvp, syncdelay); 2842 2843 /* 2844 * Walk the list of vnodes pushing all that are dirty and 2845 * not already on the sync list. 2846 */ 2847 simple_lock(&mountlist_slock); 2848 if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) { 2849 simple_unlock(&mountlist_slock); 2850 return (0); 2851 } 2852 asyncflag = mp->mnt_flag & MNT_ASYNC; 2853 mp->mnt_flag &= ~MNT_ASYNC; 2854 vfs_msync(mp, MNT_NOWAIT); 2855 VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); 2856 if (asyncflag) 2857 mp->mnt_flag |= MNT_ASYNC; 2858 vfs_unbusy(mp, p); 2859 return (0); 2860} 2861 2862/* 2863 * The syncer vnode is no referenced. 2864 */ 2865static int 2866sync_inactive(ap) 2867 struct vop_inactive_args /* { 2868 struct vnode *a_vp; 2869 struct proc *a_p; 2870 } */ *ap; 2871{ 2872 2873 vgone(ap->a_vp); 2874 return (0); 2875} 2876 2877/* 2878 * The syncer vnode is no longer needed and is being decommissioned. 2879 * 2880 * Modifications to the worklist must be protected at splbio(). 2881 */ 2882static int 2883sync_reclaim(ap) 2884 struct vop_reclaim_args /* { 2885 struct vnode *a_vp; 2886 } */ *ap; 2887{ 2888 struct vnode *vp = ap->a_vp; 2889 int s; 2890 2891 s = splbio(); 2892 vp->v_mount->mnt_syncer = NULL; 2893 if (vp->v_flag & VONWORKLST) { 2894 LIST_REMOVE(vp, v_synclist); 2895 vp->v_flag &= ~VONWORKLST; 2896 } 2897 splx(s); 2898 2899 return (0); 2900} 2901 2902/* 2903 * Print out a syncer vnode. 2904 */ 2905static int 2906sync_print(ap) 2907 struct vop_print_args /* { 2908 struct vnode *a_vp; 2909 } */ *ap; 2910{ 2911 struct vnode *vp = ap->a_vp; 2912 2913 printf("syncer vnode"); 2914 if (vp->v_vnlock != NULL) 2915 lockmgr_printinfo(vp->v_vnlock); 2916 printf("\n"); 2917 return (0); 2918} 2919