vfs_export.c revision 34926
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 * $Id: vfs_subr.c,v 1.145 1998/03/19 22:48:16 dyson Exp $
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"
#include "opt_devfs.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/poll.h>
#include <sys/domain.h>
#include <sys/dirent.h>
#include <sys/vmmeter.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <vm/vm_zone.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");

static void	insmntque __P((struct vnode *vp, struct mount *mp));
#ifdef DDB
static void	printlockedvnodes __P((void));
#endif
static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
static void	vfree __P((struct vnode *));
static void	vgonel __P((struct vnode *vp, struct proc *p));
static unsigned long	numvnodes;
SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define bufremvn(bp) {						\
	LIST_REMOVE(bp, b_vnbufs);				\
	(bp)->b_vnbufs.le_next = NOLIST;			\
}

static TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct tobefreelist vnode_tobefree_list;	/* vnode free list */

static u_long wantfreevnodes = 25;
SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
static u_long freevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");

int vfs_ioopt = 0;
#ifdef ENABLE_VFS_IOOPT
SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
#endif

struct mntlist mountlist;	/* mounted filesystem list */
struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
static struct simplelock vnode_free_list_slock;
static struct simplelock spechash_slock;
struct nfs_public nfs_pub;	/* publicly exported FS */
static vm_zone_t vnode_zone;

/*
 * The workitem queue.
 */
#define SYNCER_MAXDELAY		32
int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
time_t syncdelay = 30;
int rushjob;				/* number of slots to run ASAP */

static int syncer_delayno = 0;
static long syncer_mask;
LIST_HEAD(synclist, vnode);
static struct synclist *syncer_workitem_pending;

int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");

static void	vfs_free_addrlist __P((struct netexport *nep));
static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
				       struct export_args *argp));

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	desiredvnodes = maxproc + cnt.v_page_count / 4;
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&vnode_tobefree_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
	vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
	/*
	 * Initialize the filesystem syncer.
	 */
	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
	    &syncer_mask);
	syncer_maxdelay = syncer_mask + 1;
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_kern_flag |= MNTK_MWAIT;
		if (interlkp) {
			simple_unlock(interlkp);
		}
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		if (interlkp) {
			simple_lock(interlkp);
		}
		return (ENOENT);
	}
	lkflags = LK_SHARED | LK_NOPAUSE;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = 0;
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}
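
/*
 * A minimal, illustrative sketch (not compiled) of the usual way the
 * vfs_busy()/vfs_unbusy() pair above brackets a mount list traversal;
 * the hypothetical example_mount_walk() mirrors what printlockedvnodes()
 * below does.  On success vfs_busy() drops the passed interlock via
 * LK_INTERLOCK, so mountlist_slock must be reacquired before stepping
 * to the next mount point.
 */
#if 0
static void
example_mount_walk(p)
	struct proc *p;
{
	struct mount *mp, *nmp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		/* ... operate on the busied mount point ... */
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif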

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
#ifdef notdef	/* XXX JH */
int
lite2_vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*lite2_mountroot) __P((void));
	int error;

	if (lite2_mountroot != NULL)
		return ((*lite2_mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}
#endif

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *) 0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
	    vap->va_fsid = vap->va_fileid =
	    vap->va_blocksize = vap->va_rdev =
	    vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
	    vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}
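
/*
 * Illustrative sketch (not compiled): vattr_null() primes a vattr so
 * that every field reads VNOVAL, i.e. "no change".  A caller then sets
 * only the fields it wants acted on before handing the vattr to
 * VOP_SETATTR().  The variable names here are hypothetical.
 */
#if 0
	struct vattr va;

	vattr_null(&va);
	va.va_size = newsize;		/* change the size, nothing else */
	error = VOP_SETATTR(vp, &va, cred, p);
#endif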

/*
 * Routines having to do with the management of the vnode table.
 */
extern vop_t **dead_vnodeop_p;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	vop_t **vops;
	struct vnode **vpp;
{
	int s;
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *tvp, *nvp;
	vm_object_t object;
	TAILQ_HEAD(freelst, vnode) vnode_tmp_list;

	/*
	 * We take the least recently used vnode from the freelist
	 * if we can get it and it has no cached pages, and no
	 * namecache entries are relative to it.
	 * Otherwise we allocate a new vnode.
	 */

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	TAILQ_INIT(&vnode_tmp_list);

	for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) {
		nvp = TAILQ_NEXT(vp, v_freelist);
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		if (vp->v_flag & VAGE) {
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		} else {
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		}
		vp->v_flag &= ~(VTBFREE|VAGE);
		vp->v_flag |= VFREE;
		if (vp->v_usecount)
			panic("tobe free vnode isn't");
		freevnodes++;
	}

	if (wantfreevnodes && freevnodes < wantfreevnodes) {
		vp = NULL;
	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
		/*
		 * XXX: this is only here to be backwards compatible
		 */
		vp = NULL;
	} else {
		for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) {

			nvp = TAILQ_NEXT(vp, v_freelist);

			if (!simple_lock_try(&vp->v_interlock))
				continue;
			if (vp->v_usecount)
				panic("free vnode isn't");

			object = vp->v_object;
			if (object && (object->resident_page_count || object->ref_count)) {
				printf("object inconsistent state: RPC: %d, RC: %d\n",
				    object->resident_page_count, object->ref_count);
				/* Don't recycle if it's caching some pages */
				TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
				TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist);
				continue;
			} else if (LIST_FIRST(&vp->v_cache_src)) {
				/* Don't recycle if active in the namecache */
				simple_unlock(&vp->v_interlock);
				continue;
			} else {
				break;
			}
		}
	}

	for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) {
		nvp = TAILQ_NEXT(tvp, v_freelist);
		TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist);
		simple_unlock(&tvp->v_interlock);
	}

	if (vp) {
		vp->v_flag |= VDOOMED;
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
		simple_unlock(&vnode_free_list_slock);
		cache_purge(vp);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD) {
			vgonel(vp, p);
		} else {
			simple_unlock(&vp->v_interlock);
		}

#ifdef DIAGNOSTIC
		{
			int s;

			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
		}
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
		vp->v_maxio = 0;
	} else {
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *) zalloc(vnode_zone);
		bzero((char *) vp, sizeof *vp);
		simple_lock_init(&vp->v_interlock);
		vp->v_dd = vp;
		cache_purge(vp);
		LIST_INIT(&vp->v_cache_src);
		TAILQ_INIT(&vp->v_cache_dst);
		numvnodes++;
	}

	vp->v_type = VNON;
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	splx(s);

	vfs_object_create(vp, p, p->p_ucred, TRUE);
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
static void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL) {
		simple_unlock(&mntvnode_slock);
		return;
	}
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp)) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t) &vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if ((flags & V_SAVE) && vp->v_dirtyblkhd.lh_first != NULL) {
		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}

	s = splbio();
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
				continue;
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t) bp,
				    slpflag | (PRIBIO + 4), "vinvalbuf",
				    slptimeo);
				if (error) {
					splx(s);
					return (error);
				}
				break;
			}
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.  Note that vfs_bio_awrite expects
			 * buffers to reside on a queue, while VOP_BWRITE and
			 * brelse do not.
			 */
			if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
			    (flags & V_SAVE)) {

				if (bp->b_vp == vp) {
					if (bp->b_flags & B_CLUSTEROK) {
						vfs_bio_awrite(bp);
					} else {
						bremfree(bp);
						bp->b_flags |= (B_BUSY | B_ASYNC);
						VOP_BWRITE(bp);
					}
				} else {
					bremfree(bp);
					bp->b_flags |= B_BUSY;
					(void) VOP_BWRITE(bp);
				}
				break;
			}
			bremfree(bp);
			bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF | B_BUSY);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}

	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	simple_lock(&vp->v_interlock);
	object = vp->v_object;
	if (object != NULL) {
		if (flags & V_SAVEMETA)
			vm_object_page_remove(object, 0, object->size,
			    (flags & V_SAVE) ? TRUE : FALSE);
		else
			vm_object_page_remove(object, 0, 0,
			    (flags & V_SAVE) ? TRUE : FALSE);
	}
	simple_unlock(&vp->v_interlock);

	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Truncate a file's buffer and pages to a specified length.  This
 * is in lieu of the old vinvalbuf mechanism, which performed unneeded
 * sync activity.
 */
int
vtruncbuf(vp, cred, p, length, blksize)
	register struct vnode *vp;
	struct ucred *cred;
	struct proc *p;
	off_t length;
	int blksize;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error, anyfreed;
	vm_object_t object;
	int trunclbn;

	/*
	 * Round up to the *next* lbn.
	 */
	trunclbn = (length + blksize - 1) / blksize;

	s = splbio();
restart:
	anyfreed = 1;
	for (;anyfreed;) {
		anyfreed = 0;
		for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {

			nbp = LIST_NEXT(bp, b_vnbufs);

			if (bp->b_lblkno >= trunclbn) {
				if (bp->b_flags & B_BUSY) {
					bp->b_flags |= B_WANTED;
					tsleep(bp, PRIBIO + 4, "vtrb1", 0);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				if (nbp &&
				    ((LIST_NEXT(nbp, b_vnbufs) == NOLIST) ||
				     (nbp->b_vp != vp) ||
				     (nbp->b_flags & B_DELWRI))) {
					goto restart;
				}
			}
		}

		for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {

			nbp = LIST_NEXT(bp, b_vnbufs);

			if (bp->b_lblkno >= trunclbn) {
				if (bp->b_flags & B_BUSY) {
					bp->b_flags |= B_WANTED;
					tsleep(bp, PRIBIO + 4, "vtrb2", 0);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				if (nbp &&
				    ((LIST_NEXT(nbp, b_vnbufs) == NOLIST) ||
				     (nbp->b_vp != vp) ||
				     (nbp->b_flags & B_DELWRI) == 0)) {
					goto restart;
				}
			}
		}
	}

	if (length > 0) {
restartsync:
		for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {

			nbp = LIST_NEXT(bp, b_vnbufs);

			if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
				if (bp->b_flags & B_BUSY) {
					bp->b_flags |= B_WANTED;
					tsleep(bp, PRIBIO, "vtrb3", 0);
				} else {
					bremfree(bp);
					bp->b_flags |= B_BUSY;
					if (bp->b_vp == vp) {
						bp->b_flags |= B_ASYNC;
					} else {
						bp->b_flags &= ~B_ASYNC;
					}
					VOP_BWRITE(bp);
				}
				goto restartsync;
			}

		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
	}

	splx(s);

	vnode_pager_setsize(vp, length);

	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

#if defined(DIAGNOSTIC)
	if (bp->b_vp)
		panic("bgetvp: not free");
#endif
	vhold(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	int s;

#if defined(DIAGNOSTIC)
	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
#endif

	/*
	 * Delete from old vnode list, if on one.
	 */
	vp = bp->b_vp;
	s = splbio();
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	if ((vp->v_flag & VONWORKLST) && (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	splx(s);
	bp->b_vp = (struct vnode *) 0;
	vdrop(vp);
}

/*
 * The workitem queue.
 *
 * It is useful to delay writes of file data and filesystem metadata
 * for tens of seconds so that quickly created and deleted files need
 * not waste disk bandwidth being created and removed. To realize this,
 * we append vnodes to a "workitem" queue. When running with a soft
 * updates implementation, most pending metadata dependencies should
 * not wait for more than a few seconds. Thus, filesystems mounted on
 * block devices are delayed only about half the time that file data
 * is delayed. Similarly, directory updates are more critical, so they
 * are delayed only about a third the time that file data is delayed.
 * Thus, there are SYNCER_MAXDELAY queues that are processed
 * round-robin at a rate of one each second (driven off the filesystem
 * syncer process). The syncer_delayno variable indicates the next
 * queue that is to be processed. Items that need to be processed soon
 * are placed in this queue:
 *
 *	syncer_workitem_pending[syncer_delayno]
 *
 * A delay of fifteen seconds is done by placing the request fifteen
 * entries later in the queue:
 *
 *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
 *
 */

/*
 * Add an item to the syncer work queue.
 */
void
vn_syncer_add_to_worklist(vp, delay)
	struct vnode *vp;
	int delay;
{
	int s, slot;

	s = splbio();

	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
	}

	if (delay > syncer_maxdelay - 2)
		delay = syncer_maxdelay - 2;
	slot = (syncer_delayno + delay) & syncer_mask;

	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
	vp->v_flag |= VONWORKLST;
	splx(s);
}

static void sched_sync __P((void));
static struct proc *updateproc;
static struct kproc_desc up_kp = {
	"syncer",
	sched_sync,
	&updateproc
};
SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
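
/*
 * Worked example of the wheel arithmetic above (illustrative numbers):
 * with the default SYNCER_MAXDELAY of 32, hashinit() hands back a
 * power-of-two table, so syncer_mask is 31 and syncer_maxdelay becomes
 * 32.  If syncer_delayno is currently 3, queueing a vnode with a delay
 * of 15 computes slot (3 + 15) & 31 == 18, and since the syncer sweeps
 * one slot per second, that vnode is visited roughly 15 seconds later.
 */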

/*
 * System filesystem synchronizer daemon.
 */
void
sched_sync(void)
{
	struct synclist *slp;
	struct vnode *vp;
	long starttime;
	int s;
	struct proc *p = updateproc;

	for (;;) {
		starttime = time.tv_sec;

		/*
		 * Push files whose dirty time has expired.
		 */
		s = splbio();
		slp = &syncer_workitem_pending[syncer_delayno];
		syncer_delayno += 1;
		if (syncer_delayno == syncer_maxdelay)
			syncer_delayno = 0;
		splx(s);

		while ((vp = LIST_FIRST(slp)) != NULL) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
			(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
			VOP_UNLOCK(vp, 0, p);
			if (LIST_FIRST(slp) == vp) {
				if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
				    vp->v_type != VBLK)
					panic("sched_sync: fsync failed");
				/*
				 * Move ourselves to the back of the sync list.
				 */
				LIST_REMOVE(vp, v_synclist);
				vn_syncer_add_to_worklist(vp, syncdelay);
			}
		}

		/*
		 * Do soft update processing.
		 */
		if (bioops.io_sync)
			(*bioops.io_sync)(NULL);

		/*
		 * The variable rushjob allows the kernel to speed up the
		 * processing of the filesystem syncer process. A rushjob
		 * value of N tells the filesystem syncer to process the next
		 * N seconds worth of work on its queue ASAP. Currently rushjob
		 * is used by the soft update code to speed up the filesystem
		 * syncer process when the incore state is getting so far
		 * ahead of the disk that the kernel memory pool is being
		 * threatened with exhaustion.
		 */
		if (rushjob > 0) {
			rushjob -= 1;
			continue;
		}
		/*
		 * If it has taken us less than a second to process the
		 * current work, then wait. Otherwise start right over
		 * again. We can still lose time if any single round
		 * takes more than two seconds, but it does not really
		 * matter as we are just trying to generally pace the
		 * filesystem activity.
		 */
		if (time.tv_sec == starttime)
			tsleep(&lbolt, PPAUSE, "syncer", 0);
	}
}

/*
 * Associate a p-buffer with a vnode.
 */
void
pbgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
#if defined(DIAGNOSTIC)
	if (bp->b_vp)
		panic("pbgetvp: not free");
#endif
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
}

/*
 * Disassociate a p-buffer from a vnode.
 */
void
pbrelvp(bp)
	register struct buf *bp;
{

#if defined(DIAGNOSTIC)
	if (bp->b_vp == (struct vnode *) 0)
		panic("pbrelvp: NULL");
#endif

	bp->b_vp = (struct vnode *) 0;
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;
	int s;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}

	s = splbio();
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST) {
		bufremvn(bp);
		vdrop(bp->b_vp);
	}
	/*
	 * If dirty, put on list of dirty buffers; otherwise insert onto list
	 * of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI) {
		struct buf *tbp;

		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = syncdelay / 3;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = syncdelay / 2;
					break;
				}
				/* fall through */
			default:
				delay = syncdelay;
			}
			vn_syncer_add_to_worklist(newvp, delay);
		}
		tbp = listheadp->lh_first;
		if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
			bufinsvn(bp, listheadp);
		} else {
			while (tbp->b_vnbufs.le_next &&
			    (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
				tbp = tbp->b_vnbufs.le_next;
			}
			LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
		}
	} else {
		bufinsvn(bp, &newvp->v_cleanblkhd);
		if ((newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	}
	bp->b_vp = newvp;
	vhold(bp->b_vp);
	splx(s);
}

#ifndef DEVFS_ROOT
/*
 * Create a vnode for a block device.
 * Used for mounting the root file system.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
#endif /* !DEVFS_ROOT */

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
	}
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}

	vp->v_usecount++;

	if (VSHOULDBUSY(vp))
		vbusy(vp);
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active. We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (VSHOULDFREE(vp))
				vfree(vp);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

void
vref(struct vnode *vp)
{
	simple_lock(&vp->v_interlock);
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode put/release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		simple_unlock(&vp->v_interlock);

		return;
	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;

		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * If we are doing a vput, the node is already locked, and we must
		 * call VOP_INACTIVE with the node locked.  So, in the case of
		 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
		 */
		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
			VOP_INACTIVE(vp, p);
		}

	} else {
#ifdef DIAGNOSTIC
		vprint("vrele: negative ref count", vp);
		simple_unlock(&vp->v_interlock);
#endif
		panic("vrele: negative ref cnt");
	}
}

void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		VOP_UNLOCK(vp, LK_INTERLOCK, p);
		return;

	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * If we are doing a vput, the node is already locked, and we must
		 * call VOP_INACTIVE with the node locked.  So, in the case of
		 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
		 */
		simple_unlock(&vp->v_interlock);
		VOP_INACTIVE(vp, p);

	} else {
#ifdef DIAGNOSTIC
		vprint("vput: negative ref count", vp);
#endif
		panic("vput: negative ref cnt");
	}
}

/*
 * Somebody doesn't want the vnode recycled.
 */
void
vhold(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	vp->v_holdcnt++;
	if (VSHOULDBUSY(vp))
		vbusy(vp);
	splx(s);
}

/*
 * One less who cares about this vnode.
 */
void
vdrop(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	if (vp->v_holdcnt <= 0)
		panic("vdrop: holdcnt");
	vp->v_holdcnt--;
	if (VSHOULDFREE(vp))
		vfree(vp);
	splx(s);
}
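
/*
 * Illustrative sketch (not compiled) of the usual reference pattern
 * built from the routines above: vget() takes a reference (and a lock
 * when a lock type is requested), vput() drops lock and reference
 * together, and vrele() is used instead when the caller has already
 * unlocked the vnode.  vfs_msync() below follows this form.
 */
#if 0
	if (vget(vp, LK_EXCLUSIVE, p) == 0) {
		/* ... vp is referenced and exclusively locked ... */
		vput(vp);
	}
#endif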

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
static int busyprt = 0;		/* print out busy vnodes */
SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}

		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}

		/*
		 * If FORCECLOSE is set, forcibly close the vnode. For block
		 * or character devices, revert to an anonymous device. For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;
	vm_object_t obj;

	/*
	 * Check to see if the vnode is in use. If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * Clean out any buffers associated with the vnode.
	 */
	vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	if (obj = vp->v_object) {
		if (obj->ref_count == 0) {
			/*
			 * This is a normal way of shutting down the object/vnode
			 * association.
			 */
			vm_object_terminate(obj);
		} else {
			/*
			 * Woe to the process that tries to page now :-).
			 */
			vm_pager_deallocate(obj);
		}
	}

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");

	if (active)
		vrele(vp);

	cache_purge(vp);
	if (vp->v_vnlock) {
#if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */
#ifdef DIAGNOSTIC
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
#endif
#endif
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	if (VSHOULDFREE(vp))
		vfree(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vn_pollgone(vp);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t) vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("vop_revoke");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP) {
				simple_unlock(&spechash_slock);
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
		if (vp->v_flag & VXWANT) {
			vp->v_flag &= ~VXWANT;
			wakeup(vp);
		}
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp) {
			simple_unlock(inter_lkp);
		}
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
static void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int s;
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	simple_lock(&vp->v_interlock);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 */
	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_flag & VFREE) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		} else if (vp->v_flag & VTBFREE) {
			TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
			vp->v_flag &= ~VTBFREE;
			freevnodes++;
		} else
			freevnodes++;
		vp->v_flag |= VFREE;
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		splx(s);
	}

	vp->v_type = VBAD;
	simple_unlock(&vp->v_interlock);
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}
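
/*
 * Illustrative sketch (not compiled): vfinddev() returns 1 and fills
 * in *vpp when a vnode for the device exists, 0 otherwise.  The
 * surrounding code here is hypothetical.
 */
#if 0
	struct vnode *devvp;

	if (vfinddev(dev, VBLK, &devvp) == 0)
		return (ENXIO);		/* no vnode hashed for this device */
	/* ... use devvp ... */
#endif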

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: %x: ", label, vp);
	else
		printf("%x: ", vp);
	printf("type %s, usecount %d, writecount %d, refcount %ld,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (vp->v_flag & VDOOMED)
		strcat(buf, "|VDOOMED");
	if (vp->v_flag & VFREE)
		strcat(buf, "|VFREE");
	if (vp->v_flag & VOBJBUF)
		strcat(buf, "|VOBJBUF");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
static void
printlockedvnodes()
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);

static int
vfs_sysctl SYSCTL_HANDLER_ARGS
{
	int *name = (int *)arg1 - 1;	/* XXX */
	u_int namelen = arg2 + 1;	/* XXX */
	struct vfsconf *vfsp;

#ifndef NO_COMPAT_PRELITE2
	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC.
	 */
	if (namelen == 1)
		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
#endif

#ifdef notyet
	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
#endif
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		if (namelen != 2)
			return (ENOTDIR);
		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
	case VFS_CONF:
		if (namelen != 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
	}
	return (EOPNOTSUPP);
}

SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
	"Generic filesystem");

#ifndef NO_COMPAT_PRELITE2

static int
sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
{
	int error;
	struct vfsconf *vfsp;
	struct ovfsconf ovfs;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
		strcpy(ovfs.vfc_name, vfsp->vfc_name);
		ovfs.vfc_index = vfsp->vfc_typenum;
		ovfs.vfc_refcount = vfsp->vfc_refcount;
		ovfs.vfc_flags = vfsp->vfc_flags;
		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
		if (error)
			return error;
	}
	return 0;
}

#endif /* !NO_COMPAT_PRELITE2 */

static volatile int kinfo_vdebug = 1;

#if 0
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
static int
sysctl_vnode SYSCTL_HANDLER_ARGS
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)

	req->lock = 0;
	if (!req->oldptr) /* Make an estimate */
		return (SYSCTL_OUT(req, 0,
		    (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			simple_unlock(&mntvnode_slock);
			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
				return (error);
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	return (0);
}
#endif

/*
 * XXX
 * Exporting the vnode list on large systems causes them to crash.
 * Exporting the vnode list on medium systems causes sysctl to coredump.
 */
#if 0
SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_vnode, "S,vnode", "");
#endif

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;
	struct proc *p = initproc;	/* XXX XXX should this be proc0? */
	int error;

	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		error = dounmount(mp, MNT_FORCE, p);
		if (error) {
			printf("unmount of %s failed (",
			    mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t) np, i);
	saddr = (struct sockaddr *) (np + 1);
	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not used,
		 * do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **) &nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *) w;

	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t) rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
2239 */
2240static void
2241vfs_free_addrlist(nep)
2242	struct netexport *nep;
2243{
2244	register int i;
2245	register struct radix_node_head *rnh;
2246
2247	for (i = 0; i <= AF_MAX; i++)
2248		if ((rnh = nep->ne_rtable[i])) {
2249			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
2250			    (caddr_t) rnh);
2251			free((caddr_t) rnh, M_RTABLE);
2252			nep->ne_rtable[i] = 0;
2253		}
2254}
2255
2256int
2257vfs_export(mp, nep, argp)
2258	struct mount *mp;
2259	struct netexport *nep;
2260	struct export_args *argp;
2261{
2262	int error;
2263
2264	if (argp->ex_flags & MNT_DELEXPORT) {
2265		if (mp->mnt_flag & MNT_EXPUBLIC) {
2266			vfs_setpublicfs(NULL, NULL, NULL);
2267			mp->mnt_flag &= ~MNT_EXPUBLIC;
2268		}
2269		vfs_free_addrlist(nep);
2270		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2271	}
2272	if (argp->ex_flags & MNT_EXPORTED) {
2273		if (argp->ex_flags & MNT_EXPUBLIC) {
2274			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2275				return (error);
2276			mp->mnt_flag |= MNT_EXPUBLIC;
2277		}
2278		if ((error = vfs_hang_addrlist(mp, nep, argp)))
2279			return (error);
2280		mp->mnt_flag |= MNT_EXPORTED;
2281	}
2282	return (0);
2283}
2284
2285
2286/*
2287 * Set the publicly exported filesystem (WebNFS). Currently, only
2288 * one public filesystem is possible in the spec (RFC 2054 and 2055).
2289 */
2290int
2291vfs_setpublicfs(mp, nep, argp)
2292	struct mount *mp;
2293	struct netexport *nep;
2294	struct export_args *argp;
2295{
2296	int error;
2297	struct vnode *rvp;
2298	char *cp;
2299
2300	/*
2301	 * mp == NULL -> invalidate the current info, the FS is
2302	 * no longer exported. May be called from either vfs_export
2303	 * or unmount, so check if it hasn't already been done.
2304	 */
2305	if (mp == NULL) {
2306		if (nfs_pub.np_valid) {
2307			nfs_pub.np_valid = 0;
2308			if (nfs_pub.np_index != NULL) {
2309				FREE(nfs_pub.np_index, M_TEMP);
2310				nfs_pub.np_index = NULL;
2311			}
2312		}
2313		return (0);
2314	}
2315
2316	/*
2317	 * Only one allowed at a time.
2318	 */
2319	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2320		return (EBUSY);
2321
2322	/*
2323	 * Get real filehandle for root of exported FS.
2324	 */
2325	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
2326	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
2327
2328	if ((error = VFS_ROOT(mp, &rvp)))
2329		return (error);
2330
2331	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) {
2332		vput(rvp);	/* release the root vnode on error, too */
2333		return (error);
2334	}
2335	vput(rvp);
2336	/*
2337	 * If an indexfile was specified, pull it in.
2338	 */
2339	if (argp->ex_indexfile != NULL) {
2340		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2341		    M_WAITOK);
2342		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2343		    MAXNAMLEN, (size_t *)0);
2344		if (!error) {
2345			/*
2346			 * Check for illegal filenames.
2347			 */
2348			for (cp = nfs_pub.np_index; *cp; cp++) {
2349				if (*cp == '/') {
2350					error = EINVAL;
2351					break;
2352				}
2353			}
2354		}
2355		if (error) {
2356			FREE(nfs_pub.np_index, M_TEMP);
2357			return (error);
2358		}
2359	}
2360
2361	nfs_pub.np_mount = mp;
2362	nfs_pub.np_valid = 1;
2363	return (0);
2364}
2365
2366struct netcred *
2367vfs_export_lookup(mp, nep, nam)
2368	register struct mount *mp;
2369	struct netexport *nep;
2370	struct sockaddr *nam;
2371{
2372	register struct netcred *np;
2373	register struct radix_node_head *rnh;
2374	struct sockaddr *saddr;
2375
2376	np = NULL;
2377	if (mp->mnt_flag & MNT_EXPORTED) {
2378		/*
2379		 * Lookup in the export list first.
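		 *
		 * [Editor's sketch, not part of the original file: the NFS
		 * server uses this lookup while converting a client file
		 * handle, roughly as follows (the exflagsp/credanonp names
		 * follow the VFS_FHTOVP() interface of this era):
		 *
		 *	np = vfs_export_lookup(mp, &ump->um_export, nam);
		 *	if (np == NULL)
		 *		return (EACCES);
		 *	*exflagsp = np->netc_exflags;
		 *	*credanonp = &np->netc_anon;
		 *
		 * A NULL return means the client matched no export entry
		 * and no default export exists, so the request is refused.]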
2380		 */
2381		if (nam != NULL) {
2382			saddr = nam;
2383			rnh = nep->ne_rtable[saddr->sa_family];
2384			if (rnh != NULL) {
2385				np = (struct netcred *)
2386					(*rnh->rnh_matchaddr)((caddr_t)saddr,
2387					    rnh);
2388				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2389					np = NULL;
2390			}
2391		}
2392		/*
2393		 * If no address match, use the default if it exists.
2394		 */
2395		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2396			np = &nep->ne_defexported;
2397	}
2398	return (np);
2399}
2400
2401/*
2402 * Perform msync on all vnodes under a mount point.
2403 * The mount point must be locked.
2404 */
2405void
2406vfs_msync(struct mount *mp, int flags) {
2407	struct vnode *vp, *nvp;
2408	int anyio, tries;
2409
2410	tries = 5;
2411loop:
2412	anyio = 0;
2413	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
2414
2415		nvp = vp->v_mntvnodes.le_next;
2416
2417		if (vp->v_mount != mp) {
2418			goto loop;
2419		}
2420
2421		if ((vp->v_flag & VXLOCK) ||
2422		    (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))) {
2423			continue;
2424		}
2425
2426		simple_lock(&vp->v_interlock);
2427		if (vp->v_object &&
2428		    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
2429			if (!vget(vp,
2430			    LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
2431				if (vp->v_object) {
2432					vm_object_page_clean(vp->v_object, 0, 0, TRUE);
2433					anyio = 1;
2434				}
2435				vput(vp);
2436			}
2437		} else {
2438			simple_unlock(&vp->v_interlock);
2439		}
2440	}
2441	if (anyio && (--tries > 0))
2442		goto loop;
2443}
2444
2445/*
2446 * Create the VM object needed for VMIO and mmap support.  This
2447 * is done for all VREG files in the system.  Some filesystems might
2448 * want to take advantage of the additional metadata buffering
2449 * capability of the VMIO code by making the device node be VMIO mode also.
2450 *
2451 * If !waslocked, must be called with interlock.
2452 */
2453int
2454vfs_object_create(vp, p, cred, waslocked)
2455	struct vnode *vp;
2456	struct proc *p;
2457	struct ucred *cred;
2458	int waslocked;
2459{
2460	struct vattr vat;
2461	vm_object_t object;
2462	int error = 0;
2463
2464	if ((vp->v_type != VREG) && (vp->v_type != VBLK)) {
2465		if (!waslocked)
2466			simple_unlock(&vp->v_interlock);
2467		return 0;
2468	}
2469
2470	if (!waslocked)
2471		vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, p);
2472
2473retry:
2474	if ((object = vp->v_object) == NULL) {
2475		if (vp->v_type == VREG) {
2476			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
2477				goto retn;
2478			object = vnode_pager_alloc(vp,
2479			    OFF_TO_IDX(round_page(vat.va_size)), 0, 0);
2480		} else if (major(vp->v_rdev) < nblkdev) {
2481			/*
2482			 * This simply allocates the biggest object possible
2483			 * for a VBLK vnode.  This should be fixed, but doesn't
2484			 * cause any problems (yet).
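			 *
			 * [Editor's note, not part of the original file: in
			 * the VREG case above the object size comes from the
			 * file attributes; with 4K pages a 10000-byte file
			 * gives OFF_TO_IDX(round_page(10000)) ==
			 * 12288 / 4096 == 3 pages.  A block device's size is
			 * not known here, hence the INT_MAX catch-all that
			 * follows.]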
2485			 */
2486			object = vnode_pager_alloc(vp, INT_MAX, 0, 0);
2487		}
2488		object->ref_count--;
2489		vp->v_usecount--;
2490	} else {
2491		if (object->flags & OBJ_DEAD) {
2492			VOP_UNLOCK(vp, 0, p);
2493			tsleep(object, PVM, "vodead", 0);
2494			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2495			goto retry;
2496		}
2497	}
2498
2499	if (vp->v_object) {
2500		vp->v_flag |= VOBJBUF;
2501	}
2502
2503retn:
2504	if (!waslocked) {
2505		simple_lock(&vp->v_interlock);
2506		VOP_UNLOCK(vp, LK_INTERLOCK, p);
2507	}
2508
2509	return error;
2510}
2511
2512static void
2513vfree(vp)
2514	struct vnode *vp;
2515{
2516	int s;
2517
2518	s = splbio();
2519	simple_lock(&vnode_free_list_slock);
2520	if (vp->v_flag & VTBFREE) {
2521		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
2522		vp->v_flag &= ~VTBFREE;
2523	}
2524	if (vp->v_flag & VAGE) {
2525		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
2526	} else {
2527		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
2528	}
2529	freevnodes++;
2530	simple_unlock(&vnode_free_list_slock);
2531	vp->v_flag &= ~VAGE;
2532	vp->v_flag |= VFREE;
2533	splx(s);
2534}
2535
2536void
2537vbusy(vp)
2538	struct vnode *vp;
2539{
2540	int s;
2541
2542	s = splbio();
2543	simple_lock(&vnode_free_list_slock);
2544	if (vp->v_flag & VTBFREE) {
2545		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
2546		vp->v_flag &= ~VTBFREE;
2547	} else {
2548		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
2549		freevnodes--;
2550	}
2551	simple_unlock(&vnode_free_list_slock);
2552	vp->v_flag &= ~(VFREE|VAGE);
2553	splx(s);
2554}
2555
2556/*
2557 * Record a process's interest in events which might happen to
2558 * a vnode.  Because poll uses the historic select-style interface
2559 * internally, this routine serves as both the ``check for any
2560 * pending events'' and the ``record my interest in future events''
2561 * functions.  (These are done together, while the lock is held,
2562 * to avoid race conditions.)
2563 */
2564int
2565vn_pollrecord(vp, p, events)
2566	struct vnode *vp;
2567	struct proc *p;
2568	short events;
2569{
2570	simple_lock(&vp->v_pollinfo.vpi_lock);
2571	if (vp->v_pollinfo.vpi_revents & events) {
2572		/*
2573		 * This leaves events we are not interested
2574		 * in available for the other process which
2575		 * presumably had requested them
2576		 * (otherwise they would never have been
2577		 * recorded).
2578		 */
2579		events &= vp->v_pollinfo.vpi_revents;
2580		vp->v_pollinfo.vpi_revents &= ~events;
2581
2582		simple_unlock(&vp->v_pollinfo.vpi_lock);
2583		return events;
2584	}
2585	vp->v_pollinfo.vpi_events |= events;
2586	selrecord(p, &vp->v_pollinfo.vpi_selinfo);
2587	simple_unlock(&vp->v_pollinfo.vpi_lock);
2588	return 0;
2589}
2590
2591/*
2592 * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
2593 * it is possible for us to miss an event due to race conditions, but
2594 * that condition is expected to be rare, so for the moment it is the
2595 * preferred interface.
2596 */
2597void
2598vn_pollevent(vp, events)
2599	struct vnode *vp;
2600	short events;
2601{
2602	simple_lock(&vp->v_pollinfo.vpi_lock);
2603	if (vp->v_pollinfo.vpi_events & events) {
2604		/*
2605		 * We clear vpi_events so that we don't
2606		 * call selwakeup() twice if two events are
2607		 * posted before the polling process(es) is
2608		 * awakened.  This also ensures that we take at
2609		 * most one selwakeup() if the polling process
2610		 * is no longer interested.  However, it does
2611		 * mean that only one event can be noticed at
2612		 * a time.
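		 *
		 * [Editor's sketch, not part of the original file: the
		 * intended producer/consumer pairing is that a VOP_POLL
		 * implementation records interest via vn_pollrecord()
		 * while state changes are reported here, e.g.:
		 *
		 *	in a filesystem's VOP_POLL routine:
		 *		return (vn_pollrecord(ap->a_vp, ap->a_p,
		 *		    ap->a_events));
		 *
		 *	where new data becomes readable:
		 *		vn_pollevent(vp, POLLIN | POLLRDNORM);
		 * ]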
		 * (Perhaps we should only clear those
2613		 * event bits which we note?) XXX
2614		 */
2615		vp->v_pollinfo.vpi_events = 0;	/* &= ~events ??? */
2616		vp->v_pollinfo.vpi_revents |= events;
2617		selwakeup(&vp->v_pollinfo.vpi_selinfo);
2618	}
2619	simple_unlock(&vp->v_pollinfo.vpi_lock);
2620}
2621
2622/*
2623 * Wake up anyone polling on vp because it is being revoked.
2624 * This depends on dead_poll() returning POLLHUP for correct
2625 * behavior.
2626 */
2627void
2628vn_pollgone(vp)
2629	struct vnode *vp;
2630{
2631	simple_lock(&vp->v_pollinfo.vpi_lock);
2632	if (vp->v_pollinfo.vpi_events) {
2633		vp->v_pollinfo.vpi_events = 0;
2634		selwakeup(&vp->v_pollinfo.vpi_selinfo);
2635	}
2636	simple_unlock(&vp->v_pollinfo.vpi_lock);
2637}
2638
2639
2640
2641/*
2642 * Routine to create and manage a filesystem syncer vnode.
2643 */
2644#define sync_close ((int (*) __P((struct vop_close_args *)))nullop)
2645int	sync_fsync __P((struct vop_fsync_args *));
2646int	sync_inactive __P((struct vop_inactive_args *));
2647int	sync_reclaim __P((struct vop_reclaim_args *));
2648#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock)
2649#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock)
2650int	sync_print __P((struct vop_print_args *));
2651#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)
2652
2653vop_t **sync_vnodeop_p;
2654struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
2655	{ &vop_default_desc,	(vop_t *) vop_eopnotsupp },
2656	{ &vop_close_desc,	(vop_t *) sync_close },		/* close */
2657	{ &vop_fsync_desc,	(vop_t *) sync_fsync },		/* fsync */
2658	{ &vop_inactive_desc,	(vop_t *) sync_inactive },	/* inactive */
2659	{ &vop_reclaim_desc,	(vop_t *) sync_reclaim },	/* reclaim */
2660	{ &vop_lock_desc,	(vop_t *) sync_lock },		/* lock */
2661	{ &vop_unlock_desc,	(vop_t *) sync_unlock },	/* unlock */
2662	{ &vop_print_desc,	(vop_t *) sync_print },		/* print */
2663	{ &vop_islocked_desc,	(vop_t *) sync_islocked },	/* islocked */
2664	{ NULL, NULL }
2665};
2666struct vnodeopv_desc sync_vnodeop_opv_desc =
2667	{ &sync_vnodeop_p, sync_vnodeop_entries };
2668
2669VNODEOP_SET(sync_vnodeop_opv_desc);
2670
2671/*
2672 * Create a new filesystem syncer vnode for the specified mount point.
2673 */
2674int
2675vfs_allocate_syncvnode(mp)
	struct mount *mp;
2676
2677{
2678	struct vnode *vp;
2679	static long start, incr, next;
2680	int error;
2681
2682	/* Allocate a new vnode */
2683	if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
2684		mp->mnt_syncer = NULL;
2685		return (error);
2686	}
2687	vp->v_type = VNON;
2688	/*
2689	 * Place the vnode onto the syncer worklist. We attempt to
2690	 * scatter them about on the list so that they will go off
2691	 * at evenly distributed times even if all the filesystems
2692	 * are mounted at once.
2693	 */
2694	next += incr;
2695	if (next == 0 || next > syncer_maxdelay) {
2696		start /= 2;
2697		incr /= 2;
2698		if (start == 0) {
2699			start = syncer_maxdelay / 2;
2700			incr = syncer_maxdelay;
2701		}
2702		next = start;
2703	}
2704	vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
2705	mp->mnt_syncer = vp;
2706	return (0);
2707}
2708
2709/*
2710 * Do a lazy sync of the filesystem.
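 *
 * [Editor's note, not part of the original file: tracing the
 * start/incr/next logic in vfs_allocate_syncvnode() above with
 * syncer_maxdelay == 32, successive mounts get worklist slots
 * 16, 8, 24, 4, 12, 20, 28, 2, ...; each halving of "start"
 * interleaves new syncer vnodes between the existing ones, which
 * is what spreads the per-mount sync bursts across the wheel.]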
2711 */
2712int
2713sync_fsync(ap)
2714	struct vop_fsync_args /* {
2715		struct vnode *a_vp;
2716		struct ucred *a_cred;
2717		int a_waitfor;
2718		struct proc *a_p;
2719	} */ *ap;
2720{
2721	struct vnode *syncvp = ap->a_vp;
2722	struct mount *mp = syncvp->v_mount;
2723	struct proc *p = ap->a_p;
2724	int asyncflag;
2725
2726	/*
2727	 * We only need to do something if this is a lazy evaluation.
2728	 */
2729	if (ap->a_waitfor != MNT_LAZY)
2730		return (0);
2731
2732	/*
2733	 * Move ourselves to the back of the sync list.
2734	 */
2735	vn_syncer_add_to_worklist(syncvp, syncdelay);
2736
2737	/*
2738	 * Walk the list of vnodes pushing all that are dirty and
2739	 * not already on the sync list.
2740	 */
2741	simple_lock(&mountlist_slock);
2742	if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0)
2743		return (0);
2744	asyncflag = mp->mnt_flag & MNT_ASYNC;
2745	mp->mnt_flag &= ~MNT_ASYNC;
2746	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
2747	if (asyncflag)
2748		mp->mnt_flag |= MNT_ASYNC;
2749	vfs_unbusy(mp, p);
2750	return (0);
2751}
2752
2753/*
2754 * The syncer vnode is no longer referenced.
2755 */
2756int
2757sync_inactive(ap)
2758	struct vop_inactive_args /* {
2759		struct vnode *a_vp;
2760		struct proc *a_p;
2761	} */ *ap;
2762{
2763
2764	vgone(ap->a_vp);
2765	return (0);
2766}
2767
2768/*
2769 * The syncer vnode is no longer needed and is being decommissioned.
2770 */
2771int
2772sync_reclaim(ap)
2773	struct vop_reclaim_args /* {
2774		struct vnode *a_vp;
2775	} */ *ap;
2776{
2777	struct vnode *vp = ap->a_vp;
2778
2779	vp->v_mount->mnt_syncer = NULL;
2780	if (vp->v_flag & VONWORKLST) {
2781		LIST_REMOVE(vp, v_synclist);
2782		vp->v_flag &= ~VONWORKLST;
2783	}
2784
2785	return (0);
2786}
2787
2788/*
2789 * Print out a syncer vnode.
2790 */
2791int
2792sync_print(ap)
2793	struct vop_print_args /* {
2794		struct vnode *a_vp;
2795	} */ *ap;
2796{
2797	struct vnode *vp = ap->a_vp;
2798
2799	printf("syncer vnode");
2800	if (vp->v_vnlock != NULL)
2801		lockmgr_printinfo(vp->v_vnlock);
2802	printf("\n");
2803	return (0);
2804}
2805
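/*
 * [Editor's sketch, not part of the original file: the syncer vnodes
 * created above are serviced by the system syncer process as it
 * sweeps one slot of syncer_workitem_pending per second; in outline:
 *
 *	slp = &syncer_workitem_pending[syncer_delayno];
 *	syncer_delayno = (syncer_delayno + 1) % syncer_maxdelay;
 *	while ((vp = slp->lh_first) != NULL) {
 *		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 *		(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
 *		VOP_UNLOCK(vp, 0, p);
 *	}
 *
 * For a syncer vnode that VOP_FSYNC() resolves to sync_fsync()
 * above, which requeues the vnode and pushes the whole filesystem
 * with VFS_SYNC(..., MNT_LAZY, ...).]
 */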