vfs_export.c revision 3308
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California. All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 * $Id: vfs_subr.c,v 1.9 1994/09/25 19:33:52 phk Exp $
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

void	insmntque __P((struct vnode *, struct mount *));

enum vtype iftovt_tab[16] = {
        VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
        VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
        0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
        S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define bufremvn(bp) { \
        LIST_REMOVE(bp, b_vnbufs); \
        (bp)->b_vnbufs.le_next = NOLIST; \
}

TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */

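/*
 * vntblinit() below is expected to run exactly once, early in bootstrap
 * (in 4.4BSD-derived kernels it is called from vfsinit()), before the
 * first mount or the first call to getnewvnode().
 */
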
/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

        TAILQ_INIT(&vnode_free_list);
        TAILQ_INIT(&mountlist);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
int
vfs_lock(mp)
        register struct mount *mp;
{

        while (mp->mnt_flag & MNT_MLOCK) {
                mp->mnt_flag |= MNT_MWAIT;
                sleep((caddr_t)mp, PVFS);
        }
        mp->mnt_flag |= MNT_MLOCK;
        return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
        register struct mount *mp;
{

        if ((mp->mnt_flag & MNT_MLOCK) == 0)
                panic("vfs_unlock: not locked");
        mp->mnt_flag &= ~MNT_MLOCK;
        if (mp->mnt_flag & MNT_MWAIT) {
                mp->mnt_flag &= ~MNT_MWAIT;
                wakeup((caddr_t)mp);
        }
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
int
vfs_busy(mp)
        register struct mount *mp;
{

        while (mp->mnt_flag & MNT_MPBUSY) {
                mp->mnt_flag |= MNT_MPWANT;
                sleep((caddr_t)&mp->mnt_flag, PVFS);
        }
        if (mp->mnt_flag & MNT_UNMOUNT)
                return (1);
        mp->mnt_flag |= MNT_MPBUSY;
        return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
void
vfs_unbusy(mp)
        register struct mount *mp;
{

        if ((mp->mnt_flag & MNT_MPBUSY) == 0)
                panic("vfs_unbusy: not busy");
        mp->mnt_flag &= ~MNT_MPBUSY;
        if (mp->mnt_flag & MNT_MPWANT) {
                mp->mnt_flag &= ~MNT_MPWANT;
                wakeup((caddr_t)&mp->mnt_flag);
        }
}

void
vfs_unmountroot(rootfs)
        struct mount *rootfs;
{
        struct mount *mp = rootfs;
        int error;

        if (vfs_busy(mp)) {
                printf("failed to unmount root\n");
                return;
        }

        mp->mnt_flag |= MNT_UNMOUNT;
        if ((error = vfs_lock(mp))) {
                printf("lock of root filesystem failed (%d)\n", error);
                return;
        }

        vnode_pager_umount(mp);	/* release cached vnodes */
        cache_purgevfs(mp);	/* remove cache entries for this file sys */

        if ((error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc)))
                printf("sync of root filesystem failed (%d)\n", error);

        if ((error = VFS_UNMOUNT(mp, MNT_FORCE, initproc))) {
                printf("unmount of root filesystem failed (");
                if (error == EBUSY)
                        printf("BUSY)\n");
                else
                        printf("%d)\n", error);
        }

        mp->mnt_flag &= ~MNT_UNMOUNT;
        vfs_unbusy(mp);
}

/*
 * Unmount all filesystems.  Should only be called by halt().
 */
void
vfs_unmountall()
{
        struct mount *mp, *mp_next, *rootfs = NULL;
        int error;

        /* unmount all but rootfs */
        for (mp = mountlist.tqh_first; mp != NULL; mp = mp_next) {
                mp_next = mp->mnt_list.tqe_next;

                if (mp->mnt_flag & MNT_ROOTFS) {
                        rootfs = mp;
                        continue;
                }

                error = dounmount(mp, MNT_FORCE, initproc);
                if (error) {
                        printf("unmount of %s failed (", mp->mnt_stat.f_mntonname);
                        if (error == EBUSY)
                                printf("BUSY)\n");
                        else
                                printf("%d)\n", error);
                }
        }

        /* and finally... */
        if (rootfs) {
                vfs_unmountroot(rootfs);
        } else {
                printf("no root filesystem\n");
        }
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
        fsid_t *fsid;
{
        register struct mount *mp;

        for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
                if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
                    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
                        return (mp);
        }
        return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
getnewfsid(mp, mtype)
        struct mount *mp;
        int mtype;
{
        static u_short xxxfs_mntid;
        fsid_t tfsid;

        mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
        mp->mnt_stat.f_fsid.val[1] = mtype;
        if (xxxfs_mntid == 0)
                ++xxxfs_mntid;
        tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
        tfsid.val[1] = mtype;
        if (mountlist.tqh_first != NULL) {
                while (getvfs(&tfsid)) {
                        tfsid.val[0]++;
                        xxxfs_mntid++;
                }
        }
        mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
        register struct vattr *vap;
{

        vap->va_type = VNON;
        vap->va_size = VNOVAL;
        vap->va_bytes = VNOVAL;
        vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
            vap->va_fsid = vap->va_fileid =
            vap->va_blocksize = vap->va_rdev =
            vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
            vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
            vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
            vap->va_flags = vap->va_gen = VNOVAL;
        vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
extern void vclean();
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
        enum vtagtype tag;
        struct mount *mp;
        int (**vops)();
        struct vnode **vpp;
{
        register struct vnode *vp;

        if ((vnode_free_list.tqh_first == NULL &&
             numvnodes < 2 * desiredvnodes) ||
            numvnodes < desiredvnodes) {
                vp = (struct vnode *)malloc((u_long)sizeof *vp,
                    M_VNODE, M_WAITOK);
                bzero((char *)vp, sizeof *vp);
                numvnodes++;
        } else {
                if ((vp = vnode_free_list.tqh_first) == NULL) {
                        tablefull("vnode");
                        *vpp = 0;
                        return (ENFILE);
                }
                if (vp->v_usecount)
                        panic("free vnode isn't");
                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                /* see comment on why 0xdeadb is set at end of vgone (below) */
                vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
                vp->v_lease = NULL;
                if (vp->v_type != VBAD)
                        vgone(vp);
#ifdef DIAGNOSTIC
                {
                        int s;

                        if (vp->v_data)
                                panic("cleaned vnode isn't");
                        s = splbio();
                        if (vp->v_numoutput)
                                panic("Clean vnode has pending I/O's");
                        splx(s);
                }
#endif
                vp->v_flag = 0;
                vp->v_lastr = 0;
                vp->v_ralen = 0;
                vp->v_maxra = 0;
                vp->v_lastw = 0;
                vp->v_lasta = 0;
                vp->v_cstart = 0;
                vp->v_clen = 0;
                vp->v_socket = 0;
                vp->v_writecount = 0;	/* XXX */
        }
        vp->v_type = VNON;
        cache_purge(vp);
        vp->v_tag = tag;
        vp->v_op = vops;
        insmntque(vp, mp);
        *vpp = vp;
        vp->v_usecount = 1;
        vp->v_data = 0;
        return (0);
}

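/*
 * Illustrative use of getnewvnode() by a filesystem's inode-allocation
 * path (sketch only; VT_UFS and ufs_vnodeop_p are the UFS vnode tag and
 * operations vector of 4.4BSD-derived kernels, "ip" stands for the
 * caller's private per-file structure, and error handling is elided):
 *
 *	struct vnode *vp;
 *	int error;
 *
 *	if ((error = getnewvnode(VT_UFS, mp, ufs_vnodeop_p, &vp)) != 0)
 *		return (error);
 *	vp->v_data = ip;
 *
 * The vnode comes back with v_usecount == 1 and v_data == 0, and has
 * already been placed on mp's mnt_vnodelist by the insmntque() call
 * inside getnewvnode().
 */
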
/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
        register struct vnode *vp;
        register struct mount *mp;
{

        /*
         * Delete from old mount point vnode list, if on one.
         */
        if (vp->v_mount != NULL)
                LIST_REMOVE(vp, v_mntvnodes);
        /*
         * Insert into list of vnodes for the new mount point, if available.
         */
        if ((vp->v_mount = mp) == NULL)
                return;
        LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
        register struct buf *bp;
{
        register struct vnode *vp;

        bp->b_flags &= ~B_WRITEINPROG;
        if ((vp = bp->b_vp)) {
                vp->v_numoutput--;
                if (vp->v_numoutput < 0)
                        panic("vwakeup: neg numoutput");
                if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
                        if (vp->v_numoutput < 0)
                                panic("vwakeup: neg numoutput");
                        vp->v_flag &= ~VBWAIT;
                        wakeup((caddr_t)&vp->v_numoutput);
                }
        }
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
        register struct vnode *vp;
        int flags;
        struct ucred *cred;
        struct proc *p;
        int slpflag, slptimeo;
{
        register struct buf *bp;
        struct buf *nbp, *blist;
        int s, error;
        vm_pager_t pager;
        vm_object_t object;

        if (flags & V_SAVE) {
                if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
                        return (error);
                if (vp->v_dirtyblkhd.lh_first != NULL)
                        panic("vinvalbuf: dirty bufs");
        }
        for (;;) {
                if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
                        while (blist && blist->b_lblkno < 0)
                                blist = blist->b_vnbufs.le_next;
                if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
                    (flags & V_SAVEMETA))
                        while (blist && blist->b_lblkno < 0)
                                blist = blist->b_vnbufs.le_next;
                if (!blist)
                        break;

                for (bp = blist; bp; bp = nbp) {
                        nbp = bp->b_vnbufs.le_next;
                        if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
                                continue;
                        s = splbio();
                        if (bp->b_flags & B_BUSY) {
                                bp->b_flags |= B_WANTED;
                                error = tsleep((caddr_t)bp,
                                    slpflag | (PRIBIO + 1), "vinvalbuf",
                                    slptimeo);
                                splx(s);
                                if (error)
                                        return (error);
                                break;
                        }
                        bremfree(bp);
                        bp->b_flags |= B_BUSY;
                        splx(s);
                        /*
                         * XXX Since there are no node locks for NFS, I believe
                         * there is a slight chance that a delayed write will
                         * occur while sleeping just above, so check for it.
                         */
                        if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
                                (void) VOP_BWRITE(bp);
                                break;
                        }
                        bp->b_flags |= B_INVAL;
                        brelse(bp);
                }
        }

        pager = (vm_pager_t)vp->v_vmdata;
        if (pager != NULL) {
                object = vm_object_lookup(pager);
                if (object) {
                        vm_object_lock(object);
                        if (flags & V_SAVE)
                                vm_object_page_clean(object, 0, 0, TRUE, FALSE);
                        vm_object_page_remove(object, 0, object->size);
                        vm_object_unlock(object);
                        vm_object_deallocate(object);
                }
        }

        if (!(flags & V_SAVEMETA) &&
            (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
                panic("vinvalbuf: flush failed");
        return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
        register struct vnode *vp;
        register struct buf *bp;
{

        if (bp->b_vp)
                panic("bgetvp: not free");
        VHOLD(vp);
        bp->b_vp = vp;
        if (vp->v_type == VBLK || vp->v_type == VCHR)
                bp->b_dev = vp->v_rdev;
        else
                bp->b_dev = NODEV;
        /*
         * Insert onto list for new vnode.
         */
        bufinsvn(bp, &vp->v_cleanblkhd);
}

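/*
 * bgetvp() above and brelvp() below must balance: every buffer
 * associated with a vnode accounts for one v_holdcnt reference (VHOLD),
 * released with HOLDRELE when the buffer is disassociated.  New buffers
 * start on v_cleanblkhd; reassignbuf() (below) moves a buffer to
 * v_dirtyblkhd once it carries delayed-write data (B_DELWRI).
 */
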
/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
        register struct buf *bp;
{
        struct vnode *vp;

        if (bp->b_vp == (struct vnode *) 0)
                panic("brelvp: NULL");
        /*
         * Delete from old vnode list, if on one.
         */
        if (bp->b_vnbufs.le_next != NOLIST)
                bufremvn(bp);
        vp = bp->b_vp;
        bp->b_vp = (struct vnode *) 0;
        HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
        register struct buf *bp;
        register struct vnode *newvp;
{
        register struct buflists *listheadp;

        if (newvp == NULL) {
                printf("reassignbuf: NULL\n");
                return;
        }
        /*
         * Delete from old vnode list, if on one.
         */
        if (bp->b_vnbufs.le_next != NOLIST)
                bufremvn(bp);
        /*
         * If dirty, put on list of dirty buffers;
         * otherwise insert onto list of clean buffers.
         */
        if (bp->b_flags & B_DELWRI)
                listheadp = &newvp->v_dirtyblkhd;
        else
                listheadp = &newvp->v_cleanblkhd;
        bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
        dev_t dev;
        struct vnode **vpp;
{
        register struct vnode *vp;
        struct vnode *nvp;
        int error;

        if (dev == NODEV)
                return (0);
        error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
        if (error) {
                *vpp = 0;
                return (error);
        }
        vp = nvp;
        vp->v_type = VBLK;
        if ((nvp = checkalias(vp, dev, (struct mount *)0))) {
                vput(vp);
                vp = nvp;
        }
        *vpp = vp;
        return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
        register struct vnode *nvp;
        dev_t nvp_rdev;
        struct mount *mp;
{
        register struct vnode *vp;
        struct vnode **vpp;

        if (nvp->v_type != VBLK && nvp->v_type != VCHR)
                return (NULLVP);

        vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
        for (vp = *vpp; vp; vp = vp->v_specnext) {
                if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
                        continue;
                /*
                 * Alias, but not in use, so flush it out.
                 */
                if (vp->v_usecount == 0) {
                        vgone(vp);
                        goto loop;
                }
                if (vget(vp, 1))
                        goto loop;
                break;
        }
        if (vp == NULL || vp->v_tag != VT_NON) {
                MALLOC(nvp->v_specinfo, struct specinfo *,
                    sizeof(struct specinfo), M_VNODE, M_WAITOK);
                nvp->v_rdev = nvp_rdev;
                nvp->v_hashchain = vpp;
                nvp->v_specnext = *vpp;
                nvp->v_specflags = 0;
                *vpp = nvp;
                if (vp != NULL) {
                        nvp->v_flag |= VALIASED;
                        vp->v_flag |= VALIASED;
                        vput(vp);
                }
                return (NULLVP);
        }
        VOP_UNLOCK(vp);
        vclean(vp, 0);
        vp->v_op = nvp->v_op;
        vp->v_tag = nvp->v_tag;
        nvp->v_type = VNON;
        insmntque(vp, mp);
        return (vp);
}

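/*
 * All vnodes for one special device are chained through v_specnext off
 * the hash bucket speclisth[SPECHASH(rdev)], and each is marked
 * VALIASED whenever two or more coexist.  checkalias() above maintains
 * that invariant; vcount(), vgoneall(), and vfs_mountedon() (below)
 * rely on it to find every alias of a device.
 */
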
/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, lockflag)
        register struct vnode *vp;
        int lockflag;
{

        /*
         * If the vnode is in the process of being cleaned out for
         * another use, we wait for the cleaning to finish and then
         * return failure. Cleaning is determined either by checking
         * that the VXLOCK flag is set, or that the use count is
         * zero with the back pointer set to show that it has been
         * removed from the free list by getnewvnode. The VXLOCK
         * flag may not have been set yet because vclean is blocked in
         * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
         */
        if ((vp->v_flag & VXLOCK) ||
            (vp->v_usecount == 0 &&
             vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
                vp->v_flag |= VXWANT;
                sleep((caddr_t)vp, PINOD);
                return (1);
        }
        if (vp->v_usecount == 0)
                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
        vp->v_usecount++;
        if (lockflag)
                VOP_LOCK(vp);
        return (0);
}

/*
 * Vnode reference, just increment the count
 */
void
vref(vp)
        struct vnode *vp;
{

        if (vp->v_usecount <= 0)
                panic("vref used where vget required");
        vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
        register struct vnode *vp;
{

        VOP_UNLOCK(vp);
        vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
        register struct vnode *vp;
{

#ifdef DIAGNOSTIC
        if (vp == NULL)
                panic("vrele: null vp");
#endif
        vp->v_usecount--;
        if (vp->v_usecount > 0)
                return;
#ifdef DIAGNOSTIC
        if (vp->v_usecount != 0 /* || vp->v_writecount != 0 */) {
                vprint("vrele: bad ref count", vp);
                panic("vrele: ref cnt");
        }
#endif
        /*
         * insert at tail of LRU list
         */
        TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
        VOP_INACTIVE(vp);
}

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
        register struct vnode *vp;
{

        vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
        register struct vnode *vp;
{

        if (vp->v_holdcnt <= 0)
                panic("holdrele: holdcnt");
        vp->v_holdcnt--;
}

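/*
 * Summary of the per-vnode reference counts manipulated above:
 *
 *	v_usecount	active references; the first must be obtained
 *			with vget() (which removes the vnode from the
 *			free list), additional ones with vref().
 *	v_holdcnt	references from pages and buffers, taken with
 *			vhold() and dropped with holdrele() (usually
 *			via the VHOLD and HOLDRELE macros).
 *	v_numoutput	writes in progress, decremented by vwakeup().
 *
 * When v_usecount drops to zero, vrele() puts the vnode at the tail of
 * the LRU free list, from which getnewvnode() may later reclaim it.
 */
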
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
        struct mount *mp;
        struct vnode *skipvp;
        int flags;
{
        register struct vnode *vp, *nvp;
        int busy = 0;

        if ((mp->mnt_flag & MNT_MPBUSY) == 0)
                panic("vflush: not busy");
loop:
        for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
                if (vp->v_mount != mp)
                        goto loop;
                nvp = vp->v_mntvnodes.le_next;
                /*
                 * Skip over a selected vnode.
                 */
                if (vp == skipvp)
                        continue;
                /*
                 * Skip over vnodes marked VSYSTEM.
                 */
                if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
                        continue;
                /*
                 * If WRITECLOSE is set, only flush out regular file
                 * vnodes open for writing.
                 */
                if ((flags & WRITECLOSE) &&
                    (vp->v_writecount == 0 || vp->v_type != VREG))
                        continue;
                /*
                 * With v_usecount == 0, all we need to do is clear
                 * out the vnode data structures and we are done.
                 */
                if (vp->v_usecount == 0) {
                        vgone(vp);
                        continue;
                }
                /*
                 * If FORCECLOSE is set, forcibly close the vnode.
                 * For block or character devices, revert to an
                 * anonymous device. For all other files, just kill them.
                 */
                if (flags & FORCECLOSE) {
                        if (vp->v_type != VBLK && vp->v_type != VCHR) {
                                vgone(vp);
                        } else {
                                vclean(vp, 0);
                                vp->v_op = spec_vnodeop_p;
                                insmntque(vp, (struct mount *)0);
                        }
                        continue;
                }
#ifdef DIAGNOSTIC
                if (busyprt)
                        vprint("vflush: busy vnode", vp);
#endif
                busy++;
        }
        if (busy)
                return (EBUSY);
        return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
        register struct vnode *vp;
        int flags;
{
        int active;

        /*
         * Check to see if the vnode is in use.
         * If so we have to reference it before we clean it out
         * so that its count cannot fall to zero and generate a
         * race against ourselves to recycle it.
         */
        if ((active = vp->v_usecount))
                VREF(vp);
        /*
         * Even if the count is zero, the VOP_INACTIVE routine may still
         * have the object locked while it cleans it out. The VOP_LOCK
         * ensures that the VOP_INACTIVE routine is done with its work.
         * For active vnodes, it ensures that no other activity can
         * occur while the underlying object is being cleaned out.
         */
        VOP_LOCK(vp);
        /*
         * Prevent the vnode from being recycled or
         * brought into use while we clean it out.
         */
        if (vp->v_flag & VXLOCK)
                panic("vclean: deadlock");
        vp->v_flag |= VXLOCK;
        /*
         * Clean out any buffers associated with the vnode.
         */
        if (flags & DOCLOSE)
                vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
        /*
         * Any other processes trying to obtain this lock must first
         * wait for VXLOCK to clear, then call the new lock operation.
         */
        VOP_UNLOCK(vp);
        /*
         * If purging an active vnode, it must be closed and
         * deactivated before being reclaimed.
         */
        if (active) {
                if (flags & DOCLOSE)
                        VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
                VOP_INACTIVE(vp);
        }
        /*
         * Reclaim the vnode.
         */
        if (VOP_RECLAIM(vp))
                panic("vclean: cannot reclaim");
        if (active)
                vrele(vp);

        /*
         * Done with purge, notify sleepers of the grim news.
         */
        vp->v_op = dead_vnodeop_p;
        vp->v_tag = VT_NON;
        vp->v_flag &= ~VXLOCK;
        if (vp->v_flag & VXWANT) {
                vp->v_flag &= ~VXWANT;
                wakeup((caddr_t)vp);
        }
}

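/*
 * The VXLOCK/VXWANT handshake used by vclean() above is the same one
 * honored by vget(), vgone(), and vgoneall(): the process doing the
 * cleaning sets VXLOCK; any other process finding VXLOCK set marks the
 * vnode VXWANT and sleeps on its address until the cleaner's wakeup().
 */
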
/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vgoneall(vp)
        register struct vnode *vp;
{
        register struct vnode *vq;

        if (vp->v_flag & VALIASED) {
                /*
                 * If a vgone (or vclean) is already in progress,
                 * wait until it is done and return.
                 */
                if (vp->v_flag & VXLOCK) {
                        vp->v_flag |= VXWANT;
                        sleep((caddr_t)vp, PINOD);
                        return;
                }
                /*
                 * Ensure that vp will not be vgone'd while we
                 * are eliminating its aliases.
                 */
                vp->v_flag |= VXLOCK;
                while (vp->v_flag & VALIASED) {
                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                                if (vq->v_rdev != vp->v_rdev ||
                                    vq->v_type != vp->v_type || vp == vq)
                                        continue;
                                vgone(vq);
                                break;
                        }
                }
                /*
                 * Remove the lock so that vgone below will
                 * really eliminate the vnode after which time
                 * vgone will awaken any sleepers.
                 */
                vp->v_flag &= ~VXLOCK;
        }
        vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
        register struct vnode *vp;
{
        register struct vnode *vq;
        struct vnode *vx;

        /*
         * If a vgone (or vclean) is already in progress,
         * wait until it is done and return.
         */
        if (vp->v_flag & VXLOCK) {
                vp->v_flag |= VXWANT;
                sleep((caddr_t)vp, PINOD);
                return;
        }
        /*
         * Clean out the filesystem specific data.
         */
        vclean(vp, DOCLOSE);
        /*
         * Delete from old mount point vnode list, if on one.
         */
        if (vp->v_mount != NULL) {
                LIST_REMOVE(vp, v_mntvnodes);
                vp->v_mount = NULL;
        }
        /*
         * If special device, remove it from special device alias list.
         */
        if (vp->v_type == VBLK || vp->v_type == VCHR) {
                if (*vp->v_hashchain == vp) {
                        *vp->v_hashchain = vp->v_specnext;
                } else {
                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                                if (vq->v_specnext != vp)
                                        continue;
                                vq->v_specnext = vp->v_specnext;
                                break;
                        }
                        if (vq == NULL)
                                panic("missing bdev");
                }
                if (vp->v_flag & VALIASED) {
                        vx = NULL;
                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                                if (vq->v_rdev != vp->v_rdev ||
                                    vq->v_type != vp->v_type)
                                        continue;
                                if (vx)
                                        break;
                                vx = vq;
                        }
                        if (vx == NULL)
                                panic("missing alias");
                        if (vq == NULL)
                                vx->v_flag &= ~VALIASED;
                        vp->v_flag &= ~VALIASED;
                }
                FREE(vp->v_specinfo, M_VNODE);
                vp->v_specinfo = NULL;
        }
        /*
         * If it is on the freelist and not already at the head,
         * move it to the head of the list. The test of the back
         * pointer and the reference count of zero is because
         * it will be removed from the free list by getnewvnode,
         * but will not have its reference count incremented until
         * after calling vgone. If the reference count were
         * incremented first, vgone would (incorrectly) try to
         * close the previous instance of the underlying object.
         * So, the back pointer is explicitly set to `0xdeadb' in
         * getnewvnode after removing it from the freelist to ensure
         * that we do not try to move it here.
         */
        if (vp->v_usecount == 0 &&
            vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
            vnode_free_list.tqh_first != vp) {
                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
        }
        vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
        dev_t dev;
        enum vtype type;
        struct vnode **vpp;
{
        register struct vnode *vp;

        for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
                if (dev != vp->v_rdev || type != vp->v_type)
                        continue;
                *vpp = vp;
                return (1);
        }
        return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
        register struct vnode *vp;
{
        register struct vnode *vq, *vnext;
        int count;

loop:
        if ((vp->v_flag & VALIASED) == 0)
                return (vp->v_usecount);
        for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
                vnext = vq->v_specnext;
                if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
                        continue;
                /*
                 * Alias, but not in use, so flush it out.
                 */
                if (vq->v_usecount == 0 && vq != vp) {
                        vgone(vq);
                        goto loop;
                }
                count += vq->v_usecount;
        }
        return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
        char *label;
        register struct vnode *vp;
{
        char buf[64];

        if (label != NULL)
                printf("%s: ", label);
        printf("type %s, usecount %d, writecount %d, refcount %ld,",
            typename[vp->v_type], vp->v_usecount, vp->v_writecount,
            vp->v_holdcnt);
        buf[0] = '\0';
        if (vp->v_flag & VROOT)
                strcat(buf, "|VROOT");
        if (vp->v_flag & VTEXT)
                strcat(buf, "|VTEXT");
        if (vp->v_flag & VSYSTEM)
                strcat(buf, "|VSYSTEM");
        if (vp->v_flag & VXLOCK)
                strcat(buf, "|VXLOCK");
        if (vp->v_flag & VXWANT)
                strcat(buf, "|VXWANT");
        if (vp->v_flag & VBWAIT)
                strcat(buf, "|VBWAIT");
        if (vp->v_flag & VALIASED)
                strcat(buf, "|VALIASED");
        if (buf[0] != '\0')
                printf(" flags (%s)", &buf[1]);
        if (vp->v_data == NULL) {
                printf("\n");
        } else {
                printf("\n\t");
                VOP_PRINT(vp);
        }
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
        register struct mount *mp;
        register struct vnode *vp;

        printf("Locked vnodes\n");
        for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
                for (vp = mp->mnt_vnodelist.lh_first;
                    vp != NULL;
                    vp = vp->v_mntvnodes.le_next)
                        if (VOP_ISLOCKED(vp))
                                vprint((char *)0, vp);
        }
}
#endif

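/*
 * sysctl_vnode() below follows the usual two-pass sysctl protocol:
 * called with where == NULL it only reports a slop-padded size estimate
 * through *sizep; called again with a buffer it copies out a (vnode
 * pointer, vnode contents) pair per vnode.  A hypothetical userland
 * consumer (error checks elided) might read the table as follows:
 *
 *	int mib[2] = { CTL_KERN, KERN_VNODE };
 *	size_t len;
 *	char *buf;
 *
 *	sysctl(mib, 2, NULL, &len, NULL, 0);
 *	buf = malloc(len);
 *	sysctl(mib, 2, buf, &len, NULL, 0);
 */
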
int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep)
        char *where;
        size_t *sizep;
{
        register struct mount *mp, *nmp;
        struct vnode *vp;
        register char *bp = where, *savebp;
        char *ewhere;
        int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
        if (where == NULL) {
                *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
                return (0);
        }
        ewhere = where + *sizep;

        for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
                nmp = mp->mnt_list.tqe_next;
                if (vfs_busy(mp))
                        continue;
                savebp = bp;
again:
                for (vp = mp->mnt_vnodelist.lh_first;
                    vp != NULL;
                    vp = vp->v_mntvnodes.le_next) {
                        /*
                         * Check that the vp is still associated with
                         * this filesystem.  RACE: could have been
                         * recycled onto the same filesystem.
                         */
                        if (vp->v_mount != mp) {
                                if (kinfo_vdebug)
                                        printf("kinfo: vp changed\n");
                                bp = savebp;
                                goto again;
                        }
                        if (bp + VPTRSZ + VNODESZ > ewhere) {
                                vfs_unbusy(mp);
                                *sizep = bp - where;
                                return (ENOMEM);
                        }
                        if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
                            (error = copyout((caddr_t)vp, bp + VPTRSZ,
                            VNODESZ))) {
                                vfs_unbusy(mp);
                                return (error);
                        }
                        bp += VPTRSZ + VNODESZ;
                }
                vfs_unbusy(mp);
        }

        *sizep = bp - where;
        return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
        register struct vnode *vp;
{
        register struct vnode *vq;

        if (vp->v_specflags & SI_MOUNTEDON)
                return (EBUSY);
        if (vp->v_flag & VALIASED) {
                for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                        if (vq->v_rdev != vp->v_rdev ||
                            vq->v_type != vp->v_type)
                                continue;
                        if (vq->v_specflags & SI_MOUNTEDON)
                                return (EBUSY);
                }
        }
        return (0);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
        struct mount *mp;
        struct netexport *nep;
        struct export_args *argp;
{
        register struct netcred *np;
        register struct radix_node_head *rnh;
        register int i;
        struct radix_node *rn;
        struct sockaddr *saddr, *smask = 0;
        struct domain *dom;
        int error;

        if (argp->ex_addrlen == 0) {
                if (mp->mnt_flag & MNT_DEFEXPORTED)
                        return (EPERM);
                np = &nep->ne_defexported;
                np->netc_exflags = argp->ex_flags;
                np->netc_anon = argp->ex_anon;
                np->netc_anon.cr_ref = 1;
                mp->mnt_flag |= MNT_DEFEXPORTED;
                return (0);
        }
        i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
        np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
        bzero((caddr_t)np, i);
        saddr = (struct sockaddr *)(np + 1);
        if ((error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen)))
                goto out;
        if (saddr->sa_len > argp->ex_addrlen)
                saddr->sa_len = argp->ex_addrlen;
        if (argp->ex_masklen) {
                smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
                error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
                if (error)
                        goto out;
                if (smask->sa_len > argp->ex_masklen)
                        smask->sa_len = argp->ex_masklen;
        }
        i = saddr->sa_family;
        if ((rnh = nep->ne_rtable[i]) == 0) {
                /*
                 * Seems silly to initialize every AF when most are not
                 * used, do so on demand here.
                 */
                for (dom = domains; dom; dom = dom->dom_next)
                        if (dom->dom_family == i && dom->dom_rtattach) {
                                dom->dom_rtattach((void **)&nep->ne_rtable[i],
                                    dom->dom_rtoffset);
                                break;
                        }
                if ((rnh = nep->ne_rtable[i]) == 0) {
                        error = ENOBUFS;
                        goto out;
                }
        }
        rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
            np->netc_rnodes);
        if (rn == 0 || np != (struct netcred *)rn) {	/* already exists */
                error = EPERM;
                goto out;
        }
        np->netc_exflags = argp->ex_flags;
        np->netc_anon = argp->ex_anon;
        np->netc_anon.cr_ref = 1;
        return (0);
out:
        free(np, M_NETADDR);
        return (error);
}

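/*
 * Memory layout built by vfs_hang_addrlist() above: the netcred, the
 * export address, and the optional mask are carved out of one malloc'd
 * block,
 *
 *	[struct netcred][addr (ex_addrlen bytes)][mask (ex_masklen bytes)]
 *
 * which is why saddr is computed as (struct sockaddr *)(np + 1) and why
 * a single free() releases the whole record on the failure path.
 */
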
/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
        struct radix_node *rn;
        caddr_t w;
{
        register struct radix_node_head *rnh = (struct radix_node_head *)w;

        (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
        free((caddr_t)rn, M_NETADDR);
        return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
        struct netexport *nep;
{
        register int i;
        register struct radix_node_head *rnh;

        for (i = 0; i <= AF_MAX; i++)
                if ((rnh = nep->ne_rtable[i])) {
                        (*rnh->rnh_walktree)(rnh, vfs_free_netcred,
                            (caddr_t)rnh);
                        free((caddr_t)rnh, M_RTABLE);
                        nep->ne_rtable[i] = 0;
                }
}

int
vfs_export(mp, nep, argp)
        struct mount *mp;
        struct netexport *nep;
        struct export_args *argp;
{
        int error;

        if (argp->ex_flags & MNT_DELEXPORT) {
                vfs_free_addrlist(nep);
                mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
        }
        if (argp->ex_flags & MNT_EXPORTED) {
                if ((error = vfs_hang_addrlist(mp, nep, argp)))
                        return (error);
                mp->mnt_flag |= MNT_EXPORTED;
        }
        return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
        register struct mount *mp;
        struct netexport *nep;
        struct mbuf *nam;
{
        register struct netcred *np;
        register struct radix_node_head *rnh;
        struct sockaddr *saddr;

        np = NULL;
        if (mp->mnt_flag & MNT_EXPORTED) {
                /*
                 * Lookup in the export list first.
                 */
                if (nam != NULL) {
                        saddr = mtod(nam, struct sockaddr *);
                        rnh = nep->ne_rtable[saddr->sa_family];
                        if (rnh != NULL) {
                                np = (struct netcred *)
                                    (*rnh->rnh_matchaddr)((caddr_t)saddr,
                                    rnh);
                                if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
                                        np = NULL;
                        }
                }
                /*
                 * If no address match, use the default if it exists.
                 */
                if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
                        np = &nep->ne_defexported;
        }
        return (np);
}