vfs_export.c revision 5201
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 * $Id: vfs_subr.c,v 1.12 1994/10/06 21:06:37 davidg Exp $
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

void	insmntque __P((struct vnode *, struct mount *));

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {					\
	LIST_REMOVE(bp, b_vnbufs);			\
	(bp)->b_vnbufs.le_next = NOLIST;		\
}

TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&mountlist);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
int
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		(void) tsleep((caddr_t)mp, PVFS, "vfslck", 0);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
int
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		(void) tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbsy", 0);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
void
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}

/*
 * Unmount the root filesystem; called from vfs_unmountall() below.
 */
void
vfs_unmountroot(rootfs)
	struct mount *rootfs;
{
	struct mount *mp = rootfs;
	int error;

	if (vfs_busy(mp)) {
		printf("failed to unmount root\n");
		return;
	}

	mp->mnt_flag |= MNT_UNMOUNT;
	if ((error = vfs_lock(mp))) {
		printf("lock of root filesystem failed (%d)\n", error);
		return;
	}

	vnode_pager_umount(mp);	/* release cached vnodes */
	cache_purgevfs(mp);	/* remove cache entries for this file sys */

	if ((error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc)))
		printf("sync of root filesystem failed (%d)\n", error);

	if ((error = VFS_UNMOUNT(mp, MNT_FORCE, initproc))) {
		printf("unmount of root filesystem failed (");
		if (error == EBUSY)
			printf("BUSY)\n");
		else
			printf("%d)\n", error);
	}

	mp->mnt_flag &= ~MNT_UNMOUNT;
	vfs_unbusy(mp);
}

/*
 * Unmount all filesystems.  Should only be called by halt().
 */
void
vfs_unmountall()
{
	struct mount *mp, *mp_next, *rootfs = NULL;
	int error;

	/* unmount all but rootfs */
	for (mp = mountlist.tqh_first; mp != NULL; mp = mp_next) {
		mp_next = mp->mnt_list.tqe_next;

		if (mp->mnt_flag & MNT_ROOTFS) {
			rootfs = mp;
			continue;
		}

		error = dounmount(mp, MNT_FORCE, initproc);
		if (error) {
			printf("unmount of %s failed (", mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}

	/* and finally... */
	if (rootfs) {
		vfs_unmountroot(rootfs);
	} else {
		printf("no root filesystem\n");
	}
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
			return (mp);
	}
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
getnewfsid(mp, mtype)
	struct mount *mp;
	int mtype;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;

	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.tqh_first != NULL) {
		while (getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
	    vap->va_fsid = vap->va_fileid =
	    vap->va_blocksize = vap->va_rdev =
	    vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
	    vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
	    vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
	    vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
extern void vclean();
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	register struct vnode *vp;

	if ((vnode_free_list.tqh_first == NULL &&
	    numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		if ((vp = vnode_free_list.tqh_first) == NULL) {
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgone(vp);
#ifdef DIAGNOSTIC
		{
			int s;
			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
		}
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL)
		return;
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp)) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_pager_t pager;
	vm_object_t object;

	if (flags & V_SAVE) {
		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}

	pager = NULL;
	object = (vm_object_t)vp->v_vmdata;
	if (object != NULL)
		pager = object->pager;
	if (pager != NULL) {
		object = vm_object_lookup(pager);
		if (object) {
			vm_object_lock(object);
			if (flags & V_SAVE)
				vm_object_page_clean(object, 0, 0, TRUE, FALSE);
			vm_object_page_remove(object, 0, object->size);
			vm_object_unlock(object);
			vm_object_deallocate(object);
		}
	}

	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	s = splbio();
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	splx(s);

	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if ((nvp = checkalias(vp, dev, (struct mount *)0))) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		if (vget(vp, 1))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set while
 * the vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, lockflag)
	register struct vnode *vp;
	int lockflag;
{

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined either by checking
	 * that the VXLOCK flag is set, or that the use count is
	 * zero with the back pointer set to show that it has been
	 * removed from the free list by getnewvnode. The VXLOCK
	 * flag may not have been set yet because vclean is blocked in
	 * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
	 */
	if ((vp->v_flag & VXLOCK) ||
	    (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
		vp->v_flag |= VXWANT;
		(void) tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (1);
	}
	if (vp->v_usecount == 0)
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	vp->v_usecount++;
	if (lockflag)
		VOP_LOCK(vp);
	return (0);
}

/*
 * Vnode reference, just increment the count
 */
void
vref(vp)
	struct vnode *vp;
{

	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0 /* || vp->v_writecount != 0 */) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	VOP_INACTIVE(vp);
}

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		VREF(vp);
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * Any other processes trying to obtain this lock must first
	 * wait for VXLOCK to clear, then call the new lock operation.
	 */
	VOP_UNLOCK(vp);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
		VOP_INACTIVE(vp);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			(void) tsleep((caddr_t)vp, PINOD, "vgall", 0);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		(void) tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
	    vnode_free_list.tqh_first != vp) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (1);
	}
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %ld,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next)
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
	}
}
#endif

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep)
	char *where;
	size_t *sizep;
{
	register struct mount *mp, *nmp;
	struct vnode *vp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
		nmp = mp->mnt_list.tqe_next;
		if (vfs_busy(mp))
			continue;
		savebp = bp;
again:
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				*sizep = bp - where;
				return (ENOMEM);
			}
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
		}
		vfs_unbusy(mp);
	}

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON)
				return (EBUSY);
		}
	}
	return (0);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	if ((error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen)))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	caddr_t w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i])) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred,
			    (caddr_t)rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

/*
 * Update the export list for a mount point: delete the existing
 * export information and/or hang a new address list, as requested
 * by the export argument flags.
 */
int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if ((error = vfs_hang_addrlist(mp, nep, argp)))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

/*
 * Look up the export credentials that apply to the client address
 * in `nam', falling back to the default export if one is defined.
 */
struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr)((caddr_t)saddr,
				    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}