vfs_export.c revision 2384
1/* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 * $Id: vfs_subr.c,v 1.7 1994/08/24 04:06:39 davidg Exp $
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

void insmntque __P((struct vnode *, struct mount *));

/*
 * Translation table: IFMT mode bits (mode >> 12) to vnode type.
 * Unused mode values map to VNON; 017 maps to VBAD.
 */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
/*
 * Inverse translation: vnode type (enum vtype) to IFMT mode bits.
 */
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 * bufremvn marks the removed entry with NOLIST so list membership
 * can later be tested via b_vnbufs.le_next.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {  \
	LIST_REMOVE(bp, b_vnbufs); \
	(bp)->b_vnbufs.le_next = NOLIST; \
}

TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&mountlist);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 * Sleeps at PVFS until any existing holder clears MNT_MLOCK.
 * Always returns 0.
 */
int
vfs_lock(mp)
	register struct mount *mp;
{

	while(mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		sleep((caddr_t)mp, PVFS);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 * Wakes any sleepers that set MNT_MWAIT while waiting in vfs_lock().
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 * Returns 1 (without acquiring the busy flag) if the mount point is
 * being unmounted, 0 on success.
 */
int
vfs_busy(mp)
	register struct mount *mp;
{

	while(mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		sleep((caddr_t)&mp->mnt_flag, PVFS);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
void
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}

/*
 * Unmount the root filesystem.  Errors are reported via printf and
 * otherwise ignored since there is nothing else to be done at this
 * point (this runs during system shutdown on initproc's credentials).
 */
void
vfs_unmountroot(rootfs)
	struct mount *rootfs;
{
	struct mount *mp = rootfs;
	int error;

	if (vfs_busy(mp)) {
		printf("failed to unmount root\n");
		return;
	}

	mp->mnt_flag |= MNT_UNMOUNT;
	if (error = vfs_lock(mp)) {
		printf("lock of root filesystem failed (%d)\n", error);
		return;
	}

	vnode_pager_umount(mp);	/* release cached vnodes */
	cache_purgevfs(mp);	/* remove cache entries for this file sys */

	if (error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc))
		printf("sync of root filesystem failed (%d)\n", error);

	if (error = VFS_UNMOUNT(mp, MNT_FORCE, initproc)) {
		printf("unmount of root filesystem failed (");
		if (error == EBUSY)
			printf("BUSY)\n");
		else
			printf("%d)\n", error);
	}

	mp->mnt_flag &= ~MNT_UNMOUNT;
	vfs_unbusy(mp);
}

/*
 * Unmount all filesystems. Should only be called by halt().
 * Non-root filesystems are force-unmounted first; the root filesystem
 * (MNT_ROOTFS) is deferred to vfs_unmountroot() so it goes last.
 */
void
vfs_unmountall()
{
	struct mount *mp, *mp_next, *rootfs = NULL;
	int error;

	/* unmount all but rootfs */
	for (mp = mountlist.tqh_first; mp != NULL; mp = mp_next) {
		/* grab the next pointer first: dounmount frees mp */
		mp_next = mp->mnt_list.tqe_next;

		if (mp->mnt_flag & MNT_ROOTFS) {
			rootfs = mp;
			continue;
		}

		error = dounmount(mp, MNT_FORCE, initproc);
		if (error) {
			printf("unmount of %s failed (", mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}

	/* and finally... */
	if (rootfs) {
		vfs_unmountroot(rootfs);
	} else {
		printf("no root filesystem\n");
	}
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	/* Linear scan of the mounted filesystem list; NULL if not found. */
	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
			return (mp);
	}
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid for mount point mp of filesystem type mtype.
 * val[0] is built from a fake device number (nblkdev + mtype, minor =
 * mount counter) and bumped until no existing mount matches; val[1]
 * is the filesystem type.
 */
void
getnewfsid(mp, mtype)
	struct mount *mp;
	int mtype;
{
	/* monotonically increasing per-boot mount counter (never 0) */
	static u_short xxxfs_mntid;

	fsid_t tfsid;

	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.tqh_first != NULL) {
		/* walk forward until the candidate fsid is unused */
		while (getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Set vnode attributes to VNOVAL ("not set"), so that callers of
 * VOP_SETATTR can detect which fields a caller actually filled in.
 * va_vaflags is cleared rather than set to VNOVAL.
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
extern void vclean();
long numvnodes;		/* current number of allocated vnodes */

/*
 * Return the next vnode from the free list.
318 */ 319int 320getnewvnode(tag, mp, vops, vpp) 321 enum vtagtype tag; 322 struct mount *mp; 323 int (**vops)(); 324 struct vnode **vpp; 325{ 326 register struct vnode *vp; 327 int s; 328 329 if ((vnode_free_list.tqh_first == NULL && 330 numvnodes < 2 * desiredvnodes) || 331 numvnodes < desiredvnodes) { 332 vp = (struct vnode *)malloc((u_long)sizeof *vp, 333 M_VNODE, M_WAITOK); 334 bzero((char *)vp, sizeof *vp); 335 numvnodes++; 336 } else { 337 if ((vp = vnode_free_list.tqh_first) == NULL) { 338 tablefull("vnode"); 339 *vpp = 0; 340 return (ENFILE); 341 } 342 if (vp->v_usecount) 343 panic("free vnode isn't"); 344 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 345 /* see comment on why 0xdeadb is set at end of vgone (below) */ 346 vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb; 347 vp->v_lease = NULL; 348 if (vp->v_type != VBAD) 349 vgone(vp); 350#ifdef DIAGNOSTIC 351 if (vp->v_data) 352 panic("cleaned vnode isn't"); 353 s = splbio(); 354 if (vp->v_numoutput) 355 panic("Clean vnode has pending I/O's"); 356 splx(s); 357#endif 358 vp->v_flag = 0; 359 vp->v_lastr = 0; 360 vp->v_ralen = 0; 361 vp->v_maxra = 0; 362 vp->v_lastw = 0; 363 vp->v_lasta = 0; 364 vp->v_cstart = 0; 365 vp->v_clen = 0; 366 vp->v_socket = 0; 367 vp->v_writecount = 0; /* XXX */ 368 } 369 vp->v_type = VNON; 370 cache_purge(vp); 371 vp->v_tag = tag; 372 vp->v_op = vops; 373 insmntque(vp, mp); 374 *vpp = vp; 375 vp->v_usecount = 1; 376 vp->v_data = 0; 377 return (0); 378} 379 380/* 381 * Move a vnode from one mount queue to another. 382 */ 383void 384insmntque(vp, mp) 385 register struct vnode *vp; 386 register struct mount *mp; 387{ 388 389 /* 390 * Delete from old mount point vnode list, if on one. 391 */ 392 if (vp->v_mount != NULL) 393 LIST_REMOVE(vp, v_mntvnodes); 394 /* 395 * Insert into list of vnodes for the new mount point, if available. 
396 */ 397 if ((vp->v_mount = mp) == NULL) 398 return; 399 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 400} 401 402/* 403 * Update outstanding I/O count and do wakeup if requested. 404 */ 405void 406vwakeup(bp) 407 register struct buf *bp; 408{ 409 register struct vnode *vp; 410 411 bp->b_flags &= ~B_WRITEINPROG; 412 if (vp = bp->b_vp) { 413 vp->v_numoutput--; 414 if (vp->v_numoutput < 0) 415 panic("vwakeup: neg numoutput"); 416 if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { 417 if (vp->v_numoutput < 0) 418 panic("vwakeup: neg numoutput"); 419 vp->v_flag &= ~VBWAIT; 420 wakeup((caddr_t)&vp->v_numoutput); 421 } 422 } 423} 424 425/* 426 * Flush out and invalidate all buffers associated with a vnode. 427 * Called with the underlying object locked. 428 */ 429int 430vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 431 register struct vnode *vp; 432 int flags; 433 struct ucred *cred; 434 struct proc *p; 435 int slpflag, slptimeo; 436{ 437 register struct buf *bp; 438 struct buf *nbp, *blist; 439 int s, error; 440 vm_pager_t pager; 441 vm_object_t object; 442 443 if (flags & V_SAVE) { 444 if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) 445 return (error); 446 if (vp->v_dirtyblkhd.lh_first != NULL) 447 panic("vinvalbuf: dirty bufs"); 448 } 449 for (;;) { 450 if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA)) 451 while (blist && blist->b_lblkno < 0) 452 blist = blist->b_vnbufs.le_next; 453 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && 454 (flags & V_SAVEMETA)) 455 while (blist && blist->b_lblkno < 0) 456 blist = blist->b_vnbufs.le_next; 457 if (!blist) 458 break; 459 460 for (bp = blist; bp; bp = nbp) { 461 nbp = bp->b_vnbufs.le_next; 462 if ((flags & V_SAVEMETA) && bp->b_lblkno < 0) 463 continue; 464 s = splbio(); 465 if (bp->b_flags & B_BUSY) { 466 bp->b_flags |= B_WANTED; 467 error = tsleep((caddr_t)bp, 468 slpflag | (PRIBIO + 1), "vinvalbuf", 469 slptimeo); 470 splx(s); 471 if (error) 472 return (error); 473 break; 474 } 475 
bremfree(bp); 476 bp->b_flags |= B_BUSY; 477 splx(s); 478 /* 479 * XXX Since there are no node locks for NFS, I believe 480 * there is a slight chance that a delayed write will 481 * occur while sleeping just above, so check for it. 482 */ 483 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { 484 (void) VOP_BWRITE(bp); 485 break; 486 } 487 bp->b_flags |= B_INVAL; 488 brelse(bp); 489 } 490 } 491 492 pager = (vm_pager_t)vp->v_vmdata; 493 if (pager != NULL) { 494 object = vm_object_lookup(pager); 495 if (object) { 496 vm_object_lock(object); 497 if (flags & V_SAVE) 498 vm_object_page_clean(object, 0, 0, TRUE, FALSE); 499 vm_object_page_remove(object, 0, object->size); 500 vm_object_unlock(object); 501 vm_object_deallocate(object); 502 } 503 } 504 505 if (!(flags & V_SAVEMETA) && 506 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) 507 panic("vinvalbuf: flush failed"); 508 return (0); 509} 510 511/* 512 * Associate a buffer with a vnode. 513 */ 514void 515bgetvp(vp, bp) 516 register struct vnode *vp; 517 register struct buf *bp; 518{ 519 520 if (bp->b_vp) 521 panic("bgetvp: not free"); 522 VHOLD(vp); 523 bp->b_vp = vp; 524 if (vp->v_type == VBLK || vp->v_type == VCHR) 525 bp->b_dev = vp->v_rdev; 526 else 527 bp->b_dev = NODEV; 528 /* 529 * Insert onto list for new vnode. 530 */ 531 bufinsvn(bp, &vp->v_cleanblkhd); 532} 533 534/* 535 * Disassociate a buffer from a vnode. 536 */ 537void 538brelvp(bp) 539 register struct buf *bp; 540{ 541 struct vnode *vp; 542 543 if (bp->b_vp == (struct vnode *) 0) 544 panic("brelvp: NULL"); 545 /* 546 * Delete from old vnode list, if on one. 547 */ 548 if (bp->b_vnbufs.le_next != NOLIST) 549 bufremvn(bp); 550 vp = bp->b_vp; 551 bp->b_vp = (struct vnode *) 0; 552 HOLDRELE(vp); 553} 554 555/* 556 * Reassign a buffer from one vnode to another. 557 * Used to assign file specific control information 558 * (indirect blocks) to the vnode to which they belong. 
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		/* NOTE: warn-and-return rather than panic (historical) */
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 * Returns 0 with *vpp set (or with dev == NODEV, *vpp untouched),
 * or an error from getnewvnode().
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	/* if an aliased vnode already exists for this device, use it */
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	/* only block and character devices can be aliased */
	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		/* vget may sleep; rescan from the top if it fails */
		if (vget(vp, 1))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		/* no reusable alias: link nvp into the special hash chain */
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/* reclaim the existing VT_NON alias and hand it to the caller */
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, lockflag)
	register struct vnode *vp;
	int lockflag;
{

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined either by checking
	 * that the VXLOCK flag is set, or that the use count is
	 * zero with the back pointer set to show that it has been
	 * removed from the free list by getnewvnode. The VXLOCK
	 * flag may not have been set yet because vclean is blocked in
	 * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
	 */
	if ((vp->v_flag & VXLOCK) ||
	    (vp->v_usecount == 0 &&
	     vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return (1);
	}
	if (vp->v_usecount == 0)
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	vp->v_usecount++;
	if (lockflag)
		VOP_LOCK(vp);
	return (0);
}

/*
 * Vnode reference, just increment the count.
 * Only valid on vnodes already referenced; use vget() otherwise.
 */
void
vref(vp)
	struct vnode *vp;
{

	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0 /* || vp->v_writecount != 0 */) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	VOP_INACTIVE(vp);
}

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/* vnode moved to another mount while we slept: restart */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 * On return the vnode uses the dead filesystem ops (dead_vnodeop_p)
 * and VT_NON; sleepers on VXWANT are woken.
 */
void
vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		VREF(vp);
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * Any other processes trying to obtain this lock must first
	 * wait for VXLOCK to clear, then call the new lock operation.
	 */
	VOP_UNLOCK(vp);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
		VOP_INACTIVE(vp);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			sleep((caddr_t)vp, PINOD);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * If exactly one alias remains (vx), it is no
			 * longer aliased; clear its VALIASED flag too.
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
	    vnode_free_list.tqh_first != vp) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	/* returns 1 and sets *vpp on a hit, 0 otherwise */
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (1);
	}
	return (0);
}

/*
 * Calculate the total number of references to a special device,
 * summing v_usecount over all aliases of the device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			/* vgone may edit the hash chain: start over */
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	/* 64 bytes is enough for all flag names concatenated (~50) */
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);	/* &buf[1] skips leading '|' */
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next)
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
	}
}
#endif

int kinfo_vdebug = 1;		/* report races seen while dumping vnodes */
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10	/* slack vnodes allowed for in size estimate */
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
1199 */ 1200/* ARGSUSED */ 1201int 1202sysctl_vnode(where, sizep) 1203 char *where; 1204 size_t *sizep; 1205{ 1206 register struct mount *mp, *nmp; 1207 struct vnode *vp; 1208 register char *bp = where, *savebp; 1209 char *ewhere; 1210 int error; 1211 1212#define VPTRSZ sizeof (struct vnode *) 1213#define VNODESZ sizeof (struct vnode) 1214 if (where == NULL) { 1215 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 1216 return (0); 1217 } 1218 ewhere = where + *sizep; 1219 1220 for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) { 1221 nmp = mp->mnt_list.tqe_next; 1222 if (vfs_busy(mp)) 1223 continue; 1224 savebp = bp; 1225again: 1226 for (vp = mp->mnt_vnodelist.lh_first; 1227 vp != NULL; 1228 vp = vp->v_mntvnodes.le_next) { 1229 /* 1230 * Check that the vp is still associated with 1231 * this filesystem. RACE: could have been 1232 * recycled onto the same filesystem. 1233 */ 1234 if (vp->v_mount != mp) { 1235 if (kinfo_vdebug) 1236 printf("kinfo: vp changed\n"); 1237 bp = savebp; 1238 goto again; 1239 } 1240 if (bp + VPTRSZ + VNODESZ > ewhere) { 1241 *sizep = bp - where; 1242 return (ENOMEM); 1243 } 1244 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 1245 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 1246 return (error); 1247 bp += VPTRSZ + VNODESZ; 1248 } 1249 vfs_unbusy(mp); 1250 } 1251 1252 *sizep = bp - where; 1253 return (0); 1254} 1255 1256/* 1257 * Check to see if a filesystem is mounted on a block device. 1258 */ 1259int 1260vfs_mountedon(vp) 1261 register struct vnode *vp; 1262{ 1263 register struct vnode *vq; 1264 1265 if (vp->v_specflags & SI_MOUNTEDON) 1266 return (EBUSY); 1267 if (vp->v_flag & VALIASED) { 1268 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1269 if (vq->v_rdev != vp->v_rdev || 1270 vq->v_type != vp->v_type) 1271 continue; 1272 if (vq->v_specflags & SI_MOUNTEDON) 1273 return (EBUSY); 1274 } 1275 } 1276 return (0); 1277} 1278 1279/* 1280 * Build hash lists of net addresses and hang them off the mount point. 
1281 * Called by ufs_mount() to set up the lists of export addresses. 1282 */ 1283static int 1284vfs_hang_addrlist(mp, nep, argp) 1285 struct mount *mp; 1286 struct netexport *nep; 1287 struct export_args *argp; 1288{ 1289 register struct netcred *np; 1290 register struct radix_node_head *rnh; 1291 register int i; 1292 struct radix_node *rn; 1293 struct sockaddr *saddr, *smask = 0; 1294 struct domain *dom; 1295 int error; 1296 1297 if (argp->ex_addrlen == 0) { 1298 if (mp->mnt_flag & MNT_DEFEXPORTED) 1299 return (EPERM); 1300 np = &nep->ne_defexported; 1301 np->netc_exflags = argp->ex_flags; 1302 np->netc_anon = argp->ex_anon; 1303 np->netc_anon.cr_ref = 1; 1304 mp->mnt_flag |= MNT_DEFEXPORTED; 1305 return (0); 1306 } 1307 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 1308 np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK); 1309 bzero((caddr_t)np, i); 1310 saddr = (struct sockaddr *)(np + 1); 1311 if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen)) 1312 goto out; 1313 if (saddr->sa_len > argp->ex_addrlen) 1314 saddr->sa_len = argp->ex_addrlen; 1315 if (argp->ex_masklen) { 1316 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); 1317 error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen); 1318 if (error) 1319 goto out; 1320 if (smask->sa_len > argp->ex_masklen) 1321 smask->sa_len = argp->ex_masklen; 1322 } 1323 i = saddr->sa_family; 1324 if ((rnh = nep->ne_rtable[i]) == 0) { 1325 /* 1326 * Seems silly to initialize every AF when most are not 1327 * used, do so on demand here 1328 */ 1329 for (dom = domains; dom; dom = dom->dom_next) 1330 if (dom->dom_family == i && dom->dom_rtattach) { 1331 dom->dom_rtattach((void **)&nep->ne_rtable[i], 1332 dom->dom_rtoffset); 1333 break; 1334 } 1335 if ((rnh = nep->ne_rtable[i]) == 0) { 1336 error = ENOBUFS; 1337 goto out; 1338 } 1339 } 1340 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, 1341 np->netc_rnodes); 1342 if (rn == 0 || np != (struct netcred 
*)rn) { /* already exists */ 1343 error = EPERM; 1344 goto out; 1345 } 1346 np->netc_exflags = argp->ex_flags; 1347 np->netc_anon = argp->ex_anon; 1348 np->netc_anon.cr_ref = 1; 1349 return (0); 1350out: 1351 free(np, M_NETADDR); 1352 return (error); 1353} 1354 1355/* ARGSUSED */ 1356static int 1357vfs_free_netcred(rn, w) 1358 struct radix_node *rn; 1359 caddr_t w; 1360{ 1361 register struct radix_node_head *rnh = (struct radix_node_head *)w; 1362 1363 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); 1364 free((caddr_t)rn, M_NETADDR); 1365 return (0); 1366} 1367 1368/* 1369 * Free the net address hash lists that are hanging off the mount points. 1370 */ 1371static void 1372vfs_free_addrlist(nep) 1373 struct netexport *nep; 1374{ 1375 register int i; 1376 register struct radix_node_head *rnh; 1377 1378 for (i = 0; i <= AF_MAX; i++) 1379 if (rnh = nep->ne_rtable[i]) { 1380 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, 1381 (caddr_t)rnh); 1382 free((caddr_t)rnh, M_RTABLE); 1383 nep->ne_rtable[i] = 0; 1384 } 1385} 1386 1387int 1388vfs_export(mp, nep, argp) 1389 struct mount *mp; 1390 struct netexport *nep; 1391 struct export_args *argp; 1392{ 1393 int error; 1394 1395 if (argp->ex_flags & MNT_DELEXPORT) { 1396 vfs_free_addrlist(nep); 1397 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 1398 } 1399 if (argp->ex_flags & MNT_EXPORTED) { 1400 if (error = vfs_hang_addrlist(mp, nep, argp)) 1401 return (error); 1402 mp->mnt_flag |= MNT_EXPORTED; 1403 } 1404 return (0); 1405} 1406 1407struct netcred * 1408vfs_export_lookup(mp, nep, nam) 1409 register struct mount *mp; 1410 struct netexport *nep; 1411 struct mbuf *nam; 1412{ 1413 register struct netcred *np; 1414 register struct radix_node_head *rnh; 1415 struct sockaddr *saddr; 1416 1417 np = NULL; 1418 if (mp->mnt_flag & MNT_EXPORTED) { 1419 /* 1420 * Lookup in the export list first. 
1421 */ 1422 if (nam != NULL) { 1423 saddr = mtod(nam, struct sockaddr *); 1424 rnh = nep->ne_rtable[saddr->sa_family]; 1425 if (rnh != NULL) { 1426 np = (struct netcred *) 1427 (*rnh->rnh_matchaddr)((caddr_t)saddr, 1428 rnh); 1429 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 1430 np = NULL; 1431 } 1432 } 1433 /* 1434 * If no address match, use the default if it exists. 1435 */ 1436 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 1437 np = &nep->ne_defexported; 1438 } 1439 return (np); 1440} 1441