#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

#ifdef DDB
extern void printlockedvnodes __P((void));
#endif
extern void vclean __P((struct vnode *vp, int flags));
extern void vfs_unmountroot __P((struct mount *rootfs));

/*
 * Conversion tables: inode IFMT mode bits to vnode type (indexed by
 * mode >> 12), and vnode type back to IFMT bits.  The two must stay
 * in sync.
 */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 * bufremvn() marks the removed entry with NOLIST so callers can tell
 * an off-list buffer from one on a clean/dirty list.
 */
#define bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define bufremvn(bp) {  \
	LIST_REMOVE(bp, b_vnbufs); \
	(bp)->b_vnbufs.le_next = NOLIST; \
}

TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
u_long freevnodes = 0;		/* count of vnodes on vnode_free_list */

struct mntlist mountlist;	/* mounted filesystem list */

/* Target size of the vnode pool; computed in vntblinit(). */
int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RD, &desiredvnodes, 0, "");

static void vfs_free_addrlist __P((struct netexport *nep));
static int vfs_free_netcred __P((struct radix_node *rn, void *w));
static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
	struct export_args *argp));

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{
	/*
	 * Scale the vnode pool with the process limit and the VM object
	 * cache; getnewvnode() uses this as its allocation target.
	 */
	desiredvnodes = maxproc + vm_object_cache_max;

	TAILQ_INIT(&vnode_free_list);
	CIRCLEQ_INIT(&mountlist);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 * Sleeps until the current MNT_MLOCK holder drops the lock; always
 * returns 0.
 */
int
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		/* Tell vfs_unlock() there is a waiter before sleeping. */
		mp->mnt_flag |= MNT_MWAIT;
		(void) tsleep((caddr_t) mp, PVFS, "vfslck", 0);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t) mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 * Returns 1 (without taking the busy flag) if the filesystem is
 * being unmounted; 0 once MNT_MPBUSY has been acquired.
 */
int
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		/* Sleep channel is &mnt_flag here, not mp as in vfs_lock(). */
		(void) tsleep((caddr_t) &mp->mnt_flag, PVFS, "vfsbsy", 0);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
void
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t) &mp->mnt_flag);
	}
}

/*
 * Unmount the root filesystem: busy and lock it, flush cached pages
 * and name-cache entries, sync it, then force the unmount.  Errors
 * are only reported on the console -- this runs at shutdown, so there
 * is nothing better to do with them.
 */
void
vfs_unmountroot(struct mount *rootfs)
{
	struct mount *mp = rootfs;
	int error;

	if (vfs_busy(mp)) {
		printf("failed to unmount root\n");
		return;
	}
	mp->mnt_flag |= MNT_UNMOUNT;
	if ((error = vfs_lock(mp))) {
		printf("lock of root filesystem failed (%d)\n", error);
		return;
	}
	vnode_pager_umount(mp);	/* release cached vnodes */
	cache_purgevfs(mp);	/* remove cache entries for this file sys */

	if ((error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc)))
		printf("sync of root filesystem failed (%d)\n", error);

	if ((error = VFS_UNMOUNT(mp, MNT_FORCE, initproc))) {
		printf("unmount of root filesystem failed (");
		if (error == EBUSY)
			printf("BUSY)\n");
		else
			printf("%d)\n", error);
	}
	mp->mnt_flag &= ~MNT_UNMOUNT;
	vfs_unbusy(mp);
}

/*
 * Unmount all filesystems. Should only be called by halt().
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp, *rootfs = NULL;
	int error;

	/*
	 * Unmount all but rootfs.  Walk the list backwards so the most
	 * recently mounted (and thus innermost) filesystems go first.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;

		if (mp->mnt_flag & MNT_ROOTFS) {
			rootfs = mp;	/* remember root, unmount it last */
			continue;
		}
		error = dounmount(mp, MNT_FORCE, initproc);
		if (error) {
			printf("unmount of %s failed (", mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}

	/* and finally... */
	if (rootfs) {
		vfs_unmountroot(rootfs);
	} else {
		printf("no root filesystem\n");
	}
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	/* Linear scan of the mounted filesystem list; NULL if not found. */
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
			return (mp);
	}
	return ((struct mount *) 0);
}

/*
 * Get a new unique fsid
 */
void
getnewfsid(mp, mtype)
	struct mount *mp;
	int mtype;
{
	/* Monotonic minor-number generator shared by all filesystem types. */
	static u_short xxxfs_mntid;

	fsid_t tfsid;

	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	/* Bump the candidate id until no mounted filesystem is using it. */
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
	    vap->va_fsid = vap->va_fileid =
	    vap->va_blocksize = vap->va_rdev =
	    vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
	    vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
	    vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
	    vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern vop_t **dead_vnodeop_p;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	vop_t **vops;
	struct vnode **vpp;
{
	register struct vnode *vp;

	vp = vnode_free_list.tqh_first;
	/*
	 * we allocate a new vnode if
	 * 1. we don't have any free
	 *	Pretty obvious, we actually used to panic, but that
	 *	is a silly thing to do.
	 * 2. we haven't filled our pool yet
	 *	We don't want to trash the incore (VM-)vnodecache.
	 * 3. if less than 1/4th of our vnodes are free.
	 *	We don't want to trash the namei cache either.
	 */
	if (freevnodes < (numvnodes >> 2) ||
	    numvnodes < desiredvnodes ||
	    vp == NULL) {
		vp = (struct vnode *) malloc((u_long) sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *) vp, sizeof *vp);
		numvnodes++;
	} else {
		/* Recycle the head of the free list. */
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;

		if (vp->v_usecount)
			panic("free vnode isn't");

		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb;
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgone(vp);
#ifdef DIAGNOSTIC
		{
			int s;

			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
		}
#endif
		/* Reset the per-vnode state left over from the old identity. */
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL)
		return;
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp)) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		/* Last write completed: wake anyone in vinvalbuf(). */
		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t) &vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 * V_SAVE forces dirty data to disk first; V_SAVEMETA preserves
 * metadata buffers (those with negative logical block numbers).
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if (flags & V_SAVE) {
		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		/* Skip leading metadata buffers when V_SAVEMETA is set. */
		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				/*
				 * Wait for the buffer and restart the scan,
				 * since the lists may have changed.
				 */
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t) bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
			brelse(bp);
		}
	}

	/* Drain any writes still in flight before declaring victory. */
	s = splbio();
	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}
	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	object = vp->v_object;
	if (object != NULL) {
		vm_object_page_remove(object, 0, object->size,
		    (flags & V_SAVE) ? TRUE : FALSE);
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	s = splbio();
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	splx(s);

	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Associate a p-buffer with a vnode.
591 */ 592void 593pbgetvp(vp, bp) 594 register struct vnode *vp; 595 register struct buf *bp; 596{ 597 if (bp->b_vp) 598 panic("pbgetvp: not free"); 599 VHOLD(vp); 600 bp->b_vp = vp; 601 if (vp->v_type == VBLK || vp->v_type == VCHR) 602 bp->b_dev = vp->v_rdev; 603 else 604 bp->b_dev = NODEV; 605} 606 607/* 608 * Disassociate a p-buffer from a vnode. 609 */ 610void 611pbrelvp(bp) 612 register struct buf *bp; 613{ 614 struct vnode *vp; 615 616 if (bp->b_vp == (struct vnode *) 0) 617 panic("brelvp: NULL"); 618 619 vp = bp->b_vp; 620 bp->b_vp = (struct vnode *) 0; 621 HOLDRELE(vp); 622} 623 624/* 625 * Reassign a buffer from one vnode to another. 626 * Used to assign file specific control information 627 * (indirect blocks) to the vnode to which they belong. 628 */ 629void 630reassignbuf(bp, newvp) 631 register struct buf *bp; 632 register struct vnode *newvp; 633{ 634 register struct buflists *listheadp; 635 636 if (newvp == NULL) { 637 printf("reassignbuf: NULL"); 638 return; 639 } 640 /* 641 * Delete from old vnode list, if on one. 642 */ 643 if (bp->b_vnbufs.le_next != NOLIST) 644 bufremvn(bp); 645 /* 646 * If dirty, put on list of dirty buffers; otherwise insert onto list 647 * of clean buffers. 648 */ 649 if (bp->b_flags & B_DELWRI) { 650 struct buf *tbp; 651 652 tbp = newvp->v_dirtyblkhd.lh_first; 653 if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) { 654 bufinsvn(bp, &newvp->v_dirtyblkhd); 655 } else { 656 while (tbp->b_vnbufs.le_next && (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { 657 tbp = tbp->b_vnbufs.le_next; 658 } 659 LIST_INSERT_AFTER(tbp, bp, b_vnbufs); 660 } 661 } else { 662 listheadp = &newvp->v_cleanblkhd; 663 bufinsvn(bp, listheadp); 664 } 665} 666 667/* 668 * Create a vnode for a block device. 669 * Used for root filesystem, argdev, and swap areas. 670 * Also used for memory file system special devices. 
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	/* If an alias for this device already exists, use that vnode. */
	if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		/* vget() may sleep; restart the scan if it fails. */
		if (vget(vp, 1))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		/*
		 * No usable alias: hang specinfo off the new vnode and
		 * link it into the hash chain, marking both vnodes
		 * VALIASED if an in-use alias exists.
		 */
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/* Take over the existing device vnode for the caller. */
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set while the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, lockflag)
	register struct vnode *vp;
	int lockflag;
{

	/*
	 * If the vnode is in the process of being cleaned out for another
	 * use, we wait for the cleaning to finish and then return failure.
	 * Cleaning is determined either by checking that the VXLOCK flag is
	 * set, or that the use count is zero with the back pointer set to
	 * show that it has been removed from the free list by getnewvnode.
	 * The VXLOCK flag may not have been set yet because vclean is blocked
	 * in the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
	 */
	if ((vp->v_flag & VXLOCK) ||
	    (vp->v_usecount == 0 &&
		vp->v_freelist.tqe_prev == (struct vnode **) 0xdeadb)) {
		vp->v_flag |= VXWANT;
		(void) tsleep((caddr_t) vp, PINOD, "vget", 0);
		return (1);
	}
	/* First reference: take the vnode off the free list. */
	if (vp->v_usecount == 0) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
	}
	vp->v_usecount++;
	if (lockflag)
		VOP_LOCK(vp);
	return (0);
}

/*
 * Vnode reference, just increment the count
 */
void
vref(vp)
	struct vnode *vp;
{

	/* vref() may only bump an already-referenced vnode. */
	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
	if (vp->v_usecount < 0 /* || vp->v_writecount < 0 */ ) {
#ifdef DIAGNOSTIC
		vprint("vrele: negative ref count", vp);
#endif
		panic("vrele: negative reference cnt");
	}
	/*
	 * VAGE vnodes go to the front of the free list so they are
	 * recycled sooner; all others go to the tail.
	 */
	if (vp->v_flag & VAGE) {
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		vp->v_flag &= ~VAGE;
	} else {
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	}
	freevnodes++;

	VOP_INACTIVE(vp);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
static int busyprt = 0;		/* print out busy vnodes */
SYSCTL_INT(_debug, 1, busyprt, CTLFLAG_RW, &busyprt, 0, "");
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode. For block
		 * or character devices, revert to an anonymous device. For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(struct vnode *vp, int flags)
{
	int active;

	/*
	 * Check to see if the vnode is in use. If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		VREF(vp);
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still have
	 * the object locked while it cleans it out. The VOP_LOCK ensures that
	 * the VOP_INACTIVE routine is done with its work. For active vnodes,
	 * it ensures that no other activity can occur while the underlying
	 * object is being cleaned out.
	 */
	VOP_LOCK(vp);
	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * Any other processes trying to obtain this lock must first wait for
	 * VXLOCK to clear, then call the new lock operation.
	 */
	VOP_UNLOCK(vp);
	/*
	 * If purging an active vnode, it must be closed and deactivated
	 * before being reclaimed.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t) vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress, wait until
		 * it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			(void) tsleep((caddr_t) vp, PINOD, "vgall", 0);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we are eliminating
		 * its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will really eliminate
		 * the vnode after which time vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress, wait until it is
	 * done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		(void) tsleep((caddr_t) vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * If only one remaining alias exists, clear its
			 * VALIASED flag too.
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head, move it to
	 * the head of the list. The test of the back pointer and the
	 * reference count of zero is because it will be removed from the free
	 * list by getnewvnode, but will not have its reference count
	 * incremented until after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to close the
	 * previous instance of the underlying object. So, the back pointer is
	 * explicitly set to `0xdeadb' in getnewvnode after removing it from
	 * the freelist to ensure that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev != (struct vnode **) 0xdeadb &&
	    vnode_free_list.tqh_first != vp) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 * Returns 1 and sets *vpp on success, 0 if no vnode matches.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (1);
	}
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	/* Worst case flag string is ~50 bytes, so 64 is sufficient. */
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %ld,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	/* &buf[1] skips the leading '|' separator. */
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes(void)
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next)
			if (VOP_ISLOCKED(vp))
				vprint((char *) 0, vp);
	}
}
#endif

int kinfo_vdebug = 1;		/* report races seen by sysctl_vnode */
int kinfo_vgetfailed;

/* Extra slop in the size estimate for vnodes created mid-copyout. */
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
static int
sysctl_vnode SYSCTL_HANDLER_ARGS
{
	register struct mount *mp, *nmp;
	struct vnode *vp;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)

	req->lock = 0;
	if (!req->oldptr)	/* Make an estimate */
		return (SYSCTL_OUT(req, 0,
		    (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_next;
		/* Skip filesystems that are busy (e.g. being unmounted). */
		if (vfs_busy(mp))
			continue;
again:
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next) {
			/*
			 * Check that the vp is still associated with this
			 * filesystem. RACE: could have been recycled onto
			 * the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				goto again;
			}
			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
			    (error = SYSCTL_OUT(req, vp, VNODESZ))) {
				vfs_unbusy(mp);
				return (error);
			}
		}
		vfs_unbusy(mp);
	}

	return (0);
}

SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_vnode, "S,vnode", "");

/*
 * Check to see if a filesystem is mounted on a block device.
 * Returns EBUSY if the device (or any alias of it) is mounted on.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON)
				return (EBUSY);
		}
	}
	return (0);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
1362 */ 1363static int 1364vfs_hang_addrlist(struct mount *mp, struct netexport *nep, 1365 struct export_args *argp) 1366{ 1367 register struct netcred *np; 1368 register struct radix_node_head *rnh; 1369 register int i; 1370 struct radix_node *rn; 1371 struct sockaddr *saddr, *smask = 0; 1372 struct domain *dom; 1373 int error; 1374 1375 if (argp->ex_addrlen == 0) { 1376 if (mp->mnt_flag & MNT_DEFEXPORTED) 1377 return (EPERM); 1378 np = &nep->ne_defexported; 1379 np->netc_exflags = argp->ex_flags; 1380 np->netc_anon = argp->ex_anon; 1381 np->netc_anon.cr_ref = 1; 1382 mp->mnt_flag |= MNT_DEFEXPORTED; 1383 return (0); 1384 } 1385 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 1386 np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); 1387 bzero((caddr_t) np, i); 1388 saddr = (struct sockaddr *) (np + 1); 1389 if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) 1390 goto out; 1391 if (saddr->sa_len > argp->ex_addrlen) 1392 saddr->sa_len = argp->ex_addrlen; 1393 if (argp->ex_masklen) { 1394 smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); 1395 error = copyin(argp->ex_addr, (caddr_t) smask, argp->ex_masklen); 1396 if (error) 1397 goto out; 1398 if (smask->sa_len > argp->ex_masklen) 1399 smask->sa_len = argp->ex_masklen; 1400 } 1401 i = saddr->sa_family; 1402 if ((rnh = nep->ne_rtable[i]) == 0) { 1403 /* 1404 * Seems silly to initialize every AF when most are not used, 1405 * do so on demand here 1406 */ 1407 for (dom = domains; dom; dom = dom->dom_next) 1408 if (dom->dom_family == i && dom->dom_rtattach) { 1409 dom->dom_rtattach((void **) &nep->ne_rtable[i], 1410 dom->dom_rtoffset); 1411 break; 1412 } 1413 if ((rnh = nep->ne_rtable[i]) == 0) { 1414 error = ENOBUFS; 1415 goto out; 1416 } 1417 } 1418 rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, 1419 np->netc_rnodes); 1420 if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ 1421 error = EPERM; 1422 goto out; 1423 } 1424 
np->netc_exflags = argp->ex_flags; 1425 np->netc_anon = argp->ex_anon; 1426 np->netc_anon.cr_ref = 1; 1427 return (0); 1428out: 1429 free(np, M_NETADDR); 1430 return (error); 1431} 1432 1433/* ARGSUSED */ 1434static int 1435vfs_free_netcred(struct radix_node *rn, void *w) 1436{ 1437 register struct radix_node_head *rnh = (struct radix_node_head *) w; 1438 1439 (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); 1440 free((caddr_t) rn, M_NETADDR); 1441 return (0); 1442} 1443 1444/* 1445 * Free the net address hash lists that are hanging off the mount points. 1446 */ 1447static void 1448vfs_free_addrlist(struct netexport *nep) 1449{ 1450 register int i; 1451 register struct radix_node_head *rnh; 1452 1453 for (i = 0; i <= AF_MAX; i++) 1454 if ((rnh = nep->ne_rtable[i])) { 1455 (*rnh->rnh_walktree) (rnh, vfs_free_netcred, 1456 (caddr_t) rnh); 1457 free((caddr_t) rnh, M_RTABLE); 1458 nep->ne_rtable[i] = 0; 1459 } 1460} 1461 1462int 1463vfs_export(mp, nep, argp) 1464 struct mount *mp; 1465 struct netexport *nep; 1466 struct export_args *argp; 1467{ 1468 int error; 1469 1470 if (argp->ex_flags & MNT_DELEXPORT) { 1471 vfs_free_addrlist(nep); 1472 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 1473 } 1474 if (argp->ex_flags & MNT_EXPORTED) { 1475 if ((error = vfs_hang_addrlist(mp, nep, argp))) 1476 return (error); 1477 mp->mnt_flag |= MNT_EXPORTED; 1478 } 1479 return (0); 1480} 1481 1482struct netcred * 1483vfs_export_lookup(mp, nep, nam) 1484 register struct mount *mp; 1485 struct netexport *nep; 1486 struct mbuf *nam; 1487{ 1488 register struct netcred *np; 1489 register struct radix_node_head *rnh; 1490 struct sockaddr *saddr; 1491 1492 np = NULL; 1493 if (mp->mnt_flag & MNT_EXPORTED) { 1494 /* 1495 * Lookup in the export list first. 
1496 */ 1497 if (nam != NULL) { 1498 saddr = mtod(nam, struct sockaddr *); 1499 rnh = nep->ne_rtable[saddr->sa_family]; 1500 if (rnh != NULL) { 1501 np = (struct netcred *) 1502 (*rnh->rnh_matchaddr) ((caddr_t) saddr, 1503 rnh); 1504 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 1505 np = NULL; 1506 } 1507 } 1508 /* 1509 * If no address match, use the default if it exists. 1510 */ 1511 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 1512 np = &nep->ne_defexported; 1513 } 1514 return (np); 1515} 1516 1517 1518/* 1519 * perform msync on all vnodes under a mount point 1520 * the mount point must be locked. 1521 */ 1522void 1523vfs_msync(struct mount *mp, int flags) { 1524 struct vnode *vp, *nvp; 1525loop: 1526 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 1527 1528 if (vp->v_mount != mp) 1529 goto loop; 1530 nvp = vp->v_mntvnodes.le_next; 1531 if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT)) 1532 continue; 1533 if (vp->v_object && 1534 (((vm_object_t) vp->v_object)->flags & OBJ_MIGHTBEDIRTY)) { 1535 vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE); 1536 } 1537 } 1538}
|