/* vfs_subr.c revision 1.18 */
1/* $OpenBSD: vfs_subr.c,v 1.18 1998/01/11 02:10:44 csapuntz Exp $ */ 2/* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */ 3 4/* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 42 */ 43 44/* 45 * External virtual filesystem routines 46 */ 47 48#include <sys/param.h> 49#include <sys/systm.h> 50#include <sys/proc.h> 51#include <sys/mount.h> 52#include <sys/time.h> 53#include <sys/fcntl.h> 54#include <sys/kernel.h> 55#include <sys/vnode.h> 56#include <sys/stat.h> 57#include <sys/namei.h> 58#include <sys/ucred.h> 59#include <sys/buf.h> 60#include <sys/errno.h> 61#include <sys/malloc.h> 62#include <sys/domain.h> 63#include <sys/mbuf.h> 64#include <sys/syscallargs.h> 65 66#include <vm/vm.h> 67#include <sys/sysctl.h> 68 69#include <miscfs/specfs/specdev.h> 70 71enum vtype iftovt_tab[16] = { 72 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 73 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 74}; 75int vttoif_tab[9] = { 76 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 77 S_IFSOCK, S_IFIFO, S_IFMT, 78}; 79 80int doforce = 1; /* 1 => permit forcible unmounting */ 81int prtactive = 0; /* 1 => print out reclaim of active vnodes */ 82int suid_clear = 1; /* 1 => clear SUID / SGID on owner change */ 83 84/* 85 * Insq/Remq for the vnode usage lists. 
86 */ 87#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) 88#define bufremvn(bp) { \ 89 LIST_REMOVE(bp, b_vnbufs); \ 90 (bp)->b_vnbufs.le_next = NOLIST; \ 91} 92 93struct freelst vnode_hold_list; /* list of vnodes referencing buffers */ 94struct freelst vnode_free_list; /* vnode free list */ 95 96struct mntlist mountlist; /* mounted filesystem list */ 97struct simplelock mountlist_slock; 98static struct simplelock mntid_slock; 99struct simplelock mntvnode_slock; 100struct simplelock vnode_free_list_slock; 101struct simplelock spechash_slock; 102 103 104void insmntque __P((struct vnode *, struct mount *)); 105int getdevvp __P((dev_t, struct vnode **, enum vtype)); 106int vunref __P((struct vnode *)); 107 108int vfs_hang_addrlist __P((struct mount *, struct netexport *, 109 struct export_args *)); 110int vfs_free_netcred __P((struct radix_node *, void *)); 111void vfs_free_addrlist __P((struct netexport *)); 112 113#ifdef DEBUG 114void printlockedvnodes __P((void)); 115#endif 116 117/* 118 * Initialize the vnode management data structures. 119 */ 120void 121vntblinit() 122{ 123 124 simple_lock_init(&mntvnode_slock); 125 simple_lock_init(&mntid_slock); 126 simple_lock_init(&spechash_slock); 127 TAILQ_INIT(&vnode_hold_list); 128 TAILQ_INIT(&vnode_free_list); 129 simple_lock_init(&vnode_free_list_slock); 130 CIRCLEQ_INIT(&mountlist); 131 /* 132 * Initialize the filesystem syncer. 133 */ 134 vn_initialize_syncerd(); 135} 136 137 138/* 139 * Mark a mount point as busy. Used to synchornize access and to delay 140 * unmounting. Interlock is not released n failure. 
141 */ 142 143int 144vfs_busy(mp, flags, interlkp, p) 145 struct mount *mp; 146 int flags; 147 struct simplelock *interlkp; 148 struct proc *p; 149{ 150 int lkflags; 151 152 if (mp->mnt_flag & MNT_UNMOUNT) { 153 if (flags & LK_NOWAIT) 154 return (ENOENT); 155 mp->mnt_flag |= MNT_MWAIT; 156 if (interlkp) 157 simple_unlock(interlkp); 158 /* 159 * Since all busy locks are shared except the exclusive 160 * lock granted when unmounting, the only place that a 161 * wakeup needs to be done is at the release of the 162 * exclusive lock at the end of dounmount. 163 */ 164 sleep((caddr_t)mp, PVFS); 165 if (interlkp) 166 simple_lock(interlkp); 167 return (ENOENT); 168 } 169 lkflags = LK_SHARED; 170 if (interlkp) 171 lkflags |= LK_INTERLOCK; 172 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) 173 panic("vfs_busy: unexpected lock failure"); 174 return (0); 175} 176 177 178/* 179 * Free a busy file system 180 */ 181void 182vfs_unbusy(mp, p) 183 struct mount *mp; 184 struct proc *p; 185{ 186 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); 187} 188 189/* 190 * Lookup a filesystem type, and if found allocate and initialize 191 * a mount structure for it. 192 * 193 * Devname is usually updated by mount(8) after booting. 
194 */ 195 196int 197vfs_rootmountalloc(fstypename, devname, mpp) 198 char *fstypename; 199 char *devname; 200 struct mount **mpp; 201{ 202 struct proc *p = curproc; /* XXX */ 203 struct vfsconf *vfsp; 204 struct mount *mp; 205 206 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 207 if (!strcmp(vfsp->vfc_name, fstypename)) 208 break; 209 if (vfsp == NULL) 210 return (ENODEV); 211 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 212 bzero((char *)mp, (u_long)sizeof(struct mount)); 213 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); 214 (void)vfs_busy(mp, LK_NOWAIT, 0, p); 215 LIST_INIT(&mp->mnt_vnodelist); 216 mp->mnt_vfc = vfsp; 217 mp->mnt_op = vfsp->vfc_vfsops; 218 mp->mnt_flag = MNT_RDONLY; 219 mp->mnt_vnodecovered = NULLVP; 220 vfsp->vfc_refcount++; 221 mp->mnt_stat.f_type = vfsp->vfc_typenum; 222 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 223 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 224 mp->mnt_stat.f_mntonname[0] = '/'; 225 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 226 *mpp = mp; 227 return (0); 228 } 229 230/* 231 * Find an appropriate filesystem to use for the root. If a filesystem 232 * has not been preselected, walk through the list of known filesystems 233 * trying those that have mountroot routines, and try them until one 234 * works or we have tried them all. 235 */ 236int 237vfs_mountroot() 238{ 239 struct vfsconf *vfsp; 240 extern int (*mountroot)(void); 241 int error; 242 243 if (mountroot != NULL) 244 return ((*mountroot)()); 245 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 246 if (vfsp->vfc_mountroot == NULL) 247 continue; 248 if ((error = (*vfsp->vfc_mountroot)()) == 0) 249 return (0); 250 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); 251 } 252 return (ENODEV); 253} 254 255/* 256 * Lookup a mount point by filesystem identifier. 
257 */ 258struct mount * 259vfs_getvfs(fsid) 260 fsid_t *fsid; 261{ 262 register struct mount *mp; 263 264 simple_lock(&mountlist_slock); 265 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 266 mp = mp->mnt_list.cqe_next) { 267 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 268 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { 269 simple_unlock(&mountlist_slock); 270 return (mp); 271 } 272 } 273 simple_unlock(&mountlist_slock); 274 return ((struct mount *)0); 275} 276 277 278/* 279 * Get a new unique fsid 280 */ 281void 282vfs_getnewfsid(mp) 283 struct mount *mp; 284{ 285 static u_short xxxfs_mntid; 286 287 fsid_t tfsid; 288 int mtype; 289 290 simple_lock(&mntid_slock); 291 mtype = mp->mnt_vfc->vfc_typenum; 292 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); 293 mp->mnt_stat.f_fsid.val[1] = mtype; 294 if (xxxfs_mntid == 0) 295 ++xxxfs_mntid; 296 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); 297 tfsid.val[1] = mtype; 298 if (mountlist.cqh_first != (void *)&mountlist) { 299 while (vfs_getvfs(&tfsid)) { 300 tfsid.val[0]++; 301 xxxfs_mntid++; 302 } 303 } 304 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 305 simple_unlock(&mntid_slock); 306} 307 308/* 309 * Make a 'unique' number from a mount type name. 310 * Note that this is no longer used for ffs which 311 * now has an on-disk filesystem id. 312 */ 313long 314makefstype(type) 315 char *type; 316{ 317 long rv; 318 319 for (rv = 0; *type; type++) { 320 rv <<= 2; 321 rv ^= *type; 322 } 323 return rv; 324} 325 326/* 327 * Set vnode attributes to VNOVAL 328 */ 329void 330vattr_null(vap) 331 register struct vattr *vap; 332{ 333 334 vap->va_type = VNON; 335 /* XXX These next two used to be one line, but for a GCC bug. 
*/ 336 vap->va_size = VNOVAL; 337 vap->va_bytes = VNOVAL; 338 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = 339 vap->va_fsid = vap->va_fileid = 340 vap->va_blocksize = vap->va_rdev = 341 vap->va_atime.tv_sec = vap->va_atime.tv_nsec = 342 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec = 343 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec = 344 vap->va_flags = vap->va_gen = VNOVAL; 345 vap->va_vaflags = 0; 346} 347 348/* 349 * Routines having to do with the management of the vnode table. 350 */ 351extern int (**dead_vnodeop_p) __P((void *)); 352long numvnodes; 353 354/* 355 * Return the next vnode from the free list. 356 */ 357int 358getnewvnode(tag, mp, vops, vpp) 359 enum vtagtype tag; 360 struct mount *mp; 361 int (**vops) __P((void *)); 362 struct vnode **vpp; 363{ 364 struct proc *p = curproc; /* XXX */ 365 struct freelst *listhd; 366 static int toggle; 367 struct vnode *vp; 368#ifdef DIAGNOSTIC 369 int s; 370#endif 371 372 /* 373 * We must choose whether to allocate a new vnode or recycle an 374 * existing one. The criterion for allocating a new one is that 375 * the total number of vnodes is less than the number desired or 376 * there are no vnodes on either free list. Generally we only 377 * want to recycle vnodes that have no buffers associated with 378 * them, so we look first on the vnode_free_list. If it is empty, 379 * we next consider vnodes with referencing buffers on the 380 * vnode_hold_list. The toggle ensures that half the time we 381 * will use a buffer from the vnode_hold_list, and half the time 382 * we will allocate a new one unless the list has grown to twice 383 * the desired size. We are reticent to recycle vnodes from the 384 * vnode_hold_list because we will lose the identity of all its 385 * referencing buffers. 
386 */ 387 toggle ^= 1; 388 if (numvnodes > 2 * desiredvnodes) 389 toggle = 0; 390 391 392 simple_lock(&vnode_free_list_slock); 393 if ((numvnodes < desiredvnodes) || 394 ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) && 395 ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) { 396 simple_unlock(&vnode_free_list_slock); 397 vp = (struct vnode *)malloc((u_long)sizeof *vp, 398 M_VNODE, M_WAITOK); 399 bzero((char *)vp, sizeof *vp); 400 numvnodes++; 401 } else { 402 for (vp = TAILQ_FIRST(listhd); vp != NULLVP; 403 vp = TAILQ_NEXT(vp, v_freelist)) { 404 if (simple_lock_try(&vp->v_interlock)) 405 break; 406 } 407 /* 408 * Unless this is a bad time of the month, at most 409 * the first NCPUS items on the free list are 410 * locked, so this is close enough to being empty. 411 */ 412 if (vp == NULLVP) { 413 simple_unlock(&vnode_free_list_slock); 414 tablefull("vnode"); 415 *vpp = 0; 416 return (ENFILE); 417 } 418 if (vp->v_usecount) { 419 vprint("free vnode", vp); 420 panic("free vnode isn't"); 421 } 422 TAILQ_REMOVE(listhd, vp, v_freelist); 423 /* see comment on why 0xdeadb is set at end of vgone (below) */ 424 vp->v_flag |= VGONEHACK; 425 simple_unlock(&vnode_free_list_slock); 426 vp->v_lease = NULL; 427 if (vp->v_type != VBAD) 428 vgonel(vp, p); 429 else 430 simple_unlock(&vp->v_interlock); 431#ifdef DIAGNOSTIC 432 if (vp->v_data) { 433 vprint("cleaned vnode", vp); 434 panic("cleaned vnode isn't"); 435 } 436 s = splbio(); 437 if (vp->v_numoutput) 438 panic("Clean vnode has pending I/O's"); 439 splx(s); 440#endif 441 vp->v_flag = 0; 442 vp->v_lastr = 0; 443 vp->v_ralen = 0; 444 vp->v_maxra = 0; 445 vp->v_lastw = 0; 446 vp->v_lasta = 0; 447 vp->v_cstart = 0; 448 vp->v_clen = 0; 449 vp->v_socket = 0; 450 } 451 vp->v_type = VNON; 452 cache_purge(vp); 453 vp->v_tag = tag; 454 vp->v_op = vops; 455 insmntque(vp, mp); 456 *vpp = vp; 457 vp->v_usecount = 1; 458 vp->v_data = 0; 459 return (0); 460} 461 462/* 463 * Move a vnode from one mount queue to another. 
464 */ 465void 466insmntque(vp, mp) 467 register struct vnode *vp; 468 register struct mount *mp; 469{ 470 simple_lock(&mntvnode_slock); 471 /* 472 * Delete from old mount point vnode list, if on one. 473 */ 474 475 if (vp->v_mount != NULL) 476 LIST_REMOVE(vp, v_mntvnodes); 477 /* 478 * Insert into list of vnodes for the new mount point, if available. 479 */ 480 if ((vp->v_mount = mp) != NULL) 481 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 482 simple_unlock(&mntvnode_slock); 483} 484 485 486/* 487 * Create a vnode for a block device. 488 * Used for root filesystem, argdev, and swap areas. 489 * Also used for memory file system special devices. 490 */ 491int 492bdevvp(dev, vpp) 493 dev_t dev; 494 struct vnode **vpp; 495{ 496 497 return (getdevvp(dev, vpp, VBLK)); 498} 499 500/* 501 * Create a vnode for a character device. 502 * Used for kernfs and some console handling. 503 */ 504int 505cdevvp(dev, vpp) 506 dev_t dev; 507 struct vnode **vpp; 508{ 509 510 return (getdevvp(dev, vpp, VCHR)); 511} 512 513/* 514 * Create a vnode for a device. 515 * Used by bdevvp (block device) for root file system etc., 516 * and by cdevvp (character device) for console and kernfs. 517 */ 518int 519getdevvp(dev, vpp, type) 520 dev_t dev; 521 struct vnode **vpp; 522 enum vtype type; 523{ 524 register struct vnode *vp; 525 struct vnode *nvp; 526 int error; 527 528 if (dev == NODEV) { 529 *vpp = NULLVP; 530 return (0); 531 } 532 error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp); 533 if (error) { 534 *vpp = NULLVP; 535 return (error); 536 } 537 vp = nvp; 538 vp->v_type = type; 539 if ((nvp = checkalias(vp, dev, NULL)) != 0) { 540 vput(vp); 541 vp = nvp; 542 } 543 *vpp = vp; 544 return (0); 545} 546 547/* 548 * Check to see if the new vnode represents a special device 549 * for which we already have a vnode (either because of 550 * bdevvp() or because of a different vnode representing 551 * the same block device). 
If such an alias exists, deallocate 552 * the existing contents and return the aliased vnode. The 553 * caller is responsible for filling it with its new contents. 554 */ 555struct vnode * 556checkalias(nvp, nvp_rdev, mp) 557 register struct vnode *nvp; 558 dev_t nvp_rdev; 559 struct mount *mp; 560{ 561 struct proc *p = curproc; 562 register struct vnode *vp; 563 struct vnode **vpp; 564 565 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 566 return (NULLVP); 567 568 vpp = &speclisth[SPECHASH(nvp_rdev)]; 569loop: 570 simple_lock(&spechash_slock); 571 for (vp = *vpp; vp; vp = vp->v_specnext) { 572 simple_lock(&vp->v_interlock); 573 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 574 continue; 575 /* 576 * Alias, but not in use, so flush it out. 577 */ 578 if (vp->v_usecount == 0) { 579 simple_unlock(&spechash_slock); 580 vgonel(vp, p); 581 goto loop; 582 } 583 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { 584 simple_unlock(&spechash_slock); 585 goto loop; 586 } 587 break; 588 } 589 if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) { 590 MALLOC(nvp->v_specinfo, struct specinfo *, 591 sizeof(struct specinfo), M_VNODE, M_WAITOK); 592 nvp->v_rdev = nvp_rdev; 593 nvp->v_hashchain = vpp; 594 nvp->v_specnext = *vpp; 595 nvp->v_specmountpoint = NULL; 596 nvp->v_speclockf = NULL; 597 simple_unlock(&spechash_slock); 598 *vpp = nvp; 599 if (vp != NULLVP) { 600 nvp->v_flag |= VALIASED; 601 vp->v_flag |= VALIASED; 602 vput(vp); 603 } 604 return (NULLVP); 605 } 606 simple_unlock(&spechash_slock); 607 VOP_UNLOCK(vp, 0, p); 608 simple_lock(&vp->v_interlock); 609 vclean(vp, 0, p); 610 vp->v_op = nvp->v_op; 611 vp->v_tag = nvp->v_tag; 612 nvp->v_type = VNON; 613 insmntque(vp, mp); 614 return (vp); 615} 616 617/* 618 * Grab a particular vnode from the free list, increment its 619 * reference count and lock it. The vnode lock bit is set the 620 * vnode is being eliminated in vgone. 
The process is awakened 621 * when the transition is completed, and an error returned to 622 * indicate that the vnode is no longer usable (possibly having 623 * been changed to a new file system type). 624 */ 625int 626vget(vp, flags, p) 627 struct vnode *vp; 628 int flags; 629 struct proc *p; 630{ 631 int error; 632 /* 633 * If the vnode is in the process of being cleaned out for 634 * another use, we wait for the cleaning to finish and then 635 * return failure. Cleaning is determined by checking that 636 * the VXLOCK flag is set. 637 */ 638 if ((flags & LK_INTERLOCK) == 0) 639 simple_lock(&vp->v_interlock); 640 if (vp->v_flag & VXLOCK) { 641 vp->v_flag |= VXWANT; 642 simple_unlock(&vp->v_interlock); 643 tsleep((caddr_t)vp, PINOD, "vget", 0); 644 return (ENOENT); 645 } 646 if (vp->v_usecount == 0) { 647 simple_lock(&vnode_free_list_slock); 648 if (vp->v_holdcnt > 0) 649 TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); 650 else 651 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 652 simple_unlock(&vnode_free_list_slock); 653 } 654 vp->v_usecount++; 655 if (flags & LK_TYPE_MASK) { 656 if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { 657 vunref(vp); 658 simple_unlock(&vp->v_interlock); 659 } 660 return (error); 661 } 662 simple_unlock(&vp->v_interlock); 663 return (0); 664} 665 666 667#ifdef DIAGNOSTIC 668/* 669 * Vnode reference. 
670 */ 671void 672vref(vp) 673 struct vnode *vp; 674{ 675 simple_lock(&vp->v_interlock); 676 if (vp->v_usecount <= 0) 677 panic("vref used where vget required"); 678 vp->v_usecount++; 679 simple_unlock(&vp->v_interlock); 680} 681#endif /* DIAGNOSTIC */ 682 683int 684vunref(vp) 685 struct vnode *vp; 686{ 687#ifdef DIAGNOSTIC 688 if (vp == NULL) 689 panic("vrele: null vp"); 690#endif 691 simple_lock (&vp->v_interlock); 692 vp->v_usecount--; 693 if (vp->v_usecount > 0) { 694 simple_unlock(&vp->v_interlock); 695 return (vp->v_usecount); 696 } 697#ifdef DIAGNOSTIC 698 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 699 vprint("vrele: bad ref count", vp); 700 panic("vrele: ref cnt"); 701 } 702#endif 703 /* 704 * insert at tail of LRU list 705 */ 706 simple_lock(&vnode_free_list_slock); 707 if (vp->v_holdcnt > 0) 708 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); 709 else 710 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 711 simple_unlock(&vnode_free_list_slock); 712 713 return (0); 714} 715 716/* 717 * vput(), just unlock and vrele() 718 */ 719void 720vput(vp) 721 register struct vnode *vp; 722{ 723 struct proc *p = curproc; /* XXX */ 724 725#ifdef DIGANOSTIC 726 if (vp == NULL) 727 panic("vput: null vp"); 728#endif 729 simple_lock(&vp->v_interlock); 730 vp->v_usecount--; 731 if (vp->v_usecount > 0) { 732 simple_unlock(&vp->v_interlock); 733 VOP_UNLOCK(vp, 0, p); 734 return; 735 } 736#ifdef DIAGNOSTIC 737 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 738 vprint("vput: bad ref count", vp); 739 panic("vput: ref cnt"); 740 } 741#endif 742 /* 743 * insert at tail of LRU list 744 */ 745 simple_lock(&vnode_free_list_slock); 746 if (vp->v_holdcnt > 0) 747 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); 748 else 749 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 750 simple_unlock(&vnode_free_list_slock); 751 simple_unlock(&vp->v_interlock); 752 VOP_INACTIVE(vp, p); 753} 754 755/* 756 * Vnode release - use for active VNODES. 
757 * If count drops to zero, call inactive routine and return to freelist. 758 */ 759void 760vrele(vp) 761 register struct vnode *vp; 762{ 763 struct proc *p = curproc; 764 765 if (vunref(vp) == 0 && 766 vn_lock(vp, LK_EXCLUSIVE |LK_INTERLOCK, p) == 0) 767 VOP_INACTIVE(vp, p); 768} 769 770#ifdef DIAGNOSTIC 771/* 772 * Page or buffer structure gets a reference. 773 */ 774void 775vhold(vp) 776 register struct vnode *vp; 777{ 778 779 /* 780 * If it is on the freelist and the hold count is currently 781 * zero, move it to the hold list. 782 * 783 * The VGONEHACK flag reflects a call from getnewvnode, 784 * which will remove the vnode from the free list, but 785 * will not increment the ref count until after it calls vgone 786 * If the ref count we're incremented first, vgone would 787 * (incorrectly) try to close the previous instance of the 788 * underlying object. 789 */ 790 simple_lock(&vp->v_interlock); 791 if (!(vp->v_flag & VGONEHACK) && 792 vp->v_holdcnt == 0 && vp->v_usecount == 0) { 793 simple_lock(&vnode_free_list_slock); 794 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 795 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); 796 simple_unlock(&vnode_free_list_slock); 797 } 798 vp->v_holdcnt++; 799 simple_unlock(&vp->v_interlock); 800} 801 802/* 803 * Page or buffer structure frees a reference. 804 */ 805void 806holdrele(vp) 807 register struct vnode *vp; 808{ 809 810 simple_lock(&vp->v_interlock); 811 if (vp->v_holdcnt <= 0) 812 panic("holdrele: holdcnt"); 813 vp->v_holdcnt--; 814 /* 815 * If it is on the holdlist and the hold count drops to 816 * zero, move it to the free list. 
817 * 818 * See above for VGONEHACK 819 */ 820 if (!(vp->v_flag & VGONEHACK) && 821 vp->v_holdcnt == 0 && vp->v_usecount == 0) { 822 simple_lock(&vnode_free_list_slock); 823 TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); 824 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 825 simple_unlock(&vnode_free_list_slock); 826 } 827 simple_unlock(&vp->v_interlock); 828} 829#endif /* DIAGNOSTIC */ 830 831/* 832 * Remove any vnodes in the vnode table belonging to mount point mp. 833 * 834 * If MNT_NOFORCE is specified, there should not be any active ones, 835 * return error if any are found (nb: this is a user error, not a 836 * system error). If MNT_FORCE is specified, detach any active vnodes 837 * that are found. 838 */ 839#ifdef DEBUG 840int busyprt = 0; /* print out busy vnodes */ 841struct ctldebug debug1 = { "busyprt", &busyprt }; 842#endif 843 844int 845vflush(mp, skipvp, flags) 846 struct mount *mp; 847 struct vnode *skipvp; 848 int flags; 849{ 850 struct proc *p = curproc; 851 register struct vnode *vp, *nvp; 852 int busy = 0; 853 854 simple_lock(&mntvnode_slock); 855loop: 856 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { 857 if (vp->v_mount != mp) 858 goto loop; 859 nvp = vp->v_mntvnodes.le_next; 860 /* 861 * Skip over a selected vnode. 862 */ 863 if (vp == skipvp) 864 continue; 865 866 simple_lock(&vp->v_interlock); 867 /* 868 * Skip over a vnodes marked VSYSTEM. 869 */ 870 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { 871 simple_unlock(&vp->v_interlock); 872 continue; 873 } 874 /* 875 * If WRITECLOSE is set, only flush out regular file 876 * vnodes open for writing. 877 */ 878 if ((flags & WRITECLOSE) && 879 (vp->v_writecount == 0 || vp->v_type != VREG)) { 880 simple_unlock(&vp->v_interlock); 881 continue; 882 } 883 /* 884 * With v_usecount == 0, all we need to do is clear 885 * out the vnode data structures and we are done. 
886 */ 887 if (vp->v_usecount == 0) { 888 simple_unlock(&mntvnode_slock); 889 vgonel(vp, p); 890 simple_lock(&mntvnode_slock); 891 continue; 892 } 893 /* 894 * If FORCECLOSE is set, forcibly close the vnode. 895 * For block or character devices, revert to an 896 * anonymous device. For all other files, just kill them. 897 */ 898 if (flags & FORCECLOSE) { 899 simple_unlock(&mntvnode_slock); 900 if (vp->v_type != VBLK && vp->v_type != VCHR) { 901 vgonel(vp, p); 902 } else { 903 vclean(vp, 0, p); 904 vp->v_op = spec_vnodeop_p; 905 insmntque(vp, (struct mount *)0); 906 } 907 simple_lock(&mntvnode_slock); 908 continue; 909 } 910#ifdef DEBUG 911 if (busyprt) 912 vprint("vflush: busy vnode", vp); 913#endif 914 simple_unlock(&vp->v_interlock); 915 busy++; 916 } 917 simple_unlock(&mntvnode_slock); 918 if (busy) 919 return (EBUSY); 920 return (0); 921} 922 923/* 924 * Disassociate the underlying file system from a vnode. 925 * The vnode interlock is held on entry. 926 */ 927void 928vclean(vp, flags, p) 929 register struct vnode *vp; 930 int flags; 931 struct proc *p; 932{ 933 int active; 934 935 /* 936 * Check to see if the vnode is in use. 937 * If so we have to reference it before we clean it out 938 * so that its count cannot fall to zero and generate a 939 * race against ourselves to recycle it. 940 */ 941 if ((active = vp->v_usecount) != 0) 942 vp->v_usecount++; 943 944 /* 945 * Prevent the vnode from being recycled or 946 * brought into use while we clean it out. 947 */ 948 if (vp->v_flag & VXLOCK) 949 panic("vclean: deadlock"); 950 vp->v_flag |= VXLOCK; 951 952 953 /* 954 * Even if the count is zero, the VOP_INACTIVE routine may still 955 * have the object locked while it cleans it out. The VOP_LOCK 956 * ensures that the VOP_INACTIVE routine is done with its work. 957 * For active vnodes, it ensures that no other activity can 958 * occur while the underlying object is being cleaned out. 
959 */ 960 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); 961 962 /* 963 * Clean out any buffers associated with the vnode. 964 */ 965 if (flags & DOCLOSE) 966 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); 967 /* 968 * If purging an active vnode, it must be closed and 969 * deactivated before being reclaimed. Note that the 970 * VOP_INACTIVE will unlock the vnode 971 */ 972 if (active) { 973 if (flags & DOCLOSE) 974 VOP_CLOSE(vp, FNONBLOCK, NOCRED, p); 975 VOP_INACTIVE(vp, p); 976 } else { 977 /* 978 * Any other processes trying to obtain this lock must first 979 * wait for VXLOCK to clear, then call the new lock operation. 980 */ 981 VOP_UNLOCK(vp, 0, p); 982 } 983 984 /* 985 * Reclaim the vnode. 986 */ 987 if (VOP_RECLAIM(vp, p)) 988 panic("vclean: cannot reclaim"); 989 if (active) { 990 if (vunref(vp) == 0 && 991 vp->v_holdcnt > 0) 992 panic("vclean: not clean"); 993 simple_unlock(&vp->v_interlock); 994 } 995 cache_purge(vp); 996 if (vp->v_vnlock) { 997 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) 998 vprint("vclean: lock not drained", vp); 999 FREE(vp->v_vnlock, M_VNODE); 1000 vp->v_vnlock = NULL; 1001 } 1002 1003 /* 1004 * Done with purge, notify sleepers of the grim news. 1005 */ 1006 vp->v_op = dead_vnodeop_p; 1007 vp->v_tag = VT_NON; 1008 vp->v_flag &= ~VXLOCK; 1009 if (vp->v_flag & VXWANT) { 1010 vp->v_flag &= ~VXWANT; 1011 wakeup((caddr_t)vp); 1012 } 1013} 1014 1015 1016 1017/* 1018 * Recycle an unused vnode to the front of the free list. 1019 * Release the passed interlock if the vnode will be recycled. 1020 */ 1021int 1022vrecycle(vp, inter_lkp, p) 1023 struct vnode *vp; 1024 struct simplelock *inter_lkp; 1025 struct proc *p; 1026{ 1027 1028 simple_lock(&vp->v_interlock); 1029 if (vp->v_usecount == 0) { 1030 if (inter_lkp) 1031 simple_unlock(inter_lkp); 1032 vgonel(vp, p); 1033 return (1); 1034 } 1035 simple_unlock(&vp->v_interlock); 1036 return (0); 1037} 1038 1039/* 1040 * Eliminate all activity associated with a vnode 1041 * in preparation for reuse. 
1042 */ 1043void 1044vgone(vp) 1045 register struct vnode *vp; 1046{ 1047 struct proc *p = curproc; 1048 1049 simple_lock (&vp->v_interlock); 1050 vgonel(vp, p); 1051} 1052 1053/* 1054 * vgone, with the vp interlock held. 1055 */ 1056void 1057vgonel(vp, p) 1058 struct vnode *vp; 1059 struct proc *p; 1060{ 1061 register struct vnode *vq; 1062 struct vnode *vx; 1063 1064 /* 1065 * If a vgone (or vclean) is already in progress, 1066 * wait until it is done and return. 1067 */ 1068 if (vp->v_flag & VXLOCK) { 1069 vp->v_flag |= VXWANT; 1070 simple_unlock(&vp->v_interlock); 1071 tsleep((caddr_t)vp, PINOD, "vgone", 0); 1072 return; 1073 } 1074 /* 1075 * Clean out the filesystem specific data. 1076 */ 1077 vclean(vp, DOCLOSE, p); 1078 /* 1079 * Delete from old mount point vnode list, if on one. 1080 */ 1081 if (vp->v_mount != NULL) 1082 insmntque(vp, (struct mount *)0); 1083 /* 1084 * If special device, remove it from special device alias list 1085 * if it is on one. 1086 */ 1087 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { 1088 simple_lock(&spechash_slock); 1089 if (*vp->v_hashchain == vp) { 1090 *vp->v_hashchain = vp->v_specnext; 1091 } else { 1092 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1093 if (vq->v_specnext != vp) 1094 continue; 1095 vq->v_specnext = vp->v_specnext; 1096 break; 1097 } 1098 if (vq == NULL) 1099 panic("missing bdev"); 1100 } 1101 if (vp->v_flag & VALIASED) { 1102 vx = NULL; 1103 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1104 if (vq->v_rdev != vp->v_rdev || 1105 vq->v_type != vp->v_type) 1106 continue; 1107 if (vx) 1108 break; 1109 vx = vq; 1110 } 1111 if (vx == NULL) 1112 panic("missing alias"); 1113 if (vq == NULL) 1114 vx->v_flag &= ~VALIASED; 1115 vp->v_flag &= ~VALIASED; 1116 } 1117 simple_unlock(&spechash_slock); 1118 FREE(vp->v_specinfo, M_VNODE); 1119 vp->v_specinfo = NULL; 1120 } 1121 /* 1122 * If it is on the freelist and not already at the head, 1123 * move it to the head of the list. 
1124 * 1125 * See above about the VGONEHACK 1126 */ 1127 if (vp->v_usecount == 0) { 1128 simple_lock(&vnode_free_list_slock); 1129 if (vp->v_holdcnt > 0) 1130 panic("vgonel: not clean"); 1131 if (!(vp->v_flag & VGONEHACK) && 1132 TAILQ_FIRST(&vnode_free_list) != vp) { 1133 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1134 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1135 } 1136 simple_unlock(&vnode_free_list_slock); 1137 } 1138 vp->v_type = VBAD; 1139} 1140 1141/* 1142 * Lookup a vnode by device number. 1143 */ 1144int 1145vfinddev(dev, type, vpp) 1146 dev_t dev; 1147 enum vtype type; 1148 struct vnode **vpp; 1149{ 1150 register struct vnode *vp; 1151 int rc =0; 1152 1153 simple_lock(&spechash_slock); 1154 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1155 if (dev != vp->v_rdev || type != vp->v_type) 1156 continue; 1157 *vpp = vp; 1158 rc = 1; 1159 break; 1160 } 1161 simple_unlock(&spechash_slock); 1162 return (rc); 1163} 1164 1165/* 1166 * Calculate the total number of references to a special device. 1167 */ 1168int 1169vcount(vp) 1170 struct vnode *vp; 1171{ 1172 struct vnode *vq, *vnext; 1173 int count; 1174 1175loop: 1176 if ((vp->v_flag & VALIASED) == 0) 1177 return (vp->v_usecount); 1178 simple_lock(&spechash_slock); 1179 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { 1180 vnext = vq->v_specnext; 1181 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 1182 continue; 1183 /* 1184 * Alias, but not in use, so flush it out. 1185 */ 1186 if (vq->v_usecount == 0 && vq != vp) { 1187 simple_unlock(&spechash_slock); 1188 vgone(vq); 1189 goto loop; 1190 } 1191 count += vq->v_usecount; 1192 } 1193 simple_unlock(&spechash_slock); 1194 return (count); 1195} 1196 1197/* 1198 * Print out a description of a vnode. 
 */
/* Printable names for the enum vtype values, indexed by v_type. */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

/*
 * Print a one-line (plus optional VOP_PRINT detail) description of a
 * vnode: type, reference counts, and the flag bits that are set.
 * label, if non-NULL, is printed as a prefix.
 */
void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %ld,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	/*
	 * Build a "|FLAG|FLAG..." string; the leading '|' is skipped
	 * when printing (&buf[1] below).
	 */
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct proc *p = curproc;
	register struct mount *mp, *nmp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	    mp = nmp) {
		/*
		 * vfs_busy() releases mountlist_slock on success; on
		 * failure it is still held and we just skip this mount.
		 */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		/* Retake the list lock before advancing and unbusying. */
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

}
#endif

/*
 * Top level filesystem related information gathering.
 */
/*
 * Handle the vfs.* sysctl tree.  Non-generic requests (name[0] is a
 * filesystem type number) are forwarded to that filesystem's own
 * vfs_sysctl handler; VFS_GENERIC requests report the maximum type
 * number or a vfsconf record for a given type.
 * Returns 0 or an errno (ENOTDIR is overloaded to mean "more name
 * components expected").
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	struct vfsconf *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		/* Dispatch to the filesystem whose type number matches. */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
		    sizeof(struct vfsconf)));
	}
	return (EOPNOTSUPP);
}


int kinfo_vdebug = 1;		/* 1 => report vnode-list races via printf */
int kinfo_vgetfailed;
/* Slop added to the size estimate so a growing vnode list still fits. */
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
1326 */ 1327/* ARGSUSED */ 1328int 1329sysctl_vnode(where, sizep, p) 1330 char *where; 1331 size_t *sizep; 1332 struct proc *p; 1333{ 1334 register struct mount *mp, *nmp; 1335 struct vnode *vp, *nvp; 1336 register char *bp = where, *savebp; 1337 char *ewhere; 1338 int error; 1339 1340#define VPTRSZ sizeof (struct vnode *) 1341#define VNODESZ sizeof (struct vnode) 1342 if (where == NULL) { 1343 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 1344 return (0); 1345 } 1346 ewhere = where + *sizep; 1347 1348 simple_lock(&mountlist_slock); 1349 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1350 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 1351 nmp = mp->mnt_list.cqe_next; 1352 continue; 1353 } 1354 savebp = bp; 1355again: 1356 for (vp = mp->mnt_vnodelist.lh_first; 1357 vp != NULL; 1358 vp = nvp) { 1359 /* 1360 * Check that the vp is still associated with 1361 * this filesystem. RACE: could have been 1362 * recycled onto the same filesystem. 1363 */ 1364 if (vp->v_mount != mp) { 1365 simple_unlock(&mntvnode_slock); 1366 if (kinfo_vdebug) 1367 printf("kinfo: vp changed\n"); 1368 bp = savebp; 1369 goto again; 1370 } 1371 nvp = vp->v_mntvnodes.le_next; 1372 if (bp + VPTRSZ + VNODESZ > ewhere) { 1373 simple_unlock(&mntvnode_slock); 1374 *sizep = bp - where; 1375 return (ENOMEM); 1376 } 1377 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 1378 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 1379 return (error); 1380 bp += VPTRSZ + VNODESZ; 1381 simple_lock(&mntvnode_slock); 1382 } 1383 1384 simple_unlock(&mntvnode_slock); 1385 simple_lock(&mountlist_slock); 1386 nmp = mp->mnt_list.cqe_next; 1387 vfs_unbusy(mp, p); 1388 } 1389 1390 simple_unlock(&mountlist_slock); 1391 1392 *sizep = bp - where; 1393 return (0); 1394} 1395 1396/* 1397 * Check to see if a filesystem is mounted on a block device. 
 */
/*
 * Check to see if a filesystem is mounted on a block device.
 * Returns EBUSY if this vnode (or any alias of the same device)
 * has a filesystem mounted on it, 0 otherwise.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		/* Any alias of the same dev/type being mounted counts too. */
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 *
 * An empty address (ex_addrlen == 0) installs the default export entry;
 * otherwise the address (and optional mask) are copied in from userland
 * and inserted into a per-address-family radix tree, attached on demand.
 * Returns 0 or an errno; on error the netcred allocation is freed.
 */
int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		/* Default entry: only one allowed per mount. */
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	/* netcred, address and mask live in one allocation. */
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	/* Clamp user-supplied sa_len so it cannot overrun the copy. */
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	/*
	 * NOTE(review): sa_family comes from userland and is used to index
	 * ne_rtable[] without a bounds check against AF_MAX — verify
	 * callers, or add a range check here.
	 */
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/*
 * Radix-tree walker callback: delete one export entry from the tree
 * and free it.  'w' is the radix_node_head being torn down.
 */
/* ARGSUSED */
int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
/*
 * Free every per-address-family export radix tree attached to nep,
 * deleting and freeing each netcred entry along the way.
 */
void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i]) != NULL) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

/*
 * Update the export information for a mount point: MNT_DELEXPORT
 * tears down the existing lists first, MNT_EXPORTED (re)installs the
 * entry described by argp via vfs_hang_addrlist().
 * Returns 0 or the error from vfs_hang_addrlist().
 */
int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

/*
 * Look up the export credentials matching the client address in 'nam'
 * (an mbuf holding a sockaddr).  Falls back to the default export entry
 * if no address-specific match exists; returns NULL if not exported to
 * this client at all.
 */
struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr)((caddr_t)saddr,
				    rnh);
				/* The tree root is not a real entry. */
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Do the usual access checking.
1589 * file_mode, uid and gid are from the vnode in question, 1590 * while acc_mode and cred are from the VOP_ACCESS parameter list 1591 */ 1592int 1593vaccess(file_mode, uid, gid, acc_mode, cred) 1594 mode_t file_mode; 1595 uid_t uid; 1596 gid_t gid; 1597 mode_t acc_mode; 1598 struct ucred *cred; 1599{ 1600 mode_t mask; 1601 1602 /* User id 0 always gets access. */ 1603 if (cred->cr_uid == 0) 1604 return 0; 1605 1606 mask = 0; 1607 1608 /* Otherwise, check the owner. */ 1609 if (cred->cr_uid == uid) { 1610 if (acc_mode & VEXEC) 1611 mask |= S_IXUSR; 1612 if (acc_mode & VREAD) 1613 mask |= S_IRUSR; 1614 if (acc_mode & VWRITE) 1615 mask |= S_IWUSR; 1616 return (file_mode & mask) == mask ? 0 : EACCES; 1617 } 1618 1619 /* Otherwise, check the groups. */ 1620 if (cred->cr_gid == gid || groupmember(gid, cred)) { 1621 if (acc_mode & VEXEC) 1622 mask |= S_IXGRP; 1623 if (acc_mode & VREAD) 1624 mask |= S_IRGRP; 1625 if (acc_mode & VWRITE) 1626 mask |= S_IWGRP; 1627 return (file_mode & mask) == mask ? 0 : EACCES; 1628 } 1629 1630 /* Otherwise, check everyone else. */ 1631 if (acc_mode & VEXEC) 1632 mask |= S_IXOTH; 1633 if (acc_mode & VREAD) 1634 mask |= S_IROTH; 1635 if (acc_mode & VWRITE) 1636 mask |= S_IWOTH; 1637 return (file_mode & mask) == mask ? 0 : EACCES; 1638} 1639 1640/* 1641 * Unmount all file systems. 1642 * We traverse the list in reverse order under the assumption that doing so 1643 * will avoid needing to worry about dependencies. 
 */
/*
 * Forcibly unmount every mounted filesystem, walking the mount list
 * backwards so filesystems mounted on top of others come off first.
 * Failures are reported but do not stop the walk.
 */
void
vfs_unmountall()
{
	register struct mount *mp, *nmp;
	int allerror, error;

	for (allerror = 0,
	    mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		/* Grab the predecessor first; dounmount frees mp. */
		nmp = mp->mnt_list.cqe_prev;
		if ((error = dounmount(mp, MNT_FORCE, curproc)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}

/*
 * Sync and unmount file systems before shutting down.
 * Syncs, unmounts everything (unless panicking), syncs again, then
 * polls the buffer pool until no busy buffers remain or we give up.
 */
void
vfs_shutdown()
{
	register struct buf *bp;
	int iter, nbusy;

	/* XXX Should suspend scheduling. */
	(void) spl0();

	printf("syncing disks... ");

	if (panicstr == 0) {
		/* Release inodes held by texts before update. */
		vnode_pager_umount(NULL);
#ifdef notdef
		vnshutdown();
#endif

		/* Sync before unmount, in case we hang on something. */
		sys_sync(&proc0, (void *)0, (register_t *)0);

		/* Unmount file systems. */
		vfs_unmountall();
	}

	/* Sync again after unmount, just in case. */
	sys_sync(&proc0, (void *)0, (register_t *)0);

	/* Wait for sync to finish, backing off a little longer each pass. */
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
				nbusy++;
		if (nbusy == 0)
			break;
		printf("%d ", nbusy);
		DELAY(40000 * iter);
	}
	if (nbusy)
		printf("giving up\n");
	else
		printf("done\n");
}

/*
 * posix file system related system variables.
 */
/*
 * Handle the fs.posix.* sysctl leaf nodes.  Currently only
 * FS_POSIX_SETUID (the suid_clear knob); writes are refused at
 * securelevel > 0.  Returns 0 or an errno.
 */
int
fs_posix_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	/* all sysctl names at this level are terminal */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case FS_POSIX_SETUID:
		if (newp && securelevel > 0)
			return (EPERM);
		return(sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

/*
 * file system related system variables.
 * Dispatches fs.* sysctl requests to the appropriate sub-handler.
 */
int
fs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	sysctlfn *fn;

	switch (name[0]) {
	case FS_POSIX:
		fn = fs_posix_sysctl;
		break;
	default:
		return (EOPNOTSUPP);
	}
	/* Strip the consumed name component and recurse into the handler. */
	return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
}


/*
 * Routines dealing with vnodes and buffers
 */

/*
 * Update outstanding I/O count and do wakeup if requested.
 * Called on write completion: decrements the vnode's v_numoutput and
 * wakes anyone sleeping in VBWAIT for all writes to drain.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp) != NULL) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
/*
 * Flush out and invalidate all buffers associated with a vnode.
 *
 * flags: V_SAVE forces dirty data to disk first (via VOP_FSYNC);
 * V_SAVEMETA preserves indirect-block buffers (negative b_lblkno).
 * slpflag/slptimeo are passed to tsleep when waiting on busy buffers.
 * Returns 0 or the error from VOP_FSYNC/tsleep.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if ((flags & V_SAVE) && vp->v_dirtyblkhd.lh_first != NULL) {
		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		/*
		 * Pick a starting point: the clean list first, then the
		 * dirty list; with V_SAVEMETA, skip past the leading
		 * metadata buffers (negative logical block numbers).
		 */
		if ((blist = vp->v_cleanblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				/*
				 * Sleep until it is released, then restart
				 * the outer loop (the lists may change).
				 */
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bp->b_flags |= B_BUSY | B_VFLUSH;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Write out (asynchronously unless sync is set) all dirty buffers of a
 * vnode; with sync set, also wait for outstanding writes to drain and
 * retry until the dirty list is empty.
 */
void
vflushbuf(vp, sync)
	register struct vnode *vp;
	int sync;
{
	register struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bp->b_flags |= B_BUSY | B_VFLUSH;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		/* Restart: the dirty list may have changed while unblocked. */
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	/* Wait for all writes started above to complete. */
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (vp->v_dirtyblkhd.lh_first != NULL) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 * Takes a hold reference on the vnode, records the device for special
 * files, and puts the buffer on the vnode's clean list.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
/*
 * Disassociate a buffer from a vnode: remove it from the vnode's buffer
 * list, take the vnode off the syncer worklist if this was its last
 * dirty buffer, and drop the hold reference taken by bgetvp().
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	struct buf *wasdirty;

	if ((vp = bp->b_vp) == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	/* Remember whether the dirty list was non-empty before removal. */
	wasdirty = vp->v_dirtyblkhd.lh_first;
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/* Last dirty buffer gone: the syncer need not visit vp anymore. */
	if (wasdirty && LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
		LIST_REMOVE(vp, v_synclist);
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another. Used to assign buffers
 * to the appropriate clean or dirty list and to add newly dirty vnodes
 * to the appropriate filesystem syncer list.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	struct buflists *listheadp;
	struct buf *wasdirty;
	int delay;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	wasdirty = newvp->v_dirtyblkhd.lh_first;
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		/* No dirty buffers left: remove from the syncer worklist. */
		if (wasdirty && LIST_FIRST(&newvp->v_dirtyblkhd) == NULL)
			LIST_REMOVE(newvp, v_synclist);
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		/*
		 * First dirty buffer: schedule the vnode with the syncer.
		 * Directories and mounted block devices get shorter delays
		 * so their metadata reaches disk sooner.
		 */
		if (LIST_FIRST(listheadp) == NULL) {
			switch (newvp->v_type) {
			case VDIR:
				delay = syncdelay / 3;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = syncdelay / 2;
					break;
				}
				/* fall through */
			default:
				delay = syncdelay;
			}
			vn_syncer_add_to_worklist(newvp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}