vfs_subr.c revision 1.24
/*	$OpenBSD: vfs_subr.c,v 1.24 1998/11/12 04:30:02 csapuntz Exp $	*/
/*	$NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;	/* 1 => permit forcible unmounting */
int prtactive = 0;	/* 1 => print out reclaim of active vnodes */
int suid_clear = 1;	/* 1 => clear SUID / SGID on owner change */
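
/*
 * A brief reading aid for the tables above: iftovt_tab and vttoif_tab
 * are the lookup tables behind the IFTOVT()/VTTOIF() style conversions
 * between inode mode bits and vnode types.  A minimal sketch of how
 * they are indexed, assuming the usual S_IFMT encoding:
 *
 *	enum vtype vt = iftovt_tab[((mode) & S_IFMT) >> 12];
 *	int ifmt = vttoif_tab[(int)vt];
 */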

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {					\
	LIST_REMOVE(bp, b_vnbufs);			\
	(bp)->b_vnbufs.le_next = NOLIST;		\
}

struct freelst vnode_hold_list;	/* list of vnodes referencing buffers */
struct freelst vnode_free_list;	/* vnode free list */

struct mntlist mountlist;	/* mounted filesystem list */
struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
struct simplelock spechash_slock;

void	insmntque __P((struct vnode *, struct mount *));
int	getdevvp __P((dev_t, struct vnode **, enum vtype));

int	vfs_hang_addrlist __P((struct mount *, struct netexport *,
	    struct export_args *));
int	vfs_free_netcred __P((struct radix_node *, void *));
void	vfs_free_addrlist __P((struct netexport *));
static __inline__ void vputonfreelist __P((struct vnode *));

#ifdef DEBUG
void	printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_hold_list);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_flag & MNT_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_flag |= MNT_MWAIT;
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		sleep((caddr_t)mp, PVFS);
		if (interlkp)
			simple_lock(interlkp);
		return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy file system
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{
	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}
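
/*
 * A minimal usage sketch for vfs_busy()/vfs_unbusy(): walkers of the
 * mountlist (see printlockedvnodes() and sysctl_vnode() below) busy
 * each mount under mountlist_slock so it cannot be unmounted out from
 * under them, roughly:
 *
 *	simple_lock(&mountlist_slock);
 *	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
 *	    mp = nmp) {
 *		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
 *			nmp = CIRCLEQ_NEXT(mp, mnt_list);
 *			continue;
 *		}
 *		... examine mp->mnt_vnodelist ...
 *		simple_lock(&mountlist_slock);
 *		nmp = CIRCLEQ_NEXT(mp, mnt_list);
 *		vfs_unbusy(mp, p);
 *	}
 *	simple_unlock(&mountlist_slock);
 */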

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
int
vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*mountroot)(void);
	int error;

	if (mountroot != NULL)
		return ((*mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 * Note that this is no longer used for ffs which
 * now has an on-disk filesystem id.
 */
long
makefstype(type)
	char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	/* XXX These next two used to be one line, but for a GCC bug. */
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
		vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
		vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
#ifdef DIAGNOSTIC
	int s;
#endif

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */
	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	simple_lock(&vnode_free_list_slock);
	if ((numvnodes < desiredvnodes) ||
	    ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) &&
	    ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
		    vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock))
				break;
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount) {
			vprint("free vnode", vp);
			panic("free vnode isn't");
		}

		TAILQ_REMOVE(listhd, vp, v_freelist);
		vp->v_flag &= ~VONFREELIST;

		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data) {
			vprint("cleaned vnode", vp);
			panic("cleaned vnode isn't");
		}
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{
	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
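
/*
 * A minimal usage sketch for bdevvp(), assuming root-mount code that
 * already knows the root device number in rootdev (the error handling
 * here is illustrative only):
 *
 *	struct vnode *rootvp;
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("can't set up root device vnode");
 *
 * The vnode comes back using spec_vnodeop_p; checkalias() below
 * describes how such a bdevvp-created vnode later interacts with other
 * vnodes for the same device.
 */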

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		simple_lock(&vp->v_interlock);
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}

	/*
	 * Common case is actually in the if statement
	 */
	if (vp == NULL || !(vp->v_tag == VT_NON && vp->v_type == VBLK)) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		nvp->v_speclockf = NULL;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}

	/*
	 * This code is the uncommon case. It is called in case
	 * we found an alias that was VT_NON && vtype of VBLK.
	 * This means we found a block device that was created
	 * using bdevvp.
	 * An example of such a vnode is the root partition device vnode
	 * created in ffs_mountroot.
	 *
	 * The vnodes created by bdevvp should not be aliased (why?).
	 */

	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;
	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	if ((vp->v_flag & VONFREELIST) && (vp->v_usecount == 0)) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		vp->v_flag &= ~VONFREELIST;
	}
	vp->v_usecount++;
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
			vp->v_usecount--;
			if (vp->v_usecount == 0)
				vputonfreelist(vp);

			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

#ifdef DIAGNOSTIC
/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

static __inline__ void
vputonfreelist(vp)
	struct vnode *vp;
{
	struct freelst *lst;

	/*
	 * insert at tail of LRU list
	 */
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0) {
		panic("Use count is not zero!");
	}

	if (vp->v_flag & VONFREELIST) {
		vprint("vnode already on free list: ", vp);
		panic("vnode already on free list");
		return;
	}
#endif

	vp->v_flag |= VONFREELIST;

	simple_lock(&vnode_free_list_slock);

	if (vp->v_holdcnt > 0)
		lst = &vnode_hold_list;
	else
		lst = &vnode_free_list;

	if (vp->v_type == VBAD)
		TAILQ_INSERT_HEAD(lst, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(lst, vp, v_freelist);

	simple_unlock(&vnode_free_list_slock);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0, p);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	vputonfreelist(vp);

	VOP_INACTIVE(vp, p);
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode release - use for active VNODES.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	vputonfreelist(vp);

	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0)
		VOP_INACTIVE(vp, p);

	simple_unlock(&vp->v_interlock);
}
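
/*
 * Reference counting summary for the routines above and below:
 * v_usecount counts active references taken by vget()/vref() and
 * dropped by vput()/vrele(); v_holdcnt counts buffers referencing the
 * vnode via vhold()/holdrele().  A vnode whose v_usecount reaches zero
 * goes onto vnode_free_list (no held buffers) or vnode_hold_list (held
 * buffers) through vputonfreelist(), and may be recycled from there by
 * getnewvnode().
 */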

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_flag & VONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.
	 */
	if ((vp->v_flag & VONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;
	register struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 * The vnode interlock is held on entry.
 */
void
vclean(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0)
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}

	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active) {
		vp->v_usecount--;
		if (vp->v_usecount == 0) {
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean");
			vputonfreelist(vp);
		}

		simple_unlock(&vp->v_interlock);
	}
	cache_purge(vp);
	if (vp->v_vnlock) {
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.
	 */
	vp->v_type = VBAD;

	if ((vp->v_flag & VONFREELIST) &&
	    vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean");
		if (TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}
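
/*
 * A minimal usage sketch for vfinddev(), assuming a caller that wants
 * the existing special vnode (if any) for a known block device number:
 *
 *	struct vnode *vp;
 *
 *	if (vfinddev(dev, VBLK, &vp))
 *		... vp is the vnode aliasing dev ...
 *	else
 *		... no vnode currently exists for dev ...
 */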

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %ld,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct proc *p = curproc;
	register struct mount *mp, *nmp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif
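
/*
 * A minimal sketch of how vfs_sysctl() below is addressed, assuming
 * the usual CTL_VFS top-level name: { CTL_VFS, VFS_GENERIC,
 * VFS_MAXTYPENUM } reads back maxvfsconf, { CTL_VFS, VFS_GENERIC,
 * VFS_CONF, <typenum> } copies out the matching struct vfsconf, and a
 * first-level name other than VFS_GENERIC is handed to that
 * filesystem's own vfs_sysctl handler.
 */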

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	struct vfsconf *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
		    sizeof(struct vfsconf)));
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	register struct mount *mp, *nmp;
	struct vnode *vp, *nvp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
again:
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}

		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}

	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i]) != NULL) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr)((caddr_t)saddr,
				    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list
 */
int
vaccess(file_mode, uid, gid, acc_mode, cred)
	mode_t file_mode;
	uid_t uid;
	gid_t gid;
	mode_t acc_mode;
	struct ucred *cred;
{
	mode_t mask;

	/* User id 0 always gets access. */
	if (cred->cr_uid == 0)
		return 0;

	mask = 0;

	/* Otherwise, check the owner. */
	if (cred->cr_uid == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check the groups. */
	if (cred->cr_gid == gid || groupmember(gid, cred)) {
		if (acc_mode & VEXEC)
			mask |= S_IXGRP;
		if (acc_mode & VREAD)
			mask |= S_IRGRP;
		if (acc_mode & VWRITE)
			mask |= S_IWGRP;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check everyone else. */
	if (acc_mode & VEXEC)
		mask |= S_IXOTH;
	if (acc_mode & VREAD)
		mask |= S_IROTH;
	if (acc_mode & VWRITE)
		mask |= S_IWOTH;
	return (file_mode & mask) == mask ? 0 : EACCES;
}
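
/*
 * A minimal usage sketch for vaccess(), assuming a filesystem-specific
 * VOP_ACCESS implementation with an inode-like structure holding the
 * on-disk mode, uid and gid (the names ip, a_mode and a_cred here are
 * illustrative only):
 *
 *	return (vaccess(ip->i_mode & ALLPERMS, ip->i_uid, ip->i_gid,
 *	    ap->a_mode, ap->a_cred));
 */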

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall()
{
	register struct mount *mp, *nmp;
	int allerror, error;

	for (allerror = 0,
	    mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		if ((error = dounmount(mp, MNT_FORCE, curproc)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	register struct buf *bp;
	int iter, nbusy;

	/* XXX Should suspend scheduling. */
	(void) spl0();

	printf("syncing disks... ");

	if (panicstr == 0) {
		/* Release inodes held by texts before update. */
		vnode_pager_umount(NULL);
#ifdef notdef
		vnshutdown();
#endif

		/* Sync before unmount, in case we hang on something. */
		sys_sync(&proc0, (void *)0, (register_t *)0);

		/* Unmount file systems. */
		vfs_unmountall();
	}

	/* Sync again after unmount, just in case. */
	sys_sync(&proc0, (void *)0, (register_t *)0);

	/* Wait for sync to finish. */
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
				nbusy++;
		if (nbusy == 0)
			break;
		printf("%d ", nbusy);
		DELAY(40000 * iter);
	}
	if (nbusy)
		printf("giving up\n");
	else
		printf("done\n");
}

/*
 * posix file system related system variables.
 */
int
fs_posix_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	/* all sysctl names at this level are terminal */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case FS_POSIX_SETUID:
		if (newp && securelevel > 0)
			return (EPERM);
		return (sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

/*
 * file system related system variables.
 */
int
fs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	sysctlfn *fn;

	switch (name[0]) {
	case FS_POSIX:
		fn = fs_posix_sysctl;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
}

/*
 * Routines dealing with vnodes and buffers
 */

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp) != NULL) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}
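
/*
 * Note on the I/O accounting above and below: writes in progress on a
 * vnode are counted in vp->v_numoutput, and vwakeup() decrements that
 * count as buffers complete.  Code that must wait for the writes to
 * drain (vinvalbuf() and vflushbuf() below) sets VBWAIT and sleeps on
 * &vp->v_numoutput until vwakeup() sees the count reach zero and does
 * the wakeup.
 */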

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		s = splbio();
		while (vp->v_numoutput) {
			vp->v_flag |= VBWAIT;
			sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
		}
		if (vp->v_dirtyblkhd.lh_first != NULL) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    vp->v_dirtyblkhd.lh_first != NULL)
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bp->b_flags |= B_BUSY | B_VFLUSH;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

void
vflushbuf(vp, sync)
	register struct vnode *vp;
	int sync;
{
	register struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bp->b_flags |= B_BUSY | B_VFLUSH;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (vp->v_dirtyblkhd.lh_first != NULL) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	struct buf *wasdirty;

	if ((vp = bp->b_vp) == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	wasdirty = vp->v_dirtyblkhd.lh_first;
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	if (wasdirty && LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
		LIST_REMOVE(vp, v_synclist);
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another. Used to assign buffers
 * to the appropriate clean or dirty list and to add newly dirty vnodes
 * to the appropriate filesystem syncer list.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	struct buflists *listheadp;
	struct buf *wasdirty;
	int delay;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	wasdirty = newvp->v_dirtyblkhd.lh_first;
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		if (wasdirty && LIST_FIRST(&newvp->v_dirtyblkhd) == NULL)
			LIST_REMOVE(newvp, v_synclist);
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if (LIST_FIRST(listheadp) == NULL) {
			switch (newvp->v_type) {
			case VDIR:
				delay = syncdelay / 3;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = syncdelay / 2;
					break;
				}
				/* fall through */
			default:
				delay = syncdelay;
			}
			vn_syncer_add_to_worklist(newvp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}