vfs_default.c revision 140734
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed 6 * to Berkeley by John Heidemann of the UCLA Ficus project. 7 * 8 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD: head/sys/kern/vfs_default.c 140734 2005-01-24 13:13:57Z phk $"); 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/bio.h> 41#include <sys/buf.h> 42#include <sys/conf.h> 43#include <sys/kernel.h> 44#include <sys/limits.h> 45#include <sys/lock.h> 46#include <sys/malloc.h> 47#include <sys/mount.h> 48#include <sys/mutex.h> 49#include <sys/unistd.h> 50#include <sys/vnode.h> 51#include <sys/poll.h> 52 53#include <vm/vm.h> 54#include <vm/vm_object.h> 55#include <vm/vm_extern.h> 56#include <vm/pmap.h> 57#include <vm/vm_map.h> 58#include <vm/vm_page.h> 59#include <vm/vm_pager.h> 60#include <vm/vnode_pager.h> 61 62static int vop_nolookup(struct vop_lookup_args *); 63static int vop_nostrategy(struct vop_strategy_args *); 64 65/* 66 * This vnode table stores what we want to do if the filesystem doesn't 67 * implement a particular VOP. 68 * 69 * If there is no specific entry here, we will return EOPNOTSUPP. 70 * 71 */ 72 73struct vop_vector default_vnodeops = { 74 .vop_default = NULL, 75 .vop_bypass = VOP_EOPNOTSUPP, 76 77 .vop_advlock = VOP_EINVAL, 78 .vop_bmap = vop_stdbmap, 79 .vop_close = VOP_NULL, 80 .vop_createvobject = vop_stdcreatevobject, 81 .vop_destroyvobject = vop_stddestroyvobject, 82 .vop_fsync = VOP_NULL, 83 .vop_getpages = vop_stdgetpages, 84 .vop_getvobject = vop_stdgetvobject, 85 .vop_getwritemount = vop_stdgetwritemount, 86 .vop_inactive = vop_stdinactive, 87 .vop_ioctl = VOP_ENOTTY, 88 .vop_islocked = vop_stdislocked, 89 .vop_lease = VOP_NULL, 90 .vop_lock = vop_stdlock, 91 .vop_lookup = vop_nolookup, 92 .vop_open = VOP_NULL, 93 .vop_pathconf = VOP_EINVAL, 94 .vop_poll = vop_nopoll, 95 .vop_putpages = vop_stdputpages, 96 .vop_readlink = VOP_EINVAL, 97 .vop_revoke = VOP_PANIC, 98 .vop_strategy = vop_nostrategy, 99 .vop_unlock = vop_stdunlock, 100}; 101 102/* 103 * Series of placeholder functions for various error returns for 104 * VOPs. 105 */ 106 107int 108vop_eopnotsupp(struct vop_generic_args *ap) 109{ 110 /* 111 printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name); 112 */ 113 114 return (EOPNOTSUPP); 115} 116 117int 118vop_ebadf(struct vop_generic_args *ap) 119{ 120 121 return (EBADF); 122} 123 124int 125vop_enotty(struct vop_generic_args *ap) 126{ 127 128 return (ENOTTY); 129} 130 131int 132vop_einval(struct vop_generic_args *ap) 133{ 134 135 return (EINVAL); 136} 137 138int 139vop_null(struct vop_generic_args *ap) 140{ 141 142 return (0); 143} 144 145/* 146 * Helper function to panic on some bad VOPs in some filesystems. 147 */ 148int 149vop_panic(struct vop_generic_args *ap) 150{ 151 152 panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name); 153} 154 155/* 156 * vop_std<something> and vop_no<something> are default functions for use by 157 * filesystems that need the "default reasonable" implementation for a 158 * particular operation. 159 * 160 * The documentation for the operations they implement exists (if it exists) 161 * in the VOP_<SOMETHING>(9) manpage (all uppercase). 162 */ 163 164/* 165 * Default vop for filesystems that do not support name lookup 166 */ 167static int 168vop_nolookup(ap) 169 struct vop_lookup_args /* { 170 struct vnode *a_dvp; 171 struct vnode **a_vpp; 172 struct componentname *a_cnp; 173 } */ *ap; 174{ 175 176 *ap->a_vpp = NULL; 177 return (ENOTDIR); 178} 179 180/* 181 * vop_nostrategy: 182 * 183 * Strategy routine for VFS devices that have none. 184 * 185 * BIO_ERROR and B_INVAL must be cleared prior to calling any strategy 186 * routine. Typically this is done for a BIO_READ strategy call. 187 * Typically B_INVAL is assumed to already be clear prior to a write 188 * and should not be cleared manually unless you just made the buffer 189 * invalid. BIO_ERROR should be cleared either way. 190 */ 191 192static int 193vop_nostrategy (struct vop_strategy_args *ap) 194{ 195 printf("No strategy for buffer at %p\n", ap->a_bp); 196 vprint("vnode", ap->a_vp); 197 ap->a_bp->b_ioflags |= BIO_ERROR; 198 ap->a_bp->b_error = EOPNOTSUPP; 199 bufdone(ap->a_bp); 200 return (EOPNOTSUPP); 201} 202 203/* 204 * vop_stdpathconf: 205 * 206 * Standard implementation of POSIX pathconf, to get information about limits 207 * for a filesystem. 208 * Override per filesystem for the case where the filesystem has smaller 209 * limits. 210 */ 211int 212vop_stdpathconf(ap) 213 struct vop_pathconf_args /* { 214 struct vnode *a_vp; 215 int a_name; 216 int *a_retval; 217 } */ *ap; 218{ 219 220 switch (ap->a_name) { 221 case _PC_LINK_MAX: 222 *ap->a_retval = LINK_MAX; 223 return (0); 224 case _PC_MAX_CANON: 225 *ap->a_retval = MAX_CANON; 226 return (0); 227 case _PC_MAX_INPUT: 228 *ap->a_retval = MAX_INPUT; 229 return (0); 230 case _PC_PIPE_BUF: 231 *ap->a_retval = PIPE_BUF; 232 return (0); 233 case _PC_CHOWN_RESTRICTED: 234 *ap->a_retval = 1; 235 return (0); 236 case _PC_VDISABLE: 237 *ap->a_retval = _POSIX_VDISABLE; 238 return (0); 239 default: 240 return (EINVAL); 241 } 242 /* NOTREACHED */ 243} 244 245/* 246 * Standard lock, unlock and islocked functions. 247 */ 248int 249vop_stdlock(ap) 250 struct vop_lock_args /* { 251 struct vnode *a_vp; 252 int a_flags; 253 struct thread *a_td; 254 } */ *ap; 255{ 256 struct vnode *vp = ap->a_vp; 257 258#ifndef DEBUG_LOCKS 259 return (lockmgr(vp->v_vnlock, ap->a_flags, VI_MTX(vp), ap->a_td)); 260#else 261 return (debuglockmgr(vp->v_vnlock, ap->a_flags, VI_MTX(vp), 262 ap->a_td, "vop_stdlock", vp->filename, vp->line)); 263#endif 264} 265 266/* See above. */ 267int 268vop_stdunlock(ap) 269 struct vop_unlock_args /* { 270 struct vnode *a_vp; 271 int a_flags; 272 struct thread *a_td; 273 } */ *ap; 274{ 275 struct vnode *vp = ap->a_vp; 276 277 return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp), 278 ap->a_td)); 279} 280 281/* See above. */ 282int 283vop_stdislocked(ap) 284 struct vop_islocked_args /* { 285 struct vnode *a_vp; 286 struct thread *a_td; 287 } */ *ap; 288{ 289 290 return (lockstatus(ap->a_vp->v_vnlock, ap->a_td)); 291} 292 293/* Mark the vnode inactive */ 294int 295vop_stdinactive(ap) 296 struct vop_inactive_args /* { 297 struct vnode *a_vp; 298 struct thread *a_td; 299 } */ *ap; 300{ 301 302 VOP_UNLOCK(ap->a_vp, 0, ap->a_td); 303 return (0); 304} 305 306/* 307 * Return true for select/poll. 308 */ 309int 310vop_nopoll(ap) 311 struct vop_poll_args /* { 312 struct vnode *a_vp; 313 int a_events; 314 struct ucred *a_cred; 315 struct thread *a_td; 316 } */ *ap; 317{ 318 /* 319 * Return true for read/write. If the user asked for something 320 * special, return POLLNVAL, so that clients have a way of 321 * determining reliably whether or not the extended 322 * functionality is present without hard-coding knowledge 323 * of specific filesystem implementations. 324 * Stay in sync with kern_conf.c::no_poll(). 325 */ 326 if (ap->a_events & ~POLLSTANDARD) 327 return (POLLNVAL); 328 329 return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 330} 331 332/* 333 * Implement poll for local filesystems that support it. 334 */ 335int 336vop_stdpoll(ap) 337 struct vop_poll_args /* { 338 struct vnode *a_vp; 339 int a_events; 340 struct ucred *a_cred; 341 struct thread *a_td; 342 } */ *ap; 343{ 344 if (ap->a_events & ~POLLSTANDARD) 345 return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events)); 346 return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 347} 348 349/* 350 * Return our mount point, as we will take charge of the writes. 351 */ 352int 353vop_stdgetwritemount(ap) 354 struct vop_getwritemount_args /* { 355 struct vnode *a_vp; 356 struct mount **a_mpp; 357 } */ *ap; 358{ 359 360 *(ap->a_mpp) = ap->a_vp->v_mount; 361 return (0); 362} 363 364/* Create the VM system backing object for this vnode */ 365int 366vop_stdcreatevobject(ap) 367 struct vop_createvobject_args /* { 368 struct vnode *vp; 369 struct ucred *cred; 370 struct thread *td; 371 } */ *ap; 372{ 373 struct vnode *vp = ap->a_vp; 374 struct ucred *cred = ap->a_cred; 375 struct thread *td = ap->a_td; 376 struct vattr vat; 377 vm_object_t object; 378 int error = 0; 379 vm_ooffset_t size; 380 381 if (!vn_isdisk(vp, NULL) && vn_canvmio(vp) == FALSE) 382 return (0); 383 384 while ((object = vp->v_object) != NULL) { 385 VM_OBJECT_LOCK(object); 386 if (!(object->flags & OBJ_DEAD)) { 387 VM_OBJECT_UNLOCK(object); 388 break; 389 } 390 VOP_UNLOCK(vp, 0, td); 391 vm_object_set_flag(object, OBJ_DISCONNECTWNT); 392 msleep(object, VM_OBJECT_MTX(object), PDROP | PVM, "vodead", 0); 393 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 394 } 395 396 if (object == NULL) { 397 if (vn_isdisk(vp, NULL)) { 398 /* 399 * This simply allocates the biggest object possible 400 * for a disk vnode. This should be fixed, but doesn't 401 * cause any problems (yet). 402 */ 403 size = IDX_TO_OFF(INT_MAX); 404 } else { 405 if ((error = VOP_GETATTR(vp, &vat, cred, td)) != 0) 406 return (error); 407 size = vat.va_size; 408 } 409 410 object = vnode_pager_alloc(vp, size, 0, 0); 411 /* 412 * Dereference the reference we just created. This assumes 413 * that the object is associated with the vp. 414 */ 415 VM_OBJECT_LOCK(object); 416 object->ref_count--; 417 VM_OBJECT_UNLOCK(object); 418 vrele(vp); 419 } 420 421 KASSERT(vp->v_object != NULL, ("vop_stdcreatevobject: NULL object")); 422 423 return (error); 424} 425 426/* Destroy the VM system object associated with this vnode */ 427int 428vop_stddestroyvobject(ap) 429 struct vop_destroyvobject_args /* { 430 struct vnode *vp; 431 } */ *ap; 432{ 433 struct vnode *vp = ap->a_vp; 434 vm_object_t obj = vp->v_object; 435 436 if (obj == NULL) 437 return (0); 438 VM_OBJECT_LOCK(obj); 439 if (obj->ref_count == 0) { 440 /* 441 * vclean() may be called twice. The first time 442 * removes the primary reference to the object, 443 * the second time goes one further and is a 444 * special-case to terminate the object. 445 * 446 * don't double-terminate the object 447 */ 448 if ((obj->flags & OBJ_DEAD) == 0) 449 vm_object_terminate(obj); 450 else 451 VM_OBJECT_UNLOCK(obj); 452 } else { 453 /* 454 * Woe to the process that tries to page now :-). 455 */ 456 vm_pager_deallocate(obj); 457 VM_OBJECT_UNLOCK(obj); 458 } 459 return (0); 460} 461 462/* 463 * Return the underlying VM object. This routine may be called with or 464 * without the vnode interlock held. If called without, the returned 465 * object is not guarenteed to be valid. The syncer typically gets the 466 * object without holding the interlock in order to quickly test whether 467 * it might be dirty before going heavy-weight. vm_object's use zalloc 468 * and thus stable-storage, so this is safe. 469 */ 470int 471vop_stdgetvobject(ap) 472 struct vop_getvobject_args /* { 473 struct vnode *vp; 474 struct vm_object **objpp; 475 } */ *ap; 476{ 477 struct vnode *vp = ap->a_vp; 478 struct vm_object **objpp = ap->a_objpp; 479 480 if (objpp) 481 *objpp = vp->v_object; 482 return (vp->v_object ? 0 : EINVAL); 483} 484 485/* XXX Needs good comment and VOP_BMAP(9) manpage */ 486int 487vop_stdbmap(ap) 488 struct vop_bmap_args /* { 489 struct vnode *a_vp; 490 daddr_t a_bn; 491 struct bufobj **a_bop; 492 daddr_t *a_bnp; 493 int *a_runp; 494 int *a_runb; 495 } */ *ap; 496{ 497 498 if (ap->a_bop != NULL) 499 *ap->a_bop = &ap->a_vp->v_bufobj; 500 if (ap->a_bnp != NULL) 501 *ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize); 502 if (ap->a_runp != NULL) 503 *ap->a_runp = 0; 504 if (ap->a_runb != NULL) 505 *ap->a_runb = 0; 506 return (0); 507} 508 509int 510vop_stdfsync(ap) 511 struct vop_fsync_args /* { 512 struct vnode *a_vp; 513 struct ucred *a_cred; 514 int a_waitfor; 515 struct thread *a_td; 516 } */ *ap; 517{ 518 struct vnode *vp = ap->a_vp; 519 struct buf *bp; 520 struct bufobj *bo; 521 struct buf *nbp; 522 int s, error = 0; 523 int maxretry = 100; /* large, arbitrarily chosen */ 524 525 VI_LOCK(vp); 526loop1: 527 /* 528 * MARK/SCAN initialization to avoid infinite loops. 529 */ 530 s = splbio(); 531 TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs) { 532 bp->b_vflags &= ~BV_SCANNED; 533 bp->b_error = 0; 534 } 535 splx(s); 536 537 /* 538 * Flush all dirty buffers associated with a block device. 539 */ 540loop2: 541 s = splbio(); 542 TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) { 543 if ((bp->b_vflags & BV_SCANNED) != 0) 544 continue; 545 bp->b_vflags |= BV_SCANNED; 546 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) 547 continue; 548 VI_UNLOCK(vp); 549 if ((bp->b_flags & B_DELWRI) == 0) 550 panic("fsync: not dirty"); 551 if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) { 552 vfs_bio_awrite(bp); 553 splx(s); 554 } else { 555 bremfree(bp); 556 splx(s); 557 bawrite(bp); 558 } 559 VI_LOCK(vp); 560 goto loop2; 561 } 562 563 /* 564 * If synchronous the caller expects us to completely resolve all 565 * dirty buffers in the system. Wait for in-progress I/O to 566 * complete (which could include background bitmap writes), then 567 * retry if dirty blocks still exist. 568 */ 569 if (ap->a_waitfor == MNT_WAIT) { 570 bo = &vp->v_bufobj; 571 bufobj_wwait(bo, 0, 0); 572 if (bo->bo_dirty.bv_cnt > 0) { 573 /* 574 * If we are unable to write any of these buffers 575 * then we fail now rather than trying endlessly 576 * to write them out. 577 */ 578 TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) 579 if ((error = bp->b_error) == 0) 580 continue; 581 if (error == 0 && --maxretry >= 0) { 582 splx(s); 583 goto loop1; 584 } 585 vprint("fsync: giving up on dirty", vp); 586 error = EAGAIN; 587 } 588 } 589 VI_UNLOCK(vp); 590 splx(s); 591 592 return (error); 593} 594 595/* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */ 596int 597vop_stdgetpages(ap) 598 struct vop_getpages_args /* { 599 struct vnode *a_vp; 600 vm_page_t *a_m; 601 int a_count; 602 int a_reqpage; 603 vm_ooffset_t a_offset; 604 } */ *ap; 605{ 606 607 return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, 608 ap->a_count, ap->a_reqpage); 609} 610 611/* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */ 612int 613vop_stdputpages(ap) 614 struct vop_putpages_args /* { 615 struct vnode *a_vp; 616 vm_page_t *a_m; 617 int a_count; 618 int a_sync; 619 int *a_rtvals; 620 vm_ooffset_t a_offset; 621 } */ *ap; 622{ 623 624 return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, 625 ap->a_sync, ap->a_rtvals); 626} 627 628/* 629 * vfs default ops 630 * used to fill the vfs function table to get reasonable default return values. 631 */ 632int 633vfs_stdroot (mp, vpp, td) 634 struct mount *mp; 635 struct vnode **vpp; 636 struct thread *td; 637{ 638 639 return (EOPNOTSUPP); 640} 641 642int 643vfs_stdstatfs (mp, sbp, td) 644 struct mount *mp; 645 struct statfs *sbp; 646 struct thread *td; 647{ 648 649 return (EOPNOTSUPP); 650} 651 652int 653vfs_stdvptofh (vp, fhp) 654 struct vnode *vp; 655 struct fid *fhp; 656{ 657 658 return (EOPNOTSUPP); 659} 660 661int 662vfs_stdstart (mp, flags, td) 663 struct mount *mp; 664 int flags; 665 struct thread *td; 666{ 667 668 return (0); 669} 670 671int 672vfs_stdquotactl (mp, cmds, uid, arg, td) 673 struct mount *mp; 674 int cmds; 675 uid_t uid; 676 caddr_t arg; 677 struct thread *td; 678{ 679 680 return (EOPNOTSUPP); 681} 682 683int 684vfs_stdsync(mp, waitfor, td) 685 struct mount *mp; 686 int waitfor; 687 struct thread *td; 688{ 689 struct vnode *vp, *nvp; 690 int error, lockreq, allerror = 0; 691 692 lockreq = LK_EXCLUSIVE | LK_INTERLOCK; 693 if (waitfor != MNT_WAIT) 694 lockreq |= LK_NOWAIT; 695 /* 696 * Force stale buffer cache information to be flushed. 697 */ 698 MNT_ILOCK(mp); 699loop: 700 MNT_VNODE_FOREACH(vp, mp, nvp) { 701 702 VI_LOCK(vp); 703 if (vp->v_bufobj.bo_dirty.bv_cnt == 0) { 704 VI_UNLOCK(vp); 705 continue; 706 } 707 MNT_IUNLOCK(mp); 708 709 if ((error = vget(vp, lockreq, td)) != 0) { 710 MNT_ILOCK(mp); 711 if (error == ENOENT) 712 goto loop; 713 continue; 714 } 715 error = VOP_FSYNC(vp, waitfor, td); 716 if (error) 717 allerror = error; 718 719 VOP_UNLOCK(vp, 0, td); 720 vrele(vp); 721 MNT_ILOCK(mp); 722 } 723 MNT_IUNLOCK(mp); 724 return (allerror); 725} 726 727int 728vfs_stdnosync (mp, waitfor, td) 729 struct mount *mp; 730 int waitfor; 731 struct thread *td; 732{ 733 734 return (0); 735} 736 737int 738vfs_stdvget (mp, ino, flags, vpp) 739 struct mount *mp; 740 ino_t ino; 741 int flags; 742 struct vnode **vpp; 743{ 744 745 return (EOPNOTSUPP); 746} 747 748int 749vfs_stdfhtovp (mp, fhp, vpp) 750 struct mount *mp; 751 struct fid *fhp; 752 struct vnode **vpp; 753{ 754 755 return (EOPNOTSUPP); 756} 757 758int 759vfs_stdinit (vfsp) 760 struct vfsconf *vfsp; 761{ 762 763 return (0); 764} 765 766int 767vfs_stduninit (vfsp) 768 struct vfsconf *vfsp; 769{ 770 771 return(0); 772} 773 774int 775vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname, td) 776 struct mount *mp; 777 int cmd; 778 struct vnode *filename_vp; 779 int attrnamespace; 780 const char *attrname; 781 struct thread *td; 782{ 783 784 if (filename_vp != NULL) 785 VOP_UNLOCK(filename_vp, 0, td); 786 return (EOPNOTSUPP); 787} 788 789int 790vfs_stdsysctl(mp, op, req) 791 struct mount *mp; 792 fsctlop_t op; 793 struct sysctl_req *req; 794{ 795 796 return (EOPNOTSUPP); 797} 798 799/* end of vfs default ops */ 800