/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/vfs_default.c 189450 2009-03-06 15:35:37Z kib $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/poll.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_nostrategy(struct vop_strategy_args *);

/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
 *
 * If there is no specific entry here, we will return EOPNOTSUPP.
 */

struct vop_vector default_vnodeops = {
        .vop_default =          NULL,
        .vop_bypass =           VOP_EOPNOTSUPP,

        .vop_advlock =          vop_stdadvlock,
        .vop_advlockasync =     vop_stdadvlockasync,
        .vop_bmap =             vop_stdbmap,
        .vop_close =            VOP_NULL,
        .vop_fsync =            VOP_NULL,
        .vop_getpages =         vop_stdgetpages,
        .vop_getwritemount =    vop_stdgetwritemount,
        .vop_inactive =         VOP_NULL,
        .vop_ioctl =            VOP_ENOTTY,
        .vop_kqfilter =         vop_stdkqfilter,
        .vop_islocked =         vop_stdislocked,
        .vop_lease =            VOP_NULL,
        .vop_lock1 =            vop_stdlock,
        .vop_lookup =           vop_nolookup,
        .vop_open =             VOP_NULL,
        .vop_pathconf =         VOP_EINVAL,
        .vop_poll =             vop_nopoll,
        .vop_putpages =         vop_stdputpages,
        .vop_readlink =         VOP_EINVAL,
        .vop_revoke =           VOP_PANIC,
        .vop_strategy =         vop_nostrategy,
        .vop_unlock =           vop_stdunlock,
        .vop_vptocnp =          VOP_ENOENT,
        .vop_vptofh =           vop_stdvptofh,
};
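
/*
 * Example (hypothetical "myfs"; a sketch, not code from this file): a
 * filesystem normally supplies its own vop_vector and chains to the
 * table above through vop_default, so that any VOP it does not
 * implement falls through to these defaults:
 *
 *      static struct vop_vector myfs_vnodeops = {
 *              .vop_default =  &default_vnodeops,
 *              .vop_lookup =   myfs_lookup,
 *              .vop_read =     myfs_read,
 *              .vop_reclaim =  myfs_reclaim,
 *      };
 */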

/*
 * Series of placeholder functions for various error returns for
 * VOPs.
 */

int
vop_eopnotsupp(struct vop_generic_args *ap)
{
        /*
        printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
        */

        return (EOPNOTSUPP);
}

int
vop_ebadf(struct vop_generic_args *ap)
{

        return (EBADF);
}

int
vop_enotty(struct vop_generic_args *ap)
{

        return (ENOTTY);
}

int
vop_einval(struct vop_generic_args *ap)
{

        return (EINVAL);
}

int
vop_enoent(struct vop_generic_args *ap)
{

        return (ENOENT);
}

int
vop_null(struct vop_generic_args *ap)
{

        return (0);
}

/*
 * Helper function to panic on some bad VOPs in some filesystems.
 */
int
vop_panic(struct vop_generic_args *ap)
{

        panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
}

/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * The documentation for the operations they implement exists (if it exists)
 * in the VOP_<SOMETHING>(9) manpage (all uppercase).
 */

/*
 * Default vop for filesystems that do not support name lookup
 */
static int
vop_nolookup(ap)
        struct vop_lookup_args /* {
                struct vnode *a_dvp;
                struct vnode **a_vpp;
                struct componentname *a_cnp;
        } */ *ap;
{

        *ap->a_vpp = NULL;
        return (ENOTDIR);
}

/*
 * vop_nostrategy:
 *
 *      Strategy routine for VFS devices that have none.
 *
 *      BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
 *      routine.  Typically this is done for a BIO_READ strategy call.
 *      Typically B_INVAL is assumed to already be clear prior to a write
 *      and should not be cleared manually unless you just made the buffer
 *      invalid.  BIO_ERROR should be cleared either way.
 */

static int
vop_nostrategy (struct vop_strategy_args *ap)
{
        printf("No strategy for buffer at %p\n", ap->a_bp);
        vprint("vnode", ap->a_vp);
        ap->a_bp->b_ioflags |= BIO_ERROR;
        ap->a_bp->b_error = EOPNOTSUPP;
        bufdone(ap->a_bp);
        return (EOPNOTSUPP);
}

/*
 * Advisory record locking support
 */
int
vop_stdadvlock(struct vop_advlock_args *ap)
{
        struct vnode *vp;
        struct ucred *cred;
        struct vattr vattr;
        int error;

        vp = ap->a_vp;
        cred = curthread->td_ucred;
        vn_lock(vp, LK_SHARED | LK_RETRY);
        error = VOP_GETATTR(vp, &vattr, cred);
        VOP_UNLOCK(vp, 0);
        if (error)
                return (error);

        return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockasync(struct vop_advlockasync_args *ap)
{
        struct vnode *vp;
        struct ucred *cred;
        struct vattr vattr;
        int error;

        vp = ap->a_vp;
        cred = curthread->td_ucred;
        vn_lock(vp, LK_SHARED | LK_RETRY);
        error = VOP_GETATTR(vp, &vattr, cred);
        VOP_UNLOCK(vp, 0);
        if (error)
                return (error);

        return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
}

/*
 * vop_stdpathconf:
 *
 * Standard implementation of POSIX pathconf, to get information about limits
 * for a filesystem.
 * Override per filesystem for the case where the filesystem has smaller
 * limits.
 */
int
vop_stdpathconf(ap)
        struct vop_pathconf_args /* {
                struct vnode *a_vp;
                int a_name;
                int *a_retval;
        } */ *ap;
{

        switch (ap->a_name) {
                case _PC_NAME_MAX:
                        *ap->a_retval = NAME_MAX;
                        return (0);
                case _PC_PATH_MAX:
                        *ap->a_retval = PATH_MAX;
                        return (0);
                case _PC_LINK_MAX:
                        *ap->a_retval = LINK_MAX;
                        return (0);
                case _PC_MAX_CANON:
                        *ap->a_retval = MAX_CANON;
                        return (0);
                case _PC_MAX_INPUT:
                        *ap->a_retval = MAX_INPUT;
                        return (0);
                case _PC_PIPE_BUF:
                        *ap->a_retval = PIPE_BUF;
                        return (0);
                case _PC_CHOWN_RESTRICTED:
                        *ap->a_retval = 1;
                        return (0);
                case _PC_VDISABLE:
                        *ap->a_retval = _POSIX_VDISABLE;
                        return (0);
                default:
                        return (EINVAL);
        }
        /* NOTREACHED */
}
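
/*
 * For reference, these limits are what a userland pathconf(2) call
 * reports when a filesystem relies on this default.  A minimal
 * (hypothetical) userland sketch:
 *
 *      #include <unistd.h>
 *
 *      long name_max = pathconf("/some/path", _PC_NAME_MAX);
 *      if (name_max == -1)
 *              ... no limit, or errno holds the error ...
 */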

/*
 * Standard lock, unlock and islocked functions.
 */
int
vop_stdlock(ap)
        struct vop_lock1_args /* {
                struct vnode *a_vp;
                int a_flags;
                char *file;
                int line;
        } */ *ap;
{
        struct vnode *vp = ap->a_vp;

        return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
            LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
            ap->a_line));
}

/* See above. */
int
vop_stdunlock(ap)
        struct vop_unlock_args /* {
                struct vnode *a_vp;
                int a_flags;
        } */ *ap;
{
        struct vnode *vp = ap->a_vp;

        return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp)));
}

/* See above. */
int
vop_stdislocked(ap)
        struct vop_islocked_args /* {
                struct vnode *a_vp;
        } */ *ap;
{

        return (lockstatus(ap->a_vp->v_vnlock));
}

/*
 * Return true for select/poll.
 */
int
vop_nopoll(ap)
        struct vop_poll_args /* {
                struct vnode *a_vp;
                int a_events;
                struct ucred *a_cred;
                struct thread *a_td;
        } */ *ap;
{

        return (poll_no_poll(ap->a_events));
}

/*
 * Implement poll for local filesystems that support it.
 */
int
vop_stdpoll(ap)
        struct vop_poll_args /* {
                struct vnode *a_vp;
                int a_events;
                struct ucred *a_cred;
                struct thread *a_td;
        } */ *ap;
{
        if (ap->a_events & ~POLLSTANDARD)
                return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
        return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Return our mount point, as we will take charge of the writes.
 */
int
vop_stdgetwritemount(ap)
        struct vop_getwritemount_args /* {
                struct vnode *a_vp;
                struct mount **a_mpp;
        } */ *ap;
{
        struct mount *mp;

        /*
         * XXX Since this is called unlocked we may be recycled while
         * attempting to ref the mount.  If this is the case our mountpoint
         * will be set to NULL.  We only have to prevent this call from
         * returning with a ref to an incorrect mountpoint.  It is not
         * harmful to return with a ref to our previous mountpoint.
         */
        mp = ap->a_vp->v_mount;
        if (mp != NULL) {
                vfs_ref(mp);
                if (mp != ap->a_vp->v_mount) {
                        vfs_rel(mp);
                        mp = NULL;
                }
        }
        *(ap->a_mpp) = mp;
        return (0);
}
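
/*
 * A caller sketch (hypothetical; in the tree the real consumer of
 * VOP_GETWRITEMOUNT is the vn_start_write() path): whoever obtains the
 * mount point this way owns a reference and must drop it with
 * vfs_rel() when done:
 *
 *      struct mount *mp;
 *
 *      if (VOP_GETWRITEMOUNT(vp, &mp) == 0 && mp != NULL) {
 *              ... account the pending write against mp ...
 *              vfs_rel(mp);
 *      }
 */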

/* XXX Needs good comment and VOP_BMAP(9) manpage */
int
vop_stdbmap(ap)
        struct vop_bmap_args /* {
                struct vnode *a_vp;
                daddr_t a_bn;
                struct bufobj **a_bop;
                daddr_t *a_bnp;
                int *a_runp;
                int *a_runb;
        } */ *ap;
{

        if (ap->a_bop != NULL)
                *ap->a_bop = &ap->a_vp->v_bufobj;
        if (ap->a_bnp != NULL)
                *ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
        if (ap->a_runp != NULL)
                *ap->a_runp = 0;
        if (ap->a_runb != NULL)
                *ap->a_runb = 0;
        return (0);
}
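
/*
 * The default mapping above is the identity mapping expressed in
 * DEV_BSIZE units: logical block a_bn is scaled by the number of
 * DEV_BSIZE sectors per filesystem block.  For example (assuming an
 * f_iosize of 16384 and the usual DEV_BSIZE of 512), btodb(16384) is
 * 32, so logical block 5 is reported as physical block 160.  The zero
 * a_runp/a_runb answers advertise no read-ahead/read-behind runs.
 */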

int
vop_stdfsync(ap)
        struct vop_fsync_args /* {
                struct vnode *a_vp;
                struct ucred *a_cred;
                int a_waitfor;
                struct thread *a_td;
        } */ *ap;
{
        struct vnode *vp = ap->a_vp;
        struct buf *bp;
        struct bufobj *bo;
        struct buf *nbp;
        int error = 0;
        int maxretry = 1000;     /* large, arbitrarily chosen */

        bo = &vp->v_bufobj;
        BO_LOCK(bo);
loop1:
        /*
         * MARK/SCAN initialization to avoid infinite loops.
         */
        TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
                bp->b_vflags &= ~BV_SCANNED;
                bp->b_error = 0;
        }

        /*
         * Flush all dirty buffers associated with a vnode.
         */
loop2:
        TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
                if ((bp->b_vflags & BV_SCANNED) != 0)
                        continue;
                bp->b_vflags |= BV_SCANNED;
                if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
                        continue;
                BO_UNLOCK(bo);
                KASSERT(bp->b_bufobj == bo,
                    ("bp %p wrong b_bufobj %p should be %p",
                    bp, bp->b_bufobj, bo));
                if ((bp->b_flags & B_DELWRI) == 0)
                        panic("fsync: not dirty");
                if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
                        vfs_bio_awrite(bp);
                } else {
                        bremfree(bp);
                        bawrite(bp);
                }
                BO_LOCK(bo);
                goto loop2;
        }

        /*
         * If synchronous the caller expects us to completely resolve all
         * dirty buffers in the system.  Wait for in-progress I/O to
         * complete (which could include background bitmap writes), then
         * retry if dirty blocks still exist.
         */
        if (ap->a_waitfor == MNT_WAIT) {
                bufobj_wwait(bo, 0, 0);
                if (bo->bo_dirty.bv_cnt > 0) {
                        /*
                         * If we are unable to write any of these buffers
                         * then we fail now rather than trying endlessly
                         * to write them out.
                         */
                        TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
                                if ((error = bp->b_error) == 0)
                                        continue;
                        if (error == 0 && --maxretry >= 0)
                                goto loop1;
                        error = EAGAIN;
                }
        }
        BO_UNLOCK(bo);
        if (error == EAGAIN)
                vprint("fsync: giving up on dirty", vp);

        return (error);
}

/* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */
int
vop_stdgetpages(ap)
        struct vop_getpages_args /* {
                struct vnode *a_vp;
                vm_page_t *a_m;
                int a_count;
                int a_reqpage;
                vm_ooffset_t a_offset;
        } */ *ap;
{

        return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
            ap->a_count, ap->a_reqpage);
}

int
vop_stdkqfilter(struct vop_kqfilter_args *ap)
{
        return vfs_kqfilter(ap);
}

/* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */
int
vop_stdputpages(ap)
        struct vop_putpages_args /* {
                struct vnode *a_vp;
                vm_page_t *a_m;
                int a_count;
                int a_sync;
                int *a_rtvals;
                vm_ooffset_t a_offset;
        } */ *ap;
{

        return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
             ap->a_sync, ap->a_rtvals);
}

int
vop_stdvptofh(struct vop_vptofh_args *ap)
{
        return (EOPNOTSUPP);
}

/*
 * vfs default ops
 * used to fill the vfs function table to get reasonable default return values.
 */
int
vfs_stdroot (mp, flags, vpp, td)
        struct mount *mp;
        int flags;
        struct vnode **vpp;
        struct thread *td;
{

        return (EOPNOTSUPP);
}

int
vfs_stdstatfs (mp, sbp, td)
        struct mount *mp;
        struct statfs *sbp;
        struct thread *td;
{

        return (EOPNOTSUPP);
}

int
vfs_stdquotactl (mp, cmds, uid, arg, td)
        struct mount *mp;
        int cmds;
        uid_t uid;
        void *arg;
        struct thread *td;
{

        return (EOPNOTSUPP);
}

int
vfs_stdsync(mp, waitfor, td)
        struct mount *mp;
        int waitfor;
        struct thread *td;
{
        struct vnode *vp, *mvp;
        int error, lockreq, allerror = 0;

        lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
        if (waitfor != MNT_WAIT)
                lockreq |= LK_NOWAIT;
        /*
         * Force stale buffer cache information to be flushed.
         */
        MNT_ILOCK(mp);
loop:
        MNT_VNODE_FOREACH(vp, mp, mvp) {
                /* bv_cnt is an acceptable race here. */
                if (vp->v_bufobj.bo_dirty.bv_cnt == 0)
                        continue;
                VI_LOCK(vp);
                MNT_IUNLOCK(mp);
                if ((error = vget(vp, lockreq, td)) != 0) {
                        MNT_ILOCK(mp);
                        if (error == ENOENT) {
                                MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
                                goto loop;
                        }
                        continue;
                }
                error = VOP_FSYNC(vp, waitfor, td);
                if (error)
                        allerror = error;

                /* Do not turn this into vput.  td is not always curthread. */
                VOP_UNLOCK(vp, 0);
                vrele(vp);
                MNT_ILOCK(mp);
        }
        MNT_IUNLOCK(mp);
        return (allerror);
}

int
vfs_stdnosync (mp, waitfor, td)
        struct mount *mp;
        int waitfor;
        struct thread *td;
{

        return (0);
}

int
vfs_stdvget (mp, ino, flags, vpp)
        struct mount *mp;
        ino_t ino;
        int flags;
        struct vnode **vpp;
{

        return (EOPNOTSUPP);
}

int
vfs_stdfhtovp (mp, fhp, vpp)
        struct mount *mp;
        struct fid *fhp;
        struct vnode **vpp;
{

        return (EOPNOTSUPP);
}

int
vfs_stdinit (vfsp)
        struct vfsconf *vfsp;
{

        return (0);
}

int
vfs_stduninit (vfsp)
        struct vfsconf *vfsp;
{

        return (0);
}

int
vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname, td)
        struct mount *mp;
        int cmd;
        struct vnode *filename_vp;
        int attrnamespace;
        const char *attrname;
        struct thread *td;
{

        if (filename_vp != NULL)
                VOP_UNLOCK(filename_vp, 0);
        return (EOPNOTSUPP);
}

int
vfs_stdsysctl(mp, op, req)
        struct mount *mp;
        fsctlop_t op;
        struct sysctl_req *req;
{

        return (EOPNOTSUPP);
}

/* end of vfs default ops */
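
/*
 * Example (hypothetical "myfs" again; a sketch): vfs_register() patches
 * NULL entries of a registered vfsops table with the vfs_std* defaults
 * above, and a filesystem may also name them explicitly:
 *
 *      static struct vfsops myfs_vfsops = {
 *              .vfs_mount =    myfs_mount,
 *              .vfs_unmount =  myfs_unmount,
 *              .vfs_root =     myfs_root,
 *              .vfs_statfs =   myfs_statfs,
 *              .vfs_sync =     vfs_stdsync,
 *      };
 *      VFS_SET(myfs_vfsops, myfs, 0);
 */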