vfs_default.c revision 147388
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed 6 * to Berkeley by John Heidemann of the UCLA Ficus project. 7 * 8 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD: head/sys/kern/vfs_default.c 147388 2005-06-14 20:32:27Z jeff $"); 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/bio.h> 41#include <sys/buf.h> 42#include <sys/conf.h> 43#include <sys/event.h> 44#include <sys/kernel.h> 45#include <sys/limits.h> 46#include <sys/lock.h> 47#include <sys/malloc.h> 48#include <sys/mount.h> 49#include <sys/mutex.h> 50#include <sys/unistd.h> 51#include <sys/vnode.h> 52#include <sys/poll.h> 53 54#include <vm/vm.h> 55#include <vm/vm_object.h> 56#include <vm/vm_extern.h> 57#include <vm/pmap.h> 58#include <vm/vm_map.h> 59#include <vm/vm_page.h> 60#include <vm/vm_pager.h> 61#include <vm/vnode_pager.h> 62 63static int vop_nolookup(struct vop_lookup_args *); 64static int vop_nostrategy(struct vop_strategy_args *); 65 66/* 67 * This vnode table stores what we want to do if the filesystem doesn't 68 * implement a particular VOP. 69 * 70 * If there is no specific entry here, we will return EOPNOTSUPP. 71 * 72 */ 73 74struct vop_vector default_vnodeops = { 75 .vop_default = NULL, 76 .vop_bypass = VOP_EOPNOTSUPP, 77 78 .vop_advlock = VOP_EINVAL, 79 .vop_bmap = vop_stdbmap, 80 .vop_close = VOP_NULL, 81 .vop_fsync = VOP_NULL, 82 .vop_getpages = vop_stdgetpages, 83 .vop_getwritemount = vop_stdgetwritemount, 84 .vop_inactive = VOP_NULL, 85 .vop_ioctl = VOP_ENOTTY, 86 .vop_kqfilter = vop_stdkqfilter, 87 .vop_islocked = vop_stdislocked, 88 .vop_lease = VOP_NULL, 89 .vop_lock = vop_stdlock, 90 .vop_lookup = vop_nolookup, 91 .vop_open = VOP_NULL, 92 .vop_pathconf = VOP_EINVAL, 93 .vop_poll = vop_nopoll, 94 .vop_putpages = vop_stdputpages, 95 .vop_readlink = VOP_EINVAL, 96 .vop_revoke = VOP_PANIC, 97 .vop_strategy = vop_nostrategy, 98 .vop_unlock = vop_stdunlock, 99}; 100 101/* 102 * Series of placeholder functions for various error returns for 103 * VOPs. 104 */ 105 106int 107vop_eopnotsupp(struct vop_generic_args *ap) 108{ 109 /* 110 printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name); 111 */ 112 113 return (EOPNOTSUPP); 114} 115 116int 117vop_ebadf(struct vop_generic_args *ap) 118{ 119 120 return (EBADF); 121} 122 123int 124vop_enotty(struct vop_generic_args *ap) 125{ 126 127 return (ENOTTY); 128} 129 130int 131vop_einval(struct vop_generic_args *ap) 132{ 133 134 return (EINVAL); 135} 136 137int 138vop_null(struct vop_generic_args *ap) 139{ 140 141 return (0); 142} 143 144/* 145 * Helper function to panic on some bad VOPs in some filesystems. 146 */ 147int 148vop_panic(struct vop_generic_args *ap) 149{ 150 151 panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name); 152} 153 154/* 155 * vop_std<something> and vop_no<something> are default functions for use by 156 * filesystems that need the "default reasonable" implementation for a 157 * particular operation. 158 * 159 * The documentation for the operations they implement exists (if it exists) 160 * in the VOP_<SOMETHING>(9) manpage (all uppercase). 161 */ 162 163/* 164 * Default vop for filesystems that do not support name lookup 165 */ 166static int 167vop_nolookup(ap) 168 struct vop_lookup_args /* { 169 struct vnode *a_dvp; 170 struct vnode **a_vpp; 171 struct componentname *a_cnp; 172 } */ *ap; 173{ 174 175 *ap->a_vpp = NULL; 176 return (ENOTDIR); 177} 178 179/* 180 * vop_nostrategy: 181 * 182 * Strategy routine for VFS devices that have none. 183 * 184 * BIO_ERROR and B_INVAL must be cleared prior to calling any strategy 185 * routine. Typically this is done for a BIO_READ strategy call. 186 * Typically B_INVAL is assumed to already be clear prior to a write 187 * and should not be cleared manually unless you just made the buffer 188 * invalid. BIO_ERROR should be cleared either way. 189 */ 190 191static int 192vop_nostrategy (struct vop_strategy_args *ap) 193{ 194 printf("No strategy for buffer at %p\n", ap->a_bp); 195 vprint("vnode", ap->a_vp); 196 ap->a_bp->b_ioflags |= BIO_ERROR; 197 ap->a_bp->b_error = EOPNOTSUPP; 198 bufdone(ap->a_bp); 199 return (EOPNOTSUPP); 200} 201 202/* 203 * vop_stdpathconf: 204 * 205 * Standard implementation of POSIX pathconf, to get information about limits 206 * for a filesystem. 207 * Override per filesystem for the case where the filesystem has smaller 208 * limits. 209 */ 210int 211vop_stdpathconf(ap) 212 struct vop_pathconf_args /* { 213 struct vnode *a_vp; 214 int a_name; 215 int *a_retval; 216 } */ *ap; 217{ 218 219 switch (ap->a_name) { 220 case _PC_LINK_MAX: 221 *ap->a_retval = LINK_MAX; 222 return (0); 223 case _PC_MAX_CANON: 224 *ap->a_retval = MAX_CANON; 225 return (0); 226 case _PC_MAX_INPUT: 227 *ap->a_retval = MAX_INPUT; 228 return (0); 229 case _PC_PIPE_BUF: 230 *ap->a_retval = PIPE_BUF; 231 return (0); 232 case _PC_CHOWN_RESTRICTED: 233 *ap->a_retval = 1; 234 return (0); 235 case _PC_VDISABLE: 236 *ap->a_retval = _POSIX_VDISABLE; 237 return (0); 238 default: 239 return (EINVAL); 240 } 241 /* NOTREACHED */ 242} 243 244/* 245 * Standard lock, unlock and islocked functions. 246 */ 247int 248vop_stdlock(ap) 249 struct vop_lock_args /* { 250 struct vnode *a_vp; 251 int a_flags; 252 struct thread *a_td; 253 } */ *ap; 254{ 255 struct vnode *vp = ap->a_vp; 256 257#ifndef DEBUG_LOCKS 258 return (lockmgr(vp->v_vnlock, ap->a_flags, VI_MTX(vp), ap->a_td)); 259#else 260 return (debuglockmgr(vp->v_vnlock, ap->a_flags, VI_MTX(vp), 261 ap->a_td, "vop_stdlock", vp->filename, vp->line)); 262#endif 263} 264 265/* See above. */ 266int 267vop_stdunlock(ap) 268 struct vop_unlock_args /* { 269 struct vnode *a_vp; 270 int a_flags; 271 struct thread *a_td; 272 } */ *ap; 273{ 274 struct vnode *vp = ap->a_vp; 275 276 return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp), 277 ap->a_td)); 278} 279 280/* See above. */ 281int 282vop_stdislocked(ap) 283 struct vop_islocked_args /* { 284 struct vnode *a_vp; 285 struct thread *a_td; 286 } */ *ap; 287{ 288 289 return (lockstatus(ap->a_vp->v_vnlock, ap->a_td)); 290} 291 292/* 293 * Return true for select/poll. 294 */ 295int 296vop_nopoll(ap) 297 struct vop_poll_args /* { 298 struct vnode *a_vp; 299 int a_events; 300 struct ucred *a_cred; 301 struct thread *a_td; 302 } */ *ap; 303{ 304 /* 305 * Return true for read/write. If the user asked for something 306 * special, return POLLNVAL, so that clients have a way of 307 * determining reliably whether or not the extended 308 * functionality is present without hard-coding knowledge 309 * of specific filesystem implementations. 310 * Stay in sync with kern_conf.c::no_poll(). 311 */ 312 if (ap->a_events & ~POLLSTANDARD) 313 return (POLLNVAL); 314 315 return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 316} 317 318/* 319 * Implement poll for local filesystems that support it. 320 */ 321int 322vop_stdpoll(ap) 323 struct vop_poll_args /* { 324 struct vnode *a_vp; 325 int a_events; 326 struct ucred *a_cred; 327 struct thread *a_td; 328 } */ *ap; 329{ 330 if (ap->a_events & ~POLLSTANDARD) 331 return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events)); 332 return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 333} 334 335/* 336 * Return our mount point, as we will take charge of the writes. 337 */ 338int 339vop_stdgetwritemount(ap) 340 struct vop_getwritemount_args /* { 341 struct vnode *a_vp; 342 struct mount **a_mpp; 343 } */ *ap; 344{ 345 346 *(ap->a_mpp) = ap->a_vp->v_mount; 347 return (0); 348} 349 350/* XXX Needs good comment and VOP_BMAP(9) manpage */ 351int 352vop_stdbmap(ap) 353 struct vop_bmap_args /* { 354 struct vnode *a_vp; 355 daddr_t a_bn; 356 struct bufobj **a_bop; 357 daddr_t *a_bnp; 358 int *a_runp; 359 int *a_runb; 360 } */ *ap; 361{ 362 363 if (ap->a_bop != NULL) 364 *ap->a_bop = &ap->a_vp->v_bufobj; 365 if (ap->a_bnp != NULL) 366 *ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize); 367 if (ap->a_runp != NULL) 368 *ap->a_runp = 0; 369 if (ap->a_runb != NULL) 370 *ap->a_runb = 0; 371 return (0); 372} 373 374int 375vop_stdfsync(ap) 376 struct vop_fsync_args /* { 377 struct vnode *a_vp; 378 struct ucred *a_cred; 379 int a_waitfor; 380 struct thread *a_td; 381 } */ *ap; 382{ 383 struct vnode *vp = ap->a_vp; 384 struct buf *bp; 385 struct bufobj *bo; 386 struct buf *nbp; 387 int error = 0; 388 int maxretry = 1000; /* large, arbitrarily chosen */ 389 390 VI_LOCK(vp); 391loop1: 392 /* 393 * MARK/SCAN initialization to avoid infinite loops. 394 */ 395 TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs) { 396 bp->b_vflags &= ~BV_SCANNED; 397 bp->b_error = 0; 398 } 399 400 /* 401 * Flush all dirty buffers associated with a vnode. 402 */ 403loop2: 404 TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) { 405 if ((bp->b_vflags & BV_SCANNED) != 0) 406 continue; 407 bp->b_vflags |= BV_SCANNED; 408 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) 409 continue; 410 VI_UNLOCK(vp); 411 KASSERT(bp->b_bufobj == &vp->v_bufobj, 412 ("bp %p wrong b_bufobj %p should be %p", 413 bp, bp->b_bufobj, &vp->v_bufobj)); 414 if ((bp->b_flags & B_DELWRI) == 0) 415 panic("fsync: not dirty"); 416 if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) { 417 vfs_bio_awrite(bp); 418 } else { 419 bremfree(bp); 420 bawrite(bp); 421 } 422 VI_LOCK(vp); 423 goto loop2; 424 } 425 426 /* 427 * If synchronous the caller expects us to completely resolve all 428 * dirty buffers in the system. Wait for in-progress I/O to 429 * complete (which could include background bitmap writes), then 430 * retry if dirty blocks still exist. 431 */ 432 if (ap->a_waitfor == MNT_WAIT) { 433 bo = &vp->v_bufobj; 434 bufobj_wwait(bo, 0, 0); 435 if (bo->bo_dirty.bv_cnt > 0) { 436 /* 437 * If we are unable to write any of these buffers 438 * then we fail now rather than trying endlessly 439 * to write them out. 440 */ 441 TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) 442 if ((error = bp->b_error) == 0) 443 continue; 444 if (error == 0 && --maxretry >= 0) 445 goto loop1; 446 error = EAGAIN; 447 } 448 } 449 VI_UNLOCK(vp); 450 if (error == EAGAIN) 451 vprint("fsync: giving up on dirty", vp); 452 453 return (error); 454} 455 456/* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */ 457int 458vop_stdgetpages(ap) 459 struct vop_getpages_args /* { 460 struct vnode *a_vp; 461 vm_page_t *a_m; 462 int a_count; 463 int a_reqpage; 464 vm_ooffset_t a_offset; 465 } */ *ap; 466{ 467 468 return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, 469 ap->a_count, ap->a_reqpage); 470} 471 472int 473vop_stdkqfilter(struct vop_kqfilter_args *ap) 474{ 475 return vfs_kqfilter(ap); 476} 477 478/* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */ 479int 480vop_stdputpages(ap) 481 struct vop_putpages_args /* { 482 struct vnode *a_vp; 483 vm_page_t *a_m; 484 int a_count; 485 int a_sync; 486 int *a_rtvals; 487 vm_ooffset_t a_offset; 488 } */ *ap; 489{ 490 491 return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, 492 ap->a_sync, ap->a_rtvals); 493} 494 495/* 496 * vfs default ops 497 * used to fill the vfs function table to get reasonable default return values. 498 */ 499int 500vfs_stdroot (mp, flags, vpp, td) 501 struct mount *mp; 502 int flags; 503 struct vnode **vpp; 504 struct thread *td; 505{ 506 507 return (EOPNOTSUPP); 508} 509 510int 511vfs_stdstatfs (mp, sbp, td) 512 struct mount *mp; 513 struct statfs *sbp; 514 struct thread *td; 515{ 516 517 return (EOPNOTSUPP); 518} 519 520int 521vfs_stdvptofh (vp, fhp) 522 struct vnode *vp; 523 struct fid *fhp; 524{ 525 526 return (EOPNOTSUPP); 527} 528 529int 530vfs_stdquotactl (mp, cmds, uid, arg, td) 531 struct mount *mp; 532 int cmds; 533 uid_t uid; 534 caddr_t arg; 535 struct thread *td; 536{ 537 538 return (EOPNOTSUPP); 539} 540 541int 542vfs_stdsync(mp, waitfor, td) 543 struct mount *mp; 544 int waitfor; 545 struct thread *td; 546{ 547 struct vnode *vp, *nvp; 548 int error, lockreq, allerror = 0; 549 550 lockreq = LK_EXCLUSIVE | LK_INTERLOCK; 551 if (waitfor != MNT_WAIT) 552 lockreq |= LK_NOWAIT; 553 /* 554 * Force stale buffer cache information to be flushed. 555 */ 556 MNT_ILOCK(mp); 557loop: 558 MNT_VNODE_FOREACH(vp, mp, nvp) { 559 560 VI_LOCK(vp); 561 if (vp->v_bufobj.bo_dirty.bv_cnt == 0) { 562 VI_UNLOCK(vp); 563 continue; 564 } 565 MNT_IUNLOCK(mp); 566 567 if ((error = vget(vp, lockreq, td)) != 0) { 568 MNT_ILOCK(mp); 569 if (error == ENOENT) 570 goto loop; 571 continue; 572 } 573 error = VOP_FSYNC(vp, waitfor, td); 574 if (error) 575 allerror = error; 576 577 VOP_UNLOCK(vp, 0, td); 578 vrele(vp); 579 MNT_ILOCK(mp); 580 } 581 MNT_IUNLOCK(mp); 582 return (allerror); 583} 584 585int 586vfs_stdnosync (mp, waitfor, td) 587 struct mount *mp; 588 int waitfor; 589 struct thread *td; 590{ 591 592 return (0); 593} 594 595int 596vfs_stdvget (mp, ino, flags, vpp) 597 struct mount *mp; 598 ino_t ino; 599 int flags; 600 struct vnode **vpp; 601{ 602 603 return (EOPNOTSUPP); 604} 605 606int 607vfs_stdfhtovp (mp, fhp, vpp) 608 struct mount *mp; 609 struct fid *fhp; 610 struct vnode **vpp; 611{ 612 613 return (EOPNOTSUPP); 614} 615 616int 617vfs_stdinit (vfsp) 618 struct vfsconf *vfsp; 619{ 620 621 return (0); 622} 623 624int 625vfs_stduninit (vfsp) 626 struct vfsconf *vfsp; 627{ 628 629 return(0); 630} 631 632int 633vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname, td) 634 struct mount *mp; 635 int cmd; 636 struct vnode *filename_vp; 637 int attrnamespace; 638 const char *attrname; 639 struct thread *td; 640{ 641 642 if (filename_vp != NULL) 643 VOP_UNLOCK(filename_vp, 0, td); 644 return (EOPNOTSUPP); 645} 646 647int 648vfs_stdsysctl(mp, op, req) 649 struct mount *mp; 650 fsctlop_t op; 651 struct sysctl_req *req; 652{ 653 654 return (EOPNOTSUPP); 655} 656 657/* end of vfs default ops */ 658