/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/kern/vfs_default.c 330266 2018-03-02 04:43:07Z mckusick $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/rwlock.h>
#include <sys/fcntl.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/poll.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_norename(struct vop_rename_args *);
static int	vop_nostrategy(struct vop_strategy_args *);
static int	get_next_dirent(struct vnode *vp, struct dirent **dpp,
				char *dirbuf, int dirbuflen, off_t *off,
				char **cpos, int *len, int *eofflag,
				struct thread *td);
static int	dirent_exists(struct vnode *vp, const char *dirname,
			      struct thread *td);

#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)

static int vop_stdis_text(struct vop_is_text_args *ap);
static int vop_stdset_text(struct vop_set_text_args *ap);
static int vop_stdunset_text(struct vop_unset_text_args *ap);
static int vop_stdget_writecount(struct vop_get_writecount_args *ap);
static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);

/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
 *
 * If there is no specific entry here, we will return EOPNOTSUPP.
 *
 * Note that every filesystem has to implement either vop_access
 * or vop_accessx; failing to do so will result in an immediate crash
 * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(),
 * which calls vop_stdaccess(), and so on.
 */

struct vop_vector default_vnodeops = {
	.vop_default =		NULL,
	.vop_bypass =		VOP_EOPNOTSUPP,

	.vop_access =		vop_stdaccess,
	.vop_accessx =		vop_stdaccessx,
	.vop_advise =		vop_stdadvise,
	.vop_advlock =		vop_stdadvlock,
	.vop_advlockasync =	vop_stdadvlockasync,
	.vop_advlockpurge =	vop_stdadvlockpurge,
	.vop_allocate =		vop_stdallocate,
	.vop_bmap =		vop_stdbmap,
	.vop_close =		VOP_NULL,
	.vop_fsync =		VOP_NULL,
	.vop_getpages =		vop_stdgetpages,
	.vop_getwritemount =	vop_stdgetwritemount,
	.vop_inactive =		VOP_NULL,
	.vop_ioctl =		VOP_ENOTTY,
	.vop_kqfilter =		vop_stdkqfilter,
	.vop_islocked =		vop_stdislocked,
	.vop_lock1 =		vop_stdlock,
	.vop_lookup =		vop_nolookup,
	.vop_open =		VOP_NULL,
	.vop_pathconf =		VOP_EINVAL,
	.vop_poll =		vop_nopoll,
	.vop_putpages =		vop_stdputpages,
	.vop_readlink =		VOP_EINVAL,
	.vop_rename =		vop_norename,
	.vop_revoke =		VOP_PANIC,
	.vop_strategy =		vop_nostrategy,
	.vop_unlock =		vop_stdunlock,
	.vop_vptocnp =		vop_stdvptocnp,
	.vop_vptofh =		vop_stdvptofh,
	.vop_unp_bind =		vop_stdunp_bind,
	.vop_unp_connect =	vop_stdunp_connect,
	.vop_unp_detach =	vop_stdunp_detach,
	.vop_is_text =		vop_stdis_text,
	.vop_set_text =		vop_stdset_text,
	.vop_unset_text =	vop_stdunset_text,
	.vop_get_writecount =	vop_stdget_writecount,
	.vop_add_writecount =	vop_stdadd_writecount,
};
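
/*
 * Illustrative sketch (kept out of compilation; the "myfs_*" names are
 * hypothetical, not part of this file): a filesystem normally supplies
 * only the operations it implements and chains to the table above via
 * vop_default, so every entry it leaves unset resolves to these
 * defaults:
 */
#if 0
static struct vop_vector myfs_vnodeops = {
	.vop_default =	&default_vnodeops,
	.vop_lookup =	myfs_lookup,	/* hypothetical filesystem method */
};
#endif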

/*
 * Series of placeholder functions for various error returns for
 * VOPs.
 */

int
vop_eopnotsupp(struct vop_generic_args *ap)
{
	/*
	printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
	*/

	return (EOPNOTSUPP);
}

int
vop_ebadf(struct vop_generic_args *ap)
{

	return (EBADF);
}

int
vop_enotty(struct vop_generic_args *ap)
{

	return (ENOTTY);
}

int
vop_einval(struct vop_generic_args *ap)
{

	return (EINVAL);
}

int
vop_enoent(struct vop_generic_args *ap)
{

	return (ENOENT);
}

int
vop_null(struct vop_generic_args *ap)
{

	return (0);
}

/*
 * Helper function to panic on some bad VOPs in some filesystems.
 */
int
vop_panic(struct vop_generic_args *ap)
{

	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
}

/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * The documentation for the operations they implement exists (if it exists)
 * in the VOP_<SOMETHING>(9) manpage (all uppercase).
 */

/*
 * Default vop for filesystems that do not support name lookup.
 */
static int
vop_nolookup(struct vop_lookup_args /* {
	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
} */ *ap)
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * vop_norename:
 *
 * Handle unlock and reference counting for arguments of vop_rename
 * for filesystems that do not implement a rename operation.
 */
static int
vop_norename(struct vop_rename_args *ap)
{

	vop_rename_fail(ap);
	return (EOPNOTSUPP);
}

/*
 * vop_nostrategy:
 *
 * Strategy routine for VFS devices that have none.
 *
 * BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
 * routine.  Typically this is done for a BIO_READ strategy call.
 * Typically B_INVAL is assumed to already be clear prior to a write
 * and should not be cleared manually unless you just made the buffer
 * invalid.  BIO_ERROR should be cleared either way.
 */

static int
vop_nostrategy (struct vop_strategy_args *ap)
{
	printf("No strategy for buffer at %p\n", ap->a_bp);
	vprint("vnode", ap->a_vp);
	ap->a_bp->b_ioflags |= BIO_ERROR;
	ap->a_bp->b_error = EOPNOTSUPP;
	bufdone(ap->a_bp);
	return (EOPNOTSUPP);
}

/*
 * Fetch the next directory entry from the locked directory vnode vp.
 * When the caller passes in *len == 0, a fresh block of entries is read
 * with VOP_READDIR at offset *off; otherwise the next entry is consumed
 * from the buffer filled by a previous call.  Callers loop while
 * (len > 0 || !eofflag); dirent_exists() below shows the intended
 * calling pattern.
 */
static int
get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
    int dirbuflen, off_t *off, char **cpos, int *len,
    int *eofflag, struct thread *td)
{
	int error, reclen;
	struct uio uio;
	struct iovec iov;
	struct dirent *dp;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	if (*len == 0) {
		iov.iov_base = dirbuf;
		iov.iov_len = dirbuflen;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = *off;
		uio.uio_resid = dirbuflen;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_rw = UIO_READ;
		uio.uio_td = td;

		*eofflag = 0;

#ifdef MAC
		error = mac_vnode_check_readdir(td->td_ucred, vp);
		if (error == 0)
#endif
			error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
			    NULL, NULL);
		if (error)
			return (error);

		*off = uio.uio_offset;

		*cpos = dirbuf;
		*len = (dirbuflen - uio.uio_resid);

		if (*len == 0)
			return (ENOENT);
	}

	dp = (struct dirent *)(*cpos);
	reclen = dp->d_reclen;
	*dpp = dp;

	/* check for malformed directory.. */
	if (reclen < DIRENT_MINSIZE)
		return (EINVAL);

	*cpos += reclen;
	*len -= reclen;

	return (0);
}

/*
 * Check if a named file exists in a given directory vnode.
 */
static int
dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
{
	char *dirbuf, *cpos;
	int error, eofflag, dirbuflen, len, found;
	off_t off;
	struct dirent *dp;
	struct vattr va;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	found = 0;

	error = VOP_GETATTR(vp, &va, td->td_ucred);
	if (error)
		return (found);

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	off = 0;
	len = 0;
	do {
		error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
		    &cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if (dp->d_type != DT_WHT && dp->d_fileno != 0 &&
		    strcmp(dp->d_name, dirname) == 0) {
			found = 1;
			goto out;
		}
	} while (len > 0 || !eofflag);

out:
	free(dirbuf, M_TEMP);
	return (found);
}

int
vop_stdaccess(struct vop_access_args *ap)
{

	KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
	    VAPPEND)) == 0, ("invalid bit in accmode"));

	return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td));
}

int
vop_stdaccessx(struct vop_accessx_args *ap)
{
	int error;
	accmode_t accmode = ap->a_accmode;

	error = vfs_unixify_accmode(&accmode);
	if (error != 0)
		return (error);

	if (accmode == 0)
		return (0);

	return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td));
}

/*
 * Advisory record locking support
 */
int
vop_stdadvlock(struct vop_advlock_args *ap)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	if (ap->a_fl->l_whence == SEEK_END) {
		/*
		 * The NFSv4 server must avoid doing a vn_lock() here, since it
		 * can deadlock the nfsd threads, due to a LOR.  Fortunately
		 * the NFSv4 server always uses SEEK_SET and this code is
		 * only required for the SEEK_END case.
		 */
		vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
		VOP_UNLOCK(vp, 0);
		if (error)
			return (error);
	} else
		vattr.va_size = 0;

	return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockasync(struct vop_advlockasync_args *ap)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	if (ap->a_fl->l_whence == SEEK_END) {
		/* The size argument is only needed for SEEK_END. */
		vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
		VOP_UNLOCK(vp, 0);
		if (error)
			return (error);
	} else
		vattr.va_size = 0;

	return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockpurge(struct vop_advlockpurge_args *ap)
{
	struct vnode *vp;

	vp = ap->a_vp;
	lf_purgelocks(vp, &vp->v_lockf);
	return (0);
}
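
/*
 * Illustrative sketch (kept out of compilation): the SEEK_END branches
 * above serve userland lock requests given relative to end-of-file,
 * which lf_advlock() can only resolve once it knows the file size, e.g.:
 */
#if 0
	struct flock fl;

	fl.l_start = -1;		/* lock the last byte of the file */
	fl.l_len = 1;
	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_END;
	(void)fcntl(fd, F_SETLK, &fl);	/* "fd" is a hypothetical descriptor */
#endif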

/*
 * vop_stdpathconf:
 *
 * Standard implementation of POSIX pathconf, to get information about limits
 * for a filesystem.
 * Override per filesystem for the case where the filesystem has smaller
 * limits.
 */
int
vop_stdpathconf(struct vop_pathconf_args /* {
	struct vnode *a_vp;
	int a_name;
	int *a_retval;
} */ *ap)
{

	switch (ap->a_name) {
	case _PC_NAME_MAX:
		*ap->a_retval = NAME_MAX;
		return (0);
	case _PC_PATH_MAX:
		*ap->a_retval = PATH_MAX;
		return (0);
	case _PC_LINK_MAX:
		*ap->a_retval = LINK_MAX;
		return (0);
	case _PC_MAX_CANON:
		*ap->a_retval = MAX_CANON;
		return (0);
	case _PC_MAX_INPUT:
		*ap->a_retval = MAX_INPUT;
		return (0);
	case _PC_PIPE_BUF:
		*ap->a_retval = PIPE_BUF;
		return (0);
	case _PC_CHOWN_RESTRICTED:
		*ap->a_retval = 1;
		return (0);
	case _PC_VDISABLE:
		*ap->a_retval = _POSIX_VDISABLE;
		return (0);
	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Standard lock, unlock and islocked functions.
 */
int
vop_stdlock(struct vop_lock1_args /* {
	struct vnode *a_vp;
	int a_flags;
	char *file;
	int line;
} */ *ap)
{
	struct vnode *vp = ap->a_vp;

	return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
	    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
	    ap->a_line));
}

/* See above. */
int
vop_stdunlock(struct vop_unlock_args /* {
	struct vnode *a_vp;
	int a_flags;
} */ *ap)
{
	struct vnode *vp = ap->a_vp;

	return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp)));
}

/* See above. */
int
vop_stdislocked(struct vop_islocked_args /* {
	struct vnode *a_vp;
} */ *ap)
{

	return (lockstatus(ap->a_vp->v_vnlock));
}

/*
 * Return true for select/poll.
 */
int
vop_nopoll(struct vop_poll_args /* {
	struct vnode *a_vp;
	int a_events;
	struct ucred *a_cred;
	struct thread *a_td;
} */ *ap)
{

	return (poll_no_poll(ap->a_events));
}

/*
 * Implement poll for local filesystems that support it.
 */
int
vop_stdpoll(struct vop_poll_args /* {
	struct vnode *a_vp;
	int a_events;
	struct ucred *a_cred;
	struct thread *a_td;
} */ *ap)
{
	if (ap->a_events & ~POLLSTANDARD)
		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Return our mount point, as we will take charge of the writes.
 */
int
vop_stdgetwritemount(struct vop_getwritemount_args /* {
	struct vnode *a_vp;
	struct mount **a_mpp;
} */ *ap)
{
	struct mount *mp;

	/*
	 * XXX Since this is called unlocked we may be recycled while
	 * attempting to ref the mount.  If this is the case our mountpoint
	 * will be set to NULL.  We only have to prevent this call from
	 * returning with a ref to an incorrect mountpoint.  It is not
	 * harmful to return with a ref to our previous mountpoint.
	 */
	mp = ap->a_vp->v_mount;
	if (mp != NULL) {
		vfs_ref(mp);
		if (mp != ap->a_vp->v_mount) {
			vfs_rel(mp);
			mp = NULL;
		}
	}
	*(ap->a_mpp) = mp;
	return (0);
}

/* XXX Needs good comment and VOP_BMAP(9) manpage */
int
vop_stdbmap(struct vop_bmap_args /* {
	struct vnode *a_vp;
	daddr_t a_bn;
	struct bufobj **a_bop;
	daddr_t *a_bnp;
	int *a_runp;
	int *a_runb;
} */ *ap)
{

	if (ap->a_bop != NULL)
		*ap->a_bop = &ap->a_vp->v_bufobj;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}
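
/*
 * Worked example for the conversion in vop_stdbmap() above: with an
 * f_iosize of 16384, btodb(16384) is 32 DEV_BSIZE (512-byte) units, so
 * logical block 5 maps to physical block number 5 * 32 = 160, and no
 * read-ahead or read-behind (a_runp/a_runb) is promised.
 */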

int
vop_stdfsync(struct vop_fsync_args /* {
	struct vnode *a_vp;
	int a_waitfor;
	struct thread *a_td;
} */ *ap)
{
	struct vnode *vp;
	struct buf *bp, *nbp;
	struct bufobj *bo;
	struct mount *mp;
	int error, maxretry;

	error = 0;
	maxretry = 10000;	/* large, arbitrarily chosen */
	vp = ap->a_vp;
	mp = NULL;
	if (vp->v_type == VCHR) {
		VI_LOCK(vp);
		mp = vp->v_rdev->si_mountpt;
		VI_UNLOCK(vp);
	}
	bo = &vp->v_bufobj;
	BO_LOCK(bo);
loop1:
	/*
	 * MARK/SCAN initialization to avoid infinite loops.
	 */
	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
		bp->b_vflags &= ~BV_SCANNED;
		bp->b_error = 0;
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
loop2:
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
			if (ap->a_waitfor != MNT_WAIT)
				continue;
			if (BUF_LOCK(bp,
			    LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL,
			    BO_LOCKPTR(bo)) != 0) {
				BO_LOCK(bo);
				goto loop1;
			}
			BO_LOCK(bo);
		}
		BO_UNLOCK(bo);
		KASSERT(bp->b_bufobj == bo,
		    ("bp %p wrong b_bufobj %p should be %p",
		    bp, bp->b_bufobj, bo));
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("fsync: not dirty");
		if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
			vfs_bio_awrite(bp);
		} else {
			bremfree(bp);
			bawrite(bp);
		}
		if (maxretry < 1000)
			pause("dirty", hz < 1000 ? 1 : hz / 1000);
		BO_LOCK(bo);
		goto loop2;
	}

	/*
	 * If synchronous the caller expects us to completely resolve all
	 * dirty buffers in the system.  Wait for in-progress I/O to
	 * complete (which could include background bitmap writes), then
	 * retry if dirty blocks still exist.
	 */
	if (ap->a_waitfor == MNT_WAIT) {
		bufobj_wwait(bo, 0, 0);
		if (bo->bo_dirty.bv_cnt > 0) {
			/*
			 * If we are unable to write any of these buffers
			 * then we fail now rather than trying endlessly
			 * to write them out.
			 */
			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
				if ((error = bp->b_error) == 0)
					continue;
			if ((mp != NULL && mp->mnt_secondary_writes > 0) ||
			    (error == 0 && --maxretry >= 0))
				goto loop1;
			if (error == 0)
				error = EAGAIN;
		}
	}
	BO_UNLOCK(bo);
	if (error != 0)
		vn_printf(vp, "fsync: giving up on dirty (error = %d) ", error);

	return (error);
}

/* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */
int
vop_stdgetpages(struct vop_getpages_args /* {
	struct vnode *a_vp;
	vm_page_t *a_m;
	int a_count;
	int a_reqpage;
	vm_ooffset_t a_offset;
} */ *ap)
{

	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count, ap->a_reqpage);
}

int
vop_stdkqfilter(struct vop_kqfilter_args *ap)
{
	return vfs_kqfilter(ap);
}

/* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */
int
vop_stdputpages(struct vop_putpages_args /* {
	struct vnode *a_vp;
	vm_page_t *a_m;
	int a_count;
	int a_sync;
	int *a_rtvals;
	vm_ooffset_t a_offset;
} */ *ap)
{

	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
	    ap->a_sync, ap->a_rtvals);
}

int
vop_stdvptofh(struct vop_vptofh_args *ap)
{
	return (EOPNOTSUPP);
}

/*
 * Translate a vnode to a pathname component: open the parent directory
 * via ".." and scan it for an entry whose inode number matches vp's.
 */
int
vop_stdvptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct ucred *cred = ap->a_cred;
	char *buf = ap->a_buf;
	int *buflen = ap->a_buflen;
	char *dirbuf, *cpos;
	int i, error, eofflag, dirbuflen, flags, locked, len, covered;
	off_t off;
	ino_t fileno;
	struct vattr va;
	struct nameidata nd;
	struct thread *td;
	struct dirent *dp;
	struct vnode *mvp;

	i = *buflen;
	error = 0;
	covered = 0;
	td = curthread;

	if (vp->v_type != VDIR)
		return (ENOENT);

	error = VOP_GETATTR(vp, &va, cred);
	if (error)
		return (error);

	VREF(vp);
	locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);
	NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    "..", vp, td);
	flags = FREAD;
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL);
	if (error) {
		vn_lock(vp, locked | LK_RETRY);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);

	mvp = *dvp = nd.ni_vp;

	if (vp->v_mount != (*dvp)->v_mount &&
	    ((*dvp)->v_vflag & VV_ROOT) &&
	    ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
		*dvp = (*dvp)->v_mount->mnt_vnodecovered;
		VREF(mvp);
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
		VREF(*dvp);
		vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
		covered = 1;
	}

	fileno = va.va_fileid;

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	if ((*dvp)->v_type != VDIR) {
		error = ENOENT;
		goto out;
	}

	off = 0;
	len = 0;
	do {
		/* call VOP_READDIR of parent */
		error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
		    &cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    (dp->d_fileno == fileno)) {
			if (covered) {
				VOP_UNLOCK(*dvp, 0);
				vn_lock(mvp, LK_EXCLUSIVE | LK_RETRY);
				if (dirent_exists(mvp, dp->d_name, td)) {
					error = ENOENT;
					VOP_UNLOCK(mvp, 0);
					vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
					goto out;
				}
				VOP_UNLOCK(mvp, 0);
				vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
			}
			i -= dp->d_namlen;

			if (i < 0) {
				error = ENOMEM;
				goto out;
			}
			if (dp->d_namlen == 1 && dp->d_name[0] == '.') {
				error = ENOENT;
			} else {
				bcopy(dp->d_name, buf + i, dp->d_namlen);
				error = 0;
			}
			goto out;
		}
	} while (len > 0 || !eofflag);
	error = ENOENT;

out:
	free(dirbuf, M_TEMP);
	if (!error) {
		*buflen = i;
		vref(*dvp);
	}
	if (covered) {
		vput(*dvp);
		vrele(mvp);
	} else {
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
	}
	vn_lock(vp, locked | LK_RETRY);
	return (error);
}

int
vop_stdallocate(struct vop_allocate_args *ap)
{
#ifdef __notyet__
	struct statfs sfs;
#endif
	struct iovec aiov;
	struct vattr vattr, *vap;
	struct uio auio;
	off_t fsize, len, cur, offset;
	uint8_t *buf;
	struct thread *td;
	struct vnode *vp;
	size_t iosize;
	int error;

	buf = NULL;
	error = 0;
	td = curthread;
	vap = &vattr;
	vp = ap->a_vp;
	len = *ap->a_len;
	offset = *ap->a_offset;

	error = VOP_GETATTR(vp, vap, td->td_ucred);
	if (error != 0)
		goto out;
	fsize = vap->va_size;
	iosize = vap->va_blocksize;
	if (iosize == 0)
		iosize = BLKDEV_IOSIZE;
	if (iosize > MAXPHYS)
		iosize = MAXPHYS;
	buf = malloc(iosize, M_TEMP, M_WAITOK);

#ifdef __notyet__
	/*
	 * Check if the filesystem sets f_maxfilesize; if not use
	 * VOP_SETATTR to perform the check.
	 */
	error = VFS_STATFS(vp->v_mount, &sfs, td);
	if (error != 0)
		goto out;
	if (sfs.f_maxfilesize) {
		if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize ||
		    offset + len > sfs.f_maxfilesize) {
			error = EFBIG;
			goto out;
		}
	} else
#endif
	if (offset + len > vap->va_size) {
		/*
		 * Test offset + len against the filesystem's maxfilesize.
		 */
		VATTR_NULL(vap);
		vap->va_size = offset + len;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
		VATTR_NULL(vap);
		vap->va_size = fsize;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
	}

	for (;;) {
		/*
		 * Read and write back anything below the nominal file
		 * size.  There's currently no way outside the filesystem
		 * to know whether this area is sparse or not.
		 */
		cur = iosize;
		if ((offset % iosize) != 0)
			cur -= (offset % iosize);
		if (cur > len)
			cur = len;
		if (offset < fsize) {
			aiov.iov_base = buf;
			aiov.iov_len = cur;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = offset;
			auio.uio_resid = cur;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = td;
			error = VOP_READ(vp, &auio, 0, td->td_ucred);
			if (error != 0)
				break;
			if (auio.uio_resid > 0) {
				bzero(buf + cur - auio.uio_resid,
				    auio.uio_resid);
			}
		} else {
			bzero(buf, cur);
		}

		aiov.iov_base = buf;
		aiov.iov_len = cur;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = offset;
		auio.uio_resid = cur;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_td = td;

		error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
		if (error != 0)
			break;

		len -= cur;
		offset += cur;
		if (len == 0)
			break;
		if (should_yield())
			break;
	}

out:
	*ap->a_len = len;
	*ap->a_offset = offset;
	free(buf, M_TEMP);
	return (error);
}
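
/*
 * Illustrative sketch (kept out of compilation): vop_stdallocate() is
 * the fallback that services posix_fallocate(2) when the filesystem
 * does not provide a cheaper VOP_ALLOCATE of its own:
 */
#if 0
	/* Preallocate 1 MB at the start of hypothetical descriptor "fd". */
	error = posix_fallocate(fd, 0, 1024 * 1024);
	if (error != 0)
		errc(1, error, "posix_fallocate");
#endif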
1049 */ 1050 error = 0; 1051 break; 1052 case POSIX_FADV_DONTNEED: 1053 /* 1054 * Flush any open FS buffers and then remove pages 1055 * from the backing VM object. Using vinvalbuf() here 1056 * is a bit heavy-handed as it flushes all buffers for 1057 * the given vnode, not just the buffers covering the 1058 * requested range. 1059 */ 1060 error = 0; 1061 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1062 if (vp->v_iflag & VI_DOOMED) { 1063 VOP_UNLOCK(vp, 0); 1064 break; 1065 } 1066 vinvalbuf(vp, V_CLEANONLY, 0, 0); 1067 if (vp->v_object != NULL) { 1068 start = trunc_page(ap->a_start); 1069 end = round_page(ap->a_end); 1070 VM_OBJECT_WLOCK(vp->v_object); 1071 vm_object_page_cache(vp->v_object, OFF_TO_IDX(start), 1072 OFF_TO_IDX(end)); 1073 VM_OBJECT_WUNLOCK(vp->v_object); 1074 } 1075 VOP_UNLOCK(vp, 0); 1076 break; 1077 default: 1078 error = EINVAL; 1079 break; 1080 } 1081 return (error); 1082} 1083 1084int 1085vop_stdunp_bind(struct vop_unp_bind_args *ap) 1086{ 1087 1088 ap->a_vp->v_socket = ap->a_socket; 1089 return (0); 1090} 1091 1092int 1093vop_stdunp_connect(struct vop_unp_connect_args *ap) 1094{ 1095 1096 *ap->a_socket = ap->a_vp->v_socket; 1097 return (0); 1098} 1099 1100int 1101vop_stdunp_detach(struct vop_unp_detach_args *ap) 1102{ 1103 1104 ap->a_vp->v_socket = NULL; 1105 return (0); 1106} 1107 1108static int 1109vop_stdis_text(struct vop_is_text_args *ap) 1110{ 1111 1112 return ((ap->a_vp->v_vflag & VV_TEXT) != 0); 1113} 1114 1115static int 1116vop_stdset_text(struct vop_set_text_args *ap) 1117{ 1118 1119 ap->a_vp->v_vflag |= VV_TEXT; 1120 return (0); 1121} 1122 1123static int 1124vop_stdunset_text(struct vop_unset_text_args *ap) 1125{ 1126 1127 ap->a_vp->v_vflag &= ~VV_TEXT; 1128 return (0); 1129} 1130 1131static int 1132vop_stdget_writecount(struct vop_get_writecount_args *ap) 1133{ 1134 1135 *ap->a_writecount = ap->a_vp->v_writecount; 1136 return (0); 1137} 1138 1139static int 1140vop_stdadd_writecount(struct vop_add_writecount_args *ap) 1141{ 1142 1143 ap->a_vp->v_writecount += ap->a_inc; 1144 return (0); 1145} 1146 1147/* 1148 * vfs default ops 1149 * used to fill the vfs function table to get reasonable default return values. 1150 */ 1151int 1152vfs_stdroot (mp, flags, vpp) 1153 struct mount *mp; 1154 int flags; 1155 struct vnode **vpp; 1156{ 1157 1158 return (EOPNOTSUPP); 1159} 1160 1161int 1162vfs_stdstatfs (mp, sbp) 1163 struct mount *mp; 1164 struct statfs *sbp; 1165{ 1166 1167 return (EOPNOTSUPP); 1168} 1169 1170int 1171vfs_stdquotactl (mp, cmds, uid, arg) 1172 struct mount *mp; 1173 int cmds; 1174 uid_t uid; 1175 void *arg; 1176{ 1177 1178 return (EOPNOTSUPP); 1179} 1180 1181int 1182vfs_stdsync(mp, waitfor) 1183 struct mount *mp; 1184 int waitfor; 1185{ 1186 struct vnode *vp, *mvp; 1187 struct thread *td; 1188 int error, lockreq, allerror = 0; 1189 1190 td = curthread; 1191 lockreq = LK_EXCLUSIVE | LK_INTERLOCK; 1192 if (waitfor != MNT_WAIT) 1193 lockreq |= LK_NOWAIT; 1194 /* 1195 * Force stale buffer cache information to be flushed. 

int
vfs_stdsync(struct mount *mp, int waitfor)
{
	struct vnode *vp, *mvp;
	struct thread *td;
	int error, lockreq, allerror = 0;

	td = curthread;
	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
	if (waitfor != MNT_WAIT)
		lockreq |= LK_NOWAIT;
	/*
	 * Force stale buffer cache information to be flushed.
	 */
loop:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
			VI_UNLOCK(vp);
			continue;
		}
		if ((error = vget(vp, lockreq, td)) != 0) {
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
				goto loop;
			}
			continue;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		if (error)
			allerror = error;
		vput(vp);
	}
	return (allerror);
}

int
vfs_stdnosync(struct mount *mp, int waitfor)
{

	return (0);
}

int
vfs_stdvget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
{

	return (EOPNOTSUPP);
}

int
vfs_stdfhtovp(struct mount *mp, struct fid *fhp, int flags,
    struct vnode **vpp)
{

	return (EOPNOTSUPP);
}

int
vfs_stdinit(struct vfsconf *vfsp)
{

	return (0);
}

int
vfs_stduninit(struct vfsconf *vfsp)
{

	return (0);
}

int
vfs_stdextattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
    int attrnamespace, const char *attrname)
{

	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp, 0);
	return (EOPNOTSUPP);
}

int
vfs_stdsysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
{

	return (EOPNOTSUPP);
}

/* end of vfs default ops */