1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed 6 * to Berkeley by John Heidemann of the UCLA Ficus project. 7 * 8 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/kern/vfs_default.c 350126 2019-07-19 14:24:33Z asomers $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/rwlock.h>
#include <sys/fcntl.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/poll.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

/* File-local default VOPs that are installed in default_vnodeops. */
static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_norename(struct vop_rename_args *);
static int	vop_nostrategy(struct vop_strategy_args *);
/* Directory-scanning helpers used by vop_stdvptocnp(). */
static int	get_next_dirent(struct vnode *vp, struct dirent **dpp,
		    char *dirbuf, int dirbuflen, off_t *off,
		    char **cpos, int *len, int *eofflag,
		    struct thread *td);
static int	dirent_exists(struct vnode *vp, const char *dirname,
		    struct thread *td);

/*
 * Smallest d_reclen that get_next_dirent() accepts; anything shorter is
 * reported as a malformed directory (EINVAL).
 * NOTE(review): presumably the fixed dirent header plus a minimal 4-byte
 * name field -- confirm against struct dirent.
 */
#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)

/* More file-local default VOPs, defined further down in this file. */
static int vop_stdis_text(struct vop_is_text_args *ap);
static int vop_stdset_text(struct vop_set_text_args *ap);
static int vop_stdunset_text(struct vop_unset_text_args *ap);
static int vop_stdget_writecount(struct vop_get_writecount_args *ap);
static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
static int vop_stdfdatasync(struct vop_fdatasync_args *ap);
static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);

/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
92 * 93 * If there is no specific entry here, we will return EOPNOTSUPP. 94 * 95 * Note that every filesystem has to implement either vop_access 96 * or vop_accessx; failing to do so will result in immediate crash 97 * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(), 98 * which calls vop_stdaccess() etc. 99 */ 100 101struct vop_vector default_vnodeops = { 102 .vop_default = NULL, 103 .vop_bypass = VOP_EOPNOTSUPP, 104 105 .vop_access = vop_stdaccess, 106 .vop_accessx = vop_stdaccessx, 107 .vop_advise = vop_stdadvise, 108 .vop_advlock = vop_stdadvlock, 109 .vop_advlockasync = vop_stdadvlockasync, 110 .vop_advlockpurge = vop_stdadvlockpurge, 111 .vop_allocate = vop_stdallocate, 112 .vop_bmap = vop_stdbmap, 113 .vop_close = VOP_NULL, 114 .vop_fsync = VOP_NULL, 115 .vop_fdatasync = vop_stdfdatasync, 116 .vop_getpages = vop_stdgetpages, 117 .vop_getpages_async = vop_stdgetpages_async, 118 .vop_getwritemount = vop_stdgetwritemount, 119 .vop_inactive = VOP_NULL, 120 .vop_ioctl = VOP_ENOTTY, 121 .vop_kqfilter = vop_stdkqfilter, 122 .vop_islocked = vop_stdislocked, 123 .vop_lock1 = vop_stdlock, 124 .vop_lookup = vop_nolookup, 125 .vop_open = VOP_NULL, 126 .vop_pathconf = VOP_EINVAL, 127 .vop_poll = vop_nopoll, 128 .vop_putpages = vop_stdputpages, 129 .vop_readlink = VOP_EINVAL, 130 .vop_rename = vop_norename, 131 .vop_revoke = VOP_PANIC, 132 .vop_strategy = vop_nostrategy, 133 .vop_unlock = vop_stdunlock, 134 .vop_vptocnp = vop_stdvptocnp, 135 .vop_vptofh = vop_stdvptofh, 136 .vop_unp_bind = vop_stdunp_bind, 137 .vop_unp_connect = vop_stdunp_connect, 138 .vop_unp_detach = vop_stdunp_detach, 139 .vop_is_text = vop_stdis_text, 140 .vop_set_text = vop_stdset_text, 141 .vop_unset_text = vop_stdunset_text, 142 .vop_get_writecount = vop_stdget_writecount, 143 .vop_add_writecount = vop_stdadd_writecount, 144}; 145 146/* 147 * Series of placeholder functions for various error returns for 148 * VOPs. 
149 */ 150 151int 152vop_eopnotsupp(struct vop_generic_args *ap) 153{ 154 /* 155 printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name); 156 */ 157 158 return (EOPNOTSUPP); 159} 160 161int 162vop_ebadf(struct vop_generic_args *ap) 163{ 164 165 return (EBADF); 166} 167 168int 169vop_enotty(struct vop_generic_args *ap) 170{ 171 172 return (ENOTTY); 173} 174 175int 176vop_einval(struct vop_generic_args *ap) 177{ 178 179 return (EINVAL); 180} 181 182int 183vop_enoent(struct vop_generic_args *ap) 184{ 185 186 return (ENOENT); 187} 188 189int 190vop_null(struct vop_generic_args *ap) 191{ 192 193 return (0); 194} 195 196/* 197 * Helper function to panic on some bad VOPs in some filesystems. 198 */ 199int 200vop_panic(struct vop_generic_args *ap) 201{ 202 203 panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name); 204} 205 206/* 207 * vop_std<something> and vop_no<something> are default functions for use by 208 * filesystems that need the "default reasonable" implementation for a 209 * particular operation. 210 * 211 * The documentation for the operations they implement exists (if it exists) 212 * in the VOP_<SOMETHING>(9) manpage (all uppercase). 213 */ 214 215/* 216 * Default vop for filesystems that do not support name lookup 217 */ 218static int 219vop_nolookup(ap) 220 struct vop_lookup_args /* { 221 struct vnode *a_dvp; 222 struct vnode **a_vpp; 223 struct componentname *a_cnp; 224 } */ *ap; 225{ 226 227 *ap->a_vpp = NULL; 228 return (ENOTDIR); 229} 230 231/* 232 * vop_norename: 233 * 234 * Handle unlock and reference counting for arguments of vop_rename 235 * for filesystems that do not implement rename operation. 236 */ 237static int 238vop_norename(struct vop_rename_args *ap) 239{ 240 241 vop_rename_fail(ap); 242 return (EOPNOTSUPP); 243} 244 245/* 246 * vop_nostrategy: 247 * 248 * Strategy routine for VFS devices that have none. 249 * 250 * BIO_ERROR and B_INVAL must be cleared prior to calling any strategy 251 * routine. 
Typically this is done for a BIO_READ strategy call. 252 * Typically B_INVAL is assumed to already be clear prior to a write 253 * and should not be cleared manually unless you just made the buffer 254 * invalid. BIO_ERROR should be cleared either way. 255 */ 256 257static int 258vop_nostrategy (struct vop_strategy_args *ap) 259{ 260 printf("No strategy for buffer at %p\n", ap->a_bp); 261 vn_printf(ap->a_vp, "vnode "); 262 ap->a_bp->b_ioflags |= BIO_ERROR; 263 ap->a_bp->b_error = EOPNOTSUPP; 264 bufdone(ap->a_bp); 265 return (EOPNOTSUPP); 266} 267 268static int 269get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf, 270 int dirbuflen, off_t *off, char **cpos, int *len, 271 int *eofflag, struct thread *td) 272{ 273 int error, reclen; 274 struct uio uio; 275 struct iovec iov; 276 struct dirent *dp; 277 278 KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); 279 KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); 280 281 if (*len == 0) { 282 iov.iov_base = dirbuf; 283 iov.iov_len = dirbuflen; 284 285 uio.uio_iov = &iov; 286 uio.uio_iovcnt = 1; 287 uio.uio_offset = *off; 288 uio.uio_resid = dirbuflen; 289 uio.uio_segflg = UIO_SYSSPACE; 290 uio.uio_rw = UIO_READ; 291 uio.uio_td = td; 292 293 *eofflag = 0; 294 295#ifdef MAC 296 error = mac_vnode_check_readdir(td->td_ucred, vp); 297 if (error == 0) 298#endif 299 error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag, 300 NULL, NULL); 301 if (error) 302 return (error); 303 304 *off = uio.uio_offset; 305 306 *cpos = dirbuf; 307 *len = (dirbuflen - uio.uio_resid); 308 309 if (*len == 0) 310 return (ENOENT); 311 } 312 313 dp = (struct dirent *)(*cpos); 314 reclen = dp->d_reclen; 315 *dpp = dp; 316 317 /* check for malformed directory.. */ 318 if (reclen < DIRENT_MINSIZE) 319 return (EINVAL); 320 321 *cpos += reclen; 322 *len -= reclen; 323 324 return (0); 325} 326 327/* 328 * Check if a named file exists in a given directory vnode. 
329 */ 330static int 331dirent_exists(struct vnode *vp, const char *dirname, struct thread *td) 332{ 333 char *dirbuf, *cpos; 334 int error, eofflag, dirbuflen, len, found; 335 off_t off; 336 struct dirent *dp; 337 struct vattr va; 338 339 KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); 340 KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); 341 342 found = 0; 343 344 error = VOP_GETATTR(vp, &va, td->td_ucred); 345 if (error) 346 return (found); 347 348 dirbuflen = DEV_BSIZE; 349 if (dirbuflen < va.va_blocksize) 350 dirbuflen = va.va_blocksize; 351 dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); 352 353 off = 0; 354 len = 0; 355 do { 356 error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off, 357 &cpos, &len, &eofflag, td); 358 if (error) 359 goto out; 360 361 if (dp->d_type != DT_WHT && dp->d_fileno != 0 && 362 strcmp(dp->d_name, dirname) == 0) { 363 found = 1; 364 goto out; 365 } 366 } while (len > 0 || !eofflag); 367 368out: 369 free(dirbuf, M_TEMP); 370 return (found); 371} 372 373int 374vop_stdaccess(struct vop_access_args *ap) 375{ 376 377 KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | 378 VAPPEND)) == 0, ("invalid bit in accmode")); 379 380 return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td)); 381} 382 383int 384vop_stdaccessx(struct vop_accessx_args *ap) 385{ 386 int error; 387 accmode_t accmode = ap->a_accmode; 388 389 error = vfs_unixify_accmode(&accmode); 390 if (error != 0) 391 return (error); 392 393 if (accmode == 0) 394 return (0); 395 396 return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td)); 397} 398 399/* 400 * Advisory record locking support 401 */ 402int 403vop_stdadvlock(struct vop_advlock_args *ap) 404{ 405 struct vnode *vp; 406 struct vattr vattr; 407 int error; 408 409 vp = ap->a_vp; 410 if (ap->a_fl->l_whence == SEEK_END) { 411 /* 412 * The NFSv4 server must avoid doing a vn_lock() here, since it 413 * can deadlock the nfsd threads, due to a LOR. 
Fortunately 414 * the NFSv4 server always uses SEEK_SET and this code is 415 * only required for the SEEK_END case. 416 */ 417 vn_lock(vp, LK_SHARED | LK_RETRY); 418 error = VOP_GETATTR(vp, &vattr, curthread->td_ucred); 419 VOP_UNLOCK(vp, 0); 420 if (error) 421 return (error); 422 } else 423 vattr.va_size = 0; 424 425 return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size)); 426} 427 428int 429vop_stdadvlockasync(struct vop_advlockasync_args *ap) 430{ 431 struct vnode *vp; 432 struct vattr vattr; 433 int error; 434 435 vp = ap->a_vp; 436 if (ap->a_fl->l_whence == SEEK_END) { 437 /* The size argument is only needed for SEEK_END. */ 438 vn_lock(vp, LK_SHARED | LK_RETRY); 439 error = VOP_GETATTR(vp, &vattr, curthread->td_ucred); 440 VOP_UNLOCK(vp, 0); 441 if (error) 442 return (error); 443 } else 444 vattr.va_size = 0; 445 446 return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size)); 447} 448 449int 450vop_stdadvlockpurge(struct vop_advlockpurge_args *ap) 451{ 452 struct vnode *vp; 453 454 vp = ap->a_vp; 455 lf_purgelocks(vp, &vp->v_lockf); 456 return (0); 457} 458 459/* 460 * vop_stdpathconf: 461 * 462 * Standard implementation of POSIX pathconf, to get information about limits 463 * for a filesystem. 464 * Override per filesystem for the case where the filesystem has smaller 465 * limits. 466 */ 467int 468vop_stdpathconf(ap) 469 struct vop_pathconf_args /* { 470 struct vnode *a_vp; 471 int a_name; 472 int *a_retval; 473 } */ *ap; 474{ 475 476 switch (ap->a_name) { 477 case _PC_ASYNC_IO: 478 *ap->a_retval = _POSIX_ASYNCHRONOUS_IO; 479 return (0); 480 case _PC_PATH_MAX: 481 *ap->a_retval = PATH_MAX; 482 return (0); 483 case _PC_ACL_EXTENDED: 484 case _PC_ACL_NFS4: 485 case _PC_CAP_PRESENT: 486 case _PC_INF_PRESENT: 487 case _PC_MAC_PRESENT: 488 *ap->a_retval = 0; 489 return (0); 490 default: 491 return (EINVAL); 492 } 493 /* NOTREACHED */ 494} 495 496/* 497 * Standard lock, unlock and islocked functions. 
498 */ 499int 500vop_stdlock(ap) 501 struct vop_lock1_args /* { 502 struct vnode *a_vp; 503 int a_flags; 504 char *file; 505 int line; 506 } */ *ap; 507{ 508 struct vnode *vp = ap->a_vp; 509 struct mtx *ilk; 510 511 ilk = VI_MTX(vp); 512 return (lockmgr_lock_fast_path(vp->v_vnlock, ap->a_flags, 513 (ilk != NULL) ? &ilk->lock_object : NULL, ap->a_file, ap->a_line)); 514} 515 516/* See above. */ 517int 518vop_stdunlock(ap) 519 struct vop_unlock_args /* { 520 struct vnode *a_vp; 521 int a_flags; 522 } */ *ap; 523{ 524 struct vnode *vp = ap->a_vp; 525 struct mtx *ilk; 526 527 ilk = VI_MTX(vp); 528 return (lockmgr_unlock_fast_path(vp->v_vnlock, ap->a_flags, 529 (ilk != NULL) ? &ilk->lock_object : NULL)); 530} 531 532/* See above. */ 533int 534vop_stdislocked(ap) 535 struct vop_islocked_args /* { 536 struct vnode *a_vp; 537 } */ *ap; 538{ 539 540 return (lockstatus(ap->a_vp->v_vnlock)); 541} 542 543/* 544 * Return true for select/poll. 545 */ 546int 547vop_nopoll(ap) 548 struct vop_poll_args /* { 549 struct vnode *a_vp; 550 int a_events; 551 struct ucred *a_cred; 552 struct thread *a_td; 553 } */ *ap; 554{ 555 556 return (poll_no_poll(ap->a_events)); 557} 558 559/* 560 * Implement poll for local filesystems that support it. 561 */ 562int 563vop_stdpoll(ap) 564 struct vop_poll_args /* { 565 struct vnode *a_vp; 566 int a_events; 567 struct ucred *a_cred; 568 struct thread *a_td; 569 } */ *ap; 570{ 571 if (ap->a_events & ~POLLSTANDARD) 572 return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events)); 573 return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 574} 575 576/* 577 * Return our mount point, as we will take charge of the writes. 578 */ 579int 580vop_stdgetwritemount(ap) 581 struct vop_getwritemount_args /* { 582 struct vnode *a_vp; 583 struct mount **a_mpp; 584 } */ *ap; 585{ 586 struct mount *mp; 587 588 /* 589 * XXX Since this is called unlocked we may be recycled while 590 * attempting to ref the mount. 
If this is the case or mountpoint 591 * will be set to NULL. We only have to prevent this call from 592 * returning with a ref to an incorrect mountpoint. It is not 593 * harmful to return with a ref to our previous mountpoint. 594 */ 595 mp = ap->a_vp->v_mount; 596 if (mp != NULL) { 597 vfs_ref(mp); 598 if (mp != ap->a_vp->v_mount) { 599 vfs_rel(mp); 600 mp = NULL; 601 } 602 } 603 *(ap->a_mpp) = mp; 604 return (0); 605} 606 607/* 608 * If the file system doesn't implement VOP_BMAP, then return sensible defaults: 609 * - Return the vnode's bufobj instead of any underlying device's bufobj 610 * - Calculate the physical block number as if there were equal size 611 * consecutive blocks, but 612 * - Report no contiguous runs of blocks. 613 */ 614int 615vop_stdbmap(ap) 616 struct vop_bmap_args /* { 617 struct vnode *a_vp; 618 daddr_t a_bn; 619 struct bufobj **a_bop; 620 daddr_t *a_bnp; 621 int *a_runp; 622 int *a_runb; 623 } */ *ap; 624{ 625 626 if (ap->a_bop != NULL) 627 *ap->a_bop = &ap->a_vp->v_bufobj; 628 if (ap->a_bnp != NULL) 629 *ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize); 630 if (ap->a_runp != NULL) 631 *ap->a_runp = 0; 632 if (ap->a_runb != NULL) 633 *ap->a_runb = 0; 634 return (0); 635} 636 637int 638vop_stdfsync(ap) 639 struct vop_fsync_args /* { 640 struct vnode *a_vp; 641 int a_waitfor; 642 struct thread *a_td; 643 } */ *ap; 644{ 645 struct vnode *vp; 646 struct buf *bp, *nbp; 647 struct bufobj *bo; 648 struct mount *mp; 649 int error, maxretry; 650 651 error = 0; 652 maxretry = 10000; /* large, arbitrarily chosen */ 653 vp = ap->a_vp; 654 mp = NULL; 655 if (vp->v_type == VCHR) { 656 VI_LOCK(vp); 657 mp = vp->v_rdev->si_mountpt; 658 VI_UNLOCK(vp); 659 } 660 bo = &vp->v_bufobj; 661 BO_LOCK(bo); 662loop1: 663 /* 664 * MARK/SCAN initialization to avoid infinite loops. 
665 */ 666 TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) { 667 bp->b_vflags &= ~BV_SCANNED; 668 bp->b_error = 0; 669 } 670 671 /* 672 * Flush all dirty buffers associated with a vnode. 673 */ 674loop2: 675 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 676 if ((bp->b_vflags & BV_SCANNED) != 0) 677 continue; 678 bp->b_vflags |= BV_SCANNED; 679 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 680 if (ap->a_waitfor != MNT_WAIT) 681 continue; 682 if (BUF_LOCK(bp, 683 LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL, 684 BO_LOCKPTR(bo)) != 0) { 685 BO_LOCK(bo); 686 goto loop1; 687 } 688 BO_LOCK(bo); 689 } 690 BO_UNLOCK(bo); 691 KASSERT(bp->b_bufobj == bo, 692 ("bp %p wrong b_bufobj %p should be %p", 693 bp, bp->b_bufobj, bo)); 694 if ((bp->b_flags & B_DELWRI) == 0) 695 panic("fsync: not dirty"); 696 if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) { 697 vfs_bio_awrite(bp); 698 } else { 699 bremfree(bp); 700 bawrite(bp); 701 } 702 if (maxretry < 1000) 703 pause("dirty", hz < 1000 ? 1 : hz / 1000); 704 BO_LOCK(bo); 705 goto loop2; 706 } 707 708 /* 709 * If synchronous the caller expects us to completely resolve all 710 * dirty buffers in the system. Wait for in-progress I/O to 711 * complete (which could include background bitmap writes), then 712 * retry if dirty blocks still exist. 713 */ 714 if (ap->a_waitfor == MNT_WAIT) { 715 bufobj_wwait(bo, 0, 0); 716 if (bo->bo_dirty.bv_cnt > 0) { 717 /* 718 * If we are unable to write any of these buffers 719 * then we fail now rather than trying endlessly 720 * to write them out. 
721 */ 722 TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) 723 if ((error = bp->b_error) == 0) 724 continue; 725 if ((mp != NULL && mp->mnt_secondary_writes > 0) || 726 (error == 0 && --maxretry >= 0)) 727 goto loop1; 728 if (error == 0) 729 error = EAGAIN; 730 } 731 } 732 BO_UNLOCK(bo); 733 if (error != 0) 734 vn_printf(vp, "fsync: giving up on dirty (error = %d) ", error); 735 736 return (error); 737} 738 739static int 740vop_stdfdatasync(struct vop_fdatasync_args *ap) 741{ 742 743 return (VOP_FSYNC(ap->a_vp, MNT_WAIT, ap->a_td)); 744} 745 746int 747vop_stdfdatasync_buf(struct vop_fdatasync_args *ap) 748{ 749 struct vop_fsync_args apf; 750 751 apf.a_vp = ap->a_vp; 752 apf.a_waitfor = MNT_WAIT; 753 apf.a_td = ap->a_td; 754 return (vop_stdfsync(&apf)); 755} 756 757/* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */ 758int 759vop_stdgetpages(ap) 760 struct vop_getpages_args /* { 761 struct vnode *a_vp; 762 vm_page_t *a_m; 763 int a_count; 764 int *a_rbehind; 765 int *a_rahead; 766 } */ *ap; 767{ 768 769 return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, 770 ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL); 771} 772 773static int 774vop_stdgetpages_async(struct vop_getpages_async_args *ap) 775{ 776 int error; 777 778 error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, 779 ap->a_rahead); 780 ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error); 781 return (error); 782} 783 784int 785vop_stdkqfilter(struct vop_kqfilter_args *ap) 786{ 787 return vfs_kqfilter(ap); 788} 789 790/* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). 
 *
 * Forwards all of its arguments to vnode_pager_generic_putpages().
 */
int
vop_stdputpages(ap)
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_sync;
		int *a_rtvals;
	} */ *ap;
{

	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
	     ap->a_sync, ap->a_rtvals);
}

/* Default VOP_VPTOFH: file handles are not supported (EOPNOTSUPP). */
int
vop_stdvptofh(struct vop_vptofh_args *ap)
{
	return (EOPNOTSUPP);
}

/*
 * Default VOP_VPTOCNP: find the name of directory a_vp in its parent.
 *
 * The parent is opened via "..", and its entries are scanned for one whose
 * d_fileno equals a_vp's va_fileid.  The name is copied into a_buf so that
 * it ends at the old *a_buflen offset, and *a_buflen is moved back to the
 * start of the name.  On success *a_vpp is the parent (or the covered vnode,
 * see below) carrying a reference taken with vref().
 *
 * a_vp is unlocked while the parent is examined and re-locked with its
 * previous lock state before returning, on every path after the initial
 * checks.
 *
 * Returns ENOENT if a_vp is not a directory, if the parent is not a
 * directory, if no entry matches, or if the matching entry is ".";
 * ENOMEM if the name does not fit in the remaining buffer; otherwise the
 * error from VOP_GETATTR(), vn_open_cred() or the directory read.
 */
int
vop_stdvptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct ucred *cred = ap->a_cred;
	char *buf = ap->a_buf;
	int *buflen = ap->a_buflen;
	char *dirbuf, *cpos;
	int i, error, eofflag, dirbuflen, flags, locked, len, covered;
	off_t off;
	ino_t fileno;
	struct vattr va;
	struct nameidata nd;
	struct thread *td;
	struct dirent *dp;
	struct vnode *mvp;

	i = *buflen;
	error = 0;
	covered = 0;
	td = curthread;

	if (vp->v_type != VDIR)
		return (ENOENT);

	error = VOP_GETATTR(vp, &va, cred);
	if (error)
		return (error);

	/* Remember the current lock state so it can be restored on exit. */
	VREF(vp);
	locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);
	NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
	    "..", vp, td);
	flags = FREAD;
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL);
	if (error) {
		vn_lock(vp, locked | LK_RETRY);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);

	/* mvp keeps the vnode that ".." resolved to; *dvp may be redirected. */
	mvp = *dvp = nd.ni_vp;

	/*
	 * ".." crossed into the root of a different, MNT_UNION mount: scan
	 * the vnode that mount covers instead.  mvp is closed here but kept
	 * referenced for the dirent_exists() check below.
	 */
	if (vp->v_mount != (*dvp)->v_mount &&
	    ((*dvp)->v_vflag & VV_ROOT) &&
	    ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
		*dvp = (*dvp)->v_mount->mnt_vnodecovered;
		VREF(mvp);
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
		VREF(*dvp);
		vn_lock(*dvp, LK_SHARED | LK_RETRY);
		covered = 1;
	}

	fileno = va.va_fileid;

	/* Scratch buffer: at least DEV_BSIZE, else the vnode's block size. */
	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	if ((*dvp)->v_type != VDIR) {
		error = ENOENT;
		goto out;
	}

	off = 0;
	len = 0;
	do {
		/* call VOP_READDIR of parent */
		error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    (dp->d_fileno == fileno)) {
			if (covered) {
				/*
				 * The match came from the covered directory;
				 * reject it if the same name also exists in
				 * mvp.  Only one of the two vnodes is locked
				 * at a time.
				 */
				VOP_UNLOCK(*dvp, 0);
				vn_lock(mvp, LK_SHARED | LK_RETRY);
				if (dirent_exists(mvp, dp->d_name, td)) {
					error = ENOENT;
					VOP_UNLOCK(mvp, 0);
					vn_lock(*dvp, LK_SHARED | LK_RETRY);
					goto out;
				}
				VOP_UNLOCK(mvp, 0);
				vn_lock(*dvp, LK_SHARED | LK_RETRY);
			}
			/* Reserve room for the name at the tail of buf. */
			i -= dp->d_namlen;

			if (i < 0) {
				error = ENOMEM;
				goto out;
			}
			if (dp->d_namlen == 1 && dp->d_name[0] == '.') {
				error = ENOENT;
			} else {
				bcopy(dp->d_name, buf + i, dp->d_namlen);
				error = 0;
			}
			goto out;
		}
	} while (len > 0 || !eofflag);
	error = ENOENT;

out:
	free(dirbuf, M_TEMP);
	if (!error) {
		*buflen = i;
		/* Extra reference on the result for the caller. */
		vref(*dvp);
	}
	if (covered) {
		vput(*dvp);
		vrele(mvp);
	} else {
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
	}
	vn_lock(vp, locked | LK_RETRY);
	return (error);
}

/*
 * Default VOP_ALLOCATE: rewrite the byte range described by *a_offset and
 * *a_len.
 *
 * The range is processed in chunks of at most iosize bytes that end on
 * iosize boundaries.  Data below the file size seen at entry is read and
 * written back unchanged (short reads are zero-padded); beyond it zeroes are
 * written.  *a_len and *a_offset are updated to describe what is still left,
 * so *a_len may be non-zero on return when the loop stops early because
 * should_yield() is true.  Returns 0 or the first error from VOP_GETATTR(),
 * VOP_SETATTR(), VOP_READ() or VOP_WRITE().
 */
int
vop_stdallocate(struct vop_allocate_args *ap)
{
#ifdef __notyet__
	struct statfs *sfs;
	off_t maxfilesize = 0;
#endif
	struct iovec aiov;
	struct vattr vattr, *vap;
	struct uio auio;
	off_t fsize, len, cur, offset;
	uint8_t *buf;
	struct thread *td;
	struct vnode *vp;
	size_t iosize;
	int error;

	buf = NULL;
	error = 0;
	td = curthread;
	vap = &vattr;
	vp = ap->a_vp;
	len = *ap->a_len;
	offset = *ap->a_offset;

	error = VOP_GETATTR(vp, vap, td->td_ucred);
	if (error != 0)
		goto out;
	fsize = vap->va_size;
	/* Chunk size: the vnode's block size, defaulted and capped. */
	iosize = vap->va_blocksize;
	if (iosize == 0)
		iosize = BLKDEV_IOSIZE;
	if (iosize > MAXPHYS)
		iosize = MAXPHYS;
	buf = malloc(iosize, M_TEMP, M_WAITOK);

#ifdef __notyet__
	/*
	 * Check if the filesystem sets f_maxfilesize; if not use
	 * VOP_SETATTR to perform the check.
	 */
	sfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
	error = VFS_STATFS(vp->v_mount, sfs, td);
	if (error == 0)
		maxfilesize = sfs->f_maxfilesize;
	free(sfs, M_STATFS);
	if (error != 0)
		goto out;
	if (maxfilesize) {
		if (offset > maxfilesize || len > maxfilesize ||
		    offset + len > maxfilesize) {
			error = EFBIG;
			goto out;
		}
	} else
#endif
	if (offset + len > vap->va_size) {
		/*
		 * Test offset + len against the filesystem's maxfilesize.
		 * The size is set to the target and then put back to the
		 * original value.
		 */
		VATTR_NULL(vap);
		vap->va_size = offset + len;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
		VATTR_NULL(vap);
		vap->va_size = fsize;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
	}

	for (;;) {
		/*
		 * Read and write back anything below the nominal file
		 * size.  There's currently no way outside the filesystem
		 * to know whether this area is sparse or not.
		 */
		/* Chunk runs to the next iosize boundary, or to the end. */
		cur = iosize;
		if ((offset % iosize) != 0)
			cur -= (offset % iosize);
		if (cur > len)
			cur = len;
		if (offset < fsize) {
			aiov.iov_base = buf;
			aiov.iov_len = cur;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = offset;
			auio.uio_resid = cur;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = td;
			error = VOP_READ(vp, &auio, 0, td->td_ucred);
			if (error != 0)
				break;
			/* Short read: zero the part that was not filled. */
			if (auio.uio_resid > 0) {
				bzero(buf + cur - auio.uio_resid,
				    auio.uio_resid);
			}
		} else {
			bzero(buf, cur);
		}

		aiov.iov_base = buf;
		aiov.iov_len = cur;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = offset;
		auio.uio_resid = cur;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_td = td;

		error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
		if (error != 0)
			break;

		len -= cur;
		offset += cur;
		if (len == 0)
			break;
		/* Stop early; the caller sees the remaining range. */
		if (should_yield())
			break;
	}

 out:
	*ap->a_len = len;
	*ap->a_offset = offset;
	free(buf, M_TEMP);
	return (error);
}

/*
 * Default VOP_ADVISE.
 *
 * POSIX_FADV_WILLNEED is accepted and ignored.  POSIX_FADV_DONTNEED locks
 * the vnode exclusively and, unless it is doomed, marks the pages and the
 * clean and dirty buffers covering [a_start, a_end] as not worth reusing.
 * Any other advice yields EINVAL.
 */
int
vop_stdadvise(struct vop_advise_args *ap)
{
	struct vnode *vp;
	struct bufobj *bo;
	daddr_t startn, endn;
	off_t start, end;
	int bsize, error;

	vp = ap->a_vp;
	switch (ap->a_advice) {
	case POSIX_FADV_WILLNEED:
		/*
		 * Do nothing for now.  Filesystems should provide a
		 * custom method which starts an asynchronous read of
		 * the requested region.
		 */
		error = 0;
		break;
	case POSIX_FADV_DONTNEED:
		error = 0;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (vp->v_iflag & VI_DOOMED) {
			VOP_UNLOCK(vp, 0);
			break;
		}

		/*
		 * Deactivate pages in the specified range from the backing VM
		 * object.  Pages that are resident in the buffer cache will
		 * remain wired until their corresponding buffers are released
		 * below.
		 */
		if (vp->v_object != NULL) {
			start = trunc_page(ap->a_start);
			end = round_page(ap->a_end);
			VM_OBJECT_RLOCK(vp->v_object);
			vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start),
			    OFF_TO_IDX(end));
			VM_OBJECT_RUNLOCK(vp->v_object);
		}

		/* Convert the byte range to buffer block numbers. */
		bo = &vp->v_bufobj;
		BO_RLOCK(bo);
		bsize = vp->v_bufobj.bo_bsize;
		startn = ap->a_start / bsize;
		endn = ap->a_end / bsize;
		error = bnoreuselist(&bo->bo_clean, bo, startn, endn);
		if (error == 0)
			error = bnoreuselist(&bo->bo_dirty, bo, startn, endn);
		BO_RUNLOCK(bo);
		VOP_UNLOCK(vp, 0);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/* Default VOP_UNP_BIND: record a_socket in the vnode's v_socket. */
int
vop_stdunp_bind(struct vop_unp_bind_args *ap)
{

	ap->a_vp->v_socket = ap->a_socket;
	return (0);
}

/* Default VOP_UNP_CONNECT: return the vnode's v_socket through a_socket. */
int
vop_stdunp_connect(struct vop_unp_connect_args *ap)
{

	*ap->a_socket = ap->a_vp->v_socket;
	return (0);
}

/* Default VOP_UNP_DETACH: clear the vnode's v_socket. */
int
vop_stdunp_detach(struct vop_unp_detach_args *ap)
{

	ap->a_vp->v_socket = NULL;
	return (0);
}

/* Default VOP_IS_TEXT: non-zero when VV_TEXT is set in v_vflag. */
static int
vop_stdis_text(struct vop_is_text_args *ap)
{

	return ((ap->a_vp->v_vflag & VV_TEXT) != 0);
}

/* Default VOP_SET_TEXT: set VV_TEXT in v_vflag. */
static int
vop_stdset_text(struct vop_set_text_args *ap)
{

	ap->a_vp->v_vflag |= VV_TEXT;
	return (0);
}

/* Default VOP_UNSET_TEXT: clear VV_TEXT in v_vflag. */
static int
vop_stdunset_text(struct vop_unset_text_args *ap)
{

	ap->a_vp->v_vflag &= ~VV_TEXT;
	return (0);
}

/* Default VOP_GET_WRITECOUNT: report v_writecount through a_writecount. */
static int
vop_stdget_writecount(struct vop_get_writecount_args *ap)
{

	*ap->a_writecount = ap->a_vp->v_writecount;
	return (0);
}

/* Default VOP_ADD_WRITECOUNT: add a_inc (may be negative) to v_writecount. */
static int
vop_stdadd_writecount(struct vop_add_writecount_args *ap)
{

	ap->a_vp->v_writecount += ap->a_inc;
	return (0);
}

1194/* 1195 * vfs default ops 1196 * used to fill the vfs function table to get reasonable default return values. 1197 */ 1198int 1199vfs_stdroot (mp, flags, vpp) 1200 struct mount *mp; 1201 int flags; 1202 struct vnode **vpp; 1203{ 1204 1205 return (EOPNOTSUPP); 1206} 1207 1208int 1209vfs_stdstatfs (mp, sbp) 1210 struct mount *mp; 1211 struct statfs *sbp; 1212{ 1213 1214 return (EOPNOTSUPP); 1215} 1216 1217int 1218vfs_stdquotactl (mp, cmds, uid, arg) 1219 struct mount *mp; 1220 int cmds; 1221 uid_t uid; 1222 void *arg; 1223{ 1224 1225 return (EOPNOTSUPP); 1226} 1227 1228int 1229vfs_stdsync(mp, waitfor) 1230 struct mount *mp; 1231 int waitfor; 1232{ 1233 struct vnode *vp, *mvp; 1234 struct thread *td; 1235 int error, lockreq, allerror = 0; 1236 1237 td = curthread; 1238 lockreq = LK_EXCLUSIVE | LK_INTERLOCK; 1239 if (waitfor != MNT_WAIT) 1240 lockreq |= LK_NOWAIT; 1241 /* 1242 * Force stale buffer cache information to be flushed. 1243 */ 1244loop: 1245 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 1246 if (vp->v_bufobj.bo_dirty.bv_cnt == 0) { 1247 VI_UNLOCK(vp); 1248 continue; 1249 } 1250 if ((error = vget(vp, lockreq, td)) != 0) { 1251 if (error == ENOENT) { 1252 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 1253 goto loop; 1254 } 1255 continue; 1256 } 1257 error = VOP_FSYNC(vp, waitfor, td); 1258 if (error) 1259 allerror = error; 1260 vput(vp); 1261 } 1262 return (allerror); 1263} 1264 1265int 1266vfs_stdnosync (mp, waitfor) 1267 struct mount *mp; 1268 int waitfor; 1269{ 1270 1271 return (0); 1272} 1273 1274int 1275vfs_stdvget (mp, ino, flags, vpp) 1276 struct mount *mp; 1277 ino_t ino; 1278 int flags; 1279 struct vnode **vpp; 1280{ 1281 1282 return (EOPNOTSUPP); 1283} 1284 1285int 1286vfs_stdfhtovp (mp, fhp, flags, vpp) 1287 struct mount *mp; 1288 struct fid *fhp; 1289 int flags; 1290 struct vnode **vpp; 1291{ 1292 1293 return (EOPNOTSUPP); 1294} 1295 1296int 1297vfs_stdinit (vfsp) 1298 struct vfsconf *vfsp; 1299{ 1300 1301 return (0); 1302} 1303 1304int 1305vfs_stduninit (vfsp) 
1306 struct vfsconf *vfsp; 1307{ 1308 1309 return(0); 1310} 1311 1312int 1313vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname) 1314 struct mount *mp; 1315 int cmd; 1316 struct vnode *filename_vp; 1317 int attrnamespace; 1318 const char *attrname; 1319{ 1320 1321 if (filename_vp != NULL) 1322 VOP_UNLOCK(filename_vp, 0); 1323 return (EOPNOTSUPP); 1324} 1325 1326int 1327vfs_stdsysctl(mp, op, req) 1328 struct mount *mp; 1329 fsctlop_t op; 1330 struct sysctl_req *req; 1331{ 1332 1333 return (EOPNOTSUPP); 1334} 1335 1336/* end of vfs default ops */ 1337