1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 25 * Copyright (c) 2014 Integros [integros.com] 26 */ 27 28/* Portions Copyright 2007 Jeremy Teo */ 29/* Portions Copyright 2010 Robert Milkowski */ 30 31#include <sys/types.h> 32#include <sys/param.h> 33#include <sys/time.h> 34#include <sys/systm.h> 35#include <sys/sysmacros.h> 36#include <sys/resource.h> 37#include <sys/vfs.h> 38#include <sys/vm.h> 39#include <sys/vnode.h> 40#include <sys/file.h> 41#include <sys/stat.h> 42#include <sys/kmem.h> 43#include <sys/taskq.h> 44#include <sys/uio.h> 45#include <sys/atomic.h> 46#include <sys/namei.h> 47#include <sys/mman.h> 48#include <sys/cmn_err.h> 49#include <sys/errno.h> 50#include <sys/unistd.h> 51#include <sys/zfs_dir.h> 52#include <sys/zfs_ioctl.h> 53#include <sys/fs/zfs.h> 54#include <sys/dmu.h> 55#include <sys/dmu_objset.h> 56#include <sys/spa.h> 57#include <sys/txg.h> 58#include <sys/dbuf.h> 59#include <sys/zap.h> 60#include <sys/sa.h> 61#include <sys/dirent.h> 62#include <sys/policy.h> 63#include <sys/sunddi.h> 64#include <sys/filio.h> 65#include <sys/sid.h> 66#include <sys/zfs_ctldir.h> 67#include <sys/zfs_fuid.h> 68#include <sys/zfs_sa.h> 69#include <sys/dnlc.h> 70#include <sys/zfs_rlock.h> 71#include <sys/buf.h> 72#include <sys/sched.h> 73#include <sys/acl.h> 74#include <sys/extdirent.h> 75 76#ifdef __FreeBSD__ 77#include <sys/kidmap.h> 78#include <sys/bio.h> 79#include <vm/vm_param.h> 80#endif 81 82#ifdef __NetBSD__ 83#include <dev/mm.h> 84#include <miscfs/fifofs/fifo.h> 85#include <miscfs/genfs/genfs.h> 86#include <miscfs/genfs/genfs_node.h> 87#include <uvm/uvm_extern.h> 88#include <sys/fstrans.h> 89#include <sys/malloc.h> 90 91uint_t zfs_putpage_key; 92#endif 93 94/* 95 * Programming rules. 96 * 97 * Each vnode op performs some logical unit of work. To do this, the ZPL must 98 * properly lock its in-core state, create a DMU transaction, do the work, 99 * record this work in the intent log (ZIL), commit the DMU transaction, 100 * and wait for the intent log to commit if it is a synchronous operation. 101 * Moreover, the vnode ops must work in both normal and log replay context. 102 * The ordering of events is important to avoid deadlocks and references 103 * to freed memory. The example below illustrates the following Big Rules: 104 * 105 * (1) A check must be made in each zfs thread for a mounted file system. 106 * This is done avoiding races using ZFS_ENTER(zfsvfs). 107 * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 108 * must be checked with ZFS_VERIFY_ZP(zp). 
Both of these macros 109 * can return EIO from the calling function. 110 * 111 * (2) VN_RELE() should always be the last thing except for zil_commit() 112 * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 113 * First, if it's the last reference, the vnode/znode 114 * can be freed, so the zp may point to freed memory. Second, the last 115 * reference will call zfs_zinactive(), which may induce a lot of work -- 116 * pushing cached pages (which acquires range locks) and syncing out 117 * cached atime changes. Third, zfs_zinactive() may require a new tx, 118 * which could deadlock the system if you were already holding one. 119 * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 120 * 121 * (3) All range locks must be grabbed before calling dmu_tx_assign(), 122 * as they can span dmu_tx_assign() calls. 123 * 124 * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to 125 * dmu_tx_assign(). This is critical because we don't want to block 126 * while holding locks. 127 * 128 * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This 129 * reduces lock contention and CPU usage when we must wait (note that if 130 * throughput is constrained by the storage, nearly every transaction 131 * must wait). 132 * 133 * Note, in particular, that if a lock is sometimes acquired before 134 * the tx assigns, and sometimes after (e.g. z_lock), then failing 135 * to use a non-blocking assign can deadlock the system. The scenario: 136 * 137 * Thread A has grabbed a lock before calling dmu_tx_assign(). 138 * Thread B is in an already-assigned tx, and blocks for this lock. 139 * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 140 * forever, because the previous txg can't quiesce until B's tx commits. 141 * 142 * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 143 * then drop all locks, call dmu_tx_wait(), and try again. On subsequent 144 * calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT, 145 * to indicate that this operation has already called dmu_tx_wait(). 146 * This will ensure that we don't retry forever, waiting a short bit 147 * each time. 148 * 149 * (5) If the operation succeeded, generate the intent log entry for it 150 * before dropping locks. This ensures that the ordering of events 151 * in the intent log matches the order in which they actually occurred. 152 * During ZIL replay the zfs_log_* functions will update the sequence 153 * number to indicate the zil transaction has replayed. 154 * 155 * (6) At the end of each vnode op, the DMU tx must always commit, 156 * regardless of whether there were any errors. 157 * 158 * (7) After dropping all locks, invoke zil_commit(zilog, foid) 159 * to ensure that synchronous semantics are provided when necessary. 160 * 161 * In general, this is how things should be ordered in each vnode op: 162 * 163 * ZFS_ENTER(zfsvfs); // exit if unmounted 164 * top: 165 * zfs_dirent_lookup(&dl, ...) // lock directory entry (may VN_HOLD()) 166 * rw_enter(...); // grab any other locks you need 167 * tx = dmu_tx_create(...); // get DMU tx 168 * dmu_tx_hold_*(); // hold each object you might modify 169 * error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 170 * if (error) { 171 * rw_exit(...); // drop locks 172 * zfs_dirent_unlock(dl); // unlock directory entry 173 * VN_RELE(...); // release held vnodes 174 * if (error == ERESTART) { 175 * waited = B_TRUE; 176 * dmu_tx_wait(tx); 177 * dmu_tx_abort(tx); 178 * goto top; 179 * } 180 * dmu_tx_abort(tx); // abort DMU tx 181 * ZFS_EXIT(zfsvfs); // finished in zfs 182 * return (error); // really out of space 183 * } 184 * error = do_real_work(); // do whatever this VOP does 185 * if (error == 0) 186 * zfs_log_*(...); // on success, make ZIL entry 187 * dmu_tx_commit(tx); // commit DMU tx -- error or not 188 * rw_exit(...); // drop locks 189 * zfs_dirent_unlock(dl); // unlock directory entry 190 * VN_RELE(...); // release held vnodes 191 * zil_commit(zilog, foid); // synchronous when necessary 192 * ZFS_EXIT(zfsvfs); // finished in zfs 193 * return (error); // done, report error 194 */ 195 196/* ARGSUSED */ 197static int 198zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 199{ 200 znode_t *zp = VTOZ(*vpp); 201 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 202 203 ZFS_ENTER(zfsvfs); 204 ZFS_VERIFY_ZP(zp); 205 206 if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 207 ((flag & FAPPEND) == 0)) { 208 ZFS_EXIT(zfsvfs); 209 return (SET_ERROR(EPERM)); 210 } 211 212 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 213 ZTOV(zp)->v_type == VREG && 214 !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 215 if (fs_vscan(*vpp, cr, 0) != 0) { 216 ZFS_EXIT(zfsvfs); 217 return (SET_ERROR(EACCES)); 218 } 219 } 220 221 /* Keep a count of the synchronous opens in the znode */ 222 if (flag & (FSYNC | FDSYNC)) 223 atomic_inc_32(&zp->z_sync_cnt); 224 225 ZFS_EXIT(zfsvfs); 226 return (0); 227} 228 229/* ARGSUSED */ 230static int 231zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 232 caller_context_t *ct) 233{ 234 znode_t *zp = VTOZ(vp); 235 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 236 237 /* 238 * Clean up any locks held by this process on the vp. 239 */ 240 cleanlocks(vp, ddi_get_pid(), 0); 241 cleanshares(vp, ddi_get_pid()); 242 243 ZFS_ENTER(zfsvfs); 244 ZFS_VERIFY_ZP(zp); 245 246 /* Decrement the synchronous opens in the znode */ 247 if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 248 atomic_dec_32(&zp->z_sync_cnt); 249 250 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 251 ZTOV(zp)->v_type == VREG && 252 !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 253 VERIFY(fs_vscan(vp, cr, 1) == 0); 254 255 ZFS_EXIT(zfsvfs); 256 return (0); 257} 258 259/* 260 * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 261 * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 262 */ 263static int 264zfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 265{ 266 znode_t *zp = VTOZ(vp); 267 uint64_t noff = (uint64_t)*off; /* new offset */ 268 uint64_t file_sz; 269 int error; 270 boolean_t hole; 271 272 file_sz = zp->z_size; 273 if (noff >= file_sz) { 274 return (SET_ERROR(ENXIO)); 275 } 276 277 if (cmd == _FIO_SEEK_HOLE) 278 hole = B_TRUE; 279 else 280 hole = B_FALSE; 281 282 error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 283 284 if (error == ESRCH) 285 return (SET_ERROR(ENXIO)); 286 287 /* 288 * We could find a hole that begins after the logical end-of-file, 289 * because dmu_offset_next() only works on whole blocks. If the 290 * EOF falls mid-block, then indicate that the "virtual hole" 291 * at the end of the file begins at the logical EOF, rather than 292 * at the end of the last block. 
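	 *
	 * Illustrative example (made-up numbers, just to show the
	 * adjustment): with a 128K block size and a 200000-byte file, the
	 * last block spans [131072, 262144).  A _FIO_SEEK_HOLE request on a
	 * file with no holes may get noff = 262144 back from
	 * dmu_offset_next(); since that is past the 200000-byte logical
	 * EOF, the code below clamps noff to 200000 so the "virtual hole"
	 * is reported at EOF.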
	 */
	if (noff > file_sz) {
		ASSERT(hole);
		noff = file_sz;
	}

	if (noff < *off)
		return (error);
	*off = noff;
	return (error);
}

/* ARGSUSED */
static int
zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred,
    int *rvalp, caller_context_t *ct)
{
	offset_t off;
	offset_t ndata;
	dmu_object_info_t doi;
	int error;
	zfsvfs_t *zfsvfs;
	znode_t *zp;

	switch (com) {
	case _FIOFFS:
	{
		return (0);

		/*
		 * The following two ioctls are used by bfu.  Faking out,
		 * necessary to avoid bfu errors.
		 */
	}
	case _FIOGDIO:
	case _FIOSDIO:
	{
		return (0);
	}

	case _FIO_SEEK_DATA:
	case _FIO_SEEK_HOLE:
	{
#ifdef illumos
		if (ddi_copyin((void *)data, &off, sizeof (off), flag))
			return (SET_ERROR(EFAULT));
#else
		off = *(offset_t *)data;
#endif
		zp = VTOZ(vp);
		zfsvfs = zp->z_zfsvfs;
		ZFS_ENTER(zfsvfs);
		ZFS_VERIFY_ZP(zp);

		/* offset parameter is in/out */
		error = zfs_holey(vp, com, &off);
		ZFS_EXIT(zfsvfs);
		if (error)
			return (error);
#ifdef illumos
		if (ddi_copyout(&off, (void *)data, sizeof (off), flag))
			return (SET_ERROR(EFAULT));
#else
		*(offset_t *)data = off;
#endif
		return (0);
	}
#ifdef illumos
	case _FIO_COUNT_FILLED:
	{
		/*
		 * _FIO_COUNT_FILLED adds a new ioctl command which
		 * exposes the number of filled blocks in a
		 * ZFS object.
		 */
		zp = VTOZ(vp);
		zfsvfs = zp->z_zfsvfs;
		ZFS_ENTER(zfsvfs);
		ZFS_VERIFY_ZP(zp);

		/*
		 * Wait for all dirty blocks for this object
		 * to get synced out to disk, and the DMU info
		 * updated.
		 */
		error = dmu_object_wait_synced(zfsvfs->z_os, zp->z_id);
		if (error) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}

		/*
		 * Retrieve fill count from DMU object.
		 */
		error = dmu_object_info(zfsvfs->z_os, zp->z_id, &doi);
		if (error) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}

		ndata = doi.doi_fill_count;

		ZFS_EXIT(zfsvfs);
		if (ddi_copyout(&ndata, (void *)data, sizeof (ndata), flag))
			return (SET_ERROR(EFAULT));
		return (0);
	}
#endif
	}
	return (SET_ERROR(ENOTTY));
}

#ifdef __FreeBSD__
static vm_page_t
page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
{
	vm_object_t obj;
	vm_page_t pp;
	int64_t end;

	/*
	 * At present vm_page_clear_dirty extends the cleared range to
	 * DEV_BSIZE aligned boundaries, if the range is not aligned.
	 * As a result a DEV_BSIZE subrange with partially dirty data may
	 * get marked as clean.  It may happen that all DEV_BSIZE subranges
	 * are marked clean and thus the whole page would be considered
	 * clean despite having some dirty data.  For this reason we should
	 * shrink the range to DEV_BSIZE aligned boundaries before calling
	 * vm_page_clear_dirty.
	 */
	end = rounddown2(off + nbytes, DEV_BSIZE);
	off = roundup2(off, DEV_BSIZE);
	nbytes = end - off;

	obj = vp->v_object;
	zfs_vmobject_assert_wlocked(obj);

	for (;;) {
		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
		    pp->valid) {
			if (vm_page_xbusied(pp)) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it.
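				 *
				 * In outline, the retry below: mark the page
				 * referenced, drop the object lock, sleep
				 * until the exclusive busy is released, relock
				 * the object, and loop back to look the page
				 * up again, since it may have been freed or
				 * replaced while we slept.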
				 */
				vm_page_reference(pp);
				vm_page_lock(pp);
				zfs_vmobject_wunlock(obj);
				vm_page_busy_sleep(pp, "zfsmwb", true);
				zfs_vmobject_wlock(obj);
				continue;
			}
			vm_page_sbusy(pp);
		} else if (pp != NULL) {
			ASSERT(!pp->valid);
			pp = NULL;
		}

		if (pp != NULL) {
			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
			vm_object_pip_add(obj, 1);
			pmap_remove_write(pp);
			if (nbytes != 0)
				vm_page_clear_dirty(pp, off, nbytes);
		}
		break;
	}
	return (pp);
}

static void
page_unbusy(vm_page_t pp)
{

	vm_page_sunbusy(pp);
	vm_object_pip_subtract(pp->object, 1);
}

static vm_page_t
page_hold(vnode_t *vp, int64_t start)
{
	vm_object_t obj;
	vm_page_t pp;

	obj = vp->v_object;
	zfs_vmobject_assert_wlocked(obj);

	for (;;) {
		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
		    pp->valid) {
			if (vm_page_xbusied(pp)) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it.
				 */
				vm_page_reference(pp);
				vm_page_lock(pp);
				zfs_vmobject_wunlock(obj);
				vm_page_busy_sleep(pp, "zfsmwb", true);
				zfs_vmobject_wlock(obj);
				continue;
			}

			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_lock(pp);
			vm_page_hold(pp);
			vm_page_unlock(pp);

		} else
			pp = NULL;
		break;
	}
	return (pp);
}

static void
page_unhold(vm_page_t pp)
{

	vm_page_lock(pp);
	vm_page_unhold(pp);
	vm_page_unlock(pp);
}

/*
 * When a file is memory mapped, we must keep the IO data synchronized
 * between the DMU cache and the memory mapped pages.  What this means:
 *
 * On Write:	If we find a memory mapped page, we write to *both*
 *		the page and the dmu buffer.
 */
static void
update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid,
    int segflg, dmu_tx_t *tx)
{
	vm_object_t obj;
	struct sf_buf *sf;
	caddr_t va;
	int off;

	ASSERT(segflg != UIO_NOCOPY);
	ASSERT(vp->v_mount != NULL);
	obj = vp->v_object;
	ASSERT(obj != NULL);

	off = start & PAGEOFFSET;
	zfs_vmobject_wlock(obj);
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		vm_page_t pp;
		int nbytes = imin(PAGESIZE - off, len);

		if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
			zfs_vmobject_wunlock(obj);

			va = zfs_map_page(pp, &sf);
			(void) dmu_read(os, oid, start+off, nbytes,
			    va+off, DMU_READ_PREFETCH);
			zfs_unmap_page(sf);

			zfs_vmobject_wlock(obj);
			page_unbusy(pp);
		}
		len -= nbytes;
		off = 0;
	}
	vm_object_pip_wakeupn(obj, 0);
	zfs_vmobject_wunlock(obj);
}

/*
 * Read with UIO_NOCOPY flag means that sendfile(2) requests
 * ZFS to populate a range of page cache pages with data.
 *
 * NOTE: this function could be optimized to pre-allocate
 * all pages in advance, drain exclusive busy on all of them,
 * map them into contiguous KVA region and populate them
 * in one single dmu_read() call.
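 *
 * A very rough sketch of that approach (hypothetical; the helper names
 * and exact VM interfaces are illustrative, not what this file uses):
 *
 *	npages = howmany(len, PAGESIZE);
 *	grab all npages pages exclusive-busy;
 *	va = map them into one contiguous KVA run;
 *	error = dmu_read(os, zp->z_id, start, len, va, DMU_READ_PREFETCH);
 *	unmap the run, mark the pages valid, and release the busy state
 *	page by page as the loop below does.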
571 */ 572static int 573mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) 574{ 575 znode_t *zp = VTOZ(vp); 576 objset_t *os = zp->z_zfsvfs->z_os; 577 struct sf_buf *sf; 578 vm_object_t obj; 579 vm_page_t pp; 580 int64_t start; 581 caddr_t va; 582 int len = nbytes; 583 int off; 584 int error = 0; 585 586 ASSERT(uio->uio_segflg == UIO_NOCOPY); 587 ASSERT(vp->v_mount != NULL); 588 obj = vp->v_object; 589 ASSERT(obj != NULL); 590 ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); 591 592 zfs_vmobject_wlock(obj); 593 for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { 594 int bytes = MIN(PAGESIZE, len); 595 596 pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY | 597 VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); 598 if (pp->valid == 0) { 599 zfs_vmobject_wunlock(obj); 600 va = zfs_map_page(pp, &sf); 601 error = dmu_read(os, zp->z_id, start, bytes, va, 602 DMU_READ_PREFETCH); 603 if (bytes != PAGESIZE && error == 0) 604 bzero(va + bytes, PAGESIZE - bytes); 605 zfs_unmap_page(sf); 606 zfs_vmobject_wlock(obj); 607 vm_page_sunbusy(pp); 608 vm_page_lock(pp); 609 if (error) { 610 if (pp->wire_count == 0 && pp->valid == 0 && 611 !vm_page_busied(pp)) 612 vm_page_free(pp); 613 } else { 614 pp->valid = VM_PAGE_BITS_ALL; 615 vm_page_activate(pp); 616 } 617 vm_page_unlock(pp); 618 } else { 619 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 620 vm_page_sunbusy(pp); 621 } 622 if (error) 623 break; 624 uio->uio_resid -= bytes; 625 uio->uio_offset += bytes; 626 len -= bytes; 627 } 628 zfs_vmobject_wunlock(obj); 629 return (error); 630} 631 632/* 633 * When a file is memory mapped, we must keep the IO data synchronized 634 * between the DMU cache and the memory mapped pages. What this means: 635 * 636 * On Read: We "read" preferentially from memory mapped pages, 637 * else we default from the dmu buffer. 638 * 639 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 640 * the file is memory mapped. 641 */ 642static int 643mappedread(vnode_t *vp, int nbytes, uio_t *uio) 644{ 645 znode_t *zp = VTOZ(vp); 646 vm_object_t obj; 647 int64_t start; 648 caddr_t va; 649 int len = nbytes; 650 int off; 651 int error = 0; 652 653 ASSERT(vp->v_mount != NULL); 654 obj = vp->v_object; 655 ASSERT(obj != NULL); 656 657 start = uio->uio_loffset; 658 off = start & PAGEOFFSET; 659 zfs_vmobject_wlock(obj); 660 for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 661 vm_page_t pp; 662 uint64_t bytes = MIN(PAGESIZE - off, len); 663 664 if (pp = page_hold(vp, start)) { 665 struct sf_buf *sf; 666 caddr_t va; 667 668 zfs_vmobject_wunlock(obj); 669 va = zfs_map_page(pp, &sf); 670#ifdef illumos 671 error = uiomove(va + off, bytes, UIO_READ, uio); 672#else 673 error = vn_io_fault_uiomove(va + off, bytes, uio); 674#endif 675 zfs_unmap_page(sf); 676 zfs_vmobject_wlock(obj); 677 page_unhold(pp); 678 } else { 679 zfs_vmobject_wunlock(obj); 680 error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 681 uio, bytes); 682 zfs_vmobject_wlock(obj); 683 } 684 len -= bytes; 685 off = 0; 686 if (error) 687 break; 688 } 689 zfs_vmobject_wunlock(obj); 690 return (error); 691} 692#endif /* __FreeBSD__ */ 693 694#ifdef __NetBSD__ 695 696caddr_t 697zfs_map_page(page_t *pp, enum seg_rw rw) 698{ 699 vaddr_t va; 700 int flags; 701 702#ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS 703 if (mm_md_direct_mapped_phys(VM_PAGE_TO_PHYS(pp), &va)) 704 return (caddr_t)va; 705#endif 706 707 flags = UVMPAGER_MAPIN_WAITOK | 708 (rw == S_READ ? 
UVMPAGER_MAPIN_WRITE : UVMPAGER_MAPIN_READ); 709 va = uvm_pagermapin(&pp, 1, flags); 710 return (caddr_t)va; 711} 712 713void 714zfs_unmap_page(page_t *pp, caddr_t addr) 715{ 716 717#ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS 718 vaddr_t va; 719 720 if (mm_md_direct_mapped_phys(VM_PAGE_TO_PHYS(pp), &va)) 721 return; 722#endif 723 uvm_pagermapout((vaddr_t)addr, 1); 724} 725 726static int 727mappedread(vnode_t *vp, int nbytes, uio_t *uio) 728{ 729 znode_t *zp = VTOZ(vp); 730 struct uvm_object *uobj = &vp->v_uobj; 731 krwlock_t *rw = uobj->vmobjlock; 732 int64_t start; 733 caddr_t va; 734 size_t len = nbytes; 735 int off; 736 int error = 0; 737 int npages, found; 738 739 start = uio->uio_loffset; 740 off = start & PAGEOFFSET; 741 742 for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 743 page_t *pp; 744 uint64_t bytes = MIN(PAGESIZE - off, len); 745 746 pp = NULL; 747 npages = 1; 748 rw_enter(rw, RW_WRITER); 749 found = uvn_findpages(uobj, start, &npages, &pp, NULL, 750 UFP_NOALLOC); 751 rw_exit(rw); 752 753 /* XXXNETBSD shouldn't access userspace with the page busy */ 754 if (found) { 755 va = zfs_map_page(pp, S_READ); 756 error = uiomove(va + off, bytes, UIO_READ, uio); 757 zfs_unmap_page(pp, va); 758 rw_enter(rw, RW_WRITER); 759 uvm_page_unbusy(&pp, 1); 760 rw_exit(rw); 761 } else { 762 error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 763 uio, bytes); 764 } 765 766 len -= bytes; 767 off = 0; 768 if (error) 769 break; 770 } 771 return (error); 772} 773 774static void 775update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 776 int segflg, dmu_tx_t *tx) 777{ 778 struct uvm_object *uobj = &vp->v_uobj; 779 krwlock_t *rw = uobj->vmobjlock; 780 caddr_t va; 781 int off, status; 782 783 ASSERT(vp->v_mount != NULL); 784 785 rw_enter(rw, RW_WRITER); 786 787 off = start & PAGEOFFSET; 788 for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 789 page_t *pp; 790 int nbytes = MIN(PAGESIZE - off, len); 791 int npages, found; 792 793 pp = NULL; 794 npages = 1; 795 found = uvn_findpages(uobj, start, &npages, &pp, NULL, 796 UFP_NOALLOC); 797 if (found) { 798 /* 799 * We're about to zap the page's contents and don't 800 * care about any existing modifications. We must 801 * keep track of any new modifications past this 802 * point. Clear the modified bit in the pmap, and 803 * if the page is marked dirty revert to tracking 804 * the modified bit. 805 */ 806 switch (uvm_pagegetdirty(pp)) { 807 case UVM_PAGE_STATUS_DIRTY: 808 /* Does pmap_clear_modify(). */ 809 uvm_pagemarkdirty(pp, UVM_PAGE_STATUS_UNKNOWN); 810 break; 811 case UVM_PAGE_STATUS_UNKNOWN: 812 pmap_clear_modify(pp); 813 break; 814 case UVM_PAGE_STATUS_CLEAN: 815 /* Nothing to do. */ 816 break; 817 } 818 rw_exit(rw); 819 820 va = zfs_map_page(pp, S_WRITE); 821 (void) dmu_read(os, oid, start + off, nbytes, 822 va + off, DMU_READ_PREFETCH); 823 zfs_unmap_page(pp, va); 824 825 rw_enter(rw, RW_WRITER); 826 uvm_page_unbusy(&pp, 1); 827 } 828 len -= nbytes; 829 off = 0; 830 } 831 rw_exit(rw); 832} 833#endif /* __NetBSD__ */ 834 835offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 836 837/* 838 * Read bytes from specified file into supplied buffer. 839 * 840 * IN: vp - vnode of file to be read from. 841 * uio - structure supplying read location, range info, 842 * and return buffer. 843 * ioflag - SYNC flags; used to provide FRSYNC semantics. 844 * cr - credentials of caller. 845 * ct - caller context 846 * 847 * OUT: uio - updated offset and range, buffer filled. 848 * 849 * RETURN: 0 on success, error code on failure. 
850 * 851 * Side Effects: 852 * vp - atime updated if byte count > 0 853 */ 854/* ARGSUSED */ 855static int 856zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 857{ 858 znode_t *zp = VTOZ(vp); 859 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 860 ssize_t n, nbytes; 861 int error = 0; 862 rl_t *rl; 863 xuio_t *xuio = NULL; 864 865 ZFS_ENTER(zfsvfs); 866 ZFS_VERIFY_ZP(zp); 867 868 if (zp->z_pflags & ZFS_AV_QUARANTINED) { 869 ZFS_EXIT(zfsvfs); 870 return (SET_ERROR(EACCES)); 871 } 872 873 /* 874 * Validate file offset 875 */ 876 if (uio->uio_loffset < (offset_t)0) { 877 ZFS_EXIT(zfsvfs); 878 return (SET_ERROR(EINVAL)); 879 } 880 881 /* 882 * Fasttrack empty reads 883 */ 884 if (uio->uio_resid == 0) { 885 ZFS_EXIT(zfsvfs); 886 return (0); 887 } 888 889 /* 890 * Check for mandatory locks 891 */ 892 if (MANDMODE(zp->z_mode)) { 893 if (error = chklock(vp, FREAD, 894 uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 895 ZFS_EXIT(zfsvfs); 896 return (error); 897 } 898 } 899 900 /* 901 * If we're in FRSYNC mode, sync out this znode before reading it. 902 */ 903 if (zfsvfs->z_log && 904 (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) 905 zil_commit(zfsvfs->z_log, zp->z_id); 906 907 /* 908 * Lock the range against changes. 909 */ 910 rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 911 912 /* 913 * If we are reading past end-of-file we can skip 914 * to the end; but we might still need to set atime. 915 */ 916 if (uio->uio_loffset >= zp->z_size) { 917 error = 0; 918 goto out; 919 } 920 921 ASSERT(uio->uio_loffset < zp->z_size); 922 n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 923 924#ifdef illumos 925 if ((uio->uio_extflg == UIO_XUIO) && 926 (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 927 int nblk; 928 int blksz = zp->z_blksz; 929 uint64_t offset = uio->uio_loffset; 930 931 xuio = (xuio_t *)uio; 932 if ((ISP2(blksz))) { 933 nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 934 blksz)) / blksz; 935 } else { 936 ASSERT(offset + n <= blksz); 937 nblk = 1; 938 } 939 (void) dmu_xuio_init(xuio, nblk); 940 941 if (vn_has_cached_data(vp)) { 942 /* 943 * For simplicity, we always allocate a full buffer 944 * even if we only expect to read a portion of a block. 945 */ 946 while (--nblk >= 0) { 947 (void) dmu_xuio_add(xuio, 948 dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 949 blksz), 0, blksz); 950 } 951 } 952 } 953#endif /* illumos */ 954 955 while (n > 0) { 956 nbytes = MIN(n, zfs_read_chunk_size - 957 P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 958 959#ifdef __FreeBSD__ 960 if (uio->uio_segflg == UIO_NOCOPY) 961 error = mappedread_sf(vp, nbytes, uio); 962 else 963#endif /* __FreeBSD__ */ 964 if (vn_has_cached_data(vp)) { 965 error = mappedread(vp, nbytes, uio); 966 } else { 967 error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 968 uio, nbytes); 969 } 970 if (error) { 971 /* convert checksum errors into IO errors */ 972 if (error == ECKSUM) 973 error = SET_ERROR(EIO); 974 break; 975 } 976 977 n -= nbytes; 978 } 979out: 980 zfs_range_unlock(rl); 981 982 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 983 ZFS_EXIT(zfsvfs); 984 return (error); 985} 986 987/* 988 * Write the bytes to a file. 989 * 990 * IN: vp - vnode of file to be written to. 991 * uio - structure supplying write location, range info, 992 * and data buffer. 993 * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is 994 * set if in append mode. 995 * cr - credentials of caller. 
996 * ct - caller context (NFS/CIFS fem monitor only) 997 * 998 * OUT: uio - updated offset and range. 999 * 1000 * RETURN: 0 on success, error code on failure. 1001 * 1002 * Timestamps: 1003 * vp - ctime|mtime updated if byte count > 0 1004 */ 1005 1006/* ARGSUSED */ 1007static int 1008zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 1009{ 1010 znode_t *zp = VTOZ(vp); 1011 rlim64_t limit = MAXOFFSET_T; 1012 ssize_t start_resid = uio->uio_resid; 1013 ssize_t tx_bytes; 1014 uint64_t end_size; 1015 dmu_tx_t *tx; 1016 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1017 zilog_t *zilog; 1018 offset_t woff; 1019 ssize_t n, nbytes; 1020 rl_t *rl; 1021 int max_blksz = zfsvfs->z_max_blksz; 1022 int error = 0; 1023 arc_buf_t *abuf; 1024 iovec_t *aiov = NULL; 1025 xuio_t *xuio = NULL; 1026 int i_iov = 0; 1027 int iovcnt = uio->uio_iovcnt; 1028 iovec_t *iovp = uio->uio_iov; 1029 int write_eof; 1030 int count = 0; 1031 sa_bulk_attr_t bulk[4]; 1032 uint64_t mtime[2], ctime[2]; 1033 int segflg; 1034 1035#ifdef __NetBSD__ 1036 segflg = VMSPACE_IS_KERNEL_P(uio->uio_vmspace) ? 1037 UIO_SYSSPACE : UIO_USERSPACE; 1038#else 1039 segflg = uio->uio_segflg; 1040#endif 1041 1042 /* 1043 * Fasttrack empty write 1044 */ 1045 n = start_resid; 1046 if (n == 0) 1047 return (0); 1048 1049 if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 1050 limit = MAXOFFSET_T; 1051 1052 ZFS_ENTER(zfsvfs); 1053 ZFS_VERIFY_ZP(zp); 1054 1055 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 1056 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 1057 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 1058 &zp->z_size, 8); 1059 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 1060 &zp->z_pflags, 8); 1061 1062 /* 1063 * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our 1064 * callers might not be able to detect properly that we are read-only, 1065 * so check it explicitly here. 1066 */ 1067 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 1068 ZFS_EXIT(zfsvfs); 1069 return (SET_ERROR(EROFS)); 1070 } 1071 1072 /* 1073 * If immutable or not appending then return EPERM 1074 */ 1075 if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 1076 ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 1077 (uio->uio_loffset < zp->z_size))) { 1078 ZFS_EXIT(zfsvfs); 1079 return (SET_ERROR(EPERM)); 1080 } 1081 1082 zilog = zfsvfs->z_log; 1083 1084 /* 1085 * Validate file offset 1086 */ 1087 woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 1088 if (woff < 0) { 1089 ZFS_EXIT(zfsvfs); 1090 return (SET_ERROR(EINVAL)); 1091 } 1092 1093 /* 1094 * Check for mandatory locks before calling zfs_range_lock() 1095 * in order to prevent a deadlock with locks set via fcntl(). 1096 */ 1097 if (MANDMODE((mode_t)zp->z_mode) && 1098 (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 1099 ZFS_EXIT(zfsvfs); 1100 return (error); 1101 } 1102 1103#ifdef illumos 1104 /* 1105 * Pre-fault the pages to ensure slow (eg NFS) pages 1106 * don't hold up txg. 1107 * Skip this if uio contains loaned arc_buf. 1108 */ 1109 if ((uio->uio_extflg == UIO_XUIO) && 1110 (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 1111 xuio = (xuio_t *)uio; 1112 else 1113 uio_prefaultpages(MIN(n, max_blksz), uio); 1114#endif 1115 1116 /* 1117 * If in append mode, set the io offset pointer to eof. 1118 */ 1119 if (ioflag & FAPPEND) { 1120 /* 1121 * Obtain an appending range lock to guarantee file append 1122 * semantics. We reset the write offset once we have the lock. 
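		 *
		 * Illustrative example: two threads appending concurrently
		 * each take an RL_APPEND lock; the range lock code serializes
		 * them and hands each an r_off at the then-current EOF, so
		 * neither append overwrites the other.  When the write may
		 * also grow the block size, r_len comes back as UINT64_MAX
		 * (the whole file is locked) and z_size is re-read under the
		 * lock, as handled just below.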
1123 */ 1124 rl = zfs_range_lock(zp, 0, n, RL_APPEND); 1125 woff = rl->r_off; 1126 if (rl->r_len == UINT64_MAX) { 1127 /* 1128 * We overlocked the file because this write will cause 1129 * the file block size to increase. 1130 * Note that zp_size cannot change with this lock held. 1131 */ 1132 woff = zp->z_size; 1133 } 1134 uio->uio_loffset = woff; 1135 } else { 1136 /* 1137 * Note that if the file block size will change as a result of 1138 * this write, then this range lock will lock the entire file 1139 * so that we can re-write the block safely. 1140 */ 1141 rl = zfs_range_lock(zp, woff, n, RL_WRITER); 1142 } 1143 1144#ifdef illumos 1145 if (woff >= limit) { 1146 zfs_range_unlock(rl); 1147 ZFS_EXIT(zfsvfs); 1148 return (SET_ERROR(EFBIG)); 1149 } 1150 1151#endif 1152#ifdef __FreeBSD__ 1153 if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { 1154 zfs_range_unlock(rl); 1155 ZFS_EXIT(zfsvfs); 1156 return (SET_ERROR(EFBIG)); 1157 } 1158#endif 1159#ifdef __NetBSD__ 1160 /* XXXNETBSD we might need vn_rlimit_fsize() too here eventually */ 1161#endif 1162 1163 if ((woff + n) > limit || woff > (limit - n)) 1164 n = limit - woff; 1165 1166 /* Will this write extend the file length? */ 1167 write_eof = (woff + n > zp->z_size); 1168 1169 end_size = MAX(zp->z_size, woff + n); 1170 1171 /* 1172 * Write the file in reasonable size chunks. Each chunk is written 1173 * in a separate transaction; this keeps the intent log records small 1174 * and allows us to do more fine-grained space accounting. 1175 */ 1176 while (n > 0) { 1177 abuf = NULL; 1178 woff = uio->uio_loffset; 1179 if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 1180 zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 1181 if (abuf != NULL) 1182 dmu_return_arcbuf(abuf); 1183 error = SET_ERROR(EDQUOT); 1184 break; 1185 } 1186 1187 if (xuio && abuf == NULL) { 1188 ASSERT(i_iov < iovcnt); 1189 aiov = &iovp[i_iov]; 1190 abuf = dmu_xuio_arcbuf(xuio, i_iov); 1191 dmu_xuio_clear(xuio, i_iov); 1192 DTRACE_PROBE3(zfs_cp_write, int, i_iov, 1193 iovec_t *, aiov, arc_buf_t *, abuf); 1194 ASSERT((aiov->iov_base == abuf->b_data) || 1195 ((char *)aiov->iov_base - (char *)abuf->b_data + 1196 aiov->iov_len == arc_buf_size(abuf))); 1197 i_iov++; 1198 } else if (abuf == NULL && n >= max_blksz && 1199 woff >= zp->z_size && 1200 P2PHASE(woff, max_blksz) == 0 && 1201 zp->z_blksz == max_blksz) { 1202 /* 1203 * This write covers a full block. "Borrow" a buffer 1204 * from the dmu so that we can fill it before we enter 1205 * a transaction. This avoids the possibility of 1206 * holding up the transaction if the data copy hangs 1207 * up on a pagefault (e.g., from an NFS server mapping). 1208 */ 1209#if defined(illumos) || defined(__NetBSD__) 1210 size_t cbytes; 1211#endif 1212 1213 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 1214 max_blksz); 1215 ASSERT(abuf != NULL); 1216 ASSERT(arc_buf_size(abuf) == max_blksz); 1217#if defined(illumos) || defined(__NetBSD__) 1218 if (error = uiocopy(abuf->b_data, max_blksz, 1219 UIO_WRITE, uio, &cbytes)) { 1220 dmu_return_arcbuf(abuf); 1221 break; 1222 } 1223 ASSERT(cbytes == max_blksz); 1224#endif 1225#ifdef __FreeBSD__ 1226 ssize_t resid = uio->uio_resid; 1227 1228 error = vn_io_fault_uiomove(abuf->b_data, max_blksz, uio); 1229 if (error != 0) { 1230 uio->uio_offset -= resid - uio->uio_resid; 1231 uio->uio_resid = resid; 1232 dmu_return_arcbuf(abuf); 1233 break; 1234 } 1235#endif 1236 } 1237 1238 /* 1239 * Start a transaction. 
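		 *
		 * Per the Big Rules above: create the tx, declare everything
		 * this chunk may modify with dmu_tx_hold_*(), then assign.
		 * The range lock was taken before dmu_tx_assign() (rule 3),
		 * and if the assignment fails the tx is aborted rather than
		 * committed.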
		 */
		tx = dmu_tx_create(zfsvfs->z_os);
		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
		zfs_sa_upgrade_txholds(tx, zp);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			dmu_tx_abort(tx);
			if (abuf != NULL)
				dmu_return_arcbuf(abuf);
			break;
		}

		/*
		 * If zfs_range_lock() over-locked we grow the blocksize
		 * and then reduce the lock range.  This will only happen
		 * on the first iteration since zfs_range_reduce() will
		 * shrink down r_len to the appropriate size.
		 */
		if (rl->r_len == UINT64_MAX) {
			uint64_t new_blksz;

			if (zp->z_blksz > max_blksz) {
				/*
				 * File's blocksize is already larger than the
				 * "recordsize" property.  Only let it grow to
				 * the next power of 2.
				 */
				ASSERT(!ISP2(zp->z_blksz));
				new_blksz = MIN(end_size,
				    1 << highbit64(zp->z_blksz));
			} else {
				new_blksz = MIN(end_size, max_blksz);
			}
			zfs_grow_blocksize(zp, new_blksz, tx);
			zfs_range_reduce(rl, woff, n);
		}

		/*
		 * XXX - should we really limit each write to z_max_blksz?
		 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
		 */
		nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));

		if (woff + nbytes > zp->z_size)
			vnode_pager_setsize(vp, woff + nbytes);

		if (abuf == NULL) {
			tx_bytes = uio->uio_resid;
			error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
			    uio, nbytes, tx);
			tx_bytes -= uio->uio_resid;
		} else {
			tx_bytes = nbytes;
			ASSERT(xuio == NULL || tx_bytes == aiov->iov_len);
			/*
			 * If this is not a full block write, but we are
			 * extending the file past EOF and this data starts
			 * block-aligned, use assign_arcbuf().  Otherwise,
			 * write via dmu_write().
			 */
			if (tx_bytes < max_blksz && (!write_eof ||
			    aiov->iov_base != abuf->b_data)) {
				ASSERT(xuio);
				dmu_write(zfsvfs->z_os, zp->z_id, woff,
				    aiov->iov_len, aiov->iov_base, tx);
				dmu_return_arcbuf(abuf);
				xuio_stat_wbuf_copied();
			} else {
				ASSERT(xuio || tx_bytes == max_blksz);
				dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
				    woff, abuf, tx);
			}
#if defined(illumos) || defined(__NetBSD__)
			ASSERT(tx_bytes <= uio->uio_resid);
			uioskip(uio, tx_bytes);
#endif
		}
		if (tx_bytes && vn_has_cached_data(vp)) {
			update_pages(vp, woff, tx_bytes, zfsvfs->z_os,
			    zp->z_id, segflg, tx);
		}

		/*
		 * If we made no progress, we're done.  If we made even
		 * partial progress, update the znode and ZIL accordingly.
		 */
		if (tx_bytes == 0) {
			(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
			    (void *)&zp->z_size, sizeof (uint64_t), tx);
			dmu_tx_commit(tx);
			ASSERT(error != 0);
			break;
		}

		/*
		 * Clear Set-UID/Set-GID bits on successful write if not
		 * privileged and at least one of the execute bits is set.
		 *
		 * It would be nice to do this after all writes have
		 * been done, but that would still expose the ISUID/ISGID
		 * to another app after the partial write is committed.
		 *
		 * Note: we don't call zfs_fuid_map_id() here because
		 * user 0 is not an ephemeral uid.
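		 *
		 * Note: (S_IXUSR | (S_IXUSR >> 3) | (S_IXUSR >> 6)) is 0111,
		 * i.e. the owner, group and other execute bits, so the test
		 * below fires when any execute bit is set alongside ISUID or
		 * ISGID.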
1345 */ 1346 mutex_enter(&zp->z_acl_lock); 1347 if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 1348 (S_IXUSR >> 6))) != 0 && 1349 (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 1350 secpolicy_vnode_setid_retain(vp, cr, 1351 (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 1352 uint64_t newmode; 1353 zp->z_mode &= ~(S_ISUID | S_ISGID); 1354 newmode = zp->z_mode; 1355 (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 1356 (void *)&newmode, sizeof (uint64_t), tx); 1357#ifdef __NetBSD__ 1358 cache_enter_id(vp, zp->z_mode, zp->z_uid, zp->z_gid, 1359 true); 1360#endif 1361 } 1362 mutex_exit(&zp->z_acl_lock); 1363 1364 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 1365 B_TRUE); 1366 1367 /* 1368 * Update the file size (zp_size) if it has changed; 1369 * account for possible concurrent updates. 1370 */ 1371 while ((end_size = zp->z_size) < uio->uio_loffset) { 1372 (void) atomic_cas_64(&zp->z_size, end_size, 1373 uio->uio_loffset); 1374#ifdef illumos 1375 ASSERT(error == 0); 1376#else 1377 ASSERT(error == 0 || error == EFAULT); 1378#endif 1379 } 1380 /* 1381 * If we are replaying and eof is non zero then force 1382 * the file size to the specified eof. Note, there's no 1383 * concurrency during replay. 1384 */ 1385 if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 1386 zp->z_size = zfsvfs->z_replay_eof; 1387 1388 if (error == 0) 1389 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1390 else 1391 (void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1392 1393 zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 1394 dmu_tx_commit(tx); 1395 1396 if (error != 0) 1397 break; 1398 ASSERT(tx_bytes == nbytes); 1399 n -= nbytes; 1400 1401#ifdef illumos 1402 if (!xuio && n > 0) 1403 uio_prefaultpages(MIN(n, max_blksz), uio); 1404#endif 1405 } 1406 1407 zfs_range_unlock(rl); 1408 1409 /* 1410 * If we're in replay mode, or we made no progress, return error. 1411 * Otherwise, it's at least a partial write, so it's successful. 1412 */ 1413 if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1414 ZFS_EXIT(zfsvfs); 1415 return (error); 1416 } 1417 1418#ifdef __FreeBSD__ 1419 /* 1420 * EFAULT means that at least one page of the source buffer was not 1421 * available. VFS will re-try remaining I/O upon this error. 1422 */ 1423 if (error == EFAULT) { 1424 ZFS_EXIT(zfsvfs); 1425 return (error); 1426 } 1427#endif 1428 1429 if (ioflag & (FSYNC | FDSYNC) || 1430 zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1431 zil_commit(zilog, zp->z_id); 1432 1433 ZFS_EXIT(zfsvfs); 1434 return (0); 1435} 1436 1437void 1438zfs_get_done(zgd_t *zgd, int error) 1439{ 1440 znode_t *zp = zgd->zgd_private; 1441 objset_t *os = zp->z_zfsvfs->z_os; 1442 1443 if (zgd->zgd_db) 1444 dmu_buf_rele(zgd->zgd_db, zgd); 1445 1446 zfs_range_unlock(zgd->zgd_rl); 1447 1448 /* 1449 * Release the vnode asynchronously as we currently have the 1450 * txg stopped from syncing. 1451 */ 1452 VN_RELE_CLEANER(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1453 1454 if (error == 0 && zgd->zgd_bp) 1455 zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1456 1457 kmem_free(zgd, sizeof (zgd_t)); 1458} 1459 1460#ifdef DEBUG 1461static int zil_fault_io = 0; 1462#endif 1463 1464/* 1465 * Get data to generate a TX_WRITE intent log record. 
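 *
 * This is the get-data callback used when committing a TX_WRITE record
 * whose data was not captured at log time: when "buf" is non-NULL the
 * data is copied straight into the log record (an immediate write);
 * when "buf" is NULL the data is synced with dmu_sync() and only a
 * block pointer is logged (an indirect write).  See the longer comment
 * inside the function body.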
1466 */ 1467int 1468zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1469{ 1470 zfsvfs_t *zfsvfs = arg; 1471 objset_t *os = zfsvfs->z_os; 1472 znode_t *zp; 1473 uint64_t object = lr->lr_foid; 1474 uint64_t offset = lr->lr_offset; 1475 uint64_t size = lr->lr_length; 1476 blkptr_t *bp = &lr->lr_blkptr; 1477 dmu_buf_t *db; 1478 zgd_t *zgd; 1479 int error = 0; 1480 1481 ASSERT(zio != NULL); 1482 ASSERT(size != 0); 1483 1484 /* 1485 * Nothing to do if the file has been removed 1486 */ 1487 if (zfs_zget_cleaner(zfsvfs, object, &zp) != 0) 1488 return (SET_ERROR(ENOENT)); 1489 if (zp->z_unlinked) { 1490 /* 1491 * Release the vnode asynchronously as we currently have the 1492 * txg stopped from syncing. 1493 */ 1494 VN_RELE_CLEANER(ZTOV(zp), 1495 dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1496 return (SET_ERROR(ENOENT)); 1497 } 1498 1499 zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1500 zgd->zgd_zilog = zfsvfs->z_log; 1501 zgd->zgd_private = zp; 1502 1503 /* 1504 * Write records come in two flavors: immediate and indirect. 1505 * For small writes it's cheaper to store the data with the 1506 * log record (immediate); for large writes it's cheaper to 1507 * sync the data and get a pointer to it (indirect) so that 1508 * we don't have to write the data twice. 1509 */ 1510 if (buf != NULL) { /* immediate write */ 1511 zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1512 /* test for truncation needs to be done while range locked */ 1513 if (offset >= zp->z_size) { 1514 error = SET_ERROR(ENOENT); 1515 } else { 1516 error = dmu_read(os, object, offset, size, buf, 1517 DMU_READ_NO_PREFETCH); 1518 } 1519 ASSERT(error == 0 || error == ENOENT); 1520 } else { /* indirect write */ 1521 /* 1522 * Have to lock the whole block to ensure when it's 1523 * written out and it's checksum is being calculated 1524 * that no one can change the data. We need to re-check 1525 * blocksize after we get the lock in case it's changed! 1526 */ 1527 for (;;) { 1528 uint64_t blkoff; 1529 size = zp->z_blksz; 1530 blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; 1531 offset -= blkoff; 1532 zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1533 RL_READER); 1534 if (zp->z_blksz == size) 1535 break; 1536 offset += blkoff; 1537 zfs_range_unlock(zgd->zgd_rl); 1538 } 1539 /* test for truncation needs to be done while range locked */ 1540 if (lr->lr_offset >= zp->z_size) 1541 error = SET_ERROR(ENOENT); 1542#ifdef DEBUG 1543 if (zil_fault_io) { 1544 error = SET_ERROR(EIO); 1545 zil_fault_io = 0; 1546 } 1547#endif 1548 if (error == 0) 1549 error = dmu_buf_hold(os, object, offset, zgd, &db, 1550 DMU_READ_NO_PREFETCH); 1551 1552 if (error == 0) { 1553 blkptr_t *obp = dmu_buf_get_blkptr(db); 1554 if (obp) { 1555 ASSERT(BP_IS_HOLE(bp)); 1556 *bp = *obp; 1557 } 1558 1559 zgd->zgd_db = db; 1560 zgd->zgd_bp = bp; 1561 1562 ASSERT(db->db_offset == offset); 1563 ASSERT(db->db_size == size); 1564 1565 error = dmu_sync(zio, lr->lr_common.lrc_txg, 1566 zfs_get_done, zgd); 1567 ASSERT(error || lr->lr_length <= zp->z_blksz); 1568 1569 /* 1570 * On success, we need to wait for the write I/O 1571 * initiated by dmu_sync() to complete before we can 1572 * release this dbuf. We will finish everything up 1573 * in the zfs_get_done() callback. 
1574 */ 1575 if (error == 0) 1576 return (0); 1577 1578 if (error == EALREADY) { 1579 lr->lr_common.lrc_txtype = TX_WRITE2; 1580 error = 0; 1581 } 1582 } 1583 } 1584 1585 zfs_get_done(zgd, error); 1586 1587 return (error); 1588} 1589 1590/*ARGSUSED*/ 1591static int 1592zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1593 caller_context_t *ct) 1594{ 1595 znode_t *zp = VTOZ(vp); 1596 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1597 int error; 1598 1599 ZFS_ENTER(zfsvfs); 1600 ZFS_VERIFY_ZP(zp); 1601 1602 if (flag & V_ACE_MASK) 1603 error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1604 else 1605 error = zfs_zaccess_rwx(zp, mode, flag, cr); 1606 1607 ZFS_EXIT(zfsvfs); 1608 return (error); 1609} 1610 1611#ifdef __FreeBSD__ 1612static int 1613zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp) 1614{ 1615 int error; 1616 1617 *vpp = arg; 1618 error = vn_lock(*vpp, lkflags); 1619 if (error != 0) 1620 vrele(*vpp); 1621 return (error); 1622} 1623 1624static int 1625zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags) 1626{ 1627 znode_t *zdp = VTOZ(dvp); 1628 zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1629 int error; 1630 int ltype; 1631 1632 ASSERT_VOP_LOCKED(dvp, __func__); 1633#ifdef DIAGNOSTIC 1634 if ((zdp->z_pflags & ZFS_XATTR) == 0) 1635 VERIFY(!RRM_LOCK_HELD(&zfsvfs->z_teardown_lock)); 1636#endif 1637 1638 if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { 1639 ASSERT3P(dvp, ==, vp); 1640 vref(dvp); 1641 ltype = lkflags & LK_TYPE_MASK; 1642 if (ltype != VOP_ISLOCKED(dvp)) { 1643 if (ltype == LK_EXCLUSIVE) 1644 vn_lock(dvp, LK_UPGRADE | LK_RETRY); 1645 else /* if (ltype == LK_SHARED) */ 1646 vn_lock(dvp, LK_DOWNGRADE | LK_RETRY); 1647 1648 /* 1649 * Relock for the "." case could leave us with 1650 * reclaimed vnode. 1651 */ 1652 if (dvp->v_iflag & VI_DOOMED) { 1653 vrele(dvp); 1654 return (SET_ERROR(ENOENT)); 1655 } 1656 } 1657 return (0); 1658 } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { 1659 /* 1660 * Note that in this case, dvp is the child vnode, and we 1661 * are looking up the parent vnode - exactly reverse from 1662 * normal operation. Unlocking dvp requires some rather 1663 * tricky unlock/relock dance to prevent mp from being freed; 1664 * use vn_vget_ino_gen() which takes care of all that. 1665 * 1666 * XXX Note that there is a time window when both vnodes are 1667 * unlocked. It is possible, although highly unlikely, that 1668 * during that window the parent-child relationship between 1669 * the vnodes may change, for example, get reversed. 1670 * In that case we would have a wrong lock order for the vnodes. 1671 * All other filesystems seem to ignore this problem, so we 1672 * do the same here. 1673 * A potential solution could be implemented as follows: 1674 * - using LK_NOWAIT when locking the second vnode and retrying 1675 * if necessary 1676 * - checking that the parent-child relationship still holds 1677 * after locking both vnodes and retrying if it doesn't 1678 */ 1679 error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp); 1680 return (error); 1681 } else { 1682 error = vn_lock(vp, lkflags); 1683 if (error != 0) 1684 vrele(vp); 1685 return (error); 1686 } 1687} 1688 1689/* 1690 * Lookup an entry in a directory, or an extended attribute directory. 1691 * If it exists, return a held vnode reference for it. 1692 * 1693 * IN: dvp - vnode of directory to search. 1694 * nm - name of entry to lookup. 1695 * pnp - full pathname to lookup [UNUSED]. 1696 * flags - LOOKUP_XATTR set if looking for an attribute. 
1697 * rdir - root directory vnode [UNUSED]. 1698 * cr - credentials of caller. 1699 * ct - caller context 1700 * 1701 * OUT: vpp - vnode of located entry, NULL if not found. 1702 * 1703 * RETURN: 0 on success, error code on failure. 1704 * 1705 * Timestamps: 1706 * NA 1707 */ 1708/* ARGSUSED */ 1709static int 1710zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1711 int nameiop, cred_t *cr, kthread_t *td, int flags) 1712{ 1713 znode_t *zdp = VTOZ(dvp); 1714 znode_t *zp; 1715 zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1716 int error = 0; 1717 1718 /* fast path (should be redundant with vfs namecache) */ 1719 if (!(flags & LOOKUP_XATTR)) { 1720 if (dvp->v_type != VDIR) { 1721 return (SET_ERROR(ENOTDIR)); 1722 } else if (zdp->z_sa_hdl == NULL) { 1723 return (SET_ERROR(EIO)); 1724 } 1725 } 1726 1727 DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1728 1729 ZFS_ENTER(zfsvfs); 1730 ZFS_VERIFY_ZP(zdp); 1731 1732 *vpp = NULL; 1733 1734 if (flags & LOOKUP_XATTR) { 1735#ifdef TODO 1736 /* 1737 * If the xattr property is off, refuse the lookup request. 1738 */ 1739 if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1740 ZFS_EXIT(zfsvfs); 1741 return (SET_ERROR(EINVAL)); 1742 } 1743#endif 1744 1745 /* 1746 * We don't allow recursive attributes.. 1747 * Maybe someday we will. 1748 */ 1749 if (zdp->z_pflags & ZFS_XATTR) { 1750 ZFS_EXIT(zfsvfs); 1751 return (SET_ERROR(EINVAL)); 1752 } 1753 1754 if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1755 ZFS_EXIT(zfsvfs); 1756 return (error); 1757 } 1758 1759 /* 1760 * Do we have permission to get into attribute directory? 1761 */ 1762 if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1763 B_FALSE, cr)) { 1764 vrele(*vpp); 1765 *vpp = NULL; 1766 } 1767 1768 ZFS_EXIT(zfsvfs); 1769 return (error); 1770 } 1771 1772 /* 1773 * Check accessibility of directory. 1774 */ 1775 if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1776 ZFS_EXIT(zfsvfs); 1777 return (error); 1778 } 1779 1780 if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1781 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1782 ZFS_EXIT(zfsvfs); 1783 return (SET_ERROR(EILSEQ)); 1784 } 1785 1786 1787 /* 1788 * First handle the special cases. 1789 */ 1790 if ((cnp->cn_flags & ISDOTDOT) != 0) { 1791 /* 1792 * If we are a snapshot mounted under .zfs, return 1793 * the vp for the snapshot directory. 1794 */ 1795 if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) { 1796 struct componentname cn; 1797 vnode_t *zfsctl_vp; 1798 int ltype; 1799 1800 ZFS_EXIT(zfsvfs); 1801 ltype = VOP_ISLOCKED(dvp); 1802 VOP_UNLOCK(dvp, 0); 1803 error = zfsctl_root(zfsvfs->z_parent, LK_SHARED, 1804 &zfsctl_vp); 1805 if (error == 0) { 1806 cn.cn_nameptr = "snapshot"; 1807 cn.cn_namelen = strlen(cn.cn_nameptr); 1808 cn.cn_nameiop = cnp->cn_nameiop; 1809 cn.cn_flags = cnp->cn_flags; 1810 cn.cn_lkflags = cnp->cn_lkflags; 1811 error = VOP_LOOKUP(zfsctl_vp, vpp, &cn); 1812 vput(zfsctl_vp); 1813 } 1814 vn_lock(dvp, ltype | LK_RETRY); 1815 return (error); 1816 } 1817 } 1818 if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) { 1819 ZFS_EXIT(zfsvfs); 1820 if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP) 1821 return (SET_ERROR(ENOTSUP)); 1822 error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp); 1823 return (error); 1824 } 1825 1826 /* 1827 * The loop is retry the lookup if the parent-child relationship 1828 * changes during the dot-dot locking complexities. 
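	 *
	 * In outline: look the name up, lock the resulting vnode, and for
	 * the ".." case re-read SA_ZPL_PARENT afterwards; if the recorded
	 * parent no longer matches the vnode we found, the relationship
	 * changed while both vnodes were unlocked, so drop the vnode and
	 * retry.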
1829 */ 1830 for (;;) { 1831 uint64_t parent; 1832 1833 error = zfs_dirlook(zdp, nm, &zp); 1834 if (error == 0) 1835 *vpp = ZTOV(zp); 1836 1837 ZFS_EXIT(zfsvfs); 1838 if (error != 0) 1839 break; 1840 1841 error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags); 1842 if (error != 0) { 1843 /* 1844 * If we've got a locking error, then the vnode 1845 * got reclaimed because of a force unmount. 1846 * We never enter doomed vnodes into the name cache. 1847 */ 1848 *vpp = NULL; 1849 return (error); 1850 } 1851 1852 if ((cnp->cn_flags & ISDOTDOT) == 0) 1853 break; 1854 1855 ZFS_ENTER(zfsvfs); 1856 if (zdp->z_sa_hdl == NULL) { 1857 error = SET_ERROR(EIO); 1858 } else { 1859 error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 1860 &parent, sizeof (parent)); 1861 } 1862 if (error != 0) { 1863 ZFS_EXIT(zfsvfs); 1864 vput(ZTOV(zp)); 1865 break; 1866 } 1867 if (zp->z_id == parent) { 1868 ZFS_EXIT(zfsvfs); 1869 break; 1870 } 1871 vput(ZTOV(zp)); 1872 } 1873 1874out: 1875 if (error != 0) 1876 *vpp = NULL; 1877 1878 /* Translate errors and add SAVENAME when needed. */ 1879 if (cnp->cn_flags & ISLASTCN) { 1880 switch (nameiop) { 1881 case CREATE: 1882 case RENAME: 1883 if (error == ENOENT) { 1884 error = EJUSTRETURN; 1885 cnp->cn_flags |= SAVENAME; 1886 break; 1887 } 1888 /* FALLTHROUGH */ 1889 case DELETE: 1890 if (error == 0) 1891 cnp->cn_flags |= SAVENAME; 1892 break; 1893 } 1894 } 1895 1896 /* Insert name into cache (as non-existent) if appropriate. */ 1897 if (zfsvfs->z_use_namecache && 1898 error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0) 1899 cache_enter(dvp, NULL, cnp); 1900 1901 /* Insert name into cache if appropriate. */ 1902 if (zfsvfs->z_use_namecache && 1903 error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1904 if (!(cnp->cn_flags & ISLASTCN) || 1905 (nameiop != DELETE && nameiop != RENAME)) { 1906 cache_enter(dvp, *vpp, cnp); 1907 } 1908 } 1909 1910 return (error); 1911} 1912#endif /* __FreeBSD__ */ 1913 1914#ifdef __NetBSD__ 1915/* 1916 * If vnode is for a device return a specfs vnode instead. 1917 */ 1918static int 1919specvp_check(vnode_t **vpp, cred_t *cr) 1920{ 1921 int error = 0; 1922 1923 if (IS_DEVVP(*vpp)) { 1924 struct vnode *svp; 1925 1926 svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1927 VN_RELE(*vpp); 1928 if (svp == NULL) 1929 error = ENOSYS; 1930 *vpp = svp; 1931 } 1932 return (error); 1933} 1934 1935/* 1936 * Lookup an entry in a directory, or an extended attribute directory. 1937 * If it exists, return a held vnode reference for it. 1938 * 1939 * IN: dvp - vnode of directory to search. 1940 * nm - name of entry to lookup. 1941 * pnp - full pathname to lookup [UNUSED]. 1942 * flags - LOOKUP_XATTR set if looking for an attribute. 1943 * rdir - root directory vnode [UNUSED]. 1944 * cr - credentials of caller. 1945 * ct - caller context 1946 * direntflags - directory lookup flags 1947 * realpnp - returned pathname. 1948 * 1949 * OUT: vpp - vnode of located entry, NULL if not found. 
1950 * 1951 * RETURN: 0 if success 1952 * error code if failure 1953 * 1954 * Timestamps: 1955 * NA 1956 */ 1957/* ARGSUSED */ 1958static int 1959zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, int flags, 1960 struct componentname *cnp, int nameiop, cred_t *cr) 1961{ 1962 znode_t *zdp = VTOZ(dvp); 1963 znode_t *zp; 1964 zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1965 int error = 0; 1966 1967 /* fast path */ 1968 if (!(flags & LOOKUP_XATTR)) { 1969 if (dvp->v_type != VDIR) { 1970 return (ENOTDIR); 1971 } else if (zdp->z_sa_hdl == NULL) { 1972 return (SET_ERROR(EIO)); 1973 } 1974 1975 if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { 1976 error = zfs_fastaccesschk_execute(zdp, cr); 1977 if (!error) { 1978 *vpp = dvp; 1979 VN_HOLD(*vpp); 1980 return (0); 1981 } 1982 return (error); 1983 } else { 1984 vnode_t *tvp = dnlc_lookup(dvp, nm); 1985 1986 if (tvp) { 1987 error = zfs_fastaccesschk_execute(zdp, cr); 1988 if (error) { 1989 VN_RELE(tvp); 1990 return (error); 1991 } 1992 if (tvp == DNLC_NO_VNODE) { 1993 VN_RELE(tvp); 1994 return (ENOENT); 1995 } else { 1996 *vpp = tvp; 1997 return (specvp_check(vpp, cr)); 1998 } 1999 } 2000 } 2001 } 2002 2003 DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 2004 2005 ZFS_ENTER(zfsvfs); 2006 ZFS_VERIFY_ZP(zdp); 2007 2008 *vpp = NULL; 2009 2010 if (flags & LOOKUP_XATTR) { 2011#ifdef TODO 2012 /* 2013 * If the xattr property is off, refuse the lookup request. 2014 */ 2015 if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 2016 ZFS_EXIT(zfsvfs); 2017 return (EINVAL); 2018 } 2019#endif 2020 2021 /* 2022 * We don't allow recursive attributes.. 2023 * Maybe someday we will. 2024 */ 2025 if (zdp->z_pflags & ZFS_XATTR) { 2026 ZFS_EXIT(zfsvfs); 2027 return (EINVAL); 2028 } 2029 2030 if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 2031 ZFS_EXIT(zfsvfs); 2032 return (error); 2033 } 2034 2035 /* 2036 * Do we have permission to get into attribute directory? 2037 */ 2038 if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 2039 B_FALSE, cr)) { 2040 VN_RELE(*vpp); 2041 *vpp = NULL; 2042 } 2043 2044 ZFS_EXIT(zfsvfs); 2045 return (error); 2046 } 2047 2048 if (dvp->v_type != VDIR) { 2049 ZFS_EXIT(zfsvfs); 2050 return (ENOTDIR); 2051 } 2052 2053 /* 2054 * Check accessibility of directory. 2055 */ 2056 if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 2057 ZFS_EXIT(zfsvfs); 2058 return (error); 2059 } 2060 2061 if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 2062 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2063 ZFS_EXIT(zfsvfs); 2064 return (EILSEQ); 2065 } 2066 2067 /* 2068 * First handle the special cases. 2069 */ 2070 if ((cnp->cn_flags & ISDOTDOT) != 0) { 2071 /* 2072 * If we are a snapshot mounted under .zfs, return 2073 * the vp for the snapshot directory. 2074 */ 2075 if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) { 2076 ZFS_EXIT(zfsvfs); 2077 error = zfsctl_snapshot(zfsvfs->z_parent, vpp); 2078 2079 return (error); 2080 } 2081 } 2082 if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) { 2083 ZFS_EXIT(zfsvfs); 2084 if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP) 2085 return (SET_ERROR(ENOTSUP)); 2086 error = zfsctl_root(zfsvfs, vpp); 2087 return (error); 2088 } 2089 2090 error = zfs_dirlook(zdp, nm, &zp); 2091 if (error == 0) { 2092 *vpp = ZTOV(zp); 2093 error = specvp_check(vpp, cr); 2094 } 2095 2096 ZFS_EXIT(zfsvfs); 2097 return (error); 2098} 2099#endif 2100 2101/* 2102 * Attempt to create a new entry in a directory. 
If the entry 2103 * already exists, truncate the file if permissible, else return 2104 * an error. Return the vp of the created or trunc'd file. 2105 * 2106 * IN: dvp - vnode of directory to put new file entry in. 2107 * name - name of new file entry. 2108 * vap - attributes of new file. 2109 * excl - flag indicating exclusive or non-exclusive mode. 2110 * mode - mode to open file with. 2111 * cr - credentials of caller. 2112 * flag - large file flag [UNUSED]. 2113 * ct - caller context 2114 * vsecp - ACL to be set 2115 * 2116 * OUT: vpp - vnode of created or trunc'd entry. 2117 * 2118 * RETURN: 0 on success, error code on failure. 2119 * 2120 * Timestamps: 2121 * dvp - ctime|mtime updated if new entry created 2122 * vp - ctime|mtime always, atime if new 2123 */ 2124 2125/* ARGSUSED */ 2126static int 2127zfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 2128 vnode_t **vpp, cred_t *cr, kthread_t *td) 2129{ 2130 znode_t *zp, *dzp = VTOZ(dvp); 2131 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2132 zilog_t *zilog; 2133 objset_t *os; 2134 dmu_tx_t *tx; 2135 int error; 2136 ksid_t *ksid; 2137 uid_t uid; 2138 gid_t gid = crgetgid(cr); 2139 zfs_acl_ids_t acl_ids; 2140 boolean_t fuid_dirtied; 2141 void *vsecp = NULL; 2142 int flag = 0; 2143 uint64_t txtype; 2144 2145 /* 2146 * If we have an ephemeral id, ACL, or XVATTR then 2147 * make sure file system is at proper version 2148 */ 2149 2150 ksid = crgetsid(cr, KSID_OWNER); 2151 if (ksid) 2152 uid = ksid_getid(ksid); 2153 else 2154 uid = crgetuid(cr); 2155 2156 if (zfsvfs->z_use_fuids == B_FALSE && 2157 (vsecp || (vap->va_mask & AT_XVATTR) || 2158 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2159 return (SET_ERROR(EINVAL)); 2160 2161 ZFS_ENTER(zfsvfs); 2162 ZFS_VERIFY_ZP(dzp); 2163 os = zfsvfs->z_os; 2164 zilog = zfsvfs->z_log; 2165 2166 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 2167 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2168 ZFS_EXIT(zfsvfs); 2169 return (SET_ERROR(EILSEQ)); 2170 } 2171 2172 if (vap->va_mask & AT_XVATTR) { 2173 if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2174 crgetuid(cr), cr, vap->va_type)) != 0) { 2175 ZFS_EXIT(zfsvfs); 2176 return (error); 2177 } 2178 } 2179 2180 *vpp = NULL; 2181 2182 if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 2183 vap->va_mode &= ~S_ISVTX; 2184 2185 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 2186 if (error) { 2187 ZFS_EXIT(zfsvfs); 2188 return (error); 2189 } 2190 ASSERT3P(zp, ==, NULL); 2191 2192 /* 2193 * Create a new file object and update the directory 2194 * to reference it. 2195 */ 2196 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 2197 goto out; 2198 } 2199 2200 /* 2201 * We only support the creation of regular files in 2202 * extended attribute directories. 
2203 */ 2204 2205 if ((dzp->z_pflags & ZFS_XATTR) && 2206 (vap->va_type != VREG)) { 2207 error = SET_ERROR(EINVAL); 2208 goto out; 2209 } 2210 2211 if ((error = zfs_acl_ids_create(dzp, 0, vap, 2212 cr, vsecp, &acl_ids)) != 0) 2213 goto out; 2214 2215 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2216 zfs_acl_ids_free(&acl_ids); 2217 error = SET_ERROR(EDQUOT); 2218 goto out; 2219 } 2220 2221 getnewvnode_reserve(1); 2222 2223 tx = dmu_tx_create(os); 2224 2225 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2226 ZFS_SA_BASE_ATTR_SIZE); 2227 2228 fuid_dirtied = zfsvfs->z_fuid_dirty; 2229 if (fuid_dirtied) 2230 zfs_fuid_txhold(zfsvfs, tx); 2231 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 2232 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 2233 if (!zfsvfs->z_use_sa && 2234 acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2235 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2236 0, acl_ids.z_aclp->z_acl_bytes); 2237 } 2238 error = dmu_tx_assign(tx, TXG_WAIT); 2239 if (error) { 2240 zfs_acl_ids_free(&acl_ids); 2241 dmu_tx_abort(tx); 2242 getnewvnode_drop_reserve(); 2243 ZFS_EXIT(zfsvfs); 2244 return (error); 2245 } 2246 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2247 2248 if (fuid_dirtied) 2249 zfs_fuid_sync(zfsvfs, tx); 2250 2251 (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 2252 txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 2253 zfs_log_create(zilog, tx, txtype, dzp, zp, name, 2254 vsecp, acl_ids.z_fuidp, vap); 2255 zfs_acl_ids_free(&acl_ids); 2256 dmu_tx_commit(tx); 2257 2258 getnewvnode_drop_reserve(); 2259 2260out: 2261 if (error == 0) { 2262 *vpp = ZTOV(zp); 2263 } 2264 2265 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2266 zil_commit(zilog, 0); 2267 2268 ZFS_EXIT(zfsvfs); 2269 return (error); 2270} 2271 2272/* 2273 * Remove an entry from a directory. 2274 * 2275 * IN: dvp - vnode of directory to remove entry from. 2276 * name - name of entry to remove. 2277 * cr - credentials of caller. 2278 * ct - caller context 2279 * flags - case flags 2280 * 2281 * RETURN: 0 on success, error code on failure. 2282 * 2283 * Timestamps: 2284 * dvp - ctime|mtime 2285 * vp - ctime (if nlink > 0) 2286 */ 2287 2288/*ARGSUSED*/ 2289static int 2290zfs_remove(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) 2291{ 2292 znode_t *dzp = VTOZ(dvp); 2293 znode_t *zp = VTOZ(vp); 2294 znode_t *xzp; 2295 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2296 zilog_t *zilog; 2297 uint64_t acl_obj, xattr_obj; 2298 uint64_t obj = 0; 2299 dmu_tx_t *tx; 2300 boolean_t unlinked, toobig = FALSE; 2301 uint64_t txtype; 2302 int error; 2303 2304 ZFS_ENTER(zfsvfs); 2305 ZFS_VERIFY_ZP(dzp); 2306 ZFS_VERIFY_ZP(zp); 2307 zilog = zfsvfs->z_log; 2308 zp = VTOZ(vp); 2309 2310 xattr_obj = 0; 2311 xzp = NULL; 2312 2313 if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2314 goto out; 2315 } 2316 2317 /* 2318 * Need to use rmdir for removing directories. 2319 */ 2320 if (vp->v_type == VDIR) { 2321 error = SET_ERROR(EPERM); 2322 goto out; 2323 } 2324 2325 vnevent_remove(vp, dvp, name, ct); 2326 2327 obj = zp->z_id; 2328 2329 /* are there any extended attributes? */ 2330 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 2331 &xattr_obj, sizeof (xattr_obj)); 2332 if (error == 0 && xattr_obj) { 2333 error = zfs_zget(zfsvfs, xattr_obj, &xzp); 2334 ASSERT0(error); 2335 } 2336 2337 /* 2338 * We may delete the znode now, or we may put it in the unlinked set; 2339 * it depends on whether we're the last link, and on whether there are 2340 * other holds on the vnode. So we dmu_tx_hold() the right things to 2341 * allow for either case. 
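 *
 * Concretely, the holds taken below cover the parent directory's ZAP
 * (to drop the name), the znode's SA (to update the link count), the
 * unlinked set's ZAP (in case the znode has to be parked there), and
 * the extended attribute directory's SA when one is attached.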
2342 */ 2343 tx = dmu_tx_create(zfsvfs->z_os); 2344 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2345 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2346 zfs_sa_upgrade_txholds(tx, zp); 2347 zfs_sa_upgrade_txholds(tx, dzp); 2348 2349 if (xzp) { 2350 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 2351 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 2352 } 2353 2354 /* charge as an update -- would be nice not to charge at all */ 2355 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2356 2357 /* 2358 * Mark this transaction as typically resulting in a net free of space 2359 */ 2360 dmu_tx_mark_netfree(tx); 2361 2362 error = dmu_tx_assign(tx, TXG_WAIT); 2363 if (error) { 2364 dmu_tx_abort(tx); 2365 ZFS_EXIT(zfsvfs); 2366 return (error); 2367 } 2368 2369 /* 2370 * Remove the directory entry. 2371 */ 2372 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked); 2373 2374 if (error) { 2375 dmu_tx_commit(tx); 2376 goto out; 2377 } 2378 2379 if (unlinked) { 2380 zfs_unlinked_add(zp, tx); 2381 vp->v_vflag |= VV_NOSYNC; 2382 } 2383 2384 txtype = TX_REMOVE; 2385 zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 2386 2387 dmu_tx_commit(tx); 2388out: 2389 2390 if (xzp) 2391 vrele(ZTOV(xzp)); 2392 2393 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2394 zil_commit(zilog, 0); 2395 2396 ZFS_EXIT(zfsvfs); 2397 return (error); 2398} 2399 2400/* 2401 * Create a new directory and insert it into dvp using the name 2402 * provided. Return a pointer to the inserted directory. 2403 * 2404 * IN: dvp - vnode of directory to add subdir to. 2405 * dirname - name of new directory. 2406 * vap - attributes of new directory. 2407 * cr - credentials of caller. 2408 * ct - caller context 2409 * flags - case flags 2410 * vsecp - ACL to be set 2411 * 2412 * OUT: vpp - vnode of created directory. 2413 * 2414 * RETURN: 0 on success, error code on failure. 
2415 * 2416 * Timestamps: 2417 * dvp - ctime|mtime updated 2418 * vp - ctime|mtime|atime updated 2419 */ 2420/*ARGSUSED*/ 2421static int 2422zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr) 2423{ 2424 znode_t *zp, *dzp = VTOZ(dvp); 2425 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2426 zilog_t *zilog; 2427 uint64_t txtype; 2428 dmu_tx_t *tx; 2429 int error; 2430 ksid_t *ksid; 2431 uid_t uid; 2432 gid_t gid = crgetgid(cr); 2433 zfs_acl_ids_t acl_ids; 2434 boolean_t fuid_dirtied; 2435 2436 ASSERT(vap->va_type == VDIR); 2437 2438 /* 2439 * If we have an ephemeral id, ACL, or XVATTR then 2440 * make sure file system is at proper version 2441 */ 2442 2443 ksid = crgetsid(cr, KSID_OWNER); 2444 if (ksid) 2445 uid = ksid_getid(ksid); 2446 else 2447 uid = crgetuid(cr); 2448 if (zfsvfs->z_use_fuids == B_FALSE && 2449 ((vap->va_mask & AT_XVATTR) || 2450 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2451 return (SET_ERROR(EINVAL)); 2452 2453 ZFS_ENTER(zfsvfs); 2454 ZFS_VERIFY_ZP(dzp); 2455 zilog = zfsvfs->z_log; 2456 2457 if (dzp->z_pflags & ZFS_XATTR) { 2458 ZFS_EXIT(zfsvfs); 2459 return (SET_ERROR(EINVAL)); 2460 } 2461 2462 if (zfsvfs->z_utf8 && u8_validate(dirname, 2463 strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2464 ZFS_EXIT(zfsvfs); 2465 return (SET_ERROR(EILSEQ)); 2466 } 2467 2468 if (vap->va_mask & AT_XVATTR) { 2469 if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2470 crgetuid(cr), cr, vap->va_type)) != 0) { 2471 ZFS_EXIT(zfsvfs); 2472 return (error); 2473 } 2474 } 2475 2476 if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 2477 NULL, &acl_ids)) != 0) { 2478 ZFS_EXIT(zfsvfs); 2479 return (error); 2480 } 2481 2482 /* 2483 * First make sure the new directory doesn't exist. 2484 * 2485 * Existence is checked first to make sure we don't return 2486 * EACCES instead of EEXIST which can cause some applications 2487 * to fail. 2488 */ 2489 *vpp = NULL; 2490 2491 if (error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW)) { 2492 zfs_acl_ids_free(&acl_ids); 2493 ZFS_EXIT(zfsvfs); 2494 return (error); 2495 } 2496 ASSERT3P(zp, ==, NULL); 2497 2498 if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 2499 zfs_acl_ids_free(&acl_ids); 2500 ZFS_EXIT(zfsvfs); 2501 return (error); 2502 } 2503 2504 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2505 zfs_acl_ids_free(&acl_ids); 2506 ZFS_EXIT(zfsvfs); 2507 return (SET_ERROR(EDQUOT)); 2508 } 2509 2510 /* 2511 * Add a new entry to the directory. 2512 */ 2513 getnewvnode_reserve(1); 2514 tx = dmu_tx_create(zfsvfs->z_os); 2515 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 2516 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2517 fuid_dirtied = zfsvfs->z_fuid_dirty; 2518 if (fuid_dirtied) 2519 zfs_fuid_txhold(zfsvfs, tx); 2520 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2521 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2522 acl_ids.z_aclp->z_acl_bytes); 2523 } 2524 2525 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2526 ZFS_SA_BASE_ATTR_SIZE); 2527 2528 error = dmu_tx_assign(tx, TXG_WAIT); 2529 if (error) { 2530 zfs_acl_ids_free(&acl_ids); 2531 dmu_tx_abort(tx); 2532 getnewvnode_drop_reserve(); 2533 ZFS_EXIT(zfsvfs); 2534 return (error); 2535 } 2536 2537 /* 2538 * Create new node. 2539 */ 2540 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2541 2542 if (fuid_dirtied) 2543 zfs_fuid_sync(zfsvfs, tx); 2544 2545 /* 2546 * Now put new name in parent dir. 
2547 */ 2548 (void) zfs_link_create(dzp, dirname, zp, tx, ZNEW); 2549 2550 *vpp = ZTOV(zp); 2551 2552 txtype = zfs_log_create_txtype(Z_DIR, NULL, vap); 2553 zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL, 2554 acl_ids.z_fuidp, vap); 2555 2556 zfs_acl_ids_free(&acl_ids); 2557 2558 dmu_tx_commit(tx); 2559 2560 getnewvnode_drop_reserve(); 2561 2562 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2563 zil_commit(zilog, 0); 2564 2565 ZFS_EXIT(zfsvfs); 2566 return (0); 2567} 2568 2569/* 2570 * Remove a directory subdir entry. If the current working 2571 * directory is the same as the subdir to be removed, the 2572 * remove will fail. 2573 * 2574 * IN: dvp - vnode of directory to remove from. 2575 * name - name of directory to be removed. 2576 * cwd - vnode of current working directory. 2577 * cr - credentials of caller. 2578 * ct - caller context 2579 * flags - case flags 2580 * 2581 * RETURN: 0 on success, error code on failure. 2582 * 2583 * Timestamps: 2584 * dvp - ctime|mtime updated 2585 */ 2586/*ARGSUSED*/ 2587static int 2588zfs_rmdir(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) 2589{ 2590 znode_t *dzp = VTOZ(dvp); 2591 znode_t *zp = VTOZ(vp); 2592 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2593 zilog_t *zilog; 2594 dmu_tx_t *tx; 2595 int error; 2596 2597 ZFS_ENTER(zfsvfs); 2598 ZFS_VERIFY_ZP(dzp); 2599 ZFS_VERIFY_ZP(zp); 2600 zilog = zfsvfs->z_log; 2601 2602 2603 if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2604 goto out; 2605 } 2606 2607 if (vp->v_type != VDIR) { 2608 error = SET_ERROR(ENOTDIR); 2609 goto out; 2610 } 2611 2612 vnevent_rmdir(vp, dvp, name, ct); 2613 2614 tx = dmu_tx_create(zfsvfs->z_os); 2615 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2616 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2617 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2618 zfs_sa_upgrade_txholds(tx, zp); 2619 zfs_sa_upgrade_txholds(tx, dzp); 2620 dmu_tx_mark_netfree(tx); 2621 error = dmu_tx_assign(tx, TXG_WAIT); 2622 if (error) { 2623 dmu_tx_abort(tx); 2624 ZFS_EXIT(zfsvfs); 2625 return (error); 2626 } 2627 2628 cache_purge(dvp); 2629 2630 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL); 2631 2632 if (error == 0) { 2633 uint64_t txtype = TX_RMDIR; 2634 zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2635 } 2636 2637 dmu_tx_commit(tx); 2638 2639 cache_purge(vp); 2640out: 2641 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2642 zil_commit(zilog, 0); 2643 2644 ZFS_EXIT(zfsvfs); 2645 return (error); 2646} 2647 2648/* 2649 * Read as many directory entries as will fit into the provided 2650 * buffer from the given directory cursor position (specified in 2651 * the uio structure). 2652 * 2653 * IN: vp - vnode of directory to read. 2654 * uio - structure supplying read location, range info, 2655 * and return buffer. 2656 * cr - credentials of caller. 2657 * ct - caller context 2658 * flags - case flags 2659 * 2660 * OUT: uio - updated offset and range, buffer filled. 2661 * eofp - set to true if end-of-file detected. 2662 * 2663 * RETURN: 0 on success, error code on failure. 2664 * 2665 * Timestamps: 2666 * vp - atime updated 2667 * 2668 * Note that the low 4 bits of the cookie returned by zap is always zero. 2669 * This allows us to use the low range for "special" directory entries: 2670 * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2671 * we use the offset 2 for the '.zfs' directory. 
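 *
 * For example, reading the root directory (with the '.zfs' directory
 * visible) returns '.' at offset 0, '..' at offset 1 and '.zfs' at
 * offset 2; every later entry is keyed by a serialized zap_cursor_t
 * value, which is why offsets <= 3 restart the ZAP cursor from the
 * beginning while larger offsets are handed to
 * zap_cursor_init_serialized().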
2672 */ 2673/* ARGSUSED */ 2674static int 2675zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, off_t **cookies) 2676{ 2677 znode_t *zp = VTOZ(vp); 2678 iovec_t *iovp; 2679 edirent_t *eodp; 2680 dirent64_t *odp; 2681 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2682 objset_t *os; 2683 caddr_t outbuf; 2684 size_t bufsize; 2685 zap_cursor_t zc; 2686 zap_attribute_t zap; 2687 uint_t bytes_wanted; 2688 uint64_t offset; /* must be unsigned; checks for < 1 */ 2689 uint64_t parent; 2690 int local_eof; 2691 int outcount; 2692 int error; 2693 uint8_t prefetch; 2694 boolean_t check_sysattrs; 2695 uint8_t type; 2696 int ncooks = 0; 2697 off_t *cooks = NULL; 2698 int flags = 0; 2699#ifdef __FreeBSD__ 2700 boolean_t user = uio->uio_segflg != UIO_SYSSPACE; 2701#endif 2702#ifdef __NetBSD__ 2703 boolean_t user = !VMSPACE_IS_KERNEL_P(uio->uio_vmspace); 2704#endif 2705 2706 ZFS_ENTER(zfsvfs); 2707 ZFS_VERIFY_ZP(zp); 2708 2709 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 2710 &parent, sizeof (parent))) != 0) { 2711 ZFS_EXIT(zfsvfs); 2712 return (error); 2713 } 2714 2715 /* 2716 * If we are not given an eof variable, 2717 * use a local one. 2718 */ 2719 if (eofp == NULL) 2720 eofp = &local_eof; 2721 2722 /* 2723 * Check for valid iov_len. 2724 */ 2725 if (uio->uio_iov->iov_len <= 0) { 2726 ZFS_EXIT(zfsvfs); 2727 return (SET_ERROR(EINVAL)); 2728 } 2729 2730 /* 2731 * Quit if directory has been removed (posix) 2732 */ 2733 if ((*eofp = zp->z_unlinked) != 0) { 2734 ZFS_EXIT(zfsvfs); 2735 return (0); 2736 } 2737 2738 error = 0; 2739 os = zfsvfs->z_os; 2740 offset = uio->uio_loffset; 2741 prefetch = zp->z_zn_prefetch; 2742 2743 /* 2744 * Initialize the iterator cursor. 2745 */ 2746 if (offset <= 3) { 2747 /* 2748 * Start iteration from the beginning of the directory. 2749 */ 2750 zap_cursor_init(&zc, os, zp->z_id); 2751 } else { 2752 /* 2753 * The offset is a serialized cursor. 2754 */ 2755 zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2756 } 2757 2758 /* 2759 * Get space to change directory entries into fs independent format. 2760 */ 2761 iovp = uio->uio_iov; 2762 bytes_wanted = iovp->iov_len; 2763 if (user || uio->uio_iovcnt != 1) { 2764 bufsize = bytes_wanted; 2765 outbuf = kmem_alloc(bufsize, KM_SLEEP); 2766 odp = (struct dirent64 *)outbuf; 2767 } else { 2768 bufsize = bytes_wanted; 2769 outbuf = NULL; 2770 odp = (struct dirent64 *)iovp->iov_base; 2771 } 2772 eodp = (struct edirent *)odp; 2773 2774 if (ncookies != NULL) { 2775 /* 2776 * Minimum entry size is dirent size and 1 byte for a file name. 2777 */ 2778#ifdef __FreeBSD__ 2779 ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2780 cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2781#endif 2782#ifdef __NetBSD__ 2783 ncooks = uio->uio_resid / _DIRENT_MINSIZE(odp); 2784 cooks = malloc(ncooks * sizeof(off_t), M_TEMP, M_WAITOK); 2785#endif 2786 *cookies = cooks; 2787 *ncookies = ncooks; 2788 } 2789 2790 /* 2791 * If this VFS supports the system attribute view interface; and 2792 * we're looking at an extended attribute directory; and we care 2793 * about normalization conflicts on this vfs; then we must check 2794 * for normalization conflicts with the sysattr name space. 
2795 */ 2796#ifdef TODO 2797 check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2798 (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2799 (flags & V_RDDIR_ENTFLAGS); 2800#else 2801 check_sysattrs = 0; 2802#endif 2803 2804 /* 2805 * Transform to file-system independent format 2806 */ 2807 outcount = 0; 2808 while (outcount < bytes_wanted) { 2809 ino64_t objnum; 2810 ushort_t reclen; 2811 off64_t *next = NULL; 2812 2813 /* 2814 * Special case `.', `..', and `.zfs'. 2815 */ 2816 if (offset == 0) { 2817 (void) strcpy(zap.za_name, "."); 2818 zap.za_normalization_conflict = 0; 2819 objnum = zp->z_id; 2820 type = DT_DIR; 2821 } else if (offset == 1) { 2822 (void) strcpy(zap.za_name, ".."); 2823 zap.za_normalization_conflict = 0; 2824 objnum = parent; 2825 type = DT_DIR; 2826 } else if (offset == 2 && zfs_show_ctldir(zp)) { 2827 (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2828 zap.za_normalization_conflict = 0; 2829 objnum = ZFSCTL_INO_ROOT; 2830 type = DT_DIR; 2831 } else { 2832 /* 2833 * Grab next entry. 2834 */ 2835 if (error = zap_cursor_retrieve(&zc, &zap)) { 2836 if ((*eofp = (error == ENOENT)) != 0) 2837 break; 2838 else 2839 goto update; 2840 } 2841 2842 if (zap.za_integer_length != 8 || 2843 zap.za_num_integers != 1) { 2844 cmn_err(CE_WARN, "zap_readdir: bad directory " 2845 "entry, obj = %lld, offset = %lld\n", 2846 (u_longlong_t)zp->z_id, 2847 (u_longlong_t)offset); 2848 error = SET_ERROR(ENXIO); 2849 goto update; 2850 } 2851 2852 objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2853 /* 2854 * MacOS X can extract the object type here such as: 2855 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2856 */ 2857 type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2858 2859 if (check_sysattrs && !zap.za_normalization_conflict) { 2860#ifdef TODO 2861 zap.za_normalization_conflict = 2862 xattr_sysattr_casechk(zap.za_name); 2863#else 2864 panic("%s:%u: TODO", __func__, __LINE__); 2865#endif 2866 } 2867 } 2868 2869 if (flags & V_RDDIR_ACCFILTER) { 2870 /* 2871 * If we have no access at all, don't include 2872 * this entry in the returned information 2873 */ 2874 znode_t *ezp; 2875 if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2876 goto skip_entry; 2877 if (!zfs_has_access(ezp, cr)) { 2878 vrele(ZTOV(ezp)); 2879 goto skip_entry; 2880 } 2881 vrele(ZTOV(ezp)); 2882 } 2883 2884 if (flags & V_RDDIR_ENTFLAGS) 2885 reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2886 else 2887 reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2888 2889 /* 2890 * Will this entry fit in the buffer? 2891 */ 2892 if (outcount + reclen > bufsize) { 2893 /* 2894 * Did we manage to fit anything in the buffer? 2895 */ 2896 if (!outcount) { 2897 error = SET_ERROR(EINVAL); 2898 goto update; 2899 } 2900 break; 2901 } 2902 if (flags & V_RDDIR_ENTFLAGS) { 2903 /* 2904 * Add extended flag entry: 2905 */ 2906 eodp->ed_ino = objnum; 2907 eodp->ed_reclen = reclen; 2908 /* NOTE: ed_off is the offset for the *next* entry */ 2909 next = &(eodp->ed_off); 2910 eodp->ed_eflags = zap.za_normalization_conflict ? 
2911 ED_CASE_CONFLICT : 0; 2912 (void) strncpy(eodp->ed_name, zap.za_name, 2913 EDIRENT_NAMELEN(reclen)); 2914 eodp = (edirent_t *)((intptr_t)eodp + reclen); 2915 } else { 2916 /* 2917 * Add normal entry: 2918 */ 2919 odp->d_ino = objnum; 2920 odp->d_reclen = reclen; 2921 odp->d_namlen = strlen(zap.za_name); 2922 (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2923 odp->d_type = type; 2924 odp = (dirent64_t *)((intptr_t)odp + reclen); 2925 } 2926 outcount += reclen; 2927 2928 ASSERT(outcount <= bufsize); 2929 2930 /* Prefetch znode */ 2931 if (prefetch) 2932 dmu_prefetch(os, objnum, 0, 0, 0, 2933 ZIO_PRIORITY_SYNC_READ); 2934 2935 skip_entry: 2936 /* 2937 * Move to the next entry, fill in the previous offset. 2938 */ 2939 if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2940 zap_cursor_advance(&zc); 2941 offset = zap_cursor_serialize(&zc); 2942 } else { 2943 offset += 1; 2944 } 2945 2946 if (cooks != NULL) { 2947 *cooks++ = offset; 2948 ncooks--; 2949#ifdef __FreeBSD__ 2950 KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2951#endif 2952#ifdef __NetBSD__ 2953 KASSERTMSG(ncooks >= 0, "ncooks=%d", ncooks); 2954#endif 2955 } 2956 } 2957 zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2958 2959 /* Subtract unused cookies */ 2960 if (ncookies != NULL) 2961 *ncookies -= ncooks; 2962 2963 if (!user && uio->uio_iovcnt == 1) { 2964 iovp->iov_base += outcount; 2965 iovp->iov_len -= outcount; 2966 uio->uio_resid -= outcount; 2967 } else if (error = uiomove(outbuf, (size_t)outcount, UIO_READ, uio)) { 2968 /* 2969 * Reset the pointer. 2970 */ 2971 offset = uio->uio_loffset; 2972 } 2973 2974update: 2975 zap_cursor_fini(&zc); 2976 if (user || uio->uio_iovcnt != 1) 2977 kmem_free(outbuf, bufsize); 2978 2979 if (error == ENOENT) 2980 error = 0; 2981 2982 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2983 2984 uio->uio_loffset = offset; 2985 ZFS_EXIT(zfsvfs); 2986 if (error != 0 && cookies != NULL) { 2987#ifdef __FreeBSD__ 2988 free(*cookies, M_TEMP); 2989#endif 2990#ifdef __NetBSD__ 2991 kmem_free(*cookies, ncooks * sizeof(off_t)); 2992#endif 2993 *cookies = NULL; 2994 *ncookies = 0; 2995 } 2996 return (error); 2997} 2998 2999ulong_t zfs_fsync_sync_cnt = 4; 3000 3001static int 3002zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 3003{ 3004 znode_t *zp = VTOZ(vp); 3005 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3006 3007 (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 3008 3009 if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 3010 ZFS_ENTER(zfsvfs); 3011 ZFS_VERIFY_ZP(zp); 3012 3013#ifdef __NetBSD__ 3014 if (!zp->z_unlinked) 3015#endif 3016 zil_commit(zfsvfs->z_log, zp->z_id); 3017 ZFS_EXIT(zfsvfs); 3018 } 3019 return (0); 3020} 3021 3022 3023/* 3024 * Get the requested file attributes and place them in the provided 3025 * vattr structure. 3026 * 3027 * IN: vp - vnode of file. 3028 * vap - va_mask identifies requested attributes. 3029 * If AT_XVATTR set, then optional attrs are requested 3030 * flags - ATTR_NOACLCHECK (CIFS server context) 3031 * cr - credentials of caller. 3032 * ct - caller context 3033 * 3034 * OUT: vap - attribute values. 3035 * 3036 * RETURN: 0 (always succeeds). 
3037 */ 3038/* ARGSUSED */ 3039static int 3040zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 3041 caller_context_t *ct) 3042{ 3043 znode_t *zp = VTOZ(vp); 3044 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3045 int error = 0; 3046 uint32_t blksize; 3047 u_longlong_t nblocks; 3048 uint64_t links; 3049 uint64_t mtime[2], ctime[2], crtime[2], rdev; 3050 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 3051 xoptattr_t *xoap = NULL; 3052 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 3053 sa_bulk_attr_t bulk[4]; 3054 int count = 0; 3055 3056 ZFS_ENTER(zfsvfs); 3057 ZFS_VERIFY_ZP(zp); 3058 3059 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 3060 3061 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 3062 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 3063 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 3064 if (vp->v_type == VBLK || vp->v_type == VCHR) 3065 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 3066 &rdev, 8); 3067 3068 if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 3069 ZFS_EXIT(zfsvfs); 3070 return (error); 3071 } 3072 3073 /* 3074 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 3075 * Also, if we are the owner don't bother, since owner should 3076 * always be allowed to read basic attributes of file. 3077 */ 3078 if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 3079 (vap->va_uid != crgetuid(cr))) { 3080 if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 3081 skipaclchk, cr)) { 3082 ZFS_EXIT(zfsvfs); 3083 return (error); 3084 } 3085 } 3086 3087 /* 3088 * Return all attributes. It's cheaper to provide the answer 3089 * than to determine whether we were asked the question. 3090 */ 3091 3092 vap->va_type = IFTOVT(zp->z_mode); 3093 vap->va_mode = zp->z_mode & ~S_IFMT; 3094#ifdef illumos 3095 vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 3096#endif 3097#ifdef __FreeBSD__ 3098 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 3099 vap->va_nodeid = zp->z_id; 3100#endif 3101#ifdef __NetBSD__ 3102 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid; 3103 vap->va_nodeid = zp->z_id; 3104 /* 3105 * If we are a snapshot mounted under .zfs, return 3106 * the object id of the snapshot to make getcwd happy. 3107 */ 3108 if (zp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) { 3109 vnode_t *cvp = vp->v_mount->mnt_vnodecovered; 3110 3111 if (cvp && zfsctl_is_node(cvp)) 3112 vap->va_nodeid = dmu_objset_id(zfsvfs->z_os); 3113 } 3114#endif 3115 if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 3116 links = zp->z_links + 1; 3117 else 3118 links = zp->z_links; 3119 /* XXX NetBSD: use LINK_MAX when that value matches 32-bit nlink_t */ 3120 vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */ 3121 vap->va_size = zp->z_size; 3122#ifdef illumos 3123 vap->va_rdev = vp->v_rdev; 3124#else 3125 if (vp->v_type == VBLK || vp->v_type == VCHR) 3126 vap->va_rdev = zfs_cmpldev(rdev); 3127#endif 3128 vap->va_seq = zp->z_seq; 3129 vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 3130 vap->va_filerev = zp->z_seq; 3131 3132 /* 3133 * Add in any requested optional attributes and the create time. 3134 * Also set the corresponding bits in the returned attribute bitmap. 
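 *
 * Each optional attribute below is derived from the ZFS_* bits cached
 * in z_pflags, and the matching XAT_* bit is echoed back with
 * XVA_SET_RTN() so the caller can tell which answers are valid.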
3135 */ 3136 if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 3137 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 3138 xoap->xoa_archive = 3139 ((zp->z_pflags & ZFS_ARCHIVE) != 0); 3140 XVA_SET_RTN(xvap, XAT_ARCHIVE); 3141 } 3142 3143 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 3144 xoap->xoa_readonly = 3145 ((zp->z_pflags & ZFS_READONLY) != 0); 3146 XVA_SET_RTN(xvap, XAT_READONLY); 3147 } 3148 3149 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 3150 xoap->xoa_system = 3151 ((zp->z_pflags & ZFS_SYSTEM) != 0); 3152 XVA_SET_RTN(xvap, XAT_SYSTEM); 3153 } 3154 3155 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 3156 xoap->xoa_hidden = 3157 ((zp->z_pflags & ZFS_HIDDEN) != 0); 3158 XVA_SET_RTN(xvap, XAT_HIDDEN); 3159 } 3160 3161 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3162 xoap->xoa_nounlink = 3163 ((zp->z_pflags & ZFS_NOUNLINK) != 0); 3164 XVA_SET_RTN(xvap, XAT_NOUNLINK); 3165 } 3166 3167 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3168 xoap->xoa_immutable = 3169 ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 3170 XVA_SET_RTN(xvap, XAT_IMMUTABLE); 3171 } 3172 3173 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3174 xoap->xoa_appendonly = 3175 ((zp->z_pflags & ZFS_APPENDONLY) != 0); 3176 XVA_SET_RTN(xvap, XAT_APPENDONLY); 3177 } 3178 3179 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3180 xoap->xoa_nodump = 3181 ((zp->z_pflags & ZFS_NODUMP) != 0); 3182 XVA_SET_RTN(xvap, XAT_NODUMP); 3183 } 3184 3185 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 3186 xoap->xoa_opaque = 3187 ((zp->z_pflags & ZFS_OPAQUE) != 0); 3188 XVA_SET_RTN(xvap, XAT_OPAQUE); 3189 } 3190 3191 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3192 xoap->xoa_av_quarantined = 3193 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 3194 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 3195 } 3196 3197 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3198 xoap->xoa_av_modified = 3199 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 3200 XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 3201 } 3202 3203 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 3204 vp->v_type == VREG) { 3205 zfs_sa_get_scanstamp(zp, xvap); 3206 } 3207 3208 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3209 xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 3210 XVA_SET_RTN(xvap, XAT_REPARSE); 3211 } 3212 if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 3213 xoap->xoa_generation = zp->z_gen; 3214 XVA_SET_RTN(xvap, XAT_GEN); 3215 } 3216 3217 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 3218 xoap->xoa_offline = 3219 ((zp->z_pflags & ZFS_OFFLINE) != 0); 3220 XVA_SET_RTN(xvap, XAT_OFFLINE); 3221 } 3222 3223 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 3224 xoap->xoa_sparse = 3225 ((zp->z_pflags & ZFS_SPARSE) != 0); 3226 XVA_SET_RTN(xvap, XAT_SPARSE); 3227 } 3228 } 3229 3230 ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 3231 ZFS_TIME_DECODE(&vap->va_mtime, mtime); 3232 ZFS_TIME_DECODE(&vap->va_ctime, ctime); 3233 ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 3234 3235 3236 sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 3237 vap->va_blksize = blksize; 3238 vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 3239 3240 if (zp->z_blksz == 0) { 3241 /* 3242 * Block size hasn't been set; suggest maximal I/O transfers. 3243 */ 3244 vap->va_blksize = zfsvfs->z_max_blksz; 3245 } 3246 3247 ZFS_EXIT(zfsvfs); 3248 return (0); 3249} 3250 3251/* 3252 * Set the file attributes to the values contained in the 3253 * vattr structure. 3254 * 3255 * IN: vp - vnode of file to be modified. 3256 * vap - new attribute values. 3257 * If AT_XVATTR set, then optional attrs are being set 3258 * flags - ATTR_UTIME set if non-default time values provided. 3259 * - ATTR_NOACLCHECK (CIFS context only). 
3260 * cr - credentials of caller. 3261 * ct - caller context 3262 * 3263 * RETURN: 0 on success, error code on failure. 3264 * 3265 * Timestamps: 3266 * vp - ctime updated, mtime updated if size changed. 3267 */ 3268/* ARGSUSED */ 3269static int 3270zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 3271 caller_context_t *ct) 3272{ 3273 znode_t *zp = VTOZ(vp); 3274 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3275 zilog_t *zilog; 3276 dmu_tx_t *tx; 3277 vattr_t oldva; 3278 xvattr_t tmpxvattr; 3279 uint_t mask = vap->va_mask; 3280 uint_t saved_mask = 0; 3281 uint64_t saved_mode; 3282 int trim_mask = 0; 3283 uint64_t new_mode; 3284 uint64_t new_uid, new_gid; 3285 uint64_t xattr_obj; 3286 uint64_t mtime[2], ctime[2]; 3287 znode_t *attrzp; 3288 int need_policy = FALSE; 3289 int err, err2; 3290 zfs_fuid_info_t *fuidp = NULL; 3291 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 3292 xoptattr_t *xoap; 3293 zfs_acl_t *aclp; 3294 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 3295 boolean_t fuid_dirtied = B_FALSE; 3296 sa_bulk_attr_t bulk[7], xattr_bulk[7]; 3297 int count = 0, xattr_count = 0; 3298 3299 if (mask == 0) 3300 return (0); 3301 3302 if (mask & AT_NOSET) 3303 return (SET_ERROR(EINVAL)); 3304 3305 ZFS_ENTER(zfsvfs); 3306 ZFS_VERIFY_ZP(zp); 3307 3308 zilog = zfsvfs->z_log; 3309 3310 /* 3311 * Make sure that if we have ephemeral uid/gid or xvattr specified 3312 * that file system is at proper version level 3313 */ 3314 3315 if (zfsvfs->z_use_fuids == B_FALSE && 3316 (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 3317 ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 3318 (mask & AT_XVATTR))) { 3319 ZFS_EXIT(zfsvfs); 3320 return (SET_ERROR(EINVAL)); 3321 } 3322 3323 if (mask & AT_SIZE && vp->v_type == VDIR) { 3324 ZFS_EXIT(zfsvfs); 3325 return (SET_ERROR(EISDIR)); 3326 } 3327 3328 if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 3329 ZFS_EXIT(zfsvfs); 3330 return (SET_ERROR(EINVAL)); 3331 } 3332 3333 /* 3334 * If this is an xvattr_t, then get a pointer to the structure of 3335 * optional attributes. If this is NULL, then we have a vattr_t. 3336 */ 3337 xoap = xva_getxoptattr(xvap); 3338 3339 xva_init(&tmpxvattr); 3340 3341 /* 3342 * Immutable files can only alter immutable bit and atime 3343 */ 3344 if ((zp->z_pflags & ZFS_IMMUTABLE) && 3345 ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 3346 ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 3347 ZFS_EXIT(zfsvfs); 3348 return (SET_ERROR(EPERM)); 3349 } 3350 3351 if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { 3352 ZFS_EXIT(zfsvfs); 3353 return (SET_ERROR(EPERM)); 3354 } 3355 3356 /* 3357 * Verify timestamps doesn't overflow 32 bits. 3358 * ZFS can handle large timestamps, but 32bit syscalls can't 3359 * handle times greater than 2039. This check should be removed 3360 * once large timestamps are fully supported. 3361 */ 3362 if (mask & (AT_ATIME | AT_MTIME)) { 3363 if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 3364 ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 3365 ZFS_EXIT(zfsvfs); 3366 return (SET_ERROR(EOVERFLOW)); 3367 } 3368 } 3369 if (xoap && (mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME) && 3370 TIMESPEC_OVERFLOW(&vap->va_birthtime)) { 3371 ZFS_EXIT(zfsvfs); 3372 return (SET_ERROR(EOVERFLOW)); 3373 } 3374 3375 attrzp = NULL; 3376 aclp = NULL; 3377 3378 /* Can this be moved to before the top label? 
*/ 3379 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 3380 ZFS_EXIT(zfsvfs); 3381 return (SET_ERROR(EROFS)); 3382 } 3383 3384 /* 3385 * First validate permissions 3386 */ 3387 3388 if (mask & AT_SIZE) { 3389 /* 3390 * XXX - Note, we are not providing any open 3391 * mode flags here (like FNDELAY), so we may 3392 * block if there are locks present... this 3393 * should be addressed in openat(). 3394 */ 3395 /* XXX - would it be OK to generate a log record here? */ 3396 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 3397 if (err) { 3398 ZFS_EXIT(zfsvfs); 3399 return (err); 3400 } 3401 } 3402 3403 if (mask & (AT_ATIME|AT_MTIME) || 3404 ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 3405 XVA_ISSET_REQ(xvap, XAT_READONLY) || 3406 XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 3407 XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 3408 XVA_ISSET_REQ(xvap, XAT_SPARSE) || 3409 XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 3410 XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 3411 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 3412 skipaclchk, cr); 3413 } 3414 3415 if (mask & (AT_UID|AT_GID)) { 3416 int idmask = (mask & (AT_UID|AT_GID)); 3417 int take_owner; 3418 int take_group; 3419 3420 /* 3421 * NOTE: even if a new mode is being set, 3422 * we may clear S_ISUID/S_ISGID bits. 3423 */ 3424 3425 if (!(mask & AT_MODE)) 3426 vap->va_mode = zp->z_mode; 3427 3428 /* 3429 * Take ownership or chgrp to group we are a member of 3430 */ 3431 3432 take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 3433 take_group = (mask & AT_GID) && 3434 zfs_groupmember(zfsvfs, vap->va_gid, cr); 3435 3436 /* 3437 * If both AT_UID and AT_GID are set then take_owner and 3438 * take_group must both be set in order to allow taking 3439 * ownership. 3440 * 3441 * Otherwise, send the check through secpolicy_vnode_setattr() 3442 * 3443 */ 3444 3445 if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 3446 ((idmask == AT_UID) && take_owner) || 3447 ((idmask == AT_GID) && take_group)) { 3448 if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 3449 skipaclchk, cr) == 0) { 3450 /* 3451 * Remove setuid/setgid for non-privileged users 3452 */ 3453 secpolicy_setid_clear(vap, vp, cr); 3454 trim_mask = (mask & (AT_UID|AT_GID)); 3455 } else { 3456 need_policy = TRUE; 3457 } 3458 } else { 3459 need_policy = TRUE; 3460 } 3461 } 3462 3463 oldva.va_mode = zp->z_mode; 3464 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 3465 if (mask & AT_XVATTR) { 3466 /* 3467 * Update xvattr mask to include only those attributes 3468 * that are actually changing. 3469 * 3470 * the bits will be restored prior to actually setting 3471 * the attributes so the caller thinks they were set. 
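 *
 * A requested bit whose value already matches the on-disk flag is
 * cleared from xvap and remembered in tmpxvattr so it can be re-set
 * before returning; a bit that actually changes forces the request
 * through the policy check instead.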
3472 */ 3473 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3474 if (xoap->xoa_appendonly != 3475 ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 3476 need_policy = TRUE; 3477 } else { 3478 XVA_CLR_REQ(xvap, XAT_APPENDONLY); 3479 XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 3480 } 3481 } 3482 3483 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3484 if (xoap->xoa_nounlink != 3485 ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 3486 need_policy = TRUE; 3487 } else { 3488 XVA_CLR_REQ(xvap, XAT_NOUNLINK); 3489 XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 3490 } 3491 } 3492 3493 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3494 if (xoap->xoa_immutable != 3495 ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 3496 need_policy = TRUE; 3497 } else { 3498 XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 3499 XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 3500 } 3501 } 3502 3503 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3504 if (xoap->xoa_nodump != 3505 ((zp->z_pflags & ZFS_NODUMP) != 0)) { 3506#if 0 3507 /* 3508 * XXXSB - zfs_netbsd_setattr() 3509 * has already checked if this 3510 * request is authorised, and our 3511 * secpolicy_xvattr() doesn't check 3512 * kauth chflags. Fix this when we 3513 * migrate to openzfs. 3514 */ 3515 need_policy = TRUE; 3516#endif 3517 } else { 3518 XVA_CLR_REQ(xvap, XAT_NODUMP); 3519 XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 3520 } 3521 } 3522 3523 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3524 if (xoap->xoa_av_modified != 3525 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 3526 need_policy = TRUE; 3527 } else { 3528 XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 3529 XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 3530 } 3531 } 3532 3533 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3534 if ((vp->v_type != VREG && 3535 xoap->xoa_av_quarantined) || 3536 xoap->xoa_av_quarantined != 3537 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 3538 need_policy = TRUE; 3539 } else { 3540 XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 3541 XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 3542 } 3543 } 3544 3545 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3546 ZFS_EXIT(zfsvfs); 3547 return (SET_ERROR(EPERM)); 3548 } 3549 3550 if (need_policy == FALSE && 3551 (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3552 XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3553 need_policy = TRUE; 3554 } 3555 } 3556 3557 if (mask & AT_MODE) { 3558 if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 3559 err = secpolicy_setid_setsticky_clear(vp, vap, 3560 &oldva, cr); 3561 if (err) { 3562 ZFS_EXIT(zfsvfs); 3563 return (err); 3564 } 3565 trim_mask |= AT_MODE; 3566 } else { 3567 need_policy = TRUE; 3568 } 3569 } 3570 3571 if (need_policy) { 3572 /* 3573 * If trim_mask is set then take ownership 3574 * has been granted or write_acl is present and user 3575 * has the ability to modify mode. In that case remove 3576 * UID|GID and or MODE from mask so that 3577 * secpolicy_vnode_setattr() doesn't revoke it. 3578 */ 3579 3580 if (trim_mask) { 3581 saved_mask = vap->va_mask; 3582 vap->va_mask &= ~trim_mask; 3583 if (trim_mask & AT_MODE) { 3584 /* 3585 * Save the mode, as secpolicy_vnode_setattr() 3586 * will overwrite it with ova.va_mode. 3587 */ 3588 saved_mode = vap->va_mode; 3589 } 3590 } 3591 err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3592 (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3593 if (err) { 3594 ZFS_EXIT(zfsvfs); 3595 return (err); 3596 } 3597 3598 if (trim_mask) { 3599 vap->va_mask |= saved_mask; 3600 if (trim_mask & AT_MODE) { 3601 /* 3602 * Recover the mode after 3603 * secpolicy_vnode_setattr(). 
3604 */ 3605 vap->va_mode = saved_mode; 3606 } 3607 } 3608 } 3609 3610 /* 3611 * secpolicy_vnode_setattr, or take ownership may have 3612 * changed va_mask 3613 */ 3614 mask = vap->va_mask; 3615 3616 if ((mask & (AT_UID | AT_GID))) { 3617 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 3618 &xattr_obj, sizeof (xattr_obj)); 3619 3620 if (err == 0 && xattr_obj) { 3621 err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 3622 if (err == 0) { 3623 err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE); 3624 if (err != 0) 3625 vrele(ZTOV(attrzp)); 3626 } 3627 if (err) 3628 goto out2; 3629 } 3630 if (mask & AT_UID) { 3631 new_uid = zfs_fuid_create(zfsvfs, 3632 (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3633 if (new_uid != zp->z_uid && 3634 zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 3635 if (attrzp) 3636 vput(ZTOV(attrzp)); 3637 err = SET_ERROR(EDQUOT); 3638 goto out2; 3639 } 3640 } 3641 3642 if (mask & AT_GID) { 3643 new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 3644 cr, ZFS_GROUP, &fuidp); 3645 if (new_gid != zp->z_gid && 3646 zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 3647 if (attrzp) 3648 vput(ZTOV(attrzp)); 3649 err = SET_ERROR(EDQUOT); 3650 goto out2; 3651 } 3652 } 3653 } 3654 tx = dmu_tx_create(zfsvfs->z_os); 3655 3656 if (mask & AT_MODE) { 3657 uint64_t pmode = zp->z_mode; 3658 uint64_t acl_obj; 3659 new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3660 3661 if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 3662 !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3663 err = SET_ERROR(EPERM); 3664 goto out; 3665 } 3666 3667 if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3668 goto out; 3669 3670 if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 3671 /* 3672 * Are we upgrading ACL from old V0 format 3673 * to V1 format? 3674 */ 3675 if (zfsvfs->z_version >= ZPL_VERSION_FUID && 3676 zfs_znode_acl_version(zp) == 3677 ZFS_ACL_VERSION_INITIAL) { 3678 dmu_tx_hold_free(tx, acl_obj, 0, 3679 DMU_OBJECT_END); 3680 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3681 0, aclp->z_acl_bytes); 3682 } else { 3683 dmu_tx_hold_write(tx, acl_obj, 0, 3684 aclp->z_acl_bytes); 3685 } 3686 } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 3687 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3688 0, aclp->z_acl_bytes); 3689 } 3690 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3691 } else { 3692 if ((mask & AT_XVATTR) && 3693 XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3694 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3695 else 3696 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3697 } 3698 3699 if (attrzp) { 3700 dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 3701 } 3702 3703 fuid_dirtied = zfsvfs->z_fuid_dirty; 3704 if (fuid_dirtied) 3705 zfs_fuid_txhold(zfsvfs, tx); 3706 3707 zfs_sa_upgrade_txholds(tx, zp); 3708 3709 err = dmu_tx_assign(tx, TXG_WAIT); 3710 if (err) 3711 goto out; 3712 3713 count = 0; 3714 /* 3715 * Set each attribute requested. 3716 * We group settings according to the locks they need to acquire. 3717 * 3718 * Note: you cannot set ctime directly, although it will be 3719 * updated as a side-effect of calling this function. 
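 *
 * The uid/gid/mode updates below, for instance, are grouped under
 * z_acl_lock because ownership and mode changes also refresh the
 * cached ACL via zfs_acl_chown_setattr()/zfs_aclset_common().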
3720 */ 3721 3722 if (mask & (AT_UID|AT_GID|AT_MODE)) 3723 mutex_enter(&zp->z_acl_lock); 3724 3725 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3726 &zp->z_pflags, sizeof (zp->z_pflags)); 3727 3728 if (attrzp) { 3729 if (mask & (AT_UID|AT_GID|AT_MODE)) 3730 mutex_enter(&attrzp->z_acl_lock); 3731 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3732 SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3733 sizeof (attrzp->z_pflags)); 3734 } 3735 3736 if (mask & (AT_UID|AT_GID)) { 3737 3738 if (mask & AT_UID) { 3739 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 3740 &new_uid, sizeof (new_uid)); 3741 zp->z_uid = new_uid; 3742 if (attrzp) { 3743 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3744 SA_ZPL_UID(zfsvfs), NULL, &new_uid, 3745 sizeof (new_uid)); 3746 attrzp->z_uid = new_uid; 3747 } 3748 } 3749 3750 if (mask & AT_GID) { 3751 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 3752 NULL, &new_gid, sizeof (new_gid)); 3753 zp->z_gid = new_gid; 3754 if (attrzp) { 3755 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3756 SA_ZPL_GID(zfsvfs), NULL, &new_gid, 3757 sizeof (new_gid)); 3758 attrzp->z_gid = new_gid; 3759 } 3760 } 3761 if (!(mask & AT_MODE)) { 3762 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 3763 NULL, &new_mode, sizeof (new_mode)); 3764 new_mode = zp->z_mode; 3765 } 3766 err = zfs_acl_chown_setattr(zp); 3767 ASSERT(err == 0); 3768 if (attrzp) { 3769 err = zfs_acl_chown_setattr(attrzp); 3770 ASSERT(err == 0); 3771 } 3772 } 3773 3774 if (mask & AT_MODE) { 3775 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 3776 &new_mode, sizeof (new_mode)); 3777 zp->z_mode = new_mode; 3778 ASSERT3U((uintptr_t)aclp, !=, 0); 3779 err = zfs_aclset_common(zp, aclp, cr, tx); 3780 ASSERT0(err); 3781 if (zp->z_acl_cached) 3782 zfs_acl_free(zp->z_acl_cached); 3783 zp->z_acl_cached = aclp; 3784 aclp = NULL; 3785 } 3786 3787 3788 if (mask & AT_ATIME) { 3789 ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 3790 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 3791 &zp->z_atime, sizeof (zp->z_atime)); 3792 } 3793 3794 if (mask & AT_MTIME) { 3795 ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 3796 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 3797 mtime, sizeof (mtime)); 3798 } 3799 3800 /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */ 3801 if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3802 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3803 NULL, mtime, sizeof (mtime)); 3804 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3805 &ctime, sizeof (ctime)); 3806 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 3807 B_TRUE); 3808 } else if (mask != 0) { 3809 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3810 &ctime, sizeof (ctime)); 3811 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 3812 B_TRUE); 3813 if (attrzp) { 3814 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3815 SA_ZPL_CTIME(zfsvfs), NULL, 3816 &ctime, sizeof (ctime)); 3817 zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 3818 mtime, ctime, B_TRUE); 3819 } 3820 } 3821 /* 3822 * Do this after setting timestamps to prevent timestamp 3823 * update from toggling bit 3824 */ 3825 3826 if (xoap && (mask & AT_XVATTR)) { 3827 3828 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) 3829 xoap->xoa_createtime = vap->va_birthtime; 3830 /* 3831 * restore trimmed off masks 3832 * so that return masks can be set for caller. 
3833 */ 3834 3835 if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3836 XVA_SET_REQ(xvap, XAT_APPENDONLY); 3837 } 3838 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3839 XVA_SET_REQ(xvap, XAT_NOUNLINK); 3840 } 3841 if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3842 XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3843 } 3844 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3845 XVA_SET_REQ(xvap, XAT_NODUMP); 3846 } 3847 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3848 XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3849 } 3850 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3851 XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3852 } 3853 3854 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3855 ASSERT(vp->v_type == VREG); 3856 3857 zfs_xvattr_set(zp, xvap, tx); 3858 } 3859 3860 if (fuid_dirtied) 3861 zfs_fuid_sync(zfsvfs, tx); 3862 3863 if (mask != 0) 3864 zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3865 3866 if (mask & (AT_UID|AT_GID|AT_MODE)) 3867 mutex_exit(&zp->z_acl_lock); 3868 3869 if (attrzp) { 3870 if (mask & (AT_UID|AT_GID|AT_MODE)) 3871 mutex_exit(&attrzp->z_acl_lock); 3872 } 3873out: 3874 if (err == 0 && attrzp) { 3875 err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 3876 xattr_count, tx); 3877 ASSERT(err2 == 0); 3878 } 3879 3880 if (attrzp) 3881 vput(ZTOV(attrzp)); 3882 3883 if (aclp) 3884 zfs_acl_free(aclp); 3885 3886 if (fuidp) { 3887 zfs_fuid_info_free(fuidp); 3888 fuidp = NULL; 3889 } 3890 3891 if (err) { 3892 dmu_tx_abort(tx); 3893 } else { 3894 err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 3895 dmu_tx_commit(tx); 3896 } 3897 3898out2: 3899 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3900 zil_commit(zilog, 0); 3901 3902 ZFS_EXIT(zfsvfs); 3903 return (err); 3904} 3905 3906/* 3907 * We acquire all but fdvp locks using non-blocking acquisitions. If we 3908 * fail to acquire any lock in the path we will drop all held locks, 3909 * acquire the new lock in a blocking fashion, and then release it and 3910 * restart the rename. This acquire/release step ensures that we do not 3911 * spin on a lock waiting for release. On error release all vnode locks 3912 * and decrement references the way tmpfs_rename() would do. 3913 */ 3914static int 3915zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp, 3916 struct vnode *tdvp, struct vnode **tvpp, 3917 const struct componentname *scnp, const struct componentname *tcnp) 3918{ 3919 zfsvfs_t *zfsvfs; 3920 struct vnode *nvp, *svp, *tvp; 3921 znode_t *sdzp, *tdzp, *szp, *tzp; 3922#ifdef __FreeBSD__ 3923 const char *snm = scnp->cn_nameptr; 3924 const char *tnm = tcnp->cn_nameptr; 3925#endif 3926#ifdef __NetBSD__ 3927 char *snm, *tnm; 3928#endif 3929 int error; 3930 3931#ifdef __FreeBSD__ 3932 VOP_UNLOCK(tdvp, 0); 3933 if (*tvpp != NULL && *tvpp != tdvp) 3934 VOP_UNLOCK(*tvpp, 0); 3935#endif 3936 3937relock: 3938 error = vn_lock(sdvp, LK_EXCLUSIVE); 3939 if (error) 3940 goto out; 3941 sdzp = VTOZ(sdvp); 3942 3943#ifdef __NetBSD__ 3944 if (tdvp == sdvp) { 3945 } else { 3946#endif 3947 error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT); 3948 if (error != 0) { 3949 VOP_UNLOCK(sdvp, 0); 3950 if (error != EBUSY) 3951 goto out; 3952 error = vn_lock(tdvp, LK_EXCLUSIVE); 3953 if (error) 3954 goto out; 3955 VOP_UNLOCK(tdvp, 0); 3956 goto relock; 3957 } 3958#ifdef __NetBSD__ 3959 } /* end if (tdvp == sdvp) */ 3960#endif 3961 3962 tdzp = VTOZ(tdvp); 3963 3964 /* 3965 * Before using sdzp and tdzp we must ensure that they are live. 3966 * As a porting legacy from illumos we have two things to worry 3967 * about. 
One is typical for FreeBSD and it is that the vnode is 3968 * not reclaimed (doomed). The other is that the znode is live. 3969 * The current code can invalidate the znode without acquiring the 3970 * corresponding vnode lock if the object represented by the znode 3971 * and vnode is no longer valid after a rollback or receive operation. 3972 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock 3973 * that protects the znodes from the invalidation. 3974 */ 3975 zfsvfs = sdzp->z_zfsvfs; 3976 ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs); 3977 ZFS_ENTER(zfsvfs); 3978 3979 /* 3980 * We can not use ZFS_VERIFY_ZP() here because it could directly return 3981 * bypassing the cleanup code in the case of an error. 3982 */ 3983 if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 3984 ZFS_EXIT(zfsvfs); 3985 VOP_UNLOCK(sdvp, 0); 3986#ifdef __NetBSD__ 3987 if (tdvp != sdvp) 3988#endif 3989 VOP_UNLOCK(tdvp, 0); 3990 error = SET_ERROR(EIO); 3991 goto out; 3992 } 3993 3994 /* 3995 * Re-resolve svp to be certain it still exists and fetch the 3996 * correct vnode. 3997 */ 3998#ifdef __NetBSD__ 3999 /* ZFS wants a null-terminated name. */ 4000 snm = PNBUF_GET(); 4001 strlcpy(snm, scnp->cn_nameptr, scnp->cn_namelen + 1); 4002#endif 4003 error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS); 4004#ifdef __NetBSD__ 4005 PNBUF_PUT(snm); 4006#endif 4007 if (error != 0) { 4008 /* Source entry invalid or not there. */ 4009 ZFS_EXIT(zfsvfs); 4010 VOP_UNLOCK(sdvp, 0); 4011#ifdef __NetBSD__ 4012 if (tdvp != sdvp) 4013#endif 4014 VOP_UNLOCK(tdvp, 0); 4015 if ((scnp->cn_flags & ISDOTDOT) != 0 || 4016 (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.')) 4017 error = SET_ERROR(EINVAL); 4018 goto out; 4019 } 4020 svp = ZTOV(szp); 4021 4022 /* 4023 * Re-resolve tvp, if it disappeared we just carry on. 4024 */ 4025#ifdef __NetBSD__ 4026 /* ZFS wants a null-terminated name. */ 4027 tnm = PNBUF_GET(); 4028 strlcpy(tnm, tcnp->cn_nameptr, tcnp->cn_namelen + 1); 4029#endif 4030 error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0); 4031#ifdef __NetBSD__ 4032 PNBUF_PUT(tnm); 4033#endif 4034 if (error != 0) { 4035 ZFS_EXIT(zfsvfs); 4036 VOP_UNLOCK(sdvp, 0); 4037#ifdef __NetBSD__ 4038 if (tdvp != sdvp) 4039#endif 4040 VOP_UNLOCK(tdvp, 0); 4041 vrele(svp); 4042 if ((tcnp->cn_flags & ISDOTDOT) != 0) 4043 error = SET_ERROR(EINVAL); 4044 goto out; 4045 } 4046 if (tzp != NULL) 4047 tvp = ZTOV(tzp); 4048 else 4049 tvp = NULL; 4050 4051 /* 4052 * At present the vnode locks must be acquired before z_teardown_lock, 4053 * although it would be more logical to use the opposite order. 4054 */ 4055 ZFS_EXIT(zfsvfs); 4056 4057 /* 4058 * Now try acquire locks on svp and tvp. 4059 */ 4060 nvp = svp; 4061 error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 4062 if (error != 0) { 4063 VOP_UNLOCK(sdvp, 0); 4064#ifdef __NetBSD__ 4065 if (tdvp != sdvp) 4066#endif 4067 VOP_UNLOCK(tdvp, 0); 4068 if (tvp != NULL) 4069 vrele(tvp); 4070 if (error != EBUSY) { 4071 vrele(nvp); 4072 goto out; 4073 } 4074 error = vn_lock(nvp, LK_EXCLUSIVE); 4075 if (error != 0) { 4076 vrele(nvp); 4077 goto out; 4078 } 4079 VOP_UNLOCK(nvp, 0); 4080 /* 4081 * Concurrent rename race. 4082 * XXX ? 
4083 */ 4084 if (nvp == tdvp) { 4085 vrele(nvp); 4086 error = SET_ERROR(EINVAL); 4087 goto out; 4088 } 4089#ifdef __NetBSD__ 4090 if (*svpp != NULL) 4091#endif 4092 vrele(*svpp); 4093 *svpp = nvp; 4094 goto relock; 4095 } 4096#ifdef __NetBSD__ 4097 if (*svpp != NULL) 4098#endif 4099 vrele(*svpp); 4100 *svpp = nvp; 4101 4102 if (*tvpp != NULL) 4103 vrele(*tvpp); 4104 *tvpp = NULL; 4105 if (tvp != NULL) { 4106 nvp = tvp; 4107 4108#ifdef __NetBSD__ 4109 if (tvp == svp || tvp == sdvp) { 4110 } else { 4111#endif 4112 error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 4113 if (error != 0) { 4114 VOP_UNLOCK(sdvp, 0); 4115#ifdef __NetBSD__ 4116 if (tdvp != sdvp) 4117#endif 4118 VOP_UNLOCK(tdvp, 0); 4119#ifdef __NetBSD__ 4120 if (*svpp != tdvp) 4121#endif 4122 VOP_UNLOCK(*svpp, 0); 4123 if (error != EBUSY) { 4124 vrele(nvp); 4125 goto out; 4126 } 4127 error = vn_lock(nvp, LK_EXCLUSIVE); 4128 if (error != 0) { 4129 vrele(nvp); 4130 goto out; 4131 } 4132 vput(nvp); 4133 goto relock; 4134 } 4135#ifdef __NetBSD__ 4136 } /* end if (tvp == svp || tvp == sdvp) */ 4137#endif 4138 4139 *tvpp = nvp; 4140 } 4141 4142 KASSERT(VOP_ISLOCKED(sdvp) == LK_EXCLUSIVE); 4143 KASSERT(VOP_ISLOCKED(*svpp) == LK_EXCLUSIVE); 4144 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4145 KASSERT(*tvpp == NULL || VOP_ISLOCKED(*tvpp) == LK_EXCLUSIVE); 4146 4147 return (0); 4148 4149out: 4150 return (error); 4151} 4152 4153/* 4154 * Note that we must use VRELE_ASYNC in this function as it walks 4155 * up the directory tree and vrele may need to acquire an exclusive 4156 * lock if a last reference to a vnode is dropped. 4157 */ 4158static int 4159zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp) 4160{ 4161 zfsvfs_t *zfsvfs; 4162 znode_t *zp, *zp1; 4163 uint64_t parent; 4164 int error; 4165 4166 zfsvfs = tdzp->z_zfsvfs; 4167 if (tdzp == szp) 4168 return (SET_ERROR(EINVAL)); 4169 if (tdzp == sdzp) 4170 return (0); 4171 if (tdzp->z_id == zfsvfs->z_root) 4172 return (0); 4173 zp = tdzp; 4174 for (;;) { 4175 ASSERT(!zp->z_unlinked); 4176 if ((error = sa_lookup(zp->z_sa_hdl, 4177 SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) 4178 break; 4179 4180 if (parent == szp->z_id) { 4181 error = SET_ERROR(EINVAL); 4182 break; 4183 } 4184 if (parent == zfsvfs->z_root) 4185 break; 4186 if (parent == sdzp->z_id) 4187 break; 4188 4189 error = zfs_zget(zfsvfs, parent, &zp1); 4190 if (error != 0) 4191 break; 4192 4193 if (zp != tdzp) 4194 VN_RELE_ASYNC(ZTOV(zp), 4195 dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 4196 zp = zp1; 4197 } 4198 4199 if (error == ENOTDIR) 4200 panic("checkpath: .. not a directory\n"); 4201 if (zp != tdzp) 4202 VN_RELE_ASYNC(ZTOV(zp), 4203 dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 4204 return (error); 4205} 4206 4207/* 4208 * Move an entry from the provided source directory to the target 4209 * directory. Change the entry name as indicated. 4210 * 4211 * IN: sdvp - Source directory containing the "old entry". 4212 * snm - Old entry name. 4213 * tdvp - Target directory to contain the "new entry". 4214 * tnm - New entry name. 4215 * cr - credentials of caller. 4216 * ct - caller context 4217 * flags - case flags 4218 * 4219 * RETURN: 0 on success, error code on failure. 
4220 * 4221 * Timestamps: 4222 * sdvp,tdvp - ctime|mtime updated 4223 */ 4224/*ARGSUSED*/ 4225static int 4226zfs_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, 4227 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp, 4228 cred_t *cr) 4229{ 4230 zfsvfs_t *zfsvfs; 4231 znode_t *sdzp, *tdzp, *szp, *tzp; 4232 zilog_t *zilog = NULL; 4233 dmu_tx_t *tx; 4234#ifdef __FreeBSD__ 4235 char *snm = __UNCONST(scnp->cn_nameptr); 4236 char *tnm = __UNCONST(tcnp->cn_nameptr); 4237#endif 4238#ifdef __NetBSD__ 4239 char *snm, *tnm; 4240#endif 4241 int error = 0; 4242 4243 /* Reject renames across filesystems. */ 4244 if (((*svpp) != NULL && (*svpp)->v_mount != tdvp->v_mount) || 4245 ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) { 4246 error = SET_ERROR(EXDEV); 4247 goto out; 4248 } 4249 4250 if (zfsctl_is_node(tdvp)) { 4251 error = SET_ERROR(EXDEV); 4252 goto out; 4253 } 4254 4255 /* 4256 * Lock all four vnodes to ensure safety and semantics of renaming. 4257 */ 4258 error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp); 4259 if (error != 0) { 4260 /* no vnodes are locked in the case of error here */ 4261 return (error); 4262 } 4263 4264 tdzp = VTOZ(tdvp); 4265 sdzp = VTOZ(sdvp); 4266 zfsvfs = tdzp->z_zfsvfs; 4267 zilog = zfsvfs->z_log; 4268#ifdef __NetBSD__ 4269 /* ZFS wants a null-terminated name. */ 4270 snm = PNBUF_GET(); 4271 strlcpy(snm, scnp->cn_nameptr, scnp->cn_namelen + 1); 4272 tnm = PNBUF_GET(); 4273 strlcpy(tnm, tcnp->cn_nameptr, tcnp->cn_namelen + 1); 4274#endif 4275 4276 /* 4277 * After we re-enter ZFS_ENTER() we will have to revalidate all 4278 * znodes involved. 4279 */ 4280 ZFS_ENTER(zfsvfs); 4281 4282 if (zfsvfs->z_utf8 && u8_validate(tnm, 4283 strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4284 error = SET_ERROR(EILSEQ); 4285 goto unlockout; 4286 } 4287 4288#ifndef __NetBSD__ 4289 /* If source and target are the same file, there is nothing to do. */ 4290 if ((*svpp) == (*tvpp)) { 4291 error = 0; 4292 goto unlockout; 4293 } 4294#endif 4295 4296 if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) || 4297 ((*tvpp) != NULL && (*tvpp)->v_type == VDIR && 4298 (*tvpp)->v_mountedhere != NULL)) { 4299 error = SET_ERROR(EXDEV); 4300 goto unlockout; 4301 } 4302 4303 /* 4304 * We can not use ZFS_VERIFY_ZP() here because it could directly return 4305 * bypassing the cleanup code in the case of an error. 4306 */ 4307 if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 4308 error = SET_ERROR(EIO); 4309 goto unlockout; 4310 } 4311 4312 szp = VTOZ(*svpp); 4313 tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp); 4314 if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) { 4315 error = SET_ERROR(EIO); 4316 goto unlockout; 4317 } 4318 4319 /* 4320 * This is to prevent the creation of links into attribute space 4321 * by renaming a linked file into/outof an attribute directory. 4322 * See the comment in zfs_link() for why this is considered bad. 4323 */ 4324 if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 4325 error = SET_ERROR(EINVAL); 4326 goto unlockout; 4327 } 4328 4329 /* 4330 * Must have write access at the source to remove the old entry 4331 * and write access at the target to create the new entry. 4332 * Note that if target and source are the same, this can be 4333 * done in a single check. 4334 */ 4335 if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 4336 goto unlockout; 4337 4338 if ((*svpp)->v_type == VDIR) { 4339 /* 4340 * Avoid ".", "..", and aliases of "." for obvious reasons. 
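		 * The checks below reject a literal "." source name, a
		 * source that is its own parent directory (an alias of "."),
		 * and any ".." component on either side.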
4341 */ 4342 if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') || 4343 sdzp == szp || 4344 (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { 4345 error = SET_ERROR(EINVAL); 4346 goto unlockout; 4347 } 4348 4349 /* 4350 * Check to make sure rename is valid. 4351 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 4352 */ 4353 if (error = zfs_rename_check(szp, sdzp, tdzp)) 4354 goto unlockout; 4355 } 4356 4357 /* 4358 * Does target exist? 4359 */ 4360 if (tzp) { 4361 /* 4362 * Source and target must be the same type. 4363 */ 4364 if ((*svpp)->v_type == VDIR) { 4365 if ((*tvpp)->v_type != VDIR) { 4366 error = SET_ERROR(ENOTDIR); 4367 goto unlockout; 4368 } else { 4369 cache_purge(tdvp); 4370 if (sdvp != tdvp) 4371 cache_purge(sdvp); 4372 } 4373 } else { 4374 if ((*tvpp)->v_type == VDIR) { 4375 error = SET_ERROR(EISDIR); 4376 goto unlockout; 4377 } 4378 } 4379 4380 /* 4381 * POSIX dictates that when the source and target 4382 * entries refer to the same file object, rename 4383 * must do nothing and exit without error. 4384 */ 4385#ifndef __NetBSD__ 4386 /* 4387 * But on NetBSD we have a different system call to do 4388 * this, posix_rename, which sorta kinda handles this 4389 * case (modulo races), and our tests expect BSD 4390 * semantics for rename, so we'll do that until we can 4391 * push the choice between BSD and POSIX semantics into 4392 * the VOP_RENAME protocol as a flag. 4393 */ 4394 if (szp->z_id == tzp->z_id) { 4395 error = 0; 4396 goto unlockout; 4397 } 4398#endif 4399 } 4400 4401 vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct); 4402 if (tzp) 4403 vnevent_rename_dest(*tvpp, tdvp, tnm, ct); 4404 4405 /* 4406 * notify the target directory if it is not the same 4407 * as source directory. 4408 */ 4409 if (tdvp != sdvp) { 4410 vnevent_rename_dest_dir(tdvp, ct); 4411 } 4412 4413 tx = dmu_tx_create(zfsvfs->z_os); 4414 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4415 dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 4416 dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 4417 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 4418 if (sdzp != tdzp) { 4419 dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 4420 zfs_sa_upgrade_txholds(tx, tdzp); 4421 } 4422 if (tzp) { 4423 dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 4424 zfs_sa_upgrade_txholds(tx, tzp); 4425 } 4426 4427 zfs_sa_upgrade_txholds(tx, szp); 4428 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 4429 error = dmu_tx_assign(tx, TXG_WAIT); 4430 if (error) { 4431 dmu_tx_abort(tx); 4432 goto unlockout; 4433 } 4434 4435 4436 if (tzp && (tzp->z_id != szp->z_id)) 4437 /* Attempt to remove the existing target */ 4438 error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL); 4439 4440 if (error == 0) { 4441 if (!tzp || (tzp->z_id != szp->z_id)) 4442 error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING); 4443 if (error == 0) { 4444 szp->z_pflags |= ZFS_AV_MODIFIED; 4445 4446 error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 4447 (void *)&szp->z_pflags, sizeof (uint64_t), tx); 4448 ASSERT0(error); 4449 4450 error = zfs_link_destroy(sdzp, snm, szp, tx, 4451 /* Kludge for BSD rename semantics. */ 4452 tzp && tzp->z_id == szp->z_id ? 0: ZRENAMING, NULL); 4453 if (error == 0) { 4454 zfs_log_rename(zilog, tx, TX_RENAME, sdzp, 4455 snm, tdzp, tnm, szp); 4456 4457 /* 4458 * Update path information for the target vnode 4459 */ 4460 vn_renamepath(tdvp, *svpp, tnm, strlen(tnm)); 4461 } else { 4462 /* 4463 * At this point, we have successfully created 4464 * the target name, but have failed to remove 4465 * the source name. 
Since the create was done 4466 * with the ZRENAMING flag, there are 4467 * complications; for one, the link count is 4468 * wrong. The easiest way to deal with this 4469 * is to remove the newly created target, and 4470 * return the original error. This must 4471 * succeed; fortunately, it is very unlikely to 4472 * fail, since we just created it. 4473 */ 4474 VERIFY3U(zfs_link_destroy(tdzp, tnm, szp, tx, 4475 ZRENAMING, NULL), ==, 0); 4476 } 4477 } 4478 if (error == 0) { 4479 cache_purge(*svpp); 4480 if (*tvpp != NULL) 4481 cache_purge(*tvpp); 4482 cache_purge_negative(tdvp); 4483#ifdef __NetBSD__ 4484 if (*svpp == *tvpp) { 4485 VN_KNOTE(sdvp, NOTE_WRITE); 4486 VN_KNOTE(*svpp, (szp->z_links == 0 ? 4487 NOTE_DELETE : NOTE_LINK)); 4488 } else { 4489 genfs_rename_knote(sdvp, *svpp, tdvp, *tvpp, 4490 tzp != NULL ? tzp->z_links : 0); 4491 } 4492#endif 4493 } 4494 } 4495 4496 dmu_tx_commit(tx); 4497 4498 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4499 zil_commit(zilog, 0); 4500 4501unlockout: /* all 4 vnodes are locked, ZFS_ENTER called */ 4502 ZFS_EXIT(zfsvfs); 4503 4504 VOP_UNLOCK(*svpp, 0); 4505 VOP_UNLOCK(sdvp, 0); 4506#ifdef __NetBSD__ 4507 PNBUF_PUT(snm); 4508 PNBUF_PUT(tnm); 4509#endif 4510 4511 if (*tvpp != sdvp && *tvpp != *svpp) 4512 if (*tvpp != NULL) 4513 VOP_UNLOCK(*tvpp, 0); 4514 if (tdvp != sdvp && tdvp != *svpp) 4515 if (tdvp != *tvpp) 4516 VOP_UNLOCK(tdvp, 0); 4517 4518out: 4519 return (error); 4520} 4521 4522/* 4523 * Insert the indicated symbolic reference entry into the directory. 4524 * 4525 * IN: dvp - Directory to contain new symbolic link. 4526 * link - Name for new symlink entry. 4527 * vap - Attributes of new entry. 4528 * cr - credentials of caller. 4529 * ct - caller context 4530 * flags - case flags 4531 * 4532 * RETURN: 0 on success, error code on failure. 4533 * 4534 * Timestamps: 4535 * dvp - ctime|mtime updated 4536 */ 4537/*ARGSUSED*/ 4538static int 4539zfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 4540 cred_t *cr, kthread_t *td) 4541{ 4542 znode_t *zp, *dzp = VTOZ(dvp); 4543 dmu_tx_t *tx; 4544 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4545 zilog_t *zilog; 4546 uint64_t len = strlen(link); 4547 int error; 4548 zfs_acl_ids_t acl_ids; 4549 boolean_t fuid_dirtied; 4550 uint64_t txtype = TX_SYMLINK; 4551 int flags = 0; 4552 4553 ASSERT(vap->va_type == VLNK); 4554 4555 ZFS_ENTER(zfsvfs); 4556 ZFS_VERIFY_ZP(dzp); 4557 zilog = zfsvfs->z_log; 4558 4559 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 4560 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4561 ZFS_EXIT(zfsvfs); 4562 return (SET_ERROR(EILSEQ)); 4563 } 4564 4565 if (len > MAXPATHLEN) { 4566 ZFS_EXIT(zfsvfs); 4567 return (SET_ERROR(ENAMETOOLONG)); 4568 } 4569 4570 if ((error = zfs_acl_ids_create(dzp, 0, 4571 vap, cr, NULL, &acl_ids)) != 0) { 4572 ZFS_EXIT(zfsvfs); 4573 return (error); 4574 } 4575 4576 /* 4577 * Attempt to lock directory; fail if entry already exists. 
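	 * (The ZNEW flag makes the lookup fail if an entry with this name
	 * already exists.)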
4578 */ 4579 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 4580 if (error) { 4581 zfs_acl_ids_free(&acl_ids); 4582 ZFS_EXIT(zfsvfs); 4583 return (error); 4584 } 4585 4586 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4587 zfs_acl_ids_free(&acl_ids); 4588 ZFS_EXIT(zfsvfs); 4589 return (error); 4590 } 4591 4592 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 4593 zfs_acl_ids_free(&acl_ids); 4594 ZFS_EXIT(zfsvfs); 4595 return (SET_ERROR(EDQUOT)); 4596 } 4597 4598 getnewvnode_reserve(1); 4599 tx = dmu_tx_create(zfsvfs->z_os); 4600 fuid_dirtied = zfsvfs->z_fuid_dirty; 4601 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 4602 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4603 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 4604 ZFS_SA_BASE_ATTR_SIZE + len); 4605 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 4606 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 4607 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 4608 acl_ids.z_aclp->z_acl_bytes); 4609 } 4610 if (fuid_dirtied) 4611 zfs_fuid_txhold(zfsvfs, tx); 4612 error = dmu_tx_assign(tx, TXG_WAIT); 4613 if (error) { 4614 zfs_acl_ids_free(&acl_ids); 4615 dmu_tx_abort(tx); 4616 getnewvnode_drop_reserve(); 4617 ZFS_EXIT(zfsvfs); 4618 return (error); 4619 } 4620 4621 /* 4622 * Create a new object for the symlink. 4623 * for version 4 ZPL datsets the symlink will be an SA attribute 4624 */ 4625 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 4626 4627 if (fuid_dirtied) 4628 zfs_fuid_sync(zfsvfs, tx); 4629 4630 if (zp->z_is_sa) 4631 error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 4632 link, len, tx); 4633 else 4634 zfs_sa_symlink(zp, link, len, tx); 4635 4636 zp->z_size = len; 4637 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 4638 &zp->z_size, sizeof (zp->z_size), tx); 4639 /* 4640 * Insert the new object into the directory. 4641 */ 4642 (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 4643 4644 zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 4645 *vpp = ZTOV(zp); 4646 4647 zfs_acl_ids_free(&acl_ids); 4648 4649 dmu_tx_commit(tx); 4650 4651 getnewvnode_drop_reserve(); 4652 4653 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4654 zil_commit(zilog, 0); 4655 4656 ZFS_EXIT(zfsvfs); 4657 return (error); 4658} 4659 4660/* 4661 * Return, in the buffer contained in the provided uio structure, 4662 * the symbolic path referred to by vp. 4663 * 4664 * IN: vp - vnode of symbolic link. 4665 * uio - structure to contain the link path. 4666 * cr - credentials of caller. 4667 * ct - caller context 4668 * 4669 * OUT: uio - structure containing the link path. 4670 * 4671 * RETURN: 0 on success, error code on failure. 4672 * 4673 * Timestamps: 4674 * vp - atime updated 4675 */ 4676/* ARGSUSED */ 4677static int 4678zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 4679{ 4680 znode_t *zp = VTOZ(vp); 4681 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4682 int error; 4683 4684 ZFS_ENTER(zfsvfs); 4685 ZFS_VERIFY_ZP(zp); 4686 4687 if (zp->z_is_sa) 4688 error = sa_lookup_uio(zp->z_sa_hdl, 4689 SA_ZPL_SYMLINK(zfsvfs), uio); 4690 else 4691 error = zfs_sa_readlink(zp, uio); 4692 4693 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4694 4695 ZFS_EXIT(zfsvfs); 4696 return (error); 4697} 4698 4699/* 4700 * Insert a new entry into directory tdvp referencing svp. 4701 * 4702 * IN: tdvp - Directory to contain new entry. 4703 * svp - vnode of new entry. 4704 * name - name of new entry. 4705 * cr - credentials of caller. 4706 * ct - caller context 4707 * 4708 * RETURN: 0 on success, error code on failure. 
4709 * 4710 * Timestamps: 4711 * tdvp - ctime|mtime updated 4712 * svp - ctime updated 4713 */ 4714/* ARGSUSED */ 4715static int 4716zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 4717 caller_context_t *ct, int flags) 4718{ 4719 znode_t *dzp = VTOZ(tdvp); 4720 znode_t *tzp, *szp; 4721 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4722 zilog_t *zilog; 4723 dmu_tx_t *tx; 4724 int error; 4725 uint64_t parent; 4726 uid_t owner; 4727 4728 ASSERT(tdvp->v_type == VDIR); 4729 4730 ZFS_ENTER(zfsvfs); 4731 ZFS_VERIFY_ZP(dzp); 4732 zilog = zfsvfs->z_log; 4733 4734 /* 4735 * POSIX dictates that we return EPERM here. 4736 * Better choices include ENOTSUP or EISDIR. 4737 */ 4738 if (svp->v_type == VDIR) { 4739 ZFS_EXIT(zfsvfs); 4740 return (SET_ERROR(EPERM)); 4741 } 4742 4743 szp = VTOZ(svp); 4744 ZFS_VERIFY_ZP(szp); 4745 4746 if (szp->z_pflags & (ZFS_APPENDONLY | ZFS_IMMUTABLE | ZFS_READONLY)) { 4747 ZFS_EXIT(zfsvfs); 4748 return (SET_ERROR(EPERM)); 4749 } 4750 4751 /* Prevent links to .zfs/shares files */ 4752 4753 if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 4754 &parent, sizeof (uint64_t))) != 0) { 4755 ZFS_EXIT(zfsvfs); 4756 return (error); 4757 } 4758 if (parent == zfsvfs->z_shares_dir) { 4759 ZFS_EXIT(zfsvfs); 4760 return (SET_ERROR(EPERM)); 4761 } 4762 4763 if (zfsvfs->z_utf8 && u8_validate(name, 4764 strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4765 ZFS_EXIT(zfsvfs); 4766 return (SET_ERROR(EILSEQ)); 4767 } 4768 4769 /* 4770 * We do not support links between attributes and non-attributes 4771 * because of the potential security risk of creating links 4772 * into "normal" file space in order to circumvent restrictions 4773 * imposed in attribute space. 4774 */ 4775 if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4776 ZFS_EXIT(zfsvfs); 4777 return (SET_ERROR(EINVAL)); 4778 } 4779 4780 4781 owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4782 if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { 4783 ZFS_EXIT(zfsvfs); 4784 return (SET_ERROR(EPERM)); 4785 } 4786 4787 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4788 ZFS_EXIT(zfsvfs); 4789 return (error); 4790 } 4791 4792 /* 4793 * Attempt to lock directory; fail if entry already exists. 4794 */ 4795 error = zfs_dirent_lookup(dzp, name, &tzp, ZNEW); 4796 if (error) { 4797 ZFS_EXIT(zfsvfs); 4798 return (error); 4799 } 4800 4801 tx = dmu_tx_create(zfsvfs->z_os); 4802 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4803 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4804 zfs_sa_upgrade_txholds(tx, szp); 4805 zfs_sa_upgrade_txholds(tx, dzp); 4806 error = dmu_tx_assign(tx, TXG_WAIT); 4807 if (error) { 4808 dmu_tx_abort(tx); 4809 ZFS_EXIT(zfsvfs); 4810 return (error); 4811 } 4812 4813 error = zfs_link_create(dzp, name, szp, tx, 0); 4814 4815 if (error == 0) { 4816 uint64_t txtype = TX_LINK; 4817 zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4818 } 4819 4820 dmu_tx_commit(tx); 4821 4822 if (error == 0) { 4823 vnevent_link(svp, ct); 4824 } 4825 4826 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4827 zil_commit(zilog, 0); 4828 4829 ZFS_EXIT(zfsvfs); 4830 return (error); 4831} 4832 4833 4834/*ARGSUSED*/ 4835void 4836zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4837{ 4838 znode_t *zp = VTOZ(vp); 4839 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4840 int error; 4841 4842 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4843 if (zp->z_sa_hdl == NULL) { 4844 /* 4845 * The fs has been unmounted, or we did a 4846 * suspend/resume and this file no longer exists. 
4847 */ 4848 rw_exit(&zfsvfs->z_teardown_inactive_lock); 4849 vrecycle(vp); 4850 return; 4851 } 4852 4853 if (zp->z_unlinked) { 4854 /* 4855 * Fast path to recycle a vnode of a removed file. 4856 */ 4857 rw_exit(&zfsvfs->z_teardown_inactive_lock); 4858 vrecycle(vp); 4859 return; 4860 } 4861 4862 if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4863 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4864 4865 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4866 zfs_sa_upgrade_txholds(tx, zp); 4867 error = dmu_tx_assign(tx, TXG_WAIT); 4868 if (error) { 4869 dmu_tx_abort(tx); 4870 } else { 4871 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 4872 (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4873 zp->z_atime_dirty = 0; 4874 dmu_tx_commit(tx); 4875 } 4876 } 4877 rw_exit(&zfsvfs->z_teardown_inactive_lock); 4878} 4879 4880 4881#ifdef __FreeBSD__ 4882CTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 4883CTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 4884#endif 4885 4886/*ARGSUSED*/ 4887static int 4888zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 4889{ 4890 znode_t *zp = VTOZ(vp); 4891 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4892 uint32_t gen; 4893 uint64_t gen64; 4894 uint64_t object = zp->z_id; 4895 zfid_short_t *zfid; 4896 int size, i, error; 4897 4898 ZFS_ENTER(zfsvfs); 4899 ZFS_VERIFY_ZP(zp); 4900 4901 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 4902 &gen64, sizeof (uint64_t))) != 0) { 4903 ZFS_EXIT(zfsvfs); 4904 return (error); 4905 } 4906 4907 gen = (uint32_t)gen64; 4908 4909 size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 4910 4911#ifdef illumos 4912 if (fidp->fid_len < size) { 4913 fidp->fid_len = size; 4914 ZFS_EXIT(zfsvfs); 4915 return (SET_ERROR(ENOSPC)); 4916 } 4917#else 4918 fidp->fid_len = size; 4919#endif 4920 4921 zfid = (zfid_short_t *)fidp; 4922 4923 zfid->zf_len = size; 4924 4925 for (i = 0; i < sizeof (zfid->zf_object); i++) 4926 zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 4927 4928 /* Must have a non-zero generation number to distinguish from .zfs */ 4929 if (gen == 0) 4930 gen = 1; 4931 for (i = 0; i < sizeof (zfid->zf_gen); i++) 4932 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 4933 4934 if (size == LONG_FID_LEN) { 4935 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 4936 zfid_long_t *zlfid; 4937 4938 zlfid = (zfid_long_t *)fidp; 4939 4940 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4941 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4942 4943 /* XXX - this should be the generation number for the objset */ 4944 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 4945 zlfid->zf_setgen[i] = 0; 4946 } 4947 4948 ZFS_EXIT(zfsvfs); 4949 return (0); 4950} 4951 4952static int 4953zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 4954 caller_context_t *ct) 4955{ 4956 znode_t *zp, *xzp; 4957 zfsvfs_t *zfsvfs; 4958 int error; 4959 4960 switch (cmd) { 4961 case _PC_LINK_MAX: 4962 *valp = INT_MAX; 4963 return (0); 4964 4965 case _PC_FILESIZEBITS: 4966 *valp = 64; 4967 return (0); 4968#ifdef illumos 4969 case _PC_XATTR_EXISTS: 4970 zp = VTOZ(vp); 4971 zfsvfs = zp->z_zfsvfs; 4972 ZFS_ENTER(zfsvfs); 4973 ZFS_VERIFY_ZP(zp); 4974 *valp = 0; 4975 error = zfs_dirent_lookup(zp, "", &xzp, 4976 ZXATTR | ZEXISTS | ZSHARED); 4977 if (error == 0) { 4978 if (!zfs_dirempty(xzp)) 4979 *valp = 1; 4980 vrele(ZTOV(xzp)); 4981 } else if (error == ENOENT) { 4982 /* 4983 * If there aren't extended attributes, it's the 4984 * same as having zero of them. 
4985 */ 4986 error = 0; 4987 } 4988 ZFS_EXIT(zfsvfs); 4989 return (error); 4990 4991 case _PC_SATTR_ENABLED: 4992 case _PC_SATTR_EXISTS: 4993 *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 4994 (vp->v_type == VREG || vp->v_type == VDIR); 4995 return (0); 4996 4997 case _PC_ACCESS_FILTERING: 4998 *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 4999 vp->v_type == VDIR; 5000 return (0); 5001 5002 case _PC_ACL_ENABLED: 5003 *valp = _ACL_ACE_ENABLED; 5004 return (0); 5005#endif /* illumos */ 5006 case _PC_MIN_HOLE_SIZE: 5007 *valp = (int)SPA_MINBLOCKSIZE; 5008 return (0); 5009#ifdef illumos 5010 case _PC_TIMESTAMP_RESOLUTION: 5011 /* nanosecond timestamp resolution */ 5012 *valp = 1L; 5013 return (0); 5014#endif 5015 case _PC_ACL_EXTENDED: 5016 *valp = 0; 5017 return (0); 5018 5019#ifndef __NetBSD__ 5020 case _PC_ACL_NFS4: 5021 *valp = 1; 5022 return (0); 5023 5024 case _PC_ACL_PATH_MAX: 5025 *valp = ACL_MAX_ENTRIES; 5026 return (0); 5027#endif 5028 5029 default: 5030 return (EOPNOTSUPP); 5031 } 5032} 5033 5034/*ARGSUSED*/ 5035static int 5036zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5037 caller_context_t *ct) 5038{ 5039 znode_t *zp = VTOZ(vp); 5040 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5041 int error; 5042 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 5043 5044 ZFS_ENTER(zfsvfs); 5045 ZFS_VERIFY_ZP(zp); 5046 error = zfs_getacl(zp, vsecp, skipaclchk, cr); 5047 ZFS_EXIT(zfsvfs); 5048 5049 return (error); 5050} 5051 5052/*ARGSUSED*/ 5053int 5054zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5055 caller_context_t *ct) 5056{ 5057 znode_t *zp = VTOZ(vp); 5058 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5059 int error; 5060 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 5061 zilog_t *zilog = zfsvfs->z_log; 5062 5063 ZFS_ENTER(zfsvfs); 5064 ZFS_VERIFY_ZP(zp); 5065 5066 error = zfs_setacl(zp, vsecp, skipaclchk, cr); 5067 5068 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5069 zil_commit(zilog, 0); 5070 5071 ZFS_EXIT(zfsvfs); 5072 return (error); 5073} 5074 5075static int 5076ioflags(int ioflags) 5077{ 5078 int flags = 0; 5079 5080 if (ioflags & IO_APPEND) 5081 flags |= FAPPEND; 5082 if (ioflags & IO_NDELAY) 5083 flags |= FNONBLOCK; 5084 if (ioflags & IO_SYNC) 5085 flags |= (FSYNC | FDSYNC | FRSYNC); 5086 5087 return (flags); 5088} 5089 5090#ifdef __NetBSD__ 5091 5092static int 5093zfs_netbsd_open(void *v) 5094{ 5095 struct vop_open_args *ap = v; 5096 5097 return (zfs_open(&ap->a_vp, ap->a_mode, ap->a_cred, NULL)); 5098} 5099 5100static int 5101zfs_netbsd_close(void *v) 5102{ 5103 struct vop_close_args *ap = v; 5104 5105 return (zfs_close(ap->a_vp, ap->a_fflag, 0, 0, ap->a_cred, NULL)); 5106} 5107 5108static int 5109zfs_netbsd_ioctl(void *v) 5110{ 5111 struct vop_ioctl_args *ap = v; 5112 5113 return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 5114 ap->a_fflag, ap->a_cred, NULL, NULL)); 5115} 5116 5117 5118static int 5119zfs_netbsd_read(void *v) 5120{ 5121 struct vop_read_args *ap = v; 5122 vnode_t *vp = ap->a_vp; 5123 znode_t *zp = VTOZ(vp); 5124 5125 switch (vp->v_type) { 5126 case VBLK: 5127 case VCHR: 5128 ZFS_ACCESSTIME_STAMP(zp->z_zfsvfs, zp); 5129 return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap)); 5130 case VFIFO: 5131 ZFS_ACCESSTIME_STAMP(zp->z_zfsvfs, zp); 5132 return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap)); 5133 } 5134 5135 return (zfs_read(vp, ap->a_uio, ioflags(ap->a_ioflag), ap->a_cred, NULL)); 5136} 5137 5138static int 5139zfs_netbsd_write(void *v) 5140{ 5141 struct 
vop_write_args *ap = v; 5142 vnode_t *vp = ap->a_vp; 5143 znode_t *zp = VTOZ(vp); 5144 struct uio *uio = ap->a_uio; 5145 off_t osize = zp->z_size; 5146 int error, resid; 5147 5148 switch (vp->v_type) { 5149 case VBLK: 5150 case VCHR: 5151 GOP_MARKUPDATE(vp, GOP_UPDATE_MODIFIED); 5152 return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap)); 5153 case VFIFO: 5154 GOP_MARKUPDATE(vp, GOP_UPDATE_MODIFIED); 5155 return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap)); 5156 } 5157 5158 resid = uio->uio_resid; 5159 error = zfs_write(vp, uio, ioflags(ap->a_ioflag), ap->a_cred, NULL); 5160 5161 return error; 5162} 5163 5164static int 5165zfs_netbsd_access(void *v) 5166{ 5167 struct vop_access_args /* { 5168 struct vnode *a_vp; 5169 accmode_t a_accmode; 5170 kauth_cred_t a_cred; 5171 } */ *ap = v; 5172 vnode_t *vp = ap->a_vp; 5173 znode_t *zp = VTOZ(vp); 5174 accmode_t accmode; 5175 kauth_cred_t cred = ap->a_cred; 5176 int error = 0; 5177 5178 /* 5179 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND, 5180 */ 5181 accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 5182 if (accmode != 0) 5183 error = zfs_access(vp, accmode, 0, cred, NULL); 5184 5185 /* 5186 * VADMIN has to be handled by kauth_authorize_vnode(). 5187 */ 5188 if (error == 0) { 5189 accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 5190 if (accmode != 0) { 5191 error = kauth_authorize_vnode(cred, 5192 KAUTH_ACCESS_ACTION(accmode, vp->v_type, 5193 zp->z_mode & ALLPERMS), vp, NULL, 5194 genfs_can_access(vp, cred, zp->z_uid, 5195 zp->z_gid, zp->z_mode & ALLPERMS, NULL, accmode)); 5196 } 5197 } 5198 5199 /* 5200 * For VEXEC, ensure that at least one execute bit is set for 5201 * non-directories. 5202 */ 5203 if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 5204 (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 5205 error = EACCES; 5206 } 5207 5208 /* We expect EACCES as common error. */ 5209 if (error == EPERM) 5210 error = EACCES; 5211 5212 return error; 5213} 5214 5215static int 5216zfs_netbsd_lookup(void *v) 5217{ 5218 struct vop_lookup_v2_args /* { 5219 struct vnode *a_dvp; 5220 struct vnode **a_vpp; 5221 struct componentname *a_cnp; 5222 } */ *ap = v; 5223 struct vnode *dvp = ap->a_dvp; 5224 struct vnode **vpp = ap->a_vpp; 5225 struct componentname *cnp = ap->a_cnp; 5226 char *nm, short_nm[31]; 5227 int error; 5228 int iswhiteout; 5229 5230 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5231 5232 *vpp = NULL; 5233 5234 /* 5235 * Do an access check before the cache lookup. zfs_lookup does 5236 * an access check too, but it's too scary to contemplate 5237 * injecting our namecache stuff into zfs internals. 5238 * 5239 * XXX Is this the correct access check? 5240 */ 5241 if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred)) != 0) 5242 goto out; 5243 5244 /* 5245 * Check the namecache before entering zfs_lookup. 5246 * cache_lookup does the locking dance for us. 5247 */ 5248 if (cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 5249 cnp->cn_nameiop, cnp->cn_flags, &iswhiteout, vpp)) { 5250 if (iswhiteout) { 5251 cnp->cn_flags |= ISWHITEOUT; 5252 } 5253 return *vpp == NULL ? ENOENT : 0; 5254 } 5255 5256 /* 5257 * zfs_lookup wants a null-terminated component name, but namei 5258 * gives us a pointer into the full pathname. 
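	 * Copy it out: names that fit in the small on-stack buffer avoid
	 * a pathname-buffer allocation; longer ones fall back to
	 * PNBUF_GET().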
5259 */ 5260 ASSERT(cnp->cn_namelen < PATH_MAX - 1); 5261 if (cnp->cn_namelen + 1 > sizeof(short_nm)) 5262 nm = PNBUF_GET(); 5263 else 5264 nm = short_nm; 5265 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5266 5267 error = zfs_lookup(dvp, nm, vpp, 0, cnp, cnp->cn_nameiop, cnp->cn_cred); 5268 5269 if (nm != short_nm) 5270 PNBUF_PUT(nm); 5271 5272 /* 5273 * Translate errors to match our namei insanity. Also, if the 5274 * caller wants to create an entry here, it's apparently our 5275 * responsibility as lookup to make sure that's permissible. 5276 * Go figure. 5277 */ 5278 if (cnp->cn_flags & ISLASTCN) { 5279 switch (cnp->cn_nameiop) { 5280 case CREATE: 5281 case RENAME: 5282 if (error == ENOENT) { 5283 error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred); 5284 if (error) 5285 break; 5286 error = EJUSTRETURN; 5287 break; 5288 } 5289 break; 5290 case DELETE: 5291 if (error == 0) { 5292 error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred); 5293 if (error) { 5294 VN_RELE(*vpp); 5295 *vpp = NULL; 5296 } 5297 } 5298 break; 5299 } 5300 } 5301 5302 if (error) { 5303 KASSERT(*vpp == NULL); 5304 goto out; 5305 } 5306 KASSERT(*vpp != NULL); 5307 5308 if ((cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.')) { 5309 KASSERT(!(cnp->cn_flags & ISDOTDOT)); 5310 KASSERT(dvp == *vpp); 5311 } else if ((cnp->cn_namelen == 2) && 5312 (cnp->cn_nameptr[0] == '.') && 5313 (cnp->cn_nameptr[1] == '.')) { 5314 KASSERT(cnp->cn_flags & ISDOTDOT); 5315 } else { 5316 KASSERT(!(cnp->cn_flags & ISDOTDOT)); 5317 } 5318 5319out: 5320 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5321 5322 /* 5323 * Insert name into cache if appropriate. 5324 */ 5325 5326 if (error == 0 || (error == ENOENT && cnp->cn_nameiop != CREATE)) 5327 cache_enter(dvp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, 5328 cnp->cn_flags); 5329 5330 return (error); 5331} 5332 5333static int 5334zfs_netbsd_create(void *v) 5335{ 5336 struct vop_create_v3_args /* { 5337 struct vnode *a_dvp; 5338 struct vnode **a_vpp; 5339 struct componentname *a_cnp; 5340 struct vattr *a_vap; 5341 } */ *ap = v; 5342 struct vnode *dvp = ap->a_dvp; 5343 struct vnode **vpp = ap->a_vpp; 5344 struct componentname *cnp = ap->a_cnp; 5345 struct vattr *vap = ap->a_vap; 5346 char *nm; 5347 int mode; 5348 int error; 5349 5350 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5351 5352 vattr_init_mask(vap); 5353 mode = vap->va_mode & ALLPERMS; 5354 5355 /* ZFS wants a null-terminated name. */ 5356 nm = PNBUF_GET(); 5357 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5358 5359 /* XXX !EXCL is wrong here... */ 5360 error = zfs_create(dvp, nm, vap, !EXCL, mode, vpp, cnp->cn_cred, NULL); 5361 5362 PNBUF_PUT(nm); 5363 5364 KASSERT((error == 0) == (*vpp != NULL)); 5365 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5366 if (*vpp != NULL) 5367 VOP_UNLOCK(*vpp, 0); 5368 5369 return (error); 5370} 5371 5372static int 5373zfs_netbsd_mknod(void *v) 5374{ 5375 struct vop_mknod_v3_args /* { 5376 struct vnode *a_dvp; 5377 struct vnode **a_vpp; 5378 struct componentname *a_cnp; 5379 struct vattr *a_vap; 5380 } */ *ap = v; 5381 struct vnode *dvp = ap->a_dvp; 5382 struct vnode **vpp = ap->a_vpp; 5383 struct componentname *cnp = ap->a_cnp; 5384 struct vattr *vap = ap->a_vap; 5385 char *nm; 5386 int mode; 5387 int error; 5388 5389 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5390 5391 vattr_init_mask(vap); 5392 mode = vap->va_mode & ALLPERMS; 5393 5394 /* ZFS wants a null-terminated name. */ 5395 nm = PNBUF_GET(); 5396 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5397 5398 /* XXX !EXCL is wrong here... 
*/ 5399 error = zfs_create(dvp, nm, vap, !EXCL, mode, vpp, cnp->cn_cred, NULL); 5400 5401 PNBUF_PUT(nm); 5402 5403 KASSERT((error == 0) == (*vpp != NULL)); 5404 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5405 if (*vpp != NULL) 5406 VOP_UNLOCK(*vpp, 0); 5407 5408 return (error); 5409} 5410 5411static int 5412zfs_netbsd_remove(void *v) 5413{ 5414 struct vop_remove_v3_args /* { 5415 struct vnode *a_dvp; 5416 struct vnode *a_vp; 5417 struct componentname *a_cnp; 5418 nlink_t ctx_vp_new_nlink; 5419 } */ *ap = v; 5420 struct vnode *dvp = ap->a_dvp; 5421 struct vnode *vp = ap->a_vp; 5422 struct componentname *cnp = ap->a_cnp; 5423 char *nm; 5424 int error; 5425 5426 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5427 KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 5428 5429 /* ZFS wants a null-terminated name. */ 5430 nm = PNBUF_GET(); 5431 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5432 5433 error = zfs_remove(dvp, vp, nm, cnp->cn_cred); 5434 5435 /* 5436 * XXX Should update ctx_vp_new_nlink, but for now the 5437 * XXX the kevent sent on "vp" matches historical behavior. 5438 */ 5439 5440 PNBUF_PUT(nm); 5441 vput(vp); 5442 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5443 return (error); 5444} 5445 5446static int 5447zfs_netbsd_mkdir(void *v) 5448{ 5449 struct vop_mkdir_v3_args /* { 5450 struct vnode *a_dvp; 5451 struct vnode **a_vpp; 5452 struct componentname *a_cnp; 5453 struct vattr *a_vap; 5454 } */ *ap = v; 5455 struct vnode *dvp = ap->a_dvp; 5456 struct vnode **vpp = ap->a_vpp; 5457 struct componentname *cnp = ap->a_cnp; 5458 struct vattr *vap = ap->a_vap; 5459 char *nm; 5460 int error; 5461 5462 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5463 5464 vattr_init_mask(vap); 5465 5466 /* ZFS wants a null-terminated name. */ 5467 nm = PNBUF_GET(); 5468 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5469 5470 error = zfs_mkdir(dvp, nm, vap, vpp, cnp->cn_cred); 5471 5472 PNBUF_PUT(nm); 5473 5474 KASSERT((error == 0) == (*vpp != NULL)); 5475 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5476 if (*vpp != NULL) 5477 VOP_UNLOCK(*vpp, 0); 5478 5479 return (error); 5480} 5481 5482static int 5483zfs_netbsd_rmdir(void *v) 5484{ 5485 struct vop_rmdir_v2_args /* { 5486 struct vnode *a_dvp; 5487 struct vnode *a_vp; 5488 struct componentname *a_cnp; 5489 } */ *ap = v; 5490 struct vnode *dvp = ap->a_dvp; 5491 struct vnode *vp = ap->a_vp; 5492 struct componentname *cnp = ap->a_cnp; 5493 char *nm; 5494 int error; 5495 5496 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5497 KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 5498 5499 /* ZFS wants a null-terminated name. 
*/ 5500 nm = PNBUF_GET(); 5501 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5502 5503 error = zfs_rmdir(dvp, vp, nm, cnp->cn_cred); 5504 5505 PNBUF_PUT(nm); 5506 vput(vp); 5507 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5508 return error; 5509} 5510 5511static int 5512zfs_netbsd_readdir(void *v) 5513{ 5514 struct vop_readdir_args *ap = v; 5515 5516 return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 5517 ap->a_ncookies, ap->a_cookies)); 5518} 5519 5520static int 5521zfs_netbsd_fsync(void *v) 5522{ 5523 struct vop_fsync_args *ap = v; 5524 5525 return (zfs_fsync(ap->a_vp, ap->a_flags, ap->a_cred, NULL)); 5526} 5527 5528static int 5529zfs_spec_fsync(void *v) 5530{ 5531 struct vop_fsync_args *ap = v; 5532 int error; 5533 5534 error = spec_fsync(v); 5535 if (error) 5536 return error; 5537 5538 return (zfs_fsync(ap->a_vp, ap->a_flags, ap->a_cred, NULL)); 5539} 5540 5541static int 5542zfs_netbsd_getattr(void *v) 5543{ 5544 struct vop_getattr_args *ap = v; 5545 vattr_t *vap = ap->a_vap; 5546 xvattr_t xvap; 5547 u_long fflags = 0; 5548 int error; 5549 5550 xva_init(&xvap); 5551 xvap.xva_vattr = *vap; 5552 xvap.xva_vattr.va_mask |= AT_XVATTR; 5553 5554 /* Convert chflags into ZFS-type flags. */ 5555 /* XXX: what about SF_SETTABLE?. */ 5556 XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 5557 XVA_SET_REQ(&xvap, XAT_APPENDONLY); 5558 XVA_SET_REQ(&xvap, XAT_NOUNLINK); 5559 XVA_SET_REQ(&xvap, XAT_NODUMP); 5560 error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 5561 if (error != 0) 5562 return (error); 5563 5564 /* Convert ZFS xattr into chflags. */ 5565#define FLAG_CHECK(fflag, xflag, xfield) do { \ 5566 if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 5567 fflags |= (fflag); \ 5568} while (0) 5569 FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 5570 xvap.xva_xoptattrs.xoa_immutable); 5571 FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 5572 xvap.xva_xoptattrs.xoa_appendonly); 5573 FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 5574 xvap.xva_xoptattrs.xoa_nounlink); 5575 FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 5576 xvap.xva_xoptattrs.xoa_nodump); 5577#undef FLAG_CHECK 5578 *vap = xvap.xva_vattr; 5579 vap->va_flags = fflags; 5580 return (0); 5581} 5582 5583static int 5584zfs_netbsd_setattr(void *v) 5585{ 5586 struct vop_setattr_args *ap = v; 5587 vnode_t *vp = ap->a_vp; 5588 vattr_t *vap = ap->a_vap; 5589 cred_t *cred = ap->a_cred; 5590 znode_t *zp = VTOZ(vp); 5591 xvattr_t xvap; 5592 kauth_action_t action; 5593 u_long fflags, sfflags = 0; 5594 uint64_t zflags; 5595 int error, flags = 0; 5596 bool changing_sysflags; 5597 5598 vattr_init_mask(vap); 5599 vap->va_mask &= ~AT_NOSET; 5600 if (ISSET(vap->va_vaflags, VA_UTIMES_NULL)) 5601 flags |= ATTR_UTIME; 5602 5603 xva_init(&xvap); 5604 xvap.xva_vattr = *vap; 5605 5606 zflags = VTOZ(vp)->z_pflags; 5607 5608 /* Ignore size changes on device nodes. */ 5609 if (vp->v_type == VBLK || vp->v_type == VCHR) 5610 xvap.xva_vattr.va_mask &= ~AT_SIZE; 5611 if (vap->va_flags != VNOVAL) { 5612 int error; 5613 5614 fflags = vap->va_flags; 5615 if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_NODUMP)) != 0) 5616 return (EOPNOTSUPP); 5617 5618#define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 5619 if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 5620 ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 5621 XVA_SET_REQ(&xvap, (xflag)); \ 5622 (xfield) = ((fflags & (fflag)) != 0); \ 5623 if (((fflag) & SF_SETTABLE) != 0) \ 5624 sfflags |= (fflag); \ 5625 } \ 5626} while (0) 5627 /* Convert chflags into ZFS-type flags. */ 5628 /* XXX: what about SF_SETTABLE?. 
*/ 5629 FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 5630 xvap.xva_xoptattrs.xoa_immutable); 5631 FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 5632 xvap.xva_xoptattrs.xoa_appendonly); 5633 FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 5634 xvap.xva_xoptattrs.xoa_nounlink); 5635 FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 5636 xvap.xva_xoptattrs.xoa_nodump); 5637#undef FLAG_CHANGE 5638 5639 action = KAUTH_VNODE_WRITE_FLAGS; 5640 changing_sysflags = false; 5641 5642 if (zflags & (ZFS_IMMUTABLE|ZFS_APPENDONLY|ZFS_NOUNLINK)) { 5643 action |= KAUTH_VNODE_HAS_SYSFLAGS; 5644 } 5645 if (sfflags != 0) { 5646 action |= KAUTH_VNODE_WRITE_SYSFLAGS; 5647 changing_sysflags = true; 5648 } 5649 5650 error = kauth_authorize_vnode(cred, action, vp, NULL, 5651 genfs_can_chflags(vp, cred, zp->z_uid, changing_sysflags)); 5652 if (error) 5653 return error; 5654 } 5655 5656 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || 5657 vap->va_birthtime.tv_sec != VNOVAL) { 5658 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, 5659 NULL, genfs_can_chtimes(vp, cred, zp->z_uid, 5660 vap->va_vaflags)); 5661 if (error) 5662 return error; 5663 } 5664 5665 error = zfs_setattr(vp, (vattr_t *)&xvap, flags, cred, NULL); 5666 if (error) 5667 return error; 5668 5669 cache_enter_id(vp, zp->z_mode, zp->z_uid, zp->z_gid, true); 5670 5671 return error; 5672} 5673 5674static int 5675zfs_netbsd_rename(void *v) 5676{ 5677 struct vop_rename_args /* { 5678 struct vnode *a_fdvp; 5679 struct vnode *a_fvp; 5680 struct componentname *a_fcnp; 5681 struct vnode *a_tdvp; 5682 struct vnode *a_tvp; 5683 struct componentname *a_tcnp; 5684 } */ *ap = v; 5685 vnode_t *fdvp = ap->a_fdvp; 5686 vnode_t *fvp = ap->a_fvp; 5687 struct componentname *fcnp = ap->a_fcnp; 5688 vnode_t *tdvp = ap->a_tdvp; 5689 vnode_t *tvp = ap->a_tvp; 5690 struct componentname *tcnp = ap->a_tcnp; 5691 kauth_cred_t cred; 5692 int error; 5693 5694 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 5695 KASSERT(tvp == NULL || VOP_ISLOCKED(tvp) == LK_EXCLUSIVE); 5696 KASSERT(fdvp->v_type == VDIR); 5697 KASSERT(tdvp->v_type == VDIR); 5698 5699 cred = fcnp->cn_cred; 5700 5701 /* 5702 * XXX Want a better equality test. `tcnp->cn_cred == cred' 5703 * hoses p2k because puffs transmits the creds separately and 5704 * allocates distinct but equivalent structures for them. 5705 */ 5706 KASSERT(kauth_cred_uidmatch(cred, tcnp->cn_cred)); 5707 5708 /* 5709 * Drop the insane locks. 5710 */ 5711 VOP_UNLOCK(tdvp, 0); 5712 if (tvp != NULL && tvp != tdvp) 5713 VOP_UNLOCK(tvp, 0); 5714 5715 /* 5716 * Release the source and target nodes; zfs_rename will look 5717 * them up again once the locking situation is sane. 5718 */ 5719 VN_RELE(fvp); 5720 if (tvp != NULL) 5721 VN_RELE(tvp); 5722 fvp = NULL; 5723 tvp = NULL; 5724 5725 /* 5726 * Do the rename ZFSly. 5727 */ 5728 error = zfs_rename(fdvp, &fvp, fcnp, tdvp, &tvp, tcnp, cred); 5729 5730 /* 5731 * Release the directories now too, because the VOP_RENAME 5732 * protocol is insane. 
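	 * (The protocol hands us a reference on each of the four vnodes
	 * and expects the rename operation to consume all of them.)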
5733 */ 5734 5735 VN_RELE(fdvp); 5736 VN_RELE(tdvp); 5737 if (fvp != NULL) 5738 VN_RELE(fvp); 5739 if (tvp != NULL) 5740 VN_RELE(tvp); 5741 5742 return (error); 5743} 5744 5745static int 5746zfs_netbsd_symlink(void *v) 5747{ 5748 struct vop_symlink_v3_args /* { 5749 struct vnode *a_dvp; 5750 struct vnode **a_vpp; 5751 struct componentname *a_cnp; 5752 struct vattr *a_vap; 5753 char *a_target; 5754 } */ *ap = v; 5755 struct vnode *dvp = ap->a_dvp; 5756 struct vnode **vpp = ap->a_vpp; 5757 struct componentname *cnp = ap->a_cnp; 5758 struct vattr *vap = ap->a_vap; 5759 char *target = ap->a_target; 5760 char *nm; 5761 int error; 5762 5763 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5764 5765 vap->va_type = VLNK; /* Netbsd: Syscall only sets va_mode. */ 5766 vattr_init_mask(vap); 5767 5768 /* ZFS wants a null-terminated name. */ 5769 nm = PNBUF_GET(); 5770 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5771 5772 error = zfs_symlink(dvp, vpp, nm, vap, target, cnp->cn_cred, 0); 5773 5774 PNBUF_PUT(nm); 5775 KASSERT((error == 0) == (*vpp != NULL)); 5776 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5777 if (*vpp != NULL) 5778 VOP_UNLOCK(*vpp, 0); 5779 5780 return (error); 5781} 5782 5783static int 5784zfs_netbsd_readlink(void *v) 5785{ 5786 struct vop_readlink_args *ap = v; 5787 5788 return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 5789} 5790 5791static int 5792zfs_netbsd_link(void *v) 5793{ 5794 struct vop_link_v2_args /* { 5795 struct vnode *a_dvp; 5796 struct vnode *a_vp; 5797 struct componentname *a_cnp; 5798 } */ *ap = v; 5799 struct vnode *dvp = ap->a_dvp; 5800 struct vnode *vp = ap->a_vp; 5801 struct componentname *cnp = ap->a_cnp; 5802 char *nm; 5803 int error; 5804 5805 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5806 5807 /* ZFS wants a null-terminated name. */ 5808 nm = PNBUF_GET(); 5809 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5810 5811 if ((error = vn_lock(vp, LK_EXCLUSIVE)) != 0) { 5812 /* XXX: No ABORTOP? */ 5813 PNBUF_PUT(nm); 5814 return error; 5815 } 5816 error = kauth_authorize_vnode(cnp->cn_cred, KAUTH_VNODE_ADD_LINK, vp, 5817 dvp, 0); 5818 if (error) 5819 goto out; 5820 error = zfs_link(dvp, vp, nm, cnp->cn_cred, 5821 NULL, 0); 5822 5823out: 5824 PNBUF_PUT(nm); 5825 VOP_UNLOCK(vp, 0); 5826 return error; 5827} 5828 5829static int 5830zfs_netbsd_inactive(void *v) 5831{ 5832 struct vop_inactive_v2_args *ap = v; 5833 vnode_t *vp = ap->a_vp; 5834 znode_t *zp = VTOZ(vp); 5835 5836 /* 5837 * NetBSD: nothing to do here, other than indicate if the 5838 * vnode should be reclaimed. No need to lock, if we race 5839 * vrele() will call us again. 5840 */ 5841 *ap->a_recycle = (zp->z_unlinked != 0); 5842 5843 return (0); 5844} 5845 5846static int 5847zfs_netbsd_reclaim(void *v) 5848{ 5849 struct vop_reclaim_v2_args /* { 5850 struct vnode *a_vp; 5851 } */ *ap = v; 5852 struct vnode *vp = ap->a_vp; 5853 znode_t *zp; 5854 zfsvfs_t *zfsvfs; 5855 int error; 5856 5857 VOP_UNLOCK(vp, 0); 5858 zp = VTOZ(vp); 5859 zfsvfs = zp->z_zfsvfs; 5860 5861 KASSERTMSG(!vn_has_cached_data(vp), "vp %p", vp); 5862 5863 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 5864 5865 /* 5866 * Process a deferred atime update. 
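	 * Flush it here, before the znode is torn down, so the last
	 * access time is not lost when the vnode is reclaimed.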
5867 */ 5868 if (zp->z_atime_dirty && zp->z_unlinked == 0 && zp->z_sa_hdl != NULL) { 5869 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 5870 5871 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 5872 zfs_sa_upgrade_txholds(tx, zp); 5873 error = dmu_tx_assign(tx, TXG_WAIT); 5874 if (error) { 5875 dmu_tx_abort(tx); 5876 } else { 5877 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 5878 (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 5879 zp->z_atime_dirty = 0; 5880 dmu_tx_commit(tx); 5881 } 5882 } 5883 5884 /* 5885 * Operation zfs_znode.c::zfs_zget_cleaner() depends on this 5886 * zil_commit() as a barrier to guarantee the znode cannot 5887 * get freed before its log entries are resolved. 5888 */ 5889 if (zfsvfs->z_log) 5890 zil_commit(zfsvfs->z_log, zp->z_id); 5891 5892 if (zp->z_sa_hdl == NULL) 5893 zfs_znode_free(zp); 5894 else 5895 zfs_zinactive(zp); 5896 rw_exit(&zfsvfs->z_teardown_inactive_lock); 5897 return 0; 5898} 5899 5900static int 5901zfs_netbsd_fid(void *v) 5902{ 5903 struct vop_fid_args *ap = v; 5904 5905 return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 5906} 5907 5908static int 5909zfs_netbsd_pathconf(void *v) 5910{ 5911 struct vop_pathconf_args *ap = v; 5912 ulong_t val; 5913 int error; 5914 5915 error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->l_cred, NULL); 5916 if (error == 0) 5917 *ap->a_retval = val; 5918 else if (error == EOPNOTSUPP) { 5919 switch (ap->a_name) { 5920 case _PC_NAME_MAX: 5921 *ap->a_retval = NAME_MAX; 5922 return (0); 5923 case _PC_PATH_MAX: 5924 *ap->a_retval = PATH_MAX; 5925 return (0); 5926 case _PC_LINK_MAX: 5927 *ap->a_retval = LINK_MAX; 5928 return (0); 5929 case _PC_MAX_CANON: 5930 *ap->a_retval = MAX_CANON; 5931 return (0); 5932 case _PC_MAX_INPUT: 5933 *ap->a_retval = MAX_INPUT; 5934 return (0); 5935 case _PC_PIPE_BUF: 5936 *ap->a_retval = PIPE_BUF; 5937 return (0); 5938 case _PC_CHOWN_RESTRICTED: 5939 *ap->a_retval = 1; 5940 return (0); 5941 case _PC_NO_TRUNC: 5942 *ap->a_retval = 1; 5943 return (0); 5944 case _PC_VDISABLE: 5945 *ap->a_retval = _POSIX_VDISABLE; 5946 return (0); 5947 default: 5948 return (EINVAL); 5949 } 5950 /* NOTREACHED */ 5951 } 5952 return (error); 5953} 5954 5955static int 5956zfs_netbsd_advlock(void *v) 5957{ 5958 struct vop_advlock_args /* { 5959 struct vnode *a_vp; 5960 void *a_id; 5961 int a_op; 5962 struct flock *a_fl; 5963 int a_flags; 5964 } */ *ap = v; 5965 struct vnode *vp; 5966 struct znode *zp; 5967 struct zfsvfs *zfsvfs; 5968 int error; 5969 5970 vp = ap->a_vp; 5971 zp = VTOZ(vp); 5972 zfsvfs = zp->z_zfsvfs; 5973 5974 ZFS_ENTER(zfsvfs); 5975 ZFS_VERIFY_ZP(zp); 5976 error = lf_advlock(ap, &zp->z_lockf, zp->z_size); 5977 ZFS_EXIT(zfsvfs); 5978 5979 return error; 5980} 5981 5982static int 5983zfs_netbsd_getpages(void *v) 5984{ 5985 struct vop_getpages_args /* { 5986 struct vnode *a_vp; 5987 voff_t a_offset; 5988 struct vm_page **a_m; 5989 int *a_count; 5990 int a_centeridx; 5991 vm_prot_t a_access_type; 5992 int a_advice; 5993 int a_flags; 5994 } */ * const ap = v; 5995 5996 vnode_t *const vp = ap->a_vp; 5997 const int flags = ap->a_flags; 5998 const bool async = (flags & PGO_SYNCIO) == 0; 5999 const bool memwrite = (ap->a_access_type & VM_PROT_WRITE) != 0; 6000 6001 struct uvm_object * const uobj = &vp->v_uobj; 6002 krwlock_t * const rw = uobj->vmobjlock; 6003 znode_t *zp = VTOZ(vp); 6004 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6005 vfs_t *mp; 6006 struct vm_page *pg; 6007 caddr_t va; 6008 int npages = *ap->a_count, found, err = 0; 6009 6010 if (flags & PGO_LOCKED) { 6011 uvn_findpages(uobj, ap->a_offset, 
&npages, ap->a_m, NULL, 6012 UFP_NOWAIT | UFP_NOALLOC | UFP_NOBUSY | 6013 (memwrite ? UFP_NORDONLY : 0)); 6014 KASSERT(npages == *ap->a_count); 6015 if (memwrite) { 6016 KASSERT(rw_write_held(uobj->vmobjlock)); 6017 for (int i = 0; i < npages; i++) { 6018 pg = ap->a_m[i]; 6019 if (pg == NULL || pg == PGO_DONTCARE) { 6020 continue; 6021 } 6022 if (uvm_pagegetdirty(pg) == 6023 UVM_PAGE_STATUS_CLEAN) { 6024 uvm_pagemarkdirty(pg, 6025 UVM_PAGE_STATUS_UNKNOWN); 6026 } 6027 } 6028 } 6029 return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0; 6030 } 6031 rw_exit(rw); 6032 6033 if (async) { 6034 return 0; 6035 } 6036 6037 mp = vp->v_mount; 6038 fstrans_start(mp); 6039 if (vp->v_mount != mp) { 6040 fstrans_done(mp); 6041 return ENOENT; 6042 } 6043 ZFS_ENTER(zfsvfs); 6044 ZFS_VERIFY_ZP(zp); 6045 6046 rw_enter(rw, RW_WRITER); 6047 if (ap->a_offset + (npages << PAGE_SHIFT) > round_page(vp->v_size)) { 6048 rw_exit(rw); 6049 ZFS_EXIT(zfsvfs); 6050 fstrans_done(mp); 6051 return EINVAL; 6052 } 6053 uvn_findpages(uobj, ap->a_offset, &npages, ap->a_m, NULL, UFP_ALL); 6054 KASSERT(npages == *ap->a_count); 6055 6056 for (int i = 0; i < npages; i++) { 6057 pg = ap->a_m[i]; 6058 if (pg->flags & PG_FAKE) { 6059 voff_t offset = pg->offset; 6060 KASSERT(pg->offset == ap->a_offset + (i << PAGE_SHIFT)); 6061 rw_exit(rw); 6062 6063 va = zfs_map_page(pg, S_WRITE); 6064 err = dmu_read(zfsvfs->z_os, zp->z_id, offset, 6065 PAGE_SIZE, va, DMU_READ_PREFETCH); 6066 zfs_unmap_page(pg, va); 6067 6068 if (err != 0) { 6069 uvm_aio_aiodone_pages(ap->a_m, npages, false, err); 6070 memset(ap->a_m, 0, sizeof(ap->a_m[0]) * 6071 npages); 6072 break; 6073 } 6074 rw_enter(rw, RW_WRITER); 6075 pg->flags &= ~(PG_FAKE); 6076 } 6077 6078 if (memwrite && uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN) { 6079 /* For write faults, start dirtiness tracking. */ 6080 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_UNKNOWN); 6081 } 6082 } 6083 rw_exit(rw); 6084 6085 ZFS_EXIT(zfsvfs); 6086 fstrans_done(mp); 6087 6088 return (err); 6089} 6090 6091static int 6092zfs_putapage(vnode_t *vp, page_t **pp, int count, int flags) 6093{ 6094 znode_t *zp = VTOZ(vp); 6095 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6096 dmu_tx_t *tx; 6097 voff_t off, koff; 6098 voff_t len, klen; 6099 int err; 6100 6101 bool *cleanedp; 6102 struct uvm_object *uobj = &vp->v_uobj; 6103 krwlock_t *rw = uobj->vmobjlock; 6104 6105 if (zp->z_sa_hdl == NULL) { 6106 err = 0; 6107 goto out; 6108 } 6109 6110 /* 6111 * Calculate the length and assert that no whole pages are past EOF. 6112 * This check is equivalent to "off + len <= round_page(zp->z_size)", 6113 * with gyrations to avoid signed integer overflow. 6114 */ 6115 6116 off = pp[0]->offset; 6117 len = count * PAGESIZE; 6118 KASSERT(off <= zp->z_size); 6119 KASSERT(len <= round_page(zp->z_size)); 6120 KASSERT(off <= round_page(zp->z_size) - len); 6121 6122 /* 6123 * If EOF is within the last page, reduce len to avoid writing past 6124 * the file size in the ZFS buffer. Assert that 6125 * "off + len <= zp->z_size", again avoiding signed integer overflow. 
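	 * For example, with 4 KB pages, off == 0x1000 and z_size == 0x1800,
	 * len shrinks from 0x1000 to 0x800 so that only the bytes up to EOF
	 * are written.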
6126 */ 6127 6128 if (len > zp->z_size - off) { 6129 len = zp->z_size - off; 6130 } 6131 KASSERT(len <= zp->z_size); 6132 KASSERT(off <= zp->z_size - len); 6133 6134 if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 6135 zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 6136 err = SET_ERROR(EDQUOT); 6137 goto out; 6138 } 6139 tx = dmu_tx_create(zfsvfs->z_os); 6140 dmu_tx_hold_write(tx, zp->z_id, off, len); 6141 6142 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 6143 zfs_sa_upgrade_txholds(tx, zp); 6144 err = dmu_tx_assign(tx, TXG_WAIT); 6145 if (err != 0) { 6146 dmu_tx_abort(tx); 6147 goto out; 6148 } 6149 6150 if (zp->z_blksz <= PAGESIZE) { 6151 KASSERTMSG(count == 1, "vp %p pp %p count %d", vp, pp, count); 6152 caddr_t va = zfs_map_page(*pp, S_READ); 6153 ASSERT3U(len, <=, PAGESIZE); 6154 dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 6155 zfs_unmap_page(*pp, va); 6156 } else { 6157 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 6158 } 6159 cleanedp = tsd_get(zfs_putpage_key); 6160 *cleanedp = true; 6161 6162 if (err == 0) { 6163 uint64_t mtime[2], ctime[2]; 6164 sa_bulk_attr_t bulk[3]; 6165 int count = 0; 6166 6167 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 6168 &mtime, 16); 6169 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 6170 &ctime, 16); 6171 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 6172 &zp->z_pflags, 8); 6173 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 6174 B_TRUE); 6175 err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 6176 ASSERT0(err); 6177 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 6178 } 6179 dmu_tx_commit(tx); 6180 6181out: 6182 uvm_aio_aiodone_pages(pp, count, true, err); 6183 return (err); 6184} 6185 6186static void 6187zfs_netbsd_gop_markupdate(vnode_t *vp, int flags) 6188{ 6189 znode_t *zp = VTOZ(vp); 6190 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6191 dmu_tx_t *tx; 6192 sa_bulk_attr_t bulk[2]; 6193 uint64_t mtime[2], ctime[2]; 6194 int count = 0, err; 6195 6196 KASSERT(flags == GOP_UPDATE_MODIFIED); 6197 6198 tx = dmu_tx_create(zfsvfs->z_os); 6199 err = dmu_tx_assign(tx, TXG_WAIT); 6200 if (err != 0) { 6201 dmu_tx_abort(tx); 6202 return; 6203 } 6204 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 6205 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 6206 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); 6207 dmu_tx_commit(tx); 6208} 6209 6210static int 6211zfs_netbsd_putpages(void *v) 6212{ 6213 struct vop_putpages_args /* { 6214 struct vnode *a_vp; 6215 voff_t a_offlo; 6216 voff_t a_offhi; 6217 int a_flags; 6218 } */ * const ap = v; 6219 6220 struct vnode *vp = ap->a_vp; 6221 voff_t offlo = ap->a_offlo; 6222 voff_t offhi = ap->a_offhi; 6223 int flags = ap->a_flags; 6224 6225 znode_t *zp = VTOZ(vp); 6226 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6227 rl_t *rl = NULL; 6228 uint64_t len; 6229 int error; 6230 bool cleaned = false; 6231 6232 bool async = (flags & PGO_SYNCIO) == 0; 6233 bool cleaning = (flags & PGO_CLEANIT) != 0; 6234 6235 if (cleaning) { 6236 ASSERT((offlo & PAGE_MASK) == 0 && (offhi & PAGE_MASK) == 0); 6237 ASSERT(offlo < offhi || offhi == 0); 6238 if (offhi == 0) 6239 len = UINT64_MAX; 6240 else 6241 len = offhi - offlo; 6242 rw_exit(vp->v_uobj.vmobjlock); 6243 if (curlwp == uvm.pagedaemon_lwp) { 6244 error = fstrans_start_nowait(vp->v_mount); 6245 if (error) 6246 return error; 6247 } else { 6248 vfs_t *mp = vp->v_mount; 6249 fstrans_start(mp); 6250 if (vp->v_mount != mp) { 6251 fstrans_done(mp); 6252 
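				/*
				 * The vnode was disassociated from the
				 * mount (e.g. by a forced unmount) while
				 * fstrans_start() slept, so there can be
				 * no cached pages left to flush.
				 */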
ASSERT(!vn_has_cached_data(vp)); 6253 return 0; 6254 } 6255 } 6256 /* 6257 * Cannot use ZFS_ENTER() here as it returns with error 6258 * if z_unmounted. The next statement is equivalent. 6259 */ 6260 rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG); 6261 6262 rl = zfs_range_lock(zp, offlo, len, RL_WRITER); 6263 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 6264 tsd_set(zfs_putpage_key, &cleaned); 6265 } 6266 error = genfs_putpages(v); 6267 if (cleaning) { 6268 tsd_set(zfs_putpage_key, NULL); 6269 zfs_range_unlock(rl); 6270 6271 /* 6272 * Only zil_commit() if we cleaned something. This avoids 6273 * deadlock if we're called from zfs_netbsd_setsize(). 6274 */ 6275 6276 if (cleaned) 6277 if (!async || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 6278 zil_commit(zfsvfs->z_log, zp->z_id); 6279 ZFS_EXIT(zfsvfs); 6280 fstrans_done(vp->v_mount); 6281 } 6282 return error; 6283} 6284 6285/* 6286 * Restrict the putpages range to the ZFS block containing the offset. 6287 */ 6288static void 6289zfs_netbsd_gop_putrange(struct vnode *vp, off_t off, off_t *lop, off_t *hip) 6290{ 6291 znode_t *zp = VTOZ(vp); 6292 6293 *lop = trunc_page(rounddown2(off, zp->z_blksz)); 6294 *hip = round_page(*lop + zp->z_blksz); 6295} 6296 6297void 6298zfs_netbsd_setsize(vnode_t *vp, off_t size) 6299{ 6300 struct uvm_object *uobj = &vp->v_uobj; 6301 krwlock_t *rw = uobj->vmobjlock; 6302 page_t *pg; 6303 int count, pgoff; 6304 caddr_t va; 6305 off_t tsize; 6306 6307 uvm_vnp_setsize(vp, size); 6308 if (!vn_has_cached_data(vp)) 6309 return; 6310 6311 tsize = trunc_page(size); 6312 if (tsize == size) 6313 return; 6314 6315 /* 6316 * If there's a partial page, we need to zero the tail. 6317 */ 6318 6319 rw_enter(rw, RW_WRITER); 6320 count = 1; 6321 pg = NULL; 6322 if (uvn_findpages(uobj, tsize, &count, &pg, NULL, UFP_NOALLOC)) { 6323 va = zfs_map_page(pg, S_WRITE); 6324 pgoff = size - tsize; 6325 memset(va + pgoff, 0, PAGESIZE - pgoff); 6326 zfs_unmap_page(pg, va); 6327 uvm_page_unbusy(&pg, 1); 6328 } 6329 6330 rw_exit(rw); 6331} 6332 6333static int 6334zfs_netbsd_print(void *v) 6335{ 6336 struct vop_print_args /* { 6337 struct vnode *a_vp; 6338 } */ *ap = v; 6339 vnode_t *vp; 6340 znode_t *zp; 6341 6342 vp = ap->a_vp; 6343 zp = VTOZ(vp); 6344 6345 printf("\tino %" PRIu64 " size %" PRIu64 "\n", 6346 zp->z_id, zp->z_size); 6347 return 0; 6348} 6349 6350const struct genfs_ops zfs_genfsops = { 6351 .gop_write = zfs_putapage, 6352 .gop_markupdate = zfs_netbsd_gop_markupdate, 6353 .gop_putrange = zfs_netbsd_gop_putrange, 6354}; 6355 6356int (**zfs_vnodeop_p)(void *); 6357const struct vnodeopv_entry_desc zfs_vnodeop_entries[] = { 6358 { &vop_default_desc, vn_default_error }, 6359 { &vop_parsepath_desc, genfs_parsepath }, 6360 { &vop_lookup_desc, zfs_netbsd_lookup }, 6361 { &vop_create_desc, zfs_netbsd_create }, 6362 { &vop_mknod_desc, zfs_netbsd_mknod }, 6363 { &vop_open_desc, zfs_netbsd_open }, 6364 { &vop_close_desc, zfs_netbsd_close }, 6365 { &vop_access_desc, zfs_netbsd_access }, 6366 { &vop_accessx_desc, genfs_accessx }, 6367 { &vop_getattr_desc, zfs_netbsd_getattr }, 6368 { &vop_setattr_desc, zfs_netbsd_setattr }, 6369 { &vop_read_desc, zfs_netbsd_read }, 6370 { &vop_write_desc, zfs_netbsd_write }, 6371 { &vop_ioctl_desc, zfs_netbsd_ioctl }, 6372 { &vop_poll_desc, genfs_poll }, 6373 { &vop_kqfilter_desc, genfs_kqfilter }, 6374 { &vop_revoke_desc, genfs_revoke }, 6375 { &vop_fsync_desc, zfs_netbsd_fsync }, 6376 { &vop_remove_desc, zfs_netbsd_remove }, 6377 { &vop_link_desc, zfs_netbsd_link }, 6378 { &vop_lock_desc, genfs_lock }, 6379 
{ &vop_unlock_desc, genfs_unlock }, 6380 { &vop_rename_desc, zfs_netbsd_rename }, 6381 { &vop_mkdir_desc, zfs_netbsd_mkdir }, 6382 { &vop_rmdir_desc, zfs_netbsd_rmdir }, 6383 { &vop_symlink_desc, zfs_netbsd_symlink }, 6384 { &vop_readdir_desc, zfs_netbsd_readdir }, 6385 { &vop_readlink_desc, zfs_netbsd_readlink }, 6386 { &vop_inactive_desc, zfs_netbsd_inactive }, 6387 { &vop_reclaim_desc, zfs_netbsd_reclaim }, 6388 { &vop_pathconf_desc, zfs_netbsd_pathconf }, 6389 { &vop_seek_desc, genfs_seek }, 6390 { &vop_getpages_desc, zfs_netbsd_getpages }, 6391 { &vop_putpages_desc, zfs_netbsd_putpages }, 6392 { &vop_mmap_desc, genfs_mmap }, 6393 { &vop_islocked_desc, genfs_islocked }, 6394 { &vop_advlock_desc, zfs_netbsd_advlock }, 6395 { &vop_print_desc, zfs_netbsd_print }, 6396 { &vop_fcntl_desc, genfs_fcntl }, 6397 { NULL, NULL } 6398}; 6399 6400const struct vnodeopv_desc zfs_vnodeop_opv_desc = 6401 { &zfs_vnodeop_p, zfs_vnodeop_entries }; 6402 6403int (**zfs_specop_p)(void *); 6404const struct vnodeopv_entry_desc zfs_specop_entries[] = { 6405 { &vop_default_desc, vn_default_error }, 6406 GENFS_SPECOP_ENTRIES, 6407 { &vop_close_desc, spec_close }, 6408 { &vop_access_desc, zfs_netbsd_access }, 6409 { &vop_accessx_desc, genfs_accessx }, 6410 { &vop_getattr_desc, zfs_netbsd_getattr }, 6411 { &vop_setattr_desc, zfs_netbsd_setattr }, 6412 { &vop_read_desc, /**/zfs_netbsd_read }, 6413 { &vop_write_desc, /**/zfs_netbsd_write }, 6414 { &vop_fsync_desc, zfs_spec_fsync }, 6415 { &vop_lock_desc, genfs_lock }, 6416 { &vop_unlock_desc, genfs_unlock }, 6417 { &vop_inactive_desc, zfs_netbsd_inactive }, 6418 { &vop_reclaim_desc, zfs_netbsd_reclaim }, 6419 { &vop_islocked_desc, genfs_islocked }, 6420 { &vop_bwrite_desc, vn_bwrite }, 6421 { &vop_print_desc, zfs_netbsd_print }, 6422 { &vop_fcntl_desc, genfs_fcntl }, 6423 { NULL, NULL } 6424}; 6425 6426const struct vnodeopv_desc zfs_specop_opv_desc = 6427 { &zfs_specop_p, zfs_specop_entries }; 6428 6429int (**zfs_fifoop_p)(void *); 6430const struct vnodeopv_entry_desc zfs_fifoop_entries[] = { 6431 { &vop_default_desc, vn_default_error }, 6432 GENFS_FIFOOP_ENTRIES, 6433 { &vop_close_desc, vn_fifo_bypass }, 6434 { &vop_access_desc, zfs_netbsd_access }, 6435 { &vop_accessx_desc, genfs_accessx }, 6436 { &vop_getattr_desc, zfs_netbsd_getattr }, 6437 { &vop_setattr_desc, zfs_netbsd_setattr }, 6438 { &vop_read_desc, /**/zfs_netbsd_read }, 6439 { &vop_write_desc, /**/zfs_netbsd_write }, 6440 { &vop_fsync_desc, zfs_netbsd_fsync }, 6441 { &vop_lock_desc, genfs_lock }, 6442 { &vop_unlock_desc, genfs_unlock }, 6443 { &vop_inactive_desc, zfs_netbsd_inactive }, 6444 { &vop_reclaim_desc, zfs_netbsd_reclaim }, 6445 { &vop_islocked_desc, genfs_islocked }, 6446 { &vop_bwrite_desc, vn_bwrite }, 6447 { &vop_strategy_desc, vn_fifo_bypass }, 6448 { &vop_print_desc, zfs_netbsd_print }, 6449 { &vop_fcntl_desc, genfs_fcntl }, 6450 { NULL, NULL } 6451}; 6452 6453const struct vnodeopv_desc zfs_fifoop_opv_desc = 6454 { &zfs_fifoop_p, zfs_fifoop_entries }; 6455 6456#endif /* __NetBSD__ */ 6457