zfs_vnops.c revision 254982
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2013 by Delphix. All rights reserved. 24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 25 */ 26 27/* Portions Copyright 2007 Jeremy Teo */ 28/* Portions Copyright 2010 Robert Milkowski */ 29 30#include <sys/types.h> 31#include <sys/param.h> 32#include <sys/time.h> 33#include <sys/systm.h> 34#include <sys/sysmacros.h> 35#include <sys/resource.h> 36#include <sys/vfs.h> 37#include <sys/vm.h> 38#include <sys/vnode.h> 39#include <sys/file.h> 40#include <sys/stat.h> 41#include <sys/kmem.h> 42#include <sys/taskq.h> 43#include <sys/uio.h> 44#include <sys/atomic.h> 45#include <sys/namei.h> 46#include <sys/mman.h> 47#include <sys/cmn_err.h> 48#include <sys/errno.h> 49#include <sys/unistd.h> 50#include <sys/zfs_dir.h> 51#include <sys/zfs_ioctl.h> 52#include <sys/fs/zfs.h> 53#include <sys/dmu.h> 54#include <sys/dmu_objset.h> 55#include <sys/spa.h> 56#include <sys/txg.h> 57#include <sys/dbuf.h> 58#include <sys/zap.h> 59#include <sys/sa.h> 60#include <sys/dirent.h> 61#include <sys/policy.h> 62#include <sys/sunddi.h> 63#include <sys/filio.h> 64#include <sys/sid.h> 65#include <sys/zfs_ctldir.h> 66#include <sys/zfs_fuid.h> 67#include <sys/zfs_sa.h> 68#include <sys/dnlc.h> 69#include <sys/zfs_rlock.h> 70#include <sys/extdirent.h> 71#include <sys/kidmap.h> 72#include <sys/bio.h> 73#include <sys/buf.h> 74#include <sys/sf_buf.h> 75#include <sys/sched.h> 76#include <sys/acl.h> 77#include <vm/vm_param.h> 78#include <vm/vm_pageout.h> 79 80/* 81 * Programming rules. 82 * 83 * Each vnode op performs some logical unit of work. To do this, the ZPL must 84 * properly lock its in-core state, create a DMU transaction, do the work, 85 * record this work in the intent log (ZIL), commit the DMU transaction, 86 * and wait for the intent log to commit if it is a synchronous operation. 87 * Moreover, the vnode ops must work in both normal and log replay context. 88 * The ordering of events is important to avoid deadlocks and references 89 * to freed memory. The example below illustrates the following Big Rules: 90 * 91 * (1) A check must be made in each zfs thread for a mounted file system. 92 * This is done avoiding races using ZFS_ENTER(zfsvfs). 93 * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 94 * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 95 * can return EIO from the calling function. 96 * 97 * (2) VN_RELE() should always be the last thing except for zil_commit() 98 * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 99 * First, if it's the last reference, the vnode/znode 100 * can be freed, so the zp may point to freed memory. Second, the last 101 * reference will call zfs_zinactive(), which may induce a lot of work -- 102 * pushing cached pages (which acquires range locks) and syncing out 103 * cached atime changes. Third, zfs_zinactive() may require a new tx, 104 * which could deadlock the system if you were already holding one. 105 * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 106 * 107 * (3) All range locks must be grabbed before calling dmu_tx_assign(), 108 * as they can span dmu_tx_assign() calls. 109 * 110 * (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign(). 111 * This is critical because we don't want to block while holding locks. 112 * Note, in particular, that if a lock is sometimes acquired before 113 * the tx assigns, and sometimes after (e.g. z_lock), then failing to 114 * use a non-blocking assign can deadlock the system. The scenario: 115 * 116 * Thread A has grabbed a lock before calling dmu_tx_assign(). 117 * Thread B is in an already-assigned tx, and blocks for this lock. 118 * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 119 * forever, because the previous txg can't quiesce until B's tx commits. 120 * 121 * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 122 * then drop all locks, call dmu_tx_wait(), and try again. 123 * 124 * (5) If the operation succeeded, generate the intent log entry for it 125 * before dropping locks. This ensures that the ordering of events 126 * in the intent log matches the order in which they actually occurred. 127 * During ZIL replay the zfs_log_* functions will update the sequence 128 * number to indicate the zil transaction has replayed. 129 * 130 * (6) At the end of each vnode op, the DMU tx must always commit, 131 * regardless of whether there were any errors. 132 * 133 * (7) After dropping all locks, invoke zil_commit(zilog, foid) 134 * to ensure that synchronous semantics are provided when necessary. 135 * 136 * In general, this is how things should be ordered in each vnode op: 137 * 138 * ZFS_ENTER(zfsvfs); // exit if unmounted 139 * top: 140 * zfs_dirent_lock(&dl, ...) // lock directory entry (may VN_HOLD()) 141 * rw_enter(...); // grab any other locks you need 142 * tx = dmu_tx_create(...); // get DMU tx 143 * dmu_tx_hold_*(); // hold each object you might modify 144 * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign 145 * if (error) { 146 * rw_exit(...); // drop locks 147 * zfs_dirent_unlock(dl); // unlock directory entry 148 * VN_RELE(...); // release held vnodes 149 * if (error == ERESTART) { 150 * dmu_tx_wait(tx); 151 * dmu_tx_abort(tx); 152 * goto top; 153 * } 154 * dmu_tx_abort(tx); // abort DMU tx 155 * ZFS_EXIT(zfsvfs); // finished in zfs 156 * return (error); // really out of space 157 * } 158 * error = do_real_work(); // do whatever this VOP does 159 * if (error == 0) 160 * zfs_log_*(...); // on success, make ZIL entry 161 * dmu_tx_commit(tx); // commit DMU tx -- error or not 162 * rw_exit(...); // drop locks 163 * zfs_dirent_unlock(dl); // unlock directory entry 164 * VN_RELE(...); // release held vnodes 165 * zil_commit(zilog, foid); // synchronous when necessary 166 * ZFS_EXIT(zfsvfs); // finished in zfs 167 * return (error); // done, report error 168 */ 169 170/* ARGSUSED */ 171static int 172zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 173{ 174 znode_t *zp = VTOZ(*vpp); 175 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 176 177 ZFS_ENTER(zfsvfs); 178 ZFS_VERIFY_ZP(zp); 179 180 if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 181 ((flag & FAPPEND) == 0)) { 182 ZFS_EXIT(zfsvfs); 183 return (SET_ERROR(EPERM)); 184 } 185 186 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 187 ZTOV(zp)->v_type == VREG && 188 !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 189 if (fs_vscan(*vpp, cr, 0) != 0) { 190 ZFS_EXIT(zfsvfs); 191 return (SET_ERROR(EACCES)); 192 } 193 } 194 195 /* Keep a count of the synchronous opens in the znode */ 196 if (flag & (FSYNC | FDSYNC)) 197 atomic_inc_32(&zp->z_sync_cnt); 198 199 ZFS_EXIT(zfsvfs); 200 return (0); 201} 202 203/* ARGSUSED */ 204static int 205zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 206 caller_context_t *ct) 207{ 208 znode_t *zp = VTOZ(vp); 209 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 210 211 /* 212 * Clean up any locks held by this process on the vp. 213 */ 214 cleanlocks(vp, ddi_get_pid(), 0); 215 cleanshares(vp, ddi_get_pid()); 216 217 ZFS_ENTER(zfsvfs); 218 ZFS_VERIFY_ZP(zp); 219 220 /* Decrement the synchronous opens in the znode */ 221 if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 222 atomic_dec_32(&zp->z_sync_cnt); 223 224 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 225 ZTOV(zp)->v_type == VREG && 226 !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 227 VERIFY(fs_vscan(vp, cr, 1) == 0); 228 229 ZFS_EXIT(zfsvfs); 230 return (0); 231} 232 233/* 234 * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 235 * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 236 */ 237static int 238zfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 239{ 240 znode_t *zp = VTOZ(vp); 241 uint64_t noff = (uint64_t)*off; /* new offset */ 242 uint64_t file_sz; 243 int error; 244 boolean_t hole; 245 246 file_sz = zp->z_size; 247 if (noff >= file_sz) { 248 return (SET_ERROR(ENXIO)); 249 } 250 251 if (cmd == _FIO_SEEK_HOLE) 252 hole = B_TRUE; 253 else 254 hole = B_FALSE; 255 256 error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 257 258 /* end of file? */ 259 if ((error == ESRCH) || (noff > file_sz)) { 260 /* 261 * Handle the virtual hole at the end of file. 262 */ 263 if (hole) { 264 *off = file_sz; 265 return (0); 266 } 267 return (SET_ERROR(ENXIO)); 268 } 269 270 if (noff < *off) 271 return (error); 272 *off = noff; 273 return (error); 274} 275 276/* ARGSUSED */ 277static int 278zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 279 int *rvalp, caller_context_t *ct) 280{ 281 offset_t off; 282 int error; 283 zfsvfs_t *zfsvfs; 284 znode_t *zp; 285 286 switch (com) { 287 case _FIOFFS: 288 return (0); 289 290 /* 291 * The following two ioctls are used by bfu. Faking out, 292 * necessary to avoid bfu errors. 293 */ 294 case _FIOGDIO: 295 case _FIOSDIO: 296 return (0); 297 298 case _FIO_SEEK_DATA: 299 case _FIO_SEEK_HOLE: 300#ifdef sun 301 if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 302 return (SET_ERROR(EFAULT)); 303#else 304 off = *(offset_t *)data; 305#endif 306 zp = VTOZ(vp); 307 zfsvfs = zp->z_zfsvfs; 308 ZFS_ENTER(zfsvfs); 309 ZFS_VERIFY_ZP(zp); 310 311 /* offset parameter is in/out */ 312 error = zfs_holey(vp, com, &off); 313 ZFS_EXIT(zfsvfs); 314 if (error) 315 return (error); 316#ifdef sun 317 if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 318 return (SET_ERROR(EFAULT)); 319#else 320 *(offset_t *)data = off; 321#endif 322 return (0); 323 } 324 return (SET_ERROR(ENOTTY)); 325} 326 327static vm_page_t 328page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 329{ 330 vm_object_t obj; 331 vm_page_t pp; 332 333 obj = vp->v_object; 334 zfs_vmobject_assert_wlocked(obj); 335 336 for (;;) { 337 if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 338 pp->valid) { 339 if (vm_page_xbusied(pp)) { 340 /* 341 * Reference the page before unlocking and 342 * sleeping so that the page daemon is less 343 * likely to reclaim it. 344 */ 345 vm_page_reference(pp); 346 vm_page_lock(pp); 347 zfs_vmobject_wunlock(obj); 348 vm_page_busy_sleep(pp, "zfsmwb"); 349 zfs_vmobject_wlock(obj); 350 continue; 351 } 352 vm_page_sbusy(pp); 353 } else if (pp == NULL) { 354 pp = vm_page_alloc(obj, OFF_TO_IDX(start), 355 VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED | 356 VM_ALLOC_SBUSY); 357 } else { 358 ASSERT(pp != NULL && !pp->valid); 359 pp = NULL; 360 } 361 362 if (pp != NULL) { 363 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 364 vm_object_pip_add(obj, 1); 365 pmap_remove_write(pp); 366 vm_page_clear_dirty(pp, off, nbytes); 367 } 368 break; 369 } 370 return (pp); 371} 372 373static void 374page_unbusy(vm_page_t pp) 375{ 376 377 vm_page_sunbusy(pp); 378 vm_object_pip_subtract(pp->object, 1); 379} 380 381static vm_page_t 382page_hold(vnode_t *vp, int64_t start) 383{ 384 vm_object_t obj; 385 vm_page_t pp; 386 387 obj = vp->v_object; 388 zfs_vmobject_assert_wlocked(obj); 389 390 for (;;) { 391 if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 392 pp->valid) { 393 if (vm_page_xbusied(pp)) { 394 /* 395 * Reference the page before unlocking and 396 * sleeping so that the page daemon is less 397 * likely to reclaim it. 398 */ 399 vm_page_reference(pp); 400 vm_page_lock(pp); 401 zfs_vmobject_wunlock(obj); 402 vm_page_busy_sleep(pp, "zfsmwb"); 403 zfs_vmobject_wlock(obj); 404 continue; 405 } 406 407 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 408 vm_page_lock(pp); 409 vm_page_hold(pp); 410 vm_page_unlock(pp); 411 412 } else 413 pp = NULL; 414 break; 415 } 416 return (pp); 417} 418 419static void 420page_unhold(vm_page_t pp) 421{ 422 423 vm_page_lock(pp); 424 vm_page_unhold(pp); 425 vm_page_unlock(pp); 426} 427 428static caddr_t 429zfs_map_page(vm_page_t pp, struct sf_buf **sfp) 430{ 431 432 *sfp = sf_buf_alloc(pp, 0); 433 return ((caddr_t)sf_buf_kva(*sfp)); 434} 435 436static void 437zfs_unmap_page(struct sf_buf *sf) 438{ 439 440 sf_buf_free(sf); 441} 442 443/* 444 * When a file is memory mapped, we must keep the IO data synchronized 445 * between the DMU cache and the memory mapped pages. What this means: 446 * 447 * On Write: If we find a memory mapped page, we write to *both* 448 * the page and the dmu buffer. 449 */ 450static void 451update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 452 int segflg, dmu_tx_t *tx) 453{ 454 vm_object_t obj; 455 struct sf_buf *sf; 456 caddr_t va; 457 int off; 458 459 ASSERT(vp->v_mount != NULL); 460 obj = vp->v_object; 461 ASSERT(obj != NULL); 462 463 off = start & PAGEOFFSET; 464 zfs_vmobject_wlock(obj); 465 for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 466 vm_page_t pp; 467 int nbytes = imin(PAGESIZE - off, len); 468 469 if (segflg == UIO_NOCOPY) { 470 pp = vm_page_lookup(obj, OFF_TO_IDX(start)); 471 KASSERT(pp != NULL, 472 ("zfs update_pages: NULL page in putpages case")); 473 KASSERT(off == 0, 474 ("zfs update_pages: unaligned data in putpages case")); 475 KASSERT(pp->valid == VM_PAGE_BITS_ALL, 476 ("zfs update_pages: invalid page in putpages case")); 477 KASSERT(vm_page_sbusied(pp), 478 ("zfs update_pages: unbusy page in putpages case")); 479 KASSERT(!pmap_page_is_write_mapped(pp), 480 ("zfs update_pages: writable page in putpages case")); 481 zfs_vmobject_wunlock(obj); 482 483 va = zfs_map_page(pp, &sf); 484 (void) dmu_write(os, oid, start, nbytes, va, tx); 485 zfs_unmap_page(sf); 486 487 zfs_vmobject_wlock(obj); 488 vm_page_undirty(pp); 489 } else if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { 490 zfs_vmobject_wunlock(obj); 491 492 va = zfs_map_page(pp, &sf); 493 (void) dmu_read(os, oid, start+off, nbytes, 494 va+off, DMU_READ_PREFETCH);; 495 zfs_unmap_page(sf); 496 497 zfs_vmobject_wlock(obj); 498 page_unbusy(pp); 499 } 500 len -= nbytes; 501 off = 0; 502 } 503 if (segflg != UIO_NOCOPY) 504 vm_object_pip_wakeupn(obj, 0); 505 zfs_vmobject_wunlock(obj); 506} 507 508/* 509 * Read with UIO_NOCOPY flag means that sendfile(2) requests 510 * ZFS to populate a range of page cache pages with data. 511 * 512 * NOTE: this function could be optimized to pre-allocate 513 * all pages in advance, drain exclusive busy on all of them, 514 * map them into contiguous KVA region and populate them 515 * in one single dmu_read() call. 516 */ 517static int 518mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) 519{ 520 znode_t *zp = VTOZ(vp); 521 objset_t *os = zp->z_zfsvfs->z_os; 522 struct sf_buf *sf; 523 vm_object_t obj; 524 vm_page_t pp; 525 int64_t start; 526 caddr_t va; 527 int len = nbytes; 528 int off; 529 int error = 0; 530 531 ASSERT(uio->uio_segflg == UIO_NOCOPY); 532 ASSERT(vp->v_mount != NULL); 533 obj = vp->v_object; 534 ASSERT(obj != NULL); 535 ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); 536 537 zfs_vmobject_wlock(obj); 538 for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { 539 int bytes = MIN(PAGESIZE, len); 540 541 pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY | 542 VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); 543 if (pp->valid == 0) { 544 zfs_vmobject_wunlock(obj); 545 va = zfs_map_page(pp, &sf); 546 error = dmu_read(os, zp->z_id, start, bytes, va, 547 DMU_READ_PREFETCH); 548 if (bytes != PAGESIZE && error == 0) 549 bzero(va + bytes, PAGESIZE - bytes); 550 zfs_unmap_page(sf); 551 zfs_vmobject_wlock(obj); 552 vm_page_sunbusy(pp); 553 vm_page_lock(pp); 554 if (error) { 555 if (pp->wire_count == 0 && pp->valid == 0 && 556 !vm_page_busied(pp)) 557 vm_page_free(pp); 558 } else { 559 pp->valid = VM_PAGE_BITS_ALL; 560 vm_page_activate(pp); 561 } 562 vm_page_unlock(pp); 563 } else 564 vm_page_sunbusy(pp); 565 if (error) 566 break; 567 uio->uio_resid -= bytes; 568 uio->uio_offset += bytes; 569 len -= bytes; 570 } 571 zfs_vmobject_wunlock(obj); 572 return (error); 573} 574 575/* 576 * When a file is memory mapped, we must keep the IO data synchronized 577 * between the DMU cache and the memory mapped pages. What this means: 578 * 579 * On Read: We "read" preferentially from memory mapped pages, 580 * else we default from the dmu buffer. 581 * 582 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 583 * the file is memory mapped. 584 */ 585static int 586mappedread(vnode_t *vp, int nbytes, uio_t *uio) 587{ 588 znode_t *zp = VTOZ(vp); 589 objset_t *os = zp->z_zfsvfs->z_os; 590 vm_object_t obj; 591 int64_t start; 592 caddr_t va; 593 int len = nbytes; 594 int off; 595 int error = 0; 596 597 ASSERT(vp->v_mount != NULL); 598 obj = vp->v_object; 599 ASSERT(obj != NULL); 600 601 start = uio->uio_loffset; 602 off = start & PAGEOFFSET; 603 zfs_vmobject_wlock(obj); 604 for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 605 vm_page_t pp; 606 uint64_t bytes = MIN(PAGESIZE - off, len); 607 608 if (pp = page_hold(vp, start)) { 609 struct sf_buf *sf; 610 caddr_t va; 611 612 zfs_vmobject_wunlock(obj); 613 va = zfs_map_page(pp, &sf); 614 error = uiomove(va + off, bytes, UIO_READ, uio); 615 zfs_unmap_page(sf); 616 zfs_vmobject_wlock(obj); 617 page_unhold(pp); 618 } else { 619 zfs_vmobject_wunlock(obj); 620 error = dmu_read_uio(os, zp->z_id, uio, bytes); 621 zfs_vmobject_wlock(obj); 622 } 623 len -= bytes; 624 off = 0; 625 if (error) 626 break; 627 } 628 zfs_vmobject_wunlock(obj); 629 return (error); 630} 631 632offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 633 634/* 635 * Read bytes from specified file into supplied buffer. 636 * 637 * IN: vp - vnode of file to be read from. 638 * uio - structure supplying read location, range info, 639 * and return buffer. 640 * ioflag - SYNC flags; used to provide FRSYNC semantics. 641 * cr - credentials of caller. 642 * ct - caller context 643 * 644 * OUT: uio - updated offset and range, buffer filled. 645 * 646 * RETURN: 0 on success, error code on failure. 647 * 648 * Side Effects: 649 * vp - atime updated if byte count > 0 650 */ 651/* ARGSUSED */ 652static int 653zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 654{ 655 znode_t *zp = VTOZ(vp); 656 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 657 objset_t *os; 658 ssize_t n, nbytes; 659 int error = 0; 660 rl_t *rl; 661 xuio_t *xuio = NULL; 662 663 ZFS_ENTER(zfsvfs); 664 ZFS_VERIFY_ZP(zp); 665 os = zfsvfs->z_os; 666 667 if (zp->z_pflags & ZFS_AV_QUARANTINED) { 668 ZFS_EXIT(zfsvfs); 669 return (SET_ERROR(EACCES)); 670 } 671 672 /* 673 * Validate file offset 674 */ 675 if (uio->uio_loffset < (offset_t)0) { 676 ZFS_EXIT(zfsvfs); 677 return (SET_ERROR(EINVAL)); 678 } 679 680 /* 681 * Fasttrack empty reads 682 */ 683 if (uio->uio_resid == 0) { 684 ZFS_EXIT(zfsvfs); 685 return (0); 686 } 687 688 /* 689 * Check for mandatory locks 690 */ 691 if (MANDMODE(zp->z_mode)) { 692 if (error = chklock(vp, FREAD, 693 uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 694 ZFS_EXIT(zfsvfs); 695 return (error); 696 } 697 } 698 699 /* 700 * If we're in FRSYNC mode, sync out this znode before reading it. 701 */ 702 if (zfsvfs->z_log && 703 (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) 704 zil_commit(zfsvfs->z_log, zp->z_id); 705 706 /* 707 * Lock the range against changes. 708 */ 709 rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 710 711 /* 712 * If we are reading past end-of-file we can skip 713 * to the end; but we might still need to set atime. 714 */ 715 if (uio->uio_loffset >= zp->z_size) { 716 error = 0; 717 goto out; 718 } 719 720 ASSERT(uio->uio_loffset < zp->z_size); 721 n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 722 723#ifdef sun 724 if ((uio->uio_extflg == UIO_XUIO) && 725 (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 726 int nblk; 727 int blksz = zp->z_blksz; 728 uint64_t offset = uio->uio_loffset; 729 730 xuio = (xuio_t *)uio; 731 if ((ISP2(blksz))) { 732 nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 733 blksz)) / blksz; 734 } else { 735 ASSERT(offset + n <= blksz); 736 nblk = 1; 737 } 738 (void) dmu_xuio_init(xuio, nblk); 739 740 if (vn_has_cached_data(vp)) { 741 /* 742 * For simplicity, we always allocate a full buffer 743 * even if we only expect to read a portion of a block. 744 */ 745 while (--nblk >= 0) { 746 (void) dmu_xuio_add(xuio, 747 dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 748 blksz), 0, blksz); 749 } 750 } 751 } 752#endif /* sun */ 753 754 while (n > 0) { 755 nbytes = MIN(n, zfs_read_chunk_size - 756 P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 757 758#ifdef __FreeBSD__ 759 if (uio->uio_segflg == UIO_NOCOPY) 760 error = mappedread_sf(vp, nbytes, uio); 761 else 762#endif /* __FreeBSD__ */ 763 if (vn_has_cached_data(vp)) 764 error = mappedread(vp, nbytes, uio); 765 else 766 error = dmu_read_uio(os, zp->z_id, uio, nbytes); 767 if (error) { 768 /* convert checksum errors into IO errors */ 769 if (error == ECKSUM) 770 error = SET_ERROR(EIO); 771 break; 772 } 773 774 n -= nbytes; 775 } 776out: 777 zfs_range_unlock(rl); 778 779 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 780 ZFS_EXIT(zfsvfs); 781 return (error); 782} 783 784/* 785 * Write the bytes to a file. 786 * 787 * IN: vp - vnode of file to be written to. 788 * uio - structure supplying write location, range info, 789 * and data buffer. 790 * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is 791 * set if in append mode. 792 * cr - credentials of caller. 793 * ct - caller context (NFS/CIFS fem monitor only) 794 * 795 * OUT: uio - updated offset and range. 796 * 797 * RETURN: 0 on success, error code on failure. 798 * 799 * Timestamps: 800 * vp - ctime|mtime updated if byte count > 0 801 */ 802 803/* ARGSUSED */ 804static int 805zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 806{ 807 znode_t *zp = VTOZ(vp); 808 rlim64_t limit = MAXOFFSET_T; 809 ssize_t start_resid = uio->uio_resid; 810 ssize_t tx_bytes; 811 uint64_t end_size; 812 dmu_tx_t *tx; 813 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 814 zilog_t *zilog; 815 offset_t woff; 816 ssize_t n, nbytes; 817 rl_t *rl; 818 int max_blksz = zfsvfs->z_max_blksz; 819 int error = 0; 820 arc_buf_t *abuf; 821 iovec_t *aiov = NULL; 822 xuio_t *xuio = NULL; 823 int i_iov = 0; 824 int iovcnt = uio->uio_iovcnt; 825 iovec_t *iovp = uio->uio_iov; 826 int write_eof; 827 int count = 0; 828 sa_bulk_attr_t bulk[4]; 829 uint64_t mtime[2], ctime[2]; 830 831 /* 832 * Fasttrack empty write 833 */ 834 n = start_resid; 835 if (n == 0) 836 return (0); 837 838 if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 839 limit = MAXOFFSET_T; 840 841 ZFS_ENTER(zfsvfs); 842 ZFS_VERIFY_ZP(zp); 843 844 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 845 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 846 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 847 &zp->z_size, 8); 848 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 849 &zp->z_pflags, 8); 850 851 /* 852 * If immutable or not appending then return EPERM 853 */ 854 if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 855 ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 856 (uio->uio_loffset < zp->z_size))) { 857 ZFS_EXIT(zfsvfs); 858 return (SET_ERROR(EPERM)); 859 } 860 861 zilog = zfsvfs->z_log; 862 863 /* 864 * Validate file offset 865 */ 866 woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 867 if (woff < 0) { 868 ZFS_EXIT(zfsvfs); 869 return (SET_ERROR(EINVAL)); 870 } 871 872 /* 873 * Check for mandatory locks before calling zfs_range_lock() 874 * in order to prevent a deadlock with locks set via fcntl(). 875 */ 876 if (MANDMODE((mode_t)zp->z_mode) && 877 (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 878 ZFS_EXIT(zfsvfs); 879 return (error); 880 } 881 882#ifdef sun 883 /* 884 * Pre-fault the pages to ensure slow (eg NFS) pages 885 * don't hold up txg. 886 * Skip this if uio contains loaned arc_buf. 887 */ 888 if ((uio->uio_extflg == UIO_XUIO) && 889 (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 890 xuio = (xuio_t *)uio; 891 else 892 uio_prefaultpages(MIN(n, max_blksz), uio); 893#endif /* sun */ 894 895 /* 896 * If in append mode, set the io offset pointer to eof. 897 */ 898 if (ioflag & FAPPEND) { 899 /* 900 * Obtain an appending range lock to guarantee file append 901 * semantics. We reset the write offset once we have the lock. 902 */ 903 rl = zfs_range_lock(zp, 0, n, RL_APPEND); 904 woff = rl->r_off; 905 if (rl->r_len == UINT64_MAX) { 906 /* 907 * We overlocked the file because this write will cause 908 * the file block size to increase. 909 * Note that zp_size cannot change with this lock held. 910 */ 911 woff = zp->z_size; 912 } 913 uio->uio_loffset = woff; 914 } else { 915 /* 916 * Note that if the file block size will change as a result of 917 * this write, then this range lock will lock the entire file 918 * so that we can re-write the block safely. 919 */ 920 rl = zfs_range_lock(zp, woff, n, RL_WRITER); 921 } 922 923 if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { 924 zfs_range_unlock(rl); 925 ZFS_EXIT(zfsvfs); 926 return (EFBIG); 927 } 928 929 if (woff >= limit) { 930 zfs_range_unlock(rl); 931 ZFS_EXIT(zfsvfs); 932 return (SET_ERROR(EFBIG)); 933 } 934 935 if ((woff + n) > limit || woff > (limit - n)) 936 n = limit - woff; 937 938 /* Will this write extend the file length? */ 939 write_eof = (woff + n > zp->z_size); 940 941 end_size = MAX(zp->z_size, woff + n); 942 943 /* 944 * Write the file in reasonable size chunks. Each chunk is written 945 * in a separate transaction; this keeps the intent log records small 946 * and allows us to do more fine-grained space accounting. 947 */ 948 while (n > 0) { 949 abuf = NULL; 950 woff = uio->uio_loffset; 951again: 952 if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 953 zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 954 if (abuf != NULL) 955 dmu_return_arcbuf(abuf); 956 error = SET_ERROR(EDQUOT); 957 break; 958 } 959 960 if (xuio && abuf == NULL) { 961 ASSERT(i_iov < iovcnt); 962 aiov = &iovp[i_iov]; 963 abuf = dmu_xuio_arcbuf(xuio, i_iov); 964 dmu_xuio_clear(xuio, i_iov); 965 DTRACE_PROBE3(zfs_cp_write, int, i_iov, 966 iovec_t *, aiov, arc_buf_t *, abuf); 967 ASSERT((aiov->iov_base == abuf->b_data) || 968 ((char *)aiov->iov_base - (char *)abuf->b_data + 969 aiov->iov_len == arc_buf_size(abuf))); 970 i_iov++; 971 } else if (abuf == NULL && n >= max_blksz && 972 woff >= zp->z_size && 973 P2PHASE(woff, max_blksz) == 0 && 974 zp->z_blksz == max_blksz) { 975 /* 976 * This write covers a full block. "Borrow" a buffer 977 * from the dmu so that we can fill it before we enter 978 * a transaction. This avoids the possibility of 979 * holding up the transaction if the data copy hangs 980 * up on a pagefault (e.g., from an NFS server mapping). 981 */ 982 size_t cbytes; 983 984 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 985 max_blksz); 986 ASSERT(abuf != NULL); 987 ASSERT(arc_buf_size(abuf) == max_blksz); 988 if (error = uiocopy(abuf->b_data, max_blksz, 989 UIO_WRITE, uio, &cbytes)) { 990 dmu_return_arcbuf(abuf); 991 break; 992 } 993 ASSERT(cbytes == max_blksz); 994 } 995 996 /* 997 * Start a transaction. 998 */ 999 tx = dmu_tx_create(zfsvfs->z_os); 1000 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1001 dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 1002 zfs_sa_upgrade_txholds(tx, zp); 1003 error = dmu_tx_assign(tx, TXG_NOWAIT); 1004 if (error) { 1005 if (error == ERESTART) { 1006 dmu_tx_wait(tx); 1007 dmu_tx_abort(tx); 1008 goto again; 1009 } 1010 dmu_tx_abort(tx); 1011 if (abuf != NULL) 1012 dmu_return_arcbuf(abuf); 1013 break; 1014 } 1015 1016 /* 1017 * If zfs_range_lock() over-locked we grow the blocksize 1018 * and then reduce the lock range. This will only happen 1019 * on the first iteration since zfs_range_reduce() will 1020 * shrink down r_len to the appropriate size. 1021 */ 1022 if (rl->r_len == UINT64_MAX) { 1023 uint64_t new_blksz; 1024 1025 if (zp->z_blksz > max_blksz) { 1026 ASSERT(!ISP2(zp->z_blksz)); 1027 new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); 1028 } else { 1029 new_blksz = MIN(end_size, max_blksz); 1030 } 1031 zfs_grow_blocksize(zp, new_blksz, tx); 1032 zfs_range_reduce(rl, woff, n); 1033 } 1034 1035 /* 1036 * XXX - should we really limit each write to z_max_blksz? 1037 * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 1038 */ 1039 nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 1040 1041 if (woff + nbytes > zp->z_size) 1042 vnode_pager_setsize(vp, woff + nbytes); 1043 1044 if (abuf == NULL) { 1045 tx_bytes = uio->uio_resid; 1046 error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 1047 uio, nbytes, tx); 1048 tx_bytes -= uio->uio_resid; 1049 } else { 1050 tx_bytes = nbytes; 1051 ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 1052 /* 1053 * If this is not a full block write, but we are 1054 * extending the file past EOF and this data starts 1055 * block-aligned, use assign_arcbuf(). Otherwise, 1056 * write via dmu_write(). 1057 */ 1058 if (tx_bytes < max_blksz && (!write_eof || 1059 aiov->iov_base != abuf->b_data)) { 1060 ASSERT(xuio); 1061 dmu_write(zfsvfs->z_os, zp->z_id, woff, 1062 aiov->iov_len, aiov->iov_base, tx); 1063 dmu_return_arcbuf(abuf); 1064 xuio_stat_wbuf_copied(); 1065 } else { 1066 ASSERT(xuio || tx_bytes == max_blksz); 1067 dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 1068 woff, abuf, tx); 1069 } 1070 ASSERT(tx_bytes <= uio->uio_resid); 1071 uioskip(uio, tx_bytes); 1072 } 1073 if (tx_bytes && vn_has_cached_data(vp)) { 1074 update_pages(vp, woff, tx_bytes, zfsvfs->z_os, 1075 zp->z_id, uio->uio_segflg, tx); 1076 } 1077 1078 /* 1079 * If we made no progress, we're done. If we made even 1080 * partial progress, update the znode and ZIL accordingly. 1081 */ 1082 if (tx_bytes == 0) { 1083 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 1084 (void *)&zp->z_size, sizeof (uint64_t), tx); 1085 dmu_tx_commit(tx); 1086 ASSERT(error != 0); 1087 break; 1088 } 1089 1090 /* 1091 * Clear Set-UID/Set-GID bits on successful write if not 1092 * privileged and at least one of the excute bits is set. 1093 * 1094 * It would be nice to to this after all writes have 1095 * been done, but that would still expose the ISUID/ISGID 1096 * to another app after the partial write is committed. 1097 * 1098 * Note: we don't call zfs_fuid_map_id() here because 1099 * user 0 is not an ephemeral uid. 1100 */ 1101 mutex_enter(&zp->z_acl_lock); 1102 if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 1103 (S_IXUSR >> 6))) != 0 && 1104 (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 1105 secpolicy_vnode_setid_retain(vp, cr, 1106 (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 1107 uint64_t newmode; 1108 zp->z_mode &= ~(S_ISUID | S_ISGID); 1109 newmode = zp->z_mode; 1110 (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 1111 (void *)&newmode, sizeof (uint64_t), tx); 1112 } 1113 mutex_exit(&zp->z_acl_lock); 1114 1115 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 1116 B_TRUE); 1117 1118 /* 1119 * Update the file size (zp_size) if it has changed; 1120 * account for possible concurrent updates. 1121 */ 1122 while ((end_size = zp->z_size) < uio->uio_loffset) { 1123 (void) atomic_cas_64(&zp->z_size, end_size, 1124 uio->uio_loffset); 1125 ASSERT(error == 0); 1126 } 1127 /* 1128 * If we are replaying and eof is non zero then force 1129 * the file size to the specified eof. Note, there's no 1130 * concurrency during replay. 1131 */ 1132 if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 1133 zp->z_size = zfsvfs->z_replay_eof; 1134 1135 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1136 1137 zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 1138 dmu_tx_commit(tx); 1139 1140 if (error != 0) 1141 break; 1142 ASSERT(tx_bytes == nbytes); 1143 n -= nbytes; 1144 1145#ifdef sun 1146 if (!xuio && n > 0) 1147 uio_prefaultpages(MIN(n, max_blksz), uio); 1148#endif /* sun */ 1149 } 1150 1151 zfs_range_unlock(rl); 1152 1153 /* 1154 * If we're in replay mode, or we made no progress, return error. 1155 * Otherwise, it's at least a partial write, so it's successful. 1156 */ 1157 if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1158 ZFS_EXIT(zfsvfs); 1159 return (error); 1160 } 1161 1162 if (ioflag & (FSYNC | FDSYNC) || 1163 zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1164 zil_commit(zilog, zp->z_id); 1165 1166 ZFS_EXIT(zfsvfs); 1167 return (0); 1168} 1169 1170void 1171zfs_get_done(zgd_t *zgd, int error) 1172{ 1173 znode_t *zp = zgd->zgd_private; 1174 objset_t *os = zp->z_zfsvfs->z_os; 1175 1176 if (zgd->zgd_db) 1177 dmu_buf_rele(zgd->zgd_db, zgd); 1178 1179 zfs_range_unlock(zgd->zgd_rl); 1180 1181 /* 1182 * Release the vnode asynchronously as we currently have the 1183 * txg stopped from syncing. 1184 */ 1185 VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1186 1187 if (error == 0 && zgd->zgd_bp) 1188 zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1189 1190 kmem_free(zgd, sizeof (zgd_t)); 1191} 1192 1193#ifdef DEBUG 1194static int zil_fault_io = 0; 1195#endif 1196 1197/* 1198 * Get data to generate a TX_WRITE intent log record. 1199 */ 1200int 1201zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1202{ 1203 zfsvfs_t *zfsvfs = arg; 1204 objset_t *os = zfsvfs->z_os; 1205 znode_t *zp; 1206 uint64_t object = lr->lr_foid; 1207 uint64_t offset = lr->lr_offset; 1208 uint64_t size = lr->lr_length; 1209 blkptr_t *bp = &lr->lr_blkptr; 1210 dmu_buf_t *db; 1211 zgd_t *zgd; 1212 int error = 0; 1213 1214 ASSERT(zio != NULL); 1215 ASSERT(size != 0); 1216 1217 /* 1218 * Nothing to do if the file has been removed 1219 */ 1220 if (zfs_zget(zfsvfs, object, &zp) != 0) 1221 return (SET_ERROR(ENOENT)); 1222 if (zp->z_unlinked) { 1223 /* 1224 * Release the vnode asynchronously as we currently have the 1225 * txg stopped from syncing. 1226 */ 1227 VN_RELE_ASYNC(ZTOV(zp), 1228 dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1229 return (SET_ERROR(ENOENT)); 1230 } 1231 1232 zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1233 zgd->zgd_zilog = zfsvfs->z_log; 1234 zgd->zgd_private = zp; 1235 1236 /* 1237 * Write records come in two flavors: immediate and indirect. 1238 * For small writes it's cheaper to store the data with the 1239 * log record (immediate); for large writes it's cheaper to 1240 * sync the data and get a pointer to it (indirect) so that 1241 * we don't have to write the data twice. 1242 */ 1243 if (buf != NULL) { /* immediate write */ 1244 zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1245 /* test for truncation needs to be done while range locked */ 1246 if (offset >= zp->z_size) { 1247 error = SET_ERROR(ENOENT); 1248 } else { 1249 error = dmu_read(os, object, offset, size, buf, 1250 DMU_READ_NO_PREFETCH); 1251 } 1252 ASSERT(error == 0 || error == ENOENT); 1253 } else { /* indirect write */ 1254 /* 1255 * Have to lock the whole block to ensure when it's 1256 * written out and it's checksum is being calculated 1257 * that no one can change the data. We need to re-check 1258 * blocksize after we get the lock in case it's changed! 1259 */ 1260 for (;;) { 1261 uint64_t blkoff; 1262 size = zp->z_blksz; 1263 blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; 1264 offset -= blkoff; 1265 zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1266 RL_READER); 1267 if (zp->z_blksz == size) 1268 break; 1269 offset += blkoff; 1270 zfs_range_unlock(zgd->zgd_rl); 1271 } 1272 /* test for truncation needs to be done while range locked */ 1273 if (lr->lr_offset >= zp->z_size) 1274 error = SET_ERROR(ENOENT); 1275#ifdef DEBUG 1276 if (zil_fault_io) { 1277 error = SET_ERROR(EIO); 1278 zil_fault_io = 0; 1279 } 1280#endif 1281 if (error == 0) 1282 error = dmu_buf_hold(os, object, offset, zgd, &db, 1283 DMU_READ_NO_PREFETCH); 1284 1285 if (error == 0) { 1286 blkptr_t *obp = dmu_buf_get_blkptr(db); 1287 if (obp) { 1288 ASSERT(BP_IS_HOLE(bp)); 1289 *bp = *obp; 1290 } 1291 1292 zgd->zgd_db = db; 1293 zgd->zgd_bp = bp; 1294 1295 ASSERT(db->db_offset == offset); 1296 ASSERT(db->db_size == size); 1297 1298 error = dmu_sync(zio, lr->lr_common.lrc_txg, 1299 zfs_get_done, zgd); 1300 ASSERT(error || lr->lr_length <= zp->z_blksz); 1301 1302 /* 1303 * On success, we need to wait for the write I/O 1304 * initiated by dmu_sync() to complete before we can 1305 * release this dbuf. We will finish everything up 1306 * in the zfs_get_done() callback. 1307 */ 1308 if (error == 0) 1309 return (0); 1310 1311 if (error == EALREADY) { 1312 lr->lr_common.lrc_txtype = TX_WRITE2; 1313 error = 0; 1314 } 1315 } 1316 } 1317 1318 zfs_get_done(zgd, error); 1319 1320 return (error); 1321} 1322 1323/*ARGSUSED*/ 1324static int 1325zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1326 caller_context_t *ct) 1327{ 1328 znode_t *zp = VTOZ(vp); 1329 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1330 int error; 1331 1332 ZFS_ENTER(zfsvfs); 1333 ZFS_VERIFY_ZP(zp); 1334 1335 if (flag & V_ACE_MASK) 1336 error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1337 else 1338 error = zfs_zaccess_rwx(zp, mode, flag, cr); 1339 1340 ZFS_EXIT(zfsvfs); 1341 return (error); 1342} 1343 1344/* 1345 * If vnode is for a device return a specfs vnode instead. 1346 */ 1347static int 1348specvp_check(vnode_t **vpp, cred_t *cr) 1349{ 1350 int error = 0; 1351 1352 if (IS_DEVVP(*vpp)) { 1353 struct vnode *svp; 1354 1355 svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1356 VN_RELE(*vpp); 1357 if (svp == NULL) 1358 error = SET_ERROR(ENOSYS); 1359 *vpp = svp; 1360 } 1361 return (error); 1362} 1363 1364 1365/* 1366 * Lookup an entry in a directory, or an extended attribute directory. 1367 * If it exists, return a held vnode reference for it. 1368 * 1369 * IN: dvp - vnode of directory to search. 1370 * nm - name of entry to lookup. 1371 * pnp - full pathname to lookup [UNUSED]. 1372 * flags - LOOKUP_XATTR set if looking for an attribute. 1373 * rdir - root directory vnode [UNUSED]. 1374 * cr - credentials of caller. 1375 * ct - caller context 1376 * direntflags - directory lookup flags 1377 * realpnp - returned pathname. 1378 * 1379 * OUT: vpp - vnode of located entry, NULL if not found. 1380 * 1381 * RETURN: 0 on success, error code on failure. 1382 * 1383 * Timestamps: 1384 * NA 1385 */ 1386/* ARGSUSED */ 1387static int 1388zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1389 int nameiop, cred_t *cr, kthread_t *td, int flags) 1390{ 1391 znode_t *zdp = VTOZ(dvp); 1392 zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1393 int error = 0; 1394 int *direntflags = NULL; 1395 void *realpnp = NULL; 1396 1397 /* fast path */ 1398 if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { 1399 1400 if (dvp->v_type != VDIR) { 1401 return (SET_ERROR(ENOTDIR)); 1402 } else if (zdp->z_sa_hdl == NULL) { 1403 return (SET_ERROR(EIO)); 1404 } 1405 1406 if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { 1407 error = zfs_fastaccesschk_execute(zdp, cr); 1408 if (!error) { 1409 *vpp = dvp; 1410 VN_HOLD(*vpp); 1411 return (0); 1412 } 1413 return (error); 1414 } else { 1415 vnode_t *tvp = dnlc_lookup(dvp, nm); 1416 1417 if (tvp) { 1418 error = zfs_fastaccesschk_execute(zdp, cr); 1419 if (error) { 1420 VN_RELE(tvp); 1421 return (error); 1422 } 1423 if (tvp == DNLC_NO_VNODE) { 1424 VN_RELE(tvp); 1425 return (SET_ERROR(ENOENT)); 1426 } else { 1427 *vpp = tvp; 1428 return (specvp_check(vpp, cr)); 1429 } 1430 } 1431 } 1432 } 1433 1434 DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1435 1436 ZFS_ENTER(zfsvfs); 1437 ZFS_VERIFY_ZP(zdp); 1438 1439 *vpp = NULL; 1440 1441 if (flags & LOOKUP_XATTR) { 1442#ifdef TODO 1443 /* 1444 * If the xattr property is off, refuse the lookup request. 1445 */ 1446 if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1447 ZFS_EXIT(zfsvfs); 1448 return (SET_ERROR(EINVAL)); 1449 } 1450#endif 1451 1452 /* 1453 * We don't allow recursive attributes.. 1454 * Maybe someday we will. 1455 */ 1456 if (zdp->z_pflags & ZFS_XATTR) { 1457 ZFS_EXIT(zfsvfs); 1458 return (SET_ERROR(EINVAL)); 1459 } 1460 1461 if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1462 ZFS_EXIT(zfsvfs); 1463 return (error); 1464 } 1465 1466 /* 1467 * Do we have permission to get into attribute directory? 1468 */ 1469 1470 if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1471 B_FALSE, cr)) { 1472 VN_RELE(*vpp); 1473 *vpp = NULL; 1474 } 1475 1476 ZFS_EXIT(zfsvfs); 1477 return (error); 1478 } 1479 1480 if (dvp->v_type != VDIR) { 1481 ZFS_EXIT(zfsvfs); 1482 return (SET_ERROR(ENOTDIR)); 1483 } 1484 1485 /* 1486 * Check accessibility of directory. 1487 */ 1488 1489 if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1490 ZFS_EXIT(zfsvfs); 1491 return (error); 1492 } 1493 1494 if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1495 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1496 ZFS_EXIT(zfsvfs); 1497 return (SET_ERROR(EILSEQ)); 1498 } 1499 1500 error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1501 if (error == 0) 1502 error = specvp_check(vpp, cr); 1503 1504 /* Translate errors and add SAVENAME when needed. */ 1505 if (cnp->cn_flags & ISLASTCN) { 1506 switch (nameiop) { 1507 case CREATE: 1508 case RENAME: 1509 if (error == ENOENT) { 1510 error = EJUSTRETURN; 1511 cnp->cn_flags |= SAVENAME; 1512 break; 1513 } 1514 /* FALLTHROUGH */ 1515 case DELETE: 1516 if (error == 0) 1517 cnp->cn_flags |= SAVENAME; 1518 break; 1519 } 1520 } 1521 if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) { 1522 int ltype = 0; 1523 1524 if (cnp->cn_flags & ISDOTDOT) { 1525 ltype = VOP_ISLOCKED(dvp); 1526 VOP_UNLOCK(dvp, 0); 1527 } 1528 ZFS_EXIT(zfsvfs); 1529 error = vn_lock(*vpp, cnp->cn_lkflags); 1530 if (cnp->cn_flags & ISDOTDOT) 1531 vn_lock(dvp, ltype | LK_RETRY); 1532 if (error != 0) { 1533 VN_RELE(*vpp); 1534 *vpp = NULL; 1535 return (error); 1536 } 1537 } else { 1538 ZFS_EXIT(zfsvfs); 1539 } 1540 1541#ifdef FREEBSD_NAMECACHE 1542 /* 1543 * Insert name into cache (as non-existent) if appropriate. 1544 */ 1545 if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) 1546 cache_enter(dvp, *vpp, cnp); 1547 /* 1548 * Insert name into cache if appropriate. 1549 */ 1550 if (error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1551 if (!(cnp->cn_flags & ISLASTCN) || 1552 (nameiop != DELETE && nameiop != RENAME)) { 1553 cache_enter(dvp, *vpp, cnp); 1554 } 1555 } 1556#endif 1557 1558 return (error); 1559} 1560 1561/* 1562 * Attempt to create a new entry in a directory. If the entry 1563 * already exists, truncate the file if permissible, else return 1564 * an error. Return the vp of the created or trunc'd file. 1565 * 1566 * IN: dvp - vnode of directory to put new file entry in. 1567 * name - name of new file entry. 1568 * vap - attributes of new file. 1569 * excl - flag indicating exclusive or non-exclusive mode. 1570 * mode - mode to open file with. 1571 * cr - credentials of caller. 1572 * flag - large file flag [UNUSED]. 1573 * ct - caller context 1574 * vsecp - ACL to be set 1575 * 1576 * OUT: vpp - vnode of created or trunc'd entry. 1577 * 1578 * RETURN: 0 on success, error code on failure. 1579 * 1580 * Timestamps: 1581 * dvp - ctime|mtime updated if new entry created 1582 * vp - ctime|mtime always, atime if new 1583 */ 1584 1585/* ARGSUSED */ 1586static int 1587zfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 1588 vnode_t **vpp, cred_t *cr, kthread_t *td) 1589{ 1590 znode_t *zp, *dzp = VTOZ(dvp); 1591 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1592 zilog_t *zilog; 1593 objset_t *os; 1594 zfs_dirlock_t *dl; 1595 dmu_tx_t *tx; 1596 int error; 1597 ksid_t *ksid; 1598 uid_t uid; 1599 gid_t gid = crgetgid(cr); 1600 zfs_acl_ids_t acl_ids; 1601 boolean_t fuid_dirtied; 1602 boolean_t have_acl = B_FALSE; 1603 void *vsecp = NULL; 1604 int flag = 0; 1605 1606 /* 1607 * If we have an ephemeral id, ACL, or XVATTR then 1608 * make sure file system is at proper version 1609 */ 1610 1611 ksid = crgetsid(cr, KSID_OWNER); 1612 if (ksid) 1613 uid = ksid_getid(ksid); 1614 else 1615 uid = crgetuid(cr); 1616 1617 if (zfsvfs->z_use_fuids == B_FALSE && 1618 (vsecp || (vap->va_mask & AT_XVATTR) || 1619 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1620 return (SET_ERROR(EINVAL)); 1621 1622 ZFS_ENTER(zfsvfs); 1623 ZFS_VERIFY_ZP(dzp); 1624 os = zfsvfs->z_os; 1625 zilog = zfsvfs->z_log; 1626 1627 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1628 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1629 ZFS_EXIT(zfsvfs); 1630 return (SET_ERROR(EILSEQ)); 1631 } 1632 1633 if (vap->va_mask & AT_XVATTR) { 1634 if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 1635 crgetuid(cr), cr, vap->va_type)) != 0) { 1636 ZFS_EXIT(zfsvfs); 1637 return (error); 1638 } 1639 } 1640top: 1641 *vpp = NULL; 1642 1643 if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 1644 vap->va_mode &= ~S_ISVTX; 1645 1646 if (*name == '\0') { 1647 /* 1648 * Null component name refers to the directory itself. 1649 */ 1650 VN_HOLD(dvp); 1651 zp = dzp; 1652 dl = NULL; 1653 error = 0; 1654 } else { 1655 /* possible VN_HOLD(zp) */ 1656 int zflg = 0; 1657 1658 if (flag & FIGNORECASE) 1659 zflg |= ZCILOOK; 1660 1661 error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1662 NULL, NULL); 1663 if (error) { 1664 if (have_acl) 1665 zfs_acl_ids_free(&acl_ids); 1666 if (strcmp(name, "..") == 0) 1667 error = SET_ERROR(EISDIR); 1668 ZFS_EXIT(zfsvfs); 1669 return (error); 1670 } 1671 } 1672 1673 if (zp == NULL) { 1674 uint64_t txtype; 1675 1676 /* 1677 * Create a new file object and update the directory 1678 * to reference it. 1679 */ 1680 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1681 if (have_acl) 1682 zfs_acl_ids_free(&acl_ids); 1683 goto out; 1684 } 1685 1686 /* 1687 * We only support the creation of regular files in 1688 * extended attribute directories. 1689 */ 1690 1691 if ((dzp->z_pflags & ZFS_XATTR) && 1692 (vap->va_type != VREG)) { 1693 if (have_acl) 1694 zfs_acl_ids_free(&acl_ids); 1695 error = SET_ERROR(EINVAL); 1696 goto out; 1697 } 1698 1699 if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, 1700 cr, vsecp, &acl_ids)) != 0) 1701 goto out; 1702 have_acl = B_TRUE; 1703 1704 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 1705 zfs_acl_ids_free(&acl_ids); 1706 error = SET_ERROR(EDQUOT); 1707 goto out; 1708 } 1709 1710 tx = dmu_tx_create(os); 1711 1712 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 1713 ZFS_SA_BASE_ATTR_SIZE); 1714 1715 fuid_dirtied = zfsvfs->z_fuid_dirty; 1716 if (fuid_dirtied) 1717 zfs_fuid_txhold(zfsvfs, tx); 1718 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 1719 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 1720 if (!zfsvfs->z_use_sa && 1721 acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1722 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1723 0, acl_ids.z_aclp->z_acl_bytes); 1724 } 1725 error = dmu_tx_assign(tx, TXG_NOWAIT); 1726 if (error) { 1727 zfs_dirent_unlock(dl); 1728 if (error == ERESTART) { 1729 dmu_tx_wait(tx); 1730 dmu_tx_abort(tx); 1731 goto top; 1732 } 1733 zfs_acl_ids_free(&acl_ids); 1734 dmu_tx_abort(tx); 1735 ZFS_EXIT(zfsvfs); 1736 return (error); 1737 } 1738 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1739 1740 if (fuid_dirtied) 1741 zfs_fuid_sync(zfsvfs, tx); 1742 1743 (void) zfs_link_create(dl, zp, tx, ZNEW); 1744 txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1745 if (flag & FIGNORECASE) 1746 txtype |= TX_CI; 1747 zfs_log_create(zilog, tx, txtype, dzp, zp, name, 1748 vsecp, acl_ids.z_fuidp, vap); 1749 zfs_acl_ids_free(&acl_ids); 1750 dmu_tx_commit(tx); 1751 } else { 1752 int aflags = (flag & FAPPEND) ? V_APPEND : 0; 1753 1754 if (have_acl) 1755 zfs_acl_ids_free(&acl_ids); 1756 have_acl = B_FALSE; 1757 1758 /* 1759 * A directory entry already exists for this name. 1760 */ 1761 /* 1762 * Can't truncate an existing file if in exclusive mode. 1763 */ 1764 if (excl == EXCL) { 1765 error = SET_ERROR(EEXIST); 1766 goto out; 1767 } 1768 /* 1769 * Can't open a directory for writing. 1770 */ 1771 if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1772 error = SET_ERROR(EISDIR); 1773 goto out; 1774 } 1775 /* 1776 * Verify requested access to file. 1777 */ 1778 if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 1779 goto out; 1780 } 1781 1782 mutex_enter(&dzp->z_lock); 1783 dzp->z_seq++; 1784 mutex_exit(&dzp->z_lock); 1785 1786 /* 1787 * Truncate regular files if requested. 1788 */ 1789 if ((ZTOV(zp)->v_type == VREG) && 1790 (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 1791 /* we can't hold any locks when calling zfs_freesp() */ 1792 zfs_dirent_unlock(dl); 1793 dl = NULL; 1794 error = zfs_freesp(zp, 0, 0, mode, TRUE); 1795 if (error == 0) { 1796 vnevent_create(ZTOV(zp), ct); 1797 } 1798 } 1799 } 1800out: 1801 if (dl) 1802 zfs_dirent_unlock(dl); 1803 1804 if (error) { 1805 if (zp) 1806 VN_RELE(ZTOV(zp)); 1807 } else { 1808 *vpp = ZTOV(zp); 1809 error = specvp_check(vpp, cr); 1810 } 1811 1812 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1813 zil_commit(zilog, 0); 1814 1815 ZFS_EXIT(zfsvfs); 1816 return (error); 1817} 1818 1819/* 1820 * Remove an entry from a directory. 1821 * 1822 * IN: dvp - vnode of directory to remove entry from. 1823 * name - name of entry to remove. 1824 * cr - credentials of caller. 1825 * ct - caller context 1826 * flags - case flags 1827 * 1828 * RETURN: 0 on success, error code on failure. 1829 * 1830 * Timestamps: 1831 * dvp - ctime|mtime 1832 * vp - ctime (if nlink > 0) 1833 */ 1834 1835uint64_t null_xattr = 0; 1836 1837/*ARGSUSED*/ 1838static int 1839zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 1840 int flags) 1841{ 1842 znode_t *zp, *dzp = VTOZ(dvp); 1843 znode_t *xzp; 1844 vnode_t *vp; 1845 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1846 zilog_t *zilog; 1847 uint64_t acl_obj, xattr_obj; 1848 uint64_t xattr_obj_unlinked = 0; 1849 uint64_t obj = 0; 1850 zfs_dirlock_t *dl; 1851 dmu_tx_t *tx; 1852 boolean_t may_delete_now, delete_now = FALSE; 1853 boolean_t unlinked, toobig = FALSE; 1854 uint64_t txtype; 1855 pathname_t *realnmp = NULL; 1856 pathname_t realnm; 1857 int error; 1858 int zflg = ZEXISTS; 1859 1860 ZFS_ENTER(zfsvfs); 1861 ZFS_VERIFY_ZP(dzp); 1862 zilog = zfsvfs->z_log; 1863 1864 if (flags & FIGNORECASE) { 1865 zflg |= ZCILOOK; 1866 pn_alloc(&realnm); 1867 realnmp = &realnm; 1868 } 1869 1870top: 1871 xattr_obj = 0; 1872 xzp = NULL; 1873 /* 1874 * Attempt to lock directory; fail if entry doesn't exist. 1875 */ 1876 if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1877 NULL, realnmp)) { 1878 if (realnmp) 1879 pn_free(realnmp); 1880 ZFS_EXIT(zfsvfs); 1881 return (error); 1882 } 1883 1884 vp = ZTOV(zp); 1885 1886 if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1887 goto out; 1888 } 1889 1890 /* 1891 * Need to use rmdir for removing directories. 1892 */ 1893 if (vp->v_type == VDIR) { 1894 error = SET_ERROR(EPERM); 1895 goto out; 1896 } 1897 1898 vnevent_remove(vp, dvp, name, ct); 1899 1900 if (realnmp) 1901 dnlc_remove(dvp, realnmp->pn_buf); 1902 else 1903 dnlc_remove(dvp, name); 1904 1905 VI_LOCK(vp); 1906 may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); 1907 VI_UNLOCK(vp); 1908 1909 /* 1910 * We may delete the znode now, or we may put it in the unlinked set; 1911 * it depends on whether we're the last link, and on whether there are 1912 * other holds on the vnode. So we dmu_tx_hold() the right things to 1913 * allow for either case. 1914 */ 1915 obj = zp->z_id; 1916 tx = dmu_tx_create(zfsvfs->z_os); 1917 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1918 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1919 zfs_sa_upgrade_txholds(tx, zp); 1920 zfs_sa_upgrade_txholds(tx, dzp); 1921 if (may_delete_now) { 1922 toobig = 1923 zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 1924 /* if the file is too big, only hold_free a token amount */ 1925 dmu_tx_hold_free(tx, zp->z_id, 0, 1926 (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); 1927 } 1928 1929 /* are there any extended attributes? */ 1930 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1931 &xattr_obj, sizeof (xattr_obj)); 1932 if (error == 0 && xattr_obj) { 1933 error = zfs_zget(zfsvfs, xattr_obj, &xzp); 1934 ASSERT0(error); 1935 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 1936 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1937 } 1938 1939 mutex_enter(&zp->z_lock); 1940 if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) 1941 dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 1942 mutex_exit(&zp->z_lock); 1943 1944 /* charge as an update -- would be nice not to charge at all */ 1945 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1946 1947 error = dmu_tx_assign(tx, TXG_NOWAIT); 1948 if (error) { 1949 zfs_dirent_unlock(dl); 1950 VN_RELE(vp); 1951 if (xzp) 1952 VN_RELE(ZTOV(xzp)); 1953 if (error == ERESTART) { 1954 dmu_tx_wait(tx); 1955 dmu_tx_abort(tx); 1956 goto top; 1957 } 1958 if (realnmp) 1959 pn_free(realnmp); 1960 dmu_tx_abort(tx); 1961 ZFS_EXIT(zfsvfs); 1962 return (error); 1963 } 1964 1965 /* 1966 * Remove the directory entry. 1967 */ 1968 error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 1969 1970 if (error) { 1971 dmu_tx_commit(tx); 1972 goto out; 1973 } 1974 1975 if (unlinked) { 1976 1977 /* 1978 * Hold z_lock so that we can make sure that the ACL obj 1979 * hasn't changed. Could have been deleted due to 1980 * zfs_sa_upgrade(). 1981 */ 1982 mutex_enter(&zp->z_lock); 1983 VI_LOCK(vp); 1984 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1985 &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); 1986 delete_now = may_delete_now && !toobig && 1987 vp->v_count == 1 && !vn_has_cached_data(vp) && 1988 xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == 1989 acl_obj; 1990 VI_UNLOCK(vp); 1991 } 1992 1993 if (delete_now) { 1994#ifdef __FreeBSD__ 1995 panic("zfs_remove: delete_now branch taken"); 1996#endif 1997 if (xattr_obj_unlinked) { 1998 ASSERT3U(xzp->z_links, ==, 2); 1999 mutex_enter(&xzp->z_lock); 2000 xzp->z_unlinked = 1; 2001 xzp->z_links = 0; 2002 error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 2003 &xzp->z_links, sizeof (xzp->z_links), tx); 2004 ASSERT3U(error, ==, 0); 2005 mutex_exit(&xzp->z_lock); 2006 zfs_unlinked_add(xzp, tx); 2007 2008 if (zp->z_is_sa) 2009 error = sa_remove(zp->z_sa_hdl, 2010 SA_ZPL_XATTR(zfsvfs), tx); 2011 else 2012 error = sa_update(zp->z_sa_hdl, 2013 SA_ZPL_XATTR(zfsvfs), &null_xattr, 2014 sizeof (uint64_t), tx); 2015 ASSERT0(error); 2016 } 2017 VI_LOCK(vp); 2018 vp->v_count--; 2019 ASSERT0(vp->v_count); 2020 VI_UNLOCK(vp); 2021 mutex_exit(&zp->z_lock); 2022 zfs_znode_delete(zp, tx); 2023 } else if (unlinked) { 2024 mutex_exit(&zp->z_lock); 2025 zfs_unlinked_add(zp, tx); 2026#ifdef __FreeBSD__ 2027 vp->v_vflag |= VV_NOSYNC; 2028#endif 2029 } 2030 2031 txtype = TX_REMOVE; 2032 if (flags & FIGNORECASE) 2033 txtype |= TX_CI; 2034 zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 2035 2036 dmu_tx_commit(tx); 2037out: 2038 if (realnmp) 2039 pn_free(realnmp); 2040 2041 zfs_dirent_unlock(dl); 2042 2043 if (!delete_now) 2044 VN_RELE(vp); 2045 if (xzp) 2046 VN_RELE(ZTOV(xzp)); 2047 2048 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2049 zil_commit(zilog, 0); 2050 2051 ZFS_EXIT(zfsvfs); 2052 return (error); 2053} 2054 2055/* 2056 * Create a new directory and insert it into dvp using the name 2057 * provided. Return a pointer to the inserted directory. 2058 * 2059 * IN: dvp - vnode of directory to add subdir to. 2060 * dirname - name of new directory. 2061 * vap - attributes of new directory. 2062 * cr - credentials of caller. 2063 * ct - caller context 2064 * flags - case flags 2065 * vsecp - ACL to be set 2066 * 2067 * OUT: vpp - vnode of created directory. 2068 * 2069 * RETURN: 0 on success, error code on failure. 2070 * 2071 * Timestamps: 2072 * dvp - ctime|mtime updated 2073 * vp - ctime|mtime|atime updated 2074 */ 2075/*ARGSUSED*/ 2076static int 2077zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 2078 caller_context_t *ct, int flags, vsecattr_t *vsecp) 2079{ 2080 znode_t *zp, *dzp = VTOZ(dvp); 2081 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2082 zilog_t *zilog; 2083 zfs_dirlock_t *dl; 2084 uint64_t txtype; 2085 dmu_tx_t *tx; 2086 int error; 2087 int zf = ZNEW; 2088 ksid_t *ksid; 2089 uid_t uid; 2090 gid_t gid = crgetgid(cr); 2091 zfs_acl_ids_t acl_ids; 2092 boolean_t fuid_dirtied; 2093 2094 ASSERT(vap->va_type == VDIR); 2095 2096 /* 2097 * If we have an ephemeral id, ACL, or XVATTR then 2098 * make sure file system is at proper version 2099 */ 2100 2101 ksid = crgetsid(cr, KSID_OWNER); 2102 if (ksid) 2103 uid = ksid_getid(ksid); 2104 else 2105 uid = crgetuid(cr); 2106 if (zfsvfs->z_use_fuids == B_FALSE && 2107 (vsecp || (vap->va_mask & AT_XVATTR) || 2108 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2109 return (SET_ERROR(EINVAL)); 2110 2111 ZFS_ENTER(zfsvfs); 2112 ZFS_VERIFY_ZP(dzp); 2113 zilog = zfsvfs->z_log; 2114 2115 if (dzp->z_pflags & ZFS_XATTR) { 2116 ZFS_EXIT(zfsvfs); 2117 return (SET_ERROR(EINVAL)); 2118 } 2119 2120 if (zfsvfs->z_utf8 && u8_validate(dirname, 2121 strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2122 ZFS_EXIT(zfsvfs); 2123 return (SET_ERROR(EILSEQ)); 2124 } 2125 if (flags & FIGNORECASE) 2126 zf |= ZCILOOK; 2127 2128 if (vap->va_mask & AT_XVATTR) { 2129 if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2130 crgetuid(cr), cr, vap->va_type)) != 0) { 2131 ZFS_EXIT(zfsvfs); 2132 return (error); 2133 } 2134 } 2135 2136 if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 2137 vsecp, &acl_ids)) != 0) { 2138 ZFS_EXIT(zfsvfs); 2139 return (error); 2140 } 2141 /* 2142 * First make sure the new directory doesn't exist. 2143 * 2144 * Existence is checked first to make sure we don't return 2145 * EACCES instead of EEXIST which can cause some applications 2146 * to fail. 2147 */ 2148top: 2149 *vpp = NULL; 2150 2151 if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 2152 NULL, NULL)) { 2153 zfs_acl_ids_free(&acl_ids); 2154 ZFS_EXIT(zfsvfs); 2155 return (error); 2156 } 2157 2158 if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 2159 zfs_acl_ids_free(&acl_ids); 2160 zfs_dirent_unlock(dl); 2161 ZFS_EXIT(zfsvfs); 2162 return (error); 2163 } 2164 2165 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2166 zfs_acl_ids_free(&acl_ids); 2167 zfs_dirent_unlock(dl); 2168 ZFS_EXIT(zfsvfs); 2169 return (SET_ERROR(EDQUOT)); 2170 } 2171 2172 /* 2173 * Add a new entry to the directory. 2174 */ 2175 tx = dmu_tx_create(zfsvfs->z_os); 2176 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 2177 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2178 fuid_dirtied = zfsvfs->z_fuid_dirty; 2179 if (fuid_dirtied) 2180 zfs_fuid_txhold(zfsvfs, tx); 2181 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2182 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2183 acl_ids.z_aclp->z_acl_bytes); 2184 } 2185 2186 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2187 ZFS_SA_BASE_ATTR_SIZE); 2188 2189 error = dmu_tx_assign(tx, TXG_NOWAIT); 2190 if (error) { 2191 zfs_dirent_unlock(dl); 2192 if (error == ERESTART) { 2193 dmu_tx_wait(tx); 2194 dmu_tx_abort(tx); 2195 goto top; 2196 } 2197 zfs_acl_ids_free(&acl_ids); 2198 dmu_tx_abort(tx); 2199 ZFS_EXIT(zfsvfs); 2200 return (error); 2201 } 2202 2203 /* 2204 * Create new node. 2205 */ 2206 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2207 2208 if (fuid_dirtied) 2209 zfs_fuid_sync(zfsvfs, tx); 2210 2211 /* 2212 * Now put new name in parent dir. 2213 */ 2214 (void) zfs_link_create(dl, zp, tx, ZNEW); 2215 2216 *vpp = ZTOV(zp); 2217 2218 txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 2219 if (flags & FIGNORECASE) 2220 txtype |= TX_CI; 2221 zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 2222 acl_ids.z_fuidp, vap); 2223 2224 zfs_acl_ids_free(&acl_ids); 2225 2226 dmu_tx_commit(tx); 2227 2228 zfs_dirent_unlock(dl); 2229 2230 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2231 zil_commit(zilog, 0); 2232 2233 ZFS_EXIT(zfsvfs); 2234 return (0); 2235} 2236 2237/* 2238 * Remove a directory subdir entry. If the current working 2239 * directory is the same as the subdir to be removed, the 2240 * remove will fail. 2241 * 2242 * IN: dvp - vnode of directory to remove from. 2243 * name - name of directory to be removed. 2244 * cwd - vnode of current working directory. 2245 * cr - credentials of caller. 2246 * ct - caller context 2247 * flags - case flags 2248 * 2249 * RETURN: 0 on success, error code on failure. 2250 * 2251 * Timestamps: 2252 * dvp - ctime|mtime updated 2253 */ 2254/*ARGSUSED*/ 2255static int 2256zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 2257 caller_context_t *ct, int flags) 2258{ 2259 znode_t *dzp = VTOZ(dvp); 2260 znode_t *zp; 2261 vnode_t *vp; 2262 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2263 zilog_t *zilog; 2264 zfs_dirlock_t *dl; 2265 dmu_tx_t *tx; 2266 int error; 2267 int zflg = ZEXISTS; 2268 2269 ZFS_ENTER(zfsvfs); 2270 ZFS_VERIFY_ZP(dzp); 2271 zilog = zfsvfs->z_log; 2272 2273 if (flags & FIGNORECASE) 2274 zflg |= ZCILOOK; 2275top: 2276 zp = NULL; 2277 2278 /* 2279 * Attempt to lock directory; fail if entry doesn't exist. 2280 */ 2281 if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 2282 NULL, NULL)) { 2283 ZFS_EXIT(zfsvfs); 2284 return (error); 2285 } 2286 2287 vp = ZTOV(zp); 2288 2289 if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2290 goto out; 2291 } 2292 2293 if (vp->v_type != VDIR) { 2294 error = SET_ERROR(ENOTDIR); 2295 goto out; 2296 } 2297 2298 if (vp == cwd) { 2299 error = SET_ERROR(EINVAL); 2300 goto out; 2301 } 2302 2303 vnevent_rmdir(vp, dvp, name, ct); 2304 2305 /* 2306 * Grab a lock on the directory to make sure that noone is 2307 * trying to add (or lookup) entries while we are removing it. 2308 */ 2309 rw_enter(&zp->z_name_lock, RW_WRITER); 2310 2311 /* 2312 * Grab a lock on the parent pointer to make sure we play well 2313 * with the treewalk and directory rename code. 2314 */ 2315 rw_enter(&zp->z_parent_lock, RW_WRITER); 2316 2317 tx = dmu_tx_create(zfsvfs->z_os); 2318 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2319 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2320 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2321 zfs_sa_upgrade_txholds(tx, zp); 2322 zfs_sa_upgrade_txholds(tx, dzp); 2323 error = dmu_tx_assign(tx, TXG_NOWAIT); 2324 if (error) { 2325 rw_exit(&zp->z_parent_lock); 2326 rw_exit(&zp->z_name_lock); 2327 zfs_dirent_unlock(dl); 2328 VN_RELE(vp); 2329 if (error == ERESTART) { 2330 dmu_tx_wait(tx); 2331 dmu_tx_abort(tx); 2332 goto top; 2333 } 2334 dmu_tx_abort(tx); 2335 ZFS_EXIT(zfsvfs); 2336 return (error); 2337 } 2338 2339#ifdef FREEBSD_NAMECACHE 2340 cache_purge(dvp); 2341#endif 2342 2343 error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 2344 2345 if (error == 0) { 2346 uint64_t txtype = TX_RMDIR; 2347 if (flags & FIGNORECASE) 2348 txtype |= TX_CI; 2349 zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2350 } 2351 2352 dmu_tx_commit(tx); 2353 2354 rw_exit(&zp->z_parent_lock); 2355 rw_exit(&zp->z_name_lock); 2356#ifdef FREEBSD_NAMECACHE 2357 cache_purge(vp); 2358#endif 2359out: 2360 zfs_dirent_unlock(dl); 2361 2362 VN_RELE(vp); 2363 2364 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2365 zil_commit(zilog, 0); 2366 2367 ZFS_EXIT(zfsvfs); 2368 return (error); 2369} 2370 2371/* 2372 * Read as many directory entries as will fit into the provided 2373 * buffer from the given directory cursor position (specified in 2374 * the uio structure). 2375 * 2376 * IN: vp - vnode of directory to read. 2377 * uio - structure supplying read location, range info, 2378 * and return buffer. 2379 * cr - credentials of caller. 2380 * ct - caller context 2381 * flags - case flags 2382 * 2383 * OUT: uio - updated offset and range, buffer filled. 2384 * eofp - set to true if end-of-file detected. 2385 * 2386 * RETURN: 0 on success, error code on failure. 2387 * 2388 * Timestamps: 2389 * vp - atime updated 2390 * 2391 * Note that the low 4 bits of the cookie returned by zap is always zero. 2392 * This allows us to use the low range for "special" directory entries: 2393 * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2394 * we use the offset 2 for the '.zfs' directory. 2395 */ 2396/* ARGSUSED */ 2397static int 2398zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) 2399{ 2400 znode_t *zp = VTOZ(vp); 2401 iovec_t *iovp; 2402 edirent_t *eodp; 2403 dirent64_t *odp; 2404 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2405 objset_t *os; 2406 caddr_t outbuf; 2407 size_t bufsize; 2408 zap_cursor_t zc; 2409 zap_attribute_t zap; 2410 uint_t bytes_wanted; 2411 uint64_t offset; /* must be unsigned; checks for < 1 */ 2412 uint64_t parent; 2413 int local_eof; 2414 int outcount; 2415 int error; 2416 uint8_t prefetch; 2417 boolean_t check_sysattrs; 2418 uint8_t type; 2419 int ncooks; 2420 u_long *cooks = NULL; 2421 int flags = 0; 2422 2423 ZFS_ENTER(zfsvfs); 2424 ZFS_VERIFY_ZP(zp); 2425 2426 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 2427 &parent, sizeof (parent))) != 0) { 2428 ZFS_EXIT(zfsvfs); 2429 return (error); 2430 } 2431 2432 /* 2433 * If we are not given an eof variable, 2434 * use a local one. 2435 */ 2436 if (eofp == NULL) 2437 eofp = &local_eof; 2438 2439 /* 2440 * Check for valid iov_len. 2441 */ 2442 if (uio->uio_iov->iov_len <= 0) { 2443 ZFS_EXIT(zfsvfs); 2444 return (SET_ERROR(EINVAL)); 2445 } 2446 2447 /* 2448 * Quit if directory has been removed (posix) 2449 */ 2450 if ((*eofp = zp->z_unlinked) != 0) { 2451 ZFS_EXIT(zfsvfs); 2452 return (0); 2453 } 2454 2455 error = 0; 2456 os = zfsvfs->z_os; 2457 offset = uio->uio_loffset; 2458 prefetch = zp->z_zn_prefetch; 2459 2460 /* 2461 * Initialize the iterator cursor. 2462 */ 2463 if (offset <= 3) { 2464 /* 2465 * Start iteration from the beginning of the directory. 2466 */ 2467 zap_cursor_init(&zc, os, zp->z_id); 2468 } else { 2469 /* 2470 * The offset is a serialized cursor. 2471 */ 2472 zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2473 } 2474 2475 /* 2476 * Get space to change directory entries into fs independent format. 2477 */ 2478 iovp = uio->uio_iov; 2479 bytes_wanted = iovp->iov_len; 2480 if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2481 bufsize = bytes_wanted; 2482 outbuf = kmem_alloc(bufsize, KM_SLEEP); 2483 odp = (struct dirent64 *)outbuf; 2484 } else { 2485 bufsize = bytes_wanted; 2486 outbuf = NULL; 2487 odp = (struct dirent64 *)iovp->iov_base; 2488 } 2489 eodp = (struct edirent *)odp; 2490 2491 if (ncookies != NULL) { 2492 /* 2493 * Minimum entry size is dirent size and 1 byte for a file name. 2494 */ 2495 ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2496 cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2497 *cookies = cooks; 2498 *ncookies = ncooks; 2499 } 2500 /* 2501 * If this VFS supports the system attribute view interface; and 2502 * we're looking at an extended attribute directory; and we care 2503 * about normalization conflicts on this vfs; then we must check 2504 * for normalization conflicts with the sysattr name space. 2505 */ 2506#ifdef TODO 2507 check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2508 (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2509 (flags & V_RDDIR_ENTFLAGS); 2510#else 2511 check_sysattrs = 0; 2512#endif 2513 2514 /* 2515 * Transform to file-system independent format 2516 */ 2517 outcount = 0; 2518 while (outcount < bytes_wanted) { 2519 ino64_t objnum; 2520 ushort_t reclen; 2521 off64_t *next = NULL; 2522 2523 /* 2524 * Special case `.', `..', and `.zfs'. 2525 */ 2526 if (offset == 0) { 2527 (void) strcpy(zap.za_name, "."); 2528 zap.za_normalization_conflict = 0; 2529 objnum = zp->z_id; 2530 type = DT_DIR; 2531 } else if (offset == 1) { 2532 (void) strcpy(zap.za_name, ".."); 2533 zap.za_normalization_conflict = 0; 2534 objnum = parent; 2535 type = DT_DIR; 2536 } else if (offset == 2 && zfs_show_ctldir(zp)) { 2537 (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2538 zap.za_normalization_conflict = 0; 2539 objnum = ZFSCTL_INO_ROOT; 2540 type = DT_DIR; 2541 } else { 2542 /* 2543 * Grab next entry. 2544 */ 2545 if (error = zap_cursor_retrieve(&zc, &zap)) { 2546 if ((*eofp = (error == ENOENT)) != 0) 2547 break; 2548 else 2549 goto update; 2550 } 2551 2552 if (zap.za_integer_length != 8 || 2553 zap.za_num_integers != 1) { 2554 cmn_err(CE_WARN, "zap_readdir: bad directory " 2555 "entry, obj = %lld, offset = %lld\n", 2556 (u_longlong_t)zp->z_id, 2557 (u_longlong_t)offset); 2558 error = SET_ERROR(ENXIO); 2559 goto update; 2560 } 2561 2562 objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2563 /* 2564 * MacOS X can extract the object type here such as: 2565 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2566 */ 2567 type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2568 2569 if (check_sysattrs && !zap.za_normalization_conflict) { 2570#ifdef TODO 2571 zap.za_normalization_conflict = 2572 xattr_sysattr_casechk(zap.za_name); 2573#else 2574 panic("%s:%u: TODO", __func__, __LINE__); 2575#endif 2576 } 2577 } 2578 2579 if (flags & V_RDDIR_ACCFILTER) { 2580 /* 2581 * If we have no access at all, don't include 2582 * this entry in the returned information 2583 */ 2584 znode_t *ezp; 2585 if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2586 goto skip_entry; 2587 if (!zfs_has_access(ezp, cr)) { 2588 VN_RELE(ZTOV(ezp)); 2589 goto skip_entry; 2590 } 2591 VN_RELE(ZTOV(ezp)); 2592 } 2593 2594 if (flags & V_RDDIR_ENTFLAGS) 2595 reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2596 else 2597 reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2598 2599 /* 2600 * Will this entry fit in the buffer? 2601 */ 2602 if (outcount + reclen > bufsize) { 2603 /* 2604 * Did we manage to fit anything in the buffer? 2605 */ 2606 if (!outcount) { 2607 error = SET_ERROR(EINVAL); 2608 goto update; 2609 } 2610 break; 2611 } 2612 if (flags & V_RDDIR_ENTFLAGS) { 2613 /* 2614 * Add extended flag entry: 2615 */ 2616 eodp->ed_ino = objnum; 2617 eodp->ed_reclen = reclen; 2618 /* NOTE: ed_off is the offset for the *next* entry */ 2619 next = &(eodp->ed_off); 2620 eodp->ed_eflags = zap.za_normalization_conflict ? 2621 ED_CASE_CONFLICT : 0; 2622 (void) strncpy(eodp->ed_name, zap.za_name, 2623 EDIRENT_NAMELEN(reclen)); 2624 eodp = (edirent_t *)((intptr_t)eodp + reclen); 2625 } else { 2626 /* 2627 * Add normal entry: 2628 */ 2629 odp->d_ino = objnum; 2630 odp->d_reclen = reclen; 2631 odp->d_namlen = strlen(zap.za_name); 2632 (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2633 odp->d_type = type; 2634 odp = (dirent64_t *)((intptr_t)odp + reclen); 2635 } 2636 outcount += reclen; 2637 2638 ASSERT(outcount <= bufsize); 2639 2640 /* Prefetch znode */ 2641 if (prefetch) 2642 dmu_prefetch(os, objnum, 0, 0); 2643 2644 skip_entry: 2645 /* 2646 * Move to the next entry, fill in the previous offset. 2647 */ 2648 if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2649 zap_cursor_advance(&zc); 2650 offset = zap_cursor_serialize(&zc); 2651 } else { 2652 offset += 1; 2653 } 2654 2655 if (cooks != NULL) { 2656 *cooks++ = offset; 2657 ncooks--; 2658 KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2659 } 2660 } 2661 zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2662 2663 /* Subtract unused cookies */ 2664 if (ncookies != NULL) 2665 *ncookies -= ncooks; 2666 2667 if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2668 iovp->iov_base += outcount; 2669 iovp->iov_len -= outcount; 2670 uio->uio_resid -= outcount; 2671 } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2672 /* 2673 * Reset the pointer. 2674 */ 2675 offset = uio->uio_loffset; 2676 } 2677 2678update: 2679 zap_cursor_fini(&zc); 2680 if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2681 kmem_free(outbuf, bufsize); 2682 2683 if (error == ENOENT) 2684 error = 0; 2685 2686 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2687 2688 uio->uio_loffset = offset; 2689 ZFS_EXIT(zfsvfs); 2690 if (error != 0 && cookies != NULL) { 2691 free(*cookies, M_TEMP); 2692 *cookies = NULL; 2693 *ncookies = 0; 2694 } 2695 return (error); 2696} 2697 2698ulong_t zfs_fsync_sync_cnt = 4; 2699 2700static int 2701zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2702{ 2703 znode_t *zp = VTOZ(vp); 2704 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2705 2706 (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2707 2708 if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 2709 ZFS_ENTER(zfsvfs); 2710 ZFS_VERIFY_ZP(zp); 2711 zil_commit(zfsvfs->z_log, zp->z_id); 2712 ZFS_EXIT(zfsvfs); 2713 } 2714 return (0); 2715} 2716 2717 2718/* 2719 * Get the requested file attributes and place them in the provided 2720 * vattr structure. 2721 * 2722 * IN: vp - vnode of file. 2723 * vap - va_mask identifies requested attributes. 2724 * If AT_XVATTR set, then optional attrs are requested 2725 * flags - ATTR_NOACLCHECK (CIFS server context) 2726 * cr - credentials of caller. 2727 * ct - caller context 2728 * 2729 * OUT: vap - attribute values. 2730 * 2731 * RETURN: 0 (always succeeds). 2732 */ 2733/* ARGSUSED */ 2734static int 2735zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2736 caller_context_t *ct) 2737{ 2738 znode_t *zp = VTOZ(vp); 2739 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2740 int error = 0; 2741 uint32_t blksize; 2742 u_longlong_t nblocks; 2743 uint64_t links; 2744 uint64_t mtime[2], ctime[2], crtime[2], rdev; 2745 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2746 xoptattr_t *xoap = NULL; 2747 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2748 sa_bulk_attr_t bulk[4]; 2749 int count = 0; 2750 2751 ZFS_ENTER(zfsvfs); 2752 ZFS_VERIFY_ZP(zp); 2753 2754 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2755 2756 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 2757 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 2758 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 2759 if (vp->v_type == VBLK || vp->v_type == VCHR) 2760 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 2761 &rdev, 8); 2762 2763 if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 2764 ZFS_EXIT(zfsvfs); 2765 return (error); 2766 } 2767 2768 /* 2769 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2770 * Also, if we are the owner don't bother, since owner should 2771 * always be allowed to read basic attributes of file. 2772 */ 2773 if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2774 (vap->va_uid != crgetuid(cr))) { 2775 if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2776 skipaclchk, cr)) { 2777 ZFS_EXIT(zfsvfs); 2778 return (error); 2779 } 2780 } 2781 2782 /* 2783 * Return all attributes. It's cheaper to provide the answer 2784 * than to determine whether we were asked the question. 2785 */ 2786 2787 mutex_enter(&zp->z_lock); 2788 vap->va_type = IFTOVT(zp->z_mode); 2789 vap->va_mode = zp->z_mode & ~S_IFMT; 2790#ifdef sun 2791 vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2792#else 2793 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 2794#endif 2795 vap->va_nodeid = zp->z_id; 2796 if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2797 links = zp->z_links + 1; 2798 else 2799 links = zp->z_links; 2800 vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! */ 2801 vap->va_size = zp->z_size; 2802#ifdef sun 2803 vap->va_rdev = vp->v_rdev; 2804#else 2805 if (vp->v_type == VBLK || vp->v_type == VCHR) 2806 vap->va_rdev = zfs_cmpldev(rdev); 2807#endif 2808 vap->va_seq = zp->z_seq; 2809 vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2810 2811 /* 2812 * Add in any requested optional attributes and the create time. 2813 * Also set the corresponding bits in the returned attribute bitmap. 2814 */ 2815 if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2816 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2817 xoap->xoa_archive = 2818 ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2819 XVA_SET_RTN(xvap, XAT_ARCHIVE); 2820 } 2821 2822 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2823 xoap->xoa_readonly = 2824 ((zp->z_pflags & ZFS_READONLY) != 0); 2825 XVA_SET_RTN(xvap, XAT_READONLY); 2826 } 2827 2828 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2829 xoap->xoa_system = 2830 ((zp->z_pflags & ZFS_SYSTEM) != 0); 2831 XVA_SET_RTN(xvap, XAT_SYSTEM); 2832 } 2833 2834 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2835 xoap->xoa_hidden = 2836 ((zp->z_pflags & ZFS_HIDDEN) != 0); 2837 XVA_SET_RTN(xvap, XAT_HIDDEN); 2838 } 2839 2840 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2841 xoap->xoa_nounlink = 2842 ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2843 XVA_SET_RTN(xvap, XAT_NOUNLINK); 2844 } 2845 2846 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2847 xoap->xoa_immutable = 2848 ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2849 XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2850 } 2851 2852 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2853 xoap->xoa_appendonly = 2854 ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2855 XVA_SET_RTN(xvap, XAT_APPENDONLY); 2856 } 2857 2858 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2859 xoap->xoa_nodump = 2860 ((zp->z_pflags & ZFS_NODUMP) != 0); 2861 XVA_SET_RTN(xvap, XAT_NODUMP); 2862 } 2863 2864 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2865 xoap->xoa_opaque = 2866 ((zp->z_pflags & ZFS_OPAQUE) != 0); 2867 XVA_SET_RTN(xvap, XAT_OPAQUE); 2868 } 2869 2870 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2871 xoap->xoa_av_quarantined = 2872 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2873 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2874 } 2875 2876 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2877 xoap->xoa_av_modified = 2878 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2879 XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2880 } 2881 2882 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2883 vp->v_type == VREG) { 2884 zfs_sa_get_scanstamp(zp, xvap); 2885 } 2886 2887 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 2888 uint64_t times[2]; 2889 2890 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 2891 times, sizeof (times)); 2892 ZFS_TIME_DECODE(&xoap->xoa_createtime, times); 2893 XVA_SET_RTN(xvap, XAT_CREATETIME); 2894 } 2895 2896 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2897 xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 2898 XVA_SET_RTN(xvap, XAT_REPARSE); 2899 } 2900 if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 2901 xoap->xoa_generation = zp->z_gen; 2902 XVA_SET_RTN(xvap, XAT_GEN); 2903 } 2904 2905 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2906 xoap->xoa_offline = 2907 ((zp->z_pflags & ZFS_OFFLINE) != 0); 2908 XVA_SET_RTN(xvap, XAT_OFFLINE); 2909 } 2910 2911 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2912 xoap->xoa_sparse = 2913 ((zp->z_pflags & ZFS_SPARSE) != 0); 2914 XVA_SET_RTN(xvap, XAT_SPARSE); 2915 } 2916 } 2917 2918 ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 2919 ZFS_TIME_DECODE(&vap->va_mtime, mtime); 2920 ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2921 ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 2922 2923 mutex_exit(&zp->z_lock); 2924 2925 sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 2926 vap->va_blksize = blksize; 2927 vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 2928 2929 if (zp->z_blksz == 0) { 2930 /* 2931 * Block size hasn't been set; suggest maximal I/O transfers. 2932 */ 2933 vap->va_blksize = zfsvfs->z_max_blksz; 2934 } 2935 2936 ZFS_EXIT(zfsvfs); 2937 return (0); 2938} 2939 2940/* 2941 * Set the file attributes to the values contained in the 2942 * vattr structure. 2943 * 2944 * IN: vp - vnode of file to be modified. 2945 * vap - new attribute values. 2946 * If AT_XVATTR set, then optional attrs are being set 2947 * flags - ATTR_UTIME set if non-default time values provided. 2948 * - ATTR_NOACLCHECK (CIFS context only). 2949 * cr - credentials of caller. 2950 * ct - caller context 2951 * 2952 * RETURN: 0 on success, error code on failure. 2953 * 2954 * Timestamps: 2955 * vp - ctime updated, mtime updated if size changed. 2956 */ 2957/* ARGSUSED */ 2958static int 2959zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2960 caller_context_t *ct) 2961{ 2962 znode_t *zp = VTOZ(vp); 2963 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2964 zilog_t *zilog; 2965 dmu_tx_t *tx; 2966 vattr_t oldva; 2967 xvattr_t tmpxvattr; 2968 uint_t mask = vap->va_mask; 2969 uint_t saved_mask = 0; 2970 uint64_t saved_mode; 2971 int trim_mask = 0; 2972 uint64_t new_mode; 2973 uint64_t new_uid, new_gid; 2974 uint64_t xattr_obj; 2975 uint64_t mtime[2], ctime[2]; 2976 znode_t *attrzp; 2977 int need_policy = FALSE; 2978 int err, err2; 2979 zfs_fuid_info_t *fuidp = NULL; 2980 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2981 xoptattr_t *xoap; 2982 zfs_acl_t *aclp; 2983 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2984 boolean_t fuid_dirtied = B_FALSE; 2985 sa_bulk_attr_t bulk[7], xattr_bulk[7]; 2986 int count = 0, xattr_count = 0; 2987 2988 if (mask == 0) 2989 return (0); 2990 2991 if (mask & AT_NOSET) 2992 return (SET_ERROR(EINVAL)); 2993 2994 ZFS_ENTER(zfsvfs); 2995 ZFS_VERIFY_ZP(zp); 2996 2997 zilog = zfsvfs->z_log; 2998 2999 /* 3000 * Make sure that if we have ephemeral uid/gid or xvattr specified 3001 * that file system is at proper version level 3002 */ 3003 3004 if (zfsvfs->z_use_fuids == B_FALSE && 3005 (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 3006 ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 3007 (mask & AT_XVATTR))) { 3008 ZFS_EXIT(zfsvfs); 3009 return (SET_ERROR(EINVAL)); 3010 } 3011 3012 if (mask & AT_SIZE && vp->v_type == VDIR) { 3013 ZFS_EXIT(zfsvfs); 3014 return (SET_ERROR(EISDIR)); 3015 } 3016 3017 if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 3018 ZFS_EXIT(zfsvfs); 3019 return (SET_ERROR(EINVAL)); 3020 } 3021 3022 /* 3023 * If this is an xvattr_t, then get a pointer to the structure of 3024 * optional attributes. If this is NULL, then we have a vattr_t. 3025 */ 3026 xoap = xva_getxoptattr(xvap); 3027 3028 xva_init(&tmpxvattr); 3029 3030 /* 3031 * Immutable files can only alter immutable bit and atime 3032 */ 3033 if ((zp->z_pflags & ZFS_IMMUTABLE) && 3034 ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 3035 ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 3036 ZFS_EXIT(zfsvfs); 3037 return (SET_ERROR(EPERM)); 3038 } 3039 3040 if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { 3041 ZFS_EXIT(zfsvfs); 3042 return (SET_ERROR(EPERM)); 3043 } 3044 3045 /* 3046 * Verify timestamps doesn't overflow 32 bits. 3047 * ZFS can handle large timestamps, but 32bit syscalls can't 3048 * handle times greater than 2039. This check should be removed 3049 * once large timestamps are fully supported. 3050 */ 3051 if (mask & (AT_ATIME | AT_MTIME)) { 3052 if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 3053 ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 3054 ZFS_EXIT(zfsvfs); 3055 return (SET_ERROR(EOVERFLOW)); 3056 } 3057 } 3058 3059top: 3060 attrzp = NULL; 3061 aclp = NULL; 3062 3063 /* Can this be moved to before the top label? */ 3064 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 3065 ZFS_EXIT(zfsvfs); 3066 return (SET_ERROR(EROFS)); 3067 } 3068 3069 /* 3070 * First validate permissions 3071 */ 3072 3073 if (mask & AT_SIZE) { 3074 /* 3075 * XXX - Note, we are not providing any open 3076 * mode flags here (like FNDELAY), so we may 3077 * block if there are locks present... this 3078 * should be addressed in openat(). 3079 */ 3080 /* XXX - would it be OK to generate a log record here? */ 3081 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 3082 if (err) { 3083 ZFS_EXIT(zfsvfs); 3084 return (err); 3085 } 3086 } 3087 3088 if (mask & (AT_ATIME|AT_MTIME) || 3089 ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 3090 XVA_ISSET_REQ(xvap, XAT_READONLY) || 3091 XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 3092 XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 3093 XVA_ISSET_REQ(xvap, XAT_SPARSE) || 3094 XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 3095 XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 3096 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 3097 skipaclchk, cr); 3098 } 3099 3100 if (mask & (AT_UID|AT_GID)) { 3101 int idmask = (mask & (AT_UID|AT_GID)); 3102 int take_owner; 3103 int take_group; 3104 3105 /* 3106 * NOTE: even if a new mode is being set, 3107 * we may clear S_ISUID/S_ISGID bits. 3108 */ 3109 3110 if (!(mask & AT_MODE)) 3111 vap->va_mode = zp->z_mode; 3112 3113 /* 3114 * Take ownership or chgrp to group we are a member of 3115 */ 3116 3117 take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 3118 take_group = (mask & AT_GID) && 3119 zfs_groupmember(zfsvfs, vap->va_gid, cr); 3120 3121 /* 3122 * If both AT_UID and AT_GID are set then take_owner and 3123 * take_group must both be set in order to allow taking 3124 * ownership. 3125 * 3126 * Otherwise, send the check through secpolicy_vnode_setattr() 3127 * 3128 */ 3129 3130 if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 3131 ((idmask == AT_UID) && take_owner) || 3132 ((idmask == AT_GID) && take_group)) { 3133 if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 3134 skipaclchk, cr) == 0) { 3135 /* 3136 * Remove setuid/setgid for non-privileged users 3137 */ 3138 secpolicy_setid_clear(vap, vp, cr); 3139 trim_mask = (mask & (AT_UID|AT_GID)); 3140 } else { 3141 need_policy = TRUE; 3142 } 3143 } else { 3144 need_policy = TRUE; 3145 } 3146 } 3147 3148 mutex_enter(&zp->z_lock); 3149 oldva.va_mode = zp->z_mode; 3150 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 3151 if (mask & AT_XVATTR) { 3152 /* 3153 * Update xvattr mask to include only those attributes 3154 * that are actually changing. 3155 * 3156 * the bits will be restored prior to actually setting 3157 * the attributes so the caller thinks they were set. 3158 */ 3159 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3160 if (xoap->xoa_appendonly != 3161 ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 3162 need_policy = TRUE; 3163 } else { 3164 XVA_CLR_REQ(xvap, XAT_APPENDONLY); 3165 XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 3166 } 3167 } 3168 3169 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3170 if (xoap->xoa_nounlink != 3171 ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 3172 need_policy = TRUE; 3173 } else { 3174 XVA_CLR_REQ(xvap, XAT_NOUNLINK); 3175 XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 3176 } 3177 } 3178 3179 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3180 if (xoap->xoa_immutable != 3181 ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 3182 need_policy = TRUE; 3183 } else { 3184 XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 3185 XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 3186 } 3187 } 3188 3189 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3190 if (xoap->xoa_nodump != 3191 ((zp->z_pflags & ZFS_NODUMP) != 0)) { 3192 need_policy = TRUE; 3193 } else { 3194 XVA_CLR_REQ(xvap, XAT_NODUMP); 3195 XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 3196 } 3197 } 3198 3199 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3200 if (xoap->xoa_av_modified != 3201 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 3202 need_policy = TRUE; 3203 } else { 3204 XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 3205 XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 3206 } 3207 } 3208 3209 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3210 if ((vp->v_type != VREG && 3211 xoap->xoa_av_quarantined) || 3212 xoap->xoa_av_quarantined != 3213 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 3214 need_policy = TRUE; 3215 } else { 3216 XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 3217 XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 3218 } 3219 } 3220 3221 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3222 mutex_exit(&zp->z_lock); 3223 ZFS_EXIT(zfsvfs); 3224 return (SET_ERROR(EPERM)); 3225 } 3226 3227 if (need_policy == FALSE && 3228 (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3229 XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3230 need_policy = TRUE; 3231 } 3232 } 3233 3234 mutex_exit(&zp->z_lock); 3235 3236 if (mask & AT_MODE) { 3237 if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 3238 err = secpolicy_setid_setsticky_clear(vp, vap, 3239 &oldva, cr); 3240 if (err) { 3241 ZFS_EXIT(zfsvfs); 3242 return (err); 3243 } 3244 trim_mask |= AT_MODE; 3245 } else { 3246 need_policy = TRUE; 3247 } 3248 } 3249 3250 if (need_policy) { 3251 /* 3252 * If trim_mask is set then take ownership 3253 * has been granted or write_acl is present and user 3254 * has the ability to modify mode. In that case remove 3255 * UID|GID and or MODE from mask so that 3256 * secpolicy_vnode_setattr() doesn't revoke it. 3257 */ 3258 3259 if (trim_mask) { 3260 saved_mask = vap->va_mask; 3261 vap->va_mask &= ~trim_mask; 3262 if (trim_mask & AT_MODE) { 3263 /* 3264 * Save the mode, as secpolicy_vnode_setattr() 3265 * will overwrite it with ova.va_mode. 3266 */ 3267 saved_mode = vap->va_mode; 3268 } 3269 } 3270 err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3271 (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3272 if (err) { 3273 ZFS_EXIT(zfsvfs); 3274 return (err); 3275 } 3276 3277 if (trim_mask) { 3278 vap->va_mask |= saved_mask; 3279 if (trim_mask & AT_MODE) { 3280 /* 3281 * Recover the mode after 3282 * secpolicy_vnode_setattr(). 3283 */ 3284 vap->va_mode = saved_mode; 3285 } 3286 } 3287 } 3288 3289 /* 3290 * secpolicy_vnode_setattr, or take ownership may have 3291 * changed va_mask 3292 */ 3293 mask = vap->va_mask; 3294 3295 if ((mask & (AT_UID | AT_GID))) { 3296 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 3297 &xattr_obj, sizeof (xattr_obj)); 3298 3299 if (err == 0 && xattr_obj) { 3300 err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 3301 if (err) 3302 goto out2; 3303 } 3304 if (mask & AT_UID) { 3305 new_uid = zfs_fuid_create(zfsvfs, 3306 (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3307 if (new_uid != zp->z_uid && 3308 zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 3309 if (attrzp) 3310 VN_RELE(ZTOV(attrzp)); 3311 err = SET_ERROR(EDQUOT); 3312 goto out2; 3313 } 3314 } 3315 3316 if (mask & AT_GID) { 3317 new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 3318 cr, ZFS_GROUP, &fuidp); 3319 if (new_gid != zp->z_gid && 3320 zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 3321 if (attrzp) 3322 VN_RELE(ZTOV(attrzp)); 3323 err = SET_ERROR(EDQUOT); 3324 goto out2; 3325 } 3326 } 3327 } 3328 tx = dmu_tx_create(zfsvfs->z_os); 3329 3330 if (mask & AT_MODE) { 3331 uint64_t pmode = zp->z_mode; 3332 uint64_t acl_obj; 3333 new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3334 3335 if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 3336 !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3337 err = SET_ERROR(EPERM); 3338 goto out; 3339 } 3340 3341 if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3342 goto out; 3343 3344 mutex_enter(&zp->z_lock); 3345 if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 3346 /* 3347 * Are we upgrading ACL from old V0 format 3348 * to V1 format? 3349 */ 3350 if (zfsvfs->z_version >= ZPL_VERSION_FUID && 3351 zfs_znode_acl_version(zp) == 3352 ZFS_ACL_VERSION_INITIAL) { 3353 dmu_tx_hold_free(tx, acl_obj, 0, 3354 DMU_OBJECT_END); 3355 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3356 0, aclp->z_acl_bytes); 3357 } else { 3358 dmu_tx_hold_write(tx, acl_obj, 0, 3359 aclp->z_acl_bytes); 3360 } 3361 } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 3362 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3363 0, aclp->z_acl_bytes); 3364 } 3365 mutex_exit(&zp->z_lock); 3366 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3367 } else { 3368 if ((mask & AT_XVATTR) && 3369 XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3370 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3371 else 3372 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3373 } 3374 3375 if (attrzp) { 3376 dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 3377 } 3378 3379 fuid_dirtied = zfsvfs->z_fuid_dirty; 3380 if (fuid_dirtied) 3381 zfs_fuid_txhold(zfsvfs, tx); 3382 3383 zfs_sa_upgrade_txholds(tx, zp); 3384 3385 err = dmu_tx_assign(tx, TXG_NOWAIT); 3386 if (err) { 3387 if (err == ERESTART) 3388 dmu_tx_wait(tx); 3389 goto out; 3390 } 3391 3392 count = 0; 3393 /* 3394 * Set each attribute requested. 3395 * We group settings according to the locks they need to acquire. 3396 * 3397 * Note: you cannot set ctime directly, although it will be 3398 * updated as a side-effect of calling this function. 3399 */ 3400 3401 3402 if (mask & (AT_UID|AT_GID|AT_MODE)) 3403 mutex_enter(&zp->z_acl_lock); 3404 mutex_enter(&zp->z_lock); 3405 3406 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3407 &zp->z_pflags, sizeof (zp->z_pflags)); 3408 3409 if (attrzp) { 3410 if (mask & (AT_UID|AT_GID|AT_MODE)) 3411 mutex_enter(&attrzp->z_acl_lock); 3412 mutex_enter(&attrzp->z_lock); 3413 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3414 SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3415 sizeof (attrzp->z_pflags)); 3416 } 3417 3418 if (mask & (AT_UID|AT_GID)) { 3419 3420 if (mask & AT_UID) { 3421 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 3422 &new_uid, sizeof (new_uid)); 3423 zp->z_uid = new_uid; 3424 if (attrzp) { 3425 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3426 SA_ZPL_UID(zfsvfs), NULL, &new_uid, 3427 sizeof (new_uid)); 3428 attrzp->z_uid = new_uid; 3429 } 3430 } 3431 3432 if (mask & AT_GID) { 3433 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 3434 NULL, &new_gid, sizeof (new_gid)); 3435 zp->z_gid = new_gid; 3436 if (attrzp) { 3437 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3438 SA_ZPL_GID(zfsvfs), NULL, &new_gid, 3439 sizeof (new_gid)); 3440 attrzp->z_gid = new_gid; 3441 } 3442 } 3443 if (!(mask & AT_MODE)) { 3444 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 3445 NULL, &new_mode, sizeof (new_mode)); 3446 new_mode = zp->z_mode; 3447 } 3448 err = zfs_acl_chown_setattr(zp); 3449 ASSERT(err == 0); 3450 if (attrzp) { 3451 err = zfs_acl_chown_setattr(attrzp); 3452 ASSERT(err == 0); 3453 } 3454 } 3455 3456 if (mask & AT_MODE) { 3457 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 3458 &new_mode, sizeof (new_mode)); 3459 zp->z_mode = new_mode; 3460 ASSERT3U((uintptr_t)aclp, !=, 0); 3461 err = zfs_aclset_common(zp, aclp, cr, tx); 3462 ASSERT0(err); 3463 if (zp->z_acl_cached) 3464 zfs_acl_free(zp->z_acl_cached); 3465 zp->z_acl_cached = aclp; 3466 aclp = NULL; 3467 } 3468 3469 3470 if (mask & AT_ATIME) { 3471 ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 3472 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 3473 &zp->z_atime, sizeof (zp->z_atime)); 3474 } 3475 3476 if (mask & AT_MTIME) { 3477 ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 3478 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 3479 mtime, sizeof (mtime)); 3480 } 3481 3482 /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */ 3483 if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3484 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3485 NULL, mtime, sizeof (mtime)); 3486 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3487 &ctime, sizeof (ctime)); 3488 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 3489 B_TRUE); 3490 } else if (mask != 0) { 3491 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3492 &ctime, sizeof (ctime)); 3493 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 3494 B_TRUE); 3495 if (attrzp) { 3496 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3497 SA_ZPL_CTIME(zfsvfs), NULL, 3498 &ctime, sizeof (ctime)); 3499 zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 3500 mtime, ctime, B_TRUE); 3501 } 3502 } 3503 /* 3504 * Do this after setting timestamps to prevent timestamp 3505 * update from toggling bit 3506 */ 3507 3508 if (xoap && (mask & AT_XVATTR)) { 3509 3510 /* 3511 * restore trimmed off masks 3512 * so that return masks can be set for caller. 3513 */ 3514 3515 if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3516 XVA_SET_REQ(xvap, XAT_APPENDONLY); 3517 } 3518 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3519 XVA_SET_REQ(xvap, XAT_NOUNLINK); 3520 } 3521 if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3522 XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3523 } 3524 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3525 XVA_SET_REQ(xvap, XAT_NODUMP); 3526 } 3527 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3528 XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3529 } 3530 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3531 XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3532 } 3533 3534 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3535 ASSERT(vp->v_type == VREG); 3536 3537 zfs_xvattr_set(zp, xvap, tx); 3538 } 3539 3540 if (fuid_dirtied) 3541 zfs_fuid_sync(zfsvfs, tx); 3542 3543 if (mask != 0) 3544 zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3545 3546 mutex_exit(&zp->z_lock); 3547 if (mask & (AT_UID|AT_GID|AT_MODE)) 3548 mutex_exit(&zp->z_acl_lock); 3549 3550 if (attrzp) { 3551 if (mask & (AT_UID|AT_GID|AT_MODE)) 3552 mutex_exit(&attrzp->z_acl_lock); 3553 mutex_exit(&attrzp->z_lock); 3554 } 3555out: 3556 if (err == 0 && attrzp) { 3557 err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 3558 xattr_count, tx); 3559 ASSERT(err2 == 0); 3560 } 3561 3562 if (attrzp) 3563 VN_RELE(ZTOV(attrzp)); 3564 3565 if (aclp) 3566 zfs_acl_free(aclp); 3567 3568 if (fuidp) { 3569 zfs_fuid_info_free(fuidp); 3570 fuidp = NULL; 3571 } 3572 3573 if (err) { 3574 dmu_tx_abort(tx); 3575 if (err == ERESTART) 3576 goto top; 3577 } else { 3578 err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 3579 dmu_tx_commit(tx); 3580 } 3581 3582out2: 3583 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3584 zil_commit(zilog, 0); 3585 3586 ZFS_EXIT(zfsvfs); 3587 return (err); 3588} 3589 3590typedef struct zfs_zlock { 3591 krwlock_t *zl_rwlock; /* lock we acquired */ 3592 znode_t *zl_znode; /* znode we held */ 3593 struct zfs_zlock *zl_next; /* next in list */ 3594} zfs_zlock_t; 3595 3596/* 3597 * Drop locks and release vnodes that were held by zfs_rename_lock(). 3598 */ 3599static void 3600zfs_rename_unlock(zfs_zlock_t **zlpp) 3601{ 3602 zfs_zlock_t *zl; 3603 3604 while ((zl = *zlpp) != NULL) { 3605 if (zl->zl_znode != NULL) 3606 VN_RELE(ZTOV(zl->zl_znode)); 3607 rw_exit(zl->zl_rwlock); 3608 *zlpp = zl->zl_next; 3609 kmem_free(zl, sizeof (*zl)); 3610 } 3611} 3612 3613/* 3614 * Search back through the directory tree, using the ".." entries. 3615 * Lock each directory in the chain to prevent concurrent renames. 3616 * Fail any attempt to move a directory into one of its own descendants. 3617 * XXX - z_parent_lock can overlap with map or grow locks 3618 */ 3619static int 3620zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 3621{ 3622 zfs_zlock_t *zl; 3623 znode_t *zp = tdzp; 3624 uint64_t rootid = zp->z_zfsvfs->z_root; 3625 uint64_t oidp = zp->z_id; 3626 krwlock_t *rwlp = &szp->z_parent_lock; 3627 krw_t rw = RW_WRITER; 3628 3629 /* 3630 * First pass write-locks szp and compares to zp->z_id. 3631 * Later passes read-lock zp and compare to zp->z_parent. 3632 */ 3633 do { 3634 if (!rw_tryenter(rwlp, rw)) { 3635 /* 3636 * Another thread is renaming in this path. 3637 * Note that if we are a WRITER, we don't have any 3638 * parent_locks held yet. 3639 */ 3640 if (rw == RW_READER && zp->z_id > szp->z_id) { 3641 /* 3642 * Drop our locks and restart 3643 */ 3644 zfs_rename_unlock(&zl); 3645 *zlpp = NULL; 3646 zp = tdzp; 3647 oidp = zp->z_id; 3648 rwlp = &szp->z_parent_lock; 3649 rw = RW_WRITER; 3650 continue; 3651 } else { 3652 /* 3653 * Wait for other thread to drop its locks 3654 */ 3655 rw_enter(rwlp, rw); 3656 } 3657 } 3658 3659 zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3660 zl->zl_rwlock = rwlp; 3661 zl->zl_znode = NULL; 3662 zl->zl_next = *zlpp; 3663 *zlpp = zl; 3664 3665 if (oidp == szp->z_id) /* We're a descendant of szp */ 3666 return (SET_ERROR(EINVAL)); 3667 3668 if (oidp == rootid) /* We've hit the top */ 3669 return (0); 3670 3671 if (rw == RW_READER) { /* i.e. not the first pass */ 3672 int error = zfs_zget(zp->z_zfsvfs, oidp, &zp); 3673 if (error) 3674 return (error); 3675 zl->zl_znode = zp; 3676 } 3677 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs), 3678 &oidp, sizeof (oidp)); 3679 rwlp = &zp->z_parent_lock; 3680 rw = RW_READER; 3681 3682 } while (zp->z_id != sdzp->z_id); 3683 3684 return (0); 3685} 3686 3687/* 3688 * Move an entry from the provided source directory to the target 3689 * directory. Change the entry name as indicated. 3690 * 3691 * IN: sdvp - Source directory containing the "old entry". 3692 * snm - Old entry name. 3693 * tdvp - Target directory to contain the "new entry". 3694 * tnm - New entry name. 3695 * cr - credentials of caller. 3696 * ct - caller context 3697 * flags - case flags 3698 * 3699 * RETURN: 0 on success, error code on failure. 3700 * 3701 * Timestamps: 3702 * sdvp,tdvp - ctime|mtime updated 3703 */ 3704/*ARGSUSED*/ 3705static int 3706zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, 3707 caller_context_t *ct, int flags) 3708{ 3709 znode_t *tdzp, *szp, *tzp; 3710 znode_t *sdzp = VTOZ(sdvp); 3711 zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; 3712 zilog_t *zilog; 3713 vnode_t *realvp; 3714 zfs_dirlock_t *sdl, *tdl; 3715 dmu_tx_t *tx; 3716 zfs_zlock_t *zl; 3717 int cmp, serr, terr; 3718 int error = 0; 3719 int zflg = 0; 3720 3721 ZFS_ENTER(zfsvfs); 3722 ZFS_VERIFY_ZP(sdzp); 3723 zilog = zfsvfs->z_log; 3724 3725 /* 3726 * Make sure we have the real vp for the target directory. 3727 */ 3728 if (VOP_REALVP(tdvp, &realvp, ct) == 0) 3729 tdvp = realvp; 3730 3731 tdzp = VTOZ(tdvp); 3732 ZFS_VERIFY_ZP(tdzp); 3733 3734 /* 3735 * We check z_zfsvfs rather than v_vfsp here, because snapshots and the 3736 * ctldir appear to have the same v_vfsp. 3737 */ 3738 if (tdzp->z_zfsvfs != zfsvfs || zfsctl_is_node(tdvp)) { 3739 ZFS_EXIT(zfsvfs); 3740 return (SET_ERROR(EXDEV)); 3741 } 3742 3743 if (zfsvfs->z_utf8 && u8_validate(tnm, 3744 strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3745 ZFS_EXIT(zfsvfs); 3746 return (SET_ERROR(EILSEQ)); 3747 } 3748 3749 if (flags & FIGNORECASE) 3750 zflg |= ZCILOOK; 3751 3752top: 3753 szp = NULL; 3754 tzp = NULL; 3755 zl = NULL; 3756 3757 /* 3758 * This is to prevent the creation of links into attribute space 3759 * by renaming a linked file into/outof an attribute directory. 3760 * See the comment in zfs_link() for why this is considered bad. 3761 */ 3762 if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3763 ZFS_EXIT(zfsvfs); 3764 return (SET_ERROR(EINVAL)); 3765 } 3766 3767 /* 3768 * Lock source and target directory entries. To prevent deadlock, 3769 * a lock ordering must be defined. We lock the directory with 3770 * the smallest object id first, or if it's a tie, the one with 3771 * the lexically first name. 3772 */ 3773 if (sdzp->z_id < tdzp->z_id) { 3774 cmp = -1; 3775 } else if (sdzp->z_id > tdzp->z_id) { 3776 cmp = 1; 3777 } else { 3778 /* 3779 * First compare the two name arguments without 3780 * considering any case folding. 3781 */ 3782 int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); 3783 3784 cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); 3785 ASSERT(error == 0 || !zfsvfs->z_utf8); 3786 if (cmp == 0) { 3787 /* 3788 * POSIX: "If the old argument and the new argument 3789 * both refer to links to the same existing file, 3790 * the rename() function shall return successfully 3791 * and perform no other action." 3792 */ 3793 ZFS_EXIT(zfsvfs); 3794 return (0); 3795 } 3796 /* 3797 * If the file system is case-folding, then we may 3798 * have some more checking to do. A case-folding file 3799 * system is either supporting mixed case sensitivity 3800 * access or is completely case-insensitive. Note 3801 * that the file system is always case preserving. 3802 * 3803 * In mixed sensitivity mode case sensitive behavior 3804 * is the default. FIGNORECASE must be used to 3805 * explicitly request case insensitive behavior. 3806 * 3807 * If the source and target names provided differ only 3808 * by case (e.g., a request to rename 'tim' to 'Tim'), 3809 * we will treat this as a special case in the 3810 * case-insensitive mode: as long as the source name 3811 * is an exact match, we will allow this to proceed as 3812 * a name-change request. 3813 */ 3814 if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 3815 (zfsvfs->z_case == ZFS_CASE_MIXED && 3816 flags & FIGNORECASE)) && 3817 u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, 3818 &error) == 0) { 3819 /* 3820 * case preserving rename request, require exact 3821 * name matches 3822 */ 3823 zflg |= ZCIEXACT; 3824 zflg &= ~ZCILOOK; 3825 } 3826 } 3827 3828 /* 3829 * If the source and destination directories are the same, we should 3830 * grab the z_name_lock of that directory only once. 3831 */ 3832 if (sdzp == tdzp) { 3833 zflg |= ZHAVELOCK; 3834 rw_enter(&sdzp->z_name_lock, RW_READER); 3835 } 3836 3837 if (cmp < 0) { 3838 serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, 3839 ZEXISTS | zflg, NULL, NULL); 3840 terr = zfs_dirent_lock(&tdl, 3841 tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); 3842 } else { 3843 terr = zfs_dirent_lock(&tdl, 3844 tdzp, tnm, &tzp, zflg, NULL, NULL); 3845 serr = zfs_dirent_lock(&sdl, 3846 sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, 3847 NULL, NULL); 3848 } 3849 3850 if (serr) { 3851 /* 3852 * Source entry invalid or not there. 3853 */ 3854 if (!terr) { 3855 zfs_dirent_unlock(tdl); 3856 if (tzp) 3857 VN_RELE(ZTOV(tzp)); 3858 } 3859 3860 if (sdzp == tdzp) 3861 rw_exit(&sdzp->z_name_lock); 3862 3863 /* 3864 * FreeBSD: In OpenSolaris they only check if rename source is 3865 * ".." here, because "." is handled in their lookup. This is 3866 * not the case for FreeBSD, so we check for "." explicitly. 3867 */ 3868 if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0) 3869 serr = SET_ERROR(EINVAL); 3870 ZFS_EXIT(zfsvfs); 3871 return (serr); 3872 } 3873 if (terr) { 3874 zfs_dirent_unlock(sdl); 3875 VN_RELE(ZTOV(szp)); 3876 3877 if (sdzp == tdzp) 3878 rw_exit(&sdzp->z_name_lock); 3879 3880 if (strcmp(tnm, "..") == 0) 3881 terr = SET_ERROR(EINVAL); 3882 ZFS_EXIT(zfsvfs); 3883 return (terr); 3884 } 3885 3886 /* 3887 * Must have write access at the source to remove the old entry 3888 * and write access at the target to create the new entry. 3889 * Note that if target and source are the same, this can be 3890 * done in a single check. 3891 */ 3892 3893 if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3894 goto out; 3895 3896 if (ZTOV(szp)->v_type == VDIR) { 3897 /* 3898 * Check to make sure rename is valid. 3899 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3900 */ 3901 if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) 3902 goto out; 3903 } 3904 3905 /* 3906 * Does target exist? 3907 */ 3908 if (tzp) { 3909 /* 3910 * Source and target must be the same type. 3911 */ 3912 if (ZTOV(szp)->v_type == VDIR) { 3913 if (ZTOV(tzp)->v_type != VDIR) { 3914 error = SET_ERROR(ENOTDIR); 3915 goto out; 3916 } 3917 } else { 3918 if (ZTOV(tzp)->v_type == VDIR) { 3919 error = SET_ERROR(EISDIR); 3920 goto out; 3921 } 3922 } 3923 /* 3924 * POSIX dictates that when the source and target 3925 * entries refer to the same file object, rename 3926 * must do nothing and exit without error. 3927 */ 3928 if (szp->z_id == tzp->z_id) { 3929 error = 0; 3930 goto out; 3931 } 3932 } 3933 3934 vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); 3935 if (tzp) 3936 vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 3937 3938 /* 3939 * notify the target directory if it is not the same 3940 * as source directory. 3941 */ 3942 if (tdvp != sdvp) { 3943 vnevent_rename_dest_dir(tdvp, ct); 3944 } 3945 3946 tx = dmu_tx_create(zfsvfs->z_os); 3947 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 3948 dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3949 dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3950 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 3951 if (sdzp != tdzp) { 3952 dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 3953 zfs_sa_upgrade_txholds(tx, tdzp); 3954 } 3955 if (tzp) { 3956 dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 3957 zfs_sa_upgrade_txholds(tx, tzp); 3958 } 3959 3960 zfs_sa_upgrade_txholds(tx, szp); 3961 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3962 error = dmu_tx_assign(tx, TXG_NOWAIT); 3963 if (error) { 3964 if (zl != NULL) 3965 zfs_rename_unlock(&zl); 3966 zfs_dirent_unlock(sdl); 3967 zfs_dirent_unlock(tdl); 3968 3969 if (sdzp == tdzp) 3970 rw_exit(&sdzp->z_name_lock); 3971 3972 VN_RELE(ZTOV(szp)); 3973 if (tzp) 3974 VN_RELE(ZTOV(tzp)); 3975 if (error == ERESTART) { 3976 dmu_tx_wait(tx); 3977 dmu_tx_abort(tx); 3978 goto top; 3979 } 3980 dmu_tx_abort(tx); 3981 ZFS_EXIT(zfsvfs); 3982 return (error); 3983 } 3984 3985 if (tzp) /* Attempt to remove the existing target */ 3986 error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); 3987 3988 if (error == 0) { 3989 error = zfs_link_create(tdl, szp, tx, ZRENAMING); 3990 if (error == 0) { 3991 szp->z_pflags |= ZFS_AV_MODIFIED; 3992 3993 error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 3994 (void *)&szp->z_pflags, sizeof (uint64_t), tx); 3995 ASSERT0(error); 3996 3997 error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); 3998 if (error == 0) { 3999 zfs_log_rename(zilog, tx, TX_RENAME | 4000 (flags & FIGNORECASE ? TX_CI : 0), sdzp, 4001 sdl->dl_name, tdzp, tdl->dl_name, szp); 4002 4003 /* 4004 * Update path information for the target vnode 4005 */ 4006 vn_renamepath(tdvp, ZTOV(szp), tnm, 4007 strlen(tnm)); 4008 } else { 4009 /* 4010 * At this point, we have successfully created 4011 * the target name, but have failed to remove 4012 * the source name. Since the create was done 4013 * with the ZRENAMING flag, there are 4014 * complications; for one, the link count is 4015 * wrong. The easiest way to deal with this 4016 * is to remove the newly created target, and 4017 * return the original error. This must 4018 * succeed; fortunately, it is very unlikely to 4019 * fail, since we just created it. 4020 */ 4021 VERIFY3U(zfs_link_destroy(tdl, szp, tx, 4022 ZRENAMING, NULL), ==, 0); 4023 } 4024 } 4025#ifdef FREEBSD_NAMECACHE 4026 if (error == 0) { 4027 cache_purge(sdvp); 4028 cache_purge(tdvp); 4029 cache_purge(ZTOV(szp)); 4030 if (tzp) 4031 cache_purge(ZTOV(tzp)); 4032 } 4033#endif 4034 } 4035 4036 dmu_tx_commit(tx); 4037out: 4038 if (zl != NULL) 4039 zfs_rename_unlock(&zl); 4040 4041 zfs_dirent_unlock(sdl); 4042 zfs_dirent_unlock(tdl); 4043 4044 if (sdzp == tdzp) 4045 rw_exit(&sdzp->z_name_lock); 4046 4047 4048 VN_RELE(ZTOV(szp)); 4049 if (tzp) 4050 VN_RELE(ZTOV(tzp)); 4051 4052 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4053 zil_commit(zilog, 0); 4054 4055 ZFS_EXIT(zfsvfs); 4056 4057 return (error); 4058} 4059 4060/* 4061 * Insert the indicated symbolic reference entry into the directory. 4062 * 4063 * IN: dvp - Directory to contain new symbolic link. 4064 * link - Name for new symlink entry. 4065 * vap - Attributes of new entry. 4066 * cr - credentials of caller. 4067 * ct - caller context 4068 * flags - case flags 4069 * 4070 * RETURN: 0 on success, error code on failure. 4071 * 4072 * Timestamps: 4073 * dvp - ctime|mtime updated 4074 */ 4075/*ARGSUSED*/ 4076static int 4077zfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 4078 cred_t *cr, kthread_t *td) 4079{ 4080 znode_t *zp, *dzp = VTOZ(dvp); 4081 zfs_dirlock_t *dl; 4082 dmu_tx_t *tx; 4083 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4084 zilog_t *zilog; 4085 uint64_t len = strlen(link); 4086 int error; 4087 int zflg = ZNEW; 4088 zfs_acl_ids_t acl_ids; 4089 boolean_t fuid_dirtied; 4090 uint64_t txtype = TX_SYMLINK; 4091 int flags = 0; 4092 4093 ASSERT(vap->va_type == VLNK); 4094 4095 ZFS_ENTER(zfsvfs); 4096 ZFS_VERIFY_ZP(dzp); 4097 zilog = zfsvfs->z_log; 4098 4099 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 4100 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4101 ZFS_EXIT(zfsvfs); 4102 return (SET_ERROR(EILSEQ)); 4103 } 4104 if (flags & FIGNORECASE) 4105 zflg |= ZCILOOK; 4106 4107 if (len > MAXPATHLEN) { 4108 ZFS_EXIT(zfsvfs); 4109 return (SET_ERROR(ENAMETOOLONG)); 4110 } 4111 4112 if ((error = zfs_acl_ids_create(dzp, 0, 4113 vap, cr, NULL, &acl_ids)) != 0) { 4114 ZFS_EXIT(zfsvfs); 4115 return (error); 4116 } 4117top: 4118 /* 4119 * Attempt to lock directory; fail if entry already exists. 4120 */ 4121 error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 4122 if (error) { 4123 zfs_acl_ids_free(&acl_ids); 4124 ZFS_EXIT(zfsvfs); 4125 return (error); 4126 } 4127 4128 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4129 zfs_acl_ids_free(&acl_ids); 4130 zfs_dirent_unlock(dl); 4131 ZFS_EXIT(zfsvfs); 4132 return (error); 4133 } 4134 4135 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 4136 zfs_acl_ids_free(&acl_ids); 4137 zfs_dirent_unlock(dl); 4138 ZFS_EXIT(zfsvfs); 4139 return (SET_ERROR(EDQUOT)); 4140 } 4141 tx = dmu_tx_create(zfsvfs->z_os); 4142 fuid_dirtied = zfsvfs->z_fuid_dirty; 4143 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 4144 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4145 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 4146 ZFS_SA_BASE_ATTR_SIZE + len); 4147 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 4148 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 4149 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 4150 acl_ids.z_aclp->z_acl_bytes); 4151 } 4152 if (fuid_dirtied) 4153 zfs_fuid_txhold(zfsvfs, tx); 4154 error = dmu_tx_assign(tx, TXG_NOWAIT); 4155 if (error) { 4156 zfs_dirent_unlock(dl); 4157 if (error == ERESTART) { 4158 dmu_tx_wait(tx); 4159 dmu_tx_abort(tx); 4160 goto top; 4161 } 4162 zfs_acl_ids_free(&acl_ids); 4163 dmu_tx_abort(tx); 4164 ZFS_EXIT(zfsvfs); 4165 return (error); 4166 } 4167 4168 /* 4169 * Create a new object for the symlink. 4170 * for version 4 ZPL datsets the symlink will be an SA attribute 4171 */ 4172 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 4173 4174 if (fuid_dirtied) 4175 zfs_fuid_sync(zfsvfs, tx); 4176 4177 mutex_enter(&zp->z_lock); 4178 if (zp->z_is_sa) 4179 error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 4180 link, len, tx); 4181 else 4182 zfs_sa_symlink(zp, link, len, tx); 4183 mutex_exit(&zp->z_lock); 4184 4185 zp->z_size = len; 4186 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 4187 &zp->z_size, sizeof (zp->z_size), tx); 4188 /* 4189 * Insert the new object into the directory. 4190 */ 4191 (void) zfs_link_create(dl, zp, tx, ZNEW); 4192 4193 if (flags & FIGNORECASE) 4194 txtype |= TX_CI; 4195 zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 4196 *vpp = ZTOV(zp); 4197 4198 zfs_acl_ids_free(&acl_ids); 4199 4200 dmu_tx_commit(tx); 4201 4202 zfs_dirent_unlock(dl); 4203 4204 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4205 zil_commit(zilog, 0); 4206 4207 ZFS_EXIT(zfsvfs); 4208 return (error); 4209} 4210 4211/* 4212 * Return, in the buffer contained in the provided uio structure, 4213 * the symbolic path referred to by vp. 4214 * 4215 * IN: vp - vnode of symbolic link. 4216 * uio - structure to contain the link path. 4217 * cr - credentials of caller. 4218 * ct - caller context 4219 * 4220 * OUT: uio - structure containing the link path. 4221 * 4222 * RETURN: 0 on success, error code on failure. 4223 * 4224 * Timestamps: 4225 * vp - atime updated 4226 */ 4227/* ARGSUSED */ 4228static int 4229zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 4230{ 4231 znode_t *zp = VTOZ(vp); 4232 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4233 int error; 4234 4235 ZFS_ENTER(zfsvfs); 4236 ZFS_VERIFY_ZP(zp); 4237 4238 mutex_enter(&zp->z_lock); 4239 if (zp->z_is_sa) 4240 error = sa_lookup_uio(zp->z_sa_hdl, 4241 SA_ZPL_SYMLINK(zfsvfs), uio); 4242 else 4243 error = zfs_sa_readlink(zp, uio); 4244 mutex_exit(&zp->z_lock); 4245 4246 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4247 4248 ZFS_EXIT(zfsvfs); 4249 return (error); 4250} 4251 4252/* 4253 * Insert a new entry into directory tdvp referencing svp. 4254 * 4255 * IN: tdvp - Directory to contain new entry. 4256 * svp - vnode of new entry. 4257 * name - name of new entry. 4258 * cr - credentials of caller. 4259 * ct - caller context 4260 * 4261 * RETURN: 0 on success, error code on failure. 4262 * 4263 * Timestamps: 4264 * tdvp - ctime|mtime updated 4265 * svp - ctime updated 4266 */ 4267/* ARGSUSED */ 4268static int 4269zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 4270 caller_context_t *ct, int flags) 4271{ 4272 znode_t *dzp = VTOZ(tdvp); 4273 znode_t *tzp, *szp; 4274 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4275 zilog_t *zilog; 4276 zfs_dirlock_t *dl; 4277 dmu_tx_t *tx; 4278 vnode_t *realvp; 4279 int error; 4280 int zf = ZNEW; 4281 uint64_t parent; 4282 uid_t owner; 4283 4284 ASSERT(tdvp->v_type == VDIR); 4285 4286 ZFS_ENTER(zfsvfs); 4287 ZFS_VERIFY_ZP(dzp); 4288 zilog = zfsvfs->z_log; 4289 4290 if (VOP_REALVP(svp, &realvp, ct) == 0) 4291 svp = realvp; 4292 4293 /* 4294 * POSIX dictates that we return EPERM here. 4295 * Better choices include ENOTSUP or EISDIR. 4296 */ 4297 if (svp->v_type == VDIR) { 4298 ZFS_EXIT(zfsvfs); 4299 return (SET_ERROR(EPERM)); 4300 } 4301 4302 szp = VTOZ(svp); 4303 ZFS_VERIFY_ZP(szp); 4304 4305 /* 4306 * We check z_zfsvfs rather than v_vfsp here, because snapshots and the 4307 * ctldir appear to have the same v_vfsp. 4308 */ 4309 if (szp->z_zfsvfs != zfsvfs || zfsctl_is_node(svp)) { 4310 ZFS_EXIT(zfsvfs); 4311 return (SET_ERROR(EXDEV)); 4312 } 4313 4314 /* Prevent links to .zfs/shares files */ 4315 4316 if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 4317 &parent, sizeof (uint64_t))) != 0) { 4318 ZFS_EXIT(zfsvfs); 4319 return (error); 4320 } 4321 if (parent == zfsvfs->z_shares_dir) { 4322 ZFS_EXIT(zfsvfs); 4323 return (SET_ERROR(EPERM)); 4324 } 4325 4326 if (zfsvfs->z_utf8 && u8_validate(name, 4327 strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4328 ZFS_EXIT(zfsvfs); 4329 return (SET_ERROR(EILSEQ)); 4330 } 4331 if (flags & FIGNORECASE) 4332 zf |= ZCILOOK; 4333 4334 /* 4335 * We do not support links between attributes and non-attributes 4336 * because of the potential security risk of creating links 4337 * into "normal" file space in order to circumvent restrictions 4338 * imposed in attribute space. 4339 */ 4340 if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4341 ZFS_EXIT(zfsvfs); 4342 return (SET_ERROR(EINVAL)); 4343 } 4344 4345 4346 owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4347 if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { 4348 ZFS_EXIT(zfsvfs); 4349 return (SET_ERROR(EPERM)); 4350 } 4351 4352 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4353 ZFS_EXIT(zfsvfs); 4354 return (error); 4355 } 4356 4357top: 4358 /* 4359 * Attempt to lock directory; fail if entry already exists. 4360 */ 4361 error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 4362 if (error) { 4363 ZFS_EXIT(zfsvfs); 4364 return (error); 4365 } 4366 4367 tx = dmu_tx_create(zfsvfs->z_os); 4368 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4369 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4370 zfs_sa_upgrade_txholds(tx, szp); 4371 zfs_sa_upgrade_txholds(tx, dzp); 4372 error = dmu_tx_assign(tx, TXG_NOWAIT); 4373 if (error) { 4374 zfs_dirent_unlock(dl); 4375 if (error == ERESTART) { 4376 dmu_tx_wait(tx); 4377 dmu_tx_abort(tx); 4378 goto top; 4379 } 4380 dmu_tx_abort(tx); 4381 ZFS_EXIT(zfsvfs); 4382 return (error); 4383 } 4384 4385 error = zfs_link_create(dl, szp, tx, 0); 4386 4387 if (error == 0) { 4388 uint64_t txtype = TX_LINK; 4389 if (flags & FIGNORECASE) 4390 txtype |= TX_CI; 4391 zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4392 } 4393 4394 dmu_tx_commit(tx); 4395 4396 zfs_dirent_unlock(dl); 4397 4398 if (error == 0) { 4399 vnevent_link(svp, ct); 4400 } 4401 4402 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4403 zil_commit(zilog, 0); 4404 4405 ZFS_EXIT(zfsvfs); 4406 return (error); 4407} 4408 4409#ifdef sun 4410/* 4411 * zfs_null_putapage() is used when the file system has been force 4412 * unmounted. It just drops the pages. 4413 */ 4414/* ARGSUSED */ 4415static int 4416zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4417 size_t *lenp, int flags, cred_t *cr) 4418{ 4419 pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); 4420 return (0); 4421} 4422 4423/* 4424 * Push a page out to disk, klustering if possible. 4425 * 4426 * IN: vp - file to push page to. 4427 * pp - page to push. 4428 * flags - additional flags. 4429 * cr - credentials of caller. 4430 * 4431 * OUT: offp - start of range pushed. 4432 * lenp - len of range pushed. 4433 * 4434 * RETURN: 0 on success, error code on failure. 4435 * 4436 * NOTE: callers must have locked the page to be pushed. On 4437 * exit, the page (and all other pages in the kluster) must be 4438 * unlocked. 4439 */ 4440/* ARGSUSED */ 4441static int 4442zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4443 size_t *lenp, int flags, cred_t *cr) 4444{ 4445 znode_t *zp = VTOZ(vp); 4446 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4447 dmu_tx_t *tx; 4448 u_offset_t off, koff; 4449 size_t len, klen; 4450 int err; 4451 4452 off = pp->p_offset; 4453 len = PAGESIZE; 4454 /* 4455 * If our blocksize is bigger than the page size, try to kluster 4456 * multiple pages so that we write a full block (thus avoiding 4457 * a read-modify-write). 4458 */ 4459 if (off < zp->z_size && zp->z_blksz > PAGESIZE) { 4460 klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); 4461 koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0; 4462 ASSERT(koff <= zp->z_size); 4463 if (koff + klen > zp->z_size) 4464 klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); 4465 pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); 4466 } 4467 ASSERT3U(btop(len), ==, btopr(len)); 4468 4469 /* 4470 * Can't push pages past end-of-file. 4471 */ 4472 if (off >= zp->z_size) { 4473 /* ignore all pages */ 4474 err = 0; 4475 goto out; 4476 } else if (off + len > zp->z_size) { 4477 int npages = btopr(zp->z_size - off); 4478 page_t *trunc; 4479 4480 page_list_break(&pp, &trunc, npages); 4481 /* ignore pages past end of file */ 4482 if (trunc) 4483 pvn_write_done(trunc, flags); 4484 len = zp->z_size - off; 4485 } 4486 4487 if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 4488 zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4489 err = SET_ERROR(EDQUOT); 4490 goto out; 4491 } 4492top: 4493 tx = dmu_tx_create(zfsvfs->z_os); 4494 dmu_tx_hold_write(tx, zp->z_id, off, len); 4495 4496 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4497 zfs_sa_upgrade_txholds(tx, zp); 4498 err = dmu_tx_assign(tx, TXG_NOWAIT); 4499 if (err != 0) { 4500 if (err == ERESTART) { 4501 dmu_tx_wait(tx); 4502 dmu_tx_abort(tx); 4503 goto top; 4504 } 4505 dmu_tx_abort(tx); 4506 goto out; 4507 } 4508 4509 if (zp->z_blksz <= PAGESIZE) { 4510 caddr_t va = zfs_map_page(pp, S_READ); 4511 ASSERT3U(len, <=, PAGESIZE); 4512 dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 4513 zfs_unmap_page(pp, va); 4514 } else { 4515 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 4516 } 4517 4518 if (err == 0) { 4519 uint64_t mtime[2], ctime[2]; 4520 sa_bulk_attr_t bulk[3]; 4521 int count = 0; 4522 4523 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 4524 &mtime, 16); 4525 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 4526 &ctime, 16); 4527 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4528 &zp->z_pflags, 8); 4529 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 4530 B_TRUE); 4531 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 4532 } 4533 dmu_tx_commit(tx); 4534 4535out: 4536 pvn_write_done(pp, (err ? B_ERROR : 0) | flags); 4537 if (offp) 4538 *offp = off; 4539 if (lenp) 4540 *lenp = len; 4541 4542 return (err); 4543} 4544 4545/* 4546 * Copy the portion of the file indicated from pages into the file. 4547 * The pages are stored in a page list attached to the files vnode. 4548 * 4549 * IN: vp - vnode of file to push page data to. 4550 * off - position in file to put data. 4551 * len - amount of data to write. 4552 * flags - flags to control the operation. 4553 * cr - credentials of caller. 4554 * ct - caller context. 4555 * 4556 * RETURN: 0 on success, error code on failure. 4557 * 4558 * Timestamps: 4559 * vp - ctime|mtime updated 4560 */ 4561/*ARGSUSED*/ 4562static int 4563zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, 4564 caller_context_t *ct) 4565{ 4566 znode_t *zp = VTOZ(vp); 4567 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4568 page_t *pp; 4569 size_t io_len; 4570 u_offset_t io_off; 4571 uint_t blksz; 4572 rl_t *rl; 4573 int error = 0; 4574 4575 ZFS_ENTER(zfsvfs); 4576 ZFS_VERIFY_ZP(zp); 4577 4578 /* 4579 * Align this request to the file block size in case we kluster. 4580 * XXX - this can result in pretty aggresive locking, which can 4581 * impact simultanious read/write access. One option might be 4582 * to break up long requests (len == 0) into block-by-block 4583 * operations to get narrower locking. 4584 */ 4585 blksz = zp->z_blksz; 4586 if (ISP2(blksz)) 4587 io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); 4588 else 4589 io_off = 0; 4590 if (len > 0 && ISP2(blksz)) 4591 io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); 4592 else 4593 io_len = 0; 4594 4595 if (io_len == 0) { 4596 /* 4597 * Search the entire vp list for pages >= io_off. 4598 */ 4599 rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); 4600 error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); 4601 goto out; 4602 } 4603 rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); 4604 4605 if (off > zp->z_size) { 4606 /* past end of file */ 4607 zfs_range_unlock(rl); 4608 ZFS_EXIT(zfsvfs); 4609 return (0); 4610 } 4611 4612 len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); 4613 4614 for (off = io_off; io_off < off + len; io_off += io_len) { 4615 if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 4616 pp = page_lookup(vp, io_off, 4617 (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); 4618 } else { 4619 pp = page_lookup_nowait(vp, io_off, 4620 (flags & B_FREE) ? SE_EXCL : SE_SHARED); 4621 } 4622 4623 if (pp != NULL && pvn_getdirty(pp, flags)) { 4624 int err; 4625 4626 /* 4627 * Found a dirty page to push 4628 */ 4629 err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); 4630 if (err) 4631 error = err; 4632 } else { 4633 io_len = PAGESIZE; 4634 } 4635 } 4636out: 4637 zfs_range_unlock(rl); 4638 if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4639 zil_commit(zfsvfs->z_log, zp->z_id); 4640 ZFS_EXIT(zfsvfs); 4641 return (error); 4642} 4643#endif /* sun */ 4644 4645/*ARGSUSED*/ 4646void 4647zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4648{ 4649 znode_t *zp = VTOZ(vp); 4650 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4651 int error; 4652 4653 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4654 if (zp->z_sa_hdl == NULL) { 4655 /* 4656 * The fs has been unmounted, or we did a 4657 * suspend/resume and this file no longer exists. 4658 */ 4659 rw_exit(&zfsvfs->z_teardown_inactive_lock); 4660 vrecycle(vp); 4661 return; 4662 } 4663 4664 mutex_enter(&zp->z_lock); 4665 if (zp->z_unlinked) { 4666 /* 4667 * Fast path to recycle a vnode of a removed file. 4668 */ 4669 mutex_exit(&zp->z_lock); 4670 rw_exit(&zfsvfs->z_teardown_inactive_lock); 4671 vrecycle(vp); 4672 return; 4673 } 4674 mutex_exit(&zp->z_lock); 4675 4676 if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4677 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4678 4679 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4680 zfs_sa_upgrade_txholds(tx, zp); 4681 error = dmu_tx_assign(tx, TXG_WAIT); 4682 if (error) { 4683 dmu_tx_abort(tx); 4684 } else { 4685 mutex_enter(&zp->z_lock); 4686 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 4687 (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4688 zp->z_atime_dirty = 0; 4689 mutex_exit(&zp->z_lock); 4690 dmu_tx_commit(tx); 4691 } 4692 } 4693 rw_exit(&zfsvfs->z_teardown_inactive_lock); 4694} 4695 4696#ifdef sun 4697/* 4698 * Bounds-check the seek operation. 4699 * 4700 * IN: vp - vnode seeking within 4701 * ooff - old file offset 4702 * noffp - pointer to new file offset 4703 * ct - caller context 4704 * 4705 * RETURN: 0 on success, EINVAL if new offset invalid. 4706 */ 4707/* ARGSUSED */ 4708static int 4709zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 4710 caller_context_t *ct) 4711{ 4712 if (vp->v_type == VDIR) 4713 return (0); 4714 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 4715} 4716 4717/* 4718 * Pre-filter the generic locking function to trap attempts to place 4719 * a mandatory lock on a memory mapped file. 4720 */ 4721static int 4722zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 4723 flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 4724{ 4725 znode_t *zp = VTOZ(vp); 4726 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4727 4728 ZFS_ENTER(zfsvfs); 4729 ZFS_VERIFY_ZP(zp); 4730 4731 /* 4732 * We are following the UFS semantics with respect to mapcnt 4733 * here: If we see that the file is mapped already, then we will 4734 * return an error, but we don't worry about races between this 4735 * function and zfs_map(). 4736 */ 4737 if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { 4738 ZFS_EXIT(zfsvfs); 4739 return (SET_ERROR(EAGAIN)); 4740 } 4741 ZFS_EXIT(zfsvfs); 4742 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 4743} 4744 4745/* 4746 * If we can't find a page in the cache, we will create a new page 4747 * and fill it with file data. For efficiency, we may try to fill 4748 * multiple pages at once (klustering) to fill up the supplied page 4749 * list. Note that the pages to be filled are held with an exclusive 4750 * lock to prevent access by other threads while they are being filled. 4751 */ 4752static int 4753zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, 4754 caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) 4755{ 4756 znode_t *zp = VTOZ(vp); 4757 page_t *pp, *cur_pp; 4758 objset_t *os = zp->z_zfsvfs->z_os; 4759 u_offset_t io_off, total; 4760 size_t io_len; 4761 int err; 4762 4763 if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { 4764 /* 4765 * We only have a single page, don't bother klustering 4766 */ 4767 io_off = off; 4768 io_len = PAGESIZE; 4769 pp = page_create_va(vp, io_off, io_len, 4770 PG_EXCL | PG_WAIT, seg, addr); 4771 } else { 4772 /* 4773 * Try to find enough pages to fill the page list 4774 */ 4775 pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 4776 &io_len, off, plsz, 0); 4777 } 4778 if (pp == NULL) { 4779 /* 4780 * The page already exists, nothing to do here. 4781 */ 4782 *pl = NULL; 4783 return (0); 4784 } 4785 4786 /* 4787 * Fill the pages in the kluster. 4788 */ 4789 cur_pp = pp; 4790 for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { 4791 caddr_t va; 4792 4793 ASSERT3U(io_off, ==, cur_pp->p_offset); 4794 va = zfs_map_page(cur_pp, S_WRITE); 4795 err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, 4796 DMU_READ_PREFETCH); 4797 zfs_unmap_page(cur_pp, va); 4798 if (err) { 4799 /* On error, toss the entire kluster */ 4800 pvn_read_done(pp, B_ERROR); 4801 /* convert checksum errors into IO errors */ 4802 if (err == ECKSUM) 4803 err = SET_ERROR(EIO); 4804 return (err); 4805 } 4806 cur_pp = cur_pp->p_next; 4807 } 4808 4809 /* 4810 * Fill in the page list array from the kluster starting 4811 * from the desired offset `off'. 4812 * NOTE: the page list will always be null terminated. 4813 */ 4814 pvn_plist_init(pp, pl, plsz, off, io_len, rw); 4815 ASSERT(pl == NULL || (*pl)->p_offset == off); 4816 4817 return (0); 4818} 4819 4820/* 4821 * Return pointers to the pages for the file region [off, off + len] 4822 * in the pl array. If plsz is greater than len, this function may 4823 * also return page pointers from after the specified region 4824 * (i.e. the region [off, off + plsz]). These additional pages are 4825 * only returned if they are already in the cache, or were created as 4826 * part of a klustered read. 4827 * 4828 * IN: vp - vnode of file to get data from. 4829 * off - position in file to get data from. 4830 * len - amount of data to retrieve. 4831 * plsz - length of provided page list. 4832 * seg - segment to obtain pages for. 4833 * addr - virtual address of fault. 4834 * rw - mode of created pages. 4835 * cr - credentials of caller. 4836 * ct - caller context. 4837 * 4838 * OUT: protp - protection mode of created pages. 4839 * pl - list of pages created. 4840 * 4841 * RETURN: 0 on success, error code on failure. 4842 * 4843 * Timestamps: 4844 * vp - atime updated 4845 */ 4846/* ARGSUSED */ 4847static int 4848zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 4849 page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, 4850 enum seg_rw rw, cred_t *cr, caller_context_t *ct) 4851{ 4852 znode_t *zp = VTOZ(vp); 4853 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4854 page_t **pl0 = pl; 4855 int err = 0; 4856 4857 /* we do our own caching, faultahead is unnecessary */ 4858 if (pl == NULL) 4859 return (0); 4860 else if (len > plsz) 4861 len = plsz; 4862 else 4863 len = P2ROUNDUP(len, PAGESIZE); 4864 ASSERT(plsz >= len); 4865 4866 ZFS_ENTER(zfsvfs); 4867 ZFS_VERIFY_ZP(zp); 4868 4869 if (protp) 4870 *protp = PROT_ALL; 4871 4872 /* 4873 * Loop through the requested range [off, off + len) looking 4874 * for pages. If we don't find a page, we will need to create 4875 * a new page and fill it with data from the file. 4876 */ 4877 while (len > 0) { 4878 if (*pl = page_lookup(vp, off, SE_SHARED)) 4879 *(pl+1) = NULL; 4880 else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) 4881 goto out; 4882 while (*pl) { 4883 ASSERT3U((*pl)->p_offset, ==, off); 4884 off += PAGESIZE; 4885 addr += PAGESIZE; 4886 if (len > 0) { 4887 ASSERT3U(len, >=, PAGESIZE); 4888 len -= PAGESIZE; 4889 } 4890 ASSERT3U(plsz, >=, PAGESIZE); 4891 plsz -= PAGESIZE; 4892 pl++; 4893 } 4894 } 4895 4896 /* 4897 * Fill out the page array with any pages already in the cache. 4898 */ 4899 while (plsz > 0 && 4900 (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { 4901 off += PAGESIZE; 4902 plsz -= PAGESIZE; 4903 } 4904out: 4905 if (err) { 4906 /* 4907 * Release any pages we have previously locked. 4908 */ 4909 while (pl > pl0) 4910 page_unlock(*--pl); 4911 } else { 4912 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4913 } 4914 4915 *pl = NULL; 4916 4917 ZFS_EXIT(zfsvfs); 4918 return (err); 4919} 4920 4921/* 4922 * Request a memory map for a section of a file. This code interacts 4923 * with common code and the VM system as follows: 4924 * 4925 * - common code calls mmap(), which ends up in smmap_common() 4926 * - this calls VOP_MAP(), which takes you into (say) zfs 4927 * - zfs_map() calls as_map(), passing segvn_create() as the callback 4928 * - segvn_create() creates the new segment and calls VOP_ADDMAP() 4929 * - zfs_addmap() updates z_mapcnt 4930 */ 4931/*ARGSUSED*/ 4932static int 4933zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 4934 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4935 caller_context_t *ct) 4936{ 4937 znode_t *zp = VTOZ(vp); 4938 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4939 segvn_crargs_t vn_a; 4940 int error; 4941 4942 ZFS_ENTER(zfsvfs); 4943 ZFS_VERIFY_ZP(zp); 4944 4945 if ((prot & PROT_WRITE) && (zp->z_pflags & 4946 (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { 4947 ZFS_EXIT(zfsvfs); 4948 return (SET_ERROR(EPERM)); 4949 } 4950 4951 if ((prot & (PROT_READ | PROT_EXEC)) && 4952 (zp->z_pflags & ZFS_AV_QUARANTINED)) { 4953 ZFS_EXIT(zfsvfs); 4954 return (SET_ERROR(EACCES)); 4955 } 4956 4957 if (vp->v_flag & VNOMAP) { 4958 ZFS_EXIT(zfsvfs); 4959 return (SET_ERROR(ENOSYS)); 4960 } 4961 4962 if (off < 0 || len > MAXOFFSET_T - off) { 4963 ZFS_EXIT(zfsvfs); 4964 return (SET_ERROR(ENXIO)); 4965 } 4966 4967 if (vp->v_type != VREG) { 4968 ZFS_EXIT(zfsvfs); 4969 return (SET_ERROR(ENODEV)); 4970 } 4971 4972 /* 4973 * If file is locked, disallow mapping. 4974 */ 4975 if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) { 4976 ZFS_EXIT(zfsvfs); 4977 return (SET_ERROR(EAGAIN)); 4978 } 4979 4980 as_rangelock(as); 4981 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 4982 if (error != 0) { 4983 as_rangeunlock(as); 4984 ZFS_EXIT(zfsvfs); 4985 return (error); 4986 } 4987 4988 vn_a.vp = vp; 4989 vn_a.offset = (u_offset_t)off; 4990 vn_a.type = flags & MAP_TYPE; 4991 vn_a.prot = prot; 4992 vn_a.maxprot = maxprot; 4993 vn_a.cred = cr; 4994 vn_a.amp = NULL; 4995 vn_a.flags = flags & ~MAP_TYPE; 4996 vn_a.szc = 0; 4997 vn_a.lgrp_mem_policy_flags = 0; 4998 4999 error = as_map(as, *addrp, len, segvn_create, &vn_a); 5000 5001 as_rangeunlock(as); 5002 ZFS_EXIT(zfsvfs); 5003 return (error); 5004} 5005 5006/* ARGSUSED */ 5007static int 5008zfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 5009 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 5010 caller_context_t *ct) 5011{ 5012 uint64_t pages = btopr(len); 5013 5014 atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); 5015 return (0); 5016} 5017 5018/* 5019 * The reason we push dirty pages as part of zfs_delmap() is so that we get a 5020 * more accurate mtime for the associated file. Since we don't have a way of 5021 * detecting when the data was actually modified, we have to resort to 5022 * heuristics. If an explicit msync() is done, then we mark the mtime when the 5023 * last page is pushed. The problem occurs when the msync() call is omitted, 5024 * which by far the most common case: 5025 * 5026 * open() 5027 * mmap() 5028 * <modify memory> 5029 * munmap() 5030 * close() 5031 * <time lapse> 5032 * putpage() via fsflush 5033 * 5034 * If we wait until fsflush to come along, we can have a modification time that 5035 * is some arbitrary point in the future. In order to prevent this in the 5036 * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is 5037 * torn down. 5038 */ 5039/* ARGSUSED */ 5040static int 5041zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 5042 size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 5043 caller_context_t *ct) 5044{ 5045 uint64_t pages = btopr(len); 5046 5047 ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 5048 atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 5049 5050 if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 5051 vn_has_cached_data(vp)) 5052 (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 5053 5054 return (0); 5055} 5056 5057/* 5058 * Free or allocate space in a file. Currently, this function only 5059 * supports the `F_FREESP' command. However, this command is somewhat 5060 * misnamed, as its functionality includes the ability to allocate as 5061 * well as free space. 5062 * 5063 * IN: vp - vnode of file to free data in. 5064 * cmd - action to take (only F_FREESP supported). 5065 * bfp - section of file to free/alloc. 5066 * flag - current file open mode flags. 5067 * offset - current file offset. 5068 * cr - credentials of caller [UNUSED]. 5069 * ct - caller context. 5070 * 5071 * RETURN: 0 on success, error code on failure. 5072 * 5073 * Timestamps: 5074 * vp - ctime|mtime updated 5075 */ 5076/* ARGSUSED */ 5077static int 5078zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, 5079 offset_t offset, cred_t *cr, caller_context_t *ct) 5080{ 5081 znode_t *zp = VTOZ(vp); 5082 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5083 uint64_t off, len; 5084 int error; 5085 5086 ZFS_ENTER(zfsvfs); 5087 ZFS_VERIFY_ZP(zp); 5088 5089 if (cmd != F_FREESP) { 5090 ZFS_EXIT(zfsvfs); 5091 return (SET_ERROR(EINVAL)); 5092 } 5093 5094 if (error = convoff(vp, bfp, 0, offset)) { 5095 ZFS_EXIT(zfsvfs); 5096 return (error); 5097 } 5098 5099 if (bfp->l_len < 0) { 5100 ZFS_EXIT(zfsvfs); 5101 return (SET_ERROR(EINVAL)); 5102 } 5103 5104 off = bfp->l_start; 5105 len = bfp->l_len; /* 0 means from off to end of file */ 5106 5107 error = zfs_freesp(zp, off, len, flag, TRUE); 5108 5109 ZFS_EXIT(zfsvfs); 5110 return (error); 5111} 5112#endif /* sun */ 5113 5114CTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 5115CTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 5116 5117/*ARGSUSED*/ 5118static int 5119zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 5120{ 5121 znode_t *zp = VTOZ(vp); 5122 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5123 uint32_t gen; 5124 uint64_t gen64; 5125 uint64_t object = zp->z_id; 5126 zfid_short_t *zfid; 5127 int size, i, error; 5128 5129 ZFS_ENTER(zfsvfs); 5130 ZFS_VERIFY_ZP(zp); 5131 5132 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 5133 &gen64, sizeof (uint64_t))) != 0) { 5134 ZFS_EXIT(zfsvfs); 5135 return (error); 5136 } 5137 5138 gen = (uint32_t)gen64; 5139 5140 size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 5141 5142#ifdef illumos 5143 if (fidp->fid_len < size) { 5144 fidp->fid_len = size; 5145 ZFS_EXIT(zfsvfs); 5146 return (SET_ERROR(ENOSPC)); 5147 } 5148#else 5149 fidp->fid_len = size; 5150#endif 5151 5152 zfid = (zfid_short_t *)fidp; 5153 5154 zfid->zf_len = size; 5155 5156 for (i = 0; i < sizeof (zfid->zf_object); i++) 5157 zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 5158 5159 /* Must have a non-zero generation number to distinguish from .zfs */ 5160 if (gen == 0) 5161 gen = 1; 5162 for (i = 0; i < sizeof (zfid->zf_gen); i++) 5163 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 5164 5165 if (size == LONG_FID_LEN) { 5166 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 5167 zfid_long_t *zlfid; 5168 5169 zlfid = (zfid_long_t *)fidp; 5170 5171 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 5172 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 5173 5174 /* XXX - this should be the generation number for the objset */ 5175 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 5176 zlfid->zf_setgen[i] = 0; 5177 } 5178 5179 ZFS_EXIT(zfsvfs); 5180 return (0); 5181} 5182 5183static int 5184zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 5185 caller_context_t *ct) 5186{ 5187 znode_t *zp, *xzp; 5188 zfsvfs_t *zfsvfs; 5189 zfs_dirlock_t *dl; 5190 int error; 5191 5192 switch (cmd) { 5193 case _PC_LINK_MAX: 5194 *valp = INT_MAX; 5195 return (0); 5196 5197 case _PC_FILESIZEBITS: 5198 *valp = 64; 5199 return (0); 5200#ifdef sun 5201 case _PC_XATTR_EXISTS: 5202 zp = VTOZ(vp); 5203 zfsvfs = zp->z_zfsvfs; 5204 ZFS_ENTER(zfsvfs); 5205 ZFS_VERIFY_ZP(zp); 5206 *valp = 0; 5207 error = zfs_dirent_lock(&dl, zp, "", &xzp, 5208 ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 5209 if (error == 0) { 5210 zfs_dirent_unlock(dl); 5211 if (!zfs_dirempty(xzp)) 5212 *valp = 1; 5213 VN_RELE(ZTOV(xzp)); 5214 } else if (error == ENOENT) { 5215 /* 5216 * If there aren't extended attributes, it's the 5217 * same as having zero of them. 5218 */ 5219 error = 0; 5220 } 5221 ZFS_EXIT(zfsvfs); 5222 return (error); 5223 5224 case _PC_SATTR_ENABLED: 5225 case _PC_SATTR_EXISTS: 5226 *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 5227 (vp->v_type == VREG || vp->v_type == VDIR); 5228 return (0); 5229 5230 case _PC_ACCESS_FILTERING: 5231 *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 5232 vp->v_type == VDIR; 5233 return (0); 5234 5235 case _PC_ACL_ENABLED: 5236 *valp = _ACL_ACE_ENABLED; 5237 return (0); 5238#endif /* sun */ 5239 case _PC_MIN_HOLE_SIZE: 5240 *valp = (int)SPA_MINBLOCKSIZE; 5241 return (0); 5242#ifdef sun 5243 case _PC_TIMESTAMP_RESOLUTION: 5244 /* nanosecond timestamp resolution */ 5245 *valp = 1L; 5246 return (0); 5247#endif /* sun */ 5248 case _PC_ACL_EXTENDED: 5249 *valp = 0; 5250 return (0); 5251 5252 case _PC_ACL_NFS4: 5253 *valp = 1; 5254 return (0); 5255 5256 case _PC_ACL_PATH_MAX: 5257 *valp = ACL_MAX_ENTRIES; 5258 return (0); 5259 5260 default: 5261 return (EOPNOTSUPP); 5262 } 5263} 5264 5265/*ARGSUSED*/ 5266static int 5267zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5268 caller_context_t *ct) 5269{ 5270 znode_t *zp = VTOZ(vp); 5271 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5272 int error; 5273 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 5274 5275 ZFS_ENTER(zfsvfs); 5276 ZFS_VERIFY_ZP(zp); 5277 error = zfs_getacl(zp, vsecp, skipaclchk, cr); 5278 ZFS_EXIT(zfsvfs); 5279 5280 return (error); 5281} 5282 5283/*ARGSUSED*/ 5284int 5285zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5286 caller_context_t *ct) 5287{ 5288 znode_t *zp = VTOZ(vp); 5289 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5290 int error; 5291 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 5292 zilog_t *zilog = zfsvfs->z_log; 5293 5294 ZFS_ENTER(zfsvfs); 5295 ZFS_VERIFY_ZP(zp); 5296 5297 error = zfs_setacl(zp, vsecp, skipaclchk, cr); 5298 5299 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5300 zil_commit(zilog, 0); 5301 5302 ZFS_EXIT(zfsvfs); 5303 return (error); 5304} 5305 5306#ifdef sun 5307/* 5308 * The smallest read we may consider to loan out an arcbuf. 5309 * This must be a power of 2. 5310 */ 5311int zcr_blksz_min = (1 << 10); /* 1K */ 5312/* 5313 * If set to less than the file block size, allow loaning out of an 5314 * arcbuf for a partial block read. This must be a power of 2. 5315 */ 5316int zcr_blksz_max = (1 << 17); /* 128K */ 5317 5318/*ARGSUSED*/ 5319static int 5320zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 5321 caller_context_t *ct) 5322{ 5323 znode_t *zp = VTOZ(vp); 5324 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5325 int max_blksz = zfsvfs->z_max_blksz; 5326 uio_t *uio = &xuio->xu_uio; 5327 ssize_t size = uio->uio_resid; 5328 offset_t offset = uio->uio_loffset; 5329 int blksz; 5330 int fullblk, i; 5331 arc_buf_t *abuf; 5332 ssize_t maxsize; 5333 int preamble, postamble; 5334 5335 if (xuio->xu_type != UIOTYPE_ZEROCOPY) 5336 return (SET_ERROR(EINVAL)); 5337 5338 ZFS_ENTER(zfsvfs); 5339 ZFS_VERIFY_ZP(zp); 5340 switch (ioflag) { 5341 case UIO_WRITE: 5342 /* 5343 * Loan out an arc_buf for write if write size is bigger than 5344 * max_blksz, and the file's block size is also max_blksz. 5345 */ 5346 blksz = max_blksz; 5347 if (size < blksz || zp->z_blksz != blksz) { 5348 ZFS_EXIT(zfsvfs); 5349 return (SET_ERROR(EINVAL)); 5350 } 5351 /* 5352 * Caller requests buffers for write before knowing where the 5353 * write offset might be (e.g. NFS TCP write). 5354 */ 5355 if (offset == -1) { 5356 preamble = 0; 5357 } else { 5358 preamble = P2PHASE(offset, blksz); 5359 if (preamble) { 5360 preamble = blksz - preamble; 5361 size -= preamble; 5362 } 5363 } 5364 5365 postamble = P2PHASE(size, blksz); 5366 size -= postamble; 5367 5368 fullblk = size / blksz; 5369 (void) dmu_xuio_init(xuio, 5370 (preamble != 0) + fullblk + (postamble != 0)); 5371 DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble, 5372 int, postamble, int, 5373 (preamble != 0) + fullblk + (postamble != 0)); 5374 5375 /* 5376 * Have to fix iov base/len for partial buffers. They 5377 * currently represent full arc_buf's. 5378 */ 5379 if (preamble) { 5380 /* data begins in the middle of the arc_buf */ 5381 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5382 blksz); 5383 ASSERT(abuf); 5384 (void) dmu_xuio_add(xuio, abuf, 5385 blksz - preamble, preamble); 5386 } 5387 5388 for (i = 0; i < fullblk; i++) { 5389 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5390 blksz); 5391 ASSERT(abuf); 5392 (void) dmu_xuio_add(xuio, abuf, 0, blksz); 5393 } 5394 5395 if (postamble) { 5396 /* data ends in the middle of the arc_buf */ 5397 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5398 blksz); 5399 ASSERT(abuf); 5400 (void) dmu_xuio_add(xuio, abuf, 0, postamble); 5401 } 5402 break; 5403 case UIO_READ: 5404 /* 5405 * Loan out an arc_buf for read if the read size is larger than 5406 * the current file block size. Block alignment is not 5407 * considered. Partial arc_buf will be loaned out for read. 5408 */ 5409 blksz = zp->z_blksz; 5410 if (blksz < zcr_blksz_min) 5411 blksz = zcr_blksz_min; 5412 if (blksz > zcr_blksz_max) 5413 blksz = zcr_blksz_max; 5414 /* avoid potential complexity of dealing with it */ 5415 if (blksz > max_blksz) { 5416 ZFS_EXIT(zfsvfs); 5417 return (SET_ERROR(EINVAL)); 5418 } 5419 5420 maxsize = zp->z_size - uio->uio_loffset; 5421 if (size > maxsize) 5422 size = maxsize; 5423 5424 if (size < blksz || vn_has_cached_data(vp)) { 5425 ZFS_EXIT(zfsvfs); 5426 return (SET_ERROR(EINVAL)); 5427 } 5428 break; 5429 default: 5430 ZFS_EXIT(zfsvfs); 5431 return (SET_ERROR(EINVAL)); 5432 } 5433 5434 uio->uio_extflg = UIO_XUIO; 5435 XUIO_XUZC_RW(xuio) = ioflag; 5436 ZFS_EXIT(zfsvfs); 5437 return (0); 5438} 5439 5440/*ARGSUSED*/ 5441static int 5442zfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 5443{ 5444 int i; 5445 arc_buf_t *abuf; 5446 int ioflag = XUIO_XUZC_RW(xuio); 5447 5448 ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 5449 5450 i = dmu_xuio_cnt(xuio); 5451 while (i-- > 0) { 5452 abuf = dmu_xuio_arcbuf(xuio, i); 5453 /* 5454 * if abuf == NULL, it must be a write buffer 5455 * that has been returned in zfs_write(). 5456 */ 5457 if (abuf) 5458 dmu_return_arcbuf(abuf); 5459 ASSERT(abuf || ioflag == UIO_WRITE); 5460 } 5461 5462 dmu_xuio_fini(xuio); 5463 return (0); 5464} 5465 5466/* 5467 * Predeclare these here so that the compiler assumes that 5468 * this is an "old style" function declaration that does 5469 * not include arguments => we won't get type mismatch errors 5470 * in the initializations that follow. 5471 */ 5472static int zfs_inval(); 5473static int zfs_isdir(); 5474 5475static int 5476zfs_inval() 5477{ 5478 return (SET_ERROR(EINVAL)); 5479} 5480 5481static int 5482zfs_isdir() 5483{ 5484 return (SET_ERROR(EISDIR)); 5485} 5486/* 5487 * Directory vnode operations template 5488 */ 5489vnodeops_t *zfs_dvnodeops; 5490const fs_operation_def_t zfs_dvnodeops_template[] = { 5491 VOPNAME_OPEN, { .vop_open = zfs_open }, 5492 VOPNAME_CLOSE, { .vop_close = zfs_close }, 5493 VOPNAME_READ, { .error = zfs_isdir }, 5494 VOPNAME_WRITE, { .error = zfs_isdir }, 5495 VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5496 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5497 VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5498 VOPNAME_ACCESS, { .vop_access = zfs_access }, 5499 VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5500 VOPNAME_CREATE, { .vop_create = zfs_create }, 5501 VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5502 VOPNAME_LINK, { .vop_link = zfs_link }, 5503 VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5504 VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, 5505 VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5506 VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5507 VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, 5508 VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5509 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5510 VOPNAME_FID, { .vop_fid = zfs_fid }, 5511 VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5512 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5513 VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5514 VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5515 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5516 NULL, NULL 5517}; 5518 5519/* 5520 * Regular file vnode operations template 5521 */ 5522vnodeops_t *zfs_fvnodeops; 5523const fs_operation_def_t zfs_fvnodeops_template[] = { 5524 VOPNAME_OPEN, { .vop_open = zfs_open }, 5525 VOPNAME_CLOSE, { .vop_close = zfs_close }, 5526 VOPNAME_READ, { .vop_read = zfs_read }, 5527 VOPNAME_WRITE, { .vop_write = zfs_write }, 5528 VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5529 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5530 VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5531 VOPNAME_ACCESS, { .vop_access = zfs_access }, 5532 VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5533 VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5534 VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5535 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5536 VOPNAME_FID, { .vop_fid = zfs_fid }, 5537 VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5538 VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, 5539 VOPNAME_SPACE, { .vop_space = zfs_space }, 5540 VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, 5541 VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, 5542 VOPNAME_MAP, { .vop_map = zfs_map }, 5543 VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, 5544 VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, 5545 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5546 VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5547 VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5548 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5549 VOPNAME_REQZCBUF, { .vop_reqzcbuf = zfs_reqzcbuf }, 5550 VOPNAME_RETZCBUF, { .vop_retzcbuf = zfs_retzcbuf }, 5551 NULL, NULL 5552}; 5553 5554/* 5555 * Symbolic link vnode operations template 5556 */ 5557vnodeops_t *zfs_symvnodeops; 5558const fs_operation_def_t zfs_symvnodeops_template[] = { 5559 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5560 VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5561 VOPNAME_ACCESS, { .vop_access = zfs_access }, 5562 VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5563 VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, 5564 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5565 VOPNAME_FID, { .vop_fid = zfs_fid }, 5566 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5567 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5568 NULL, NULL 5569}; 5570 5571/* 5572 * special share hidden files vnode operations template 5573 */ 5574vnodeops_t *zfs_sharevnodeops; 5575const fs_operation_def_t zfs_sharevnodeops_template[] = { 5576 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5577 VOPNAME_ACCESS, { .vop_access = zfs_access }, 5578 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5579 VOPNAME_FID, { .vop_fid = zfs_fid }, 5580 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5581 VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5582 VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5583 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5584 NULL, NULL 5585}; 5586 5587/* 5588 * Extended attribute directory vnode operations template 5589 * 5590 * This template is identical to the directory vnodes 5591 * operation template except for restricted operations: 5592 * VOP_MKDIR() 5593 * VOP_SYMLINK() 5594 * 5595 * Note that there are other restrictions embedded in: 5596 * zfs_create() - restrict type to VREG 5597 * zfs_link() - no links into/out of attribute space 5598 * zfs_rename() - no moves into/out of attribute space 5599 */ 5600vnodeops_t *zfs_xdvnodeops; 5601const fs_operation_def_t zfs_xdvnodeops_template[] = { 5602 VOPNAME_OPEN, { .vop_open = zfs_open }, 5603 VOPNAME_CLOSE, { .vop_close = zfs_close }, 5604 VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5605 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5606 VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5607 VOPNAME_ACCESS, { .vop_access = zfs_access }, 5608 VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5609 VOPNAME_CREATE, { .vop_create = zfs_create }, 5610 VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5611 VOPNAME_LINK, { .vop_link = zfs_link }, 5612 VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5613 VOPNAME_MKDIR, { .error = zfs_inval }, 5614 VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5615 VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5616 VOPNAME_SYMLINK, { .error = zfs_inval }, 5617 VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5618 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5619 VOPNAME_FID, { .vop_fid = zfs_fid }, 5620 VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5621 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5622 VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5623 VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5624 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5625 NULL, NULL 5626}; 5627 5628/* 5629 * Error vnode operations template 5630 */ 5631vnodeops_t *zfs_evnodeops; 5632const fs_operation_def_t zfs_evnodeops_template[] = { 5633 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5634 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5635 NULL, NULL 5636}; 5637#endif /* sun */ 5638 5639static int 5640ioflags(int ioflags) 5641{ 5642 int flags = 0; 5643 5644 if (ioflags & IO_APPEND) 5645 flags |= FAPPEND; 5646 if (ioflags & IO_NDELAY) 5647 flags |= FNONBLOCK; 5648 if (ioflags & IO_SYNC) 5649 flags |= (FSYNC | FDSYNC | FRSYNC); 5650 5651 return (flags); 5652} 5653 5654static int 5655zfs_getpages(struct vnode *vp, vm_page_t *m, int count, int reqpage) 5656{ 5657 znode_t *zp = VTOZ(vp); 5658 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5659 objset_t *os = zp->z_zfsvfs->z_os; 5660 vm_page_t mfirst, mlast, mreq; 5661 vm_object_t object; 5662 caddr_t va; 5663 struct sf_buf *sf; 5664 off_t startoff, endoff; 5665 int i, error; 5666 vm_pindex_t reqstart, reqend; 5667 int pcount, lsize, reqsize, size; 5668 5669 ZFS_ENTER(zfsvfs); 5670 ZFS_VERIFY_ZP(zp); 5671 5672 pcount = OFF_TO_IDX(round_page(count)); 5673 mreq = m[reqpage]; 5674 object = mreq->object; 5675 error = 0; 5676 5677 KASSERT(vp->v_object == object, ("mismatching object")); 5678 5679 if (pcount > 1 && zp->z_blksz > PAGESIZE) { 5680 startoff = rounddown(IDX_TO_OFF(mreq->pindex), zp->z_blksz); 5681 reqstart = OFF_TO_IDX(round_page(startoff)); 5682 if (reqstart < m[0]->pindex) 5683 reqstart = 0; 5684 else 5685 reqstart = reqstart - m[0]->pindex; 5686 endoff = roundup(IDX_TO_OFF(mreq->pindex) + PAGE_SIZE, 5687 zp->z_blksz); 5688 reqend = OFF_TO_IDX(trunc_page(endoff)) - 1; 5689 if (reqend > m[pcount - 1]->pindex) 5690 reqend = m[pcount - 1]->pindex; 5691 reqsize = reqend - m[reqstart]->pindex + 1; 5692 KASSERT(reqstart <= reqpage && reqpage < reqstart + reqsize, 5693 ("reqpage beyond [reqstart, reqstart + reqsize[ bounds")); 5694 } else { 5695 reqstart = reqpage; 5696 reqsize = 1; 5697 } 5698 mfirst = m[reqstart]; 5699 mlast = m[reqstart + reqsize - 1]; 5700 5701 zfs_vmobject_wlock(object); 5702 5703 for (i = 0; i < reqstart; i++) { 5704 vm_page_lock(m[i]); 5705 vm_page_free(m[i]); 5706 vm_page_unlock(m[i]); 5707 } 5708 for (i = reqstart + reqsize; i < pcount; i++) { 5709 vm_page_lock(m[i]); 5710 vm_page_free(m[i]); 5711 vm_page_unlock(m[i]); 5712 } 5713 5714 if (mreq->valid && reqsize == 1) { 5715 if (mreq->valid != VM_PAGE_BITS_ALL) 5716 vm_page_zero_invalid(mreq, TRUE); 5717 zfs_vmobject_wunlock(object); 5718 ZFS_EXIT(zfsvfs); 5719 return (zfs_vm_pagerret_ok); 5720 } 5721 5722 PCPU_INC(cnt.v_vnodein); 5723 PCPU_ADD(cnt.v_vnodepgsin, reqsize); 5724 5725 if (IDX_TO_OFF(mreq->pindex) >= object->un_pager.vnp.vnp_size) { 5726 for (i = reqstart; i < reqstart + reqsize; i++) { 5727 if (i != reqpage) { 5728 vm_page_lock(m[i]); 5729 vm_page_free(m[i]); 5730 vm_page_unlock(m[i]); 5731 } 5732 } 5733 zfs_vmobject_wunlock(object); 5734 ZFS_EXIT(zfsvfs); 5735 return (zfs_vm_pagerret_bad); 5736 } 5737 5738 lsize = PAGE_SIZE; 5739 if (IDX_TO_OFF(mlast->pindex) + lsize > object->un_pager.vnp.vnp_size) 5740 lsize = object->un_pager.vnp.vnp_size - IDX_TO_OFF(mlast->pindex); 5741 5742 zfs_vmobject_wunlock(object); 5743 5744 for (i = reqstart; i < reqstart + reqsize; i++) { 5745 size = PAGE_SIZE; 5746 if (i == (reqstart + reqsize - 1)) 5747 size = lsize; 5748 va = zfs_map_page(m[i], &sf); 5749 error = dmu_read(os, zp->z_id, IDX_TO_OFF(m[i]->pindex), 5750 size, va, DMU_READ_PREFETCH); 5751 if (size != PAGE_SIZE) 5752 bzero(va + size, PAGE_SIZE - size); 5753 zfs_unmap_page(sf); 5754 if (error != 0) 5755 break; 5756 } 5757 5758 zfs_vmobject_wlock(object); 5759 5760 for (i = reqstart; i < reqstart + reqsize; i++) { 5761 if (!error) 5762 m[i]->valid = VM_PAGE_BITS_ALL; 5763 KASSERT(m[i]->dirty == 0, ("zfs_getpages: page %p is dirty", m[i])); 5764 if (i != reqpage) 5765 vm_page_readahead_finish(m[i]); 5766 } 5767 5768 zfs_vmobject_wunlock(object); 5769 5770 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 5771 ZFS_EXIT(zfsvfs); 5772 return (error ? zfs_vm_pagerret_error : zfs_vm_pagerret_ok); 5773} 5774 5775static int 5776zfs_freebsd_getpages(ap) 5777 struct vop_getpages_args /* { 5778 struct vnode *a_vp; 5779 vm_page_t *a_m; 5780 int a_count; 5781 int a_reqpage; 5782 vm_ooffset_t a_offset; 5783 } */ *ap; 5784{ 5785 5786 return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage)); 5787} 5788 5789static int 5790zfs_freebsd_bmap(ap) 5791 struct vop_bmap_args /* { 5792 struct vnode *a_vp; 5793 daddr_t a_bn; 5794 struct bufobj **a_bop; 5795 daddr_t *a_bnp; 5796 int *a_runp; 5797 int *a_runb; 5798 } */ *ap; 5799{ 5800 5801 if (ap->a_bop != NULL) 5802 *ap->a_bop = &ap->a_vp->v_bufobj; 5803 if (ap->a_bnp != NULL) 5804 *ap->a_bnp = ap->a_bn; 5805 if (ap->a_runp != NULL) 5806 *ap->a_runp = 0; 5807 if (ap->a_runb != NULL) 5808 *ap->a_runb = 0; 5809 5810 return (0); 5811} 5812 5813static int 5814zfs_freebsd_open(ap) 5815 struct vop_open_args /* { 5816 struct vnode *a_vp; 5817 int a_mode; 5818 struct ucred *a_cred; 5819 struct thread *a_td; 5820 } */ *ap; 5821{ 5822 vnode_t *vp = ap->a_vp; 5823 znode_t *zp = VTOZ(vp); 5824 int error; 5825 5826 error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); 5827 if (error == 0) 5828 vnode_create_vobject(vp, zp->z_size, ap->a_td); 5829 return (error); 5830} 5831 5832static int 5833zfs_freebsd_close(ap) 5834 struct vop_close_args /* { 5835 struct vnode *a_vp; 5836 int a_fflag; 5837 struct ucred *a_cred; 5838 struct thread *a_td; 5839 } */ *ap; 5840{ 5841 5842 return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred, NULL)); 5843} 5844 5845static int 5846zfs_freebsd_ioctl(ap) 5847 struct vop_ioctl_args /* { 5848 struct vnode *a_vp; 5849 u_long a_command; 5850 caddr_t a_data; 5851 int a_fflag; 5852 struct ucred *cred; 5853 struct thread *td; 5854 } */ *ap; 5855{ 5856 5857 return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 5858 ap->a_fflag, ap->a_cred, NULL, NULL)); 5859} 5860 5861static int 5862zfs_freebsd_read(ap) 5863 struct vop_read_args /* { 5864 struct vnode *a_vp; 5865 struct uio *a_uio; 5866 int a_ioflag; 5867 struct ucred *a_cred; 5868 } */ *ap; 5869{ 5870 5871 return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 5872 ap->a_cred, NULL)); 5873} 5874 5875static int 5876zfs_freebsd_write(ap) 5877 struct vop_write_args /* { 5878 struct vnode *a_vp; 5879 struct uio *a_uio; 5880 int a_ioflag; 5881 struct ucred *a_cred; 5882 } */ *ap; 5883{ 5884 5885 return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 5886 ap->a_cred, NULL)); 5887} 5888 5889static int 5890zfs_freebsd_access(ap) 5891 struct vop_access_args /* { 5892 struct vnode *a_vp; 5893 accmode_t a_accmode; 5894 struct ucred *a_cred; 5895 struct thread *a_td; 5896 } */ *ap; 5897{ 5898 vnode_t *vp = ap->a_vp; 5899 znode_t *zp = VTOZ(vp); 5900 accmode_t accmode; 5901 int error = 0; 5902 5903 /* 5904 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND, 5905 */ 5906 accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 5907 if (accmode != 0) 5908 error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); 5909 5910 /* 5911 * VADMIN has to be handled by vaccess(). 5912 */ 5913 if (error == 0) { 5914 accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 5915 if (accmode != 0) { 5916 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 5917 zp->z_gid, accmode, ap->a_cred, NULL); 5918 } 5919 } 5920 5921 /* 5922 * For VEXEC, ensure that at least one execute bit is set for 5923 * non-directories. 5924 */ 5925 if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 5926 (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 5927 error = EACCES; 5928 } 5929 5930 return (error); 5931} 5932 5933static int 5934zfs_freebsd_lookup(ap) 5935 struct vop_lookup_args /* { 5936 struct vnode *a_dvp; 5937 struct vnode **a_vpp; 5938 struct componentname *a_cnp; 5939 } */ *ap; 5940{ 5941 struct componentname *cnp = ap->a_cnp; 5942 char nm[NAME_MAX + 1]; 5943 5944 ASSERT(cnp->cn_namelen < sizeof(nm)); 5945 strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 5946 5947 return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 5948 cnp->cn_cred, cnp->cn_thread, 0)); 5949} 5950 5951static int 5952zfs_freebsd_create(ap) 5953 struct vop_create_args /* { 5954 struct vnode *a_dvp; 5955 struct vnode **a_vpp; 5956 struct componentname *a_cnp; 5957 struct vattr *a_vap; 5958 } */ *ap; 5959{ 5960 struct componentname *cnp = ap->a_cnp; 5961 vattr_t *vap = ap->a_vap; 5962 int mode; 5963 5964 ASSERT(cnp->cn_flags & SAVENAME); 5965 5966 vattr_init_mask(vap); 5967 mode = vap->va_mode & ALLPERMS; 5968 5969 return (zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 5970 ap->a_vpp, cnp->cn_cred, cnp->cn_thread)); 5971} 5972 5973static int 5974zfs_freebsd_remove(ap) 5975 struct vop_remove_args /* { 5976 struct vnode *a_dvp; 5977 struct vnode *a_vp; 5978 struct componentname *a_cnp; 5979 } */ *ap; 5980{ 5981 5982 ASSERT(ap->a_cnp->cn_flags & SAVENAME); 5983 5984 return (zfs_remove(ap->a_dvp, ap->a_cnp->cn_nameptr, 5985 ap->a_cnp->cn_cred, NULL, 0)); 5986} 5987 5988static int 5989zfs_freebsd_mkdir(ap) 5990 struct vop_mkdir_args /* { 5991 struct vnode *a_dvp; 5992 struct vnode **a_vpp; 5993 struct componentname *a_cnp; 5994 struct vattr *a_vap; 5995 } */ *ap; 5996{ 5997 vattr_t *vap = ap->a_vap; 5998 5999 ASSERT(ap->a_cnp->cn_flags & SAVENAME); 6000 6001 vattr_init_mask(vap); 6002 6003 return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 6004 ap->a_cnp->cn_cred, NULL, 0, NULL)); 6005} 6006 6007static int 6008zfs_freebsd_rmdir(ap) 6009 struct vop_rmdir_args /* { 6010 struct vnode *a_dvp; 6011 struct vnode *a_vp; 6012 struct componentname *a_cnp; 6013 } */ *ap; 6014{ 6015 struct componentname *cnp = ap->a_cnp; 6016 6017 ASSERT(cnp->cn_flags & SAVENAME); 6018 6019 return (zfs_rmdir(ap->a_dvp, cnp->cn_nameptr, NULL, cnp->cn_cred, NULL, 0)); 6020} 6021 6022static int 6023zfs_freebsd_readdir(ap) 6024 struct vop_readdir_args /* { 6025 struct vnode *a_vp; 6026 struct uio *a_uio; 6027 struct ucred *a_cred; 6028 int *a_eofflag; 6029 int *a_ncookies; 6030 u_long **a_cookies; 6031 } */ *ap; 6032{ 6033 6034 return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 6035 ap->a_ncookies, ap->a_cookies)); 6036} 6037 6038static int 6039zfs_freebsd_fsync(ap) 6040 struct vop_fsync_args /* { 6041 struct vnode *a_vp; 6042 int a_waitfor; 6043 struct thread *a_td; 6044 } */ *ap; 6045{ 6046 6047 vop_stdfsync(ap); 6048 return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 6049} 6050 6051static int 6052zfs_freebsd_getattr(ap) 6053 struct vop_getattr_args /* { 6054 struct vnode *a_vp; 6055 struct vattr *a_vap; 6056 struct ucred *a_cred; 6057 } */ *ap; 6058{ 6059 vattr_t *vap = ap->a_vap; 6060 xvattr_t xvap; 6061 u_long fflags = 0; 6062 int error; 6063 6064 xva_init(&xvap); 6065 xvap.xva_vattr = *vap; 6066 xvap.xva_vattr.va_mask |= AT_XVATTR; 6067 6068 /* Convert chflags into ZFS-type flags. */ 6069 /* XXX: what about SF_SETTABLE?. */ 6070 XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 6071 XVA_SET_REQ(&xvap, XAT_APPENDONLY); 6072 XVA_SET_REQ(&xvap, XAT_NOUNLINK); 6073 XVA_SET_REQ(&xvap, XAT_NODUMP); 6074 XVA_SET_REQ(&xvap, XAT_READONLY); 6075 XVA_SET_REQ(&xvap, XAT_ARCHIVE); 6076 XVA_SET_REQ(&xvap, XAT_SYSTEM); 6077 XVA_SET_REQ(&xvap, XAT_HIDDEN); 6078 XVA_SET_REQ(&xvap, XAT_REPARSE); 6079 XVA_SET_REQ(&xvap, XAT_OFFLINE); 6080 XVA_SET_REQ(&xvap, XAT_SPARSE); 6081 6082 error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 6083 if (error != 0) 6084 return (error); 6085 6086 /* Convert ZFS xattr into chflags. */ 6087#define FLAG_CHECK(fflag, xflag, xfield) do { \ 6088 if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 6089 fflags |= (fflag); \ 6090} while (0) 6091 FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 6092 xvap.xva_xoptattrs.xoa_immutable); 6093 FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 6094 xvap.xva_xoptattrs.xoa_appendonly); 6095 FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 6096 xvap.xva_xoptattrs.xoa_nounlink); 6097 FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE, 6098 xvap.xva_xoptattrs.xoa_archive); 6099 FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 6100 xvap.xva_xoptattrs.xoa_nodump); 6101 FLAG_CHECK(UF_READONLY, XAT_READONLY, 6102 xvap.xva_xoptattrs.xoa_readonly); 6103 FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM, 6104 xvap.xva_xoptattrs.xoa_system); 6105 FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN, 6106 xvap.xva_xoptattrs.xoa_hidden); 6107 FLAG_CHECK(UF_REPARSE, XAT_REPARSE, 6108 xvap.xva_xoptattrs.xoa_reparse); 6109 FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE, 6110 xvap.xva_xoptattrs.xoa_offline); 6111 FLAG_CHECK(UF_SPARSE, XAT_SPARSE, 6112 xvap.xva_xoptattrs.xoa_sparse); 6113 6114#undef FLAG_CHECK 6115 *vap = xvap.xva_vattr; 6116 vap->va_flags = fflags; 6117 return (0); 6118} 6119 6120static int 6121zfs_freebsd_setattr(ap) 6122 struct vop_setattr_args /* { 6123 struct vnode *a_vp; 6124 struct vattr *a_vap; 6125 struct ucred *a_cred; 6126 } */ *ap; 6127{ 6128 vnode_t *vp = ap->a_vp; 6129 vattr_t *vap = ap->a_vap; 6130 cred_t *cred = ap->a_cred; 6131 xvattr_t xvap; 6132 u_long fflags; 6133 uint64_t zflags; 6134 6135 vattr_init_mask(vap); 6136 vap->va_mask &= ~AT_NOSET; 6137 6138 xva_init(&xvap); 6139 xvap.xva_vattr = *vap; 6140 6141 zflags = VTOZ(vp)->z_pflags; 6142 6143 if (vap->va_flags != VNOVAL) { 6144 zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 6145 int error; 6146 6147 if (zfsvfs->z_use_fuids == B_FALSE) 6148 return (EOPNOTSUPP); 6149 6150 fflags = vap->va_flags; 6151 /* 6152 * XXX KDM 6153 * We need to figure out whether it makes sense to allow 6154 * UF_REPARSE through, since we don't really have other 6155 * facilities to handle reparse points and zfs_setattr() 6156 * doesn't currently allow setting that attribute anyway. 6157 */ 6158 if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE| 6159 UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE| 6160 UF_OFFLINE|UF_SPARSE)) != 0) 6161 return (EOPNOTSUPP); 6162 /* 6163 * Unprivileged processes are not permitted to unset system 6164 * flags, or modify flags if any system flags are set. 6165 * Privileged non-jail processes may not modify system flags 6166 * if securelevel > 0 and any existing system flags are set. 6167 * Privileged jail processes behave like privileged non-jail 6168 * processes if the security.jail.chflags_allowed sysctl is 6169 * is non-zero; otherwise, they behave like unprivileged 6170 * processes. 6171 */ 6172 if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 6173 priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) { 6174 if (zflags & 6175 (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 6176 error = securelevel_gt(cred, 0); 6177 if (error != 0) 6178 return (error); 6179 } 6180 } else { 6181 /* 6182 * Callers may only modify the file flags on objects they 6183 * have VADMIN rights for. 6184 */ 6185 if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0) 6186 return (error); 6187 if (zflags & 6188 (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 6189 return (EPERM); 6190 } 6191 if (fflags & 6192 (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 6193 return (EPERM); 6194 } 6195 } 6196 6197#define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 6198 if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 6199 ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 6200 XVA_SET_REQ(&xvap, (xflag)); \ 6201 (xfield) = ((fflags & (fflag)) != 0); \ 6202 } \ 6203} while (0) 6204 /* Convert chflags into ZFS-type flags. */ 6205 /* XXX: what about SF_SETTABLE?. */ 6206 FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 6207 xvap.xva_xoptattrs.xoa_immutable); 6208 FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 6209 xvap.xva_xoptattrs.xoa_appendonly); 6210 FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 6211 xvap.xva_xoptattrs.xoa_nounlink); 6212 FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE, 6213 xvap.xva_xoptattrs.xoa_archive); 6214 FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 6215 xvap.xva_xoptattrs.xoa_nodump); 6216 FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY, 6217 xvap.xva_xoptattrs.xoa_readonly); 6218 FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM, 6219 xvap.xva_xoptattrs.xoa_system); 6220 FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN, 6221 xvap.xva_xoptattrs.xoa_hidden); 6222 FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE, 6223 xvap.xva_xoptattrs.xoa_hidden); 6224 FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE, 6225 xvap.xva_xoptattrs.xoa_offline); 6226 FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE, 6227 xvap.xva_xoptattrs.xoa_sparse); 6228#undef FLAG_CHANGE 6229 } 6230 return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL)); 6231} 6232 6233static int 6234zfs_freebsd_rename(ap) 6235 struct vop_rename_args /* { 6236 struct vnode *a_fdvp; 6237 struct vnode *a_fvp; 6238 struct componentname *a_fcnp; 6239 struct vnode *a_tdvp; 6240 struct vnode *a_tvp; 6241 struct componentname *a_tcnp; 6242 } */ *ap; 6243{ 6244 vnode_t *fdvp = ap->a_fdvp; 6245 vnode_t *fvp = ap->a_fvp; 6246 vnode_t *tdvp = ap->a_tdvp; 6247 vnode_t *tvp = ap->a_tvp; 6248 int error; 6249 6250 ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); 6251 ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); 6252 6253 if (fdvp->v_mount == tdvp->v_mount) 6254 error = zfs_rename(fdvp, ap->a_fcnp->cn_nameptr, tdvp, 6255 ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred, NULL, 0); 6256 else 6257 error = EXDEV; 6258 6259 if (tdvp == tvp) 6260 VN_RELE(tdvp); 6261 else 6262 VN_URELE(tdvp); 6263 if (tvp) 6264 VN_URELE(tvp); 6265 VN_RELE(fdvp); 6266 VN_RELE(fvp); 6267 6268 return (error); 6269} 6270 6271static int 6272zfs_freebsd_symlink(ap) 6273 struct vop_symlink_args /* { 6274 struct vnode *a_dvp; 6275 struct vnode **a_vpp; 6276 struct componentname *a_cnp; 6277 struct vattr *a_vap; 6278 char *a_target; 6279 } */ *ap; 6280{ 6281 struct componentname *cnp = ap->a_cnp; 6282 vattr_t *vap = ap->a_vap; 6283 6284 ASSERT(cnp->cn_flags & SAVENAME); 6285 6286 vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. */ 6287 vattr_init_mask(vap); 6288 6289 return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 6290 ap->a_target, cnp->cn_cred, cnp->cn_thread)); 6291} 6292 6293static int 6294zfs_freebsd_readlink(ap) 6295 struct vop_readlink_args /* { 6296 struct vnode *a_vp; 6297 struct uio *a_uio; 6298 struct ucred *a_cred; 6299 } */ *ap; 6300{ 6301 6302 return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 6303} 6304 6305static int 6306zfs_freebsd_link(ap) 6307 struct vop_link_args /* { 6308 struct vnode *a_tdvp; 6309 struct vnode *a_vp; 6310 struct componentname *a_cnp; 6311 } */ *ap; 6312{ 6313 struct componentname *cnp = ap->a_cnp; 6314 vnode_t *vp = ap->a_vp; 6315 vnode_t *tdvp = ap->a_tdvp; 6316 6317 if (tdvp->v_mount != vp->v_mount) 6318 return (EXDEV); 6319 6320 ASSERT(cnp->cn_flags & SAVENAME); 6321 6322 return (zfs_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 6323} 6324 6325static int 6326zfs_freebsd_inactive(ap) 6327 struct vop_inactive_args /* { 6328 struct vnode *a_vp; 6329 struct thread *a_td; 6330 } */ *ap; 6331{ 6332 vnode_t *vp = ap->a_vp; 6333 6334 zfs_inactive(vp, ap->a_td->td_ucred, NULL); 6335 return (0); 6336} 6337 6338static int 6339zfs_freebsd_reclaim(ap) 6340 struct vop_reclaim_args /* { 6341 struct vnode *a_vp; 6342 struct thread *a_td; 6343 } */ *ap; 6344{ 6345 vnode_t *vp = ap->a_vp; 6346 znode_t *zp = VTOZ(vp); 6347 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6348 6349 ASSERT(zp != NULL); 6350 6351 /* Destroy the vm object and flush associated pages. */ 6352 vnode_destroy_vobject(vp); 6353 6354 /* 6355 * z_teardown_inactive_lock protects from a race with 6356 * zfs_znode_dmu_fini in zfsvfs_teardown during 6357 * force unmount. 6358 */ 6359 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 6360 if (zp->z_sa_hdl == NULL) 6361 zfs_znode_free(zp); 6362 else 6363 zfs_zinactive(zp); 6364 rw_exit(&zfsvfs->z_teardown_inactive_lock); 6365 6366 vp->v_data = NULL; 6367 return (0); 6368} 6369 6370static int 6371zfs_freebsd_fid(ap) 6372 struct vop_fid_args /* { 6373 struct vnode *a_vp; 6374 struct fid *a_fid; 6375 } */ *ap; 6376{ 6377 6378 return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 6379} 6380 6381static int 6382zfs_freebsd_pathconf(ap) 6383 struct vop_pathconf_args /* { 6384 struct vnode *a_vp; 6385 int a_name; 6386 register_t *a_retval; 6387 } */ *ap; 6388{ 6389 ulong_t val; 6390 int error; 6391 6392 error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 6393 if (error == 0) 6394 *ap->a_retval = val; 6395 else if (error == EOPNOTSUPP) 6396 error = vop_stdpathconf(ap); 6397 return (error); 6398} 6399 6400static int 6401zfs_freebsd_fifo_pathconf(ap) 6402 struct vop_pathconf_args /* { 6403 struct vnode *a_vp; 6404 int a_name; 6405 register_t *a_retval; 6406 } */ *ap; 6407{ 6408 6409 switch (ap->a_name) { 6410 case _PC_ACL_EXTENDED: 6411 case _PC_ACL_NFS4: 6412 case _PC_ACL_PATH_MAX: 6413 case _PC_MAC_PRESENT: 6414 return (zfs_freebsd_pathconf(ap)); 6415 default: 6416 return (fifo_specops.vop_pathconf(ap)); 6417 } 6418} 6419 6420/* 6421 * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 6422 * extended attribute name: 6423 * 6424 * NAMESPACE PREFIX 6425 * system freebsd:system: 6426 * user (none, can be used to access ZFS fsattr(5) attributes 6427 * created on Solaris) 6428 */ 6429static int 6430zfs_create_attrname(int attrnamespace, const char *name, char *attrname, 6431 size_t size) 6432{ 6433 const char *namespace, *prefix, *suffix; 6434 6435 /* We don't allow '/' character in attribute name. */ 6436 if (strchr(name, '/') != NULL) 6437 return (EINVAL); 6438 /* We don't allow attribute names that start with "freebsd:" string. */ 6439 if (strncmp(name, "freebsd:", 8) == 0) 6440 return (EINVAL); 6441 6442 bzero(attrname, size); 6443 6444 switch (attrnamespace) { 6445 case EXTATTR_NAMESPACE_USER: 6446#if 0 6447 prefix = "freebsd:"; 6448 namespace = EXTATTR_NAMESPACE_USER_STRING; 6449 suffix = ":"; 6450#else 6451 /* 6452 * This is the default namespace by which we can access all 6453 * attributes created on Solaris. 6454 */ 6455 prefix = namespace = suffix = ""; 6456#endif 6457 break; 6458 case EXTATTR_NAMESPACE_SYSTEM: 6459 prefix = "freebsd:"; 6460 namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 6461 suffix = ":"; 6462 break; 6463 case EXTATTR_NAMESPACE_EMPTY: 6464 default: 6465 return (EINVAL); 6466 } 6467 if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 6468 name) >= size) { 6469 return (ENAMETOOLONG); 6470 } 6471 return (0); 6472} 6473 6474/* 6475 * Vnode operating to retrieve a named extended attribute. 6476 */ 6477static int 6478zfs_getextattr(struct vop_getextattr_args *ap) 6479/* 6480vop_getextattr { 6481 IN struct vnode *a_vp; 6482 IN int a_attrnamespace; 6483 IN const char *a_name; 6484 INOUT struct uio *a_uio; 6485 OUT size_t *a_size; 6486 IN struct ucred *a_cred; 6487 IN struct thread *a_td; 6488}; 6489*/ 6490{ 6491 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6492 struct thread *td = ap->a_td; 6493 struct nameidata nd; 6494 char attrname[255]; 6495 struct vattr va; 6496 vnode_t *xvp = NULL, *vp; 6497 int error, flags; 6498 6499 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6500 ap->a_cred, ap->a_td, VREAD); 6501 if (error != 0) 6502 return (error); 6503 6504 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6505 sizeof(attrname)); 6506 if (error != 0) 6507 return (error); 6508 6509 ZFS_ENTER(zfsvfs); 6510 6511 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6512 LOOKUP_XATTR); 6513 if (error != 0) { 6514 ZFS_EXIT(zfsvfs); 6515 return (error); 6516 } 6517 6518 flags = FREAD; 6519 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 6520 xvp, td); 6521 error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL); 6522 vp = nd.ni_vp; 6523 NDFREE(&nd, NDF_ONLY_PNBUF); 6524 if (error != 0) { 6525 ZFS_EXIT(zfsvfs); 6526 if (error == ENOENT) 6527 error = ENOATTR; 6528 return (error); 6529 } 6530 6531 if (ap->a_size != NULL) { 6532 error = VOP_GETATTR(vp, &va, ap->a_cred); 6533 if (error == 0) 6534 *ap->a_size = (size_t)va.va_size; 6535 } else if (ap->a_uio != NULL) 6536 error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); 6537 6538 VOP_UNLOCK(vp, 0); 6539 vn_close(vp, flags, ap->a_cred, td); 6540 ZFS_EXIT(zfsvfs); 6541 6542 return (error); 6543} 6544 6545/* 6546 * Vnode operation to remove a named attribute. 6547 */ 6548int 6549zfs_deleteextattr(struct vop_deleteextattr_args *ap) 6550/* 6551vop_deleteextattr { 6552 IN struct vnode *a_vp; 6553 IN int a_attrnamespace; 6554 IN const char *a_name; 6555 IN struct ucred *a_cred; 6556 IN struct thread *a_td; 6557}; 6558*/ 6559{ 6560 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6561 struct thread *td = ap->a_td; 6562 struct nameidata nd; 6563 char attrname[255]; 6564 struct vattr va; 6565 vnode_t *xvp = NULL, *vp; 6566 int error, flags; 6567 6568 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6569 ap->a_cred, ap->a_td, VWRITE); 6570 if (error != 0) 6571 return (error); 6572 6573 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6574 sizeof(attrname)); 6575 if (error != 0) 6576 return (error); 6577 6578 ZFS_ENTER(zfsvfs); 6579 6580 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6581 LOOKUP_XATTR); 6582 if (error != 0) { 6583 ZFS_EXIT(zfsvfs); 6584 return (error); 6585 } 6586 6587 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 6588 UIO_SYSSPACE, attrname, xvp, td); 6589 error = namei(&nd); 6590 vp = nd.ni_vp; 6591 NDFREE(&nd, NDF_ONLY_PNBUF); 6592 if (error != 0) { 6593 ZFS_EXIT(zfsvfs); 6594 if (error == ENOENT) 6595 error = ENOATTR; 6596 return (error); 6597 } 6598 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 6599 6600 vput(nd.ni_dvp); 6601 if (vp == nd.ni_dvp) 6602 vrele(vp); 6603 else 6604 vput(vp); 6605 ZFS_EXIT(zfsvfs); 6606 6607 return (error); 6608} 6609 6610/* 6611 * Vnode operation to set a named attribute. 6612 */ 6613static int 6614zfs_setextattr(struct vop_setextattr_args *ap) 6615/* 6616vop_setextattr { 6617 IN struct vnode *a_vp; 6618 IN int a_attrnamespace; 6619 IN const char *a_name; 6620 INOUT struct uio *a_uio; 6621 IN struct ucred *a_cred; 6622 IN struct thread *a_td; 6623}; 6624*/ 6625{ 6626 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6627 struct thread *td = ap->a_td; 6628 struct nameidata nd; 6629 char attrname[255]; 6630 struct vattr va; 6631 vnode_t *xvp = NULL, *vp; 6632 int error, flags; 6633 6634 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6635 ap->a_cred, ap->a_td, VWRITE); 6636 if (error != 0) 6637 return (error); 6638 6639 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6640 sizeof(attrname)); 6641 if (error != 0) 6642 return (error); 6643 6644 ZFS_ENTER(zfsvfs); 6645 6646 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6647 LOOKUP_XATTR | CREATE_XATTR_DIR); 6648 if (error != 0) { 6649 ZFS_EXIT(zfsvfs); 6650 return (error); 6651 } 6652 6653 flags = FFLAGS(O_WRONLY | O_CREAT); 6654 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 6655 xvp, td); 6656 error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); 6657 vp = nd.ni_vp; 6658 NDFREE(&nd, NDF_ONLY_PNBUF); 6659 if (error != 0) { 6660 ZFS_EXIT(zfsvfs); 6661 return (error); 6662 } 6663 6664 VATTR_NULL(&va); 6665 va.va_size = 0; 6666 error = VOP_SETATTR(vp, &va, ap->a_cred); 6667 if (error == 0) 6668 VOP_WRITE(vp, ap->a_uio, IO_UNIT | IO_SYNC, ap->a_cred); 6669 6670 VOP_UNLOCK(vp, 0); 6671 vn_close(vp, flags, ap->a_cred, td); 6672 ZFS_EXIT(zfsvfs); 6673 6674 return (error); 6675} 6676 6677/* 6678 * Vnode operation to retrieve extended attributes on a vnode. 6679 */ 6680static int 6681zfs_listextattr(struct vop_listextattr_args *ap) 6682/* 6683vop_listextattr { 6684 IN struct vnode *a_vp; 6685 IN int a_attrnamespace; 6686 INOUT struct uio *a_uio; 6687 OUT size_t *a_size; 6688 IN struct ucred *a_cred; 6689 IN struct thread *a_td; 6690}; 6691*/ 6692{ 6693 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6694 struct thread *td = ap->a_td; 6695 struct nameidata nd; 6696 char attrprefix[16]; 6697 u_char dirbuf[sizeof(struct dirent)]; 6698 struct dirent *dp; 6699 struct iovec aiov; 6700 struct uio auio, *uio = ap->a_uio; 6701 size_t *sizep = ap->a_size; 6702 size_t plen; 6703 vnode_t *xvp = NULL, *vp; 6704 int done, error, eof, pos; 6705 6706 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6707 ap->a_cred, ap->a_td, VREAD); 6708 if (error != 0) 6709 return (error); 6710 6711 error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 6712 sizeof(attrprefix)); 6713 if (error != 0) 6714 return (error); 6715 plen = strlen(attrprefix); 6716 6717 ZFS_ENTER(zfsvfs); 6718 6719 if (sizep != NULL) 6720 *sizep = 0; 6721 6722 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6723 LOOKUP_XATTR); 6724 if (error != 0) { 6725 ZFS_EXIT(zfsvfs); 6726 /* 6727 * ENOATTR means that the EA directory does not yet exist, 6728 * i.e. there are no extended attributes there. 6729 */ 6730 if (error == ENOATTR) 6731 error = 0; 6732 return (error); 6733 } 6734 6735 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 6736 UIO_SYSSPACE, ".", xvp, td); 6737 error = namei(&nd); 6738 vp = nd.ni_vp; 6739 NDFREE(&nd, NDF_ONLY_PNBUF); 6740 if (error != 0) { 6741 ZFS_EXIT(zfsvfs); 6742 return (error); 6743 } 6744 6745 auio.uio_iov = &aiov; 6746 auio.uio_iovcnt = 1; 6747 auio.uio_segflg = UIO_SYSSPACE; 6748 auio.uio_td = td; 6749 auio.uio_rw = UIO_READ; 6750 auio.uio_offset = 0; 6751 6752 do { 6753 u_char nlen; 6754 6755 aiov.iov_base = (void *)dirbuf; 6756 aiov.iov_len = sizeof(dirbuf); 6757 auio.uio_resid = sizeof(dirbuf); 6758 error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 6759 done = sizeof(dirbuf) - auio.uio_resid; 6760 if (error != 0) 6761 break; 6762 for (pos = 0; pos < done;) { 6763 dp = (struct dirent *)(dirbuf + pos); 6764 pos += dp->d_reclen; 6765 /* 6766 * XXX: Temporarily we also accept DT_UNKNOWN, as this 6767 * is what we get when attribute was created on Solaris. 6768 */ 6769 if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 6770 continue; 6771 if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) 6772 continue; 6773 else if (strncmp(dp->d_name, attrprefix, plen) != 0) 6774 continue; 6775 nlen = dp->d_namlen - plen; 6776 if (sizep != NULL) 6777 *sizep += 1 + nlen; 6778 else if (uio != NULL) { 6779 /* 6780 * Format of extattr name entry is one byte for 6781 * length and the rest for name. 6782 */ 6783 error = uiomove(&nlen, 1, uio->uio_rw, uio); 6784 if (error == 0) { 6785 error = uiomove(dp->d_name + plen, nlen, 6786 uio->uio_rw, uio); 6787 } 6788 if (error != 0) 6789 break; 6790 } 6791 } 6792 } while (!eof && error == 0); 6793 6794 vput(vp); 6795 ZFS_EXIT(zfsvfs); 6796 6797 return (error); 6798} 6799 6800int 6801zfs_freebsd_getacl(ap) 6802 struct vop_getacl_args /* { 6803 struct vnode *vp; 6804 acl_type_t type; 6805 struct acl *aclp; 6806 struct ucred *cred; 6807 struct thread *td; 6808 } */ *ap; 6809{ 6810 int error; 6811 vsecattr_t vsecattr; 6812 6813 if (ap->a_type != ACL_TYPE_NFS4) 6814 return (EINVAL); 6815 6816 vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 6817 if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)) 6818 return (error); 6819 6820 error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt); 6821 if (vsecattr.vsa_aclentp != NULL) 6822 kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz); 6823 6824 return (error); 6825} 6826 6827int 6828zfs_freebsd_setacl(ap) 6829 struct vop_setacl_args /* { 6830 struct vnode *vp; 6831 acl_type_t type; 6832 struct acl *aclp; 6833 struct ucred *cred; 6834 struct thread *td; 6835 } */ *ap; 6836{ 6837 int error; 6838 vsecattr_t vsecattr; 6839 int aclbsize; /* size of acl list in bytes */ 6840 aclent_t *aaclp; 6841 6842 if (ap->a_type != ACL_TYPE_NFS4) 6843 return (EINVAL); 6844 6845 if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 6846 return (EINVAL); 6847 6848 /* 6849 * With NFSv4 ACLs, chmod(2) may need to add additional entries, 6850 * splitting every entry into two and appending "canonical six" 6851 * entries at the end. Don't allow for setting an ACL that would 6852 * cause chmod(2) to run out of ACL entries. 6853 */ 6854 if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 6855 return (ENOSPC); 6856 6857 error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 6858 if (error != 0) 6859 return (error); 6860 6861 vsecattr.vsa_mask = VSA_ACE; 6862 aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t); 6863 vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 6864 aaclp = vsecattr.vsa_aclentp; 6865 vsecattr.vsa_aclentsz = aclbsize; 6866 6867 aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 6868 error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL); 6869 kmem_free(aaclp, aclbsize); 6870 6871 return (error); 6872} 6873 6874int 6875zfs_freebsd_aclcheck(ap) 6876 struct vop_aclcheck_args /* { 6877 struct vnode *vp; 6878 acl_type_t type; 6879 struct acl *aclp; 6880 struct ucred *cred; 6881 struct thread *td; 6882 } */ *ap; 6883{ 6884 6885 return (EOPNOTSUPP); 6886} 6887 6888struct vop_vector zfs_vnodeops; 6889struct vop_vector zfs_fifoops; 6890struct vop_vector zfs_shareops; 6891 6892struct vop_vector zfs_vnodeops = { 6893 .vop_default = &default_vnodeops, 6894 .vop_inactive = zfs_freebsd_inactive, 6895 .vop_reclaim = zfs_freebsd_reclaim, 6896 .vop_access = zfs_freebsd_access, 6897#ifdef FREEBSD_NAMECACHE 6898 .vop_lookup = vfs_cache_lookup, 6899 .vop_cachedlookup = zfs_freebsd_lookup, 6900#else 6901 .vop_lookup = zfs_freebsd_lookup, 6902#endif 6903 .vop_getattr = zfs_freebsd_getattr, 6904 .vop_setattr = zfs_freebsd_setattr, 6905 .vop_create = zfs_freebsd_create, 6906 .vop_mknod = zfs_freebsd_create, 6907 .vop_mkdir = zfs_freebsd_mkdir, 6908 .vop_readdir = zfs_freebsd_readdir, 6909 .vop_fsync = zfs_freebsd_fsync, 6910 .vop_open = zfs_freebsd_open, 6911 .vop_close = zfs_freebsd_close, 6912 .vop_rmdir = zfs_freebsd_rmdir, 6913 .vop_ioctl = zfs_freebsd_ioctl, 6914 .vop_link = zfs_freebsd_link, 6915 .vop_symlink = zfs_freebsd_symlink, 6916 .vop_readlink = zfs_freebsd_readlink, 6917 .vop_read = zfs_freebsd_read, 6918 .vop_write = zfs_freebsd_write, 6919 .vop_remove = zfs_freebsd_remove, 6920 .vop_rename = zfs_freebsd_rename, 6921 .vop_pathconf = zfs_freebsd_pathconf, 6922 .vop_bmap = zfs_freebsd_bmap, 6923 .vop_fid = zfs_freebsd_fid, 6924 .vop_getextattr = zfs_getextattr, 6925 .vop_deleteextattr = zfs_deleteextattr, 6926 .vop_setextattr = zfs_setextattr, 6927 .vop_listextattr = zfs_listextattr, 6928 .vop_getacl = zfs_freebsd_getacl, 6929 .vop_setacl = zfs_freebsd_setacl, 6930 .vop_aclcheck = zfs_freebsd_aclcheck, 6931 .vop_getpages = zfs_freebsd_getpages, 6932}; 6933 6934struct vop_vector zfs_fifoops = { 6935 .vop_default = &fifo_specops, 6936 .vop_fsync = zfs_freebsd_fsync, 6937 .vop_access = zfs_freebsd_access, 6938 .vop_getattr = zfs_freebsd_getattr, 6939 .vop_inactive = zfs_freebsd_inactive, 6940 .vop_read = VOP_PANIC, 6941 .vop_reclaim = zfs_freebsd_reclaim, 6942 .vop_setattr = zfs_freebsd_setattr, 6943 .vop_write = VOP_PANIC, 6944 .vop_pathconf = zfs_freebsd_fifo_pathconf, 6945 .vop_fid = zfs_freebsd_fid, 6946 .vop_getacl = zfs_freebsd_getacl, 6947 .vop_setacl = zfs_freebsd_setacl, 6948 .vop_aclcheck = zfs_freebsd_aclcheck, 6949}; 6950 6951/* 6952 * special share hidden files vnode operations template 6953 */ 6954struct vop_vector zfs_shareops = { 6955 .vop_default = &default_vnodeops, 6956 .vop_access = zfs_freebsd_access, 6957 .vop_inactive = zfs_freebsd_inactive, 6958 .vop_reclaim = zfs_freebsd_reclaim, 6959 .vop_fid = zfs_freebsd_fid, 6960 .vop_pathconf = zfs_freebsd_pathconf, 6961}; 6962