zfs_vnops.c revision 212951
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25/* Portions Copyright 2007 Jeremy Teo */ 26 27#include <sys/types.h> 28#include <sys/param.h> 29#include <sys/time.h> 30#include <sys/systm.h> 31#include <sys/sysmacros.h> 32#include <sys/resource.h> 33#include <sys/vfs.h> 34#include <sys/vnode.h> 35#include <sys/file.h> 36#include <sys/stat.h> 37#include <sys/kmem.h> 38#include <sys/taskq.h> 39#include <sys/uio.h> 40#include <sys/atomic.h> 41#include <sys/namei.h> 42#include <sys/mman.h> 43#include <sys/cmn_err.h> 44#include <sys/errno.h> 45#include <sys/unistd.h> 46#include <sys/zfs_dir.h> 47#include <sys/zfs_ioctl.h> 48#include <sys/fs/zfs.h> 49#include <sys/dmu.h> 50#include <sys/spa.h> 51#include <sys/txg.h> 52#include <sys/dbuf.h> 53#include <sys/zap.h> 54#include <sys/dirent.h> 55#include <sys/policy.h> 56#include <sys/sunddi.h> 57#include <sys/filio.h> 58#include <sys/sid.h> 59#include <sys/zfs_ctldir.h> 60#include <sys/zfs_fuid.h> 61#include <sys/dnlc.h> 62#include <sys/zfs_rlock.h> 63#include <sys/extdirent.h> 64#include <sys/kidmap.h> 65#include <sys/bio.h> 66#include 
<sys/buf.h> 67#include <sys/sf_buf.h> 68#include <sys/sched.h> 69#include <sys/acl.h> 70 71/* 72 * Programming rules. 73 * 74 * Each vnode op performs some logical unit of work. To do this, the ZPL must 75 * properly lock its in-core state, create a DMU transaction, do the work, 76 * record this work in the intent log (ZIL), commit the DMU transaction, 77 * and wait for the intent log to commit if it is a synchronous operation. 78 * Moreover, the vnode ops must work in both normal and log replay context. 79 * The ordering of events is important to avoid deadlocks and references 80 * to freed memory. The example below illustrates the following Big Rules: 81 * 82 * (1) A check must be made in each zfs thread for a mounted file system. 83 * This is done avoiding races using ZFS_ENTER(zfsvfs). 84 * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 85 * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 86 * can return EIO from the calling function. 87 * 88 * (2) VN_RELE() should always be the last thing except for zil_commit() 89 * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 90 * First, if it's the last reference, the vnode/znode 91 * can be freed, so the zp may point to freed memory. Second, the last 92 * reference will call zfs_zinactive(), which may induce a lot of work -- 93 * pushing cached pages (which acquires range locks) and syncing out 94 * cached atime changes. Third, zfs_zinactive() may require a new tx, 95 * which could deadlock the system if you were already holding one. 96 * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 97 * 98 * (3) All range locks must be grabbed before calling dmu_tx_assign(), 99 * as they can span dmu_tx_assign() calls. 100 * 101 * (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign(). 102 * This is critical because we don't want to block while holding locks. 
103 * Note, in particular, that if a lock is sometimes acquired before 104 * the tx assigns, and sometimes after (e.g. z_lock), then failing to 105 * use a non-blocking assign can deadlock the system. The scenario: 106 * 107 * Thread A has grabbed a lock before calling dmu_tx_assign(). 108 * Thread B is in an already-assigned tx, and blocks for this lock. 109 * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 110 * forever, because the previous txg can't quiesce until B's tx commits. 111 * 112 * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 113 * then drop all locks, call dmu_tx_wait(), and try again. 114 * 115 * (5) If the operation succeeded, generate the intent log entry for it 116 * before dropping locks. This ensures that the ordering of events 117 * in the intent log matches the order in which they actually occurred. 118 * During ZIL replay the zfs_log_* functions will update the sequence 119 * number to indicate the zil transaction has replayed. 120 * 121 * (6) At the end of each vnode op, the DMU tx must always commit, 122 * regardless of whether there were any errors. 123 * 124 * (7) After dropping all locks, invoke zil_commit(zilog, seq, foid) 125 * to ensure that synchronous semantics are provided when necessary. 126 * 127 * In general, this is how things should be ordered in each vnode op: 128 * 129 * ZFS_ENTER(zfsvfs); // exit if unmounted 130 * top: 131 * zfs_dirent_lock(&dl, ...) 
// lock directory entry (may VN_HOLD()) 132 * rw_enter(...); // grab any other locks you need 133 * tx = dmu_tx_create(...); // get DMU tx 134 * dmu_tx_hold_*(); // hold each object you might modify 135 * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign 136 * if (error) { 137 * rw_exit(...); // drop locks 138 * zfs_dirent_unlock(dl); // unlock directory entry 139 * VN_RELE(...); // release held vnodes 140 * if (error == ERESTART) { 141 * dmu_tx_wait(tx); 142 * dmu_tx_abort(tx); 143 * goto top; 144 * } 145 * dmu_tx_abort(tx); // abort DMU tx 146 * ZFS_EXIT(zfsvfs); // finished in zfs 147 * return (error); // really out of space 148 * } 149 * error = do_real_work(); // do whatever this VOP does 150 * if (error == 0) 151 * zfs_log_*(...); // on success, make ZIL entry 152 * dmu_tx_commit(tx); // commit DMU tx -- error or not 153 * rw_exit(...); // drop locks 154 * zfs_dirent_unlock(dl); // unlock directory entry 155 * VN_RELE(...); // release held vnodes 156 * zil_commit(zilog, seq, foid); // synchronous when necessary 157 * ZFS_EXIT(zfsvfs); // finished in zfs 158 * return (error); // done, report error 159 */ 160 161/* ARGSUSED */ 162static int 163zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 164{ 165 znode_t *zp = VTOZ(*vpp); 166 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 167 168 ZFS_ENTER(zfsvfs); 169 ZFS_VERIFY_ZP(zp); 170 171 if ((flag & FWRITE) && (zp->z_phys->zp_flags & ZFS_APPENDONLY) && 172 ((flag & FAPPEND) == 0)) { 173 ZFS_EXIT(zfsvfs); 174 return (EPERM); 175 } 176 177 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 178 ZTOV(zp)->v_type == VREG && 179 !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) && 180 zp->z_phys->zp_size > 0) { 181 if (fs_vscan(*vpp, cr, 0) != 0) { 182 ZFS_EXIT(zfsvfs); 183 return (EACCES); 184 } 185 } 186 187 /* Keep a count of the synchronous opens in the znode */ 188 if (flag & (FSYNC | FDSYNC)) 189 atomic_inc_32(&zp->z_sync_cnt); 190 191 ZFS_EXIT(zfsvfs); 192 return (0); 193} 194 195/* ARGSUSED */ 196static int 
197zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 198 caller_context_t *ct) 199{ 200 znode_t *zp = VTOZ(vp); 201 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 202 203 /* 204 * Clean up any locks held by this process on the vp. 205 */ 206 cleanlocks(vp, ddi_get_pid(), 0); 207 cleanshares(vp, ddi_get_pid()); 208 209 ZFS_ENTER(zfsvfs); 210 ZFS_VERIFY_ZP(zp); 211 212 /* Decrement the synchronous opens in the znode */ 213 if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 214 atomic_dec_32(&zp->z_sync_cnt); 215 216 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 217 ZTOV(zp)->v_type == VREG && 218 !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) && 219 zp->z_phys->zp_size > 0) 220 VERIFY(fs_vscan(vp, cr, 1) == 0); 221 222 ZFS_EXIT(zfsvfs); 223 return (0); 224} 225 226/* 227 * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 228 * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 229 */ 230static int 231zfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 232{ 233 znode_t *zp = VTOZ(vp); 234 uint64_t noff = (uint64_t)*off; /* new offset */ 235 uint64_t file_sz; 236 int error; 237 boolean_t hole; 238 239 file_sz = zp->z_phys->zp_size; 240 if (noff >= file_sz) { 241 return (ENXIO); 242 } 243 244 if (cmd == _FIO_SEEK_HOLE) 245 hole = B_TRUE; 246 else 247 hole = B_FALSE; 248 249 error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 250 251 /* end of file? */ 252 if ((error == ESRCH) || (noff > file_sz)) { 253 /* 254 * Handle the virtual hole at the end of file. 
255 */ 256 if (hole) { 257 *off = file_sz; 258 return (0); 259 } 260 return (ENXIO); 261 } 262 263 if (noff < *off) 264 return (error); 265 *off = noff; 266 return (error); 267} 268 269/* ARGSUSED */ 270static int 271zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 272 int *rvalp, caller_context_t *ct) 273{ 274 offset_t off; 275 int error; 276 zfsvfs_t *zfsvfs; 277 znode_t *zp; 278 279 switch (com) { 280 case _FIOFFS: 281 return (0); 282 283 /* 284 * The following two ioctls are used by bfu. Faking out, 285 * necessary to avoid bfu errors. 286 */ 287 case _FIOGDIO: 288 case _FIOSDIO: 289 return (0); 290 291 case _FIO_SEEK_DATA: 292 case _FIO_SEEK_HOLE: 293 if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 294 return (EFAULT); 295 296 zp = VTOZ(vp); 297 zfsvfs = zp->z_zfsvfs; 298 ZFS_ENTER(zfsvfs); 299 ZFS_VERIFY_ZP(zp); 300 301 /* offset parameter is in/out */ 302 error = zfs_holey(vp, com, &off); 303 ZFS_EXIT(zfsvfs); 304 if (error) 305 return (error); 306 if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 307 return (EFAULT); 308 return (0); 309 } 310 return (ENOTTY); 311} 312 313static vm_page_t 314page_lookup(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 315{ 316 vm_object_t obj; 317 vm_page_t pp; 318 319 obj = vp->v_object; 320 VM_OBJECT_LOCK_ASSERT(obj, MA_OWNED); 321 322 for (;;) { 323 if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 324 vm_page_is_valid(pp, (vm_offset_t)off, nbytes)) { 325 if ((pp->oflags & VPO_BUSY) != 0) { 326 /* 327 * Reference the page before unlocking and 328 * sleeping so that the page daemon is less 329 * likely to reclaim it. 
330 */ 331 vm_page_lock_queues(); 332 vm_page_flag_set(pp, PG_REFERENCED); 333 vm_page_sleep(pp, "zfsmwb"); 334 continue; 335 } 336 vm_page_busy(pp); 337 vm_page_undirty(pp); 338 } else { 339 if (__predict_false(obj->cache != NULL)) { 340 vm_page_cache_free(obj, OFF_TO_IDX(start), 341 OFF_TO_IDX(start) + 1); 342 } 343 pp = NULL; 344 } 345 break; 346 } 347 return (pp); 348} 349 350static void 351page_unlock(vm_page_t pp) 352{ 353 354 vm_page_wakeup(pp); 355} 356 357static caddr_t 358zfs_map_page(vm_page_t pp, struct sf_buf **sfp) 359{ 360 361 *sfp = sf_buf_alloc(pp, 0); 362 return ((caddr_t)sf_buf_kva(*sfp)); 363} 364 365static void 366zfs_unmap_page(struct sf_buf *sf) 367{ 368 369 sf_buf_free(sf); 370} 371 372 373/* 374 * When a file is memory mapped, we must keep the IO data synchronized 375 * between the DMU cache and the memory mapped pages. What this means: 376 * 377 * On Write: If we find a memory mapped page, we write to *both* 378 * the page and the dmu buffer. 379 */ 380 381static void 382update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 383 int segflg, dmu_tx_t *tx) 384{ 385 vm_object_t obj; 386 struct sf_buf *sf; 387 int off; 388 389 ASSERT(vp->v_mount != NULL); 390 obj = vp->v_object; 391 ASSERT(obj != NULL); 392 393 off = start & PAGEOFFSET; 394 VM_OBJECT_LOCK(obj); 395 for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 396 vm_page_t pp; 397 int nbytes = MIN(PAGESIZE - off, len); 398 399 if ((pp = page_lookup(vp, start, off, nbytes)) != NULL) { 400 caddr_t va; 401 402 VM_OBJECT_UNLOCK(obj); 403 va = zfs_map_page(pp, &sf); 404 if (segflg == UIO_NOCOPY) { 405 (void) dmu_write(os, oid, start+off, nbytes, 406 va+off, tx); 407 } else { 408 (void) dmu_read(os, oid, start+off, nbytes, 409 va+off, DMU_READ_PREFETCH);; 410 } 411 zfs_unmap_page(sf); 412 VM_OBJECT_LOCK(obj); 413 page_unlock(pp); 414 415 } 416 len -= nbytes; 417 off = 0; 418 } 419 VM_OBJECT_UNLOCK(obj); 420} 421 422/* 423 * When a file is memory mapped, we must keep 
the IO data synchronized 424 * between the DMU cache and the memory mapped pages. What this means: 425 * 426 * On Read: We "read" preferentially from memory mapped pages, 427 * else we default from the dmu buffer. 428 * 429 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 430 * the file is memory mapped. 431 */ 432static int 433mappedread(vnode_t *vp, int nbytes, uio_t *uio) 434{ 435 znode_t *zp = VTOZ(vp); 436 objset_t *os = zp->z_zfsvfs->z_os; 437 vm_object_t obj; 438 vm_page_t m; 439 struct sf_buf *sf; 440 int64_t start; 441 caddr_t va; 442 int len = nbytes; 443 int off; 444 int error = 0; 445 uint64_t dirbytes; 446 447 ASSERT(vp->v_mount != NULL); 448 obj = vp->v_object; 449 ASSERT(obj != NULL); 450 451 start = uio->uio_loffset; 452 off = start & PAGEOFFSET; 453 dirbytes = 0; 454 VM_OBJECT_LOCK(obj); 455 for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 456 int bytes = MIN(PAGESIZE - off, len); 457 458again: 459 if ((m = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 460 vm_page_is_valid(m, off, bytes)) { 461 if ((m->oflags & VPO_BUSY) != 0) { 462 /* 463 * Reference the page before unlocking and 464 * sleeping so that the page daemon is less 465 * likely to reclaim it. 466 */ 467 vm_page_lock_queues(); 468 vm_page_flag_set(m, PG_REFERENCED); 469 vm_page_sleep(m, "zfsmrb"); 470 goto again; 471 } 472 473 vm_page_busy(m); 474 VM_OBJECT_UNLOCK(obj); 475 if (dirbytes > 0) { 476 error = dmu_read_uio(os, zp->z_id, uio, 477 dirbytes); 478 dirbytes = 0; 479 } 480 if (error == 0) 481 uiomove_fromphys(&m, off, bytes, uio); 482 VM_OBJECT_LOCK(obj); 483 vm_page_wakeup(m); 484 } else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) { 485 /* 486 * The code below is here to make sendfile(2) work 487 * correctly with ZFS. As pointed out by ups@ 488 * sendfile(2) should be changed to use VOP_GETPAGES(), 489 * but it pessimize performance of sendfile/UFS, that's 490 * why I handle this special case in ZFS code. 
491 */ 492 if ((m->oflags & VPO_BUSY) != 0) { 493 /* 494 * Reference the page before unlocking and 495 * sleeping so that the page daemon is less 496 * likely to reclaim it. 497 */ 498 vm_page_lock_queues(); 499 vm_page_flag_set(m, PG_REFERENCED); 500 vm_page_sleep(m, "zfsmrb"); 501 goto again; 502 } 503 vm_page_busy(m); 504 VM_OBJECT_UNLOCK(obj); 505 if (dirbytes > 0) { 506 error = dmu_read_uio(os, zp->z_id, uio, 507 dirbytes); 508 dirbytes = 0; 509 } 510 if (error == 0) { 511 va = zfs_map_page(m, &sf); 512 error = dmu_read(os, zp->z_id, start + off, 513 bytes, (void *)(va + off), 514 DMU_READ_PREFETCH); 515 zfs_unmap_page(sf); 516 } 517 VM_OBJECT_LOCK(obj); 518 if (error == 0) 519 vm_page_set_valid(m, off, bytes); 520 vm_page_wakeup(m); 521 if (error == 0) { 522 uio->uio_resid -= bytes; 523 uio->uio_offset += bytes; 524 } 525 } else { 526 dirbytes += bytes; 527 } 528 len -= bytes; 529 off = 0; 530 if (error) 531 break; 532 } 533 VM_OBJECT_UNLOCK(obj); 534 if (error == 0 && dirbytes > 0) 535 error = dmu_read_uio(os, zp->z_id, uio, dirbytes); 536 return (error); 537} 538 539offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 540 541/* 542 * Read bytes from specified file into supplied buffer. 543 * 544 * IN: vp - vnode of file to be read from. 545 * uio - structure supplying read location, range info, 546 * and return buffer. 547 * ioflag - SYNC flags; used to provide FRSYNC semantics. 548 * cr - credentials of caller. 549 * ct - caller context 550 * 551 * OUT: uio - updated offset and range, buffer filled. 
552 * 553 * RETURN: 0 if success 554 * error code if failure 555 * 556 * Side Effects: 557 * vp - atime updated if byte count > 0 558 */ 559/* ARGSUSED */ 560static int 561zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 562{ 563 znode_t *zp = VTOZ(vp); 564 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 565 objset_t *os; 566 ssize_t n, nbytes; 567 int error; 568 rl_t *rl; 569 570 ZFS_ENTER(zfsvfs); 571 ZFS_VERIFY_ZP(zp); 572 os = zfsvfs->z_os; 573 574 if (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) { 575 ZFS_EXIT(zfsvfs); 576 return (EACCES); 577 } 578 579 /* 580 * Validate file offset 581 */ 582 if (uio->uio_loffset < (offset_t)0) { 583 ZFS_EXIT(zfsvfs); 584 return (EINVAL); 585 } 586 587 /* 588 * Fasttrack empty reads 589 */ 590 if (uio->uio_resid == 0) { 591 ZFS_EXIT(zfsvfs); 592 return (0); 593 } 594 595 /* 596 * Check for mandatory locks 597 */ 598 if (MANDMODE((mode_t)zp->z_phys->zp_mode)) { 599 if (error = chklock(vp, FREAD, 600 uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 601 ZFS_EXIT(zfsvfs); 602 return (error); 603 } 604 } 605 606 /* 607 * If we're in FRSYNC mode, sync out this znode before reading it. 608 */ 609 if (ioflag & FRSYNC) 610 zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); 611 612 /* 613 * Lock the range against changes. 614 */ 615 rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 616 617 /* 618 * If we are reading past end-of-file we can skip 619 * to the end; but we might still need to set atime. 
620 */ 621 if (uio->uio_loffset >= zp->z_phys->zp_size) { 622 error = 0; 623 goto out; 624 } 625 626 ASSERT(uio->uio_loffset < zp->z_phys->zp_size); 627 n = MIN(uio->uio_resid, zp->z_phys->zp_size - uio->uio_loffset); 628 629 while (n > 0) { 630 nbytes = MIN(n, zfs_read_chunk_size - 631 P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 632 633 if (vn_has_cached_data(vp)) 634 error = mappedread(vp, nbytes, uio); 635 else 636 error = dmu_read_uio(os, zp->z_id, uio, nbytes); 637 if (error) { 638 /* convert checksum errors into IO errors */ 639 if (error == ECKSUM) 640 error = EIO; 641 break; 642 } 643 644 n -= nbytes; 645 } 646 647out: 648 zfs_range_unlock(rl); 649 650 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 651 ZFS_EXIT(zfsvfs); 652 return (error); 653} 654 655/* 656 * Fault in the pages of the first n bytes specified by the uio structure. 657 * 1 byte in each page is touched and the uio struct is unmodified. 658 * Any error will exit this routine as this is only a best 659 * attempt to get the pages resident. This is a copy of ufs_trans_touch(). 660 */ 661static void 662zfs_prefault_write(ssize_t n, struct uio *uio) 663{ 664 struct iovec *iov; 665 ulong_t cnt, incr; 666 caddr_t p; 667 668 if (uio->uio_segflg != UIO_USERSPACE) 669 return; 670 671 iov = uio->uio_iov; 672 673 while (n) { 674 cnt = MIN(iov->iov_len, n); 675 if (cnt == 0) { 676 /* empty iov entry */ 677 iov++; 678 continue; 679 } 680 n -= cnt; 681 /* 682 * touch each page in this segment. 683 */ 684 p = iov->iov_base; 685 while (cnt) { 686 if (fubyte(p) == -1) 687 return; 688 incr = MIN(cnt, PAGESIZE); 689 p += incr; 690 cnt -= incr; 691 } 692 /* 693 * touch the last byte in case it straddles a page. 694 */ 695 p--; 696 if (fubyte(p) == -1) 697 return; 698 iov++; 699 } 700} 701 702/* 703 * Write the bytes to a file. 704 * 705 * IN: vp - vnode of file to be written to. 706 * uio - structure supplying write location, range info, 707 * and data buffer. 708 * ioflag - IO_APPEND flag set if in append mode. 
709 * cr - credentials of caller. 710 * ct - caller context (NFS/CIFS fem monitor only) 711 * 712 * OUT: uio - updated offset and range. 713 * 714 * RETURN: 0 if success 715 * error code if failure 716 * 717 * Timestamps: 718 * vp - ctime|mtime updated if byte count > 0 719 */ 720/* ARGSUSED */ 721static int 722zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 723{ 724 znode_t *zp = VTOZ(vp); 725 rlim64_t limit = MAXOFFSET_T; 726 ssize_t start_resid = uio->uio_resid; 727 ssize_t tx_bytes; 728 uint64_t end_size; 729 dmu_tx_t *tx; 730 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 731 zilog_t *zilog; 732 offset_t woff; 733 ssize_t n, nbytes; 734 rl_t *rl; 735 int max_blksz = zfsvfs->z_max_blksz; 736 uint64_t pflags; 737 int error; 738 arc_buf_t *abuf; 739 740 /* 741 * Fasttrack empty write 742 */ 743 n = start_resid; 744 if (n == 0) 745 return (0); 746 747 if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 748 limit = MAXOFFSET_T; 749 750 ZFS_ENTER(zfsvfs); 751 ZFS_VERIFY_ZP(zp); 752 753 /* 754 * If immutable or not appending then return EPERM 755 */ 756 pflags = zp->z_phys->zp_flags; 757 if ((pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 758 ((pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 759 (uio->uio_loffset < zp->z_phys->zp_size))) { 760 ZFS_EXIT(zfsvfs); 761 return (EPERM); 762 } 763 764 zilog = zfsvfs->z_log; 765 766 /* 767 * Pre-fault the pages to ensure slow (eg NFS) pages 768 * don't hold up txg. 769 */ 770 zfs_prefault_write(n, uio); 771 772 /* 773 * If in append mode, set the io offset pointer to eof. 774 */ 775 if (ioflag & IO_APPEND) { 776 /* 777 * Range lock for a file append: 778 * The value for the start of range will be determined by 779 * zfs_range_lock() (to guarantee append semantics). 780 * If this write will cause the block size to increase, 781 * zfs_range_lock() will lock the entire file, so we must 782 * later reduce the range after we grow the block size. 
783 */ 784 rl = zfs_range_lock(zp, 0, n, RL_APPEND); 785 if (rl->r_len == UINT64_MAX) { 786 /* overlocked, zp_size can't change */ 787 woff = uio->uio_loffset = zp->z_phys->zp_size; 788 } else { 789 woff = uio->uio_loffset = rl->r_off; 790 } 791 } else { 792 woff = uio->uio_loffset; 793 /* 794 * Validate file offset 795 */ 796 if (woff < 0) { 797 ZFS_EXIT(zfsvfs); 798 return (EINVAL); 799 } 800 801 /* 802 * If we need to grow the block size then zfs_range_lock() 803 * will lock a wider range than we request here. 804 * Later after growing the block size we reduce the range. 805 */ 806 rl = zfs_range_lock(zp, woff, n, RL_WRITER); 807 } 808 809 if (woff >= limit) { 810 zfs_range_unlock(rl); 811 ZFS_EXIT(zfsvfs); 812 return (EFBIG); 813 } 814 815 if ((woff + n) > limit || woff > (limit - n)) 816 n = limit - woff; 817 818 /* 819 * Check for mandatory locks 820 */ 821 if (MANDMODE((mode_t)zp->z_phys->zp_mode) && 822 (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 823 zfs_range_unlock(rl); 824 ZFS_EXIT(zfsvfs); 825 return (error); 826 } 827 end_size = MAX(zp->z_phys->zp_size, woff + n); 828 829 /* 830 * Write the file in reasonable size chunks. Each chunk is written 831 * in a separate transaction; this keeps the intent log records small 832 * and allows us to do more fine-grained space accounting. 833 */ 834 while (n > 0) { 835 abuf = NULL; 836 woff = uio->uio_loffset; 837 838again: 839 if (zfs_usergroup_overquota(zfsvfs, 840 B_FALSE, zp->z_phys->zp_uid) || 841 zfs_usergroup_overquota(zfsvfs, 842 B_TRUE, zp->z_phys->zp_gid)) { 843 if (abuf != NULL) 844 dmu_return_arcbuf(abuf); 845 error = EDQUOT; 846 break; 847 } 848 849 /* 850 * If dmu_assign_arcbuf() is expected to execute with minimum 851 * overhead loan an arc buffer and copy user data to it before 852 * we enter a txg. This avoids holding a txg forever while we 853 * pagefault on a hanging NFS server mapping. 
854 */ 855 if (abuf == NULL && n >= max_blksz && 856 woff >= zp->z_phys->zp_size && 857 P2PHASE(woff, max_blksz) == 0 && 858 zp->z_blksz == max_blksz) { 859 size_t cbytes; 860 861 abuf = dmu_request_arcbuf(zp->z_dbuf, max_blksz); 862 ASSERT(abuf != NULL); 863 ASSERT(arc_buf_size(abuf) == max_blksz); 864 if (error = uiocopy(abuf->b_data, max_blksz, 865 UIO_WRITE, uio, &cbytes)) { 866 dmu_return_arcbuf(abuf); 867 break; 868 } 869 ASSERT(cbytes == max_blksz); 870 } 871 872 /* 873 * Start a transaction. 874 */ 875 tx = dmu_tx_create(zfsvfs->z_os); 876 dmu_tx_hold_bonus(tx, zp->z_id); 877 dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 878 error = dmu_tx_assign(tx, TXG_NOWAIT); 879 if (error) { 880 if (error == ERESTART) { 881 dmu_tx_wait(tx); 882 dmu_tx_abort(tx); 883 goto again; 884 } 885 dmu_tx_abort(tx); 886 if (abuf != NULL) 887 dmu_return_arcbuf(abuf); 888 break; 889 } 890 891 /* 892 * If zfs_range_lock() over-locked we grow the blocksize 893 * and then reduce the lock range. This will only happen 894 * on the first iteration since zfs_range_reduce() will 895 * shrink down r_len to the appropriate size. 896 */ 897 if (rl->r_len == UINT64_MAX) { 898 uint64_t new_blksz; 899 900 if (zp->z_blksz > max_blksz) { 901 ASSERT(!ISP2(zp->z_blksz)); 902 new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); 903 } else { 904 new_blksz = MIN(end_size, max_blksz); 905 } 906 zfs_grow_blocksize(zp, new_blksz, tx); 907 zfs_range_reduce(rl, woff, n); 908 } 909 910 /* 911 * XXX - should we really limit each write to z_max_blksz? 912 * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 
913 */ 914 nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 915 916 if (woff + nbytes > zp->z_phys->zp_size) 917 vnode_pager_setsize(vp, woff + nbytes); 918 919 if (abuf == NULL) { 920 tx_bytes = uio->uio_resid; 921 error = dmu_write_uio(zfsvfs->z_os, zp->z_id, uio, 922 nbytes, tx); 923 tx_bytes -= uio->uio_resid; 924 } else { 925 tx_bytes = nbytes; 926 ASSERT(tx_bytes == max_blksz); 927 dmu_assign_arcbuf(zp->z_dbuf, woff, abuf, tx); 928 ASSERT(tx_bytes <= uio->uio_resid); 929 uioskip(uio, tx_bytes); 930 } 931 932 if (tx_bytes && vn_has_cached_data(vp)) { 933 update_pages(vp, woff, tx_bytes, zfsvfs->z_os, 934 zp->z_id, uio->uio_segflg, tx); 935 } 936 937 /* 938 * If we made no progress, we're done. If we made even 939 * partial progress, update the znode and ZIL accordingly. 940 */ 941 if (tx_bytes == 0) { 942 dmu_tx_commit(tx); 943 ASSERT(error != 0); 944 break; 945 } 946 947 /* 948 * Clear Set-UID/Set-GID bits on successful write if not 949 * privileged and at least one of the excute bits is set. 950 * 951 * It would be nice to to this after all writes have 952 * been done, but that would still expose the ISUID/ISGID 953 * to another app after the partial write is committed. 954 * 955 * Note: we don't call zfs_fuid_map_id() here because 956 * user 0 is not an ephemeral uid. 957 */ 958 mutex_enter(&zp->z_acl_lock); 959 if ((zp->z_phys->zp_mode & (S_IXUSR | (S_IXUSR >> 3) | 960 (S_IXUSR >> 6))) != 0 && 961 (zp->z_phys->zp_mode & (S_ISUID | S_ISGID)) != 0 && 962 secpolicy_vnode_setid_retain(vp, cr, 963 (zp->z_phys->zp_mode & S_ISUID) != 0 && 964 zp->z_phys->zp_uid == 0) != 0) { 965 zp->z_phys->zp_mode &= ~(S_ISUID | S_ISGID); 966 } 967 mutex_exit(&zp->z_acl_lock); 968 969 /* 970 * Update time stamp. NOTE: This marks the bonus buffer as 971 * dirty, so we don't have to do it again for zp_size. 972 */ 973 zfs_time_stamper(zp, CONTENT_MODIFIED, tx); 974 975 /* 976 * Update the file size (zp_size) if it has changed; 977 * account for possible concurrent updates. 
978 */ 979 while ((end_size = zp->z_phys->zp_size) < uio->uio_loffset) 980 (void) atomic_cas_64(&zp->z_phys->zp_size, end_size, 981 uio->uio_loffset); 982 zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 983 dmu_tx_commit(tx); 984 985 if (error != 0) 986 break; 987 ASSERT(tx_bytes == nbytes); 988 n -= nbytes; 989 } 990 991 zfs_range_unlock(rl); 992 993 /* 994 * If we're in replay mode, or we made no progress, return error. 995 * Otherwise, it's at least a partial write, so it's successful. 996 */ 997 if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 998 ZFS_EXIT(zfsvfs); 999 return (error); 1000 } 1001 1002 if (ioflag & (FSYNC | FDSYNC)) 1003 zil_commit(zilog, zp->z_last_itx, zp->z_id); 1004 1005 ZFS_EXIT(zfsvfs); 1006 return (0); 1007} 1008 1009void 1010zfs_get_done(dmu_buf_t *db, void *vzgd) 1011{ 1012 zgd_t *zgd = (zgd_t *)vzgd; 1013 rl_t *rl = zgd->zgd_rl; 1014 vnode_t *vp = ZTOV(rl->r_zp); 1015 objset_t *os = rl->r_zp->z_zfsvfs->z_os; 1016 int vfslocked; 1017 1018 vfslocked = VFS_LOCK_GIANT(vp->v_vfsp); 1019 dmu_buf_rele(db, vzgd); 1020 zfs_range_unlock(rl); 1021 /* 1022 * Release the vnode asynchronously as we currently have the 1023 * txg stopped from syncing. 1024 */ 1025 VN_RELE_ASYNC(vp, dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1026 zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1027 kmem_free(zgd, sizeof (zgd_t)); 1028 VFS_UNLOCK_GIANT(vfslocked); 1029} 1030 1031/* 1032 * Get data to generate a TX_WRITE intent log record. 
 */
/*
 * ZIL "get data" callback: supply the data for a TX_WRITE log record at
 * commit time.  For an immediate write (buf != NULL) the user data is
 * copied into the log record itself; for an indirect write the block is
 * synced via dmu_sync() and the log record gets a block pointer to it.
 * Returns ENOENT if the file has been removed or truncated past the
 * write's offset.
 */
int
zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
{
	zfsvfs_t *zfsvfs = arg;
	objset_t *os = zfsvfs->z_os;
	znode_t *zp;
	uint64_t off = lr->lr_offset;
	dmu_buf_t *db;
	rl_t *rl;
	zgd_t *zgd;
	int dlen = lr->lr_length;	/* length of user data */
	int error = 0;

	ASSERT(zio);
	ASSERT(dlen != 0);

	/*
	 * Nothing to do if the file has been removed
	 */
	if (zfs_zget(zfsvfs, lr->lr_foid, &zp) != 0)
		return (ENOENT);
	if (zp->z_unlinked) {
		/*
		 * Release the vnode asynchronously as we currently have the
		 * txg stopped from syncing.
		 */
		VN_RELE_ASYNC(ZTOV(zp),
		    dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
		return (ENOENT);
	}

	/*
	 * Write records come in two flavors: immediate and indirect.
	 * For small writes it's cheaper to store the data with the
	 * log record (immediate); for large writes it's cheaper to
	 * sync the data and get a pointer to it (indirect) so that
	 * we don't have to write the data twice.
	 */
	if (buf != NULL) { /* immediate write */
		rl = zfs_range_lock(zp, off, dlen, RL_READER);
		/* test for truncation needs to be done while range locked */
		if (off >= zp->z_phys->zp_size) {
			error = ENOENT;
			goto out;
		}
		VERIFY(0 == dmu_read(os, lr->lr_foid, off, dlen, buf,
		    DMU_READ_NO_PREFETCH));
	} else { /* indirect write */
		uint64_t boff; /* block starting offset */

		/*
		 * Have to lock the whole block to ensure when it's
		 * written out and it's checksum is being calculated
		 * that no one can change the data. We need to re-check
		 * blocksize after we get the lock in case it's changed!
		 */
		for (;;) {
			/*
			 * Power-of-2 block sizes align the lock to the
			 * containing block; a non-power-of-2 size means a
			 * single (growing) first block, so lock from 0.
			 */
			if (ISP2(zp->z_blksz)) {
				boff = P2ALIGN_TYPED(off, zp->z_blksz,
				    uint64_t);
			} else {
				boff = 0;
			}
			dlen = zp->z_blksz;
			rl = zfs_range_lock(zp, boff, dlen, RL_READER);
			if (zp->z_blksz == dlen)
				break;
			/* block size changed while we waited; retry */
			zfs_range_unlock(rl);
		}
		/* test for truncation needs to be done while range locked */
		if (off >= zp->z_phys->zp_size) {
			error = ENOENT;
			goto out;
		}
		zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP);
		zgd->zgd_rl = rl;
		zgd->zgd_zilog = zfsvfs->z_log;
		zgd->zgd_bp = &lr->lr_blkptr;
		VERIFY(0 == dmu_buf_hold(os, lr->lr_foid, boff, zgd, &db));
		ASSERT(boff == db->db_offset);
		lr->lr_blkoff = off - boff;
		error = dmu_sync(zio, db, &lr->lr_blkptr,
		    lr->lr_common.lrc_txg, zfs_get_done, zgd);
		ASSERT((error && error != EINPROGRESS) ||
		    lr->lr_length <= zp->z_blksz);
		if (error == 0) {
			/*
			 * dmu_sync() can compress a block of zeros to a null
			 * blkptr but the block size still needs to be passed
			 * through to replay.
			 */
			BP_SET_LSIZE(&lr->lr_blkptr, db->db_size);
			zil_add_block(zfsvfs->z_log, &lr->lr_blkptr);
		}

		/*
		 * If we get EINPROGRESS, then we need to wait for a
		 * write IO initiated by dmu_sync() to complete before
		 * we can release this dbuf.  We will finish everything
		 * up in the zfs_get_done() callback.
		 * NOTE: on this path the range lock, dbuf hold, zgd
		 * allocation and the vnode hold are all handed off to
		 * zfs_get_done() rather than released here.
		 */
		if (error == EINPROGRESS) {
			return (0);
		} else if (error == EALREADY) {
			/* block already committed: log as a WRITE2 record */
			lr->lr_common.lrc_txtype = TX_WRITE2;
			error = 0;
		}
		dmu_buf_rele(db, zgd);
		kmem_free(zgd, sizeof (zgd_t));
	}
out:
	zfs_range_unlock(rl);
	/*
	 * Release the vnode asynchronously as we currently have the
	 * txg stopped from syncing.
	 */
	VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
	return (error);
}

/*
 * Check access to a file with either ACE-style (V_ACE_MASK set in flag)
 * or traditional rwx semantics.  Returns 0 or an errno.
 */
/*ARGSUSED*/
static int
zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if (flag & V_ACE_MASK)
		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
	else
		error = zfs_zaccess_rwx(zp, mode, flag, cr);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * If vnode is for a device return a specfs vnode instead.
 */
static int
specvp_check(vnode_t **vpp, cred_t *cr)
{
	int error = 0;

	if (IS_DEVVP(*vpp)) {
		struct vnode *svp;

		/* Swap the caller's reference for one on the shadow vnode. */
		svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
		VN_RELE(*vpp);
		if (svp == NULL)
			error = ENOSYS;
		*vpp = svp;
	}
	return (error);
}


/*
 * Lookup an entry in a directory, or an extended attribute directory.
 * If it exists, return a held vnode reference for it.
 *
 * IN:	dvp	- vnode of directory to search.
 *	nm	- name of entry to lookup.
 *	pnp	- full pathname to lookup [UNUSED].
 *	flags	- LOOKUP_XATTR set if looking for an attribute.
 *	rdir	- root directory vnode [UNUSED].
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	direntflags - directory lookup flags
 *	realpnp - returned pathname.
 *
 * OUT:	vpp	- vnode of located entry, NULL if not found.
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	NA
 *
 * NOTE(review): this is the FreeBSD signature (componentname/nameiop/td)
 * rather than the Solaris one documented above; direntflags and realpnp
 * are passed as NULL locals below.
 */
/* ARGSUSED */
static int
zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp,
    int nameiop, cred_t *cr, kthread_t *td, int flags)
{
	znode_t *zdp = VTOZ(dvp);
	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
	int	error = 0;
	int *direntflags = NULL;
	void *realpnp = NULL;

	/* fast path: plain lookup, answered from the DNLC without ZFS_ENTER */
	if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) {

		if (dvp->v_type != VDIR) {
			return (ENOTDIR);
		} else if (zdp->z_dbuf == NULL) {
			/* znode has been torn down (forced unmount) */
			return (EIO);
		}

		if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
			/* "" and "." both resolve to the directory itself */
			error = zfs_fastaccesschk_execute(zdp, cr);
			if (!error) {
				*vpp = dvp;
				VN_HOLD(*vpp);
				return (0);
			}
			return (error);
		} else {
			vnode_t *tvp = dnlc_lookup(dvp, nm);

			if (tvp) {
				error = zfs_fastaccesschk_execute(zdp, cr);
				if (error) {
					VN_RELE(tvp);
					return (error);
				}
				if (tvp == DNLC_NO_VNODE) {
					/* cached negative entry */
					VN_RELE(tvp);
					return (ENOENT);
				} else {
					*vpp = tvp;
					return (specvp_check(vpp, cr));
				}
			}
		}
	}

	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zdp);

	*vpp = NULL;

	if (flags & LOOKUP_XATTR) {
#ifdef TODO
		/*
		 * If the xattr property is off, refuse the lookup request.
		 */
		if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) {
			ZFS_EXIT(zfsvfs);
			return (EINVAL);
		}
#endif

		/*
		 * We don't allow recursive attributes..
		 * Maybe someday we will.
		 */
		if (zdp->z_phys->zp_flags & ZFS_XATTR) {
			ZFS_EXIT(zfsvfs);
			return (EINVAL);
		}

		if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}

		/*
		 * Do we have permission to get into attribute directory?
		 */

		if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0,
		    B_FALSE, cr)) {
			VN_RELE(*vpp);
			*vpp = NULL;
		}

		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (dvp->v_type != VDIR) {
		ZFS_EXIT(zfsvfs);
		return (ENOTDIR);
	}

	/*
	 * Check accessibility of directory.
	 */

	if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}

	error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp);
	if (error == 0)
		error = specvp_check(vpp, cr);

	/* Translate errors and add SAVENAME when needed. */
	if (cnp->cn_flags & ISLASTCN) {
		switch (nameiop) {
		case CREATE:
		case RENAME:
			if (error == ENOENT) {
				/*
				 * FreeBSD namei convention: a missing last
				 * component for CREATE/RENAME is reported as
				 * EJUSTRETURN with the name saved for the
				 * upcoming VOP.
				 */
				error = EJUSTRETURN;
				cnp->cn_flags |= SAVENAME;
				break;
			}
			/* FALLTHROUGH */
		case DELETE:
			if (error == 0)
				cnp->cn_flags |= SAVENAME;
			break;
		}
	}
	if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) {
		int ltype = 0;

		/*
		 * Lock the found vnode.  For ".." we must drop the parent
		 * lock first (child-before-parent would deadlock against
		 * forward lookups), then reacquire it at its prior state.
		 */
		if (cnp->cn_flags & ISDOTDOT) {
			ltype = VOP_ISLOCKED(dvp);
			VOP_UNLOCK(dvp, 0);
		}
		ZFS_EXIT(zfsvfs);
		error = vn_lock(*vpp, cnp->cn_lkflags);
		if (cnp->cn_flags & ISDOTDOT)
			vn_lock(dvp, ltype | LK_RETRY);
		if (error != 0) {
			VN_RELE(*vpp);
			*vpp = NULL;
			return (error);
		}
	} else {
		ZFS_EXIT(zfsvfs);
	}

#ifdef FREEBSD_NAMECACHE
	/*
	 * Insert name into cache (as non-existent) if appropriate.
	 */
	if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
		cache_enter(dvp, *vpp, cnp);
	/*
	 * Insert name into cache if appropriate.
	 */
	if (error == 0 && (cnp->cn_flags & MAKEENTRY)) {
		if (!(cnp->cn_flags & ISLASTCN) ||
		    (nameiop != DELETE && nameiop != RENAME)) {
			cache_enter(dvp, *vpp, cnp);
		}
	}
#endif

	return (error);
}

/*
 * Attempt to create a new entry in a directory.  If the entry
 * already exists, truncate the file if permissible, else return
 * an error.  Return the vp of the created or trunc'd file.
 *
 * IN:	dvp	- vnode of directory to put new file entry in.
 *	name	- name of new file entry.
 *	vap	- attributes of new file.
 *	excl	- flag indicating exclusive or non-exclusive mode.
 *	mode	- mode to open file with.
 *	cr	- credentials of caller.
 *	flag	- large file flag [UNUSED].
 *	ct	- caller context
 *	vsecp	- ACL to be set
 *
 * OUT:	vpp	- vnode of created or trunc'd entry.
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime updated if new entry created
 *	 vp - ctime|mtime always, atime if new
 *
 * NOTE(review): this is the FreeBSD signature; vsecp and flag are fixed
 * locals (NULL/0) below rather than parameters as documented above.
 */

/* ARGSUSED */
static int
zfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode,
    vnode_t **vpp, cred_t *cr, kthread_t *td)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	objset_t	*os;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	int		error;
	ksid_t		*ksid;
	uid_t		uid;
	gid_t		gid = crgetgid(cr);
	zfs_acl_ids_t   acl_ids;
	boolean_t	fuid_dirtied;
	void		*vsecp = NULL;
	int		flag = 0;

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);
	/*
	 * NOTE(review): the checks below use crgetuid()/crgetgid() directly
	 * rather than the uid/gid computed above — presumably equivalent
	 * when no KSID is present; verify against upstream.
	 */
	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(crgetuid(cr)) || IS_EPHEMERAL(crgetgid(cr))))
		return (EINVAL);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	os = zfsvfs->z_os;
	zilog = zfsvfs->z_log;

	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}
top:
	*vpp = NULL;

	if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
		vap->va_mode &= ~S_ISVTX;

	if (*name == '\0') {
		/*
		 * Null component name refers to the directory itself.
		 */
		VN_HOLD(dvp);
		zp = dzp;
		dl = NULL;
		error = 0;
	} else {
		/* possible VN_HOLD(zp) */
		int zflg = 0;

		if (flag & FIGNORECASE)
			zflg |= ZCILOOK;

		error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
		    NULL, NULL);
		if (error) {
			if (strcmp(name, "..") == 0)
				error = EISDIR;
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}
	if (zp == NULL) {
		uint64_t txtype;

		/*
		 * Create a new file object and update the directory
		 * to reference it.
		 */
		if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
			goto out;
		}

		/*
		 * We only support the creation of regular files in
		 * extended attribute directories.
		 */
		if ((dzp->z_phys->zp_flags & ZFS_XATTR) &&
		    (vap->va_type != VREG)) {
			error = EINVAL;
			goto out;
		}


		if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp,
		    &acl_ids)) != 0)
			goto out;
		if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
			zfs_acl_ids_free(&acl_ids);
			error = EDQUOT;
			goto out;
		}

		/*
		 * Declare everything this transaction may touch: the new
		 * object's bonus buffer, the parent's bonus + ZAP entry,
		 * FUID tables if dirty, and the ACL spill block if large.
		 */
		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
		fuid_dirtied = zfsvfs->z_fuid_dirty;
		if (fuid_dirtied)
			zfs_fuid_txhold(zfsvfs, tx);
		dmu_tx_hold_bonus(tx, dzp->z_id);
		dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
		if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
			    0, SPA_MAXBLOCKSIZE);
		}
		error = dmu_tx_assign(tx, TXG_NOWAIT);
		if (error) {
			zfs_acl_ids_free(&acl_ids);
			zfs_dirent_unlock(dl);
			if (error == ERESTART) {
				/* txg is full: wait and retry from top */
				dmu_tx_wait(tx);
				dmu_tx_abort(tx);
				goto top;
			}
			dmu_tx_abort(tx);
			ZFS_EXIT(zfsvfs);
			return (error);
		}
		zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);

		if (fuid_dirtied)
			zfs_fuid_sync(zfsvfs, tx);

		(void) zfs_link_create(dl, zp, tx, ZNEW);

		txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
		if (flag & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_create(zilog, tx, txtype, dzp, zp, name,
		    vsecp, acl_ids.z_fuidp, vap);
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_commit(tx);
	} else {
		int aflags = (flag & FAPPEND) ? V_APPEND : 0;

		/*
		 * A directory entry already exists for this name.
		 */
		/*
		 * Can't truncate an existing file if in exclusive mode.
		 */
		if (excl == EXCL) {
			error = EEXIST;
			goto out;
		}
		/*
		 * Can't open a directory for writing.
		 */
		if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) {
			error = EISDIR;
			goto out;
		}
		/*
		 * Verify requested access to file.
		 */
		if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) {
			goto out;
		}

		mutex_enter(&dzp->z_lock);
		dzp->z_seq++;
		mutex_exit(&dzp->z_lock);

		/*
		 * Truncate regular files if requested.
		 */
		if ((ZTOV(zp)->v_type == VREG) &&
		    (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) {
			/* we can't hold any locks when calling zfs_freesp() */
			zfs_dirent_unlock(dl);
			dl = NULL;
			error = zfs_freesp(zp, 0, 0, mode, TRUE);
			if (error == 0) {
				/*
				 * NOTE(review): 'ct' is not a parameter of
				 * this (FreeBSD) signature — presumably
				 * vnevent_create() expands to a no-op here;
				 * confirm against the vnevent macros.
				 */
				vnevent_create(ZTOV(zp), ct);
			}
		}
	}
out:
	if (dl)
		zfs_dirent_unlock(dl);

	if (error) {
		if (zp)
			VN_RELE(ZTOV(zp));
	} else {
		*vpp = ZTOV(zp);
		error = specvp_check(vpp, cr);
	}

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Remove an entry from a directory.
 *
 * IN:	dvp	- vnode of directory to remove entry from.
 *	name	- name of entry to remove.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime
 *	 vp - ctime (if nlink > 0)
 */
/*ARGSUSED*/
static int
zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
    int flags)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	znode_t		*xzp = NULL;
	vnode_t		*vp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	acl_obj, xattr_obj;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	boolean_t	may_delete_now, delete_now = FALSE;
	boolean_t	unlinked, toobig = FALSE;
	uint64_t	txtype;
	pathname_t	*realnmp = NULL;
	pathname_t	realnm;
	int		error;
	int		zflg = ZEXISTS;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	if (flags & FIGNORECASE) {
		zflg |= ZCILOOK;
		pn_alloc(&realnm);
		realnmp = &realnm;
	}

top:
	/*
	 * Attempt to lock directory; fail if entry doesn't exist.
	 */
	if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
	    NULL, realnmp)) {
		if (realnmp)
			pn_free(realnmp);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	vp = ZTOV(zp);

	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
		goto out;
	}

	/*
	 * Need to use rmdir for removing directories.
	 */
	if (vp->v_type == VDIR) {
		error = EPERM;
		goto out;
	}

	vnevent_remove(vp, dvp, name, ct);

	if (realnmp)
		dnlc_remove(dvp, realnmp->pn_buf);
	else
		dnlc_remove(dvp, name);

	/*
	 * NOTE(review): may_delete_now is forced FALSE here, which makes the
	 * may_delete_now/delete_now branches below dead code — presumably the
	 * immediate-delete optimization is intentionally disabled in this
	 * FreeBSD port (see also the "if (0 && unlinked)" below); confirm
	 * before "fixing".
	 */
	may_delete_now = FALSE;

	/*
	 * We may delete the znode now, or we may put it in the unlinked set;
	 * it depends on whether we're the last link, and on whether there are
	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
	 * allow for either case.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_bonus(tx, zp->z_id);
	if (may_delete_now) {
		toobig =
		    zp->z_phys->zp_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT;
		/* if the file is too big, only hold_free a token amount */
		dmu_tx_hold_free(tx, zp->z_id, 0,
		    (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
	}

	/* are there any extended attributes? */
	if ((xattr_obj = zp->z_phys->zp_xattr) != 0) {
		/* XXX - do we need this if we are deleting? */
		dmu_tx_hold_bonus(tx, xattr_obj);
	}

	/* are there any additional acls */
	if ((acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj) != 0 &&
	    may_delete_now)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);

	/* charge as an update -- would be nice not to charge at all */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);

	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		zfs_dirent_unlock(dl);
		VN_RELE(vp);
		if (error == ERESTART) {
			/* txg is full: wait and retry from top */
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		if (realnmp)
			pn_free(realnmp);
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Remove the directory entry.
	 */
	error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked);

	if (error) {
		dmu_tx_commit(tx);
		goto out;
	}

	/* NOTE(review): deliberately disabled — see may_delete_now above. */
	if (0 && unlinked) {
		VI_LOCK(vp);
		delete_now = may_delete_now && !toobig &&
		    vp->v_count == 1 && !vn_has_cached_data(vp) &&
		    zp->z_phys->zp_xattr == xattr_obj &&
		    zp->z_phys->zp_acl.z_acl_extern_obj == acl_obj;
		VI_UNLOCK(vp);
	}

	if (delete_now) {
		/* last link and no other holds: destroy the object in-line */
		if (zp->z_phys->zp_xattr) {
			error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp);
			ASSERT3U(error, ==, 0);
			ASSERT3U(xzp->z_phys->zp_links, ==, 2);
			dmu_buf_will_dirty(xzp->z_dbuf, tx);
			mutex_enter(&xzp->z_lock);
			xzp->z_unlinked = 1;
			xzp->z_phys->zp_links = 0;
			mutex_exit(&xzp->z_lock);
			zfs_unlinked_add(xzp, tx);
			zp->z_phys->zp_xattr = 0; /* probably unnecessary */
		}
		mutex_enter(&zp->z_lock);
		VI_LOCK(vp);
		vp->v_count--;
		ASSERT3U(vp->v_count, ==, 0);
		VI_UNLOCK(vp);
		mutex_exit(&zp->z_lock);
		zfs_znode_delete(zp, tx);
	} else if (unlinked) {
		/* defer destruction: park the znode on the unlinked set */
		zfs_unlinked_add(zp, tx);
	}

	txtype = TX_REMOVE;
	if (flags & FIGNORECASE)
		txtype |= TX_CI;
	zfs_log_remove(zilog, tx, txtype, dzp, name);

	dmu_tx_commit(tx);
out:
	if (realnmp)
		pn_free(realnmp);

	zfs_dirent_unlock(dl);

	if (!delete_now) {
		VN_RELE(vp);
	} else if (xzp) {
		/* this rele is delayed to prevent nesting transactions */
		VN_RELE(ZTOV(xzp));
	}

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Create a new directory and insert it into dvp using the name
 * provided.  Return a pointer to the inserted directory.
 *
 * IN:	dvp	- vnode of directory to add subdir to.
 *	dirname	- name of new directory.
 *	vap	- attributes of new directory.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	vsecp	- ACL to be set
 *
 * OUT:	vpp	- vnode of created directory.
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 *	 vp - ctime|mtime|atime updated
 */
/*ARGSUSED*/
static int
zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr,
    caller_context_t *ct, int flags, vsecattr_t *vsecp)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	zfs_dirlock_t	*dl;
	uint64_t	txtype;
	dmu_tx_t	*tx;
	int		error;
	int		zf = ZNEW;
	ksid_t		*ksid;
	uid_t		uid;
	gid_t		gid = crgetgid(cr);
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;

	ASSERT(vap->va_type == VDIR);

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);
	/*
	 * NOTE(review): as in zfs_create(), these checks use
	 * crgetuid()/crgetgid() directly rather than the uid/gid
	 * computed above — confirm against upstream.
	 */
	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) || IS_EPHEMERAL(crgetuid(cr))||
	    IS_EPHEMERAL(crgetgid(cr))))
		return (EINVAL);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/* subdirectories are not allowed inside an xattr directory */
	if (dzp->z_phys->zp_flags & ZFS_XATTR) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	if (zfsvfs->z_utf8 && u8_validate(dirname,
	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}
	if (flags & FIGNORECASE)
		zf |= ZCILOOK;

	if (vap->va_mask & AT_XVATTR)
		if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}

	/*
	 * First make sure the new directory doesn't exist.
	 */
top:
	*vpp = NULL;

	if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf,
	    NULL, NULL)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) {
		zfs_dirent_unlock(dl);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp,
	    &acl_ids)) != 0) {
		zfs_dirent_unlock(dl);
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
		zfs_acl_ids_free(&acl_ids);
		zfs_dirent_unlock(dl);
		ZFS_EXIT(zfsvfs);
		return (EDQUOT);
	}

	/*
	 * Add a new entry to the directory.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE)
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
		    0, SPA_MAXBLOCKSIZE);
	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		zfs_dirent_unlock(dl);
		if (error == ERESTART) {
			/* txg is full: wait and retry from top */
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Create new node.
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);
	/*
	 * Now put new name in parent dir.
	 */
	(void) zfs_link_create(dl, zp, tx, ZNEW);

	*vpp = ZTOV(zp);

	txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap);
	if (flags & FIGNORECASE)
		txtype |= TX_CI;
	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
	    acl_ids.z_fuidp, vap);

	zfs_acl_ids_free(&acl_ids);
	dmu_tx_commit(tx);

	zfs_dirent_unlock(dl);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Remove a directory subdir entry.  If the current working
 * directory is the same as the subdir to be removed, the
 * remove will fail.
 *
 * IN:	dvp	- vnode of directory to remove from.
 *	name	- name of directory to be removed.
 *	cwd	- vnode of current working directory.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t		*dzp = VTOZ(dvp);
	znode_t		*zp;
	vnode_t		*vp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	int		error;
	int		zflg = ZEXISTS;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	if (flags & FIGNORECASE)
		zflg |= ZCILOOK;
top:
	zp = NULL;

	/*
	 * Attempt to lock directory; fail if entry doesn't exist.
	 */
	if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
	    NULL, NULL)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	vp = ZTOV(zp);

	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
		goto out;
	}

	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

	if (vp == cwd) {
		error = EINVAL;
		goto out;
	}

	vnevent_rmdir(vp, dvp, name, ct);

	/*
	 * Grab a lock on the directory to make sure that noone is
	 * trying to add (or lookup) entries while we are removing it.
	 */
	rw_enter(&zp->z_name_lock, RW_WRITER);

	/*
	 * Grab a lock on the parent pointer to make sure we play well
	 * with the treewalk and directory rename code.
	 */
	rw_enter(&zp->z_parent_lock, RW_WRITER);

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_bonus(tx, zp->z_id);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		/* drop locks in reverse order before retry/bail */
		rw_exit(&zp->z_parent_lock);
		rw_exit(&zp->z_name_lock);
		zfs_dirent_unlock(dl);
		VN_RELE(vp);
		if (error == ERESTART) {
			/* txg is full: wait and retry from top */
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

#ifdef FREEBSD_NAMECACHE
	cache_purge(dvp);
#endif

	error = zfs_link_destroy(dl, zp, tx, zflg, NULL);

	if (error == 0) {
		uint64_t txtype = TX_RMDIR;
		if (flags & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_remove(zilog, tx, txtype, dzp, name);
	}

	dmu_tx_commit(tx);

	rw_exit(&zp->z_parent_lock);
	rw_exit(&zp->z_name_lock);
#ifdef FREEBSD_NAMECACHE
	cache_purge(vp);
#endif
out:
	zfs_dirent_unlock(dl);

	VN_RELE(vp);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Read as many directory entries as will fit into
the provided 2124 * buffer from the given directory cursor position (specified in 2125 * the uio structure. 2126 * 2127 * IN: vp - vnode of directory to read. 2128 * uio - structure supplying read location, range info, 2129 * and return buffer. 2130 * cr - credentials of caller. 2131 * ct - caller context 2132 * flags - case flags 2133 * 2134 * OUT: uio - updated offset and range, buffer filled. 2135 * eofp - set to true if end-of-file detected. 2136 * 2137 * RETURN: 0 if success 2138 * error code if failure 2139 * 2140 * Timestamps: 2141 * vp - atime updated 2142 * 2143 * Note that the low 4 bits of the cookie returned by zap is always zero. 2144 * This allows us to use the low range for "special" directory entries: 2145 * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2146 * we use the offset 2 for the '.zfs' directory. 2147 */ 2148/* ARGSUSED */ 2149static int 2150zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) 2151{ 2152 znode_t *zp = VTOZ(vp); 2153 iovec_t *iovp; 2154 edirent_t *eodp; 2155 dirent64_t *odp; 2156 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2157 objset_t *os; 2158 caddr_t outbuf; 2159 size_t bufsize; 2160 zap_cursor_t zc; 2161 zap_attribute_t zap; 2162 uint_t bytes_wanted; 2163 uint64_t offset; /* must be unsigned; checks for < 1 */ 2164 int local_eof; 2165 int outcount; 2166 int error; 2167 uint8_t prefetch; 2168 boolean_t check_sysattrs; 2169 uint8_t type; 2170 int ncooks; 2171 u_long *cooks = NULL; 2172 int flags = 0; 2173 2174 ZFS_ENTER(zfsvfs); 2175 ZFS_VERIFY_ZP(zp); 2176 2177 /* 2178 * If we are not given an eof variable, 2179 * use a local one. 2180 */ 2181 if (eofp == NULL) 2182 eofp = &local_eof; 2183 2184 /* 2185 * Check for valid iov_len. 
2186 */ 2187 if (uio->uio_iov->iov_len <= 0) { 2188 ZFS_EXIT(zfsvfs); 2189 return (EINVAL); 2190 } 2191 2192 /* 2193 * Quit if directory has been removed (posix) 2194 */ 2195 if ((*eofp = zp->z_unlinked) != 0) { 2196 ZFS_EXIT(zfsvfs); 2197 return (0); 2198 } 2199 2200 error = 0; 2201 os = zfsvfs->z_os; 2202 offset = uio->uio_loffset; 2203 prefetch = zp->z_zn_prefetch; 2204 2205 /* 2206 * Initialize the iterator cursor. 2207 */ 2208 if (offset <= 3) { 2209 /* 2210 * Start iteration from the beginning of the directory. 2211 */ 2212 zap_cursor_init(&zc, os, zp->z_id); 2213 } else { 2214 /* 2215 * The offset is a serialized cursor. 2216 */ 2217 zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2218 } 2219 2220 /* 2221 * Get space to change directory entries into fs independent format. 2222 */ 2223 iovp = uio->uio_iov; 2224 bytes_wanted = iovp->iov_len; 2225 if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2226 bufsize = bytes_wanted; 2227 outbuf = kmem_alloc(bufsize, KM_SLEEP); 2228 odp = (struct dirent64 *)outbuf; 2229 } else { 2230 bufsize = bytes_wanted; 2231 odp = (struct dirent64 *)iovp->iov_base; 2232 } 2233 eodp = (struct edirent *)odp; 2234 2235 if (ncookies != NULL) { 2236 /* 2237 * Minimum entry size is dirent size and 1 byte for a file name. 2238 */ 2239 ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2240 cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2241 *cookies = cooks; 2242 *ncookies = ncooks; 2243 } 2244 /* 2245 * If this VFS supports the system attribute view interface; and 2246 * we're looking at an extended attribute directory; and we care 2247 * about normalization conflicts on this vfs; then we must check 2248 * for normalization conflicts with the sysattr name space. 
2249 */ 2250#ifdef TODO 2251 check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2252 (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2253 (flags & V_RDDIR_ENTFLAGS); 2254#else 2255 check_sysattrs = 0; 2256#endif 2257 2258 /* 2259 * Transform to file-system independent format 2260 */ 2261 outcount = 0; 2262 while (outcount < bytes_wanted) { 2263 ino64_t objnum; 2264 ushort_t reclen; 2265 off64_t *next; 2266 2267 /* 2268 * Special case `.', `..', and `.zfs'. 2269 */ 2270 if (offset == 0) { 2271 (void) strcpy(zap.za_name, "."); 2272 zap.za_normalization_conflict = 0; 2273 objnum = zp->z_id; 2274 type = DT_DIR; 2275 } else if (offset == 1) { 2276 (void) strcpy(zap.za_name, ".."); 2277 zap.za_normalization_conflict = 0; 2278 objnum = zp->z_phys->zp_parent; 2279 type = DT_DIR; 2280 } else if (offset == 2 && zfs_show_ctldir(zp)) { 2281 (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2282 zap.za_normalization_conflict = 0; 2283 objnum = ZFSCTL_INO_ROOT; 2284 type = DT_DIR; 2285 } else { 2286 /* 2287 * Grab next entry. 
2288 */ 2289 if (error = zap_cursor_retrieve(&zc, &zap)) { 2290 if ((*eofp = (error == ENOENT)) != 0) 2291 break; 2292 else 2293 goto update; 2294 } 2295 2296 if (zap.za_integer_length != 8 || 2297 zap.za_num_integers != 1) { 2298 cmn_err(CE_WARN, "zap_readdir: bad directory " 2299 "entry, obj = %lld, offset = %lld\n", 2300 (u_longlong_t)zp->z_id, 2301 (u_longlong_t)offset); 2302 error = ENXIO; 2303 goto update; 2304 } 2305 2306 objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2307 /* 2308 * MacOS X can extract the object type here such as: 2309 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2310 */ 2311 type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2312 2313 if (check_sysattrs && !zap.za_normalization_conflict) { 2314#ifdef TODO 2315 zap.za_normalization_conflict = 2316 xattr_sysattr_casechk(zap.za_name); 2317#else 2318 panic("%s:%u: TODO", __func__, __LINE__); 2319#endif 2320 } 2321 } 2322 2323 if (flags & V_RDDIR_ACCFILTER) { 2324 /* 2325 * If we have no access at all, don't include 2326 * this entry in the returned information 2327 */ 2328 znode_t *ezp; 2329 if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2330 goto skip_entry; 2331 if (!zfs_has_access(ezp, cr)) { 2332 VN_RELE(ZTOV(ezp)); 2333 goto skip_entry; 2334 } 2335 VN_RELE(ZTOV(ezp)); 2336 } 2337 2338 if (flags & V_RDDIR_ENTFLAGS) 2339 reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2340 else 2341 reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2342 2343 /* 2344 * Will this entry fit in the buffer? 2345 */ 2346 if (outcount + reclen > bufsize) { 2347 /* 2348 * Did we manage to fit anything in the buffer? 2349 */ 2350 if (!outcount) { 2351 error = EINVAL; 2352 goto update; 2353 } 2354 break; 2355 } 2356 if (flags & V_RDDIR_ENTFLAGS) { 2357 /* 2358 * Add extended flag entry: 2359 */ 2360 eodp->ed_ino = objnum; 2361 eodp->ed_reclen = reclen; 2362 /* NOTE: ed_off is the offset for the *next* entry */ 2363 next = &(eodp->ed_off); 2364 eodp->ed_eflags = zap.za_normalization_conflict ? 
2365 ED_CASE_CONFLICT : 0; 2366 (void) strncpy(eodp->ed_name, zap.za_name, 2367 EDIRENT_NAMELEN(reclen)); 2368 eodp = (edirent_t *)((intptr_t)eodp + reclen); 2369 } else { 2370 /* 2371 * Add normal entry: 2372 */ 2373 odp->d_ino = objnum; 2374 odp->d_reclen = reclen; 2375 odp->d_namlen = strlen(zap.za_name); 2376 (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2377 odp->d_type = type; 2378 odp = (dirent64_t *)((intptr_t)odp + reclen); 2379 } 2380 outcount += reclen; 2381 2382 ASSERT(outcount <= bufsize); 2383 2384 /* Prefetch znode */ 2385 if (prefetch) 2386 dmu_prefetch(os, objnum, 0, 0); 2387 2388 skip_entry: 2389 /* 2390 * Move to the next entry, fill in the previous offset. 2391 */ 2392 if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2393 zap_cursor_advance(&zc); 2394 offset = zap_cursor_serialize(&zc); 2395 } else { 2396 offset += 1; 2397 } 2398 2399 if (cooks != NULL) { 2400 *cooks++ = offset; 2401 ncooks--; 2402 KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2403 } 2404 } 2405 zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2406 2407 /* Subtract unused cookies */ 2408 if (ncookies != NULL) 2409 *ncookies -= ncooks; 2410 2411 if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2412 iovp->iov_base += outcount; 2413 iovp->iov_len -= outcount; 2414 uio->uio_resid -= outcount; 2415 } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2416 /* 2417 * Reset the pointer. 
2418 */ 2419 offset = uio->uio_loffset; 2420 } 2421 2422update: 2423 zap_cursor_fini(&zc); 2424 if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2425 kmem_free(outbuf, bufsize); 2426 2427 if (error == ENOENT) 2428 error = 0; 2429 2430 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2431 2432 uio->uio_loffset = offset; 2433 ZFS_EXIT(zfsvfs); 2434 if (error != 0 && cookies != NULL) { 2435 free(*cookies, M_TEMP); 2436 *cookies = NULL; 2437 *ncookies = 0; 2438 } 2439 return (error); 2440} 2441 2442ulong_t zfs_fsync_sync_cnt = 4; 2443 2444static int 2445zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2446{ 2447 znode_t *zp = VTOZ(vp); 2448 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2449 2450 (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2451 2452 ZFS_ENTER(zfsvfs); 2453 ZFS_VERIFY_ZP(zp); 2454 zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); 2455 ZFS_EXIT(zfsvfs); 2456 return (0); 2457} 2458 2459 2460/* 2461 * Get the requested file attributes and place them in the provided 2462 * vattr structure. 2463 * 2464 * IN: vp - vnode of file. 2465 * vap - va_mask identifies requested attributes. 2466 * If AT_XVATTR set, then optional attrs are requested 2467 * flags - ATTR_NOACLCHECK (CIFS server context) 2468 * cr - credentials of caller. 2469 * ct - caller context 2470 * 2471 * OUT: vap - attribute values. 2472 * 2473 * RETURN: 0 (always succeeds) 2474 */ 2475/* ARGSUSED */ 2476static int 2477zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2478 caller_context_t *ct) 2479{ 2480 znode_t *zp = VTOZ(vp); 2481 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2482 znode_phys_t *pzp; 2483 int error = 0; 2484 uint32_t blksize; 2485 u_longlong_t nblocks; 2486 uint64_t links; 2487 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2488 xoptattr_t *xoap = NULL; 2489 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 2490 2491 ZFS_ENTER(zfsvfs); 2492 ZFS_VERIFY_ZP(zp); 2493 pzp = zp->z_phys; 2494 2495 /* 2496 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2497 * Also, if we are the owner don't bother, since owner should 2498 * always be allowed to read basic attributes of file. 2499 */ 2500 if (!(pzp->zp_flags & ZFS_ACL_TRIVIAL) && 2501 (pzp->zp_uid != crgetuid(cr))) { 2502 if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2503 skipaclchk, cr)) { 2504 ZFS_EXIT(zfsvfs); 2505 return (error); 2506 } 2507 } 2508 2509 /* 2510 * Return all attributes. It's cheaper to provide the answer 2511 * than to determine whether we were asked the question. 2512 */ 2513 2514 mutex_enter(&zp->z_lock); 2515 vap->va_type = IFTOVT(pzp->zp_mode); 2516 vap->va_mode = pzp->zp_mode & ~S_IFMT; 2517 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2518// vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2519 vap->va_nodeid = zp->z_id; 2520 if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2521 links = pzp->zp_links + 1; 2522 else 2523 links = pzp->zp_links; 2524 vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */ 2525 vap->va_size = pzp->zp_size; 2526 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 2527 vap->va_rdev = zfs_cmpldev(pzp->zp_rdev); 2528 vap->va_seq = zp->z_seq; 2529 vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2530 2531 /* 2532 * Add in any requested optional attributes and the create time. 2533 * Also set the corresponding bits in the returned attribute bitmap. 
2534 */ 2535 if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2536 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2537 xoap->xoa_archive = 2538 ((pzp->zp_flags & ZFS_ARCHIVE) != 0); 2539 XVA_SET_RTN(xvap, XAT_ARCHIVE); 2540 } 2541 2542 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2543 xoap->xoa_readonly = 2544 ((pzp->zp_flags & ZFS_READONLY) != 0); 2545 XVA_SET_RTN(xvap, XAT_READONLY); 2546 } 2547 2548 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2549 xoap->xoa_system = 2550 ((pzp->zp_flags & ZFS_SYSTEM) != 0); 2551 XVA_SET_RTN(xvap, XAT_SYSTEM); 2552 } 2553 2554 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2555 xoap->xoa_hidden = 2556 ((pzp->zp_flags & ZFS_HIDDEN) != 0); 2557 XVA_SET_RTN(xvap, XAT_HIDDEN); 2558 } 2559 2560 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2561 xoap->xoa_nounlink = 2562 ((pzp->zp_flags & ZFS_NOUNLINK) != 0); 2563 XVA_SET_RTN(xvap, XAT_NOUNLINK); 2564 } 2565 2566 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2567 xoap->xoa_immutable = 2568 ((pzp->zp_flags & ZFS_IMMUTABLE) != 0); 2569 XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2570 } 2571 2572 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2573 xoap->xoa_appendonly = 2574 ((pzp->zp_flags & ZFS_APPENDONLY) != 0); 2575 XVA_SET_RTN(xvap, XAT_APPENDONLY); 2576 } 2577 2578 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2579 xoap->xoa_nodump = 2580 ((pzp->zp_flags & ZFS_NODUMP) != 0); 2581 XVA_SET_RTN(xvap, XAT_NODUMP); 2582 } 2583 2584 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2585 xoap->xoa_opaque = 2586 ((pzp->zp_flags & ZFS_OPAQUE) != 0); 2587 XVA_SET_RTN(xvap, XAT_OPAQUE); 2588 } 2589 2590 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2591 xoap->xoa_av_quarantined = 2592 ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0); 2593 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2594 } 2595 2596 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2597 xoap->xoa_av_modified = 2598 ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0); 2599 XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2600 } 2601 2602 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2603 vp->v_type == VREG && 
2604 (pzp->zp_flags & ZFS_BONUS_SCANSTAMP)) { 2605 size_t len; 2606 dmu_object_info_t doi; 2607 2608 /* 2609 * Only VREG files have anti-virus scanstamps, so we 2610 * won't conflict with symlinks in the bonus buffer. 2611 */ 2612 dmu_object_info_from_db(zp->z_dbuf, &doi); 2613 len = sizeof (xoap->xoa_av_scanstamp) + 2614 sizeof (znode_phys_t); 2615 if (len <= doi.doi_bonus_size) { 2616 /* 2617 * pzp points to the start of the 2618 * znode_phys_t. pzp + 1 points to the 2619 * first byte after the znode_phys_t. 2620 */ 2621 (void) memcpy(xoap->xoa_av_scanstamp, 2622 pzp + 1, 2623 sizeof (xoap->xoa_av_scanstamp)); 2624 XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); 2625 } 2626 } 2627 2628 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 2629 ZFS_TIME_DECODE(&xoap->xoa_createtime, pzp->zp_crtime); 2630 XVA_SET_RTN(xvap, XAT_CREATETIME); 2631 } 2632 } 2633 2634 ZFS_TIME_DECODE(&vap->va_atime, pzp->zp_atime); 2635 ZFS_TIME_DECODE(&vap->va_mtime, pzp->zp_mtime); 2636 ZFS_TIME_DECODE(&vap->va_ctime, pzp->zp_ctime); 2637 ZFS_TIME_DECODE(&vap->va_birthtime, pzp->zp_crtime); 2638 2639 mutex_exit(&zp->z_lock); 2640 2641 dmu_object_size_from_db(zp->z_dbuf, &blksize, &nblocks); 2642 vap->va_blksize = blksize; 2643 vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 2644 2645 if (zp->z_blksz == 0) { 2646 /* 2647 * Block size hasn't been set; suggest maximal I/O transfers. 2648 */ 2649 vap->va_blksize = zfsvfs->z_max_blksz; 2650 } 2651 2652 ZFS_EXIT(zfsvfs); 2653 return (0); 2654} 2655 2656/* 2657 * Set the file attributes to the values contained in the 2658 * vattr structure. 2659 * 2660 * IN: vp - vnode of file to be modified. 2661 * vap - new attribute values. 2662 * If AT_XVATTR set, then optional attrs are being set 2663 * flags - ATTR_UTIME set if non-default time values provided. 2664 * - ATTR_NOACLCHECK (CIFS context only). 2665 * cr - credentials of caller. 
 *	ct	- caller context
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	vp - ctime updated, mtime updated if size changed.
 */
/* ARGSUSED */
static int
zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
	caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	znode_phys_t	*pzp;
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	zilog_t		*zilog;
	dmu_tx_t	*tx;
	vattr_t		oldva;
	xvattr_t	tmpxvattr;	/* remembers trimmed-off xvattr bits */
	uint_t		mask = vap->va_mask;
	uint_t		saved_mask;
	uint64_t	saved_mode;
	int		trim_mask = 0;
	uint64_t	new_mode;
	uint64_t	new_uid, new_gid;
	znode_t		*attrzp;	/* xattr dir, updated with uid/gid */
	int		need_policy = FALSE;
	int		err;
	zfs_fuid_info_t	*fuidp = NULL;
	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
	xoptattr_t	*xoap;
	zfs_acl_t	*aclp = NULL;
	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
	boolean_t fuid_dirtied = B_FALSE;

	if (mask == 0)
		return (0);

	if (mask & AT_NOSET)
		return (EINVAL);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	pzp = zp->z_phys;
	zilog = zfsvfs->z_log;

	/*
	 * Make sure that if we have ephemeral uid/gid or xvattr specified
	 * that file system is at proper version level
	 */

	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
	    ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
	    (mask & AT_XVATTR))) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	if (mask & AT_SIZE && vp->v_type == VDIR) {
		ZFS_EXIT(zfsvfs);
		return (EISDIR);
	}

	if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * If this is an xvattr_t, then get a pointer to the structure of
	 * optional attributes.  If this is NULL, then we have a vattr_t.
	 */
	xoap = xva_getxoptattr(xvap);

	xva_init(&tmpxvattr);

	/*
	 * Immutable files can only alter immutable bit and atime
	 */
	if ((pzp->zp_flags & ZFS_IMMUTABLE) &&
	    ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
	    ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	if ((mask & AT_SIZE) && (pzp->zp_flags & ZFS_READONLY)) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	/*
	 * Verify timestamps doesn't overflow 32 bits.
	 * ZFS can handle large timestamps, but 32bit syscalls can't
	 * handle times greater than 2039.  This check should be removed
	 * once large timestamps are fully supported.
	 */
	if (mask & (AT_ATIME | AT_MTIME)) {
		if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
		    ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
			ZFS_EXIT(zfsvfs);
			return (EOVERFLOW);
		}
	}

top:
	attrzp = NULL;

	/* Can this be moved to before the top label? */
	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
		ZFS_EXIT(zfsvfs);
		return (EROFS);
	}

	/*
	 * First validate permissions
	 */

	if (mask & AT_SIZE) {
		err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr);
		if (err) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}
		/*
		 * XXX - Note, we are not providing any open
		 * mode flags here (like FNDELAY), so we may
		 * block if there are locks present... this
		 * should be addressed in openat().
		 */
		/* XXX - would it be OK to generate a log record here? */
		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
		if (err) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}
	}

	/*
	 * Changing times or any of the "DOS" style xvattr bits requires
	 * write-attributes permission; remember whether the generic
	 * policy check is still needed.
	 */
	if (mask & (AT_ATIME|AT_MTIME) ||
	    ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
	    XVA_ISSET_REQ(xvap, XAT_SYSTEM))))
		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
		    skipaclchk, cr);

	if (mask & (AT_UID|AT_GID)) {
		int	idmask = (mask & (AT_UID|AT_GID));
		int	take_owner;
		int	take_group;

		/*
		 * NOTE: even if a new mode is being set,
		 * we may clear S_ISUID/S_ISGID bits.
		 */

		if (!(mask & AT_MODE))
			vap->va_mode = pzp->zp_mode;

		/*
		 * Take ownership or chgrp to group we are a member of
		 */

		take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
		take_group = (mask & AT_GID) &&
		    zfs_groupmember(zfsvfs, vap->va_gid, cr);

		/*
		 * If both AT_UID and AT_GID are set then take_owner and
		 * take_group must both be set in order to allow taking
		 * ownership.
		 *
		 * Otherwise, send the check through secpolicy_vnode_setattr()
		 *
		 */

		if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
		    ((idmask == AT_UID) && take_owner) ||
		    ((idmask == AT_GID) && take_group)) {
			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
			    skipaclchk, cr) == 0) {
				/*
				 * Remove setuid/setgid for non-privileged users
				 */
				secpolicy_setid_clear(vap, vp, cr);
				trim_mask = (mask & (AT_UID|AT_GID));
			} else {
				need_policy =  TRUE;
			}
		} else {
			need_policy =  TRUE;
		}
	}

	mutex_enter(&zp->z_lock);
	oldva.va_mode = pzp->zp_mode;
	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
	if (mask & AT_XVATTR) {
		/*
		 * Update xvattr mask to include only those attributes
		 * that are actually changing.
		 *
		 * the bits will be restored prior to actually setting
		 * the attributes so the caller thinks they were set.
		 */
		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
			if (xoap->xoa_appendonly !=
			    ((pzp->zp_flags & ZFS_APPENDONLY) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
				XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
			if (xoap->xoa_nounlink !=
			    ((pzp->zp_flags & ZFS_NOUNLINK) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
				XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
			if (xoap->xoa_immutable !=
			    ((pzp->zp_flags & ZFS_IMMUTABLE) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
				XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
			if (xoap->xoa_nodump !=
			    ((pzp->zp_flags & ZFS_NODUMP) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_NODUMP);
				XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
			if (xoap->xoa_av_modified !=
			    ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
				XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
			if ((vp->v_type != VREG &&
			    xoap->xoa_av_quarantined) ||
			    xoap->xoa_av_quarantined !=
			    ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
				XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
			}
		}

		if (need_policy == FALSE &&
		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
		    XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
			need_policy = TRUE;
		}
	}

	mutex_exit(&zp->z_lock);

	if (mask & AT_MODE) {
		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
			err = secpolicy_setid_setsticky_clear(vp, vap,
			    &oldva, cr);
			if (err) {
				ZFS_EXIT(zfsvfs);
				return (err);
			}
			trim_mask |= AT_MODE;
		} else {
			need_policy = TRUE;
		}
	}

	if (need_policy) {
		/*
		 * If trim_mask is set then take ownership
		 * has been granted or write_acl is present and user
		 * has the ability to modify mode.  In that case remove
		 * UID|GID and or MODE from mask so that
		 * secpolicy_vnode_setattr() doesn't revoke it.
		 */

		if (trim_mask) {
			saved_mask = vap->va_mask;
			vap->va_mask &= ~trim_mask;
			if (trim_mask & AT_MODE) {
				/*
				 * Save the mode, as secpolicy_vnode_setattr()
				 * will overwrite it with ova.va_mode.
				 */
				saved_mode = vap->va_mode;
			}
		}
		err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
		if (err) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}

		if (trim_mask) {
			vap->va_mask |= saved_mask;
			if (trim_mask & AT_MODE) {
				/*
				 * Recover the mode after
				 * secpolicy_vnode_setattr().
				 */
				vap->va_mode = saved_mode;
			}
		}
	}

	/*
	 * secpolicy_vnode_setattr, or take ownership may have
	 * changed va_mask
	 */
	mask = vap->va_mask;

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, zp->z_id);

	if (mask & AT_MODE) {
		uint64_t pmode = pzp->zp_mode;

		/* Preserve the file-type bits; only permission bits change. */
		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);

		if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))
			goto out;
		if (pzp->zp_acl.z_acl_extern_obj) {
			/* Are we upgrading ACL from old V0 format to new V1 */
			if (zfsvfs->z_version <= ZPL_VERSION_FUID &&
			    pzp->zp_acl.z_acl_version ==
			    ZFS_ACL_VERSION_INITIAL) {
				dmu_tx_hold_free(tx,
				    pzp->zp_acl.z_acl_extern_obj, 0,
				    DMU_OBJECT_END);
				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
				    0, aclp->z_acl_bytes);
			} else {
				dmu_tx_hold_write(tx,
				    pzp->zp_acl.z_acl_extern_obj, 0,
				    aclp->z_acl_bytes);
			}
		} else if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
			    0, aclp->z_acl_bytes);
		}
	}

	if (mask & (AT_UID | AT_GID)) {
		/* Ownership changes propagate to the xattr directory too. */
		if (pzp->zp_xattr) {
			err = zfs_zget(zp->z_zfsvfs, pzp->zp_xattr, &attrzp);
			if (err)
				goto out;
			dmu_tx_hold_bonus(tx, attrzp->z_id);
		}
		if (mask & AT_UID) {
			new_uid = zfs_fuid_create(zfsvfs,
			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
			if (new_uid != pzp->zp_uid &&
			    zfs_usergroup_overquota(zfsvfs, B_FALSE, new_uid)) {
				err = EDQUOT;
				goto out;
			}
		}

		if (mask & AT_GID) {
			new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
			    cr, ZFS_GROUP, &fuidp);
			if (new_gid != pzp->zp_gid &&
			    zfs_usergroup_overquota(zfsvfs, B_TRUE, new_gid)) {
				err = EDQUOT;
				goto out;
			}
		}
		fuid_dirtied = zfsvfs->z_fuid_dirty;
		if (fuid_dirtied) {
			if (zfsvfs->z_fuid_obj == 0) {
				dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
				dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
				    FUID_SIZE_ESTIMATE(zfsvfs));
				dmu_tx_hold_zap(tx, MASTER_NODE_OBJ,
				    FALSE, NULL);
			} else {
				dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
				dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
				    FUID_SIZE_ESTIMATE(zfsvfs));
			}
		}
	}

	err = dmu_tx_assign(tx, TXG_NOWAIT);
	if (err) {
		/* ERESTART: wait for the txg, then retry from "top". */
		if (err == ERESTART)
			dmu_tx_wait(tx);
		goto out;
	}

	dmu_buf_will_dirty(zp->z_dbuf, tx);

	/*
	 * Set each attribute requested.
	 * We group settings according to the locks they need to acquire.
	 *
	 * Note: you cannot set ctime directly, although it will be
	 * updated as a side-effect of calling this function.
	 */

	mutex_enter(&zp->z_lock);

	if (mask & AT_MODE) {
		mutex_enter(&zp->z_acl_lock);
		zp->z_phys->zp_mode = new_mode;
		err = zfs_aclset_common(zp, aclp, cr, tx);
		ASSERT3U(err, ==, 0);
		/* The znode takes ownership of aclp as its cached ACL. */
		zp->z_acl_cached = aclp;
		aclp = NULL;
		mutex_exit(&zp->z_acl_lock);
	}

	if (attrzp)
		mutex_enter(&attrzp->z_lock);

	if (mask & AT_UID) {
		pzp->zp_uid = new_uid;
		if (attrzp)
			attrzp->z_phys->zp_uid = new_uid;
	}

	if (mask & AT_GID) {
		pzp->zp_gid = new_gid;
		if (attrzp)
			attrzp->z_phys->zp_gid = new_gid;
	}

	if (attrzp)
		mutex_exit(&attrzp->z_lock);

	if (mask & AT_ATIME)
		ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime);

	if (mask & AT_MTIME)
		ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime);

	/* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
	if (mask & AT_SIZE)
		zfs_time_stamper_locked(zp, CONTENT_MODIFIED, tx);
	else if (mask != 0)
		zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
	/*
	 * Do this after setting timestamps to prevent timestamp
	 * update from toggling bit
	 */

	if (xoap && (mask & AT_XVATTR)) {

		/*
		 * restore trimmed off masks
		 * so that return masks can be set for caller.
		 */

		if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
			XVA_SET_REQ(xvap, XAT_APPENDONLY);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
			XVA_SET_REQ(xvap, XAT_NOUNLINK);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
			XVA_SET_REQ(xvap, XAT_IMMUTABLE);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
			XVA_SET_REQ(xvap, XAT_NODUMP);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
			XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
			size_t len;
			dmu_object_info_t doi;

			ASSERT(vp->v_type == VREG);

			/* Grow the bonus buffer if necessary. */
			dmu_object_info_from_db(zp->z_dbuf, &doi);
			len = sizeof (xoap->xoa_av_scanstamp) +
			    sizeof (znode_phys_t);
			if (len > doi.doi_bonus_size)
				VERIFY(dmu_set_bonus(zp->z_dbuf, len, tx) == 0);
		}
		zfs_xvattr_set(zp, xvap);
	}

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	if (mask != 0)
		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);

	mutex_exit(&zp->z_lock);

out:
	if (attrzp)
		VN_RELE(ZTOV(attrzp));

	if (aclp)
		zfs_acl_free(aclp);

	if (fuidp) {
		zfs_fuid_info_free(fuidp);
		fuidp = NULL;
	}

	if (err)
		dmu_tx_abort(tx);
	else
		dmu_tx_commit(tx);

	if (err == ERESTART)
		goto top;

	ZFS_EXIT(zfsvfs);
	return (err);
}

/* One element of the lock chain built by zfs_rename_lock(). */
typedef struct zfs_zlock {
	krwlock_t	*zl_rwlock;	/* lock we acquired */
	znode_t		*zl_znode;	/* znode we held */
	struct zfs_zlock *zl_next;	/* next in list */
} zfs_zlock_t;

/*
 * Drop locks and release vnodes that were held by zfs_rename_lock().
3223 */ 3224static void 3225zfs_rename_unlock(zfs_zlock_t **zlpp) 3226{ 3227 zfs_zlock_t *zl; 3228 3229 while ((zl = *zlpp) != NULL) { 3230 if (zl->zl_znode != NULL) 3231 VN_RELE(ZTOV(zl->zl_znode)); 3232 rw_exit(zl->zl_rwlock); 3233 *zlpp = zl->zl_next; 3234 kmem_free(zl, sizeof (*zl)); 3235 } 3236} 3237 3238/* 3239 * Search back through the directory tree, using the ".." entries. 3240 * Lock each directory in the chain to prevent concurrent renames. 3241 * Fail any attempt to move a directory into one of its own descendants. 3242 * XXX - z_parent_lock can overlap with map or grow locks 3243 */ 3244static int 3245zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 3246{ 3247 zfs_zlock_t *zl; 3248 znode_t *zp = tdzp; 3249 uint64_t rootid = zp->z_zfsvfs->z_root; 3250 uint64_t *oidp = &zp->z_id; 3251 krwlock_t *rwlp = &szp->z_parent_lock; 3252 krw_t rw = RW_WRITER; 3253 3254 /* 3255 * First pass write-locks szp and compares to zp->z_id. 3256 * Later passes read-lock zp and compare to zp->z_parent. 3257 */ 3258 do { 3259 if (!rw_tryenter(rwlp, rw)) { 3260 /* 3261 * Another thread is renaming in this path. 3262 * Note that if we are a WRITER, we don't have any 3263 * parent_locks held yet. 3264 */ 3265 if (rw == RW_READER && zp->z_id > szp->z_id) { 3266 /* 3267 * Drop our locks and restart 3268 */ 3269 zfs_rename_unlock(&zl); 3270 *zlpp = NULL; 3271 zp = tdzp; 3272 oidp = &zp->z_id; 3273 rwlp = &szp->z_parent_lock; 3274 rw = RW_WRITER; 3275 continue; 3276 } else { 3277 /* 3278 * Wait for other thread to drop its locks 3279 */ 3280 rw_enter(rwlp, rw); 3281 } 3282 } 3283 3284 zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3285 zl->zl_rwlock = rwlp; 3286 zl->zl_znode = NULL; 3287 zl->zl_next = *zlpp; 3288 *zlpp = zl; 3289 3290 if (*oidp == szp->z_id) /* We're a descendant of szp */ 3291 return (EINVAL); 3292 3293 if (*oidp == rootid) /* We've hit the top */ 3294 return (0); 3295 3296 if (rw == RW_READER) { /* i.e. 
not the first pass */ 3297 int error = zfs_zget(zp->z_zfsvfs, *oidp, &zp); 3298 if (error) 3299 return (error); 3300 zl->zl_znode = zp; 3301 } 3302 oidp = &zp->z_phys->zp_parent; 3303 rwlp = &zp->z_parent_lock; 3304 rw = RW_READER; 3305 3306 } while (zp->z_id != sdzp->z_id); 3307 3308 return (0); 3309} 3310 3311/* 3312 * Move an entry from the provided source directory to the target 3313 * directory. Change the entry name as indicated. 3314 * 3315 * IN: sdvp - Source directory containing the "old entry". 3316 * snm - Old entry name. 3317 * tdvp - Target directory to contain the "new entry". 3318 * tnm - New entry name. 3319 * cr - credentials of caller. 3320 * ct - caller context 3321 * flags - case flags 3322 * 3323 * RETURN: 0 if success 3324 * error code if failure 3325 * 3326 * Timestamps: 3327 * sdvp,tdvp - ctime|mtime updated 3328 */ 3329/*ARGSUSED*/ 3330static int 3331zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, 3332 caller_context_t *ct, int flags) 3333{ 3334 znode_t *tdzp, *szp, *tzp; 3335 znode_t *sdzp = VTOZ(sdvp); 3336 zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; 3337 zilog_t *zilog; 3338 vnode_t *realvp; 3339 zfs_dirlock_t *sdl, *tdl; 3340 dmu_tx_t *tx; 3341 zfs_zlock_t *zl; 3342 int cmp, serr, terr; 3343 int error = 0; 3344 int zflg = 0; 3345 3346 ZFS_ENTER(zfsvfs); 3347 ZFS_VERIFY_ZP(sdzp); 3348 zilog = zfsvfs->z_log; 3349 3350 /* 3351 * Make sure we have the real vp for the target directory. 
3352 */ 3353 if (VOP_REALVP(tdvp, &realvp, ct) == 0) 3354 tdvp = realvp; 3355 3356 if (tdvp->v_vfsp != sdvp->v_vfsp || zfsctl_is_node(tdvp)) { 3357 ZFS_EXIT(zfsvfs); 3358 return (EXDEV); 3359 } 3360 3361 tdzp = VTOZ(tdvp); 3362 ZFS_VERIFY_ZP(tdzp); 3363 if (zfsvfs->z_utf8 && u8_validate(tnm, 3364 strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3365 ZFS_EXIT(zfsvfs); 3366 return (EILSEQ); 3367 } 3368 3369 if (flags & FIGNORECASE) 3370 zflg |= ZCILOOK; 3371 3372top: 3373 szp = NULL; 3374 tzp = NULL; 3375 zl = NULL; 3376 3377 /* 3378 * This is to prevent the creation of links into attribute space 3379 * by renaming a linked file into/outof an attribute directory. 3380 * See the comment in zfs_link() for why this is considered bad. 3381 */ 3382 if ((tdzp->z_phys->zp_flags & ZFS_XATTR) != 3383 (sdzp->z_phys->zp_flags & ZFS_XATTR)) { 3384 ZFS_EXIT(zfsvfs); 3385 return (EINVAL); 3386 } 3387 3388 /* 3389 * Lock source and target directory entries. To prevent deadlock, 3390 * a lock ordering must be defined. We lock the directory with 3391 * the smallest object id first, or if it's a tie, the one with 3392 * the lexically first name. 3393 */ 3394 if (sdzp->z_id < tdzp->z_id) { 3395 cmp = -1; 3396 } else if (sdzp->z_id > tdzp->z_id) { 3397 cmp = 1; 3398 } else { 3399 /* 3400 * First compare the two name arguments without 3401 * considering any case folding. 3402 */ 3403 int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); 3404 3405 cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); 3406 ASSERT(error == 0 || !zfsvfs->z_utf8); 3407 if (cmp == 0) { 3408 /* 3409 * POSIX: "If the old argument and the new argument 3410 * both refer to links to the same existing file, 3411 * the rename() function shall return successfully 3412 * and perform no other action." 3413 */ 3414 ZFS_EXIT(zfsvfs); 3415 return (0); 3416 } 3417 /* 3418 * If the file system is case-folding, then we may 3419 * have some more checking to do. 
A case-folding file 3420 * system is either supporting mixed case sensitivity 3421 * access or is completely case-insensitive. Note 3422 * that the file system is always case preserving. 3423 * 3424 * In mixed sensitivity mode case sensitive behavior 3425 * is the default. FIGNORECASE must be used to 3426 * explicitly request case insensitive behavior. 3427 * 3428 * If the source and target names provided differ only 3429 * by case (e.g., a request to rename 'tim' to 'Tim'), 3430 * we will treat this as a special case in the 3431 * case-insensitive mode: as long as the source name 3432 * is an exact match, we will allow this to proceed as 3433 * a name-change request. 3434 */ 3435 if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 3436 (zfsvfs->z_case == ZFS_CASE_MIXED && 3437 flags & FIGNORECASE)) && 3438 u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, 3439 &error) == 0) { 3440 /* 3441 * case preserving rename request, require exact 3442 * name matches 3443 */ 3444 zflg |= ZCIEXACT; 3445 zflg &= ~ZCILOOK; 3446 } 3447 } 3448 3449 /* 3450 * If the source and destination directories are the same, we should 3451 * grab the z_name_lock of that directory only once. 3452 */ 3453 if (sdzp == tdzp) { 3454 zflg |= ZHAVELOCK; 3455 rw_enter(&sdzp->z_name_lock, RW_READER); 3456 } 3457 3458 if (cmp < 0) { 3459 serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, 3460 ZEXISTS | zflg, NULL, NULL); 3461 terr = zfs_dirent_lock(&tdl, 3462 tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); 3463 } else { 3464 terr = zfs_dirent_lock(&tdl, 3465 tdzp, tnm, &tzp, zflg, NULL, NULL); 3466 serr = zfs_dirent_lock(&sdl, 3467 sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, 3468 NULL, NULL); 3469 } 3470 3471 if (serr) { 3472 /* 3473 * Source entry invalid or not there. 
3474 */ 3475 if (!terr) { 3476 zfs_dirent_unlock(tdl); 3477 if (tzp) 3478 VN_RELE(ZTOV(tzp)); 3479 } 3480 3481 if (sdzp == tdzp) 3482 rw_exit(&sdzp->z_name_lock); 3483 3484 if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0) 3485 serr = EINVAL; 3486 ZFS_EXIT(zfsvfs); 3487 return (serr); 3488 } 3489 if (terr) { 3490 zfs_dirent_unlock(sdl); 3491 VN_RELE(ZTOV(szp)); 3492 3493 if (sdzp == tdzp) 3494 rw_exit(&sdzp->z_name_lock); 3495 3496 if (strcmp(tnm, "..") == 0) 3497 terr = EINVAL; 3498 ZFS_EXIT(zfsvfs); 3499 return (terr); 3500 } 3501 3502 /* 3503 * Must have write access at the source to remove the old entry 3504 * and write access at the target to create the new entry. 3505 * Note that if target and source are the same, this can be 3506 * done in a single check. 3507 */ 3508 3509 if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3510 goto out; 3511 3512 if (ZTOV(szp)->v_type == VDIR) { 3513 /* 3514 * Check to make sure rename is valid. 3515 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3516 */ 3517 if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) 3518 goto out; 3519 } 3520 3521 /* 3522 * Does target exist? 3523 */ 3524 if (tzp) { 3525 /* 3526 * Source and target must be the same type. 3527 */ 3528 if (ZTOV(szp)->v_type == VDIR) { 3529 if (ZTOV(tzp)->v_type != VDIR) { 3530 error = ENOTDIR; 3531 goto out; 3532 } 3533 } else { 3534 if (ZTOV(tzp)->v_type == VDIR) { 3535 error = EISDIR; 3536 goto out; 3537 } 3538 } 3539 /* 3540 * POSIX dictates that when the source and target 3541 * entries refer to the same file object, rename 3542 * must do nothing and exit without error. 3543 */ 3544 if (szp->z_id == tzp->z_id) { 3545 error = 0; 3546 goto out; 3547 } 3548 } 3549 3550 vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); 3551 if (tzp) 3552 vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 3553 3554 /* 3555 * notify the target directory if it is not the same 3556 * as source directory. 
3557 */ 3558 if (tdvp != sdvp) { 3559 vnevent_rename_dest_dir(tdvp, ct); 3560 } 3561 3562 tx = dmu_tx_create(zfsvfs->z_os); 3563 dmu_tx_hold_bonus(tx, szp->z_id); /* nlink changes */ 3564 dmu_tx_hold_bonus(tx, sdzp->z_id); /* nlink changes */ 3565 dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3566 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 3567 if (sdzp != tdzp) 3568 dmu_tx_hold_bonus(tx, tdzp->z_id); /* nlink changes */ 3569 if (tzp) 3570 dmu_tx_hold_bonus(tx, tzp->z_id); /* parent changes */ 3571 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3572 error = dmu_tx_assign(tx, TXG_NOWAIT); 3573 if (error) { 3574 if (zl != NULL) 3575 zfs_rename_unlock(&zl); 3576 zfs_dirent_unlock(sdl); 3577 zfs_dirent_unlock(tdl); 3578 3579 if (sdzp == tdzp) 3580 rw_exit(&sdzp->z_name_lock); 3581 3582 VN_RELE(ZTOV(szp)); 3583 if (tzp) 3584 VN_RELE(ZTOV(tzp)); 3585 if (error == ERESTART) { 3586 dmu_tx_wait(tx); 3587 dmu_tx_abort(tx); 3588 goto top; 3589 } 3590 dmu_tx_abort(tx); 3591 ZFS_EXIT(zfsvfs); 3592 return (error); 3593 } 3594 3595 if (tzp) /* Attempt to remove the existing target */ 3596 error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); 3597 3598 if (error == 0) { 3599 error = zfs_link_create(tdl, szp, tx, ZRENAMING); 3600 if (error == 0) { 3601 szp->z_phys->zp_flags |= ZFS_AV_MODIFIED; 3602 3603 error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); 3604 ASSERT(error == 0); 3605 3606 zfs_log_rename(zilog, tx, 3607 TX_RENAME | (flags & FIGNORECASE ? 
TX_CI : 0), 3608 sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp); 3609 3610 /* Update path information for the target vnode */ 3611 vn_renamepath(tdvp, ZTOV(szp), tnm, strlen(tnm)); 3612 } 3613#ifdef FREEBSD_NAMECACHE 3614 if (error == 0) { 3615 cache_purge(sdvp); 3616 cache_purge(tdvp); 3617 } 3618#endif 3619 } 3620 3621 dmu_tx_commit(tx); 3622out: 3623 if (zl != NULL) 3624 zfs_rename_unlock(&zl); 3625 3626 zfs_dirent_unlock(sdl); 3627 zfs_dirent_unlock(tdl); 3628 3629 if (sdzp == tdzp) 3630 rw_exit(&sdzp->z_name_lock); 3631 3632 VN_RELE(ZTOV(szp)); 3633 if (tzp) 3634 VN_RELE(ZTOV(tzp)); 3635 3636 ZFS_EXIT(zfsvfs); 3637 3638 return (error); 3639} 3640 3641/* 3642 * Insert the indicated symbolic reference entry into the directory. 3643 * 3644 * IN: dvp - Directory to contain new symbolic link. 3645 * link - Name for new symlink entry. 3646 * vap - Attributes of new entry. 3647 * target - Target path of new symlink. 3648 * cr - credentials of caller. 3649 * ct - caller context 3650 * flags - case flags 3651 * 3652 * RETURN: 0 if success 3653 * error code if failure 3654 * 3655 * Timestamps: 3656 * dvp - ctime|mtime updated 3657 */ 3658/*ARGSUSED*/ 3659static int 3660zfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 3661 cred_t *cr, kthread_t *td) 3662{ 3663 znode_t *zp, *dzp = VTOZ(dvp); 3664 zfs_dirlock_t *dl; 3665 dmu_tx_t *tx; 3666 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3667 zilog_t *zilog; 3668 int len = strlen(link); 3669 int error; 3670 int zflg = ZNEW; 3671 zfs_acl_ids_t acl_ids; 3672 boolean_t fuid_dirtied; 3673 int flags = 0; 3674 3675 ASSERT(vap->va_type == VLNK); 3676 3677 ZFS_ENTER(zfsvfs); 3678 ZFS_VERIFY_ZP(dzp); 3679 zilog = zfsvfs->z_log; 3680 3681 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 3682 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3683 ZFS_EXIT(zfsvfs); 3684 return (EILSEQ); 3685 } 3686 if (flags & FIGNORECASE) 3687 zflg |= ZCILOOK; 3688top: 3689 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3690 
ZFS_EXIT(zfsvfs); 3691 return (error); 3692 } 3693 3694 if (len > MAXPATHLEN) { 3695 ZFS_EXIT(zfsvfs); 3696 return (ENAMETOOLONG); 3697 } 3698 3699 /* 3700 * Attempt to lock directory; fail if entry already exists. 3701 */ 3702 error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 3703 if (error) { 3704 ZFS_EXIT(zfsvfs); 3705 return (error); 3706 } 3707 3708 VERIFY(0 == zfs_acl_ids_create(dzp, 0, vap, cr, NULL, &acl_ids)); 3709 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 3710 zfs_acl_ids_free(&acl_ids); 3711 zfs_dirent_unlock(dl); 3712 ZFS_EXIT(zfsvfs); 3713 return (EDQUOT); 3714 } 3715 tx = dmu_tx_create(zfsvfs->z_os); 3716 fuid_dirtied = zfsvfs->z_fuid_dirty; 3717 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 3718 dmu_tx_hold_bonus(tx, dzp->z_id); 3719 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 3720 if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) 3721 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE); 3722 if (fuid_dirtied) 3723 zfs_fuid_txhold(zfsvfs, tx); 3724 error = dmu_tx_assign(tx, TXG_NOWAIT); 3725 if (error) { 3726 zfs_acl_ids_free(&acl_ids); 3727 zfs_dirent_unlock(dl); 3728 if (error == ERESTART) { 3729 dmu_tx_wait(tx); 3730 dmu_tx_abort(tx); 3731 goto top; 3732 } 3733 dmu_tx_abort(tx); 3734 ZFS_EXIT(zfsvfs); 3735 return (error); 3736 } 3737 3738 dmu_buf_will_dirty(dzp->z_dbuf, tx); 3739 3740 /* 3741 * Create a new object for the symlink. 3742 * Put the link content into bonus buffer if it will fit; 3743 * otherwise, store it just like any other file data. 3744 */ 3745 if (sizeof (znode_phys_t) + len <= dmu_bonus_max()) { 3746 zfs_mknode(dzp, vap, tx, cr, 0, &zp, len, &acl_ids); 3747 if (len != 0) 3748 bcopy(link, zp->z_phys + 1, len); 3749 } else { 3750 dmu_buf_t *dbp; 3751 3752 zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids); 3753 3754 if (fuid_dirtied) 3755 zfs_fuid_sync(zfsvfs, tx); 3756 /* 3757 * Nothing can access the znode yet so no locking needed 3758 * for growing the znode's blocksize. 
3759 */ 3760 zfs_grow_blocksize(zp, len, tx); 3761 3762 VERIFY(0 == dmu_buf_hold(zfsvfs->z_os, 3763 zp->z_id, 0, FTAG, &dbp)); 3764 dmu_buf_will_dirty(dbp, tx); 3765 3766 ASSERT3U(len, <=, dbp->db_size); 3767 bcopy(link, dbp->db_data, len); 3768 dmu_buf_rele(dbp, FTAG); 3769 } 3770 zp->z_phys->zp_size = len; 3771 3772 /* 3773 * Insert the new object into the directory. 3774 */ 3775 (void) zfs_link_create(dl, zp, tx, ZNEW); 3776 if (error == 0) { 3777 uint64_t txtype = TX_SYMLINK; 3778 if (flags & FIGNORECASE) 3779 txtype |= TX_CI; 3780 zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 3781 *vpp = ZTOV(zp); 3782 } 3783 3784 zfs_acl_ids_free(&acl_ids); 3785 3786 dmu_tx_commit(tx); 3787 3788 zfs_dirent_unlock(dl); 3789 3790 ZFS_EXIT(zfsvfs); 3791 return (error); 3792} 3793 3794/* 3795 * Return, in the buffer contained in the provided uio structure, 3796 * the symbolic path referred to by vp. 3797 * 3798 * IN: vp - vnode of symbolic link. 3799 * uoip - structure to contain the link path. 3800 * cr - credentials of caller. 3801 * ct - caller context 3802 * 3803 * OUT: uio - structure to contain the link path. 
3804 * 3805 * RETURN: 0 if success 3806 * error code if failure 3807 * 3808 * Timestamps: 3809 * vp - atime updated 3810 */ 3811/* ARGSUSED */ 3812static int 3813zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 3814{ 3815 znode_t *zp = VTOZ(vp); 3816 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3817 size_t bufsz; 3818 int error; 3819 3820 ZFS_ENTER(zfsvfs); 3821 ZFS_VERIFY_ZP(zp); 3822 3823 bufsz = (size_t)zp->z_phys->zp_size; 3824 if (bufsz + sizeof (znode_phys_t) <= zp->z_dbuf->db_size) { 3825 error = uiomove(zp->z_phys + 1, 3826 MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); 3827 } else { 3828 dmu_buf_t *dbp; 3829 error = dmu_buf_hold(zfsvfs->z_os, zp->z_id, 0, FTAG, &dbp); 3830 if (error) { 3831 ZFS_EXIT(zfsvfs); 3832 return (error); 3833 } 3834 error = uiomove(dbp->db_data, 3835 MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); 3836 dmu_buf_rele(dbp, FTAG); 3837 } 3838 3839 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 3840 ZFS_EXIT(zfsvfs); 3841 return (error); 3842} 3843 3844/* 3845 * Insert a new entry into directory tdvp referencing svp. 3846 * 3847 * IN: tdvp - Directory to contain new entry. 3848 * svp - vnode of new entry. 3849 * name - name of new entry. 3850 * cr - credentials of caller. 
3851 * ct - caller context 3852 * 3853 * RETURN: 0 if success 3854 * error code if failure 3855 * 3856 * Timestamps: 3857 * tdvp - ctime|mtime updated 3858 * svp - ctime updated 3859 */ 3860/* ARGSUSED */ 3861static int 3862zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 3863 caller_context_t *ct, int flags) 3864{ 3865 znode_t *dzp = VTOZ(tdvp); 3866 znode_t *tzp, *szp; 3867 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3868 zilog_t *zilog; 3869 zfs_dirlock_t *dl; 3870 dmu_tx_t *tx; 3871 vnode_t *realvp; 3872 int error; 3873 int zf = ZNEW; 3874 uint64_t parent; 3875 uid_t owner; 3876 3877 ASSERT(tdvp->v_type == VDIR); 3878 3879 ZFS_ENTER(zfsvfs); 3880 ZFS_VERIFY_ZP(dzp); 3881 zilog = zfsvfs->z_log; 3882 3883 if (VOP_REALVP(svp, &realvp, ct) == 0) 3884 svp = realvp; 3885 3886 /* 3887 * POSIX dictates that we return EPERM here. 3888 * Better choices include ENOTSUP or EISDIR. 3889 */ 3890 if (svp->v_type == VDIR) { 3891 ZFS_EXIT(zfsvfs); 3892 return (EPERM); 3893 } 3894 3895 if (svp->v_vfsp != tdvp->v_vfsp || zfsctl_is_node(svp)) { 3896 ZFS_EXIT(zfsvfs); 3897 return (EXDEV); 3898 } 3899 3900 szp = VTOZ(svp); 3901 ZFS_VERIFY_ZP(szp); 3902 3903 /* Prevent links to .zfs/shares files */ 3904 3905 if (szp->z_phys->zp_parent == zfsvfs->z_shares_dir) { 3906 ZFS_EXIT(zfsvfs); 3907 return (EPERM); 3908 } 3909 3910 if (zfsvfs->z_utf8 && u8_validate(name, 3911 strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3912 ZFS_EXIT(zfsvfs); 3913 return (EILSEQ); 3914 } 3915 if (flags & FIGNORECASE) 3916 zf |= ZCILOOK; 3917 3918 /* 3919 * We do not support links between attributes and non-attributes 3920 * because of the potential security risk of creating links 3921 * into "normal" file space in order to circumvent restrictions 3922 * imposed in attribute space. 
3923 */ 3924 if ((szp->z_phys->zp_flags & ZFS_XATTR) != 3925 (dzp->z_phys->zp_flags & ZFS_XATTR)) { 3926 ZFS_EXIT(zfsvfs); 3927 return (EINVAL); 3928 } 3929 3930 3931 owner = zfs_fuid_map_id(zfsvfs, szp->z_phys->zp_uid, cr, ZFS_OWNER); 3932 if (owner != crgetuid(cr) && 3933 secpolicy_basic_link(svp, cr) != 0) { 3934 ZFS_EXIT(zfsvfs); 3935 return (EPERM); 3936 } 3937 3938 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3939 ZFS_EXIT(zfsvfs); 3940 return (error); 3941 } 3942 3943top: 3944 /* 3945 * Attempt to lock directory; fail if entry already exists. 3946 */ 3947 error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 3948 if (error) { 3949 ZFS_EXIT(zfsvfs); 3950 return (error); 3951 } 3952 3953 tx = dmu_tx_create(zfsvfs->z_os); 3954 dmu_tx_hold_bonus(tx, szp->z_id); 3955 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 3956 error = dmu_tx_assign(tx, TXG_NOWAIT); 3957 if (error) { 3958 zfs_dirent_unlock(dl); 3959 if (error == ERESTART) { 3960 dmu_tx_wait(tx); 3961 dmu_tx_abort(tx); 3962 goto top; 3963 } 3964 dmu_tx_abort(tx); 3965 ZFS_EXIT(zfsvfs); 3966 return (error); 3967 } 3968 3969 error = zfs_link_create(dl, szp, tx, 0); 3970 3971 if (error == 0) { 3972 uint64_t txtype = TX_LINK; 3973 if (flags & FIGNORECASE) 3974 txtype |= TX_CI; 3975 zfs_log_link(zilog, tx, txtype, dzp, szp, name); 3976 } 3977 3978 dmu_tx_commit(tx); 3979 3980 zfs_dirent_unlock(dl); 3981 3982 if (error == 0) { 3983 vnevent_link(svp, ct); 3984 } 3985 3986 ZFS_EXIT(zfsvfs); 3987 return (error); 3988} 3989 3990/*ARGSUSED*/ 3991void 3992zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 3993{ 3994 znode_t *zp = VTOZ(vp); 3995 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3996 int error; 3997 3998 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 3999 if (zp->z_dbuf == NULL) { 4000 /* 4001 * The fs has been unmounted, or we did a 4002 * suspend/resume and this file no longer exists. 
4003 */ 4004 VI_LOCK(vp); 4005 vp->v_count = 0; /* count arrives as 1 */ 4006 VI_UNLOCK(vp); 4007 vrecycle(vp, curthread); 4008 rw_exit(&zfsvfs->z_teardown_inactive_lock); 4009 return; 4010 } 4011 4012 if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4013 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4014 4015 dmu_tx_hold_bonus(tx, zp->z_id); 4016 error = dmu_tx_assign(tx, TXG_WAIT); 4017 if (error) { 4018 dmu_tx_abort(tx); 4019 } else { 4020 dmu_buf_will_dirty(zp->z_dbuf, tx); 4021 mutex_enter(&zp->z_lock); 4022 zp->z_atime_dirty = 0; 4023 mutex_exit(&zp->z_lock); 4024 dmu_tx_commit(tx); 4025 } 4026 } 4027 4028 zfs_zinactive(zp); 4029 rw_exit(&zfsvfs->z_teardown_inactive_lock); 4030} 4031 4032CTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 4033CTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 4034 4035/*ARGSUSED*/ 4036static int 4037zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 4038{ 4039 znode_t *zp = VTOZ(vp); 4040 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4041 uint32_t gen; 4042 uint64_t object = zp->z_id; 4043 zfid_short_t *zfid; 4044 int size, i; 4045 4046 ZFS_ENTER(zfsvfs); 4047 ZFS_VERIFY_ZP(zp); 4048 gen = (uint32_t)zp->z_gen; 4049 4050 size = (zfsvfs->z_parent != zfsvfs) ? 
LONG_FID_LEN : SHORT_FID_LEN; 4051 fidp->fid_len = size; 4052 4053 zfid = (zfid_short_t *)fidp; 4054 4055 zfid->zf_len = size; 4056 4057 for (i = 0; i < sizeof (zfid->zf_object); i++) 4058 zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 4059 4060 /* Must have a non-zero generation number to distinguish from .zfs */ 4061 if (gen == 0) 4062 gen = 1; 4063 for (i = 0; i < sizeof (zfid->zf_gen); i++) 4064 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 4065 4066 if (size == LONG_FID_LEN) { 4067 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 4068 zfid_long_t *zlfid; 4069 4070 zlfid = (zfid_long_t *)fidp; 4071 4072 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4073 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4074 4075 /* XXX - this should be the generation number for the objset */ 4076 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 4077 zlfid->zf_setgen[i] = 0; 4078 } 4079 4080 ZFS_EXIT(zfsvfs); 4081 return (0); 4082} 4083 4084static int 4085zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 4086 caller_context_t *ct) 4087{ 4088 znode_t *zp, *xzp; 4089 zfsvfs_t *zfsvfs; 4090 zfs_dirlock_t *dl; 4091 int error; 4092 4093 switch (cmd) { 4094 case _PC_LINK_MAX: 4095 *valp = INT_MAX; 4096 return (0); 4097 4098 case _PC_FILESIZEBITS: 4099 *valp = 64; 4100 return (0); 4101 4102#if 0 4103 case _PC_XATTR_EXISTS: 4104 zp = VTOZ(vp); 4105 zfsvfs = zp->z_zfsvfs; 4106 ZFS_ENTER(zfsvfs); 4107 ZFS_VERIFY_ZP(zp); 4108 *valp = 0; 4109 error = zfs_dirent_lock(&dl, zp, "", &xzp, 4110 ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 4111 if (error == 0) { 4112 zfs_dirent_unlock(dl); 4113 if (!zfs_dirempty(xzp)) 4114 *valp = 1; 4115 VN_RELE(ZTOV(xzp)); 4116 } else if (error == ENOENT) { 4117 /* 4118 * If there aren't extended attributes, it's the 4119 * same as having zero of them. 
4120 */ 4121 error = 0; 4122 } 4123 ZFS_EXIT(zfsvfs); 4124 return (error); 4125#endif 4126 4127 case _PC_ACL_EXTENDED: 4128 *valp = 0; 4129 return (0); 4130 4131 case _PC_ACL_NFS4: 4132 *valp = 1; 4133 return (0); 4134 4135 case _PC_ACL_PATH_MAX: 4136 *valp = ACL_MAX_ENTRIES; 4137 return (0); 4138 4139 case _PC_MIN_HOLE_SIZE: 4140 *valp = (int)SPA_MINBLOCKSIZE; 4141 return (0); 4142 4143 default: 4144 return (EOPNOTSUPP); 4145 } 4146} 4147 4148/*ARGSUSED*/ 4149static int 4150zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4151 caller_context_t *ct) 4152{ 4153 znode_t *zp = VTOZ(vp); 4154 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4155 int error; 4156 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 4157 4158 ZFS_ENTER(zfsvfs); 4159 ZFS_VERIFY_ZP(zp); 4160 error = zfs_getacl(zp, vsecp, skipaclchk, cr); 4161 ZFS_EXIT(zfsvfs); 4162 4163 return (error); 4164} 4165 4166/*ARGSUSED*/ 4167static int 4168zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4169 caller_context_t *ct) 4170{ 4171 znode_t *zp = VTOZ(vp); 4172 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4173 int error; 4174 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 4175 4176 ZFS_ENTER(zfsvfs); 4177 ZFS_VERIFY_ZP(zp); 4178 error = zfs_setacl(zp, vsecp, skipaclchk, cr); 4179 ZFS_EXIT(zfsvfs); 4180 return (error); 4181} 4182 4183static int 4184zfs_freebsd_open(ap) 4185 struct vop_open_args /* { 4186 struct vnode *a_vp; 4187 int a_mode; 4188 struct ucred *a_cred; 4189 struct thread *a_td; 4190 } */ *ap; 4191{ 4192 vnode_t *vp = ap->a_vp; 4193 znode_t *zp = VTOZ(vp); 4194 int error; 4195 4196 error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); 4197 if (error == 0) 4198 vnode_create_vobject(vp, zp->z_phys->zp_size, ap->a_td); 4199 return (error); 4200} 4201 4202static int 4203zfs_freebsd_close(ap) 4204 struct vop_close_args /* { 4205 struct vnode *a_vp; 4206 int a_fflag; 4207 struct ucred *a_cred; 4208 struct thread *a_td; 4209 } */ *ap; 4210{ 4211 4212 return (zfs_close(ap->a_vp, ap->a_fflag, 0, 0, ap->a_cred, NULL)); 4213} 4214 4215static int 4216zfs_freebsd_ioctl(ap) 4217 struct vop_ioctl_args /* { 4218 struct vnode *a_vp; 4219 u_long a_command; 4220 caddr_t a_data; 4221 int a_fflag; 4222 struct ucred *cred; 4223 struct thread *td; 4224 } */ *ap; 4225{ 4226 4227 return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 4228 ap->a_fflag, ap->a_cred, NULL, NULL)); 4229} 4230 4231static int 4232zfs_freebsd_read(ap) 4233 struct vop_read_args /* { 4234 struct vnode *a_vp; 4235 struct uio *a_uio; 4236 int a_ioflag; 4237 struct ucred *a_cred; 4238 } */ *ap; 4239{ 4240 4241 return (zfs_read(ap->a_vp, ap->a_uio, ap->a_ioflag, ap->a_cred, NULL)); 4242} 4243 4244static int 4245zfs_freebsd_write(ap) 4246 struct vop_write_args /* { 4247 struct vnode *a_vp; 4248 struct uio *a_uio; 4249 int a_ioflag; 4250 struct ucred *a_cred; 4251 } */ *ap; 4252{ 4253 4254 if (vn_rlimit_fsize(ap->a_vp, ap->a_uio, ap->a_uio->uio_td)) 4255 return (EFBIG); 4256 4257 return (zfs_write(ap->a_vp, ap->a_uio, ap->a_ioflag, ap->a_cred, NULL)); 4258} 4259 4260static int 4261zfs_freebsd_access(ap) 4262 struct vop_access_args /* { 4263 struct 
vnode *a_vp; 4264 accmode_t a_accmode; 4265 struct ucred *a_cred; 4266 struct thread *a_td; 4267 } */ *ap; 4268{ 4269 vnode_t *vp = ap->a_vp; 4270 znode_t *zp = VTOZ(vp); 4271 znode_phys_t *zphys = zp->z_phys; 4272 accmode_t accmode; 4273 int error = 0; 4274 4275 /* 4276 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND, 4277 */ 4278 accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 4279 if (accmode != 0) 4280 error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); 4281 4282 /* 4283 * VADMIN has to be handled by vaccess(). 4284 */ 4285 if (error == 0) { 4286 accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 4287 if (accmode != 0) { 4288 error = vaccess(vp->v_type, zphys->zp_mode, 4289 zphys->zp_uid, zphys->zp_gid, accmode, ap->a_cred, 4290 NULL); 4291 } 4292 } 4293 4294 /* 4295 * For VEXEC, ensure that at least one execute bit is set for 4296 * non-directories. 4297 */ 4298 if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 4299 (zphys->zp_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) 4300 error = EACCES; 4301 4302 return (error); 4303} 4304 4305static int 4306zfs_freebsd_lookup(ap) 4307 struct vop_lookup_args /* { 4308 struct vnode *a_dvp; 4309 struct vnode **a_vpp; 4310 struct componentname *a_cnp; 4311 } */ *ap; 4312{ 4313 struct componentname *cnp = ap->a_cnp; 4314 char nm[NAME_MAX + 1]; 4315 4316 ASSERT(cnp->cn_namelen < sizeof(nm)); 4317 strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 4318 4319 return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 4320 cnp->cn_cred, cnp->cn_thread, 0)); 4321} 4322 4323static int 4324zfs_freebsd_create(ap) 4325 struct vop_create_args /* { 4326 struct vnode *a_dvp; 4327 struct vnode **a_vpp; 4328 struct componentname *a_cnp; 4329 struct vattr *a_vap; 4330 } */ *ap; 4331{ 4332 struct componentname *cnp = ap->a_cnp; 4333 vattr_t *vap = ap->a_vap; 4334 int mode; 4335 4336 ASSERT(cnp->cn_flags & SAVENAME); 4337 4338 vattr_init_mask(vap); 4339 mode = 
vap->va_mode & ALLPERMS; 4340 4341 return (zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 4342 ap->a_vpp, cnp->cn_cred, cnp->cn_thread)); 4343} 4344 4345static int 4346zfs_freebsd_remove(ap) 4347 struct vop_remove_args /* { 4348 struct vnode *a_dvp; 4349 struct vnode *a_vp; 4350 struct componentname *a_cnp; 4351 } */ *ap; 4352{ 4353 4354 ASSERT(ap->a_cnp->cn_flags & SAVENAME); 4355 4356 return (zfs_remove(ap->a_dvp, ap->a_cnp->cn_nameptr, 4357 ap->a_cnp->cn_cred, NULL, 0)); 4358} 4359 4360static int 4361zfs_freebsd_mkdir(ap) 4362 struct vop_mkdir_args /* { 4363 struct vnode *a_dvp; 4364 struct vnode **a_vpp; 4365 struct componentname *a_cnp; 4366 struct vattr *a_vap; 4367 } */ *ap; 4368{ 4369 vattr_t *vap = ap->a_vap; 4370 4371 ASSERT(ap->a_cnp->cn_flags & SAVENAME); 4372 4373 vattr_init_mask(vap); 4374 4375 return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 4376 ap->a_cnp->cn_cred, NULL, 0, NULL)); 4377} 4378 4379static int 4380zfs_freebsd_rmdir(ap) 4381 struct vop_rmdir_args /* { 4382 struct vnode *a_dvp; 4383 struct vnode *a_vp; 4384 struct componentname *a_cnp; 4385 } */ *ap; 4386{ 4387 struct componentname *cnp = ap->a_cnp; 4388 4389 ASSERT(cnp->cn_flags & SAVENAME); 4390 4391 return (zfs_rmdir(ap->a_dvp, cnp->cn_nameptr, NULL, cnp->cn_cred, NULL, 0)); 4392} 4393 4394static int 4395zfs_freebsd_readdir(ap) 4396 struct vop_readdir_args /* { 4397 struct vnode *a_vp; 4398 struct uio *a_uio; 4399 struct ucred *a_cred; 4400 int *a_eofflag; 4401 int *a_ncookies; 4402 u_long **a_cookies; 4403 } */ *ap; 4404{ 4405 4406 return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 4407 ap->a_ncookies, ap->a_cookies)); 4408} 4409 4410static int 4411zfs_freebsd_fsync(ap) 4412 struct vop_fsync_args /* { 4413 struct vnode *a_vp; 4414 int a_waitfor; 4415 struct thread *a_td; 4416 } */ *ap; 4417{ 4418 4419 vop_stdfsync(ap); 4420 return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 4421} 4422 4423static int 4424zfs_freebsd_getattr(ap) 
4425 struct vop_getattr_args /* { 4426 struct vnode *a_vp; 4427 struct vattr *a_vap; 4428 struct ucred *a_cred; 4429 struct thread *a_td; 4430 } */ *ap; 4431{ 4432 vattr_t *vap = ap->a_vap; 4433 xvattr_t xvap; 4434 u_long fflags = 0; 4435 int error; 4436 4437 xva_init(&xvap); 4438 xvap.xva_vattr = *vap; 4439 xvap.xva_vattr.va_mask |= AT_XVATTR; 4440 4441 /* Convert chflags into ZFS-type flags. */ 4442 /* XXX: what about SF_SETTABLE?. */ 4443 XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 4444 XVA_SET_REQ(&xvap, XAT_APPENDONLY); 4445 XVA_SET_REQ(&xvap, XAT_NOUNLINK); 4446 XVA_SET_REQ(&xvap, XAT_NODUMP); 4447 error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 4448 if (error != 0) 4449 return (error); 4450 4451 /* Convert ZFS xattr into chflags. */ 4452#define FLAG_CHECK(fflag, xflag, xfield) do { \ 4453 if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 4454 fflags |= (fflag); \ 4455} while (0) 4456 FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 4457 xvap.xva_xoptattrs.xoa_immutable); 4458 FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 4459 xvap.xva_xoptattrs.xoa_appendonly); 4460 FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 4461 xvap.xva_xoptattrs.xoa_nounlink); 4462 FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 4463 xvap.xva_xoptattrs.xoa_nodump); 4464#undef FLAG_CHECK 4465 *vap = xvap.xva_vattr; 4466 vap->va_flags = fflags; 4467 return (0); 4468} 4469 4470static int 4471zfs_freebsd_setattr(ap) 4472 struct vop_setattr_args /* { 4473 struct vnode *a_vp; 4474 struct vattr *a_vap; 4475 struct ucred *a_cred; 4476 struct thread *a_td; 4477 } */ *ap; 4478{ 4479 vnode_t *vp = ap->a_vp; 4480 vattr_t *vap = ap->a_vap; 4481 cred_t *cred = ap->a_cred; 4482 xvattr_t xvap; 4483 u_long fflags; 4484 uint64_t zflags; 4485 4486 vattr_init_mask(vap); 4487 vap->va_mask &= ~AT_NOSET; 4488 4489 xva_init(&xvap); 4490 xvap.xva_vattr = *vap; 4491 4492 zflags = VTOZ(vp)->z_phys->zp_flags; 4493 4494 if (vap->va_flags != VNOVAL) { 4495 zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 4496 int error; 4497 4498 if 
(zfsvfs->z_use_fuids == B_FALSE) 4499 return (EOPNOTSUPP); 4500 4501 fflags = vap->va_flags; 4502 if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_NODUMP)) != 0) 4503 return (EOPNOTSUPP); 4504 /* 4505 * Unprivileged processes are not permitted to unset system 4506 * flags, or modify flags if any system flags are set. 4507 * Privileged non-jail processes may not modify system flags 4508 * if securelevel > 0 and any existing system flags are set. 4509 * Privileged jail processes behave like privileged non-jail 4510 * processes if the security.jail.chflags_allowed sysctl is 4511 * is non-zero; otherwise, they behave like unprivileged 4512 * processes. 4513 */ 4514 if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 4515 priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) { 4516 if (zflags & 4517 (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 4518 error = securelevel_gt(cred, 0); 4519 if (error != 0) 4520 return (error); 4521 } 4522 } else { 4523 /* 4524 * Callers may only modify the file flags on objects they 4525 * have VADMIN rights for. 4526 */ 4527 if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0) 4528 return (error); 4529 if (zflags & 4530 (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 4531 return (EPERM); 4532 } 4533 if (fflags & 4534 (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 4535 return (EPERM); 4536 } 4537 } 4538 4539#define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 4540 if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 4541 ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 4542 XVA_SET_REQ(&xvap, (xflag)); \ 4543 (xfield) = ((fflags & (fflag)) != 0); \ 4544 } \ 4545} while (0) 4546 /* Convert chflags into ZFS-type flags. */ 4547 /* XXX: what about SF_SETTABLE?. 
*/ 4548 FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 4549 xvap.xva_xoptattrs.xoa_immutable); 4550 FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 4551 xvap.xva_xoptattrs.xoa_appendonly); 4552 FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 4553 xvap.xva_xoptattrs.xoa_nounlink); 4554 FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 4555 xvap.xva_xoptattrs.xoa_nodump); 4556#undef FLAG_CHANGE 4557 } 4558 return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL)); 4559} 4560 4561static int 4562zfs_freebsd_rename(ap) 4563 struct vop_rename_args /* { 4564 struct vnode *a_fdvp; 4565 struct vnode *a_fvp; 4566 struct componentname *a_fcnp; 4567 struct vnode *a_tdvp; 4568 struct vnode *a_tvp; 4569 struct componentname *a_tcnp; 4570 } */ *ap; 4571{ 4572 vnode_t *fdvp = ap->a_fdvp; 4573 vnode_t *fvp = ap->a_fvp; 4574 vnode_t *tdvp = ap->a_tdvp; 4575 vnode_t *tvp = ap->a_tvp; 4576 int error; 4577 4578 ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); 4579 ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); 4580 4581 error = zfs_rename(fdvp, ap->a_fcnp->cn_nameptr, tdvp, 4582 ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred, NULL, 0); 4583 4584 if (tdvp == tvp) 4585 VN_RELE(tdvp); 4586 else 4587 VN_URELE(tdvp); 4588 if (tvp) 4589 VN_URELE(tvp); 4590 VN_RELE(fdvp); 4591 VN_RELE(fvp); 4592 4593 return (error); 4594} 4595 4596static int 4597zfs_freebsd_symlink(ap) 4598 struct vop_symlink_args /* { 4599 struct vnode *a_dvp; 4600 struct vnode **a_vpp; 4601 struct componentname *a_cnp; 4602 struct vattr *a_vap; 4603 char *a_target; 4604 } */ *ap; 4605{ 4606 struct componentname *cnp = ap->a_cnp; 4607 vattr_t *vap = ap->a_vap; 4608 4609 ASSERT(cnp->cn_flags & SAVENAME); 4610 4611 vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. 
*/ 4612 vattr_init_mask(vap); 4613 4614 return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 4615 ap->a_target, cnp->cn_cred, cnp->cn_thread)); 4616} 4617 4618static int 4619zfs_freebsd_readlink(ap) 4620 struct vop_readlink_args /* { 4621 struct vnode *a_vp; 4622 struct uio *a_uio; 4623 struct ucred *a_cred; 4624 } */ *ap; 4625{ 4626 4627 return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 4628} 4629 4630static int 4631zfs_freebsd_link(ap) 4632 struct vop_link_args /* { 4633 struct vnode *a_tdvp; 4634 struct vnode *a_vp; 4635 struct componentname *a_cnp; 4636 } */ *ap; 4637{ 4638 struct componentname *cnp = ap->a_cnp; 4639 4640 ASSERT(cnp->cn_flags & SAVENAME); 4641 4642 return (zfs_link(ap->a_tdvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 4643} 4644 4645static int 4646zfs_freebsd_inactive(ap) 4647 struct vop_inactive_args /* { 4648 struct vnode *a_vp; 4649 struct thread *a_td; 4650 } */ *ap; 4651{ 4652 vnode_t *vp = ap->a_vp; 4653 4654 zfs_inactive(vp, ap->a_td->td_ucred, NULL); 4655 return (0); 4656} 4657 4658static void 4659zfs_reclaim_complete(void *arg, int pending) 4660{ 4661 znode_t *zp = arg; 4662 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4663 4664 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4665 if (zp->z_dbuf != NULL) { 4666 ZFS_OBJ_HOLD_ENTER(zfsvfs, zp->z_id); 4667 zfs_znode_dmu_fini(zp); 4668 ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); 4669 } 4670 zfs_znode_free(zp); 4671 rw_exit(&zfsvfs->z_teardown_inactive_lock); 4672 /* 4673 * If the file system is being unmounted, there is a process waiting 4674 * for us, wake it up. 
4675 */ 4676 if (zfsvfs->z_unmounted) 4677 wakeup_one(zfsvfs); 4678} 4679 4680static int 4681zfs_freebsd_reclaim(ap) 4682 struct vop_reclaim_args /* { 4683 struct vnode *a_vp; 4684 struct thread *a_td; 4685 } */ *ap; 4686{ 4687 vnode_t *vp = ap->a_vp; 4688 znode_t *zp = VTOZ(vp); 4689 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4690 4691 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4692 4693 ASSERT(zp != NULL); 4694 4695 /* 4696 * Destroy the vm object and flush associated pages. 4697 */ 4698 vnode_destroy_vobject(vp); 4699 4700 mutex_enter(&zp->z_lock); 4701 ASSERT(zp->z_phys != NULL); 4702 zp->z_vnode = NULL; 4703 mutex_exit(&zp->z_lock); 4704 4705 if (zp->z_unlinked) 4706 ; /* Do nothing. */ 4707 else if (zp->z_dbuf == NULL) 4708 zfs_znode_free(zp); 4709 else /* if (!zp->z_unlinked && zp->z_dbuf != NULL) */ { 4710 int locked; 4711 4712 locked = MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)) ? 2 : 4713 ZFS_OBJ_HOLD_TRYENTER(zfsvfs, zp->z_id); 4714 if (locked == 0) { 4715 /* 4716 * Lock can't be obtained due to deadlock possibility, 4717 * so defer znode destruction. 
4718 */ 4719 TASK_INIT(&zp->z_task, 0, zfs_reclaim_complete, zp); 4720 taskqueue_enqueue(taskqueue_thread, &zp->z_task); 4721 } else { 4722 zfs_znode_dmu_fini(zp); 4723 if (locked == 1) 4724 ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); 4725 zfs_znode_free(zp); 4726 } 4727 } 4728 VI_LOCK(vp); 4729 vp->v_data = NULL; 4730 ASSERT(vp->v_holdcnt >= 1); 4731 VI_UNLOCK(vp); 4732 rw_exit(&zfsvfs->z_teardown_inactive_lock); 4733 return (0); 4734} 4735 4736static int 4737zfs_freebsd_fid(ap) 4738 struct vop_fid_args /* { 4739 struct vnode *a_vp; 4740 struct fid *a_fid; 4741 } */ *ap; 4742{ 4743 4744 return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 4745} 4746 4747static int 4748zfs_freebsd_pathconf(ap) 4749 struct vop_pathconf_args /* { 4750 struct vnode *a_vp; 4751 int a_name; 4752 register_t *a_retval; 4753 } */ *ap; 4754{ 4755 ulong_t val; 4756 int error; 4757 4758 error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 4759 if (error == 0) 4760 *ap->a_retval = val; 4761 else if (error == EOPNOTSUPP) 4762 error = vop_stdpathconf(ap); 4763 return (error); 4764} 4765 4766static int 4767zfs_freebsd_fifo_pathconf(ap) 4768 struct vop_pathconf_args /* { 4769 struct vnode *a_vp; 4770 int a_name; 4771 register_t *a_retval; 4772 } */ *ap; 4773{ 4774 4775 switch (ap->a_name) { 4776 case _PC_ACL_EXTENDED: 4777 case _PC_ACL_NFS4: 4778 case _PC_ACL_PATH_MAX: 4779 case _PC_MAC_PRESENT: 4780 return (zfs_freebsd_pathconf(ap)); 4781 default: 4782 return (fifo_specops.vop_pathconf(ap)); 4783 } 4784} 4785 4786/* 4787 * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 4788 * extended attribute name: 4789 * 4790 * NAMESPACE PREFIX 4791 * system freebsd:system: 4792 * user (none, can be used to access ZFS fsattr(5) attributes 4793 * created on Solaris) 4794 */ 4795static int 4796zfs_create_attrname(int attrnamespace, const char *name, char *attrname, 4797 size_t size) 4798{ 4799 const char *namespace, *prefix, *suffix; 4800 4801 /* We don't allow '/' 
character in attribute name. */ 4802 if (strchr(name, '/') != NULL) 4803 return (EINVAL); 4804 /* We don't allow attribute names that start with "freebsd:" string. */ 4805 if (strncmp(name, "freebsd:", 8) == 0) 4806 return (EINVAL); 4807 4808 bzero(attrname, size); 4809 4810 switch (attrnamespace) { 4811 case EXTATTR_NAMESPACE_USER: 4812#if 0 4813 prefix = "freebsd:"; 4814 namespace = EXTATTR_NAMESPACE_USER_STRING; 4815 suffix = ":"; 4816#else 4817 /* 4818 * This is the default namespace by which we can access all 4819 * attributes created on Solaris. 4820 */ 4821 prefix = namespace = suffix = ""; 4822#endif 4823 break; 4824 case EXTATTR_NAMESPACE_SYSTEM: 4825 prefix = "freebsd:"; 4826 namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 4827 suffix = ":"; 4828 break; 4829 case EXTATTR_NAMESPACE_EMPTY: 4830 default: 4831 return (EINVAL); 4832 } 4833 if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 4834 name) >= size) { 4835 return (ENAMETOOLONG); 4836 } 4837 return (0); 4838} 4839 4840/* 4841 * Vnode operating to retrieve a named extended attribute. 
4842 */ 4843static int 4844zfs_getextattr(struct vop_getextattr_args *ap) 4845/* 4846vop_getextattr { 4847 IN struct vnode *a_vp; 4848 IN int a_attrnamespace; 4849 IN const char *a_name; 4850 INOUT struct uio *a_uio; 4851 OUT size_t *a_size; 4852 IN struct ucred *a_cred; 4853 IN struct thread *a_td; 4854}; 4855*/ 4856{ 4857 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 4858 struct thread *td = ap->a_td; 4859 struct nameidata nd; 4860 char attrname[255]; 4861 struct vattr va; 4862 vnode_t *xvp = NULL, *vp; 4863 int error, flags; 4864 4865 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 4866 ap->a_cred, ap->a_td, VREAD); 4867 if (error != 0) 4868 return (error); 4869 4870 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 4871 sizeof(attrname)); 4872 if (error != 0) 4873 return (error); 4874 4875 ZFS_ENTER(zfsvfs); 4876 4877 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 4878 LOOKUP_XATTR); 4879 if (error != 0) { 4880 ZFS_EXIT(zfsvfs); 4881 return (error); 4882 } 4883 4884 flags = FREAD; 4885 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, attrname, 4886 xvp, td); 4887 error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL); 4888 vp = nd.ni_vp; 4889 NDFREE(&nd, NDF_ONLY_PNBUF); 4890 if (error != 0) { 4891 ZFS_EXIT(zfsvfs); 4892 if (error == ENOENT) 4893 error = ENOATTR; 4894 return (error); 4895 } 4896 4897 if (ap->a_size != NULL) { 4898 error = VOP_GETATTR(vp, &va, ap->a_cred); 4899 if (error == 0) 4900 *ap->a_size = (size_t)va.va_size; 4901 } else if (ap->a_uio != NULL) 4902 error = VOP_READ(vp, ap->a_uio, IO_UNIT | IO_SYNC, ap->a_cred); 4903 4904 VOP_UNLOCK(vp, 0); 4905 vn_close(vp, flags, ap->a_cred, td); 4906 ZFS_EXIT(zfsvfs); 4907 4908 return (error); 4909} 4910 4911/* 4912 * Vnode operation to remove a named attribute. 
4913 */ 4914int 4915zfs_deleteextattr(struct vop_deleteextattr_args *ap) 4916/* 4917vop_deleteextattr { 4918 IN struct vnode *a_vp; 4919 IN int a_attrnamespace; 4920 IN const char *a_name; 4921 IN struct ucred *a_cred; 4922 IN struct thread *a_td; 4923}; 4924*/ 4925{ 4926 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 4927 struct thread *td = ap->a_td; 4928 struct nameidata nd; 4929 char attrname[255]; 4930 struct vattr va; 4931 vnode_t *xvp = NULL, *vp; 4932 int error, flags; 4933 4934 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 4935 ap->a_cred, ap->a_td, VWRITE); 4936 if (error != 0) 4937 return (error); 4938 4939 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 4940 sizeof(attrname)); 4941 if (error != 0) 4942 return (error); 4943 4944 ZFS_ENTER(zfsvfs); 4945 4946 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 4947 LOOKUP_XATTR); 4948 if (error != 0) { 4949 ZFS_EXIT(zfsvfs); 4950 return (error); 4951 } 4952 4953 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF | MPSAFE, 4954 UIO_SYSSPACE, attrname, xvp, td); 4955 error = namei(&nd); 4956 vp = nd.ni_vp; 4957 NDFREE(&nd, NDF_ONLY_PNBUF); 4958 if (error != 0) { 4959 ZFS_EXIT(zfsvfs); 4960 if (error == ENOENT) 4961 error = ENOATTR; 4962 return (error); 4963 } 4964 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 4965 4966 vput(nd.ni_dvp); 4967 if (vp == nd.ni_dvp) 4968 vrele(vp); 4969 else 4970 vput(vp); 4971 ZFS_EXIT(zfsvfs); 4972 4973 return (error); 4974} 4975 4976/* 4977 * Vnode operation to set a named attribute. 
4978 */ 4979static int 4980zfs_setextattr(struct vop_setextattr_args *ap) 4981/* 4982vop_setextattr { 4983 IN struct vnode *a_vp; 4984 IN int a_attrnamespace; 4985 IN const char *a_name; 4986 INOUT struct uio *a_uio; 4987 IN struct ucred *a_cred; 4988 IN struct thread *a_td; 4989}; 4990*/ 4991{ 4992 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 4993 struct thread *td = ap->a_td; 4994 struct nameidata nd; 4995 char attrname[255]; 4996 struct vattr va; 4997 vnode_t *xvp = NULL, *vp; 4998 int error, flags; 4999 5000 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5001 ap->a_cred, ap->a_td, VWRITE); 5002 if (error != 0) 5003 return (error); 5004 5005 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5006 sizeof(attrname)); 5007 if (error != 0) 5008 return (error); 5009 5010 ZFS_ENTER(zfsvfs); 5011 5012 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5013 LOOKUP_XATTR | CREATE_XATTR_DIR); 5014 if (error != 0) { 5015 ZFS_EXIT(zfsvfs); 5016 return (error); 5017 } 5018 5019 flags = FFLAGS(O_WRONLY | O_CREAT); 5020 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, attrname, 5021 xvp, td); 5022 error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); 5023 vp = nd.ni_vp; 5024 NDFREE(&nd, NDF_ONLY_PNBUF); 5025 if (error != 0) { 5026 ZFS_EXIT(zfsvfs); 5027 return (error); 5028 } 5029 5030 VATTR_NULL(&va); 5031 va.va_size = 0; 5032 error = VOP_SETATTR(vp, &va, ap->a_cred); 5033 if (error == 0) 5034 VOP_WRITE(vp, ap->a_uio, IO_UNIT | IO_SYNC, ap->a_cred); 5035 5036 VOP_UNLOCK(vp, 0); 5037 vn_close(vp, flags, ap->a_cred, td); 5038 ZFS_EXIT(zfsvfs); 5039 5040 return (error); 5041} 5042 5043/* 5044 * Vnode operation to retrieve extended attributes on a vnode. 
 * Walks the hidden xattr directory with VOP_READDIR() and emits, for
 * every matching entry, one length byte followed by the attribute name
 * (the extattr list format).  With a_size set, only the total size of
 * that encoding is accumulated.
 */
static int
zfs_listextattr(struct vop_listextattr_args *ap)
/*
vop_listextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs;
	struct thread *td = ap->a_td;
	struct nameidata nd;
	char attrprefix[16];
	u_char dirbuf[sizeof(struct dirent)];
	struct dirent *dp;
	struct iovec aiov;
	struct uio auio, *uio = ap->a_uio;
	size_t *sizep = ap->a_size;
	size_t plen;
	vnode_t *xvp = NULL, *vp;
	int done, error, eof, pos;

	/* The caller must be allowed to read this attribute namespace. */
	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VREAD);
	if (error != 0)
		return (error);

	/*
	 * Build the per-namespace name prefix ("" for user, "freebsd:..."
	 * for system); only entries carrying it belong to this namespace.
	 */
	error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
	    sizeof(attrprefix));
	if (error != 0)
		return (error);
	plen = strlen(attrprefix);

	ZFS_ENTER(zfsvfs);

	if (sizep != NULL)
		*sizep = 0;

	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td,
	    LOOKUP_XATTR);
	if (error != 0) {
		ZFS_EXIT(zfsvfs);
		/*
		 * ENOATTR means that the EA directory does not yet exist,
		 * i.e. there are no extended attributes there.
		 */
		if (error == ENOATTR)
			error = 0;
		return (error);
	}

	/* Get a locked vnode for the xattr directory itself. */
	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE,
	    UIO_SYSSPACE, ".", xvp, td);
	error = namei(&nd);
	vp = nd.ni_vp;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (error != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/* Kernel-space uio reused for each VOP_READDIR() chunk. */
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_td = td;
	auio.uio_rw = UIO_READ;
	auio.uio_offset = 0;

	do {
		u_char nlen;

		aiov.iov_base = (void *)dirbuf;
		aiov.iov_len = sizeof(dirbuf);
		auio.uio_resid = sizeof(dirbuf);
		error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL);
		done = sizeof(dirbuf) - auio.uio_resid;
		if (error != 0)
			break;
		/* Walk the variable-length dirents returned in this chunk. */
		for (pos = 0; pos < done;) {
			dp = (struct dirent *)(dirbuf + pos);
			pos += dp->d_reclen;
			/*
			 * XXX: Temporarily we also accept DT_UNKNOWN, as this
			 * is what we get when attribute was created on Solaris.
			 */
			if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN)
				continue;
			/*
			 * In the unprefixed (user) namespace, hide the
			 * "freebsd:"-prefixed files that carry the system
			 * namespace; otherwise require our own prefix.
			 */
			if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0)
				continue;
			else if (strncmp(dp->d_name, attrprefix, plen) != 0)
				continue;
			nlen = dp->d_namlen - plen;
			if (sizep != NULL)
				*sizep += 1 + nlen;
			else if (uio != NULL) {
				/*
				 * Format of extattr name entry is one byte for
				 * length and the rest for name.
				 */
				error = uiomove(&nlen, 1, uio->uio_rw, uio);
				if (error == 0) {
					error = uiomove(dp->d_name + plen, nlen,
					    uio->uio_rw, uio);
				}
				if (error != 0)
					break;
			}
		}
	} while (!eof && error == 0);

	vput(vp);
	ZFS_EXIT(zfsvfs);

	return (error);
}

/*
 * Vnode operation to get an NFSv4 ACL.  Fetches the ZFS ACEs and
 * converts them to a FreeBSD struct acl for the caller.
 */
int
zfs_freebsd_getacl(ap)
	struct vop_getacl_args /* {
		struct vnode *vp;
		acl_type_t type;
		struct acl *aclp;
		struct ucred *cred;
		struct thread *td;
	} */ *ap;
{
	int error;
	vsecattr_t vsecattr;

	/* ZFS stores only NFSv4-style ACLs; POSIX.1e types are rejected. */
	if (ap->a_type != ACL_TYPE_NFS4)
		return (EINVAL);

	vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
	if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL))
		return (error);

	error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt);
	/* zfs_getsecattr() allocated the ACE buffer; free it either way. */
	if (vsecattr.vsa_aclentp != NULL)
		kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz);

	return (error);
}

/*
 * Vnode operation to set an NFSv4 ACL.  Validates the caller-supplied
 * struct acl, converts it to ZFS ACEs, and stores it.
 */
int
zfs_freebsd_setacl(ap)
	struct vop_setacl_args /* {
		struct vnode *vp;
		acl_type_t type;
		struct acl *aclp;
		struct ucred *cred;
		struct thread *td;
	} */ *ap;
{
	int error;
	vsecattr_t vsecattr;
	int aclbsize;	/* size of acl list in bytes */
	aclent_t *aaclp;

	/* ZFS stores only NFSv4-style ACLs; POSIX.1e types are rejected. */
	if (ap->a_type != ACL_TYPE_NFS4)
		return (EINVAL);

	if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES)
		return (EINVAL);

	/*
	 * With NFSv4 ACLs, chmod(2) may need to add additional entries,
	 * splitting every entry into two and appending "canonical six"
	 * entries at the end.  Don't allow for setting an ACL that would
	 * cause chmod(2) to run out of ACL entries.
	 */
	if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES)
		return (ENOSPC);

	error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR);
	if (error != 0)
		return (error);

	vsecattr.vsa_mask = VSA_ACE;
	aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t);
	vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP);
	/* Keep our own copy of the pointer for the free below. */
	aaclp = vsecattr.vsa_aclentp;
	vsecattr.vsa_aclentsz = aclbsize;

	aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp);
	error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL);
	kmem_free(aaclp, aclbsize);

	return (error);
}

/*
 * Vnode operation to check an ACL; not implemented for ZFS.
 */
int
zfs_freebsd_aclcheck(ap)
	struct vop_aclcheck_args /* {
		struct vnode *vp;
		acl_type_t type;
		struct acl *aclp;
		struct ucred *cred;
		struct thread *td;
	} */ *ap;
{

	return (EOPNOTSUPP);
}

struct vop_vector zfs_vnodeops;
struct vop_vector zfs_fifoops;
struct vop_vector zfs_shareops;

/*
 * Vnode operations template for regular ZFS files and directories.
 */
struct vop_vector zfs_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_inactive =		zfs_freebsd_inactive,
	.vop_reclaim =		zfs_freebsd_reclaim,
	.vop_access =		zfs_freebsd_access,
#ifdef FREEBSD_NAMECACHE
	.vop_lookup =		vfs_cache_lookup,
	.vop_cachedlookup =	zfs_freebsd_lookup,
#else
	.vop_lookup =		zfs_freebsd_lookup,
#endif
	.vop_getattr =		zfs_freebsd_getattr,
	.vop_setattr =		zfs_freebsd_setattr,
	.vop_create =		zfs_freebsd_create,
	.vop_mknod =		zfs_freebsd_create,
	.vop_mkdir =		zfs_freebsd_mkdir,
	.vop_readdir =		zfs_freebsd_readdir,
	.vop_fsync =		zfs_freebsd_fsync,
	.vop_open =		zfs_freebsd_open,
	.vop_close =		zfs_freebsd_close,
	.vop_rmdir =		zfs_freebsd_rmdir,
	.vop_ioctl =		zfs_freebsd_ioctl,
	.vop_link =		zfs_freebsd_link,
	.vop_symlink =		zfs_freebsd_symlink,
	.vop_readlink =		zfs_freebsd_readlink,
	.vop_read =		zfs_freebsd_read,
	.vop_write =		zfs_freebsd_write,
	.vop_remove =		zfs_freebsd_remove,
	.vop_rename =		zfs_freebsd_rename,
	.vop_pathconf =		zfs_freebsd_pathconf,
	.vop_bmap =		VOP_EOPNOTSUPP,
	.vop_fid =		zfs_freebsd_fid,
	.vop_getextattr =	zfs_getextattr,
	.vop_deleteextattr =	zfs_deleteextattr,
	.vop_setextattr =	zfs_setextattr,
	.vop_listextattr =	zfs_listextattr,
	.vop_getacl =		zfs_freebsd_getacl,
	.vop_setacl =		zfs_freebsd_setacl,
	.vop_aclcheck =		zfs_freebsd_aclcheck,
};

/*
 * Vnode operations template for fifos on ZFS; read/write go through
 * the fifo layer (fifo_specops), hence VOP_PANIC here.
 */
struct vop_vector zfs_fifoops = {
	.vop_default =		&fifo_specops,
	.vop_fsync =		zfs_freebsd_fsync,
	.vop_access =		zfs_freebsd_access,
	.vop_getattr =		zfs_freebsd_getattr,
	.vop_inactive =		zfs_freebsd_inactive,
	.vop_read =		VOP_PANIC,
	.vop_reclaim =		zfs_freebsd_reclaim,
	.vop_setattr =		zfs_freebsd_setattr,
	.vop_write =		VOP_PANIC,
	.vop_pathconf = 	zfs_freebsd_fifo_pathconf,
	.vop_fid =		zfs_freebsd_fid,
	.vop_getacl =		zfs_freebsd_getacl,
	.vop_setacl =		zfs_freebsd_setacl,
	.vop_aclcheck =		zfs_freebsd_aclcheck,
};

/*
 * special share hidden files vnode operations template
 */
struct vop_vector zfs_shareops = {
	.vop_default =		&default_vnodeops,
	.vop_access =		zfs_freebsd_access,
	.vop_inactive =		zfs_freebsd_inactive,
	.vop_reclaim =		zfs_freebsd_reclaim,
	.vop_fid =		zfs_freebsd_fid,
	.vop_pathconf =		zfs_freebsd_pathconf,
};