zfs_vnops.c revision 214378
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25/* Portions Copyright 2007 Jeremy Teo */ 26 27#include <sys/types.h> 28#include <sys/param.h> 29#include <sys/time.h> 30#include <sys/systm.h> 31#include <sys/sysmacros.h> 32#include <sys/resource.h> 33#include <sys/vfs.h> 34#include <sys/vnode.h> 35#include <sys/file.h> 36#include <sys/stat.h> 37#include <sys/kmem.h> 38#include <sys/taskq.h> 39#include <sys/uio.h> 40#include <sys/atomic.h> 41#include <sys/namei.h> 42#include <sys/mman.h> 43#include <sys/cmn_err.h> 44#include <sys/errno.h> 45#include <sys/unistd.h> 46#include <sys/zfs_dir.h> 47#include <sys/zfs_ioctl.h> 48#include <sys/fs/zfs.h> 49#include <sys/dmu.h> 50#include <sys/spa.h> 51#include <sys/txg.h> 52#include <sys/dbuf.h> 53#include <sys/zap.h> 54#include <sys/dirent.h> 55#include <sys/policy.h> 56#include <sys/sunddi.h> 57#include <sys/filio.h> 58#include <sys/sid.h> 59#include <sys/zfs_ctldir.h> 60#include <sys/zfs_fuid.h> 61#include <sys/dnlc.h> 62#include <sys/zfs_rlock.h> 63#include <sys/extdirent.h> 64#include <sys/kidmap.h> 65#include <sys/bio.h> 66#include 
<sys/buf.h> 67#include <sys/sf_buf.h> 68#include <sys/sched.h> 69#include <sys/acl.h> 70 71/* 72 * Programming rules. 73 * 74 * Each vnode op performs some logical unit of work. To do this, the ZPL must 75 * properly lock its in-core state, create a DMU transaction, do the work, 76 * record this work in the intent log (ZIL), commit the DMU transaction, 77 * and wait for the intent log to commit if it is a synchronous operation. 78 * Moreover, the vnode ops must work in both normal and log replay context. 79 * The ordering of events is important to avoid deadlocks and references 80 * to freed memory. The example below illustrates the following Big Rules: 81 * 82 * (1) A check must be made in each zfs thread for a mounted file system. 83 * This is done avoiding races using ZFS_ENTER(zfsvfs). 84 * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 85 * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 86 * can return EIO from the calling function. 87 * 88 * (2) VN_RELE() should always be the last thing except for zil_commit() 89 * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 90 * First, if it's the last reference, the vnode/znode 91 * can be freed, so the zp may point to freed memory. Second, the last 92 * reference will call zfs_zinactive(), which may induce a lot of work -- 93 * pushing cached pages (which acquires range locks) and syncing out 94 * cached atime changes. Third, zfs_zinactive() may require a new tx, 95 * which could deadlock the system if you were already holding one. 96 * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 97 * 98 * (3) All range locks must be grabbed before calling dmu_tx_assign(), 99 * as they can span dmu_tx_assign() calls. 100 * 101 * (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign(). 102 * This is critical because we don't want to block while holding locks. 
103 * Note, in particular, that if a lock is sometimes acquired before 104 * the tx assigns, and sometimes after (e.g. z_lock), then failing to 105 * use a non-blocking assign can deadlock the system. The scenario: 106 * 107 * Thread A has grabbed a lock before calling dmu_tx_assign(). 108 * Thread B is in an already-assigned tx, and blocks for this lock. 109 * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 110 * forever, because the previous txg can't quiesce until B's tx commits. 111 * 112 * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 113 * then drop all locks, call dmu_tx_wait(), and try again. 114 * 115 * (5) If the operation succeeded, generate the intent log entry for it 116 * before dropping locks. This ensures that the ordering of events 117 * in the intent log matches the order in which they actually occurred. 118 * During ZIL replay the zfs_log_* functions will update the sequence 119 * number to indicate the zil transaction has replayed. 120 * 121 * (6) At the end of each vnode op, the DMU tx must always commit, 122 * regardless of whether there were any errors. 123 * 124 * (7) After dropping all locks, invoke zil_commit(zilog, seq, foid) 125 * to ensure that synchronous semantics are provided when necessary. 126 * 127 * In general, this is how things should be ordered in each vnode op: 128 * 129 * ZFS_ENTER(zfsvfs); // exit if unmounted 130 * top: 131 * zfs_dirent_lock(&dl, ...) 
// lock directory entry (may VN_HOLD()) 132 * rw_enter(...); // grab any other locks you need 133 * tx = dmu_tx_create(...); // get DMU tx 134 * dmu_tx_hold_*(); // hold each object you might modify 135 * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign 136 * if (error) { 137 * rw_exit(...); // drop locks 138 * zfs_dirent_unlock(dl); // unlock directory entry 139 * VN_RELE(...); // release held vnodes 140 * if (error == ERESTART) { 141 * dmu_tx_wait(tx); 142 * dmu_tx_abort(tx); 143 * goto top; 144 * } 145 * dmu_tx_abort(tx); // abort DMU tx 146 * ZFS_EXIT(zfsvfs); // finished in zfs 147 * return (error); // really out of space 148 * } 149 * error = do_real_work(); // do whatever this VOP does 150 * if (error == 0) 151 * zfs_log_*(...); // on success, make ZIL entry 152 * dmu_tx_commit(tx); // commit DMU tx -- error or not 153 * rw_exit(...); // drop locks 154 * zfs_dirent_unlock(dl); // unlock directory entry 155 * VN_RELE(...); // release held vnodes 156 * zil_commit(zilog, seq, foid); // synchronous when necessary 157 * ZFS_EXIT(zfsvfs); // finished in zfs 158 * return (error); // done, report error 159 */ 160 161/* ARGSUSED */ 162static int 163zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 164{ 165 znode_t *zp = VTOZ(*vpp); 166 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 167 168 ZFS_ENTER(zfsvfs); 169 ZFS_VERIFY_ZP(zp); 170 171 if ((flag & FWRITE) && (zp->z_phys->zp_flags & ZFS_APPENDONLY) && 172 ((flag & FAPPEND) == 0)) { 173 ZFS_EXIT(zfsvfs); 174 return (EPERM); 175 } 176 177 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 178 ZTOV(zp)->v_type == VREG && 179 !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) && 180 zp->z_phys->zp_size > 0) { 181 if (fs_vscan(*vpp, cr, 0) != 0) { 182 ZFS_EXIT(zfsvfs); 183 return (EACCES); 184 } 185 } 186 187 /* Keep a count of the synchronous opens in the znode */ 188 if (flag & (FSYNC | FDSYNC)) 189 atomic_inc_32(&zp->z_sync_cnt); 190 191 ZFS_EXIT(zfsvfs); 192 return (0); 193} 194 195/* ARGSUSED */ 196static int 
197zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 198 caller_context_t *ct) 199{ 200 znode_t *zp = VTOZ(vp); 201 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 202 203 /* 204 * Clean up any locks held by this process on the vp. 205 */ 206 cleanlocks(vp, ddi_get_pid(), 0); 207 cleanshares(vp, ddi_get_pid()); 208 209 ZFS_ENTER(zfsvfs); 210 ZFS_VERIFY_ZP(zp); 211 212 /* Decrement the synchronous opens in the znode */ 213 if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 214 atomic_dec_32(&zp->z_sync_cnt); 215 216 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 217 ZTOV(zp)->v_type == VREG && 218 !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) && 219 zp->z_phys->zp_size > 0) 220 VERIFY(fs_vscan(vp, cr, 1) == 0); 221 222 ZFS_EXIT(zfsvfs); 223 return (0); 224} 225 226/* 227 * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 228 * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 229 */ 230static int 231zfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 232{ 233 znode_t *zp = VTOZ(vp); 234 uint64_t noff = (uint64_t)*off; /* new offset */ 235 uint64_t file_sz; 236 int error; 237 boolean_t hole; 238 239 file_sz = zp->z_phys->zp_size; 240 if (noff >= file_sz) { 241 return (ENXIO); 242 } 243 244 if (cmd == _FIO_SEEK_HOLE) 245 hole = B_TRUE; 246 else 247 hole = B_FALSE; 248 249 error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 250 251 /* end of file? */ 252 if ((error == ESRCH) || (noff > file_sz)) { 253 /* 254 * Handle the virtual hole at the end of file. 
255 */ 256 if (hole) { 257 *off = file_sz; 258 return (0); 259 } 260 return (ENXIO); 261 } 262 263 if (noff < *off) 264 return (error); 265 *off = noff; 266 return (error); 267} 268 269/* ARGSUSED */ 270static int 271zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 272 int *rvalp, caller_context_t *ct) 273{ 274 offset_t off; 275 int error; 276 zfsvfs_t *zfsvfs; 277 znode_t *zp; 278 279 switch (com) { 280 case _FIOFFS: 281 return (0); 282 283 /* 284 * The following two ioctls are used by bfu. Faking out, 285 * necessary to avoid bfu errors. 286 */ 287 case _FIOGDIO: 288 case _FIOSDIO: 289 return (0); 290 291 case _FIO_SEEK_DATA: 292 case _FIO_SEEK_HOLE: 293 if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 294 return (EFAULT); 295 296 zp = VTOZ(vp); 297 zfsvfs = zp->z_zfsvfs; 298 ZFS_ENTER(zfsvfs); 299 ZFS_VERIFY_ZP(zp); 300 301 /* offset parameter is in/out */ 302 error = zfs_holey(vp, com, &off); 303 ZFS_EXIT(zfsvfs); 304 if (error) 305 return (error); 306 if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 307 return (EFAULT); 308 return (0); 309 } 310 return (ENOTTY); 311} 312 313static vm_page_t 314page_lookup(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 315{ 316 vm_object_t obj; 317 vm_page_t pp; 318 319 obj = vp->v_object; 320 VM_OBJECT_LOCK_ASSERT(obj, MA_OWNED); 321 322 for (;;) { 323 if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 324 vm_page_is_valid(pp, (vm_offset_t)off, nbytes)) { 325 if ((pp->oflags & VPO_BUSY) != 0) { 326 /* 327 * Reference the page before unlocking and 328 * sleeping so that the page daemon is less 329 * likely to reclaim it. 
330 */ 331 vm_page_lock_queues(); 332 vm_page_flag_set(pp, PG_REFERENCED); 333 vm_page_sleep(pp, "zfsmwb"); 334 continue; 335 } 336 vm_page_busy(pp); 337 vm_page_undirty(pp); 338 } else { 339 if (__predict_false(obj->cache != NULL)) { 340 vm_page_cache_free(obj, OFF_TO_IDX(start), 341 OFF_TO_IDX(start) + 1); 342 } 343 pp = NULL; 344 } 345 break; 346 } 347 return (pp); 348} 349 350static void 351page_unlock(vm_page_t pp) 352{ 353 354 vm_page_wakeup(pp); 355} 356 357static caddr_t 358zfs_map_page(vm_page_t pp, struct sf_buf **sfp) 359{ 360 361 *sfp = sf_buf_alloc(pp, 0); 362 return ((caddr_t)sf_buf_kva(*sfp)); 363} 364 365static void 366zfs_unmap_page(struct sf_buf *sf) 367{ 368 369 sf_buf_free(sf); 370} 371 372 373/* 374 * When a file is memory mapped, we must keep the IO data synchronized 375 * between the DMU cache and the memory mapped pages. What this means: 376 * 377 * On Write: If we find a memory mapped page, we write to *both* 378 * the page and the dmu buffer. 379 */ 380 381static void 382update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 383 int segflg, dmu_tx_t *tx) 384{ 385 vm_object_t obj; 386 struct sf_buf *sf; 387 int off; 388 389 ASSERT(vp->v_mount != NULL); 390 obj = vp->v_object; 391 ASSERT(obj != NULL); 392 393 off = start & PAGEOFFSET; 394 VM_OBJECT_LOCK(obj); 395 for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 396 vm_page_t pp; 397 int nbytes = MIN(PAGESIZE - off, len); 398 399 if ((pp = page_lookup(vp, start, off, nbytes)) != NULL) { 400 caddr_t va; 401 402 VM_OBJECT_UNLOCK(obj); 403 va = zfs_map_page(pp, &sf); 404 if (segflg == UIO_NOCOPY) { 405 (void) dmu_write(os, oid, start+off, nbytes, 406 va+off, tx); 407 } else { 408 (void) dmu_read(os, oid, start+off, nbytes, 409 va+off, DMU_READ_PREFETCH);; 410 } 411 zfs_unmap_page(sf); 412 VM_OBJECT_LOCK(obj); 413 page_unlock(pp); 414 415 } 416 len -= nbytes; 417 off = 0; 418 } 419 VM_OBJECT_UNLOCK(obj); 420} 421 422/* 423 * When a file is memory mapped, we must keep 
 * the IO data synchronized
 * between the DMU cache and the memory mapped pages.  What this means:
 *
 * On Read:	We "read" preferentially from memory mapped pages,
 *		else we default from the dmu buffer.
 *
 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
 *	the file is memory mapped.
 */
static int
mappedread(vnode_t *vp, int nbytes, uio_t *uio)
{
	znode_t *zp = VTOZ(vp);
	objset_t *os = zp->z_zfsvfs->z_os;
	vm_object_t obj;
	vm_page_t m;
	struct sf_buf *sf;
	int64_t start;
	caddr_t va;
	int len = nbytes;
	int off;
	int error = 0;
	uint64_t dirbytes;	/* run of bytes with no resident page; read
				   from the DMU in one batched dmu_read_uio() */

	ASSERT(vp->v_mount != NULL);
	obj = vp->v_object;
	ASSERT(obj != NULL);

	start = uio->uio_loffset;
	off = start & PAGEOFFSET;
	dirbytes = 0;
	VM_OBJECT_LOCK(obj);
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		int bytes = MIN(PAGESIZE - off, len);

again:
		if ((m = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
		    vm_page_is_valid(m, off, bytes)) {
			/* Valid resident page: copy directly from it. */
			if ((m->oflags & VPO_BUSY) != 0) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it.
				 */
				vm_page_lock_queues();
				vm_page_flag_set(m, PG_REFERENCED);
				vm_page_sleep(m, "zfsmrb");
				goto again;
			}

			vm_page_busy(m);
			VM_OBJECT_UNLOCK(obj);
			/* Flush any pending DMU-direct run first so the
			 * uio advances in file order. */
			if (dirbytes > 0) {
				error = dmu_read_uio(os, zp->z_id, uio,
				    dirbytes);
				dirbytes = 0;
			}
			if (error == 0)
				uiomove_fromphys(&m, off, bytes, uio);
			VM_OBJECT_LOCK(obj);
			vm_page_wakeup(m);
		} else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) {
			/*
			 * The code below is here to make sendfile(2) work
			 * correctly with ZFS.  As pointed out by ups@
			 * sendfile(2) should be changed to use VOP_GETPAGES(),
			 * but it pessimize performance of sendfile/UFS, that's
			 * why I handle this special case in ZFS code.
			 */
			KASSERT(off == 0,
			    ("unexpected offset in mappedread for sendfile"));
			if ((m->oflags & VPO_BUSY) != 0) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it.
				 */
				vm_page_lock_queues();
				vm_page_flag_set(m, PG_REFERENCED);
				vm_page_sleep(m, "zfsmrb");
				goto again;
			}
			vm_page_busy(m);
			VM_OBJECT_UNLOCK(obj);
			if (dirbytes > 0) {
				error = dmu_read_uio(os, zp->z_id, uio,
				    dirbytes);
				dirbytes = 0;
			}
			/* Fill the invalid page from the DMU and mark it
			 * fully valid; the uio is advanced by hand since
			 * NOCOPY moves no data to the caller. */
			if (error == 0) {
				va = zfs_map_page(m, &sf);
				error = dmu_read(os, zp->z_id, start, bytes, va,
				    DMU_READ_PREFETCH);
				if (bytes != PAGE_SIZE)
					bzero(va + bytes, PAGE_SIZE - bytes);
				zfs_unmap_page(sf);
			}
			VM_OBJECT_LOCK(obj);
			if (error == 0)
				m->valid = VM_PAGE_BITS_ALL;
			vm_page_wakeup(m);
			if (error == 0) {
				uio->uio_resid -= bytes;
				uio->uio_offset += bytes;
			}
		} else {
			/* No usable page: defer to a batched DMU read. */
			dirbytes += bytes;
		}
		len -= bytes;
		off = 0;
		if (error)
			break;
	}
	VM_OBJECT_UNLOCK(obj);
	if (error == 0 && dirbytes > 0)
		error = dmu_read_uio(os, zp->z_id, uio, dirbytes);
	return (error);
}

offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */

/*
 * Read bytes from specified file into supplied buffer.
 *
 * IN:	vp	- vnode of file to be read from.
 *	uio	- structure supplying read location, range info,
 *		  and return buffer.
 *	ioflag	- SYNC flags; used to provide FRSYNC semantics.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *
 * OUT:	uio	- updated offset and range, buffer filled.
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Side Effects:
 *	vp - atime updated if byte count > 0
 */
/* ARGSUSED */
static int
zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	objset_t	*os;
	ssize_t		n, nbytes;
	int		error;
	rl_t		*rl;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);
	os = zfsvfs->z_os;

	/* Quarantined files may not be read. */
	if (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) {
		ZFS_EXIT(zfsvfs);
		return (EACCES);
	}

	/*
	 * Validate file offset
	 */
	if (uio->uio_loffset < (offset_t)0) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * Fasttrack empty reads
	 */
	if (uio->uio_resid == 0) {
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	/*
	 * Check for mandatory locks
	 */
	if (MANDMODE((mode_t)zp->z_phys->zp_mode)) {
		if (error = chklock(vp, FREAD,
		    uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/*
	 * If we're in FRSYNC mode, sync out this znode before reading it.
	 */
	if (ioflag & FRSYNC)
		zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id);

	/*
	 * Lock the range against changes.
	 */
	rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER);

	/*
	 * If we are reading past end-of-file we can skip
	 * to the end; but we might still need to set atime.
	 */
	if (uio->uio_loffset >= zp->z_phys->zp_size) {
		error = 0;
		goto out;
	}

	ASSERT(uio->uio_loffset < zp->z_phys->zp_size);
	n = MIN(uio->uio_resid, zp->z_phys->zp_size - uio->uio_loffset);

	/* Read in chunks of at most zfs_read_chunk_size, aligned to the
	 * chunk boundary, routing through mappedread() when the vnode
	 * has cached pages. */
	while (n > 0) {
		nbytes = MIN(n, zfs_read_chunk_size -
		    P2PHASE(uio->uio_loffset, zfs_read_chunk_size));

		if (vn_has_cached_data(vp))
			error = mappedread(vp, nbytes, uio);
		else
			error = dmu_read_uio(os, zp->z_id, uio, nbytes);
		if (error) {
			/* convert checksum errors into IO errors */
			if (error == ECKSUM)
				error = EIO;
			break;
		}

		n -= nbytes;
	}

out:
	zfs_range_unlock(rl);

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Fault in the pages of the first n bytes specified by the uio structure.
 * 1 byte in each page is touched and the uio struct is unmodified.
 * Any error will exit this routine as this is only a best
 * attempt to get the pages resident. This is a copy of ufs_trans_touch().
 */
static void
zfs_prefault_write(ssize_t n, struct uio *uio)
{
	struct iovec *iov;
	ulong_t cnt, incr;
	caddr_t p;

	/* Only user-space buffers can fault; kernel buffers are resident. */
	if (uio->uio_segflg != UIO_USERSPACE)
		return;

	iov = uio->uio_iov;

	while (n) {
		cnt = MIN(iov->iov_len, n);
		if (cnt == 0) {
			/* empty iov entry */
			iov++;
			continue;
		}
		n -= cnt;
		/*
		 * touch each page in this segment.
		 */
		p = iov->iov_base;
		while (cnt) {
			if (fubyte(p) == -1)
				return;
			incr = MIN(cnt, PAGESIZE);
			p += incr;
			cnt -= incr;
		}
		/*
		 * touch the last byte in case it straddles a page.
		 */
		p--;
		if (fubyte(p) == -1)
			return;
		iov++;
	}
}

/*
 * Write the bytes to a file.
 *
 * IN:	vp	- vnode of file to be written to.
 *	uio	- structure supplying write location, range info,
 *		  and data buffer.
 *	ioflag	- FAPPEND flag set if in append mode.
 *	cr	- credentials of caller.
 *	ct	- caller context (NFS/CIFS fem monitor only)
 *
 * OUT:	uio	- updated offset and range.
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	vp - ctime|mtime updated if byte count > 0
 */
/* ARGSUSED */
static int
zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	rlim64_t	limit = MAXOFFSET_T;
	ssize_t		start_resid = uio->uio_resid;
	ssize_t		tx_bytes;
	uint64_t	end_size;
	dmu_tx_t	*tx;
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	zilog_t		*zilog;
	offset_t	woff;
	ssize_t		n, nbytes;
	rl_t		*rl;
	int		max_blksz = zfsvfs->z_max_blksz;
	uint64_t	pflags;
	int		error;
	arc_buf_t	*abuf;

	/*
	 * Fasttrack empty write
	 */
	n = start_resid;
	if (n == 0)
		return (0);

	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
		limit = MAXOFFSET_T;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/*
	 * If immutable or not appending then return EPERM
	 */
	pflags = zp->z_phys->zp_flags;
	if ((pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) ||
	    ((pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
	    (uio->uio_loffset < zp->z_phys->zp_size))) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	zilog = zfsvfs->z_log;

	/*
	 * Pre-fault the pages to ensure slow (eg NFS) pages
	 * don't hold up txg.
	 */
	zfs_prefault_write(n, uio);

	/*
	 * If in append mode, set the io offset pointer to eof.
	 */
	if (ioflag & FAPPEND) {
		/*
		 * Range lock for a file append:
		 * The value for the start of range will be determined by
		 * zfs_range_lock() (to guarantee append semantics).
		 * If this write will cause the block size to increase,
		 * zfs_range_lock() will lock the entire file, so we must
		 * later reduce the range after we grow the block size.
		 */
		rl = zfs_range_lock(zp, 0, n, RL_APPEND);
		if (rl->r_len == UINT64_MAX) {
			/* overlocked, zp_size can't change */
			woff = uio->uio_loffset = zp->z_phys->zp_size;
		} else {
			woff = uio->uio_loffset = rl->r_off;
		}
	} else {
		woff = uio->uio_loffset;
		/*
		 * Validate file offset
		 */
		if (woff < 0) {
			ZFS_EXIT(zfsvfs);
			return (EINVAL);
		}

		/*
		 * If we need to grow the block size then zfs_range_lock()
		 * will lock a wider range than we request here.
		 * Later after growing the block size we reduce the range.
		 */
		rl = zfs_range_lock(zp, woff, n, RL_WRITER);
	}

	if (woff >= limit) {
		zfs_range_unlock(rl);
		ZFS_EXIT(zfsvfs);
		return (EFBIG);
	}

	/* Clamp the write so it does not extend past the offset limit. */
	if ((woff + n) > limit || woff > (limit - n))
		n = limit - woff;

	/*
	 * Check for mandatory locks
	 */
	if (MANDMODE((mode_t)zp->z_phys->zp_mode) &&
	    (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) {
		zfs_range_unlock(rl);
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	end_size = MAX(zp->z_phys->zp_size, woff + n);

	/*
	 * Write the file in reasonable size chunks.  Each chunk is written
	 * in a separate transaction; this keeps the intent log records small
	 * and allows us to do more fine-grained space accounting.
	 */
	while (n > 0) {
		abuf = NULL;
		woff = uio->uio_loffset;

again:
		if (zfs_usergroup_overquota(zfsvfs,
		    B_FALSE, zp->z_phys->zp_uid) ||
		    zfs_usergroup_overquota(zfsvfs,
		    B_TRUE, zp->z_phys->zp_gid)) {
			if (abuf != NULL)
				dmu_return_arcbuf(abuf);
			error = EDQUOT;
			break;
		}

		/*
		 * If dmu_assign_arcbuf() is expected to execute with minimum
		 * overhead loan an arc buffer and copy user data to it before
		 * we enter a txg.  This avoids holding a txg forever while we
		 * pagefault on a hanging NFS server mapping.
		 */
		if (abuf == NULL && n >= max_blksz &&
		    woff >= zp->z_phys->zp_size &&
		    P2PHASE(woff, max_blksz) == 0 &&
		    zp->z_blksz == max_blksz) {
			size_t cbytes;

			abuf = dmu_request_arcbuf(zp->z_dbuf, max_blksz);
			ASSERT(abuf != NULL);
			ASSERT(arc_buf_size(abuf) == max_blksz);
			if (error = uiocopy(abuf->b_data, max_blksz,
			    UIO_WRITE, uio, &cbytes)) {
				dmu_return_arcbuf(abuf);
				break;
			}
			ASSERT(cbytes == max_blksz);
		}

		/*
		 * Start a transaction.
		 */
		tx = dmu_tx_create(zfsvfs->z_os);
		dmu_tx_hold_bonus(tx, zp->z_id);
		dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
		error = dmu_tx_assign(tx, TXG_NOWAIT);
		if (error) {
			if (error == ERESTART) {
				/* Txg full: wait for the next one and retry
				 * (see Big Rule 4 at the top of the file). */
				dmu_tx_wait(tx);
				dmu_tx_abort(tx);
				goto again;
			}
			dmu_tx_abort(tx);
			if (abuf != NULL)
				dmu_return_arcbuf(abuf);
			break;
		}

		/*
		 * If zfs_range_lock() over-locked we grow the blocksize
		 * and then reduce the lock range.  This will only happen
		 * on the first iteration since zfs_range_reduce() will
		 * shrink down r_len to the appropriate size.
		 */
		if (rl->r_len == UINT64_MAX) {
			uint64_t new_blksz;

			if (zp->z_blksz > max_blksz) {
				ASSERT(!ISP2(zp->z_blksz));
				new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE);
			} else {
				new_blksz = MIN(end_size, max_blksz);
			}
			zfs_grow_blocksize(zp, new_blksz, tx);
			zfs_range_reduce(rl, woff, n);
		}

		/*
		 * XXX - should we really limit each write to z_max_blksz?
		 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
		 */
		nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));

		if (woff + nbytes > zp->z_phys->zp_size)
			vnode_pager_setsize(vp, woff + nbytes);

		if (abuf == NULL) {
			tx_bytes = uio->uio_resid;
			error = dmu_write_uio(zfsvfs->z_os, zp->z_id, uio,
			    nbytes, tx);
			tx_bytes -= uio->uio_resid;
		} else {
			/* Loaned arc buffer path: hand the pre-filled
			 * buffer to the DMU and advance the uio by hand. */
			tx_bytes = nbytes;
			ASSERT(tx_bytes == max_blksz);
			dmu_assign_arcbuf(zp->z_dbuf, woff, abuf, tx);
			ASSERT(tx_bytes <= uio->uio_resid);
			uioskip(uio, tx_bytes);
		}

		if (tx_bytes && vn_has_cached_data(vp)) {
			update_pages(vp, woff, tx_bytes, zfsvfs->z_os,
			    zp->z_id, uio->uio_segflg, tx);
		}

		/*
		 * If we made no progress, we're done.  If we made even
		 * partial progress, update the znode and ZIL accordingly.
		 */
		if (tx_bytes == 0) {
			dmu_tx_commit(tx);
			ASSERT(error != 0);
			break;
		}

		/*
		 * Clear Set-UID/Set-GID bits on successful write if not
		 * privileged and at least one of the execute bits is set.
		 *
		 * It would be nice to do this after all writes have
		 * been done, but that would still expose the ISUID/ISGID
		 * to another app after the partial write is committed.
		 *
		 * Note: we don't call zfs_fuid_map_id() here because
		 * user 0 is not an ephemeral uid.
		 */
		mutex_enter(&zp->z_acl_lock);
		if ((zp->z_phys->zp_mode & (S_IXUSR | (S_IXUSR >> 3) |
		    (S_IXUSR >> 6))) != 0 &&
		    (zp->z_phys->zp_mode & (S_ISUID | S_ISGID)) != 0 &&
		    secpolicy_vnode_setid_retain(vp, cr,
		    (zp->z_phys->zp_mode & S_ISUID) != 0 &&
		    zp->z_phys->zp_uid == 0) != 0) {
			zp->z_phys->zp_mode &= ~(S_ISUID | S_ISGID);
		}
		mutex_exit(&zp->z_acl_lock);

		/*
		 * Update time stamp.  NOTE: This marks the bonus buffer as
		 * dirty, so we don't have to do it again for zp_size.
		 */
		zfs_time_stamper(zp, CONTENT_MODIFIED, tx);

		/*
		 * Update the file size (zp_size) if it has changed;
		 * account for possible concurrent updates.
		 */
		while ((end_size = zp->z_phys->zp_size) < uio->uio_loffset)
			(void) atomic_cas_64(&zp->z_phys->zp_size, end_size,
			    uio->uio_loffset);
		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
		dmu_tx_commit(tx);

		if (error != 0)
			break;
		ASSERT(tx_bytes == nbytes);
		n -= nbytes;
	}

	zfs_range_unlock(rl);

	/*
	 * If we're in replay mode, or we made no progress, return error.
	 * Otherwise, it's at least a partial write, so it's successful.
	 */
	if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (ioflag & (FSYNC | FDSYNC))
		zil_commit(zilog, zp->z_last_itx, zp->z_id);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Callback invoked when a dmu_sync() write issued by zfs_get_data()
 * completes: drops the dbuf hold and range lock taken there, records
 * the block in the ZIL, and frees the zgd bookkeeping structure.
 */
void
zfs_get_done(dmu_buf_t *db, void *vzgd)
{
	zgd_t *zgd = (zgd_t *)vzgd;
	rl_t *rl = zgd->zgd_rl;
	vnode_t *vp = ZTOV(rl->r_zp);
	objset_t *os = rl->r_zp->z_zfsvfs->z_os;
	int vfslocked;

	vfslocked = VFS_LOCK_GIANT(vp->v_vfsp);
	dmu_buf_rele(db, vzgd);
	zfs_range_unlock(rl);
	/*
	 * Release the vnode asynchronously as we currently have the
	 * txg stopped from syncing.
	 */
	VN_RELE_ASYNC(vp, dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
	zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
	kmem_free(zgd, sizeof (zgd_t));
	VFS_UNLOCK_GIANT(vfslocked);
}

#ifdef DEBUG
/* Debug knob: force the next indirect-write dbuf hold to fail with EIO. */
static int zil_fault_io = 0;
#endif

/*
 * Get data to generate a TX_WRITE intent log record.
 */
int
zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
{
	zfsvfs_t *zfsvfs = arg;
	objset_t *os = zfsvfs->z_os;
	znode_t *zp;
	uint64_t off = lr->lr_offset;
	dmu_buf_t *db;
	rl_t *rl;
	zgd_t *zgd;
	int dlen = lr->lr_length;	/* length of user data */
	int error = 0;

	ASSERT(zio);
	ASSERT(dlen != 0);

	/*
	 * Nothing to do if the file has been removed
	 */
	if (zfs_zget(zfsvfs, lr->lr_foid, &zp) != 0)
		return (ENOENT);
	if (zp->z_unlinked) {
		/*
		 * Release the vnode asynchronously as we currently have the
		 * txg stopped from syncing.
		 */
		VN_RELE_ASYNC(ZTOV(zp),
		    dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
		return (ENOENT);
	}

	/*
	 * Write records come in two flavors: immediate and indirect.
	 * For small writes it's cheaper to store the data with the
	 * log record (immediate); for large writes it's cheaper to
	 * sync the data and get a pointer to it (indirect) so that
	 * we don't have to write the data twice.
	 */
	if (buf != NULL) { /* immediate write */
		rl = zfs_range_lock(zp, off, dlen, RL_READER);
		/* test for truncation needs to be done while range locked */
		if (off >= zp->z_phys->zp_size) {
			error = ENOENT;
			goto out;
		}
		VERIFY(0 == dmu_read(os, lr->lr_foid, off, dlen, buf,
		    DMU_READ_NO_PREFETCH));
	} else { /* indirect write */
		uint64_t boff; /* block starting offset */

		/*
		 * Have to lock the whole block to ensure when it's
		 * written out and it's checksum is being calculated
		 * that no one can change the data.  We need to re-check
		 * blocksize after we get the lock in case it's changed!
		 */
		for (;;) {
			if (ISP2(zp->z_blksz)) {
				boff = P2ALIGN_TYPED(off, zp->z_blksz,
				    uint64_t);
			} else {
				/* Non-power-of-2 blocksize: single-block
				 * file, so the block starts at offset 0. */
				boff = 0;
			}
			dlen = zp->z_blksz;
			rl = zfs_range_lock(zp, boff, dlen, RL_READER);
			if (zp->z_blksz == dlen)
				break;
			zfs_range_unlock(rl);
		}
		/* test for truncation needs to be done while range locked */
		if (off >= zp->z_phys->zp_size) {
			error = ENOENT;
			goto out;
		}
		zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP);
		zgd->zgd_rl = rl;
		zgd->zgd_zilog = zfsvfs->z_log;
		zgd->zgd_bp = &lr->lr_blkptr;
#ifdef DEBUG
		if (zil_fault_io) {
			error = EIO;
			zil_fault_io = 0;
		} else {
			error = dmu_buf_hold(os, lr->lr_foid, boff, zgd, &db);
		}
#else
		error = dmu_buf_hold(os, lr->lr_foid, boff, zgd, &db);
#endif
		if (error != 0) {
			kmem_free(zgd, sizeof (zgd_t));
			goto out;
		}

		ASSERT(boff == db->db_offset);
		lr->lr_blkoff = off - boff;
		error = dmu_sync(zio, db, &lr->lr_blkptr,
		    lr->lr_common.lrc_txg, zfs_get_done, zgd);
		ASSERT((error && error != EINPROGRESS) ||
		    lr->lr_length <= zp->z_blksz);
		if (error == 0) {
			/*
			 * dmu_sync() can compress a block of zeros to a null
			 * blkptr but the block size still needs to be passed
			 * through to replay.
			 */
			BP_SET_LSIZE(&lr->lr_blkptr, db->db_size);
			zil_add_block(zfsvfs->z_log, &lr->lr_blkptr);
		}

		/*
		 * If we get EINPROGRESS, then we need to wait for a
		 * write IO initiated by dmu_sync() to complete before
		 * we can release this dbuf.  We will finish everything
		 * up in the zfs_get_done() callback.
		 */
		if (error == EINPROGRESS) {
			return (0);
		} else if (error == EALREADY) {
			/* Block already committed: log as a WRITE2 so
			 * replay references the existing block. */
			lr->lr_common.lrc_txtype = TX_WRITE2;
			error = 0;
		}
		dmu_buf_rele(db, zgd);
		kmem_free(zgd, sizeof (zgd_t));
	}
out:
	zfs_range_unlock(rl);
	/*
	 * Release the vnode asynchronously as we currently have the
	 * txg stopped from syncing.
	 */
	VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
	return (error);
}

/*
 * Check access to a file with the given mode/flags against the file's
 * ACL (V_ACE_MASK) or traditional rwx bits.
 */
/*ARGSUSED*/
static int
zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if (flag & V_ACE_MASK)
		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
	else
		error = zfs_zaccess_rwx(zp, mode, flag, cr);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * If vnode is for a device return a specfs vnode instead.
 */
static int
specvp_check(vnode_t **vpp, cred_t *cr)
{
	int error = 0;

	if (IS_DEVVP(*vpp)) {
		struct vnode *svp;

		/* Swap the held reference: release the ZFS vnode and
		 * hand back the specfs shadow vnode in its place. */
		svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
		VN_RELE(*vpp);
		if (svp == NULL)
			error = ENOSYS;
		*vpp = svp;
	}
	return (error);
}


/*
 * Lookup an entry in a directory, or an extended attribute directory.
 * If it exists, return a held vnode reference for it.
 *
 * IN:	dvp	- vnode of directory to search.
 *	nm	- name of entry to lookup.
 *	pnp	- full pathname to lookup [UNUSED].
 *	flags	- LOOKUP_XATTR set if looking for an attribute.
 *	rdir	- root directory vnode [UNUSED].
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	direntflags - directory lookup flags
 *	realpnp - returned pathname.
 *
 * OUT:	vpp	- vnode of located entry, NULL if not found.
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	NA
 */
/* ARGSUSED */
static int
zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp,
    int nameiop, cred_t *cr, kthread_t *td, int flags)
{
	znode_t *zdp = VTOZ(dvp);
	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
	int	error = 0;
	/* NOTE(review): Solaris-only outputs, unused in the FreeBSD port. */
	int *direntflags = NULL;
	void *realpnp = NULL;

	/*
	 * fast path: try to satisfy ".", "" and DNLC hits without taking
	 * ZFS_ENTER (the teardown rrw lock) at all.
	 */
	if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) {

		if (dvp->v_type != VDIR) {
			return (ENOTDIR);
		} else if (zdp->z_dbuf == NULL) {
			/* znode already torn down. */
			return (EIO);
		}

		if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
			/* "" and "." both refer to the directory itself. */
			error = zfs_fastaccesschk_execute(zdp, cr);
			if (!error) {
				*vpp = dvp;
				VN_HOLD(*vpp);
				return (0);
			}
			return (error);
		} else {
			vnode_t *tvp = dnlc_lookup(dvp, nm);

			if (tvp) {
				error = zfs_fastaccesschk_execute(zdp, cr);
				if (error) {
					VN_RELE(tvp);
					return (error);
				}
				if (tvp == DNLC_NO_VNODE) {
					/* Cached negative entry. */
					VN_RELE(tvp);
					return (ENOENT);
				} else {
					*vpp = tvp;
					return (specvp_check(vpp, cr));
				}
			}
		}
	}

	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zdp);

	*vpp = NULL;

	if (flags & LOOKUP_XATTR) {
#ifdef TODO
		/*
		 * If the xattr property is off, refuse the lookup request.
		 */
		if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) {
			ZFS_EXIT(zfsvfs);
			return (EINVAL);
		}
#endif

		/*
		 * We don't allow recursive attributes..
		 * Maybe someday we will.
		 */
		if (zdp->z_phys->zp_flags & ZFS_XATTR) {
			ZFS_EXIT(zfsvfs);
			return (EINVAL);
		}

		if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}

		/*
		 * Do we have permission to get into attribute directory?
		 */

		if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0,
		    B_FALSE, cr)) {
			VN_RELE(*vpp);
			*vpp = NULL;
		}

		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (dvp->v_type != VDIR) {
		ZFS_EXIT(zfsvfs);
		return (ENOTDIR);
	}

	/*
	 * Check accessibility of directory.
	 */

	if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/* Reject names that are not valid UTF-8 on utf8only filesystems. */
	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}

	error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp);
	if (error == 0)
		error = specvp_check(vpp, cr);

	/* Translate errors and add SAVENAME when needed. */
	if (cnp->cn_flags & ISLASTCN) {
		switch (nameiop) {
		case CREATE:
		case RENAME:
			if (error == ENOENT) {
				/* FreeBSD: "not found" is success for create. */
				error = EJUSTRETURN;
				cnp->cn_flags |= SAVENAME;
				break;
			}
			/* FALLTHROUGH */
		case DELETE:
			if (error == 0)
				cnp->cn_flags |= SAVENAME;
			break;
		}
	}
	/*
	 * FreeBSD: lock the returned vnode (except when it is dvp itself,
	 * i.e. the "." case, which is already locked by the caller).
	 */
	if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) {
		int ltype = 0;

		if (cnp->cn_flags & ISDOTDOT) {
			/*
			 * ".." must be locked parent-after-child to avoid a
			 * lock-order reversal: drop dvp's lock, take the
			 * child's, then re-lock dvp at its old level.
			 */
			ltype = VOP_ISLOCKED(dvp);
			VOP_UNLOCK(dvp, 0);
		}
		ZFS_EXIT(zfsvfs);
		error = vn_lock(*vpp, cnp->cn_lkflags);
		if (cnp->cn_flags & ISDOTDOT)
			vn_lock(dvp, ltype | LK_RETRY);
		if (error != 0) {
			VN_RELE(*vpp);
			*vpp = NULL;
			return (error);
		}
	} else {
		ZFS_EXIT(zfsvfs);
	}

#ifdef FREEBSD_NAMECACHE
	/*
	 * Insert name into cache (as non-existent) if appropriate.
	 */
	if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
		cache_enter(dvp, *vpp, cnp);
	/*
	 * Insert name into cache if appropriate.
	 */
	if (error == 0 && (cnp->cn_flags & MAKEENTRY)) {
		if (!(cnp->cn_flags & ISLASTCN) ||
		    (nameiop != DELETE && nameiop != RENAME)) {
			cache_enter(dvp, *vpp, cnp);
		}
	}
#endif

	return (error);
}

/*
 * Attempt to create a new entry in a directory.  If the entry
 * already exists, truncate the file if permissible, else return
 * an error.  Return the vp of the created or trunc'd file.
 *
 * IN:	dvp	- vnode of directory to put new file entry in.
 *	name	- name of new file entry.
 *	vap	- attributes of new file.
 *	excl	- flag indicating exclusive or non-exclusive mode.
 *	mode	- mode to open file with.
 *	cr	- credentials of caller.
 *	flag	- large file flag [UNUSED].
 *	ct	- caller context
 *	vsecp	- ACL to be set
 *
 * OUT:	vpp	- vnode of created or trunc'd entry.
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime updated if new entry created
 *	 vp - ctime|mtime always, atime if new
 */
/* ARGSUSED */
static int
zfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode,
    vnode_t **vpp, cred_t *cr, kthread_t *td)
{
	znode_t *zp, *dzp = VTOZ(dvp);
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zilog_t *zilog;
	objset_t *os;
	zfs_dirlock_t *dl;
	dmu_tx_t *tx;
	int error;
	ksid_t *ksid;
	/*
	 * NOTE(review): uid/gid are computed below but never read afterwards
	 * (the ephemeral-id check re-derives them from cr) — leftover from
	 * the upstream Solaris code.
	 */
	uid_t uid;
	gid_t gid = crgetgid(cr);
	zfs_acl_ids_t acl_ids;
	boolean_t fuid_dirtied;
	/* FreeBSD port: no caller-supplied ACL or open flags on this path. */
	void *vsecp = NULL;
	int flag = 0;

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);
	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(crgetuid(cr)) || IS_EPHEMERAL(crgetgid(cr))))
		return (EINVAL);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	os = zfsvfs->z_os;
	zilog = zfsvfs->z_log;

	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}
top:
	*vpp = NULL;

	/* Strip the sticky bit unless the caller is privileged to set it. */
	if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
		vap->va_mode &= ~S_ISVTX;

	if (*name == '\0') {
		/*
		 * Null component name refers to the directory itself.
		 */
		VN_HOLD(dvp);
		zp = dzp;
		dl = NULL;
		error = 0;
	} else {
		/* possible VN_HOLD(zp) */
		int zflg = 0;

		if (flag & FIGNORECASE)
			zflg |= ZCILOOK;

		error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
		    NULL, NULL);
		if (error) {
			if (strcmp(name, "..") == 0)
				error = EISDIR;
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}
	if (zp == NULL) {
		uint64_t txtype;

		/*
		 * Create a new file object and update the directory
		 * to reference it.
		 */
		if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
			goto out;
		}

		/*
		 * We only support the creation of regular files in
		 * extended attribute directories.
		 */
		if ((dzp->z_phys->zp_flags & ZFS_XATTR) &&
		    (vap->va_type != VREG)) {
			error = EINVAL;
			goto out;
		}


		if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp,
		    &acl_ids)) != 0)
			goto out;
		if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
			zfs_acl_ids_free(&acl_ids);
			error = EDQUOT;
			goto out;
		}

		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
		fuid_dirtied = zfsvfs->z_fuid_dirty;
		if (fuid_dirtied)
			zfs_fuid_txhold(zfsvfs, tx);
		dmu_tx_hold_bonus(tx, dzp->z_id);
		dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
		/* A fat ACL spills out of the bonus buffer into its own blocks. */
		if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
			    0, SPA_MAXBLOCKSIZE);
		}
		error = dmu_tx_assign(tx, TXG_NOWAIT);
		if (error) {
			/*
			 * On ERESTART (txg is full): drop locks, wait for the
			 * next txg, and retry the whole operation from 'top'.
			 */
			zfs_acl_ids_free(&acl_ids);
			zfs_dirent_unlock(dl);
			if (error == ERESTART) {
				dmu_tx_wait(tx);
				dmu_tx_abort(tx);
				goto top;
			}
			dmu_tx_abort(tx);
			ZFS_EXIT(zfsvfs);
			return (error);
		}
		zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);

		if (fuid_dirtied)
			zfs_fuid_sync(zfsvfs, tx);

		(void) zfs_link_create(dl, zp, tx, ZNEW);

		txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
		if (flag & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_create(zilog, tx, txtype, dzp, zp, name,
		    vsecp, acl_ids.z_fuidp, vap);
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_commit(tx);
	} else {
		int aflags = (flag & FAPPEND) ? V_APPEND : 0;

		/*
		 * A directory entry already exists for this name.
		 */
		/*
		 * Can't truncate an existing file if in exclusive mode.
		 */
		if (excl == EXCL) {
			error = EEXIST;
			goto out;
		}
		/*
		 * Can't open a directory for writing.
		 */
		if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) {
			error = EISDIR;
			goto out;
		}
		/*
		 * Verify requested access to file.
		 */
		if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) {
			goto out;
		}

		/* Bump the directory's sequence number for NFS. */
		mutex_enter(&dzp->z_lock);
		dzp->z_seq++;
		mutex_exit(&dzp->z_lock);

		/*
		 * Truncate regular files if requested.
		 */
		if ((ZTOV(zp)->v_type == VREG) &&
		    (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) {
			/* we can't hold any locks when calling zfs_freesp() */
			zfs_dirent_unlock(dl);
			dl = NULL;
			error = zfs_freesp(zp, 0, 0, mode, TRUE);
			if (error == 0) {
				/*
				 * NOTE(review): 'ct' is not a parameter here;
				 * this compiles only because vnevent_create()
				 * is a no-op macro in the FreeBSD port —
				 * confirm before changing that macro.
				 */
				vnevent_create(ZTOV(zp), ct);
			}
		}
	}
out:
	if (dl)
		zfs_dirent_unlock(dl);

	if (error) {
		if (zp)
			VN_RELE(ZTOV(zp));
	} else {
		*vpp = ZTOV(zp);
		error = specvp_check(vpp, cr);
	}

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Remove an entry from a directory.
 *
 * IN:	dvp	- vnode of directory to remove entry from.
 *	name	- name of entry to remove.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime
 *	 vp - ctime (if nlink > 0)
 */
/*ARGSUSED*/
static int
zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
    int flags)
{
	znode_t *zp, *dzp = VTOZ(dvp);
	znode_t *xzp = NULL;
	vnode_t *vp;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zilog_t *zilog;
	uint64_t acl_obj, xattr_obj;
	zfs_dirlock_t *dl;
	dmu_tx_t *tx;
	boolean_t may_delete_now, delete_now = FALSE;
	boolean_t unlinked, toobig = FALSE;
	uint64_t txtype;
	pathname_t *realnmp = NULL;
	pathname_t realnm;
	int error;
	int zflg = ZEXISTS;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	if (flags & FIGNORECASE) {
		zflg |= ZCILOOK;
		pn_alloc(&realnm);
		realnmp = &realnm;
	}

top:
	/*
	 * Attempt to lock directory; fail if entry doesn't exist.
	 */
	if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
	    NULL, realnmp)) {
		if (realnmp)
			pn_free(realnmp);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	vp = ZTOV(zp);

	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
		goto out;
	}

	/*
	 * Need to use rmdir for removing directories.
	 */
	if (vp->v_type == VDIR) {
		error = EPERM;
		goto out;
	}

	vnevent_remove(vp, dvp, name, ct);

	if (realnmp)
		dnlc_remove(dvp, realnmp->pn_buf);
	else
		dnlc_remove(dvp, name);

	/*
	 * NOTE(review): the FreeBSD port deliberately disables the
	 * "delete the znode right now" optimization (see also the
	 * 'if (0 && unlinked)' below): the object always goes through
	 * the unlinked set instead.  Keep this in mind when merging
	 * from OpenSolaris, where may_delete_now is computed from the
	 * vnode's hold count.
	 */
	may_delete_now = FALSE;

	/*
	 * We may delete the znode now, or we may put it in the unlinked set;
	 * it depends on whether we're the last link, and on whether there are
	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
	 * allow for either case.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_bonus(tx, zp->z_id);
	/* Dead on FreeBSD: may_delete_now is always FALSE here. */
	if (may_delete_now) {
		toobig =
		    zp->z_phys->zp_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT;
		/* if the file is too big, only hold_free a token amount */
		dmu_tx_hold_free(tx, zp->z_id, 0,
		    (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
	}

	/* are there any extended attributes? */
	if ((xattr_obj = zp->z_phys->zp_xattr) != 0) {
		/* XXX - do we need this if we are deleting? */
		dmu_tx_hold_bonus(tx, xattr_obj);
	}

	/* are there any additional acls */
	if ((acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj) != 0 &&
	    may_delete_now)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);

	/* charge as an update -- would be nice not to charge at all */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);

	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		/* On ERESTART: drop locks, wait for the txg, retry from top. */
		zfs_dirent_unlock(dl);
		VN_RELE(vp);
		if (error == ERESTART) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		if (realnmp)
			pn_free(realnmp);
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Remove the directory entry.
	 */
	error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked);

	if (error) {
		dmu_tx_commit(tx);
		goto out;
	}

	/*
	 * NOTE(review): intentionally disabled ('0 &&') in this port —
	 * delete_now therefore stays FALSE and removal is always deferred
	 * to the unlinked-set processing.
	 */
	if (0 && unlinked) {
		VI_LOCK(vp);
		delete_now = may_delete_now && !toobig &&
		    vp->v_count == 1 && !vn_has_cached_data(vp) &&
		    zp->z_phys->zp_xattr == xattr_obj &&
		    zp->z_phys->zp_acl.z_acl_extern_obj == acl_obj;
		VI_UNLOCK(vp);
	}

	if (delete_now) {
		/* Dead code on FreeBSD (delete_now is never set); kept
		 * for parity with the upstream Solaris implementation. */
		if (zp->z_phys->zp_xattr) {
			error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp);
			ASSERT3U(error, ==, 0);
			ASSERT3U(xzp->z_phys->zp_links, ==, 2);
			dmu_buf_will_dirty(xzp->z_dbuf, tx);
			mutex_enter(&xzp->z_lock);
			xzp->z_unlinked = 1;
			xzp->z_phys->zp_links = 0;
			mutex_exit(&xzp->z_lock);
			zfs_unlinked_add(xzp, tx);
			zp->z_phys->zp_xattr = 0; /* probably unnecessary */
		}
		mutex_enter(&zp->z_lock);
		VI_LOCK(vp);
		vp->v_count--;
		ASSERT3U(vp->v_count, ==, 0);
		VI_UNLOCK(vp);
		mutex_exit(&zp->z_lock);
		zfs_znode_delete(zp, tx);
	} else if (unlinked) {
		zfs_unlinked_add(zp, tx);
	}

	txtype = TX_REMOVE;
	if (flags & FIGNORECASE)
		txtype |= TX_CI;
	zfs_log_remove(zilog, tx, txtype, dzp, name);

	dmu_tx_commit(tx);
out:
	if (realnmp)
		pn_free(realnmp);

	zfs_dirent_unlock(dl);

	if (!delete_now) {
		VN_RELE(vp);
	} else if (xzp) {
		/* this rele is delayed to prevent nesting transactions */
		VN_RELE(ZTOV(xzp));
	}

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Create a new directory and insert it into dvp using the name
 * provided.  Return a pointer to the inserted directory.
 *
 * IN:	dvp	- vnode of directory to add subdir to.
 *	dirname	- name of new directory.
 *	vap	- attributes of new directory.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	vsecp	- ACL to be set
 *
 * OUT:	vpp	- vnode of created directory.
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 *	 vp - ctime|mtime|atime updated
 */
/*ARGSUSED*/
static int
zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr,
    caller_context_t *ct, int flags, vsecattr_t *vsecp)
{
	znode_t *zp, *dzp = VTOZ(dvp);
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zilog_t *zilog;
	zfs_dirlock_t *dl;
	uint64_t txtype;
	dmu_tx_t *tx;
	int error;
	int zf = ZNEW;
	ksid_t *ksid;
	/* NOTE(review): uid/gid are computed but unused afterwards (the
	 * ephemeral check re-derives them from cr) — upstream leftover. */
	uid_t uid;
	gid_t gid = crgetgid(cr);
	zfs_acl_ids_t acl_ids;
	boolean_t fuid_dirtied;

	ASSERT(vap->va_type == VDIR);

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);
	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) || IS_EPHEMERAL(crgetuid(cr))||
	    IS_EPHEMERAL(crgetgid(cr))))
		return (EINVAL);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/* Directories may not be created inside extended attribute dirs. */
	if (dzp->z_phys->zp_flags & ZFS_XATTR) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	if (zfsvfs->z_utf8 && u8_validate(dirname,
	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}
	if (flags & FIGNORECASE)
		zf |= ZCILOOK;

	if (vap->va_mask & AT_XVATTR)
		if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}

	/*
	 * First make sure the new directory doesn't exist.
	 */
top:
	*vpp = NULL;

	if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf,
	    NULL, NULL)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) {
		zfs_dirent_unlock(dl);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp,
	    &acl_ids)) != 0) {
		zfs_dirent_unlock(dl);
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
		zfs_acl_ids_free(&acl_ids);
		zfs_dirent_unlock(dl);
		ZFS_EXIT(zfsvfs);
		return (EDQUOT);
	}

	/*
	 * Add a new entry to the directory.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	/* A fat ACL spills out of the bonus buffer into its own blocks. */
	if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE)
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
		    0, SPA_MAXBLOCKSIZE);
	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		/* On ERESTART: drop locks, wait for the txg, retry from top. */
		zfs_acl_ids_free(&acl_ids);
		zfs_dirent_unlock(dl);
		if (error == ERESTART) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Create new node.
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);
	/*
	 * Now put new name in parent dir.
	 */
	(void) zfs_link_create(dl, zp, tx, ZNEW);

	*vpp = ZTOV(zp);

	txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap);
	if (flags & FIGNORECASE)
		txtype |= TX_CI;
	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
	    acl_ids.z_fuidp, vap);

	zfs_acl_ids_free(&acl_ids);
	dmu_tx_commit(tx);

	zfs_dirent_unlock(dl);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Remove a directory subdir entry.  If the current working
 * directory is the same as the subdir to be removed, the
 * remove will fail.
 *
 * IN:	dvp	- vnode of directory to remove from.
 *	name	- name of directory to be removed.
 *	cwd	- vnode of current working directory.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t	*dzp = VTOZ(dvp);
	znode_t	*zp;
	vnode_t	*vp;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zilog_t	*zilog;
	zfs_dirlock_t *dl;
	dmu_tx_t *tx;
	int error;
	int zflg = ZEXISTS;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	if (flags & FIGNORECASE)
		zflg |= ZCILOOK;
top:
	zp = NULL;

	/*
	 * Attempt to lock directory; fail if entry doesn't exist.
	 */
	if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
	    NULL, NULL)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	vp = ZTOV(zp);

	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
		goto out;
	}

	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

	/* POSIX: refuse to remove the caller's current working directory. */
	if (vp == cwd) {
		error = EINVAL;
		goto out;
	}

	vnevent_rmdir(vp, dvp, name, ct);

	/*
	 * Grab a lock on the directory to make sure that noone is
	 * trying to add (or lookup) entries while we are removing it.
	 */
	rw_enter(&zp->z_name_lock, RW_WRITER);

	/*
	 * Grab a lock on the parent pointer to make sure we play well
	 * with the treewalk and directory rename code.
	 */
	rw_enter(&zp->z_parent_lock, RW_WRITER);

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_bonus(tx, zp->z_id);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		/* On ERESTART: drop locks, wait for the txg, retry from top. */
		rw_exit(&zp->z_parent_lock);
		rw_exit(&zp->z_name_lock);
		zfs_dirent_unlock(dl);
		VN_RELE(vp);
		if (error == ERESTART) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

#ifdef FREEBSD_NAMECACHE
	cache_purge(dvp);
#endif

	/* zfs_link_destroy() also enforces "directory must be empty". */
	error = zfs_link_destroy(dl, zp, tx, zflg, NULL);

	if (error == 0) {
		uint64_t txtype = TX_RMDIR;
		if (flags & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_remove(zilog, tx, txtype, dzp, name);
	}

	dmu_tx_commit(tx);

	rw_exit(&zp->z_parent_lock);
	rw_exit(&zp->z_name_lock);
#ifdef FREEBSD_NAMECACHE
	cache_purge(vp);
#endif
out:
	zfs_dirent_unlock(dl);

	VN_RELE(vp);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Read as many directory entries as will fit into
the provided
 * buffer from the given directory cursor position (specified in
 * the uio structure.
 *
 * IN:	vp	- vnode of directory to read.
 *	uio	- structure supplying read location, range info,
 *		  and return buffer.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * OUT:	uio	- updated offset and range, buffer filled.
 *	eofp	- set to true if end-of-file detected.
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	vp - atime updated
 *
 * Note that the low 4 bits of the cookie returned by zap is always zero.
 * This allows us to use the low range for "special" directory entries:
 * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
 * we use the offset 2 for the '.zfs' directory.
 *
 * FreeBSD addition: if 'ncookies'/'cookies' are non-NULL, an array of
 * per-entry seek cookies is allocated (M_TEMP) and returned for the NFS
 * server; the caller owns it on success, and it is freed here on error.
 */
/* ARGSUSED */
static int
zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies)
{
	znode_t		*zp = VTOZ(vp);
	iovec_t		*iovp;
	edirent_t	*eodp;
	dirent64_t	*odp;
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	objset_t	*os;
	caddr_t		outbuf;
	size_t		bufsize;
	zap_cursor_t	zc;
	zap_attribute_t	zap;
	uint_t		bytes_wanted;
	uint64_t	offset; /* must be unsigned; checks for < 1 */
	int		local_eof;
	int		outcount;
	int		error;
	uint8_t		prefetch;
	boolean_t	check_sysattrs;
	uint8_t		type;
	int		ncooks;
	u_long		*cooks = NULL;
	/* Solaris 'flags' argument does not exist on FreeBSD; fixed to 0. */
	int		flags = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/*
	 * If we are not given an eof variable,
	 * use a local one.
	 */
	if (eofp == NULL)
		eofp = &local_eof;

	/*
	 * Check for valid iov_len.
	 */
	if (uio->uio_iov->iov_len <= 0) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * Quit if directory has been removed (posix)
	 */
	if ((*eofp = zp->z_unlinked) != 0) {
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	error = 0;
	os = zfsvfs->z_os;
	offset = uio->uio_loffset;
	prefetch = zp->z_zn_prefetch;

	/*
	 * Initialize the iterator cursor.
	 */
	if (offset <= 3) {
		/*
		 * Start iteration from the beginning of the directory.
		 */
		zap_cursor_init(&zc, os, zp->z_id);
	} else {
		/*
		 * The offset is a serialized cursor.
		 */
		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
	}

	/*
	 * Get space to change directory entries into fs independent format.
	 * Fast path: a single kernel-space iovec is filled in place;
	 * otherwise a bounce buffer is allocated and uiomove'd at the end.
	 */
	iovp = uio->uio_iov;
	bytes_wanted = iovp->iov_len;
	if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) {
		bufsize = bytes_wanted;
		outbuf = kmem_alloc(bufsize, KM_SLEEP);
		odp = (struct dirent64 *)outbuf;
	} else {
		bufsize = bytes_wanted;
		odp = (struct dirent64 *)iovp->iov_base;
	}
	eodp = (struct edirent *)odp;

	if (ncookies != NULL) {
		/*
		 * Minimum entry size is dirent size and 1 byte for a file name.
		 */
		ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1);
		cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK);
		*cookies = cooks;
		*ncookies = ncooks;
	}
	/*
	 * If this VFS supports the system attribute view interface; and
	 * we're looking at an extended attribute directory; and we care
	 * about normalization conflicts on this vfs; then we must check
	 * for normalization conflicts with the sysattr name space.
	 */
#ifdef TODO
	check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
	    (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm &&
	    (flags & V_RDDIR_ENTFLAGS);
#else
	check_sysattrs = 0;
#endif

	/*
	 * Transform to file-system independent format
	 */
	outcount = 0;
	while (outcount < bytes_wanted) {
		ino64_t objnum;
		ushort_t reclen;
		off64_t *next;

		/*
		 * Special case `.', `..', and `.zfs'.
		 */
		if (offset == 0) {
			(void) strcpy(zap.za_name, ".");
			zap.za_normalization_conflict = 0;
			objnum = zp->z_id;
			type = DT_DIR;
		} else if (offset == 1) {
			(void) strcpy(zap.za_name, "..");
			zap.za_normalization_conflict = 0;
			objnum = zp->z_phys->zp_parent;
			type = DT_DIR;
		} else if (offset == 2 && zfs_show_ctldir(zp)) {
			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
			zap.za_normalization_conflict = 0;
			objnum = ZFSCTL_INO_ROOT;
			type = DT_DIR;
		} else {
			/*
			 * Grab next entry.
			 */
			if (error = zap_cursor_retrieve(&zc, &zap)) {
				/* ENOENT from the cursor means end of dir. */
				if ((*eofp = (error == ENOENT)) != 0)
					break;
				else
					goto update;
			}

			if (zap.za_integer_length != 8 ||
			    zap.za_num_integers != 1) {
				cmn_err(CE_WARN, "zap_readdir: bad directory "
				    "entry, obj = %lld, offset = %lld\n",
				    (u_longlong_t)zp->z_id,
				    (u_longlong_t)offset);
				error = ENXIO;
				goto update;
			}

			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
			/*
			 * MacOS X can extract the object type here such as:
			 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
			 */
			type = ZFS_DIRENT_TYPE(zap.za_first_integer);

			if (check_sysattrs && !zap.za_normalization_conflict) {
#ifdef TODO
				zap.za_normalization_conflict =
				    xattr_sysattr_casechk(zap.za_name);
#else
				panic("%s:%u: TODO", __func__, __LINE__);
#endif
			}
		}

		if (flags & V_RDDIR_ACCFILTER) {
			/*
			 * If we have no access at all, don't include
			 * this entry in the returned information
			 */
			znode_t	*ezp;
			if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0)
				goto skip_entry;
			if (!zfs_has_access(ezp, cr)) {
				VN_RELE(ZTOV(ezp));
				goto skip_entry;
			}
			VN_RELE(ZTOV(ezp));
		}

		if (flags & V_RDDIR_ENTFLAGS)
			reclen = EDIRENT_RECLEN(strlen(zap.za_name));
		else
			reclen = DIRENT64_RECLEN(strlen(zap.za_name));

		/*
		 * Will this entry fit in the buffer?
		 */
		if (outcount + reclen > bufsize) {
			/*
			 * Did we manage to fit anything in the buffer?
			 */
			if (!outcount) {
				error = EINVAL;
				goto update;
			}
			break;
		}
		if (flags & V_RDDIR_ENTFLAGS) {
			/*
			 * Add extended flag entry:
			 */
			eodp->ed_ino = objnum;
			eodp->ed_reclen = reclen;
			/* NOTE: ed_off is the offset for the *next* entry */
			next = &(eodp->ed_off);
			eodp->ed_eflags = zap.za_normalization_conflict ?
			    ED_CASE_CONFLICT : 0;
			(void) strncpy(eodp->ed_name, zap.za_name,
			    EDIRENT_NAMELEN(reclen));
			eodp = (edirent_t *)((intptr_t)eodp + reclen);
		} else {
			/*
			 * Add normal entry:
			 */
			odp->d_ino = objnum;
			odp->d_reclen = reclen;
			odp->d_namlen = strlen(zap.za_name);
			(void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
			odp->d_type = type;
			odp = (dirent64_t *)((intptr_t)odp + reclen);
		}
		outcount += reclen;

		ASSERT(outcount <= bufsize);

		/* Prefetch znode */
		if (prefetch)
			dmu_prefetch(os, objnum, 0, 0);

	skip_entry:
		/*
		 * Move to the next entry, fill in the previous offset.
		 */
		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
			zap_cursor_advance(&zc);
			offset = zap_cursor_serialize(&zc);
		} else {
			/* Still within the synthetic '.'/'..'/'.zfs' range. */
			offset += 1;
		}

		if (cooks != NULL) {
			/* Each cookie is the offset of the *next* entry. */
			*cooks++ = offset;
			ncooks--;
			KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
		}
	}
	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */

	/* Subtract unused cookies */
	if (ncookies != NULL)
		*ncookies -= ncooks;

	if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) {
		/* In-place fast path: just consume the iovec ourselves. */
		iovp->iov_base += outcount;
		iovp->iov_len -= outcount;
		uio->uio_resid -= outcount;
	} else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) {
		/*
		 * Reset the pointer.
		 */
		offset = uio->uio_loffset;
	}

update:
	zap_cursor_fini(&zc);
	if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
		kmem_free(outbuf, bufsize);

	if (error == ENOENT)
		error = 0;

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

	uio->uio_loffset = offset;
	ZFS_EXIT(zfsvfs);
	/* On failure the cookie array is ours to free, not the caller's. */
	if (error != 0 && cookies != NULL) {
		free(*cookies, M_TEMP);
		*cookies = NULL;
		*ncookies = 0;
	}
	return (error);
}

/* Per-thread hint: how many fsyncs in a row before the ZIL throttles. */
ulong_t zfs_fsync_sync_cnt = 4;

/*
 * Flush any pending log records for this file out to stable storage
 * by committing the ZIL up to the file's last itx.
 */
static int
zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
{
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	(void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);
	zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id);
	ZFS_EXIT(zfsvfs);
	return (0);
}


/*
 * Get the requested file attributes and place them in the provided
 * vattr structure.
 *
 * IN:	vp	- vnode of file.
 *	vap	- va_mask identifies requested attributes.
 *		  If AT_XVATTR set, then optional attrs are requested
 *	flags	- ATTR_NOACLCHECK (CIFS server context)
 *	cr	- credentials of caller.
 *	ct	- caller context
 *
 * OUT:	vap	- attribute values.
 *
 * RETURN:	0 (always succeeds)
 */
/* ARGSUSED */
static int
zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	znode_phys_t *pzp;
	int	error = 0;
	uint32_t blksize;
	u_longlong_t nblocks;
	uint64_t links;
	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
	xoptattr_t *xoap = NULL;
	/* CIFS server may ask us to skip the ACL check (it did its own). */
	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ?
	    B_TRUE : B_FALSE;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);
	pzp = zp->z_phys;

	/*
	 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
	 * Also, if we are the owner don't bother, since owner should
	 * always be allowed to read basic attributes of file.
	 */
	if (!(pzp->zp_flags & ZFS_ACL_TRIVIAL) &&
	    (pzp->zp_uid != crgetuid(cr))) {
		if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
		    skipaclchk, cr)) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/*
	 * Return all attributes.  It's cheaper to provide the answer
	 * than to determine whether we were asked the question.
	 */

	/* z_lock keeps the attribute snapshot internally consistent. */
	mutex_enter(&zp->z_lock);
	vap->va_type = IFTOVT(pzp->zp_mode);
	vap->va_mode = pzp->zp_mode & ~S_IFMT;
	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
//	vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev;
	vap->va_nodeid = zp->z_id;
	/* The root directory gains a phantom link for the .zfs ctldir. */
	if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp))
		links = pzp->zp_links + 1;
	else
		links = pzp->zp_links;
	vap->va_nlink = MIN(links, UINT32_MAX);	/* nlink_t limit! */
	vap->va_size = pzp->zp_size;
	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
	vap->va_rdev = zfs_cmpldev(pzp->zp_rdev);
	vap->va_seq = zp->z_seq;
	vap->va_flags = 0;	/* FreeBSD: Reset chflags(2) flags. */

	/*
	 * Add in any requested optional attributes and the create time.
	 * Also set the corresponding bits in the returned attribute bitmap.
	 * Optional (xvattr) attributes are only available when the pool
	 * supports FUIDs (z_use_fuids).
	 */
	if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
		if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
			xoap->xoa_archive =
			    ((pzp->zp_flags & ZFS_ARCHIVE) != 0);
			XVA_SET_RTN(xvap, XAT_ARCHIVE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
			xoap->xoa_readonly =
			    ((pzp->zp_flags & ZFS_READONLY) != 0);
			XVA_SET_RTN(xvap, XAT_READONLY);
		}

		if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
			xoap->xoa_system =
			    ((pzp->zp_flags & ZFS_SYSTEM) != 0);
			XVA_SET_RTN(xvap, XAT_SYSTEM);
		}

		if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
			xoap->xoa_hidden =
			    ((pzp->zp_flags & ZFS_HIDDEN) != 0);
			XVA_SET_RTN(xvap, XAT_HIDDEN);
		}

		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
			xoap->xoa_nounlink =
			    ((pzp->zp_flags & ZFS_NOUNLINK) != 0);
			XVA_SET_RTN(xvap, XAT_NOUNLINK);
		}

		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
			xoap->xoa_immutable =
			    ((pzp->zp_flags & ZFS_IMMUTABLE) != 0);
			XVA_SET_RTN(xvap, XAT_IMMUTABLE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
			xoap->xoa_appendonly =
			    ((pzp->zp_flags & ZFS_APPENDONLY) != 0);
			XVA_SET_RTN(xvap, XAT_APPENDONLY);
		}

		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
			xoap->xoa_nodump =
			    ((pzp->zp_flags & ZFS_NODUMP) != 0);
			XVA_SET_RTN(xvap, XAT_NODUMP);
		}

		if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
			xoap->xoa_opaque =
			    ((pzp->zp_flags & ZFS_OPAQUE) != 0);
			XVA_SET_RTN(xvap, XAT_OPAQUE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
			xoap->xoa_av_quarantined =
			    ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0);
			XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
			xoap->xoa_av_modified =
			    ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0);
			XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
		    vp->v_type == VREG &&
		    (pzp->zp_flags & ZFS_BONUS_SCANSTAMP)) {
			size_t len;
			dmu_object_info_t doi;

			/*
			 * Only VREG files have anti-virus scanstamps, so we
			 * won't conflict with symlinks in the bonus buffer.
			 */
			dmu_object_info_from_db(zp->z_dbuf, &doi);
			len = sizeof (xoap->xoa_av_scanstamp) +
			    sizeof (znode_phys_t);
			if (len <= doi.doi_bonus_size) {
				/*
				 * pzp points to the start of the
				 * znode_phys_t. pzp + 1 points to the
				 * first byte after the znode_phys_t.
				 */
				(void) memcpy(xoap->xoa_av_scanstamp,
				    pzp + 1,
				    sizeof (xoap->xoa_av_scanstamp));
				XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
			ZFS_TIME_DECODE(&xoap->xoa_createtime, pzp->zp_crtime);
			XVA_SET_RTN(xvap, XAT_CREATETIME);
		}
	}

	ZFS_TIME_DECODE(&vap->va_atime, pzp->zp_atime);
	ZFS_TIME_DECODE(&vap->va_mtime, pzp->zp_mtime);
	ZFS_TIME_DECODE(&vap->va_ctime, pzp->zp_ctime);
	ZFS_TIME_DECODE(&vap->va_birthtime, pzp->zp_crtime);

	mutex_exit(&zp->z_lock);

	dmu_object_size_from_db(zp->z_dbuf, &blksize, &nblocks);
	vap->va_blksize = blksize;
	vap->va_bytes = nblocks << 9;	/* nblocks * 512 */

	if (zp->z_blksz == 0) {
		/*
		 * Block size hasn't been set; suggest maximal I/O transfers.
		 */
		vap->va_blksize = zfsvfs->z_max_blksz;
	}

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Set the file attributes to the values contained in the
 * vattr structure.
 *
 * IN:	vp	- vnode of file to be modified.
 *	vap	- new attribute values.
 *		  If AT_XVATTR set, then optional attrs are being set
 *	flags	- ATTR_UTIME set if non-default time values provided.
 *		- ATTR_NOACLCHECK (CIFS context only).
 *	cr	- credentials of caller.
 *	ct	- caller context
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	vp - ctime updated, mtime updated if size changed.
 */
/* ARGSUSED */
static int
zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	znode_phys_t	*pzp;
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	zilog_t		*zilog;
	dmu_tx_t	*tx;
	vattr_t		oldva;
	xvattr_t	tmpxvattr;
	uint_t		mask = vap->va_mask;
	uint_t		saved_mask;
	uint64_t	saved_mode;
	int		trim_mask = 0;
	uint64_t	new_mode;
	uint64_t	new_uid, new_gid;
	znode_t		*attrzp;
	int		need_policy = FALSE;
	int		err;
	zfs_fuid_info_t	*fuidp = NULL;
	xvattr_t	*xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
	xoptattr_t	*xoap;
	zfs_acl_t	*aclp = NULL;
	boolean_t	skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
	boolean_t	fuid_dirtied = B_FALSE;

	if (mask == 0)
		return (0);

	if (mask & AT_NOSET)
		return (EINVAL);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	pzp = zp->z_phys;
	zilog = zfsvfs->z_log;

	/*
	 * Make sure that if we have ephemeral uid/gid or xvattr specified
	 * that file system is at proper version level
	 */

	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
	    ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
	    (mask & AT_XVATTR))) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	if (mask & AT_SIZE && vp->v_type == VDIR) {
		ZFS_EXIT(zfsvfs);
		return (EISDIR);
	}

	if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * If this is an xvattr_t, then get a pointer to the structure of
	 * optional attributes.  If this is NULL, then we have a vattr_t.
	 */
	xoap = xva_getxoptattr(xvap);

	xva_init(&tmpxvattr);

	/*
	 * Immutable files can only alter immutable bit and atime
	 */
	if ((pzp->zp_flags & ZFS_IMMUTABLE) &&
	    ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
	    ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	if ((mask & AT_SIZE) && (pzp->zp_flags & ZFS_READONLY)) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	/*
	 * Verify timestamps doesn't overflow 32 bits.
	 * ZFS can handle large timestamps, but 32bit syscalls can't
	 * handle times greater than 2039.  This check should be removed
	 * once large timestamps are fully supported.
	 */
	if (mask & (AT_ATIME | AT_MTIME)) {
		if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
		    ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
			ZFS_EXIT(zfsvfs);
			return (EOVERFLOW);
		}
	}

	/*
	 * Everything from here down may be retried: if dmu_tx_assign()
	 * returns ERESTART we jump back to "top" after waiting for the
	 * transaction group to advance (see the ERESTART handling at
	 * the "out" label).
	 */
top:
	attrzp = NULL;

	/* Can this be moved to before the top label? */
	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
		ZFS_EXIT(zfsvfs);
		return (EROFS);
	}

	/*
	 * First validate permissions
	 */

	if (mask & AT_SIZE) {
		err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr);
		if (err) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}
		/*
		 * XXX - Note, we are not providing any open
		 * mode flags here (like FNDELAY), so we may
		 * block if there are locks present... this
		 * should be addressed in openat().
		 */
		/* XXX - would it be OK to generate a log record here? */
		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
		if (err) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}
	}

	if (mask & (AT_ATIME|AT_MTIME) ||
	    ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
	    XVA_ISSET_REQ(xvap, XAT_SYSTEM))))
		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
		    skipaclchk, cr);

	if (mask & (AT_UID|AT_GID)) {
		int	idmask = (mask & (AT_UID|AT_GID));
		int	take_owner;
		int	take_group;

		/*
		 * NOTE: even if a new mode is being set,
		 * we may clear S_ISUID/S_ISGID bits.
		 */

		if (!(mask & AT_MODE))
			vap->va_mode = pzp->zp_mode;

		/*
		 * Take ownership or chgrp to group we are a member of
		 */

		take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
		take_group = (mask & AT_GID) &&
		    zfs_groupmember(zfsvfs, vap->va_gid, cr);

		/*
		 * If both AT_UID and AT_GID are set then take_owner and
		 * take_group must both be set in order to allow taking
		 * ownership.
		 *
		 * Otherwise, send the check through secpolicy_vnode_setattr()
		 *
		 */

		if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
		    ((idmask == AT_UID) && take_owner) ||
		    ((idmask == AT_GID) && take_group)) {
			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
			    skipaclchk, cr) == 0) {
				/*
				 * Remove setuid/setgid for non-privileged users
				 */
				secpolicy_setid_clear(vap, vp, cr);
				trim_mask = (mask & (AT_UID|AT_GID));
			} else {
				need_policy = TRUE;
			}
		} else {
			need_policy = TRUE;
		}
	}

	mutex_enter(&zp->z_lock);
	oldva.va_mode = pzp->zp_mode;
	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
	if (mask & AT_XVATTR) {
		/*
		 * Update xvattr mask to include only those attributes
		 * that are actually changing.
		 *
		 * the bits will be restored prior to actually setting
		 * the attributes so the caller thinks they were set.
		 */
		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
			if (xoap->xoa_appendonly !=
			    ((pzp->zp_flags & ZFS_APPENDONLY) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
				XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
			if (xoap->xoa_nounlink !=
			    ((pzp->zp_flags & ZFS_NOUNLINK) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
				XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
			if (xoap->xoa_immutable !=
			    ((pzp->zp_flags & ZFS_IMMUTABLE) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
				XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
			if (xoap->xoa_nodump !=
			    ((pzp->zp_flags & ZFS_NODUMP) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_NODUMP);
				XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
			if (xoap->xoa_av_modified !=
			    ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
				XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
			if ((vp->v_type != VREG &&
			    xoap->xoa_av_quarantined) ||
			    xoap->xoa_av_quarantined !=
			    ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
				XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
			}
		}

		if (need_policy == FALSE &&
		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
		    XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
			need_policy = TRUE;
		}
	}

	mutex_exit(&zp->z_lock);

	if (mask & AT_MODE) {
		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
			err = secpolicy_setid_setsticky_clear(vp, vap,
			    &oldva, cr);
			if (err) {
				ZFS_EXIT(zfsvfs);
				return (err);
			}
			trim_mask |= AT_MODE;
		} else {
			need_policy = TRUE;
		}
	}

	if (need_policy) {
		/*
		 * If trim_mask is set then take ownership
		 * has been granted or write_acl is present and user
		 * has the ability to modify mode.  In that case remove
		 * UID|GID and or MODE from mask so that
		 * secpolicy_vnode_setattr() doesn't revoke it.
		 */

		if (trim_mask) {
			saved_mask = vap->va_mask;
			vap->va_mask &= ~trim_mask;
			if (trim_mask & AT_MODE) {
				/*
				 * Save the mode, as secpolicy_vnode_setattr()
				 * will overwrite it with ova.va_mode.
				 */
				saved_mode = vap->va_mode;
			}
		}
		err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
		if (err) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}

		if (trim_mask) {
			vap->va_mask |= saved_mask;
			if (trim_mask & AT_MODE) {
				/*
				 * Recover the mode after
				 * secpolicy_vnode_setattr().
				 */
				vap->va_mode = saved_mode;
			}
		}
	}

	/*
	 * secpolicy_vnode_setattr, or take ownership may have
	 * changed va_mask
	 */
	mask = vap->va_mask;

	/*
	 * Declare everything this transaction may touch (bonus buffers,
	 * ACL objects, FUID tables) before dmu_tx_assign().
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, zp->z_id);

	if (mask & AT_MODE) {
		uint64_t pmode = pzp->zp_mode;

		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);

		if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))
			goto out;
		if (pzp->zp_acl.z_acl_extern_obj) {
			/* Are we upgrading ACL from old V0 format to new V1 */
			if (zfsvfs->z_version <= ZPL_VERSION_FUID &&
			    pzp->zp_acl.z_acl_version ==
			    ZFS_ACL_VERSION_INITIAL) {
				dmu_tx_hold_free(tx,
				    pzp->zp_acl.z_acl_extern_obj, 0,
				    DMU_OBJECT_END);
				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
				    0, aclp->z_acl_bytes);
			} else {
				dmu_tx_hold_write(tx,
				    pzp->zp_acl.z_acl_extern_obj, 0,
				    aclp->z_acl_bytes);
			}
		} else if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
			    0, aclp->z_acl_bytes);
		}
	}

	if (mask & (AT_UID | AT_GID)) {
		/* The hidden xattr directory znode changes owner too. */
		if (pzp->zp_xattr) {
			err = zfs_zget(zp->z_zfsvfs, pzp->zp_xattr, &attrzp);
			if (err)
				goto out;
			dmu_tx_hold_bonus(tx, attrzp->z_id);
		}
		if (mask & AT_UID) {
			new_uid = zfs_fuid_create(zfsvfs,
			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
			if (new_uid != pzp->zp_uid &&
			    zfs_usergroup_overquota(zfsvfs, B_FALSE, new_uid)) {
				err = EDQUOT;
				goto out;
			}
		}

		if (mask & AT_GID) {
			new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
			    cr, ZFS_GROUP, &fuidp);
			if (new_gid != pzp->zp_gid &&
			    zfs_usergroup_overquota(zfsvfs, B_TRUE, new_gid)) {
				err = EDQUOT;
				goto out;
			}
		}
		fuid_dirtied = zfsvfs->z_fuid_dirty;
		if (fuid_dirtied) {
			if (zfsvfs->z_fuid_obj == 0) {
				dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
				dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
				    FUID_SIZE_ESTIMATE(zfsvfs));
				dmu_tx_hold_zap(tx, MASTER_NODE_OBJ,
				    FALSE, NULL);
			} else {
				dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
				dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
				    FUID_SIZE_ESTIMATE(zfsvfs));
			}
		}
	}

	err = dmu_tx_assign(tx, TXG_NOWAIT);
	if (err) {
		if (err == ERESTART)
			dmu_tx_wait(tx);
		goto out;
	}

	dmu_buf_will_dirty(zp->z_dbuf, tx);

	/*
	 * Set each attribute requested.
	 * We group settings according to the locks they need to acquire.
	 *
	 * Note: you cannot set ctime directly, although it will be
	 * updated as a side-effect of calling this function.
	 */

	mutex_enter(&zp->z_lock);

	if (mask & AT_MODE) {
		mutex_enter(&zp->z_acl_lock);
		zp->z_phys->zp_mode = new_mode;
		err = zfs_aclset_common(zp, aclp, cr, tx);
		ASSERT3U(err, ==, 0);
		/* Ownership of aclp moves to the znode's ACL cache. */
		zp->z_acl_cached = aclp;
		aclp = NULL;
		mutex_exit(&zp->z_acl_lock);
	}

	if (attrzp)
		mutex_enter(&attrzp->z_lock);

	if (mask & AT_UID) {
		pzp->zp_uid = new_uid;
		if (attrzp)
			attrzp->z_phys->zp_uid = new_uid;
	}

	if (mask & AT_GID) {
		pzp->zp_gid = new_gid;
		if (attrzp)
			attrzp->z_phys->zp_gid = new_gid;
	}

	if (attrzp)
		mutex_exit(&attrzp->z_lock);

	if (mask & AT_ATIME)
		ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime);

	if (mask & AT_MTIME)
		ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime);

	/* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
	if (mask & AT_SIZE)
		zfs_time_stamper_locked(zp, CONTENT_MODIFIED, tx);
	else if (mask != 0)
		zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
	/*
	 * Do this after setting timestamps to prevent timestamp
	 * update from toggling bit
	 */

	if (xoap && (mask & AT_XVATTR)) {

		/*
		 * restore trimmed off masks
		 * so that return masks can be set for caller.
		 */

		if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
			XVA_SET_REQ(xvap, XAT_APPENDONLY);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
			XVA_SET_REQ(xvap, XAT_NOUNLINK);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
			XVA_SET_REQ(xvap, XAT_IMMUTABLE);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
			XVA_SET_REQ(xvap, XAT_NODUMP);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
			XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
			size_t len;
			dmu_object_info_t doi;

			ASSERT(vp->v_type == VREG);

			/* Grow the bonus buffer if necessary. */
			dmu_object_info_from_db(zp->z_dbuf, &doi);
			len = sizeof (xoap->xoa_av_scanstamp) +
			    sizeof (znode_phys_t);
			if (len > doi.doi_bonus_size)
				VERIFY(dmu_set_bonus(zp->z_dbuf, len, tx) == 0);
		}
		zfs_xvattr_set(zp, xvap);
	}

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	if (mask != 0)
		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);

	mutex_exit(&zp->z_lock);

out:
	if (attrzp)
		VN_RELE(ZTOV(attrzp));

	if (aclp)
		zfs_acl_free(aclp);

	if (fuidp) {
		zfs_fuid_info_free(fuidp);
		fuidp = NULL;
	}

	if (err)
		dmu_tx_abort(tx);
	else
		dmu_tx_commit(tx);

	/* ERESTART: txg was too full; wait happened above, retry whole op. */
	if (err == ERESTART)
		goto top;

	ZFS_EXIT(zfsvfs);
	return (err);
}

/* One node in the chain of parent-directory locks taken during rename. */
typedef struct zfs_zlock {
	krwlock_t	*zl_rwlock;	/* lock we acquired */
	znode_t		*zl_znode;	/* znode we held */
	struct zfs_zlock *zl_next;	/* next in list */
} zfs_zlock_t;

/*
 * Drop locks and release vnodes that were held by zfs_rename_lock().
 */
static void
zfs_rename_unlock(zfs_zlock_t **zlpp)
{
	zfs_zlock_t *zl;

	/* Walk the chain, dropping each lock and held znode in turn. */
	while ((zl = *zlpp) != NULL) {
		if (zl->zl_znode != NULL)
			VN_RELE(ZTOV(zl->zl_znode));
		rw_exit(zl->zl_rwlock);
		*zlpp = zl->zl_next;
		kmem_free(zl, sizeof (*zl));
	}
}

/*
 * Search back through the directory tree, using the ".." entries.
 * Lock each directory in the chain to prevent concurrent renames.
 * Fail any attempt to move a directory into one of its own descendants.
 * XXX - z_parent_lock can overlap with map or grow locks
 */
static int
zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
{
	zfs_zlock_t	*zl;
	znode_t		*zp = tdzp;
	uint64_t	rootid = zp->z_zfsvfs->z_root;
	uint64_t	*oidp = &zp->z_id;
	krwlock_t	*rwlp = &szp->z_parent_lock;
	krw_t		rw = RW_WRITER;

	/*
	 * First pass write-locks szp and compares to zp->z_id.
	 * Later passes read-lock zp and compare to zp->z_parent.
	 */
	do {
		if (!rw_tryenter(rwlp, rw)) {
			/*
			 * Another thread is renaming in this path.
			 * Note that if we are a WRITER, we don't have any
			 * parent_locks held yet.
			 *
			 * (rw == RW_READER implies at least one node is
			 * already on *zlpp, so zl below is initialized.)
			 */
			if (rw == RW_READER && zp->z_id > szp->z_id) {
				/*
				 * Drop our locks and restart
				 */
				zfs_rename_unlock(&zl);
				*zlpp = NULL;
				zp = tdzp;
				oidp = &zp->z_id;
				rwlp = &szp->z_parent_lock;
				rw = RW_WRITER;
				continue;
			} else {
				/*
				 * Wait for other thread to drop its locks
				 */
				rw_enter(rwlp, rw);
			}
		}

		/* Record this lock so the caller can unwind on failure. */
		zl = kmem_alloc(sizeof (*zl), KM_SLEEP);
		zl->zl_rwlock = rwlp;
		zl->zl_znode = NULL;
		zl->zl_next = *zlpp;
		*zlpp = zl;

		if (*oidp == szp->z_id)		/* We're a descendant of szp */
			return (EINVAL);

		if (*oidp == rootid)		/* We've hit the top */
			return (0);

		if (rw == RW_READER) {		/* i.e. not the first pass */
			int error = zfs_zget(zp->z_zfsvfs, *oidp, &zp);
			if (error)
				return (error);
			zl->zl_znode = zp;
		}
		oidp = &zp->z_phys->zp_parent;
		rwlp = &zp->z_parent_lock;
		rw = RW_READER;

	} while (zp->z_id != sdzp->z_id);

	return (0);
}

/*
 * Move an entry from the provided source directory to the target
 * directory.  Change the entry name as indicated.
 *
 * IN:	sdvp	- Source directory containing the "old entry".
 *	snm	- Old entry name.
 *	tdvp	- Target directory to contain the "new entry".
 *	tnm	- New entry name.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	sdvp,tdvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t		*tdzp, *szp, *tzp;
	znode_t		*sdzp = VTOZ(sdvp);
	zfsvfs_t	*zfsvfs = sdzp->z_zfsvfs;
	zilog_t		*zilog;
	vnode_t		*realvp;
	zfs_dirlock_t	*sdl, *tdl;
	dmu_tx_t	*tx;
	zfs_zlock_t	*zl;
	int		cmp, serr, terr;
	int		error = 0;
	int		zflg = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(sdzp);
	zilog = zfsvfs->z_log;

	/*
	 * Make sure we have the real vp for the target directory.
	 */
	if (VOP_REALVP(tdvp, &realvp, ct) == 0)
		tdvp = realvp;

	/* Cross-filesystem renames (including into .zfs) are EXDEV. */
	if (tdvp->v_vfsp != sdvp->v_vfsp || zfsctl_is_node(tdvp)) {
		ZFS_EXIT(zfsvfs);
		return (EXDEV);
	}

	tdzp = VTOZ(tdvp);
	ZFS_VERIFY_ZP(tdzp);
	if (zfsvfs->z_utf8 && u8_validate(tnm,
	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}

	if (flags & FIGNORECASE)
		zflg |= ZCILOOK;

top:
	szp = NULL;
	tzp = NULL;
	zl = NULL;

	/*
	 * This is to prevent the creation of links into attribute space
	 * by renaming a linked file into/outof an attribute directory.
	 * See the comment in zfs_link() for why this is considered bad.
	 */
	if ((tdzp->z_phys->zp_flags & ZFS_XATTR) !=
	    (sdzp->z_phys->zp_flags & ZFS_XATTR)) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * Lock source and target directory entries.  To prevent deadlock,
	 * a lock ordering must be defined.  We lock the directory with
	 * the smallest object id first, or if it's a tie, the one with
	 * the lexically first name.
	 */
	if (sdzp->z_id < tdzp->z_id) {
		cmp = -1;
	} else if (sdzp->z_id > tdzp->z_id) {
		cmp = 1;
	} else {
		/*
		 * First compare the two name arguments without
		 * considering any case folding.
		 */
		int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER);

		cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error);
		ASSERT(error == 0 || !zfsvfs->z_utf8);
		if (cmp == 0) {
			/*
			 * POSIX: "If the old argument and the new argument
			 * both refer to links to the same existing file,
			 * the rename() function shall return successfully
			 * and perform no other action."
			 */
			ZFS_EXIT(zfsvfs);
			return (0);
		}
		/*
		 * If the file system is case-folding, then we may
		 * have some more checking to do.  A case-folding file
		 * system is either supporting mixed case sensitivity
		 * access or is completely case-insensitive.  Note
		 * that the file system is always case preserving.
		 *
		 * In mixed sensitivity mode case sensitive behavior
		 * is the default.  FIGNORECASE must be used to
		 * explicitly request case insensitive behavior.
		 *
		 * If the source and target names provided differ only
		 * by case (e.g., a request to rename 'tim' to 'Tim'),
		 * we will treat this as a special case in the
		 * case-insensitive mode: as long as the source name
		 * is an exact match, we will allow this to proceed as
		 * a name-change request.
		 */
		if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
		    (zfsvfs->z_case == ZFS_CASE_MIXED &&
		    flags & FIGNORECASE)) &&
		    u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST,
		    &error) == 0) {
			/*
			 * case preserving rename request, require exact
			 * name matches
			 */
			zflg |= ZCIEXACT;
			zflg &= ~ZCILOOK;
		}
	}

	/*
	 * If the source and destination directories are the same, we should
	 * grab the z_name_lock of that directory only once.
	 */
	if (sdzp == tdzp) {
		zflg |= ZHAVELOCK;
		rw_enter(&sdzp->z_name_lock, RW_READER);
	}

	/* Acquire the two dirent locks in the ordering chosen above. */
	if (cmp < 0) {
		serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp,
		    ZEXISTS | zflg, NULL, NULL);
		terr = zfs_dirent_lock(&tdl,
		    tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL);
	} else {
		terr = zfs_dirent_lock(&tdl,
		    tdzp, tnm, &tzp, zflg, NULL, NULL);
		serr = zfs_dirent_lock(&sdl,
		    sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg,
		    NULL, NULL);
	}

	if (serr) {
		/*
		 * Source entry invalid or not there.
		 */
		if (!terr) {
			zfs_dirent_unlock(tdl);
			if (tzp)
				VN_RELE(ZTOV(tzp));
		}

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0)
			serr = EINVAL;
		ZFS_EXIT(zfsvfs);
		return (serr);
	}
	if (terr) {
		zfs_dirent_unlock(sdl);
		VN_RELE(ZTOV(szp));

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		if (strcmp(tnm, "..") == 0)
			terr = EINVAL;
		ZFS_EXIT(zfsvfs);
		return (terr);
	}

	/*
	 * Must have write access at the source to remove the old entry
	 * and write access at the target to create the new entry.
	 * Note that if target and source are the same, this can be
	 * done in a single check.
	 */

	if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))
		goto out;

	if (ZTOV(szp)->v_type == VDIR) {
		/*
		 * Check to make sure rename is valid.
		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
		 */
		if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl))
			goto out;
	}

	/*
	 * Does target exist?
	 */
	if (tzp) {
		/*
		 * Source and target must be the same type.
		 */
		if (ZTOV(szp)->v_type == VDIR) {
			if (ZTOV(tzp)->v_type != VDIR) {
				error = ENOTDIR;
				goto out;
			}
		} else {
			if (ZTOV(tzp)->v_type == VDIR) {
				error = EISDIR;
				goto out;
			}
		}
		/*
		 * POSIX dictates that when the source and target
		 * entries refer to the same file object, rename
		 * must do nothing and exit without error.
		 */
		if (szp->z_id == tzp->z_id) {
			error = 0;
			goto out;
		}
	}

	vnevent_rename_src(ZTOV(szp), sdvp, snm, ct);
	if (tzp)
		vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct);

	/*
	 * notify the target directory if it is not the same
	 * as source directory.
	 */
	if (tdvp != sdvp) {
		vnevent_rename_dest_dir(tdvp, ct);
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, szp->z_id);	/* nlink changes */
	dmu_tx_hold_bonus(tx, sdzp->z_id);	/* nlink changes */
	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
	if (sdzp != tdzp)
		dmu_tx_hold_bonus(tx, tdzp->z_id);	/* nlink changes */
	if (tzp)
		dmu_tx_hold_bonus(tx, tzp->z_id);	/* parent changes */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		/* Drop every lock before waiting/retrying or bailing out. */
		if (zl != NULL)
			zfs_rename_unlock(&zl);
		zfs_dirent_unlock(sdl);
		zfs_dirent_unlock(tdl);

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		VN_RELE(ZTOV(szp));
		if (tzp)
			VN_RELE(ZTOV(tzp));
		if (error == ERESTART) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (tzp)	/* Attempt to remove the existing target */
		error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);

	if (error == 0) {
		error = zfs_link_create(tdl, szp, tx, ZRENAMING);
		if (error == 0) {
			szp->z_phys->zp_flags |= ZFS_AV_MODIFIED;

			error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
			ASSERT(error == 0);

			zfs_log_rename(zilog, tx,
			    TX_RENAME | (flags & FIGNORECASE ?
			    TX_CI : 0), sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp);

			/* Update path information for the target vnode */
			vn_renamepath(tdvp, ZTOV(szp), tnm, strlen(tnm));
		}
#ifdef FREEBSD_NAMECACHE
		if (error == 0) {
			cache_purge(sdvp);
			cache_purge(tdvp);
		}
#endif
	}

	dmu_tx_commit(tx);
out:
	if (zl != NULL)
		zfs_rename_unlock(&zl);

	zfs_dirent_unlock(sdl);
	zfs_dirent_unlock(tdl);

	if (sdzp == tdzp)
		rw_exit(&sdzp->z_name_lock);

	VN_RELE(ZTOV(szp));
	if (tzp)
		VN_RELE(ZTOV(tzp));

	ZFS_EXIT(zfsvfs);

	return (error);
}

/*
 * Insert the indicated symbolic reference entry into the directory.
 *
 * IN:	dvp	- Directory to contain new symbolic link.
 *	link	- Name for new symlink entry.
 *	vap	- Attributes of new entry.
 *	target	- Target path of new symlink.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link,
    cred_t *cr, kthread_t *td)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	int		len = strlen(link);
	int		error;
	int		zflg = ZNEW;
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;
	/*
	 * NOTE(review): flags is a local hard-wired to 0 (the FreeBSD entry
	 * point has no case-flags argument), so both FIGNORECASE tests below
	 * are never taken here.  Kept to stay in sync with the upstream
	 * (OpenSolaris) signature where flags is a parameter.
	 */
	int		flags = 0;

	ASSERT(vap->va_type == VLNK);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/* Reject names that are not valid UTF-8 when the fs demands it. */
	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}
	if (flags & FIGNORECASE)
		zflg |= ZCILOOK;
top:
	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (len > MAXPATHLEN) {
		ZFS_EXIT(zfsvfs);
		return (ENAMETOOLONG);
	}

	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL);
	if (error) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	VERIFY(0 == zfs_acl_ids_create(dzp, 0, vap, cr, NULL, &acl_ids));
	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
		zfs_acl_ids_free(&acl_ids);
		zfs_dirent_unlock(dl);
		ZFS_EXIT(zfsvfs);
		return (EDQUOT);
	}
	tx = dmu_tx_create(zfsvfs->z_os);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
	dmu_tx_hold_bonus(tx, dzp->z_id);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE)
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE);
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		zfs_dirent_unlock(dl);
		if (error == ERESTART) {
			/* Transaction group was suspended; wait and retry. */
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	dmu_buf_will_dirty(dzp->z_dbuf, tx);

	/*
	 * Create a new object for the symlink.
	 * Put the link content into bonus buffer if it will fit;
	 * otherwise, store it just like any other file data.
	 */
	if (sizeof (znode_phys_t) + len <= dmu_bonus_max()) {
		zfs_mknode(dzp, vap, tx, cr, 0, &zp, len, &acl_ids);
		if (len != 0)
			bcopy(link, zp->z_phys + 1, len);
	} else {
		dmu_buf_t *dbp;

		zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);

		/*
		 * NOTE(review): zfs_fuid_sync() is invoked only on this
		 * overflow path, not on the bonus-buffer path above even
		 * though fuid_dirtied was hold()ed for both — confirm
		 * against upstream whether the short-link path needs it.
		 */
		if (fuid_dirtied)
			zfs_fuid_sync(zfsvfs, tx);
		/*
		 * Nothing can access the znode yet so no locking needed
		 * for growing the znode's blocksize.
		 */
		zfs_grow_blocksize(zp, len, tx);

		VERIFY(0 == dmu_buf_hold(zfsvfs->z_os,
		    zp->z_id, 0, FTAG, &dbp));
		dmu_buf_will_dirty(dbp, tx);

		ASSERT3U(len, <=, dbp->db_size);
		bcopy(link, dbp->db_data, len);
		dmu_buf_rele(dbp, FTAG);
	}
	zp->z_phys->zp_size = len;

	/*
	 * Insert the new object into the directory.
	 *
	 * NOTE(review): the return value is discarded and 'error' is still 0
	 * from dmu_tx_assign() at this point, so the check below always
	 * succeeds.  Presumably zfs_link_create() cannot fail with ZNEW while
	 * the dirent lock is held — confirm before relying on 'error' here.
	 */
	(void) zfs_link_create(dl, zp, tx, ZNEW);
	if (error == 0) {
		uint64_t txtype = TX_SYMLINK;
		if (flags & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
		*vpp = ZTOV(zp);
	}

	zfs_acl_ids_free(&acl_ids);

	dmu_tx_commit(tx);

	zfs_dirent_unlock(dl);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Return, in the buffer contained in the provided uio structure,
 * the symbolic path referred to by vp.
 *
 * IN:	vp	- vnode of symbolic link.
 *	uio	- structure to contain the link path.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *
 * OUT:	uio	- structure to contain the link path.
3825 * 3826 * RETURN: 0 if success 3827 * error code if failure 3828 * 3829 * Timestamps: 3830 * vp - atime updated 3831 */ 3832/* ARGSUSED */ 3833static int 3834zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 3835{ 3836 znode_t *zp = VTOZ(vp); 3837 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3838 size_t bufsz; 3839 int error; 3840 3841 ZFS_ENTER(zfsvfs); 3842 ZFS_VERIFY_ZP(zp); 3843 3844 bufsz = (size_t)zp->z_phys->zp_size; 3845 if (bufsz + sizeof (znode_phys_t) <= zp->z_dbuf->db_size) { 3846 error = uiomove(zp->z_phys + 1, 3847 MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); 3848 } else { 3849 dmu_buf_t *dbp; 3850 error = dmu_buf_hold(zfsvfs->z_os, zp->z_id, 0, FTAG, &dbp); 3851 if (error) { 3852 ZFS_EXIT(zfsvfs); 3853 return (error); 3854 } 3855 error = uiomove(dbp->db_data, 3856 MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); 3857 dmu_buf_rele(dbp, FTAG); 3858 } 3859 3860 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 3861 ZFS_EXIT(zfsvfs); 3862 return (error); 3863} 3864 3865/* 3866 * Insert a new entry into directory tdvp referencing svp. 3867 * 3868 * IN: tdvp - Directory to contain new entry. 3869 * svp - vnode of new entry. 3870 * name - name of new entry. 3871 * cr - credentials of caller. 
3872 * ct - caller context 3873 * 3874 * RETURN: 0 if success 3875 * error code if failure 3876 * 3877 * Timestamps: 3878 * tdvp - ctime|mtime updated 3879 * svp - ctime updated 3880 */ 3881/* ARGSUSED */ 3882static int 3883zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 3884 caller_context_t *ct, int flags) 3885{ 3886 znode_t *dzp = VTOZ(tdvp); 3887 znode_t *tzp, *szp; 3888 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3889 zilog_t *zilog; 3890 zfs_dirlock_t *dl; 3891 dmu_tx_t *tx; 3892 vnode_t *realvp; 3893 int error; 3894 int zf = ZNEW; 3895 uint64_t parent; 3896 uid_t owner; 3897 3898 ASSERT(tdvp->v_type == VDIR); 3899 3900 ZFS_ENTER(zfsvfs); 3901 ZFS_VERIFY_ZP(dzp); 3902 zilog = zfsvfs->z_log; 3903 3904 if (VOP_REALVP(svp, &realvp, ct) == 0) 3905 svp = realvp; 3906 3907 /* 3908 * POSIX dictates that we return EPERM here. 3909 * Better choices include ENOTSUP or EISDIR. 3910 */ 3911 if (svp->v_type == VDIR) { 3912 ZFS_EXIT(zfsvfs); 3913 return (EPERM); 3914 } 3915 3916 if (svp->v_vfsp != tdvp->v_vfsp || zfsctl_is_node(svp)) { 3917 ZFS_EXIT(zfsvfs); 3918 return (EXDEV); 3919 } 3920 3921 szp = VTOZ(svp); 3922 ZFS_VERIFY_ZP(szp); 3923 3924 /* Prevent links to .zfs/shares files */ 3925 3926 if (szp->z_phys->zp_parent == zfsvfs->z_shares_dir) { 3927 ZFS_EXIT(zfsvfs); 3928 return (EPERM); 3929 } 3930 3931 if (zfsvfs->z_utf8 && u8_validate(name, 3932 strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3933 ZFS_EXIT(zfsvfs); 3934 return (EILSEQ); 3935 } 3936 if (flags & FIGNORECASE) 3937 zf |= ZCILOOK; 3938 3939 /* 3940 * We do not support links between attributes and non-attributes 3941 * because of the potential security risk of creating links 3942 * into "normal" file space in order to circumvent restrictions 3943 * imposed in attribute space. 
3944 */ 3945 if ((szp->z_phys->zp_flags & ZFS_XATTR) != 3946 (dzp->z_phys->zp_flags & ZFS_XATTR)) { 3947 ZFS_EXIT(zfsvfs); 3948 return (EINVAL); 3949 } 3950 3951 3952 owner = zfs_fuid_map_id(zfsvfs, szp->z_phys->zp_uid, cr, ZFS_OWNER); 3953 if (owner != crgetuid(cr) && 3954 secpolicy_basic_link(svp, cr) != 0) { 3955 ZFS_EXIT(zfsvfs); 3956 return (EPERM); 3957 } 3958 3959 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3960 ZFS_EXIT(zfsvfs); 3961 return (error); 3962 } 3963 3964top: 3965 /* 3966 * Attempt to lock directory; fail if entry already exists. 3967 */ 3968 error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 3969 if (error) { 3970 ZFS_EXIT(zfsvfs); 3971 return (error); 3972 } 3973 3974 tx = dmu_tx_create(zfsvfs->z_os); 3975 dmu_tx_hold_bonus(tx, szp->z_id); 3976 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 3977 error = dmu_tx_assign(tx, TXG_NOWAIT); 3978 if (error) { 3979 zfs_dirent_unlock(dl); 3980 if (error == ERESTART) { 3981 dmu_tx_wait(tx); 3982 dmu_tx_abort(tx); 3983 goto top; 3984 } 3985 dmu_tx_abort(tx); 3986 ZFS_EXIT(zfsvfs); 3987 return (error); 3988 } 3989 3990 error = zfs_link_create(dl, szp, tx, 0); 3991 3992 if (error == 0) { 3993 uint64_t txtype = TX_LINK; 3994 if (flags & FIGNORECASE) 3995 txtype |= TX_CI; 3996 zfs_log_link(zilog, tx, txtype, dzp, szp, name); 3997 } 3998 3999 dmu_tx_commit(tx); 4000 4001 zfs_dirent_unlock(dl); 4002 4003 if (error == 0) { 4004 vnevent_link(svp, ct); 4005 } 4006 4007 ZFS_EXIT(zfsvfs); 4008 return (error); 4009} 4010 4011/*ARGSUSED*/ 4012void 4013zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4014{ 4015 znode_t *zp = VTOZ(vp); 4016 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4017 int error; 4018 4019 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4020 if (zp->z_dbuf == NULL) { 4021 /* 4022 * The fs has been unmounted, or we did a 4023 * suspend/resume and this file no longer exists. 
		 */
		VI_LOCK(vp);
		vp->v_count = 0; /* count arrives as 1 */
		VI_UNLOCK(vp);
		vrecycle(vp, curthread);
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		return;
	}

	/*
	 * Push a pending atime update to disk in its own transaction,
	 * unless the file has already been unlinked.
	 */
	if (zp->z_atime_dirty && zp->z_unlinked == 0) {
		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);

		dmu_tx_hold_bonus(tx, zp->z_id);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			/* Best effort only: drop the atime update on failure. */
			dmu_tx_abort(tx);
		} else {
			dmu_buf_will_dirty(zp->z_dbuf, tx);
			mutex_enter(&zp->z_lock);
			zp->z_atime_dirty = 0;
			mutex_exit(&zp->z_lock);
			dmu_tx_commit(tx);
		}
	}

	zfs_zinactive(zp);
	rw_exit(&zfsvfs->z_teardown_inactive_lock);
}

/* Both ZFS fid flavors must fit inside the generic struct fid. */
CTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid));
CTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid));

/*
 * Encode a file handle for this znode: object number plus generation,
 * and additionally the objset id (long fid) when this filesystem is not
 * its own parent (i.e. z_parent != zfsvfs).
 */
/*ARGSUSED*/
static int
zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	uint32_t	gen;
	uint64_t	object = zp->z_id;
	zfid_short_t	*zfid;
	int		size, i;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);
	gen = (uint32_t)zp->z_gen;

	size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
	fidp->fid_len = size;

	zfid = (zfid_short_t *)fidp;

	zfid->zf_len = size;

	/* Little-endian byte-wise encoding of the object number. */
	for (i = 0; i < sizeof (zfid->zf_object); i++)
		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));

	/* Must have a non-zero generation number to distinguish from .zfs */
	if (gen == 0)
		gen = 1;
	for (i = 0; i < sizeof (zfid->zf_gen); i++)
		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));

	if (size == LONG_FID_LEN) {
		uint64_t	objsetid = dmu_objset_id(zfsvfs->z_os);
		zfid_long_t	*zlfid;

		zlfid = (zfid_long_t *)fidp;

		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
			zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));

		/* XXX - this should be the generation number for the objset */
		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
			zlfid->zf_setgen[i] = 0;
	}

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Report filesystem limits/capabilities for pathconf(2).  Unknown names
 * return EOPNOTSUPP so the caller can fall back to vop_stdpathconf().
 * NOTE: zp, xzp and dl are only used by the disabled _PC_XATTR_EXISTS
 * code below.
 */
static int
zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp, *xzp;
	zfsvfs_t *zfsvfs;
	zfs_dirlock_t *dl;
	int error;

	switch (cmd) {
	case _PC_LINK_MAX:
		*valp = INT_MAX;
		return (0);

	case _PC_FILESIZEBITS:
		*valp = 64;
		return (0);

#if 0
	case _PC_XATTR_EXISTS:
		zp = VTOZ(vp);
		zfsvfs = zp->z_zfsvfs;
		ZFS_ENTER(zfsvfs);
		ZFS_VERIFY_ZP(zp);
		*valp = 0;
		error = zfs_dirent_lock(&dl, zp, "", &xzp,
		    ZXATTR | ZEXISTS | ZSHARED, NULL, NULL);
		if (error == 0) {
			zfs_dirent_unlock(dl);
			if (!zfs_dirempty(xzp))
				*valp = 1;
			VN_RELE(ZTOV(xzp));
		} else if (error == ENOENT) {
			/*
			 * If there aren't extended attributes, it's the
			 * same as having zero of them.
			 */
			error = 0;
		}
		ZFS_EXIT(zfsvfs);
		return (error);
#endif

	case _PC_ACL_EXTENDED:
		*valp = 0;
		return (0);

	case _PC_ACL_NFS4:
		*valp = 1;
		return (0);

	case _PC_ACL_PATH_MAX:
		*valp = ACL_MAX_ENTRIES;
		return (0);

	case _PC_MIN_HOLE_SIZE:
		*valp = (int)SPA_MINBLOCKSIZE;
		return (0);

	default:
		return (EOPNOTSUPP);
	}
}

/*
 * Fetch the ACL of a znode into vsecp; ATTR_NOACLCHECK skips the access
 * check (used by trusted in-kernel callers).
 */
/*ARGSUSED*/
static int
zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;
	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);
	error = zfs_getacl(zp, vsecp, skipaclchk, cr);
	ZFS_EXIT(zfsvfs);

	return (error);
}

/* Install a new ACL on a znode (see zfs_getsecattr for flag semantics). */
/*ARGSUSED*/
static int
zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;
	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ?
	    B_TRUE : B_FALSE;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);
	error = zfs_setacl(zp, vsecp, skipaclchk, cr);
	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Translate FreeBSD VOP ioflag bits into the Solaris-style f-flags
 * expected by zfs_read()/zfs_write().
 */
static int
ioflags(int ioflags)
{
	int flags = 0;

	if (ioflags & IO_APPEND)
		flags |= FAPPEND;
	if (ioflags & IO_NDELAY)
		flags |= FNONBLOCK;
	if (ioflags & IO_SYNC)
		flags |= (FSYNC | FDSYNC | FRSYNC);

	return (flags);
}

/*
 * VM pager "getpages" backend: fill the requested page from the DMU.
 * All pages other than m[reqpage] are freed up front; only the requested
 * page is read and validated.
 */
static int
zfs_getpages(struct vnode *vp, vm_page_t *m, int count, int reqpage)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	objset_t *os = zp->z_zfsvfs->z_os;
	vm_page_t mreq;
	vm_object_t object;
	caddr_t va;
	struct sf_buf *sf;
	int i, error;
	int pcount, size;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	pcount = round_page(count) / PAGE_SIZE;
	mreq = m[reqpage];
	object = mreq->object;
	error = 0;

	KASSERT(vp->v_object == object, ("mismatching object"));

	VM_OBJECT_LOCK(object);

	/* Free every page except the one actually requested. */
	for (i = 0; i < pcount; i++) {
		if (i != reqpage) {
			vm_page_lock(m[i]);
			vm_page_free(m[i]);
			vm_page_unlock(m[i]);
		}
	}

	if (mreq->valid) {
		/* Already (partially) valid: zero any invalid portions. */
		if (mreq->valid != VM_PAGE_BITS_ALL)
			vm_page_zero_invalid(mreq, TRUE);
		VM_OBJECT_UNLOCK(object);
		ZFS_EXIT(zfsvfs);
		return (VM_PAGER_OK);
	}

	PCPU_INC(cnt.v_vnodein);
	PCPU_INC(cnt.v_vnodepgsin);

	/* Page lies entirely beyond EOF as known to the VM object. */
	if (IDX_TO_OFF(mreq->pindex) >= object->un_pager.vnp.vnp_size) {
		VM_OBJECT_UNLOCK(object);
		ZFS_EXIT(zfsvfs);
		return (VM_PAGER_BAD);
	}

	/* Clamp the read size at EOF for the last partial page. */
	size = PAGE_SIZE;
	if (IDX_TO_OFF(mreq->pindex) + size > object->un_pager.vnp.vnp_size)
		size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(mreq->pindex);

	VM_OBJECT_UNLOCK(object);

	/* Map the page and read its contents from the DMU. */
	va = zfs_map_page(mreq, &sf);
	error = dmu_read(os, zp->z_id, IDX_TO_OFF(mreq->pindex),
	    size, va, DMU_READ_PREFETCH);
	if (size != PAGE_SIZE)
		bzero(va + size, PAGE_SIZE - size);
	zfs_unmap_page(sf);

	VM_OBJECT_LOCK(object);

	if (!error)
		mreq->valid = VM_PAGE_BITS_ALL;
	KASSERT(mreq->dirty == 0, ("zfs_getpages: page %p is dirty", mreq));

	VM_OBJECT_UNLOCK(object);

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
	ZFS_EXIT(zfsvfs);
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}

static int
zfs_freebsd_getpages(ap)
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_reqpage;
		vm_ooffset_t a_offset;
	} */ *ap;
{

	return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage));
}

static int
zfs_freebsd_open(ap)
	struct vop_open_args /* {
		struct vnode *a_vp;
		int a_mode;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	vnode_t	*vp = ap->a_vp;
	znode_t *zp = VTOZ(vp);
	int error;

	error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL);
	/* Ensure a VM object exists so the file can be mmap'ed. */
	if (error == 0)
		vnode_create_vobject(vp, zp->z_phys->zp_size, ap->a_td);
	return (error);
}

static int
zfs_freebsd_close(ap)
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{

	return (zfs_close(ap->a_vp, ap->a_fflag, 0, 0, ap->a_cred, NULL));
}

static int
zfs_freebsd_ioctl(ap)
	struct vop_ioctl_args /* {
		struct vnode *a_vp;
		u_long a_command;
		caddr_t a_data;
		int a_fflag;
		struct ucred *cred;
		struct thread *td;
	} */ *ap;
{

	return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
	    ap->a_fflag, ap->a_cred, NULL, NULL));
}

static int
zfs_freebsd_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{

	return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag),
	    ap->a_cred, NULL));
}

static int
zfs_freebsd_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{

	/* Enforce RLIMIT_FSIZE before handing the write to ZFS. */
	if (vn_rlimit_fsize(ap->a_vp, ap->a_uio, ap->a_uio->uio_td))
		return (EFBIG);

	return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag),
	    ap->a_cred, NULL));
}

static int
zfs_freebsd_access(ap)
	struct vop_access_args /* {
		struct vnode *a_vp;
		accmode_t a_accmode;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	vnode_t *vp = ap->a_vp;
	znode_t *zp = VTOZ(vp);
	znode_phys_t *zphys = zp->z_phys;
	accmode_t accmode;
	int error = 0;

	/*
	 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND,
	 */
	accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND);
	if (accmode != 0)
		error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL);

	/*
	 * VADMIN has to be handled by vaccess().
	 */
	if (error == 0) {
		accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
		if (accmode != 0) {
			error = vaccess(vp->v_type, zphys->zp_mode,
			    zphys->zp_uid, zphys->zp_gid, accmode, ap->a_cred,
			    NULL);
		}
	}

	/*
	 * For VEXEC, ensure that at least one execute bit is set for
	 * non-directories.
	 */
	if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR &&
	    (zphys->zp_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)
		error = EACCES;

	return (error);
}

static int
zfs_freebsd_lookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct componentname *cnp = ap->a_cnp;
	char nm[NAME_MAX + 1];

	/* cn_nameptr is not NUL-terminated; copy to a bounded buffer. */
	ASSERT(cnp->cn_namelen < sizeof(nm));
	strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm)));

	return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
	    cnp->cn_cred, cnp->cn_thread, 0));
}

static int
zfs_freebsd_create(ap)
	struct vop_create_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap;
{
	struct componentname *cnp = ap->a_cnp;
	vattr_t *vap = ap->a_vap;
	int mode;

	ASSERT(cnp->cn_flags & SAVENAME);

	vattr_init_mask(vap);
	mode = vap->va_mode & ALLPERMS;

	return (zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode,
	    ap->a_vpp, cnp->cn_cred, cnp->cn_thread));
}

static int
zfs_freebsd_remove(ap)
	struct vop_remove_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap;
{

	ASSERT(ap->a_cnp->cn_flags & SAVENAME);

	return (zfs_remove(ap->a_dvp, ap->a_cnp->cn_nameptr,
	    ap->a_cnp->cn_cred, NULL, 0));
}

static int
zfs_freebsd_mkdir(ap)
	struct vop_mkdir_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap;
{
	vattr_t *vap = ap->a_vap;

	ASSERT(ap->a_cnp->cn_flags & SAVENAME);

	vattr_init_mask(vap);

	return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp,
	    ap->a_cnp->cn_cred, NULL, 0, NULL));
}

static int
zfs_freebsd_rmdir(ap)
	struct vop_rmdir_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct componentname *cnp = ap->a_cnp;

	ASSERT(cnp->cn_flags & SAVENAME);

	return (zfs_rmdir(ap->a_dvp, cnp->cn_nameptr, NULL, cnp->cn_cred, NULL, 0));
}

static int
zfs_freebsd_readdir(ap)
	struct vop_readdir_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		struct ucred *a_cred;
		int *a_eofflag;
		int *a_ncookies;
		u_long **a_cookies;
	} */ *ap;
{

	return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag,
	    ap->a_ncookies, ap->a_cookies));
}

static int
zfs_freebsd_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{

	/* Flush buffered VM/bio state first, then let ZFS commit the ZIL. */
	vop_stdfsync(ap);
	return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL));
}

static int
zfs_freebsd_getattr(ap)
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	vattr_t *vap = ap->a_vap;
	xvattr_t xvap;
	u_long fflags = 0;
	int error;

	xva_init(&xvap);
	xvap.xva_vattr = *vap;
	xvap.xva_vattr.va_mask |= AT_XVATTR;

	/* Convert chflags into ZFS-type flags. */
	/* XXX: what about SF_SETTABLE?. */
	XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
	XVA_SET_REQ(&xvap, XAT_APPENDONLY);
	XVA_SET_REQ(&xvap, XAT_NOUNLINK);
	XVA_SET_REQ(&xvap, XAT_NODUMP);
	error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL);
	if (error != 0)
		return (error);

	/* Convert ZFS xattr into chflags. */
#define	FLAG_CHECK(fflag, xflag, xfield)	do {			\
	if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0)		\
		fflags |= (fflag);					\
} while (0)
	FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
	    xvap.xva_xoptattrs.xoa_immutable);
	FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
	    xvap.xva_xoptattrs.xoa_appendonly);
	FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
	    xvap.xva_xoptattrs.xoa_nounlink);
	FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
	    xvap.xva_xoptattrs.xoa_nodump);
#undef	FLAG_CHECK
	*vap = xvap.xva_vattr;
	vap->va_flags = fflags;
	return (0);
}

static int
zfs_freebsd_setattr(ap)
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	vnode_t *vp = ap->a_vp;
	vattr_t *vap = ap->a_vap;
	cred_t *cred = ap->a_cred;
	xvattr_t xvap;
	u_long fflags;
	uint64_t zflags;

	vattr_init_mask(vap);
	vap->va_mask &= ~AT_NOSET;

	xva_init(&xvap);
	xvap.xva_vattr = *vap;

	zflags = VTOZ(vp)->z_phys->zp_flags;

	if (vap->va_flags != VNOVAL) {
		zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
		int error;

		if (zfsvfs->z_use_fuids == B_FALSE)
			return (EOPNOTSUPP);

		/* Only these four chflags bits map onto ZFS attributes. */
		fflags = vap->va_flags;
		if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_NODUMP)) != 0)
			return (EOPNOTSUPP);
		/*
		 * Unprivileged processes are not permitted to unset system
		 * flags, or modify flags if any system flags are set.
		 * Privileged non-jail processes may not modify system flags
		 * if securelevel > 0 and any existing system flags are set.
		 * Privileged jail processes behave like privileged non-jail
		 * processes if the security.jail.chflags_allowed sysctl is
		 * is non-zero; otherwise, they behave like unprivileged
		 * processes.
		 */
		if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
		    priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) {
			if (zflags &
			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
				error = securelevel_gt(cred, 0);
				if (error != 0)
					return (error);
			}
		} else {
			/*
			 * Callers may only modify the file flags on objects they
			 * have VADMIN rights for.
			 */
			if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0)
				return (error);
			if (zflags &
			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
				return (EPERM);
			}
			if (fflags &
			    (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
				return (EPERM);
			}
		}

/* Request an xvattr update only for flags whose state actually changes. */
#define	FLAG_CHANGE(fflag, zflag, xflag, xfield)	do {		\
	if (((fflags & (fflag)) && !(zflags & (zflag))) ||		\
	    ((zflags & (zflag)) && !(fflags & (fflag)))) {		\
		XVA_SET_REQ(&xvap, (xflag));				\
		(xfield) = ((fflags & (fflag)) != 0);			\
	}								\
} while (0)
		/* Convert chflags into ZFS-type flags. */
		/* XXX: what about SF_SETTABLE?. */
		FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
		    xvap.xva_xoptattrs.xoa_immutable);
		FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
		    xvap.xva_xoptattrs.xoa_appendonly);
		FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
		    xvap.xva_xoptattrs.xoa_nounlink);
		FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
		    xvap.xva_xoptattrs.xoa_nodump);
#undef	FLAG_CHANGE
	}
	return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL));
}

static int
zfs_freebsd_rename(ap)
	struct vop_rename_args /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap;
{
	vnode_t *fdvp = ap->a_fdvp;
	vnode_t *fvp = ap->a_fvp;
	vnode_t *tdvp = ap->a_tdvp;
	vnode_t *tvp = ap->a_tvp;
	int error;

	ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
	ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));

	error = zfs_rename(fdvp, ap->a_fcnp->cn_nameptr, tdvp,
	    ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred, NULL, 0);

	/* Per VOP_RENAME contract, drop the references namei gave us. */
	if (tdvp == tvp)
		VN_RELE(tdvp);
	else
		VN_URELE(tdvp);
	if (tvp)
		VN_URELE(tvp);
	VN_RELE(fdvp);
	VN_RELE(fvp);

	return (error);
}

static int
zfs_freebsd_symlink(ap)
	struct vop_symlink_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
		char *a_target;
	} */ *ap;
{
	struct componentname *cnp = ap->a_cnp;
	vattr_t *vap = ap->a_vap;

	ASSERT(cnp->cn_flags & SAVENAME);

	vap->va_type = VLNK;	/* FreeBSD: Syscall only sets va_mode.
	 */
	vattr_init_mask(vap);

	return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap,
	    ap->a_target, cnp->cn_cred, cnp->cn_thread));
}

static int
zfs_freebsd_readlink(ap)
	struct vop_readlink_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		struct ucred *a_cred;
	} */ *ap;
{

	return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL));
}

static int
zfs_freebsd_link(ap)
	struct vop_link_args /* {
		struct vnode *a_tdvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct componentname *cnp = ap->a_cnp;

	ASSERT(cnp->cn_flags & SAVENAME);

	return (zfs_link(ap->a_tdvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0));
}

static int
zfs_freebsd_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct thread *a_td;
	} */ *ap;
{
	vnode_t *vp = ap->a_vp;

	zfs_inactive(vp, ap->a_td->td_ucred, NULL);
	return (0);
}

/*
 * Taskqueue callback used by zfs_freebsd_reclaim() to finish destroying
 * a znode whose object mutex could not be taken at reclaim time.
 */
static void
zfs_reclaim_complete(void *arg, int pending)
{
	znode_t	*zp = arg;
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
	if (zp->z_dbuf != NULL) {
		ZFS_OBJ_HOLD_ENTER(zfsvfs, zp->z_id);
		zfs_znode_dmu_fini(zp);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id);
	}
	zfs_znode_free(zp);
	rw_exit(&zfsvfs->z_teardown_inactive_lock);
	/*
	 * If the file system is being unmounted, there is a process waiting
	 * for us, wake it up.
	 */
	if (zfsvfs->z_unmounted)
		wakeup_one(zfsvfs);
}

static int
zfs_freebsd_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
		struct thread *a_td;
	} */ *ap;
{
	vnode_t	*vp = ap->a_vp;
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);

	ASSERT(zp != NULL);

	/*
	 * Destroy the vm object and flush associated pages.
	 */
	vnode_destroy_vobject(vp);

	/* Detach the znode from the vnode before tearing it down. */
	mutex_enter(&zp->z_lock);
	ASSERT(zp->z_phys != NULL);
	zp->z_vnode = NULL;
	mutex_exit(&zp->z_lock);

	if (zp->z_unlinked)
		;	/* Do nothing. */
	else if (zp->z_dbuf == NULL)
		zfs_znode_free(zp);
	else /* if (!zp->z_unlinked && zp->z_dbuf != NULL) */ {
		int locked;

		locked = MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)) ? 2 :
		    ZFS_OBJ_HOLD_TRYENTER(zfsvfs, zp->z_id);
		if (locked == 0) {
			/*
			 * Lock can't be obtained due to deadlock possibility,
			 * so defer znode destruction.
			 */
			TASK_INIT(&zp->z_task, 0, zfs_reclaim_complete, zp);
			taskqueue_enqueue(taskqueue_thread, &zp->z_task);
		} else {
			zfs_znode_dmu_fini(zp);
			/* locked == 2 means the caller already held it. */
			if (locked == 1)
				ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id);
			zfs_znode_free(zp);
		}
	}
	VI_LOCK(vp);
	vp->v_data = NULL;
	ASSERT(vp->v_holdcnt >= 1);
	VI_UNLOCK(vp);
	rw_exit(&zfsvfs->z_teardown_inactive_lock);
	return (0);
}

static int
zfs_freebsd_fid(ap)
	struct vop_fid_args /* {
		struct vnode *a_vp;
		struct fid *a_fid;
	} */ *ap;
{

	return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL));
}

static int
zfs_freebsd_pathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		register_t *a_retval;
	} */ *ap;
{
	ulong_t val;
	int error;

	error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL);
	if (error == 0)
		*ap->a_retval = val;
	else if (error == EOPNOTSUPP)
		/* Name not handled by ZFS; use the generic defaults. */
		error = vop_stdpathconf(ap);
	return (error);
}

static int
zfs_freebsd_fifo_pathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		register_t *a_retval;
	} */ *ap;
{

	/* ACL-related names go to ZFS; everything else to the fifo ops. */
	switch (ap->a_name) {
	case _PC_ACL_EXTENDED:
	case _PC_ACL_NFS4:
	case _PC_ACL_PATH_MAX:
	case _PC_MAC_PRESENT:
		return (zfs_freebsd_pathconf(ap));
	default:
		return (fifo_specops.vop_pathconf(ap));
	}
}

/*
 * FreeBSD's extended attributes namespace defines file name prefix for ZFS'
 * extended attribute name:
 *
 * NAMESPACE	PREFIX
 * system	freebsd:system:
 * user		(none, can be used to access ZFS fsattr(5) attributes
 *		created on Solaris)
 */
static int
zfs_create_attrname(int attrnamespace, const char *name, char *attrname,
    size_t size)
{
	const char *namespace, *prefix, *suffix;

	/* We don't allow '/' character in attribute name. */
	if (strchr(name, '/') != NULL)
		return (EINVAL);
	/* We don't allow attribute names that start with "freebsd:" string. */
	if (strncmp(name, "freebsd:", 8) == 0)
		return (EINVAL);

	bzero(attrname, size);

	switch (attrnamespace) {
	case EXTATTR_NAMESPACE_USER:
#if 0
		prefix = "freebsd:";
		namespace = EXTATTR_NAMESPACE_USER_STRING;
		suffix = ":";
#else
		/*
		 * This is the default namespace by which we can access all
		 * attributes created on Solaris.
		 */
		prefix = namespace = suffix = "";
#endif
		break;
	case EXTATTR_NAMESPACE_SYSTEM:
		prefix = "freebsd:";
		namespace = EXTATTR_NAMESPACE_SYSTEM_STRING;
		suffix = ":";
		break;
	case EXTATTR_NAMESPACE_EMPTY:
	default:
		return (EINVAL);
	}
	/* snprintf reports the would-be length, so >= size means truncation. */
	if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix,
	    name) >= size) {
		return (ENAMETOOLONG);
	}
	return (0);
}

/*
 * Vnode operation to retrieve a named extended attribute.
 */
static int
zfs_getextattr(struct vop_getextattr_args *ap)
/*
vop_getextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs;
	struct thread *td = ap->a_td;
	struct nameidata nd;
	char attrname[255];
	struct vattr va;
	vnode_t *xvp = NULL, *vp;
	int error, flags;

	/* Verify the caller may read attributes in this namespace. */
	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VREAD);
	if (error != 0)
		return (error);

	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
	    sizeof(attrname));
	if (error != 0)
		return (error);

	ZFS_ENTER(zfsvfs);

	/* Locate the hidden extended-attribute directory for this vnode. */
	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td,
	    LOOKUP_XATTR);
	if (error != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/* Attributes are stored as regular files inside that directory. */
	flags = FREAD;
	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, attrname,
	    xvp, td);
	error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL);
	vp = nd.ni_vp;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (error != 0) {
		ZFS_EXIT(zfsvfs);
		/* A missing backing file means the attribute doesn't exist. */
		if (error == ENOENT)
			error = ENOATTR;
		return (error);
	}

	/*
	 * Either report the attribute's size (a_size) or copy its contents
	 * out through the caller-supplied uio.
	 */
	if (ap->a_size != NULL) {
		error = VOP_GETATTR(vp, &va, ap->a_cred);
		if (error == 0)
			*ap->a_size = (size_t)va.va_size;
	} else if (ap->a_uio != NULL)
		error = VOP_READ(vp, ap->a_uio, IO_UNIT | IO_SYNC, ap->a_cred);

	VOP_UNLOCK(vp, 0);
	vn_close(vp, flags, ap->a_cred, td);
	ZFS_EXIT(zfsvfs);

	return (error);
}

/*
 * Vnode operation to remove a named attribute.
5041 */ 5042int 5043zfs_deleteextattr(struct vop_deleteextattr_args *ap) 5044/* 5045vop_deleteextattr { 5046 IN struct vnode *a_vp; 5047 IN int a_attrnamespace; 5048 IN const char *a_name; 5049 IN struct ucred *a_cred; 5050 IN struct thread *a_td; 5051}; 5052*/ 5053{ 5054 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5055 struct thread *td = ap->a_td; 5056 struct nameidata nd; 5057 char attrname[255]; 5058 struct vattr va; 5059 vnode_t *xvp = NULL, *vp; 5060 int error, flags; 5061 5062 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5063 ap->a_cred, ap->a_td, VWRITE); 5064 if (error != 0) 5065 return (error); 5066 5067 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5068 sizeof(attrname)); 5069 if (error != 0) 5070 return (error); 5071 5072 ZFS_ENTER(zfsvfs); 5073 5074 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5075 LOOKUP_XATTR); 5076 if (error != 0) { 5077 ZFS_EXIT(zfsvfs); 5078 return (error); 5079 } 5080 5081 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF | MPSAFE, 5082 UIO_SYSSPACE, attrname, xvp, td); 5083 error = namei(&nd); 5084 vp = nd.ni_vp; 5085 NDFREE(&nd, NDF_ONLY_PNBUF); 5086 if (error != 0) { 5087 ZFS_EXIT(zfsvfs); 5088 if (error == ENOENT) 5089 error = ENOATTR; 5090 return (error); 5091 } 5092 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 5093 5094 vput(nd.ni_dvp); 5095 if (vp == nd.ni_dvp) 5096 vrele(vp); 5097 else 5098 vput(vp); 5099 ZFS_EXIT(zfsvfs); 5100 5101 return (error); 5102} 5103 5104/* 5105 * Vnode operation to set a named attribute. 
5106 */ 5107static int 5108zfs_setextattr(struct vop_setextattr_args *ap) 5109/* 5110vop_setextattr { 5111 IN struct vnode *a_vp; 5112 IN int a_attrnamespace; 5113 IN const char *a_name; 5114 INOUT struct uio *a_uio; 5115 IN struct ucred *a_cred; 5116 IN struct thread *a_td; 5117}; 5118*/ 5119{ 5120 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5121 struct thread *td = ap->a_td; 5122 struct nameidata nd; 5123 char attrname[255]; 5124 struct vattr va; 5125 vnode_t *xvp = NULL, *vp; 5126 int error, flags; 5127 5128 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5129 ap->a_cred, ap->a_td, VWRITE); 5130 if (error != 0) 5131 return (error); 5132 5133 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5134 sizeof(attrname)); 5135 if (error != 0) 5136 return (error); 5137 5138 ZFS_ENTER(zfsvfs); 5139 5140 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5141 LOOKUP_XATTR | CREATE_XATTR_DIR); 5142 if (error != 0) { 5143 ZFS_EXIT(zfsvfs); 5144 return (error); 5145 } 5146 5147 flags = FFLAGS(O_WRONLY | O_CREAT); 5148 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, attrname, 5149 xvp, td); 5150 error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); 5151 vp = nd.ni_vp; 5152 NDFREE(&nd, NDF_ONLY_PNBUF); 5153 if (error != 0) { 5154 ZFS_EXIT(zfsvfs); 5155 return (error); 5156 } 5157 5158 VATTR_NULL(&va); 5159 va.va_size = 0; 5160 error = VOP_SETATTR(vp, &va, ap->a_cred); 5161 if (error == 0) 5162 VOP_WRITE(vp, ap->a_uio, IO_UNIT | IO_SYNC, ap->a_cred); 5163 5164 VOP_UNLOCK(vp, 0); 5165 vn_close(vp, flags, ap->a_cred, td); 5166 ZFS_EXIT(zfsvfs); 5167 5168 return (error); 5169} 5170 5171/* 5172 * Vnode operation to retrieve extended attributes on a vnode. 
 */
static int
zfs_listextattr(struct vop_listextattr_args *ap)
/*
vop_listextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs;
	struct thread *td = ap->a_td;
	struct nameidata nd;
	char attrprefix[16];
	u_char dirbuf[sizeof(struct dirent)];
	struct dirent *dp;
	struct iovec aiov;
	struct uio auio, *uio = ap->a_uio;
	size_t *sizep = ap->a_size;
	size_t plen;
	vnode_t *xvp = NULL, *vp;
	int done, error, eof, pos;

	/* Verify the caller may read attributes in this namespace. */
	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VREAD);
	if (error != 0)
		return (error);

	/* Build the namespace prefix used to filter directory entries. */
	error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
	    sizeof(attrprefix));
	if (error != 0)
		return (error);
	plen = strlen(attrprefix);

	ZFS_ENTER(zfsvfs);

	if (sizep != NULL)
		*sizep = 0;

	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td,
	    LOOKUP_XATTR);
	if (error != 0) {
		ZFS_EXIT(zfsvfs);
		/*
		 * ENOATTR means that the EA directory does not yet exist,
		 * i.e. there are no extended attributes there.
		 */
		if (error == ENOATTR)
			error = 0;
		return (error);
	}

	/* Re-lookup "." to get a locked vnode for the EA directory. */
	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE,
	    UIO_SYSSPACE, ".", xvp, td);
	error = namei(&nd);
	vp = nd.ni_vp;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (error != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/* Set up a kernel-space uio for reading the directory in chunks. */
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_td = td;
	auio.uio_rw = UIO_READ;
	auio.uio_offset = 0;

	do {
		u_char nlen;

		aiov.iov_base = (void *)dirbuf;
		aiov.iov_len = sizeof(dirbuf);
		auio.uio_resid = sizeof(dirbuf);
		error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL);
		done = sizeof(dirbuf) - auio.uio_resid;
		if (error != 0)
			break;
		/* Walk the dirents returned in this chunk. */
		for (pos = 0; pos < done;) {
			dp = (struct dirent *)(dirbuf + pos);
			pos += dp->d_reclen;
			/*
			 * XXX: Temporarily we also accept DT_UNKNOWN, as this
			 * is what we get when attribute was created on Solaris.
			 */
			if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN)
				continue;
			/*
			 * In the user namespace (empty prefix), hide entries
			 * belonging to the "freebsd:"-prefixed namespaces;
			 * otherwise only report names matching our prefix.
			 */
			if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0)
				continue;
			else if (strncmp(dp->d_name, attrprefix, plen) != 0)
				continue;
			nlen = dp->d_namlen - plen;
			if (sizep != NULL)
				*sizep += 1 + nlen;
			else if (uio != NULL) {
				/*
				 * Format of extattr name entry is one byte for
				 * length and the rest for name.
				 */
				error = uiomove(&nlen, 1, uio->uio_rw, uio);
				if (error == 0) {
					error = uiomove(dp->d_name + plen, nlen,
					    uio->uio_rw, uio);
				}
				if (error != 0)
					break;
			}
		}
	} while (!eof && error == 0);

	vput(vp);
	ZFS_EXIT(zfsvfs);

	return (error);
}

/*
 * Retrieve the NFSv4 ACL of a vnode, converting from ZFS ACEs to the
 * FreeBSD struct acl representation.
 */
int
zfs_freebsd_getacl(ap)
	struct vop_getacl_args /* {
		struct vnode *vp;
		acl_type_t type;
		struct acl *aclp;
		struct ucred *cred;
		struct thread *td;
	} */ *ap;
{
	int error;
	vsecattr_t vsecattr;

	/* Only NFSv4-type ACLs are supported. */
	if (ap->a_type != ACL_TYPE_NFS4)
		return (EINVAL);

	vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
	if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL))
		return (error);

	error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt);
	/* zfs_getsecattr() allocated the ACE buffer; release it. */
	if (vsecattr.vsa_aclentp != NULL)
		kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz);

	return (error);
}

/*
 * Set the NFSv4 ACL of a vnode, converting from the FreeBSD struct acl
 * representation to ZFS ACEs.
 */
int
zfs_freebsd_setacl(ap)
	struct vop_setacl_args /* {
		struct vnode *vp;
		acl_type_t type;
		struct acl *aclp;
		struct ucred *cred;
		struct thread *td;
	} */ *ap;
{
	int error;
	vsecattr_t vsecattr;
	int aclbsize;	/* size of acl list in bytes */
	aclent_t *aaclp;

	/* Only NFSv4-type ACLs are supported. */
	if (ap->a_type != ACL_TYPE_NFS4)
		return (EINVAL);

	if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES)
		return (EINVAL);

	/*
	 * With NFSv4 ACLs, chmod(2) may need to add additional entries,
	 * splitting every entry into two and appending "canonical six"
	 * entries at the end.  Don't allow for setting an ACL that would
	 * cause chmod(2) to run out of ACL entries.
 */
	if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES)
		return (ENOSPC);

	/* Validate the ACL before converting it. */
	error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR);
	if (error != 0)
		return (error);

	vsecattr.vsa_mask = VSA_ACE;
	aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t);
	vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP);
	aaclp = vsecattr.vsa_aclentp;
	vsecattr.vsa_aclentsz = aclbsize;

	aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp);
	error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL);
	kmem_free(aaclp, aclbsize);

	return (error);
}

/*
 * ACL checking is not implemented for ZFS.
 */
int
zfs_freebsd_aclcheck(ap)
	struct vop_aclcheck_args /* {
		struct vnode *vp;
		acl_type_t type;
		struct acl *aclp;
		struct ucred *cred;
		struct thread *td;
	} */ *ap;
{

	return (EOPNOTSUPP);
}

struct vop_vector zfs_vnodeops;
struct vop_vector zfs_fifoops;
struct vop_vector zfs_shareops;

/*
 * Vnode operations template for regular ZFS files and directories.
 */
struct vop_vector zfs_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_inactive =		zfs_freebsd_inactive,
	.vop_reclaim =		zfs_freebsd_reclaim,
	.vop_access =		zfs_freebsd_access,
#ifdef FREEBSD_NAMECACHE
	/* Use the name cache; ZFS lookup only runs on cache misses. */
	.vop_lookup =		vfs_cache_lookup,
	.vop_cachedlookup =	zfs_freebsd_lookup,
#else
	.vop_lookup =		zfs_freebsd_lookup,
#endif
	.vop_getattr =		zfs_freebsd_getattr,
	.vop_setattr =		zfs_freebsd_setattr,
	.vop_create =		zfs_freebsd_create,
	.vop_mknod =		zfs_freebsd_create,
	.vop_mkdir =		zfs_freebsd_mkdir,
	.vop_readdir =		zfs_freebsd_readdir,
	.vop_fsync =		zfs_freebsd_fsync,
	.vop_open =		zfs_freebsd_open,
	.vop_close =		zfs_freebsd_close,
	.vop_rmdir =		zfs_freebsd_rmdir,
	.vop_ioctl =		zfs_freebsd_ioctl,
	.vop_link =		zfs_freebsd_link,
	.vop_symlink =		zfs_freebsd_symlink,
	.vop_readlink =		zfs_freebsd_readlink,
	.vop_read =		zfs_freebsd_read,
	.vop_write =		zfs_freebsd_write,
	.vop_remove =		zfs_freebsd_remove,
	.vop_rename =		zfs_freebsd_rename,
	.vop_pathconf =		zfs_freebsd_pathconf,
	.vop_bmap =		VOP_EOPNOTSUPP,
	.vop_fid =		zfs_freebsd_fid,
	.vop_getextattr =	zfs_getextattr,
	.vop_deleteextattr =	zfs_deleteextattr,
	.vop_setextattr =	zfs_setextattr,
	.vop_listextattr =	zfs_listextattr,
	.vop_getacl =		zfs_freebsd_getacl,
	.vop_setacl =		zfs_freebsd_setacl,
	.vop_aclcheck =		zfs_freebsd_aclcheck,
	.vop_getpages =		zfs_freebsd_getpages,
};

/*
 * Vnode operations template for fifos on ZFS; defaults come from the
 * generic fifo operations, with ZFS overrides for attributes and ACLs.
 */
struct vop_vector zfs_fifoops = {
	.vop_default =		&fifo_specops,
	.vop_fsync =		zfs_freebsd_fsync,
	.vop_access =		zfs_freebsd_access,
	.vop_getattr =		zfs_freebsd_getattr,
	.vop_inactive =		zfs_freebsd_inactive,
	.vop_read =		VOP_PANIC,
	.vop_reclaim =		zfs_freebsd_reclaim,
	.vop_setattr =		zfs_freebsd_setattr,
	.vop_write =		VOP_PANIC,
	.vop_pathconf = 	zfs_freebsd_fifo_pathconf,
	.vop_fid =		zfs_freebsd_fid,
	.vop_getacl =		zfs_freebsd_getacl,
	.vop_setacl =		zfs_freebsd_setacl,
	.vop_aclcheck =		zfs_freebsd_aclcheck,
};

/*
 * special share hidden files vnode operations template
 */
struct vop_vector zfs_shareops = {
	.vop_default =		&default_vnodeops,
	.vop_access =		zfs_freebsd_access,
	.vop_inactive =		zfs_freebsd_inactive,
	.vop_reclaim =		zfs_freebsd_reclaim,
	.vop_fid =		zfs_freebsd_fid,
	.vop_pathconf =		zfs_freebsd_pathconf,
};