zfs_vnops.c revision 210470
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22209962Smm * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23168404Spjd * Use is subject to license terms. 
24168404Spjd */ 25168404Spjd 26169195Spjd/* Portions Copyright 2007 Jeremy Teo */ 27169195Spjd 28168404Spjd#include <sys/types.h> 29168404Spjd#include <sys/param.h> 30168404Spjd#include <sys/time.h> 31168404Spjd#include <sys/systm.h> 32168404Spjd#include <sys/sysmacros.h> 33168404Spjd#include <sys/resource.h> 34168404Spjd#include <sys/vfs.h> 35168404Spjd#include <sys/vnode.h> 36168404Spjd#include <sys/file.h> 37168404Spjd#include <sys/stat.h> 38168404Spjd#include <sys/kmem.h> 39168404Spjd#include <sys/taskq.h> 40168404Spjd#include <sys/uio.h> 41168404Spjd#include <sys/atomic.h> 42168404Spjd#include <sys/namei.h> 43168404Spjd#include <sys/mman.h> 44168404Spjd#include <sys/cmn_err.h> 45168404Spjd#include <sys/errno.h> 46168404Spjd#include <sys/unistd.h> 47168404Spjd#include <sys/zfs_dir.h> 48168404Spjd#include <sys/zfs_ioctl.h> 49168404Spjd#include <sys/fs/zfs.h> 50168404Spjd#include <sys/dmu.h> 51168404Spjd#include <sys/spa.h> 52168404Spjd#include <sys/txg.h> 53168404Spjd#include <sys/dbuf.h> 54168404Spjd#include <sys/zap.h> 55168404Spjd#include <sys/dirent.h> 56168962Spjd#include <sys/policy.h> 57168962Spjd#include <sys/sunddi.h> 58168404Spjd#include <sys/filio.h> 59209962Smm#include <sys/sid.h> 60168404Spjd#include <sys/zfs_ctldir.h> 61185029Spjd#include <sys/zfs_fuid.h> 62168404Spjd#include <sys/dnlc.h> 63168404Spjd#include <sys/zfs_rlock.h> 64185029Spjd#include <sys/extdirent.h> 65185029Spjd#include <sys/kidmap.h> 66168404Spjd#include <sys/bio.h> 67168404Spjd#include <sys/buf.h> 68168404Spjd#include <sys/sf_buf.h> 69168404Spjd#include <sys/sched.h> 70192800Strasz#include <sys/acl.h> 71168404Spjd 72168404Spjd/* 73168404Spjd * Programming rules. 74168404Spjd * 75168404Spjd * Each vnode op performs some logical unit of work. 
To do this, the ZPL must 76168404Spjd * properly lock its in-core state, create a DMU transaction, do the work, 77168404Spjd * record this work in the intent log (ZIL), commit the DMU transaction, 78185029Spjd * and wait for the intent log to commit if it is a synchronous operation. 79185029Spjd * Moreover, the vnode ops must work in both normal and log replay context. 80168404Spjd * The ordering of events is important to avoid deadlocks and references 81168404Spjd * to freed memory. The example below illustrates the following Big Rules: 82168404Spjd * 83168404Spjd * (1) A check must be made in each zfs thread for a mounted file system. 84168404Spjd * This is done avoiding races using ZFS_ENTER(zfsvfs). 85185029Spjd * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 86185029Spjd * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 87185029Spjd * can return EIO from the calling function. 88168404Spjd * 89168404Spjd * (2) VN_RELE() should always be the last thing except for zil_commit() 90168404Spjd * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 91168404Spjd * First, if it's the last reference, the vnode/znode 92168404Spjd * can be freed, so the zp may point to freed memory. Second, the last 93168404Spjd * reference will call zfs_zinactive(), which may induce a lot of work -- 94168404Spjd * pushing cached pages (which acquires range locks) and syncing out 95168404Spjd * cached atime changes. Third, zfs_zinactive() may require a new tx, 96168404Spjd * which could deadlock the system if you were already holding one. 97191900Skmacy * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 98168404Spjd * 99168404Spjd * (3) All range locks must be grabbed before calling dmu_tx_assign(), 100168404Spjd * as they can span dmu_tx_assign() calls. 101168404Spjd * 102209962Smm * (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign(). 103168404Spjd * This is critical because we don't want to block while holding locks. 
104168404Spjd * Note, in particular, that if a lock is sometimes acquired before 105168404Spjd * the tx assigns, and sometimes after (e.g. z_lock), then failing to 106168404Spjd * use a non-blocking assign can deadlock the system. The scenario: 107168404Spjd * 108168404Spjd * Thread A has grabbed a lock before calling dmu_tx_assign(). 109168404Spjd * Thread B is in an already-assigned tx, and blocks for this lock. 110168404Spjd * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 111168404Spjd * forever, because the previous txg can't quiesce until B's tx commits. 112168404Spjd * 113168404Spjd * If dmu_tx_assign(tx, TXG_NOWAIT) returns ERESTART, 114168404Spjd * then drop all locks, call dmu_tx_wait(), and try again. 115168404Spjd * 116168404Spjd * (5) If the operation succeeded, generate the intent log entry for it 117168404Spjd * before dropping locks. This ensures that the ordering of events 118168404Spjd * in the intent log matches the order in which they actually occurred. 119209962Smm * During ZIL replay the zfs_log_* functions will update the sequence 120209962Smm * number to indicate the zil transaction has replayed. 121168404Spjd * 122168404Spjd * (6) At the end of each vnode op, the DMU tx must always commit, 123168404Spjd * regardless of whether there were any errors. 124168404Spjd * 125168404Spjd * (7) After dropping all locks, invoke zil_commit(zilog, seq, foid) 126168404Spjd * to ensure that synchronous semantics are provided when necessary. 127168404Spjd * 128168404Spjd * In general, this is how things should be ordered in each vnode op: 129168404Spjd * 130168404Spjd * ZFS_ENTER(zfsvfs); // exit if unmounted 131168404Spjd * top: 132168404Spjd * zfs_dirent_lock(&dl, ...) 
// lock directory entry (may VN_HOLD()) 133168404Spjd * rw_enter(...); // grab any other locks you need 134168404Spjd * tx = dmu_tx_create(...); // get DMU tx 135168404Spjd * dmu_tx_hold_*(); // hold each object you might modify 136209962Smm * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign 137168404Spjd * if (error) { 138168404Spjd * rw_exit(...); // drop locks 139168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 140168404Spjd * VN_RELE(...); // release held vnodes 141209962Smm * if (error == ERESTART) { 142168404Spjd * dmu_tx_wait(tx); 143168404Spjd * dmu_tx_abort(tx); 144168404Spjd * goto top; 145168404Spjd * } 146168404Spjd * dmu_tx_abort(tx); // abort DMU tx 147168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 148168404Spjd * return (error); // really out of space 149168404Spjd * } 150168404Spjd * error = do_real_work(); // do whatever this VOP does 151168404Spjd * if (error == 0) 152168404Spjd * zfs_log_*(...); // on success, make ZIL entry 153168404Spjd * dmu_tx_commit(tx); // commit DMU tx -- error or not 154168404Spjd * rw_exit(...); // drop locks 155168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 156168404Spjd * VN_RELE(...); // release held vnodes 157168404Spjd * zil_commit(zilog, seq, foid); // synchronous when necessary 158168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 159168404Spjd * return (error); // done, report error 160168404Spjd */ 161185029Spjd 162168404Spjd/* ARGSUSED */ 163168404Spjdstatic int 164185029Spjdzfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 165168404Spjd{ 166168962Spjd znode_t *zp = VTOZ(*vpp); 167209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 168168404Spjd 169209962Smm ZFS_ENTER(zfsvfs); 170209962Smm ZFS_VERIFY_ZP(zp); 171209962Smm 172185029Spjd if ((flag & FWRITE) && (zp->z_phys->zp_flags & ZFS_APPENDONLY) && 173185029Spjd ((flag & FAPPEND) == 0)) { 174209962Smm ZFS_EXIT(zfsvfs); 175185029Spjd return (EPERM); 176185029Spjd } 177185029Spjd 178185029Spjd if (!zfs_has_ctldir(zp) 
&& zp->z_zfsvfs->z_vscan && 179185029Spjd ZTOV(zp)->v_type == VREG && 180185029Spjd !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) && 181209962Smm zp->z_phys->zp_size > 0) { 182209962Smm if (fs_vscan(*vpp, cr, 0) != 0) { 183209962Smm ZFS_EXIT(zfsvfs); 184185029Spjd return (EACCES); 185209962Smm } 186209962Smm } 187185029Spjd 188168404Spjd /* Keep a count of the synchronous opens in the znode */ 189168962Spjd if (flag & (FSYNC | FDSYNC)) 190168404Spjd atomic_inc_32(&zp->z_sync_cnt); 191185029Spjd 192209962Smm ZFS_EXIT(zfsvfs); 193168404Spjd return (0); 194168404Spjd} 195168404Spjd 196168404Spjd/* ARGSUSED */ 197168404Spjdstatic int 198185029Spjdzfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 199185029Spjd caller_context_t *ct) 200168404Spjd{ 201168962Spjd znode_t *zp = VTOZ(vp); 202209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 203168404Spjd 204210470Smm /* 205210470Smm * Clean up any locks held by this process on the vp. 206210470Smm */ 207210470Smm cleanlocks(vp, ddi_get_pid(), 0); 208210470Smm cleanshares(vp, ddi_get_pid()); 209210470Smm 210209962Smm ZFS_ENTER(zfsvfs); 211209962Smm ZFS_VERIFY_ZP(zp); 212209962Smm 213168404Spjd /* Decrement the synchronous opens in the znode */ 214185029Spjd if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 215168404Spjd atomic_dec_32(&zp->z_sync_cnt); 216168404Spjd 217185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 218185029Spjd ZTOV(zp)->v_type == VREG && 219185029Spjd !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) && 220185029Spjd zp->z_phys->zp_size > 0) 221185029Spjd VERIFY(fs_vscan(vp, cr, 1) == 0); 222185029Spjd 223209962Smm ZFS_EXIT(zfsvfs); 224168404Spjd return (0); 225168404Spjd} 226168404Spjd 227168404Spjd/* 228168404Spjd * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 229168404Spjd * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 
230168404Spjd */ 231168404Spjdstatic int 232168978Spjdzfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 233168404Spjd{ 234168404Spjd znode_t *zp = VTOZ(vp); 235168404Spjd uint64_t noff = (uint64_t)*off; /* new offset */ 236168404Spjd uint64_t file_sz; 237168404Spjd int error; 238168404Spjd boolean_t hole; 239168404Spjd 240168404Spjd file_sz = zp->z_phys->zp_size; 241168404Spjd if (noff >= file_sz) { 242168404Spjd return (ENXIO); 243168404Spjd } 244168404Spjd 245168962Spjd if (cmd == _FIO_SEEK_HOLE) 246168404Spjd hole = B_TRUE; 247168404Spjd else 248168404Spjd hole = B_FALSE; 249168404Spjd 250168404Spjd error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 251168404Spjd 252168404Spjd /* end of file? */ 253168404Spjd if ((error == ESRCH) || (noff > file_sz)) { 254168404Spjd /* 255168404Spjd * Handle the virtual hole at the end of file. 256168404Spjd */ 257168404Spjd if (hole) { 258168404Spjd *off = file_sz; 259168404Spjd return (0); 260168404Spjd } 261168404Spjd return (ENXIO); 262168404Spjd } 263168404Spjd 264168404Spjd if (noff < *off) 265168404Spjd return (error); 266168404Spjd *off = noff; 267168404Spjd return (error); 268168404Spjd} 269168404Spjd 270168404Spjd/* ARGSUSED */ 271168404Spjdstatic int 272168978Spjdzfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 273185029Spjd int *rvalp, caller_context_t *ct) 274168404Spjd{ 275168962Spjd offset_t off; 276168962Spjd int error; 277168962Spjd zfsvfs_t *zfsvfs; 278185029Spjd znode_t *zp; 279168404Spjd 280168404Spjd switch (com) { 281185029Spjd case _FIOFFS: 282168962Spjd return (0); 283168404Spjd 284168962Spjd /* 285168962Spjd * The following two ioctls are used by bfu. Faking out, 286168962Spjd * necessary to avoid bfu errors. 
287168962Spjd */ 288185029Spjd case _FIOGDIO: 289185029Spjd case _FIOSDIO: 290168962Spjd return (0); 291168962Spjd 292185029Spjd case _FIO_SEEK_DATA: 293185029Spjd case _FIO_SEEK_HOLE: 294168962Spjd if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 295168962Spjd return (EFAULT); 296168962Spjd 297185029Spjd zp = VTOZ(vp); 298185029Spjd zfsvfs = zp->z_zfsvfs; 299168404Spjd ZFS_ENTER(zfsvfs); 300185029Spjd ZFS_VERIFY_ZP(zp); 301168404Spjd 302168404Spjd /* offset parameter is in/out */ 303168404Spjd error = zfs_holey(vp, com, &off); 304168404Spjd ZFS_EXIT(zfsvfs); 305168404Spjd if (error) 306168404Spjd return (error); 307168962Spjd if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 308168962Spjd return (EFAULT); 309168404Spjd return (0); 310168404Spjd } 311168404Spjd return (ENOTTY); 312168404Spjd} 313168404Spjd 314209962Smmstatic vm_page_t 315209962Smmpage_lookup(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 316209962Smm{ 317209962Smm vm_object_t obj; 318209962Smm vm_page_t pp; 319209962Smm 320209962Smm obj = vp->v_object; 321209962Smm VM_OBJECT_LOCK_ASSERT(obj, MA_OWNED); 322209962Smm 323209962Smm for (;;) { 324209962Smm if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 325209962Smm vm_page_is_valid(pp, (vm_offset_t)off, nbytes)) { 326209962Smm if (vm_page_sleep_if_busy(pp, FALSE, "zfsmwb")) 327209962Smm continue; 328209962Smm vm_page_busy(pp); 329209962Smm vm_page_lock_queues(); 330209962Smm vm_page_undirty(pp); 331209962Smm vm_page_unlock_queues(); 332209962Smm } else { 333209962Smm if (__predict_false(obj->cache != NULL)) { 334209962Smm vm_page_cache_free(obj, OFF_TO_IDX(start), 335209962Smm OFF_TO_IDX(start) + 1); 336209962Smm } 337209962Smm pp = NULL; 338209962Smm } 339209962Smm break; 340209962Smm } 341209962Smm return (pp); 342209962Smm} 343209962Smm 344209962Smmstatic void 345209962Smmpage_unlock(vm_page_t pp) 346209962Smm{ 347209962Smm 348209962Smm vm_page_wakeup(pp); 349209962Smm} 350209962Smm 351209962Smmstatic caddr_t 
352209962Smmzfs_map_page(vm_page_t pp, struct sf_buf **sfp) 353209962Smm{ 354209962Smm 355209962Smm sched_pin(); 356209962Smm *sfp = sf_buf_alloc(pp, SFB_CPUPRIVATE); 357209962Smm return ((caddr_t)sf_buf_kva(*sfp)); 358209962Smm} 359209962Smm 360209962Smmstatic void 361209962Smmzfs_unmap_page(struct sf_buf *sf) 362209962Smm{ 363209962Smm 364209962Smm sf_buf_free(sf); 365209962Smm sched_unpin(); 366209962Smm} 367209962Smm 368209962Smm 369168404Spjd/* 370168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 371168404Spjd * between the DMU cache and the memory mapped pages. What this means: 372168404Spjd * 373168404Spjd * On Write: If we find a memory mapped page, we write to *both* 374168404Spjd * the page and the dmu buffer. 375168404Spjd */ 376209962Smm 377209962Smmstatic void 378209962Smmupdate_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 379209962Smm int segflg, dmu_tx_t *tx) 380168404Spjd{ 381168404Spjd vm_object_t obj; 382168404Spjd struct sf_buf *sf; 383209962Smm int64_t off; 384168404Spjd 385168404Spjd ASSERT(vp->v_mount != NULL); 386168404Spjd obj = vp->v_object; 387168404Spjd ASSERT(obj != NULL); 388168404Spjd 389168404Spjd off = start & PAGEOFFSET; 390168404Spjd VM_OBJECT_LOCK(obj); 391168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 392209962Smm vm_page_t pp; 393209962Smm uint64_t nbytes = MIN(PAGESIZE - off, len); 394168404Spjd 395209962Smm if ((pp = page_lookup(vp, start, off, nbytes)) != NULL) { 396168404Spjd caddr_t va; 397168404Spjd 398168404Spjd VM_OBJECT_UNLOCK(obj); 399209962Smm va = zfs_map_page(pp, &sf); 400209962Smm if (segflg == UIO_NOCOPY) { 401209962Smm (void) dmu_write(os, oid, start+off, nbytes, 402209962Smm va+off, tx); 403209962Smm } else { 404209962Smm (void) dmu_read(os, oid, start+off, nbytes, 405209962Smm va+off, DMU_READ_PREFETCH);; 406169059Spjd } 407209962Smm zfs_unmap_page(sf); 408168404Spjd VM_OBJECT_LOCK(obj); 409209962Smm page_unlock(pp); 410209962Smm 
411168404Spjd } 412209962Smm len -= nbytes; 413168404Spjd off = 0; 414168404Spjd } 415168404Spjd VM_OBJECT_UNLOCK(obj); 416168404Spjd} 417168404Spjd 418168404Spjd/* 419168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 420168404Spjd * between the DMU cache and the memory mapped pages. What this means: 421168404Spjd * 422168404Spjd * On Read: We "read" preferentially from memory mapped pages, 423168404Spjd * else we default from the dmu buffer. 424168404Spjd * 425168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 426168404Spjd * the file is memory mapped. 427168404Spjd */ 428168404Spjdstatic int 429168404Spjdmappedread(vnode_t *vp, int nbytes, uio_t *uio) 430168404Spjd{ 431168404Spjd znode_t *zp = VTOZ(vp); 432168404Spjd objset_t *os = zp->z_zfsvfs->z_os; 433168404Spjd vm_object_t obj; 434168404Spjd vm_page_t m; 435168404Spjd struct sf_buf *sf; 436168404Spjd int64_t start, off; 437168926Spjd caddr_t va; 438168404Spjd int len = nbytes; 439168404Spjd int error = 0; 440169059Spjd uint64_t dirbytes; 441168404Spjd 442168404Spjd ASSERT(vp->v_mount != NULL); 443168404Spjd obj = vp->v_object; 444168404Spjd ASSERT(obj != NULL); 445168404Spjd 446168404Spjd start = uio->uio_loffset; 447168404Spjd off = start & PAGEOFFSET; 448169059Spjd dirbytes = 0; 449168404Spjd VM_OBJECT_LOCK(obj); 450168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 451168404Spjd uint64_t bytes = MIN(PAGESIZE - off, len); 452168404Spjd 453168404Spjdagain: 454168404Spjd if ((m = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 455168404Spjd vm_page_is_valid(m, (vm_offset_t)off, bytes)) { 456168404Spjd if (vm_page_sleep_if_busy(m, FALSE, "zfsmrb")) 457168404Spjd goto again; 458168404Spjd vm_page_busy(m); 459168404Spjd VM_OBJECT_UNLOCK(obj); 460169059Spjd if (dirbytes > 0) { 461169059Spjd error = dmu_read_uio(os, zp->z_id, uio, 462169059Spjd dirbytes); 463169059Spjd dirbytes = 0; 464169059Spjd } 465169059Spjd if (error == 0) { 
466169059Spjd sched_pin(); 467169059Spjd sf = sf_buf_alloc(m, SFB_CPUPRIVATE); 468169059Spjd va = (caddr_t)sf_buf_kva(sf); 469169059Spjd error = uiomove(va + off, bytes, UIO_READ, uio); 470169059Spjd sf_buf_free(sf); 471169059Spjd sched_unpin(); 472169059Spjd } 473168404Spjd VM_OBJECT_LOCK(obj); 474168404Spjd vm_page_wakeup(m); 475168926Spjd } else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) { 476168962Spjd /* 477168962Spjd * The code below is here to make sendfile(2) work 478168962Spjd * correctly with ZFS. As pointed out by ups@ 479168962Spjd * sendfile(2) should be changed to use VOP_GETPAGES(), 480168962Spjd * but it pessimize performance of sendfile/UFS, that's 481168962Spjd * why I handle this special case in ZFS code. 482168962Spjd */ 483168926Spjd if (vm_page_sleep_if_busy(m, FALSE, "zfsmrb")) 484168926Spjd goto again; 485168926Spjd vm_page_busy(m); 486168926Spjd VM_OBJECT_UNLOCK(obj); 487169059Spjd if (dirbytes > 0) { 488169059Spjd error = dmu_read_uio(os, zp->z_id, uio, 489169059Spjd dirbytes); 490169059Spjd dirbytes = 0; 491169059Spjd } 492169059Spjd if (error == 0) { 493169059Spjd sched_pin(); 494169059Spjd sf = sf_buf_alloc(m, SFB_CPUPRIVATE); 495169059Spjd va = (caddr_t)sf_buf_kva(sf); 496169059Spjd error = dmu_read(os, zp->z_id, start + off, 497209962Smm bytes, (void *)(va + off), 498209962Smm DMU_READ_PREFETCH); 499169059Spjd sf_buf_free(sf); 500169059Spjd sched_unpin(); 501169059Spjd } 502168926Spjd VM_OBJECT_LOCK(obj); 503168926Spjd vm_page_wakeup(m); 504169059Spjd if (error == 0) 505169059Spjd uio->uio_resid -= bytes; 506168404Spjd } else { 507169059Spjd dirbytes += bytes; 508168404Spjd } 509168404Spjd len -= bytes; 510168404Spjd off = 0; 511168404Spjd if (error) 512168404Spjd break; 513168404Spjd } 514168404Spjd VM_OBJECT_UNLOCK(obj); 515169059Spjd if (error == 0 && dirbytes > 0) 516169059Spjd error = dmu_read_uio(os, zp->z_id, uio, dirbytes); 517168404Spjd return (error); 518168404Spjd} 519168404Spjd 520168404Spjdoffset_t 
zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 521168404Spjd 522168404Spjd/* 523168404Spjd * Read bytes from specified file into supplied buffer. 524168404Spjd * 525168404Spjd * IN: vp - vnode of file to be read from. 526168404Spjd * uio - structure supplying read location, range info, 527168404Spjd * and return buffer. 528168404Spjd * ioflag - SYNC flags; used to provide FRSYNC semantics. 529168404Spjd * cr - credentials of caller. 530185029Spjd * ct - caller context 531168404Spjd * 532168404Spjd * OUT: uio - updated offset and range, buffer filled. 533168404Spjd * 534168404Spjd * RETURN: 0 if success 535168404Spjd * error code if failure 536168404Spjd * 537168404Spjd * Side Effects: 538168404Spjd * vp - atime updated if byte count > 0 539168404Spjd */ 540168404Spjd/* ARGSUSED */ 541168404Spjdstatic int 542168962Spjdzfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 543168404Spjd{ 544168404Spjd znode_t *zp = VTOZ(vp); 545168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 546185029Spjd objset_t *os; 547168404Spjd ssize_t n, nbytes; 548168404Spjd int error; 549168404Spjd rl_t *rl; 550168404Spjd 551168404Spjd ZFS_ENTER(zfsvfs); 552185029Spjd ZFS_VERIFY_ZP(zp); 553185029Spjd os = zfsvfs->z_os; 554168404Spjd 555185029Spjd if (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) { 556185029Spjd ZFS_EXIT(zfsvfs); 557185029Spjd return (EACCES); 558185029Spjd } 559185029Spjd 560168404Spjd /* 561168404Spjd * Validate file offset 562168404Spjd */ 563168404Spjd if (uio->uio_loffset < (offset_t)0) { 564168404Spjd ZFS_EXIT(zfsvfs); 565168404Spjd return (EINVAL); 566168404Spjd } 567168404Spjd 568168404Spjd /* 569168404Spjd * Fasttrack empty reads 570168404Spjd */ 571168404Spjd if (uio->uio_resid == 0) { 572168404Spjd ZFS_EXIT(zfsvfs); 573168404Spjd return (0); 574168404Spjd } 575168404Spjd 576168404Spjd /* 577168962Spjd * Check for mandatory locks 578168962Spjd */ 579168962Spjd if (MANDMODE((mode_t)zp->z_phys->zp_mode)) { 580168962Spjd if (error = chklock(vp, 
FREAD, 581168962Spjd uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 582168962Spjd ZFS_EXIT(zfsvfs); 583168962Spjd return (error); 584168962Spjd } 585168962Spjd } 586168962Spjd 587168962Spjd /* 588168404Spjd * If we're in FRSYNC mode, sync out this znode before reading it. 589168404Spjd */ 590168962Spjd if (ioflag & FRSYNC) 591168404Spjd zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); 592168404Spjd 593168404Spjd /* 594168404Spjd * Lock the range against changes. 595168404Spjd */ 596168404Spjd rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 597168404Spjd 598168404Spjd /* 599168404Spjd * If we are reading past end-of-file we can skip 600168404Spjd * to the end; but we might still need to set atime. 601168404Spjd */ 602168404Spjd if (uio->uio_loffset >= zp->z_phys->zp_size) { 603168404Spjd error = 0; 604168404Spjd goto out; 605168404Spjd } 606168404Spjd 607168404Spjd ASSERT(uio->uio_loffset < zp->z_phys->zp_size); 608168404Spjd n = MIN(uio->uio_resid, zp->z_phys->zp_size - uio->uio_loffset); 609168404Spjd 610168404Spjd while (n > 0) { 611168404Spjd nbytes = MIN(n, zfs_read_chunk_size - 612168404Spjd P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 613168404Spjd 614168404Spjd if (vn_has_cached_data(vp)) 615168404Spjd error = mappedread(vp, nbytes, uio); 616168404Spjd else 617168404Spjd error = dmu_read_uio(os, zp->z_id, uio, nbytes); 618185029Spjd if (error) { 619185029Spjd /* convert checksum errors into IO errors */ 620185029Spjd if (error == ECKSUM) 621185029Spjd error = EIO; 622168404Spjd break; 623185029Spjd } 624168962Spjd 625168404Spjd n -= nbytes; 626168404Spjd } 627168404Spjd 628168404Spjdout: 629168404Spjd zfs_range_unlock(rl); 630168404Spjd 631168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 632168404Spjd ZFS_EXIT(zfsvfs); 633168404Spjd return (error); 634168404Spjd} 635168404Spjd 636168404Spjd/* 637168404Spjd * Fault in the pages of the first n bytes specified by the uio structure. 
638168404Spjd * 1 byte in each page is touched and the uio struct is unmodified. 639168404Spjd * Any error will exit this routine as this is only a best 640168404Spjd * attempt to get the pages resident. This is a copy of ufs_trans_touch(). 641168404Spjd */ 642168404Spjdstatic void 643168404Spjdzfs_prefault_write(ssize_t n, struct uio *uio) 644168404Spjd{ 645168404Spjd struct iovec *iov; 646168404Spjd ulong_t cnt, incr; 647168404Spjd caddr_t p; 648168404Spjd 649168404Spjd if (uio->uio_segflg != UIO_USERSPACE) 650168404Spjd return; 651168404Spjd 652168404Spjd iov = uio->uio_iov; 653168404Spjd 654168404Spjd while (n) { 655168404Spjd cnt = MIN(iov->iov_len, n); 656168404Spjd if (cnt == 0) { 657168404Spjd /* empty iov entry */ 658168404Spjd iov++; 659168404Spjd continue; 660168404Spjd } 661168404Spjd n -= cnt; 662168404Spjd /* 663168404Spjd * touch each page in this segment. 664168404Spjd */ 665168404Spjd p = iov->iov_base; 666168404Spjd while (cnt) { 667168404Spjd if (fubyte(p) == -1) 668168404Spjd return; 669168404Spjd incr = MIN(cnt, PAGESIZE); 670168404Spjd p += incr; 671168404Spjd cnt -= incr; 672168404Spjd } 673168404Spjd /* 674168404Spjd * touch the last byte in case it straddles a page. 675168404Spjd */ 676168404Spjd p--; 677168404Spjd if (fubyte(p) == -1) 678168404Spjd return; 679168404Spjd iov++; 680168404Spjd } 681168404Spjd} 682168404Spjd 683168404Spjd/* 684168404Spjd * Write the bytes to a file. 685168404Spjd * 686168404Spjd * IN: vp - vnode of file to be written to. 687168404Spjd * uio - structure supplying write location, range info, 688168404Spjd * and data buffer. 689168404Spjd * ioflag - IO_APPEND flag set if in append mode. 690168404Spjd * cr - credentials of caller. 691185029Spjd * ct - caller context (NFS/CIFS fem monitor only) 692168404Spjd * 693168404Spjd * OUT: uio - updated offset and range. 
694168404Spjd * 695168404Spjd * RETURN: 0 if success 696168404Spjd * error code if failure 697168404Spjd * 698168404Spjd * Timestamps: 699168404Spjd * vp - ctime|mtime updated if byte count > 0 700168404Spjd */ 701168404Spjd/* ARGSUSED */ 702168404Spjdstatic int 703168962Spjdzfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 704168404Spjd{ 705168404Spjd znode_t *zp = VTOZ(vp); 706168962Spjd rlim64_t limit = MAXOFFSET_T; 707168404Spjd ssize_t start_resid = uio->uio_resid; 708168404Spjd ssize_t tx_bytes; 709168404Spjd uint64_t end_size; 710168404Spjd dmu_tx_t *tx; 711168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 712185029Spjd zilog_t *zilog; 713168404Spjd offset_t woff; 714168404Spjd ssize_t n, nbytes; 715168404Spjd rl_t *rl; 716168404Spjd int max_blksz = zfsvfs->z_max_blksz; 717185029Spjd uint64_t pflags; 718168404Spjd int error; 719209962Smm arc_buf_t *abuf; 720168404Spjd 721168404Spjd /* 722168404Spjd * Fasttrack empty write 723168404Spjd */ 724168404Spjd n = start_resid; 725168404Spjd if (n == 0) 726168404Spjd return (0); 727168404Spjd 728168962Spjd if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 729168962Spjd limit = MAXOFFSET_T; 730168962Spjd 731168404Spjd ZFS_ENTER(zfsvfs); 732185029Spjd ZFS_VERIFY_ZP(zp); 733168404Spjd 734168404Spjd /* 735185029Spjd * If immutable or not appending then return EPERM 736185029Spjd */ 737185029Spjd pflags = zp->z_phys->zp_flags; 738185029Spjd if ((pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 739185029Spjd ((pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 740185029Spjd (uio->uio_loffset < zp->z_phys->zp_size))) { 741185029Spjd ZFS_EXIT(zfsvfs); 742185029Spjd return (EPERM); 743185029Spjd } 744185029Spjd 745185029Spjd zilog = zfsvfs->z_log; 746185029Spjd 747185029Spjd /* 748168404Spjd * Pre-fault the pages to ensure slow (eg NFS) pages 749168404Spjd * don't hold up txg. 
750168404Spjd */ 751168404Spjd zfs_prefault_write(n, uio); 752168404Spjd 753168404Spjd /* 754168404Spjd * If in append mode, set the io offset pointer to eof. 755168404Spjd */ 756168404Spjd if (ioflag & IO_APPEND) { 757168404Spjd /* 758168404Spjd * Range lock for a file append: 759168404Spjd * The value for the start of range will be determined by 760168404Spjd * zfs_range_lock() (to guarantee append semantics). 761168404Spjd * If this write will cause the block size to increase, 762168404Spjd * zfs_range_lock() will lock the entire file, so we must 763168404Spjd * later reduce the range after we grow the block size. 764168404Spjd */ 765168404Spjd rl = zfs_range_lock(zp, 0, n, RL_APPEND); 766168404Spjd if (rl->r_len == UINT64_MAX) { 767168404Spjd /* overlocked, zp_size can't change */ 768168404Spjd woff = uio->uio_loffset = zp->z_phys->zp_size; 769168404Spjd } else { 770168404Spjd woff = uio->uio_loffset = rl->r_off; 771168404Spjd } 772168404Spjd } else { 773168404Spjd woff = uio->uio_loffset; 774168404Spjd /* 775168404Spjd * Validate file offset 776168404Spjd */ 777168404Spjd if (woff < 0) { 778168404Spjd ZFS_EXIT(zfsvfs); 779168404Spjd return (EINVAL); 780168404Spjd } 781168404Spjd 782168404Spjd /* 783168404Spjd * If we need to grow the block size then zfs_range_lock() 784168404Spjd * will lock a wider range than we request here. 785168404Spjd * Later after growing the block size we reduce the range. 
786168404Spjd */ 787168404Spjd rl = zfs_range_lock(zp, woff, n, RL_WRITER); 788168404Spjd } 789168404Spjd 790168962Spjd if (woff >= limit) { 791168962Spjd zfs_range_unlock(rl); 792168962Spjd ZFS_EXIT(zfsvfs); 793168962Spjd return (EFBIG); 794168962Spjd } 795168962Spjd 796168962Spjd if ((woff + n) > limit || woff > (limit - n)) 797168962Spjd n = limit - woff; 798168962Spjd 799168962Spjd /* 800168962Spjd * Check for mandatory locks 801168962Spjd */ 802168962Spjd if (MANDMODE((mode_t)zp->z_phys->zp_mode) && 803168962Spjd (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 804168962Spjd zfs_range_unlock(rl); 805168962Spjd ZFS_EXIT(zfsvfs); 806168962Spjd return (error); 807168962Spjd } 808168404Spjd end_size = MAX(zp->z_phys->zp_size, woff + n); 809168404Spjd 810168404Spjd /* 811168404Spjd * Write the file in reasonable size chunks. Each chunk is written 812168404Spjd * in a separate transaction; this keeps the intent log records small 813168404Spjd * and allows us to do more fine-grained space accounting. 814168404Spjd */ 815168404Spjd while (n > 0) { 816209962Smm abuf = NULL; 817209962Smm woff = uio->uio_loffset; 818209962Smm 819209962Smmagain: 820209962Smm if (zfs_usergroup_overquota(zfsvfs, 821209962Smm B_FALSE, zp->z_phys->zp_uid) || 822209962Smm zfs_usergroup_overquota(zfsvfs, 823209962Smm B_TRUE, zp->z_phys->zp_gid)) { 824209962Smm if (abuf != NULL) 825209962Smm dmu_return_arcbuf(abuf); 826209962Smm error = EDQUOT; 827209962Smm break; 828209962Smm } 829209962Smm 830168404Spjd /* 831209962Smm * If dmu_assign_arcbuf() is expected to execute with minimum 832209962Smm * overhead loan an arc buffer and copy user data to it before 833209962Smm * we enter a txg. This avoids holding a txg forever while we 834209962Smm * pagefault on a hanging NFS server mapping. 
835209962Smm */ 836209962Smm if (abuf == NULL && n >= max_blksz && 837209962Smm woff >= zp->z_phys->zp_size && 838209962Smm P2PHASE(woff, max_blksz) == 0 && 839209962Smm zp->z_blksz == max_blksz) { 840209962Smm size_t cbytes; 841209962Smm 842209962Smm abuf = dmu_request_arcbuf(zp->z_dbuf, max_blksz); 843209962Smm ASSERT(abuf != NULL); 844209962Smm ASSERT(arc_buf_size(abuf) == max_blksz); 845209962Smm if (error = uiocopy(abuf->b_data, max_blksz, 846209962Smm UIO_WRITE, uio, &cbytes)) { 847209962Smm dmu_return_arcbuf(abuf); 848209962Smm break; 849209962Smm } 850209962Smm ASSERT(cbytes == max_blksz); 851209962Smm } 852209962Smm 853209962Smm /* 854168404Spjd * Start a transaction. 855168404Spjd */ 856168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 857168404Spjd dmu_tx_hold_bonus(tx, zp->z_id); 858168404Spjd dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 859209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 860168404Spjd if (error) { 861209962Smm if (error == ERESTART) { 862168404Spjd dmu_tx_wait(tx); 863168404Spjd dmu_tx_abort(tx); 864209962Smm goto again; 865168404Spjd } 866168404Spjd dmu_tx_abort(tx); 867209962Smm if (abuf != NULL) 868209962Smm dmu_return_arcbuf(abuf); 869168404Spjd break; 870168404Spjd } 871168404Spjd 872168404Spjd /* 873168404Spjd * If zfs_range_lock() over-locked we grow the blocksize 874168404Spjd * and then reduce the lock range. This will only happen 875168404Spjd * on the first iteration since zfs_range_reduce() will 876168404Spjd * shrink down r_len to the appropriate size. 
877168404Spjd */ 878168404Spjd if (rl->r_len == UINT64_MAX) { 879168404Spjd uint64_t new_blksz; 880168404Spjd 881168404Spjd if (zp->z_blksz > max_blksz) { 882168404Spjd ASSERT(!ISP2(zp->z_blksz)); 883168404Spjd new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); 884168404Spjd } else { 885168404Spjd new_blksz = MIN(end_size, max_blksz); 886168404Spjd } 887168404Spjd zfs_grow_blocksize(zp, new_blksz, tx); 888168404Spjd zfs_range_reduce(rl, woff, n); 889168404Spjd } 890168404Spjd 891168404Spjd /* 892168404Spjd * XXX - should we really limit each write to z_max_blksz? 893168404Spjd * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 894168404Spjd */ 895168404Spjd nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 896168404Spjd 897168404Spjd if (woff + nbytes > zp->z_phys->zp_size) 898168404Spjd vnode_pager_setsize(vp, woff + nbytes); 899168404Spjd 900209962Smm if (abuf == NULL) { 901209962Smm tx_bytes = uio->uio_resid; 902209962Smm error = dmu_write_uio(zfsvfs->z_os, zp->z_id, uio, 903209962Smm nbytes, tx); 904209962Smm tx_bytes -= uio->uio_resid; 905168404Spjd } else { 906209962Smm tx_bytes = nbytes; 907209962Smm ASSERT(tx_bytes == max_blksz); 908209962Smm dmu_assign_arcbuf(zp->z_dbuf, woff, abuf, tx); 909209962Smm ASSERT(tx_bytes <= uio->uio_resid); 910209962Smm uioskip(uio, tx_bytes); 911168404Spjd } 912168404Spjd 913168404Spjd /* 914209962Smm * XXXPJD: There are some cases (triggered by fsx) where 915209962Smm * vn_has_cached_data(vp) returns false when it should 916209962Smm * return true. This should be investigated. 917209962Smm */ 918209962Smm#if 0 919209962Smm if (tx_bytes && vn_has_cached_data(vp)) 920209962Smm#else 921209962Smm if (tx_bytes && vp->v_object != NULL) 922209962Smm#endif 923209962Smm { 924209962Smm update_pages(vp, woff, tx_bytes, zfsvfs->z_os, 925209962Smm zp->z_id, uio->uio_segflg, tx); 926209962Smm } 927209962Smm 928209962Smm /* 929168404Spjd * If we made no progress, we're done. 
If we made even 930168404Spjd * partial progress, update the znode and ZIL accordingly. 931168404Spjd */ 932168404Spjd if (tx_bytes == 0) { 933168404Spjd dmu_tx_commit(tx); 934168404Spjd ASSERT(error != 0); 935168404Spjd break; 936168404Spjd } 937168404Spjd 938168404Spjd /* 939168404Spjd * Clear Set-UID/Set-GID bits on successful write if not 940168404Spjd * privileged and at least one of the excute bits is set. 941168404Spjd * 942168404Spjd * It would be nice to to this after all writes have 943168404Spjd * been done, but that would still expose the ISUID/ISGID 944168404Spjd * to another app after the partial write is committed. 945185029Spjd * 946185029Spjd * Note: we don't call zfs_fuid_map_id() here because 947185029Spjd * user 0 is not an ephemeral uid. 948168404Spjd */ 949168404Spjd mutex_enter(&zp->z_acl_lock); 950168404Spjd if ((zp->z_phys->zp_mode & (S_IXUSR | (S_IXUSR >> 3) | 951168404Spjd (S_IXUSR >> 6))) != 0 && 952168404Spjd (zp->z_phys->zp_mode & (S_ISUID | S_ISGID)) != 0 && 953185029Spjd secpolicy_vnode_setid_retain(vp, cr, 954168404Spjd (zp->z_phys->zp_mode & S_ISUID) != 0 && 955168404Spjd zp->z_phys->zp_uid == 0) != 0) { 956185029Spjd zp->z_phys->zp_mode &= ~(S_ISUID | S_ISGID); 957168404Spjd } 958168404Spjd mutex_exit(&zp->z_acl_lock); 959168404Spjd 960168404Spjd /* 961168404Spjd * Update time stamp. NOTE: This marks the bonus buffer as 962168404Spjd * dirty, so we don't have to do it again for zp_size. 963168404Spjd */ 964168404Spjd zfs_time_stamper(zp, CONTENT_MODIFIED, tx); 965168404Spjd 966168404Spjd /* 967168404Spjd * Update the file size (zp_size) if it has changed; 968168404Spjd * account for possible concurrent updates. 
969168404Spjd */ 970168404Spjd while ((end_size = zp->z_phys->zp_size) < uio->uio_loffset) 971168404Spjd (void) atomic_cas_64(&zp->z_phys->zp_size, end_size, 972168404Spjd uio->uio_loffset); 973168404Spjd zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 974168404Spjd dmu_tx_commit(tx); 975168404Spjd 976168404Spjd if (error != 0) 977168404Spjd break; 978168404Spjd ASSERT(tx_bytes == nbytes); 979168404Spjd n -= nbytes; 980168404Spjd } 981168404Spjd 982168404Spjd zfs_range_unlock(rl); 983168404Spjd 984168404Spjd /* 985168404Spjd * If we're in replay mode, or we made no progress, return error. 986168404Spjd * Otherwise, it's at least a partial write, so it's successful. 987168404Spjd */ 988209962Smm if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 989168404Spjd ZFS_EXIT(zfsvfs); 990168404Spjd return (error); 991168404Spjd } 992168404Spjd 993168962Spjd if (ioflag & (FSYNC | FDSYNC)) 994168404Spjd zil_commit(zilog, zp->z_last_itx, zp->z_id); 995168404Spjd 996168404Spjd ZFS_EXIT(zfsvfs); 997168404Spjd return (0); 998168404Spjd} 999168404Spjd 1000168404Spjdvoid 1001168404Spjdzfs_get_done(dmu_buf_t *db, void *vzgd) 1002168404Spjd{ 1003168404Spjd zgd_t *zgd = (zgd_t *)vzgd; 1004168404Spjd rl_t *rl = zgd->zgd_rl; 1005168404Spjd vnode_t *vp = ZTOV(rl->r_zp); 1006196307Spjd objset_t *os = rl->r_zp->z_zfsvfs->z_os; 1007168404Spjd int vfslocked; 1008168404Spjd 1009168404Spjd vfslocked = VFS_LOCK_GIANT(vp->v_vfsp); 1010168404Spjd dmu_buf_rele(db, vzgd); 1011168404Spjd zfs_range_unlock(rl); 1012191900Skmacy /* 1013191900Skmacy * Release the vnode asynchronously as we currently have the 1014191900Skmacy * txg stopped from syncing. 
1015191900Skmacy */ 1016196307Spjd VN_RELE_ASYNC(vp, dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1017185029Spjd zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1018168404Spjd kmem_free(zgd, sizeof (zgd_t)); 1019168404Spjd VFS_UNLOCK_GIANT(vfslocked); 1020168404Spjd} 1021168404Spjd 1022168404Spjd/* 1023168404Spjd * Get data to generate a TX_WRITE intent log record. 1024168404Spjd */ 1025168404Spjdint 1026168404Spjdzfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1027168404Spjd{ 1028168404Spjd zfsvfs_t *zfsvfs = arg; 1029168404Spjd objset_t *os = zfsvfs->z_os; 1030168404Spjd znode_t *zp; 1031168404Spjd uint64_t off = lr->lr_offset; 1032168404Spjd dmu_buf_t *db; 1033168404Spjd rl_t *rl; 1034168404Spjd zgd_t *zgd; 1035168404Spjd int dlen = lr->lr_length; /* length of user data */ 1036168404Spjd int error = 0; 1037168404Spjd 1038168404Spjd ASSERT(zio); 1039168404Spjd ASSERT(dlen != 0); 1040168404Spjd 1041168404Spjd /* 1042168404Spjd * Nothing to do if the file has been removed 1043168404Spjd */ 1044168404Spjd if (zfs_zget(zfsvfs, lr->lr_foid, &zp) != 0) 1045168404Spjd return (ENOENT); 1046168404Spjd if (zp->z_unlinked) { 1047191900Skmacy /* 1048191900Skmacy * Release the vnode asynchronously as we currently have the 1049191900Skmacy * txg stopped from syncing. 1050191900Skmacy */ 1051196307Spjd VN_RELE_ASYNC(ZTOV(zp), 1052196307Spjd dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1053168404Spjd return (ENOENT); 1054168404Spjd } 1055168404Spjd 1056168404Spjd /* 1057168404Spjd * Write records come in two flavors: immediate and indirect. 1058168404Spjd * For small writes it's cheaper to store the data with the 1059168404Spjd * log record (immediate); for large writes it's cheaper to 1060168404Spjd * sync the data and get a pointer to it (indirect) so that 1061168404Spjd * we don't have to write the data twice. 
1062168404Spjd */ 1063168404Spjd if (buf != NULL) { /* immediate write */ 1064168404Spjd rl = zfs_range_lock(zp, off, dlen, RL_READER); 1065168404Spjd /* test for truncation needs to be done while range locked */ 1066168404Spjd if (off >= zp->z_phys->zp_size) { 1067168404Spjd error = ENOENT; 1068168404Spjd goto out; 1069168404Spjd } 1070209962Smm VERIFY(0 == dmu_read(os, lr->lr_foid, off, dlen, buf, 1071209962Smm DMU_READ_NO_PREFETCH)); 1072168404Spjd } else { /* indirect write */ 1073168404Spjd uint64_t boff; /* block starting offset */ 1074168404Spjd 1075168404Spjd /* 1076168404Spjd * Have to lock the whole block to ensure when it's 1077168404Spjd * written out and it's checksum is being calculated 1078168404Spjd * that no one can change the data. We need to re-check 1079168404Spjd * blocksize after we get the lock in case it's changed! 1080168404Spjd */ 1081168404Spjd for (;;) { 1082168404Spjd if (ISP2(zp->z_blksz)) { 1083168404Spjd boff = P2ALIGN_TYPED(off, zp->z_blksz, 1084168404Spjd uint64_t); 1085168404Spjd } else { 1086168404Spjd boff = 0; 1087168404Spjd } 1088168404Spjd dlen = zp->z_blksz; 1089168404Spjd rl = zfs_range_lock(zp, boff, dlen, RL_READER); 1090168404Spjd if (zp->z_blksz == dlen) 1091168404Spjd break; 1092168404Spjd zfs_range_unlock(rl); 1093168404Spjd } 1094168404Spjd /* test for truncation needs to be done while range locked */ 1095168404Spjd if (off >= zp->z_phys->zp_size) { 1096168404Spjd error = ENOENT; 1097168404Spjd goto out; 1098168404Spjd } 1099168404Spjd zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP); 1100168404Spjd zgd->zgd_rl = rl; 1101168404Spjd zgd->zgd_zilog = zfsvfs->z_log; 1102168404Spjd zgd->zgd_bp = &lr->lr_blkptr; 1103168404Spjd VERIFY(0 == dmu_buf_hold(os, lr->lr_foid, boff, zgd, &db)); 1104168404Spjd ASSERT(boff == db->db_offset); 1105168404Spjd lr->lr_blkoff = off - boff; 1106168404Spjd error = dmu_sync(zio, db, &lr->lr_blkptr, 1107168404Spjd lr->lr_common.lrc_txg, zfs_get_done, zgd); 1108185029Spjd ASSERT((error && 
error != EINPROGRESS) || 1109185029Spjd lr->lr_length <= zp->z_blksz); 1110209962Smm if (error == 0) { 1111209962Smm /* 1112209962Smm * dmu_sync() can compress a block of zeros to a null 1113209962Smm * blkptr but the block size still needs to be passed 1114209962Smm * through to replay. 1115209962Smm */ 1116209962Smm BP_SET_LSIZE(&lr->lr_blkptr, db->db_size); 1117185029Spjd zil_add_block(zfsvfs->z_log, &lr->lr_blkptr); 1118209962Smm } 1119209962Smm 1120168404Spjd /* 1121168404Spjd * If we get EINPROGRESS, then we need to wait for a 1122168404Spjd * write IO initiated by dmu_sync() to complete before 1123168404Spjd * we can release this dbuf. We will finish everything 1124168404Spjd * up in the zfs_get_done() callback. 1125168404Spjd */ 1126209962Smm if (error == EINPROGRESS) { 1127168404Spjd return (0); 1128209962Smm } else if (error == EALREADY) { 1129209962Smm lr->lr_common.lrc_txtype = TX_WRITE2; 1130209962Smm error = 0; 1131209962Smm } 1132168404Spjd dmu_buf_rele(db, zgd); 1133168404Spjd kmem_free(zgd, sizeof (zgd_t)); 1134168404Spjd } 1135168404Spjdout: 1136168404Spjd zfs_range_unlock(rl); 1137191900Skmacy /* 1138191900Skmacy * Release the vnode asynchronously as we currently have the 1139191900Skmacy * txg stopped from syncing. 
1140191900Skmacy */ 1141196307Spjd VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1142168404Spjd return (error); 1143168404Spjd} 1144168404Spjd 1145168404Spjd/*ARGSUSED*/ 1146168404Spjdstatic int 1147185029Spjdzfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1148185029Spjd caller_context_t *ct) 1149168404Spjd{ 1150168404Spjd znode_t *zp = VTOZ(vp); 1151168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1152168404Spjd int error; 1153168404Spjd 1154168404Spjd ZFS_ENTER(zfsvfs); 1155185029Spjd ZFS_VERIFY_ZP(zp); 1156185029Spjd 1157185029Spjd if (flag & V_ACE_MASK) 1158185029Spjd error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1159185029Spjd else 1160185029Spjd error = zfs_zaccess_rwx(zp, mode, flag, cr); 1161185029Spjd 1162168404Spjd ZFS_EXIT(zfsvfs); 1163168404Spjd return (error); 1164168404Spjd} 1165168404Spjd 1166168404Spjd/* 1167168404Spjd * Lookup an entry in a directory, or an extended attribute directory. 1168168404Spjd * If it exists, return a held vnode reference for it. 1169168404Spjd * 1170168404Spjd * IN: dvp - vnode of directory to search. 1171168404Spjd * nm - name of entry to lookup. 1172168404Spjd * pnp - full pathname to lookup [UNUSED]. 1173168404Spjd * flags - LOOKUP_XATTR set if looking for an attribute. 1174168404Spjd * rdir - root directory vnode [UNUSED]. 1175168404Spjd * cr - credentials of caller. 1176185029Spjd * ct - caller context 1177185029Spjd * direntflags - directory lookup flags 1178185029Spjd * realpnp - returned pathname. 1179168404Spjd * 1180168404Spjd * OUT: vpp - vnode of located entry, NULL if not found. 
1181168404Spjd * 1182168404Spjd * RETURN: 0 if success 1183168404Spjd * error code if failure 1184168404Spjd * 1185168404Spjd * Timestamps: 1186168404Spjd * NA 1187168404Spjd */ 1188168404Spjd/* ARGSUSED */ 1189168962Spjdstatic int 1190168962Spjdzfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1191185029Spjd int nameiop, cred_t *cr, kthread_t *td, int flags) 1192168404Spjd{ 1193168962Spjd znode_t *zdp = VTOZ(dvp); 1194168962Spjd zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1195168962Spjd int error; 1196185029Spjd int *direntflags = NULL; 1197185029Spjd void *realpnp = NULL; 1198168404Spjd 1199168404Spjd ZFS_ENTER(zfsvfs); 1200185029Spjd ZFS_VERIFY_ZP(zdp); 1201168404Spjd 1202168404Spjd *vpp = NULL; 1203168404Spjd 1204185029Spjd if (flags & LOOKUP_XATTR) { 1205168404Spjd#ifdef TODO 1206168404Spjd /* 1207168404Spjd * If the xattr property is off, refuse the lookup request. 1208168404Spjd */ 1209168404Spjd if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1210168404Spjd ZFS_EXIT(zfsvfs); 1211168404Spjd return (EINVAL); 1212168404Spjd } 1213185029Spjd#endif 1214168404Spjd 1215168404Spjd /* 1216168404Spjd * We don't allow recursive attributes.. 1217168404Spjd * Maybe someday we will. 1218168404Spjd */ 1219168404Spjd if (zdp->z_phys->zp_flags & ZFS_XATTR) { 1220168404Spjd ZFS_EXIT(zfsvfs); 1221168404Spjd return (EINVAL); 1222168404Spjd } 1223168404Spjd 1224168404Spjd if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1225168404Spjd ZFS_EXIT(zfsvfs); 1226168404Spjd return (error); 1227168404Spjd } 1228168404Spjd 1229168404Spjd /* 1230168404Spjd * Do we have permission to get into attribute directory? 
1231168404Spjd */ 1232168404Spjd 1233185029Spjd if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1234185029Spjd B_FALSE, cr)) { 1235168404Spjd VN_RELE(*vpp); 1236185029Spjd *vpp = NULL; 1237168404Spjd } 1238168404Spjd 1239168404Spjd ZFS_EXIT(zfsvfs); 1240168404Spjd return (error); 1241168404Spjd } 1242168404Spjd 1243168404Spjd if (dvp->v_type != VDIR) { 1244168404Spjd ZFS_EXIT(zfsvfs); 1245168404Spjd return (ENOTDIR); 1246168404Spjd } 1247168404Spjd 1248168404Spjd /* 1249168404Spjd * Check accessibility of directory. 1250168404Spjd */ 1251168404Spjd 1252185029Spjd if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1253168404Spjd ZFS_EXIT(zfsvfs); 1254168404Spjd return (error); 1255168404Spjd } 1256168404Spjd 1257185029Spjd if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1258185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1259185029Spjd ZFS_EXIT(zfsvfs); 1260185029Spjd return (EILSEQ); 1261185029Spjd } 1262168404Spjd 1263185029Spjd error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1264185029Spjd if (error == 0) { 1265168962Spjd /* 1266168962Spjd * Convert device special files 1267168962Spjd */ 1268168962Spjd if (IS_DEVVP(*vpp)) { 1269168962Spjd vnode_t *svp; 1270168962Spjd 1271168962Spjd svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1272168962Spjd VN_RELE(*vpp); 1273168962Spjd if (svp == NULL) 1274168962Spjd error = ENOSYS; 1275168962Spjd else 1276168962Spjd *vpp = svp; 1277168962Spjd } 1278168962Spjd } 1279168962Spjd 1280168404Spjd /* Translate errors and add SAVENAME when needed. 
*/ 1281168404Spjd if (cnp->cn_flags & ISLASTCN) { 1282168404Spjd switch (nameiop) { 1283168404Spjd case CREATE: 1284168404Spjd case RENAME: 1285168404Spjd if (error == ENOENT) { 1286168404Spjd error = EJUSTRETURN; 1287168404Spjd cnp->cn_flags |= SAVENAME; 1288168404Spjd break; 1289168404Spjd } 1290168404Spjd /* FALLTHROUGH */ 1291168404Spjd case DELETE: 1292168404Spjd if (error == 0) 1293168404Spjd cnp->cn_flags |= SAVENAME; 1294168404Spjd break; 1295168404Spjd } 1296168404Spjd } 1297168404Spjd if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) { 1298169198Spjd int ltype = 0; 1299169198Spjd 1300169198Spjd if (cnp->cn_flags & ISDOTDOT) { 1301176559Sattilio ltype = VOP_ISLOCKED(dvp); 1302175294Sattilio VOP_UNLOCK(dvp, 0); 1303169198Spjd } 1304206667Spjd ZFS_EXIT(zfsvfs); 1305175202Sattilio error = vn_lock(*vpp, cnp->cn_lkflags); 1306168962Spjd if (cnp->cn_flags & ISDOTDOT) 1307175202Sattilio vn_lock(dvp, ltype | LK_RETRY); 1308169172Spjd if (error != 0) { 1309169172Spjd VN_RELE(*vpp); 1310169172Spjd *vpp = NULL; 1311169172Spjd return (error); 1312169172Spjd } 1313206667Spjd } else { 1314206667Spjd ZFS_EXIT(zfsvfs); 1315168404Spjd } 1316168404Spjd 1317168404Spjd#ifdef FREEBSD_NAMECACHE 1318168404Spjd /* 1319168404Spjd * Insert name into cache (as non-existent) if appropriate. 1320168404Spjd */ 1321168404Spjd if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) 1322168404Spjd cache_enter(dvp, *vpp, cnp); 1323169170Spjd /* 1324169170Spjd * Insert name into cache if appropriate. 1325169170Spjd */ 1326168404Spjd if (error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1327168404Spjd if (!(cnp->cn_flags & ISLASTCN) || 1328168404Spjd (nameiop != DELETE && nameiop != RENAME)) { 1329168404Spjd cache_enter(dvp, *vpp, cnp); 1330168404Spjd } 1331168404Spjd } 1332168404Spjd#endif 1333168404Spjd 1334168404Spjd return (error); 1335168404Spjd} 1336168404Spjd 1337168404Spjd/* 1338168404Spjd * Attempt to create a new entry in a directory. 
If the entry 1339168404Spjd * already exists, truncate the file if permissible, else return 1340168404Spjd * an error. Return the vp of the created or trunc'd file. 1341168404Spjd * 1342168404Spjd * IN: dvp - vnode of directory to put new file entry in. 1343168404Spjd * name - name of new file entry. 1344168404Spjd * vap - attributes of new file. 1345168404Spjd * excl - flag indicating exclusive or non-exclusive mode. 1346168404Spjd * mode - mode to open file with. 1347168404Spjd * cr - credentials of caller. 1348168404Spjd * flag - large file flag [UNUSED]. 1349185029Spjd * ct - caller context 1350185029Spjd * vsecp - ACL to be set 1351168404Spjd * 1352168404Spjd * OUT: vpp - vnode of created or trunc'd entry. 1353168404Spjd * 1354168404Spjd * RETURN: 0 if success 1355168404Spjd * error code if failure 1356168404Spjd * 1357168404Spjd * Timestamps: 1358168404Spjd * dvp - ctime|mtime updated if new entry created 1359168404Spjd * vp - ctime|mtime always, atime if new 1360168404Spjd */ 1361185029Spjd 1362168404Spjd/* ARGSUSED */ 1363168404Spjdstatic int 1364168962Spjdzfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 1365185029Spjd vnode_t **vpp, cred_t *cr, kthread_t *td) 1366168404Spjd{ 1367168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1368168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1369185029Spjd zilog_t *zilog; 1370185029Spjd objset_t *os; 1371168404Spjd zfs_dirlock_t *dl; 1372168404Spjd dmu_tx_t *tx; 1373168404Spjd int error; 1374209962Smm ksid_t *ksid; 1375209962Smm uid_t uid; 1376209962Smm gid_t gid = crgetgid(cr); 1377209962Smm zfs_acl_ids_t acl_ids; 1378209962Smm boolean_t fuid_dirtied; 1379185029Spjd void *vsecp = NULL; 1380185029Spjd int flag = 0; 1381168404Spjd 1382185029Spjd /* 1383185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 1384185029Spjd * make sure file system is at proper version 1385185029Spjd */ 1386185029Spjd 1387209962Smm ksid = crgetsid(cr, KSID_OWNER); 1388209962Smm if (ksid) 1389209962Smm uid = 
ksid_getid(ksid); 1390209962Smm else 1391209962Smm uid = crgetuid(cr); 1392185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 1393185029Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 1394185029Spjd IS_EPHEMERAL(crgetuid(cr)) || IS_EPHEMERAL(crgetgid(cr)))) 1395185029Spjd return (EINVAL); 1396185029Spjd 1397168404Spjd ZFS_ENTER(zfsvfs); 1398185029Spjd ZFS_VERIFY_ZP(dzp); 1399185029Spjd os = zfsvfs->z_os; 1400185029Spjd zilog = zfsvfs->z_log; 1401168404Spjd 1402185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1403185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1404185029Spjd ZFS_EXIT(zfsvfs); 1405185029Spjd return (EILSEQ); 1406185029Spjd } 1407185029Spjd 1408185029Spjd if (vap->va_mask & AT_XVATTR) { 1409197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 1410185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 1411185029Spjd ZFS_EXIT(zfsvfs); 1412185029Spjd return (error); 1413185029Spjd } 1414185029Spjd } 1415168404Spjdtop: 1416168404Spjd *vpp = NULL; 1417168404Spjd 1418182905Strasz if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 1419182905Strasz vap->va_mode &= ~S_ISVTX; 1420168404Spjd 1421168404Spjd if (*name == '\0') { 1422168404Spjd /* 1423168404Spjd * Null component name refers to the directory itself. 
1424168404Spjd */ 1425168404Spjd VN_HOLD(dvp); 1426168404Spjd zp = dzp; 1427168404Spjd dl = NULL; 1428168404Spjd error = 0; 1429168404Spjd } else { 1430168404Spjd /* possible VN_HOLD(zp) */ 1431185029Spjd int zflg = 0; 1432185029Spjd 1433185029Spjd if (flag & FIGNORECASE) 1434185029Spjd zflg |= ZCILOOK; 1435185029Spjd 1436185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1437185029Spjd NULL, NULL); 1438185029Spjd if (error) { 1439168404Spjd if (strcmp(name, "..") == 0) 1440168404Spjd error = EISDIR; 1441168404Spjd ZFS_EXIT(zfsvfs); 1442168404Spjd return (error); 1443168404Spjd } 1444168404Spjd } 1445185029Spjd if (zp == NULL) { 1446185029Spjd uint64_t txtype; 1447168404Spjd 1448168404Spjd /* 1449168404Spjd * Create a new file object and update the directory 1450168404Spjd * to reference it. 1451168404Spjd */ 1452185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1453168404Spjd goto out; 1454168404Spjd } 1455168404Spjd 1456168404Spjd /* 1457168404Spjd * We only support the creation of regular files in 1458168404Spjd * extended attribute directories. 
1459168404Spjd */ 1460168404Spjd if ((dzp->z_phys->zp_flags & ZFS_XATTR) && 1461168404Spjd (vap->va_type != VREG)) { 1462168404Spjd error = EINVAL; 1463168404Spjd goto out; 1464168404Spjd } 1465168404Spjd 1466209962Smm 1467209962Smm if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp, 1468209962Smm &acl_ids)) != 0) 1469209962Smm goto out; 1470209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 1471209962Smm error = EDQUOT; 1472209962Smm goto out; 1473209962Smm } 1474209962Smm 1475168404Spjd tx = dmu_tx_create(os); 1476168404Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1477209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 1478209962Smm if (fuid_dirtied) 1479209962Smm zfs_fuid_txhold(zfsvfs, tx); 1480168404Spjd dmu_tx_hold_bonus(tx, dzp->z_id); 1481168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 1482209962Smm if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1483168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1484168404Spjd 0, SPA_MAXBLOCKSIZE); 1485185029Spjd } 1486209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 1487168404Spjd if (error) { 1488209962Smm zfs_acl_ids_free(&acl_ids); 1489168404Spjd zfs_dirent_unlock(dl); 1490209962Smm if (error == ERESTART) { 1491168404Spjd dmu_tx_wait(tx); 1492168404Spjd dmu_tx_abort(tx); 1493168404Spjd goto top; 1494168404Spjd } 1495168404Spjd dmu_tx_abort(tx); 1496168404Spjd ZFS_EXIT(zfsvfs); 1497168404Spjd return (error); 1498168404Spjd } 1499209962Smm zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids); 1500209962Smm 1501209962Smm if (fuid_dirtied) 1502209962Smm zfs_fuid_sync(zfsvfs, tx); 1503209962Smm 1504168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 1505209962Smm 1506185029Spjd txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1507185029Spjd if (flag & FIGNORECASE) 1508185029Spjd txtype |= TX_CI; 1509185029Spjd zfs_log_create(zilog, tx, txtype, dzp, zp, name, 1510209962Smm vsecp, acl_ids.z_fuidp, vap); 1511209962Smm zfs_acl_ids_free(&acl_ids); 1512168404Spjd dmu_tx_commit(tx); 1513168404Spjd } else { 
1514185029Spjd int aflags = (flag & FAPPEND) ? V_APPEND : 0; 1515185029Spjd 1516168404Spjd /* 1517168404Spjd * A directory entry already exists for this name. 1518168404Spjd */ 1519168404Spjd /* 1520168962Spjd * Can't truncate an existing file if in exclusive mode. 1521168962Spjd */ 1522168962Spjd if (excl == EXCL) { 1523168962Spjd error = EEXIST; 1524168962Spjd goto out; 1525168962Spjd } 1526168962Spjd /* 1527168404Spjd * Can't open a directory for writing. 1528168404Spjd */ 1529168404Spjd if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1530168404Spjd error = EISDIR; 1531168404Spjd goto out; 1532168404Spjd } 1533168404Spjd /* 1534168404Spjd * Verify requested access to file. 1535168404Spjd */ 1536185029Spjd if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 1537168404Spjd goto out; 1538168404Spjd } 1539168404Spjd 1540168404Spjd mutex_enter(&dzp->z_lock); 1541168404Spjd dzp->z_seq++; 1542168404Spjd mutex_exit(&dzp->z_lock); 1543168404Spjd 1544168404Spjd /* 1545168404Spjd * Truncate regular files if requested. 1546168404Spjd */ 1547168404Spjd if ((ZTOV(zp)->v_type == VREG) && 1548168404Spjd (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 1549185029Spjd /* we can't hold any locks when calling zfs_freesp() */ 1550185029Spjd zfs_dirent_unlock(dl); 1551185029Spjd dl = NULL; 1552168404Spjd error = zfs_freesp(zp, 0, 0, mode, TRUE); 1553185029Spjd if (error == 0) { 1554185029Spjd vnevent_create(ZTOV(zp), ct); 1555168404Spjd } 1556168404Spjd } 1557168404Spjd } 1558168404Spjdout: 1559168404Spjd if (dl) 1560168404Spjd zfs_dirent_unlock(dl); 1561168404Spjd 1562168404Spjd if (error) { 1563168404Spjd if (zp) 1564168404Spjd VN_RELE(ZTOV(zp)); 1565168962Spjd } else { 1566168962Spjd *vpp = ZTOV(zp); 1567168962Spjd /* 1568168962Spjd * If vnode is for a device return a specfs vnode instead. 
1569168962Spjd */ 1570168962Spjd if (IS_DEVVP(*vpp)) { 1571168962Spjd struct vnode *svp; 1572168962Spjd 1573168962Spjd svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1574168962Spjd VN_RELE(*vpp); 1575168962Spjd if (svp == NULL) { 1576168962Spjd error = ENOSYS; 1577168962Spjd } 1578168962Spjd *vpp = svp; 1579168962Spjd } 1580168404Spjd } 1581168404Spjd 1582168404Spjd ZFS_EXIT(zfsvfs); 1583168404Spjd return (error); 1584168404Spjd} 1585168404Spjd 1586168404Spjd/* 1587168404Spjd * Remove an entry from a directory. 1588168404Spjd * 1589168404Spjd * IN: dvp - vnode of directory to remove entry from. 1590168404Spjd * name - name of entry to remove. 1591168404Spjd * cr - credentials of caller. 1592185029Spjd * ct - caller context 1593185029Spjd * flags - case flags 1594168404Spjd * 1595168404Spjd * RETURN: 0 if success 1596168404Spjd * error code if failure 1597168404Spjd * 1598168404Spjd * Timestamps: 1599168404Spjd * dvp - ctime|mtime 1600168404Spjd * vp - ctime (if nlink > 0) 1601168404Spjd */ 1602185029Spjd/*ARGSUSED*/ 1603168404Spjdstatic int 1604185029Spjdzfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 1605185029Spjd int flags) 1606168404Spjd{ 1607168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1608168404Spjd znode_t *xzp = NULL; 1609168404Spjd vnode_t *vp; 1610168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1611185029Spjd zilog_t *zilog; 1612168962Spjd uint64_t acl_obj, xattr_obj; 1613168404Spjd zfs_dirlock_t *dl; 1614168404Spjd dmu_tx_t *tx; 1615168962Spjd boolean_t may_delete_now, delete_now = FALSE; 1616185029Spjd boolean_t unlinked, toobig = FALSE; 1617185029Spjd uint64_t txtype; 1618185029Spjd pathname_t *realnmp = NULL; 1619185029Spjd pathname_t realnm; 1620168404Spjd int error; 1621185029Spjd int zflg = ZEXISTS; 1622168404Spjd 1623168404Spjd ZFS_ENTER(zfsvfs); 1624185029Spjd ZFS_VERIFY_ZP(dzp); 1625185029Spjd zilog = zfsvfs->z_log; 1626168404Spjd 1627185029Spjd if (flags & FIGNORECASE) { 1628185029Spjd zflg |= ZCILOOK; 1629185029Spjd 
pn_alloc(&realnm); 1630185029Spjd realnmp = &realnm; 1631185029Spjd } 1632185029Spjd 1633168404Spjdtop: 1634168404Spjd /* 1635168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 1636168404Spjd */ 1637185029Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1638185029Spjd NULL, realnmp)) { 1639185029Spjd if (realnmp) 1640185029Spjd pn_free(realnmp); 1641168404Spjd ZFS_EXIT(zfsvfs); 1642168404Spjd return (error); 1643168404Spjd } 1644168404Spjd 1645168404Spjd vp = ZTOV(zp); 1646168404Spjd 1647168962Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1648168404Spjd goto out; 1649168962Spjd } 1650168404Spjd 1651168962Spjd /* 1652168962Spjd * Need to use rmdir for removing directories. 1653168962Spjd */ 1654168962Spjd if (vp->v_type == VDIR) { 1655168962Spjd error = EPERM; 1656168962Spjd goto out; 1657168962Spjd } 1658168962Spjd 1659185029Spjd vnevent_remove(vp, dvp, name, ct); 1660168962Spjd 1661185029Spjd if (realnmp) 1662185029Spjd dnlc_remove(dvp, realnmp->pn_buf); 1663185029Spjd else 1664185029Spjd dnlc_remove(dvp, name); 1665168404Spjd 1666168962Spjd may_delete_now = FALSE; 1667168962Spjd 1668168404Spjd /* 1669168404Spjd * We may delete the znode now, or we may put it in the unlinked set; 1670168404Spjd * it depends on whether we're the last link, and on whether there are 1671168404Spjd * other holds on the vnode. So we dmu_tx_hold() the right things to 1672168404Spjd * allow for either case. 1673168404Spjd */ 1674168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1675168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1676168404Spjd dmu_tx_hold_bonus(tx, zp->z_id); 1677185029Spjd if (may_delete_now) { 1678185029Spjd toobig = 1679185029Spjd zp->z_phys->zp_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 1680185029Spjd /* if the file is too big, only hold_free a token amount */ 1681185029Spjd dmu_tx_hold_free(tx, zp->z_id, 0, 1682185029Spjd (toobig ? 
DMU_MAX_ACCESS : DMU_OBJECT_END)); 1683185029Spjd } 1684168404Spjd 1685168404Spjd /* are there any extended attributes? */ 1686168404Spjd if ((xattr_obj = zp->z_phys->zp_xattr) != 0) { 1687168404Spjd /* XXX - do we need this if we are deleting? */ 1688168404Spjd dmu_tx_hold_bonus(tx, xattr_obj); 1689168404Spjd } 1690168404Spjd 1691168962Spjd /* are there any additional acls */ 1692168962Spjd if ((acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj) != 0 && 1693168962Spjd may_delete_now) 1694168962Spjd dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 1695168962Spjd 1696168404Spjd /* charge as an update -- would be nice not to charge at all */ 1697168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1698168404Spjd 1699209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 1700168404Spjd if (error) { 1701168404Spjd zfs_dirent_unlock(dl); 1702168962Spjd VN_RELE(vp); 1703209962Smm if (error == ERESTART) { 1704168404Spjd dmu_tx_wait(tx); 1705168404Spjd dmu_tx_abort(tx); 1706168404Spjd goto top; 1707168404Spjd } 1708185029Spjd if (realnmp) 1709185029Spjd pn_free(realnmp); 1710168404Spjd dmu_tx_abort(tx); 1711168404Spjd ZFS_EXIT(zfsvfs); 1712168404Spjd return (error); 1713168404Spjd } 1714168404Spjd 1715168404Spjd /* 1716168404Spjd * Remove the directory entry. 
1717168404Spjd */ 1718185029Spjd error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 1719168404Spjd 1720168404Spjd if (error) { 1721168404Spjd dmu_tx_commit(tx); 1722168404Spjd goto out; 1723168404Spjd } 1724168404Spjd 1725168962Spjd if (0 && unlinked) { 1726168962Spjd VI_LOCK(vp); 1727185029Spjd delete_now = may_delete_now && !toobig && 1728168962Spjd vp->v_count == 1 && !vn_has_cached_data(vp) && 1729168962Spjd zp->z_phys->zp_xattr == xattr_obj && 1730168962Spjd zp->z_phys->zp_acl.z_acl_extern_obj == acl_obj; 1731168962Spjd VI_UNLOCK(vp); 1732168962Spjd } 1733168962Spjd 1734168962Spjd if (delete_now) { 1735168962Spjd if (zp->z_phys->zp_xattr) { 1736168962Spjd error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp); 1737168962Spjd ASSERT3U(error, ==, 0); 1738168962Spjd ASSERT3U(xzp->z_phys->zp_links, ==, 2); 1739168962Spjd dmu_buf_will_dirty(xzp->z_dbuf, tx); 1740168962Spjd mutex_enter(&xzp->z_lock); 1741168962Spjd xzp->z_unlinked = 1; 1742168962Spjd xzp->z_phys->zp_links = 0; 1743168962Spjd mutex_exit(&xzp->z_lock); 1744168962Spjd zfs_unlinked_add(xzp, tx); 1745168962Spjd zp->z_phys->zp_xattr = 0; /* probably unnecessary */ 1746168962Spjd } 1747168962Spjd mutex_enter(&zp->z_lock); 1748168962Spjd VI_LOCK(vp); 1749168962Spjd vp->v_count--; 1750168962Spjd ASSERT3U(vp->v_count, ==, 0); 1751168962Spjd VI_UNLOCK(vp); 1752168962Spjd mutex_exit(&zp->z_lock); 1753168962Spjd zfs_znode_delete(zp, tx); 1754168962Spjd } else if (unlinked) { 1755168404Spjd zfs_unlinked_add(zp, tx); 1756168962Spjd } 1757168404Spjd 1758185029Spjd txtype = TX_REMOVE; 1759185029Spjd if (flags & FIGNORECASE) 1760185029Spjd txtype |= TX_CI; 1761185029Spjd zfs_log_remove(zilog, tx, txtype, dzp, name); 1762168404Spjd 1763168404Spjd dmu_tx_commit(tx); 1764168404Spjdout: 1765185029Spjd if (realnmp) 1766185029Spjd pn_free(realnmp); 1767185029Spjd 1768168404Spjd zfs_dirent_unlock(dl); 1769168404Spjd 1770168962Spjd if (!delete_now) { 1771168962Spjd VN_RELE(vp); 1772168962Spjd } else if (xzp) { 
1773185029Spjd /* this rele is delayed to prevent nesting transactions */ 1774168962Spjd VN_RELE(ZTOV(xzp)); 1775168962Spjd } 1776168962Spjd 1777168404Spjd ZFS_EXIT(zfsvfs); 1778168404Spjd return (error); 1779168404Spjd} 1780168404Spjd 1781168404Spjd/* 1782168404Spjd * Create a new directory and insert it into dvp using the name 1783168404Spjd * provided. Return a pointer to the inserted directory. 1784168404Spjd * 1785168404Spjd * IN: dvp - vnode of directory to add subdir to. 1786168404Spjd * dirname - name of new directory. 1787168404Spjd * vap - attributes of new directory. 1788168404Spjd * cr - credentials of caller. 1789185029Spjd * ct - caller context 1790185029Spjd * vsecp - ACL to be set 1791168404Spjd * 1792168404Spjd * OUT: vpp - vnode of created directory. 1793168404Spjd * 1794168404Spjd * RETURN: 0 if success 1795168404Spjd * error code if failure 1796168404Spjd * 1797168404Spjd * Timestamps: 1798168404Spjd * dvp - ctime|mtime updated 1799168404Spjd * vp - ctime|mtime|atime updated 1800168404Spjd */ 1801185029Spjd/*ARGSUSED*/ 1802168404Spjdstatic int 1803185029Spjdzfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 1804185029Spjd caller_context_t *ct, int flags, vsecattr_t *vsecp) 1805168404Spjd{ 1806168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1807168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1808185029Spjd zilog_t *zilog; 1809168404Spjd zfs_dirlock_t *dl; 1810185029Spjd uint64_t txtype; 1811168404Spjd dmu_tx_t *tx; 1812168404Spjd int error; 1813185029Spjd int zf = ZNEW; 1814209962Smm ksid_t *ksid; 1815209962Smm uid_t uid; 1816209962Smm gid_t gid = crgetgid(cr); 1817209962Smm zfs_acl_ids_t acl_ids; 1818209962Smm boolean_t fuid_dirtied; 1819168404Spjd 1820168404Spjd ASSERT(vap->va_type == VDIR); 1821168404Spjd 1822185029Spjd /* 1823185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 1824185029Spjd * make sure file system is at proper version 1825185029Spjd */ 1826185029Spjd 1827209962Smm ksid = crgetsid(cr, KSID_OWNER); 
1828209962Smm if (ksid) 1829209962Smm uid = ksid_getid(ksid); 1830209962Smm else 1831209962Smm uid = crgetuid(cr); 1832185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 1833185029Spjd (vsecp || (vap->va_mask & AT_XVATTR) || IS_EPHEMERAL(crgetuid(cr))|| 1834185029Spjd IS_EPHEMERAL(crgetgid(cr)))) 1835185029Spjd return (EINVAL); 1836185029Spjd 1837168404Spjd ZFS_ENTER(zfsvfs); 1838185029Spjd ZFS_VERIFY_ZP(dzp); 1839185029Spjd zilog = zfsvfs->z_log; 1840168404Spjd 1841168404Spjd if (dzp->z_phys->zp_flags & ZFS_XATTR) { 1842168404Spjd ZFS_EXIT(zfsvfs); 1843168404Spjd return (EINVAL); 1844168404Spjd } 1845168404Spjd 1846185029Spjd if (zfsvfs->z_utf8 && u8_validate(dirname, 1847185029Spjd strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1848185029Spjd ZFS_EXIT(zfsvfs); 1849185029Spjd return (EILSEQ); 1850185029Spjd } 1851185029Spjd if (flags & FIGNORECASE) 1852185029Spjd zf |= ZCILOOK; 1853185029Spjd 1854185029Spjd if (vap->va_mask & AT_XVATTR) 1855197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 1856185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 1857185029Spjd ZFS_EXIT(zfsvfs); 1858185029Spjd return (error); 1859185029Spjd } 1860185029Spjd 1861168404Spjd /* 1862168404Spjd * First make sure the new directory doesn't exist. 
1863168404Spjd */ 1864185029Spjdtop: 1865185029Spjd *vpp = NULL; 1866185029Spjd 1867185029Spjd if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 1868185029Spjd NULL, NULL)) { 1869168404Spjd ZFS_EXIT(zfsvfs); 1870168404Spjd return (error); 1871168404Spjd } 1872168404Spjd 1873185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 1874168404Spjd zfs_dirent_unlock(dl); 1875168404Spjd ZFS_EXIT(zfsvfs); 1876168404Spjd return (error); 1877168404Spjd } 1878168404Spjd 1879209962Smm if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp, 1880209962Smm &acl_ids)) != 0) { 1881209962Smm zfs_dirent_unlock(dl); 1882209962Smm ZFS_EXIT(zfsvfs); 1883209962Smm return (error); 1884185029Spjd } 1885209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 1886209962Smm zfs_dirent_unlock(dl); 1887209962Smm ZFS_EXIT(zfsvfs); 1888209962Smm return (EDQUOT); 1889209962Smm } 1890209962Smm 1891168404Spjd /* 1892168404Spjd * Add a new entry to the directory. 1893168404Spjd */ 1894168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1895168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 1896168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 1897209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 1898209962Smm if (fuid_dirtied) 1899209962Smm zfs_fuid_txhold(zfsvfs, tx); 1900209962Smm if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) 1901168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1902168404Spjd 0, SPA_MAXBLOCKSIZE); 1903209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 1904168404Spjd if (error) { 1905209962Smm zfs_acl_ids_free(&acl_ids); 1906168404Spjd zfs_dirent_unlock(dl); 1907209962Smm if (error == ERESTART) { 1908168404Spjd dmu_tx_wait(tx); 1909168404Spjd dmu_tx_abort(tx); 1910168404Spjd goto top; 1911168404Spjd } 1912168404Spjd dmu_tx_abort(tx); 1913168404Spjd ZFS_EXIT(zfsvfs); 1914168404Spjd return (error); 1915168404Spjd } 1916168404Spjd 1917168404Spjd /* 1918168404Spjd * Create new node. 
1919168404Spjd */ 1920209962Smm zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids); 1921168404Spjd 1922209962Smm if (fuid_dirtied) 1923209962Smm zfs_fuid_sync(zfsvfs, tx); 1924168404Spjd /* 1925168404Spjd * Now put new name in parent dir. 1926168404Spjd */ 1927168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 1928168404Spjd 1929168404Spjd *vpp = ZTOV(zp); 1930168404Spjd 1931185029Spjd txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 1932185029Spjd if (flags & FIGNORECASE) 1933185029Spjd txtype |= TX_CI; 1934209962Smm zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 1935209962Smm acl_ids.z_fuidp, vap); 1936185029Spjd 1937209962Smm zfs_acl_ids_free(&acl_ids); 1938168404Spjd dmu_tx_commit(tx); 1939168404Spjd 1940168404Spjd zfs_dirent_unlock(dl); 1941168404Spjd 1942168404Spjd ZFS_EXIT(zfsvfs); 1943168404Spjd return (0); 1944168404Spjd} 1945168404Spjd 1946168404Spjd/* 1947168404Spjd * Remove a directory subdir entry. If the current working 1948168404Spjd * directory is the same as the subdir to be removed, the 1949168404Spjd * remove will fail. 1950168404Spjd * 1951168404Spjd * IN: dvp - vnode of directory to remove from. 1952168404Spjd * name - name of directory to be removed. 1953168404Spjd * cwd - vnode of current working directory. 1954168404Spjd * cr - credentials of caller. 
1955185029Spjd * ct - caller context 1956185029Spjd * flags - case flags 1957168404Spjd * 1958168404Spjd * RETURN: 0 if success 1959168404Spjd * error code if failure 1960168404Spjd * 1961168404Spjd * Timestamps: 1962168404Spjd * dvp - ctime|mtime updated 1963168404Spjd */ 1964185029Spjd/*ARGSUSED*/ 1965168404Spjdstatic int 1966185029Spjdzfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 1967185029Spjd caller_context_t *ct, int flags) 1968168404Spjd{ 1969168404Spjd znode_t *dzp = VTOZ(dvp); 1970168404Spjd znode_t *zp; 1971168404Spjd vnode_t *vp; 1972168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1973185029Spjd zilog_t *zilog; 1974168404Spjd zfs_dirlock_t *dl; 1975168404Spjd dmu_tx_t *tx; 1976168404Spjd int error; 1977185029Spjd int zflg = ZEXISTS; 1978168404Spjd 1979168962Spjd ZFS_ENTER(zfsvfs); 1980185029Spjd ZFS_VERIFY_ZP(dzp); 1981185029Spjd zilog = zfsvfs->z_log; 1982168404Spjd 1983185029Spjd if (flags & FIGNORECASE) 1984185029Spjd zflg |= ZCILOOK; 1985168404Spjdtop: 1986168404Spjd zp = NULL; 1987168404Spjd 1988168404Spjd /* 1989168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 1990168404Spjd */ 1991185029Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1992185029Spjd NULL, NULL)) { 1993168404Spjd ZFS_EXIT(zfsvfs); 1994168404Spjd return (error); 1995168404Spjd } 1996168404Spjd 1997168404Spjd vp = ZTOV(zp); 1998168404Spjd 1999168404Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2000168404Spjd goto out; 2001168404Spjd } 2002168404Spjd 2003168962Spjd if (vp->v_type != VDIR) { 2004168962Spjd error = ENOTDIR; 2005168962Spjd goto out; 2006168962Spjd } 2007168962Spjd 2008168962Spjd if (vp == cwd) { 2009168962Spjd error = EINVAL; 2010168962Spjd goto out; 2011168962Spjd } 2012168962Spjd 2013185029Spjd vnevent_rmdir(vp, dvp, name, ct); 2014168962Spjd 2015168404Spjd /* 2016168404Spjd * Grab a lock on the directory to make sure that noone is 2017168404Spjd * trying to add (or lookup) entries while we are removing it. 
2018168404Spjd */ 2019168404Spjd rw_enter(&zp->z_name_lock, RW_WRITER); 2020168404Spjd 2021168404Spjd /* 2022168404Spjd * Grab a lock on the parent pointer to make sure we play well 2023168404Spjd * with the treewalk and directory rename code. 2024168404Spjd */ 2025168404Spjd rw_enter(&zp->z_parent_lock, RW_WRITER); 2026168404Spjd 2027168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2028168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2029168404Spjd dmu_tx_hold_bonus(tx, zp->z_id); 2030168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2031209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 2032168404Spjd if (error) { 2033168404Spjd rw_exit(&zp->z_parent_lock); 2034168404Spjd rw_exit(&zp->z_name_lock); 2035168404Spjd zfs_dirent_unlock(dl); 2036168962Spjd VN_RELE(vp); 2037209962Smm if (error == ERESTART) { 2038168404Spjd dmu_tx_wait(tx); 2039168404Spjd dmu_tx_abort(tx); 2040168404Spjd goto top; 2041168404Spjd } 2042168404Spjd dmu_tx_abort(tx); 2043168404Spjd ZFS_EXIT(zfsvfs); 2044168404Spjd return (error); 2045168404Spjd } 2046168404Spjd 2047168404Spjd#ifdef FREEBSD_NAMECACHE 2048168404Spjd cache_purge(dvp); 2049168404Spjd#endif 2050168404Spjd 2051185029Spjd error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 2052168404Spjd 2053185029Spjd if (error == 0) { 2054185029Spjd uint64_t txtype = TX_RMDIR; 2055185029Spjd if (flags & FIGNORECASE) 2056185029Spjd txtype |= TX_CI; 2057185029Spjd zfs_log_remove(zilog, tx, txtype, dzp, name); 2058185029Spjd } 2059168404Spjd 2060168404Spjd dmu_tx_commit(tx); 2061168404Spjd 2062168404Spjd rw_exit(&zp->z_parent_lock); 2063168404Spjd rw_exit(&zp->z_name_lock); 2064168404Spjd#ifdef FREEBSD_NAMECACHE 2065168404Spjd cache_purge(vp); 2066168404Spjd#endif 2067168404Spjdout: 2068168404Spjd zfs_dirent_unlock(dl); 2069168404Spjd 2070168962Spjd VN_RELE(vp); 2071168962Spjd 2072168404Spjd ZFS_EXIT(zfsvfs); 2073168404Spjd return (error); 2074168404Spjd} 2075168404Spjd 2076168404Spjd/* 2077168404Spjd * Read as many directory 
entries as will fit into the provided 2078168404Spjd * buffer from the given directory cursor position (specified in 2079168404Spjd * the uio structure. 2080168404Spjd * 2081168404Spjd * IN: vp - vnode of directory to read. 2082168404Spjd * uio - structure supplying read location, range info, 2083168404Spjd * and return buffer. 2084168404Spjd * cr - credentials of caller. 2085185029Spjd * ct - caller context 2086185029Spjd * flags - case flags 2087168404Spjd * 2088168404Spjd * OUT: uio - updated offset and range, buffer filled. 2089168404Spjd * eofp - set to true if end-of-file detected. 2090168404Spjd * 2091168404Spjd * RETURN: 0 if success 2092168404Spjd * error code if failure 2093168404Spjd * 2094168404Spjd * Timestamps: 2095168404Spjd * vp - atime updated 2096168404Spjd * 2097168404Spjd * Note that the low 4 bits of the cookie returned by zap is always zero. 2098168404Spjd * This allows us to use the low range for "special" directory entries: 2099168404Spjd * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2100168404Spjd * we use the offset 2 for the '.zfs' directory. 
2101168404Spjd */ 2102168404Spjd/* ARGSUSED */ 2103168404Spjdstatic int 2104168962Spjdzfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) 2105168404Spjd{ 2106168404Spjd znode_t *zp = VTOZ(vp); 2107168404Spjd iovec_t *iovp; 2108185029Spjd edirent_t *eodp; 2109168404Spjd dirent64_t *odp; 2110168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2111168404Spjd objset_t *os; 2112168404Spjd caddr_t outbuf; 2113168404Spjd size_t bufsize; 2114168404Spjd zap_cursor_t zc; 2115168404Spjd zap_attribute_t zap; 2116168404Spjd uint_t bytes_wanted; 2117168404Spjd uint64_t offset; /* must be unsigned; checks for < 1 */ 2118168404Spjd int local_eof; 2119168404Spjd int outcount; 2120168404Spjd int error; 2121168404Spjd uint8_t prefetch; 2122185029Spjd boolean_t check_sysattrs; 2123168404Spjd uint8_t type; 2124168962Spjd int ncooks; 2125168962Spjd u_long *cooks = NULL; 2126185029Spjd int flags = 0; 2127168404Spjd 2128168404Spjd ZFS_ENTER(zfsvfs); 2129185029Spjd ZFS_VERIFY_ZP(zp); 2130168404Spjd 2131168404Spjd /* 2132168404Spjd * If we are not given an eof variable, 2133168404Spjd * use a local one. 2134168404Spjd */ 2135168404Spjd if (eofp == NULL) 2136168404Spjd eofp = &local_eof; 2137168404Spjd 2138168404Spjd /* 2139168404Spjd * Check for valid iov_len. 2140168404Spjd */ 2141168404Spjd if (uio->uio_iov->iov_len <= 0) { 2142168404Spjd ZFS_EXIT(zfsvfs); 2143168404Spjd return (EINVAL); 2144168404Spjd } 2145168404Spjd 2146168404Spjd /* 2147168404Spjd * Quit if directory has been removed (posix) 2148168404Spjd */ 2149168404Spjd if ((*eofp = zp->z_unlinked) != 0) { 2150168404Spjd ZFS_EXIT(zfsvfs); 2151168404Spjd return (0); 2152168404Spjd } 2153168404Spjd 2154168404Spjd error = 0; 2155168404Spjd os = zfsvfs->z_os; 2156168404Spjd offset = uio->uio_loffset; 2157168404Spjd prefetch = zp->z_zn_prefetch; 2158168404Spjd 2159168404Spjd /* 2160168404Spjd * Initialize the iterator cursor. 
2161168404Spjd */ 2162168404Spjd if (offset <= 3) { 2163168404Spjd /* 2164168404Spjd * Start iteration from the beginning of the directory. 2165168404Spjd */ 2166168404Spjd zap_cursor_init(&zc, os, zp->z_id); 2167168404Spjd } else { 2168168404Spjd /* 2169168404Spjd * The offset is a serialized cursor. 2170168404Spjd */ 2171168404Spjd zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2172168404Spjd } 2173168404Spjd 2174168404Spjd /* 2175168404Spjd * Get space to change directory entries into fs independent format. 2176168404Spjd */ 2177168404Spjd iovp = uio->uio_iov; 2178168404Spjd bytes_wanted = iovp->iov_len; 2179168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2180168404Spjd bufsize = bytes_wanted; 2181168404Spjd outbuf = kmem_alloc(bufsize, KM_SLEEP); 2182168404Spjd odp = (struct dirent64 *)outbuf; 2183168404Spjd } else { 2184168404Spjd bufsize = bytes_wanted; 2185168404Spjd odp = (struct dirent64 *)iovp->iov_base; 2186168404Spjd } 2187185029Spjd eodp = (struct edirent *)odp; 2188168404Spjd 2189169170Spjd if (ncookies != NULL) { 2190168404Spjd /* 2191168404Spjd * Minimum entry size is dirent size and 1 byte for a file name. 2192168404Spjd */ 2193168962Spjd ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2194168962Spjd cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2195169170Spjd *cookies = cooks; 2196168962Spjd *ncookies = ncooks; 2197168404Spjd } 2198185029Spjd /* 2199185029Spjd * If this VFS supports the system attribute view interface; and 2200185029Spjd * we're looking at an extended attribute directory; and we care 2201185029Spjd * about normalization conflicts on this vfs; then we must check 2202185029Spjd * for normalization conflicts with the sysattr name space. 
2203185029Spjd */ 2204185029Spjd#ifdef TODO 2205185029Spjd check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2206185029Spjd (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2207185029Spjd (flags & V_RDDIR_ENTFLAGS); 2208185029Spjd#else 2209185029Spjd check_sysattrs = 0; 2210185029Spjd#endif 2211168404Spjd 2212168404Spjd /* 2213168404Spjd * Transform to file-system independent format 2214168404Spjd */ 2215168404Spjd outcount = 0; 2216168404Spjd while (outcount < bytes_wanted) { 2217168404Spjd ino64_t objnum; 2218168404Spjd ushort_t reclen; 2219185029Spjd off64_t *next; 2220168404Spjd 2221168404Spjd /* 2222168404Spjd * Special case `.', `..', and `.zfs'. 2223168404Spjd */ 2224168404Spjd if (offset == 0) { 2225168404Spjd (void) strcpy(zap.za_name, "."); 2226185029Spjd zap.za_normalization_conflict = 0; 2227168404Spjd objnum = zp->z_id; 2228169108Spjd type = DT_DIR; 2229168404Spjd } else if (offset == 1) { 2230168404Spjd (void) strcpy(zap.za_name, ".."); 2231185029Spjd zap.za_normalization_conflict = 0; 2232168404Spjd objnum = zp->z_phys->zp_parent; 2233169108Spjd type = DT_DIR; 2234168404Spjd } else if (offset == 2 && zfs_show_ctldir(zp)) { 2235168404Spjd (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2236185029Spjd zap.za_normalization_conflict = 0; 2237168404Spjd objnum = ZFSCTL_INO_ROOT; 2238169108Spjd type = DT_DIR; 2239168404Spjd } else { 2240168404Spjd /* 2241168404Spjd * Grab next entry. 
2242168404Spjd */ 2243168404Spjd if (error = zap_cursor_retrieve(&zc, &zap)) { 2244168404Spjd if ((*eofp = (error == ENOENT)) != 0) 2245168404Spjd break; 2246168404Spjd else 2247168404Spjd goto update; 2248168404Spjd } 2249168404Spjd 2250168404Spjd if (zap.za_integer_length != 8 || 2251168404Spjd zap.za_num_integers != 1) { 2252168404Spjd cmn_err(CE_WARN, "zap_readdir: bad directory " 2253168404Spjd "entry, obj = %lld, offset = %lld\n", 2254168404Spjd (u_longlong_t)zp->z_id, 2255168404Spjd (u_longlong_t)offset); 2256168404Spjd error = ENXIO; 2257168404Spjd goto update; 2258168404Spjd } 2259168404Spjd 2260168404Spjd objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2261168404Spjd /* 2262168404Spjd * MacOS X can extract the object type here such as: 2263168404Spjd * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2264168404Spjd */ 2265168404Spjd type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2266185029Spjd 2267185029Spjd if (check_sysattrs && !zap.za_normalization_conflict) { 2268185029Spjd#ifdef TODO 2269185029Spjd zap.za_normalization_conflict = 2270185029Spjd xattr_sysattr_casechk(zap.za_name); 2271185029Spjd#else 2272185029Spjd panic("%s:%u: TODO", __func__, __LINE__); 2273185029Spjd#endif 2274185029Spjd } 2275168404Spjd } 2276168404Spjd 2277185029Spjd if (flags & V_RDDIR_ENTFLAGS) 2278185029Spjd reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2279185029Spjd else 2280185029Spjd reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2281185029Spjd 2282168404Spjd /* 2283168404Spjd * Will this entry fit in the buffer? 2284168404Spjd */ 2285168404Spjd if (outcount + reclen > bufsize) { 2286168404Spjd /* 2287168404Spjd * Did we manage to fit anything in the buffer? 
2288168404Spjd */ 2289168404Spjd if (!outcount) { 2290168404Spjd error = EINVAL; 2291168404Spjd goto update; 2292168404Spjd } 2293168404Spjd break; 2294168404Spjd } 2295185029Spjd if (flags & V_RDDIR_ENTFLAGS) { 2296185029Spjd /* 2297185029Spjd * Add extended flag entry: 2298185029Spjd */ 2299185029Spjd eodp->ed_ino = objnum; 2300185029Spjd eodp->ed_reclen = reclen; 2301185029Spjd /* NOTE: ed_off is the offset for the *next* entry */ 2302185029Spjd next = &(eodp->ed_off); 2303185029Spjd eodp->ed_eflags = zap.za_normalization_conflict ? 2304185029Spjd ED_CASE_CONFLICT : 0; 2305185029Spjd (void) strncpy(eodp->ed_name, zap.za_name, 2306185029Spjd EDIRENT_NAMELEN(reclen)); 2307185029Spjd eodp = (edirent_t *)((intptr_t)eodp + reclen); 2308185029Spjd } else { 2309185029Spjd /* 2310185029Spjd * Add normal entry: 2311185029Spjd */ 2312185029Spjd odp->d_ino = objnum; 2313185029Spjd odp->d_reclen = reclen; 2314185029Spjd odp->d_namlen = strlen(zap.za_name); 2315185029Spjd (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2316185029Spjd odp->d_type = type; 2317185029Spjd odp = (dirent64_t *)((intptr_t)odp + reclen); 2318185029Spjd } 2319168404Spjd outcount += reclen; 2320168404Spjd 2321168404Spjd ASSERT(outcount <= bufsize); 2322168404Spjd 2323168404Spjd /* Prefetch znode */ 2324168404Spjd if (prefetch) 2325168404Spjd dmu_prefetch(os, objnum, 0, 0); 2326168404Spjd 2327168404Spjd /* 2328168404Spjd * Move to the next entry, fill in the previous offset. 
2329168404Spjd */ 2330168404Spjd if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2331168404Spjd zap_cursor_advance(&zc); 2332168404Spjd offset = zap_cursor_serialize(&zc); 2333168404Spjd } else { 2334168404Spjd offset += 1; 2335168404Spjd } 2336168404Spjd 2337168962Spjd if (cooks != NULL) { 2338168962Spjd *cooks++ = offset; 2339168962Spjd ncooks--; 2340168962Spjd KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2341168404Spjd } 2342168404Spjd } 2343168404Spjd zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2344168404Spjd 2345168404Spjd /* Subtract unused cookies */ 2346168962Spjd if (ncookies != NULL) 2347168962Spjd *ncookies -= ncooks; 2348168404Spjd 2349168404Spjd if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2350168404Spjd iovp->iov_base += outcount; 2351168404Spjd iovp->iov_len -= outcount; 2352168404Spjd uio->uio_resid -= outcount; 2353168404Spjd } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2354168404Spjd /* 2355168404Spjd * Reset the pointer. 
2356168404Spjd */ 2357168404Spjd offset = uio->uio_loffset; 2358168404Spjd } 2359168404Spjd 2360168404Spjdupdate: 2361168404Spjd zap_cursor_fini(&zc); 2362168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2363168404Spjd kmem_free(outbuf, bufsize); 2364168404Spjd 2365168404Spjd if (error == ENOENT) 2366168404Spjd error = 0; 2367168404Spjd 2368168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2369168404Spjd 2370168404Spjd uio->uio_loffset = offset; 2371168404Spjd ZFS_EXIT(zfsvfs); 2372169107Spjd if (error != 0 && cookies != NULL) { 2373168962Spjd free(*cookies, M_TEMP); 2374168962Spjd *cookies = NULL; 2375168962Spjd *ncookies = 0; 2376168404Spjd } 2377168404Spjd return (error); 2378168404Spjd} 2379168404Spjd 2380185029Spjdulong_t zfs_fsync_sync_cnt = 4; 2381185029Spjd 2382168404Spjdstatic int 2383185029Spjdzfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2384168404Spjd{ 2385168962Spjd znode_t *zp = VTOZ(vp); 2386168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2387168404Spjd 2388185029Spjd (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2389185029Spjd 2390168404Spjd ZFS_ENTER(zfsvfs); 2391185029Spjd ZFS_VERIFY_ZP(zp); 2392168404Spjd zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); 2393168404Spjd ZFS_EXIT(zfsvfs); 2394168404Spjd return (0); 2395168404Spjd} 2396168404Spjd 2397185029Spjd 2398168404Spjd/* 2399168404Spjd * Get the requested file attributes and place them in the provided 2400168404Spjd * vattr structure. 2401168404Spjd * 2402168404Spjd * IN: vp - vnode of file. 2403168404Spjd * vap - va_mask identifies requested attributes. 2404185029Spjd * If AT_XVATTR set, then optional attrs are requested 2405185029Spjd * flags - ATTR_NOACLCHECK (CIFS server context) 2406168404Spjd * cr - credentials of caller. 2407185029Spjd * ct - caller context 2408168404Spjd * 2409168404Spjd * OUT: vap - attribute values. 
2410168404Spjd * 2411168404Spjd * RETURN: 0 (always succeeds) 2412168404Spjd */ 2413168404Spjd/* ARGSUSED */ 2414168404Spjdstatic int 2415185029Spjdzfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2416185029Spjd caller_context_t *ct) 2417168404Spjd{ 2418168962Spjd znode_t *zp = VTOZ(vp); 2419168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2420185029Spjd znode_phys_t *pzp; 2421185029Spjd int error = 0; 2422168962Spjd uint32_t blksize; 2423168962Spjd u_longlong_t nblocks; 2424185029Spjd uint64_t links; 2425185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2426185029Spjd xoptattr_t *xoap = NULL; 2427185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2428168404Spjd 2429168404Spjd ZFS_ENTER(zfsvfs); 2430185029Spjd ZFS_VERIFY_ZP(zp); 2431185029Spjd pzp = zp->z_phys; 2432168404Spjd 2433168404Spjd /* 2434185029Spjd * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2435185029Spjd * Also, if we are the owner don't bother, since owner should 2436185029Spjd * always be allowed to read basic attributes of file. 2437185029Spjd */ 2438185029Spjd if (!(pzp->zp_flags & ZFS_ACL_TRIVIAL) && 2439185029Spjd (pzp->zp_uid != crgetuid(cr))) { 2440185029Spjd if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2441185029Spjd skipaclchk, cr)) { 2442185029Spjd ZFS_EXIT(zfsvfs); 2443185029Spjd return (error); 2444185029Spjd } 2445185029Spjd } 2446185029Spjd 2447185029Spjd /* 2448168404Spjd * Return all attributes. It's cheaper to provide the answer 2449168404Spjd * than to determine whether we were asked the question. 
2450168404Spjd */ 2451168404Spjd 2452209097Smm mutex_enter(&zp->z_lock); 2453168404Spjd vap->va_type = IFTOVT(pzp->zp_mode); 2454168404Spjd vap->va_mode = pzp->zp_mode & ~S_IFMT; 2455185029Spjd zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2456185029Spjd// vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2457168404Spjd vap->va_nodeid = zp->z_id; 2458185029Spjd if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2459185029Spjd links = pzp->zp_links + 1; 2460185029Spjd else 2461185029Spjd links = pzp->zp_links; 2462185029Spjd vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */ 2463168404Spjd vap->va_size = pzp->zp_size; 2464168404Spjd vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 2465168958Spjd vap->va_rdev = zfs_cmpldev(pzp->zp_rdev); 2466168404Spjd vap->va_seq = zp->z_seq; 2467168404Spjd vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2468168404Spjd 2469185029Spjd /* 2470185029Spjd * Add in any requested optional attributes and the create time. 2471185029Spjd * Also set the corresponding bits in the returned attribute bitmap. 
2472185029Spjd */ 2473185029Spjd if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2474185029Spjd if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2475185029Spjd xoap->xoa_archive = 2476185029Spjd ((pzp->zp_flags & ZFS_ARCHIVE) != 0); 2477185029Spjd XVA_SET_RTN(xvap, XAT_ARCHIVE); 2478185029Spjd } 2479185029Spjd 2480185029Spjd if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2481185029Spjd xoap->xoa_readonly = 2482185029Spjd ((pzp->zp_flags & ZFS_READONLY) != 0); 2483185029Spjd XVA_SET_RTN(xvap, XAT_READONLY); 2484185029Spjd } 2485185029Spjd 2486185029Spjd if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2487185029Spjd xoap->xoa_system = 2488185029Spjd ((pzp->zp_flags & ZFS_SYSTEM) != 0); 2489185029Spjd XVA_SET_RTN(xvap, XAT_SYSTEM); 2490185029Spjd } 2491185029Spjd 2492185029Spjd if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2493185029Spjd xoap->xoa_hidden = 2494185029Spjd ((pzp->zp_flags & ZFS_HIDDEN) != 0); 2495185029Spjd XVA_SET_RTN(xvap, XAT_HIDDEN); 2496185029Spjd } 2497185029Spjd 2498185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2499185029Spjd xoap->xoa_nounlink = 2500185029Spjd ((pzp->zp_flags & ZFS_NOUNLINK) != 0); 2501185029Spjd XVA_SET_RTN(xvap, XAT_NOUNLINK); 2502185029Spjd } 2503185029Spjd 2504185029Spjd if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2505185029Spjd xoap->xoa_immutable = 2506185029Spjd ((pzp->zp_flags & ZFS_IMMUTABLE) != 0); 2507185029Spjd XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2508185029Spjd } 2509185029Spjd 2510185029Spjd if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2511185029Spjd xoap->xoa_appendonly = 2512185029Spjd ((pzp->zp_flags & ZFS_APPENDONLY) != 0); 2513185029Spjd XVA_SET_RTN(xvap, XAT_APPENDONLY); 2514185029Spjd } 2515185029Spjd 2516185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2517185029Spjd xoap->xoa_nodump = 2518185029Spjd ((pzp->zp_flags & ZFS_NODUMP) != 0); 2519185029Spjd XVA_SET_RTN(xvap, XAT_NODUMP); 2520185029Spjd } 2521185029Spjd 2522185029Spjd if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2523185029Spjd xoap->xoa_opaque = 2524185029Spjd 
((pzp->zp_flags & ZFS_OPAQUE) != 0); 2525185029Spjd XVA_SET_RTN(xvap, XAT_OPAQUE); 2526185029Spjd } 2527185029Spjd 2528185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2529185029Spjd xoap->xoa_av_quarantined = 2530185029Spjd ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0); 2531185029Spjd XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2532185029Spjd } 2533185029Spjd 2534185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2535185029Spjd xoap->xoa_av_modified = 2536185029Spjd ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0); 2537185029Spjd XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2538185029Spjd } 2539185029Spjd 2540185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2541185029Spjd vp->v_type == VREG && 2542185029Spjd (pzp->zp_flags & ZFS_BONUS_SCANSTAMP)) { 2543185029Spjd size_t len; 2544185029Spjd dmu_object_info_t doi; 2545185029Spjd 2546185029Spjd /* 2547185029Spjd * Only VREG files have anti-virus scanstamps, so we 2548185029Spjd * won't conflict with symlinks in the bonus buffer. 2549185029Spjd */ 2550185029Spjd dmu_object_info_from_db(zp->z_dbuf, &doi); 2551185029Spjd len = sizeof (xoap->xoa_av_scanstamp) + 2552185029Spjd sizeof (znode_phys_t); 2553185029Spjd if (len <= doi.doi_bonus_size) { 2554185029Spjd /* 2555185029Spjd * pzp points to the start of the 2556185029Spjd * znode_phys_t. pzp + 1 points to the 2557185029Spjd * first byte after the znode_phys_t. 
2558185029Spjd */ 2559185029Spjd (void) memcpy(xoap->xoa_av_scanstamp, 2560185029Spjd pzp + 1, 2561185029Spjd sizeof (xoap->xoa_av_scanstamp)); 2562185029Spjd XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); 2563185029Spjd } 2564185029Spjd } 2565185029Spjd 2566185029Spjd if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 2567185029Spjd ZFS_TIME_DECODE(&xoap->xoa_createtime, pzp->zp_crtime); 2568185029Spjd XVA_SET_RTN(xvap, XAT_CREATETIME); 2569185029Spjd } 2570185029Spjd } 2571185029Spjd 2572168404Spjd ZFS_TIME_DECODE(&vap->va_atime, pzp->zp_atime); 2573168404Spjd ZFS_TIME_DECODE(&vap->va_mtime, pzp->zp_mtime); 2574168404Spjd ZFS_TIME_DECODE(&vap->va_ctime, pzp->zp_ctime); 2575168404Spjd ZFS_TIME_DECODE(&vap->va_birthtime, pzp->zp_crtime); 2576168404Spjd 2577168404Spjd mutex_exit(&zp->z_lock); 2578168404Spjd 2579168404Spjd dmu_object_size_from_db(zp->z_dbuf, &blksize, &nblocks); 2580168404Spjd vap->va_blksize = blksize; 2581168404Spjd vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 2582168404Spjd 2583168404Spjd if (zp->z_blksz == 0) { 2584168404Spjd /* 2585168404Spjd * Block size hasn't been set; suggest maximal I/O transfers. 2586168404Spjd */ 2587168404Spjd vap->va_blksize = zfsvfs->z_max_blksz; 2588168404Spjd } 2589168404Spjd 2590168404Spjd ZFS_EXIT(zfsvfs); 2591168404Spjd return (0); 2592168404Spjd} 2593168404Spjd 2594168404Spjd/* 2595168404Spjd * Set the file attributes to the values contained in the 2596168404Spjd * vattr structure. 2597168404Spjd * 2598168404Spjd * IN: vp - vnode of file to be modified. 2599168404Spjd * vap - new attribute values. 2600185029Spjd * If AT_XVATTR set, then optional attrs are being set 2601168404Spjd * flags - ATTR_UTIME set if non-default time values provided. 2602185029Spjd * - ATTR_NOACLCHECK (CIFS context only). 2603168404Spjd * cr - credentials of caller. 
2604185029Spjd * ct - caller context 2605168404Spjd * 2606168404Spjd * RETURN: 0 if success 2607168404Spjd * error code if failure 2608168404Spjd * 2609168404Spjd * Timestamps: 2610168404Spjd * vp - ctime updated, mtime updated if size changed. 2611168404Spjd */ 2612168404Spjd/* ARGSUSED */ 2613168404Spjdstatic int 2614168962Spjdzfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2615168962Spjd caller_context_t *ct) 2616168404Spjd{ 2617185029Spjd znode_t *zp = VTOZ(vp); 2618185029Spjd znode_phys_t *pzp; 2619168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2620185029Spjd zilog_t *zilog; 2621168404Spjd dmu_tx_t *tx; 2622168404Spjd vattr_t oldva; 2623209962Smm xvattr_t tmpxvattr; 2624168962Spjd uint_t mask = vap->va_mask; 2625168404Spjd uint_t saved_mask; 2626197831Spjd uint64_t saved_mode; 2627168404Spjd int trim_mask = 0; 2628168404Spjd uint64_t new_mode; 2629209962Smm uint64_t new_uid, new_gid; 2630168404Spjd znode_t *attrzp; 2631168404Spjd int need_policy = FALSE; 2632168404Spjd int err; 2633185029Spjd zfs_fuid_info_t *fuidp = NULL; 2634185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2635185029Spjd xoptattr_t *xoap; 2636185029Spjd zfs_acl_t *aclp = NULL; 2637185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 2638209962Smm boolean_t fuid_dirtied = B_FALSE; 2639168404Spjd 2640168404Spjd if (mask == 0) 2641168404Spjd return (0); 2642168404Spjd 2643168962Spjd if (mask & AT_NOSET) 2644168962Spjd return (EINVAL); 2645168962Spjd 2646185029Spjd ZFS_ENTER(zfsvfs); 2647185029Spjd ZFS_VERIFY_ZP(zp); 2648185029Spjd 2649185029Spjd pzp = zp->z_phys; 2650185029Spjd zilog = zfsvfs->z_log; 2651185029Spjd 2652185029Spjd /* 2653185029Spjd * Make sure that if we have ephemeral uid/gid or xvattr specified 2654185029Spjd * that file system is at proper version level 2655185029Spjd */ 2656185029Spjd 2657185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2658185029Spjd (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2659185029Spjd ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 2660185029Spjd (mask & AT_XVATTR))) { 2661185029Spjd ZFS_EXIT(zfsvfs); 2662185029Spjd return (EINVAL); 2663185029Spjd } 2664185029Spjd 2665185029Spjd if (mask & AT_SIZE && vp->v_type == VDIR) { 2666185029Spjd ZFS_EXIT(zfsvfs); 2667168404Spjd return (EISDIR); 2668185029Spjd } 2669168404Spjd 2670185029Spjd if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 2671185029Spjd ZFS_EXIT(zfsvfs); 2672168404Spjd return (EINVAL); 2673185029Spjd } 2674168404Spjd 2675185029Spjd /* 2676185029Spjd * If this is an xvattr_t, then get a pointer to the structure of 2677185029Spjd * optional attributes. If this is NULL, then we have a vattr_t. 
2678185029Spjd */ 2679185029Spjd xoap = xva_getxoptattr(xvap); 2680168404Spjd 2681209962Smm xva_init(&tmpxvattr); 2682209962Smm 2683185029Spjd /* 2684185029Spjd * Immutable files can only alter immutable bit and atime 2685185029Spjd */ 2686185029Spjd if ((pzp->zp_flags & ZFS_IMMUTABLE) && 2687185029Spjd ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 2688185029Spjd ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 2689185029Spjd ZFS_EXIT(zfsvfs); 2690185029Spjd return (EPERM); 2691185029Spjd } 2692185029Spjd 2693185029Spjd if ((mask & AT_SIZE) && (pzp->zp_flags & ZFS_READONLY)) { 2694185029Spjd ZFS_EXIT(zfsvfs); 2695185029Spjd return (EPERM); 2696185029Spjd } 2697185029Spjd 2698185029Spjd /* 2699185029Spjd * Verify timestamps doesn't overflow 32 bits. 2700185029Spjd * ZFS can handle large timestamps, but 32bit syscalls can't 2701185029Spjd * handle times greater than 2039. This check should be removed 2702185029Spjd * once large timestamps are fully supported. 2703185029Spjd */ 2704185029Spjd if (mask & (AT_ATIME | AT_MTIME)) { 2705185029Spjd if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 2706185029Spjd ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 2707185029Spjd ZFS_EXIT(zfsvfs); 2708185029Spjd return (EOVERFLOW); 2709185029Spjd } 2710185029Spjd } 2711185029Spjd 2712168404Spjdtop: 2713168404Spjd attrzp = NULL; 2714168404Spjd 2715168404Spjd if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2716168404Spjd ZFS_EXIT(zfsvfs); 2717168404Spjd return (EROFS); 2718168404Spjd } 2719168404Spjd 2720168404Spjd /* 2721168404Spjd * First validate permissions 2722168404Spjd */ 2723168404Spjd 2724168404Spjd if (mask & AT_SIZE) { 2725185029Spjd err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr); 2726168404Spjd if (err) { 2727168404Spjd ZFS_EXIT(zfsvfs); 2728168404Spjd return (err); 2729168404Spjd } 2730168404Spjd /* 2731168404Spjd * XXX - Note, we are not providing any open 2732168404Spjd * mode flags here (like FNDELAY), so we may 
2733168404Spjd * block if there are locks present... this 2734168404Spjd * should be addressed in openat(). 2735168404Spjd */ 2736185029Spjd /* XXX - would it be OK to generate a log record here? */ 2737185029Spjd err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 2738168404Spjd if (err) { 2739168404Spjd ZFS_EXIT(zfsvfs); 2740168404Spjd return (err); 2741168404Spjd } 2742168404Spjd } 2743168404Spjd 2744185029Spjd if (mask & (AT_ATIME|AT_MTIME) || 2745185029Spjd ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2746185029Spjd XVA_ISSET_REQ(xvap, XAT_READONLY) || 2747185029Spjd XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 2748185029Spjd XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 2749185029Spjd XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) 2750185029Spjd need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 2751185029Spjd skipaclchk, cr); 2752168404Spjd 2753168404Spjd if (mask & (AT_UID|AT_GID)) { 2754168404Spjd int idmask = (mask & (AT_UID|AT_GID)); 2755168404Spjd int take_owner; 2756168404Spjd int take_group; 2757168404Spjd 2758168404Spjd /* 2759168404Spjd * NOTE: even if a new mode is being set, 2760168404Spjd * we may clear S_ISUID/S_ISGID bits. 2761168404Spjd */ 2762168404Spjd 2763168404Spjd if (!(mask & AT_MODE)) 2764168404Spjd vap->va_mode = pzp->zp_mode; 2765168404Spjd 2766168404Spjd /* 2767168404Spjd * Take ownership or chgrp to group we are a member of 2768168404Spjd */ 2769168404Spjd 2770168404Spjd take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 2771185029Spjd take_group = (mask & AT_GID) && 2772185029Spjd zfs_groupmember(zfsvfs, vap->va_gid, cr); 2773168404Spjd 2774168404Spjd /* 2775168404Spjd * If both AT_UID and AT_GID are set then take_owner and 2776168404Spjd * take_group must both be set in order to allow taking 2777168404Spjd * ownership. 
2778168404Spjd * 2779168404Spjd * Otherwise, send the check through secpolicy_vnode_setattr() 2780168404Spjd * 2781168404Spjd */ 2782168404Spjd 2783168404Spjd if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 2784168404Spjd ((idmask == AT_UID) && take_owner) || 2785168404Spjd ((idmask == AT_GID) && take_group)) { 2786185029Spjd if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 2787185029Spjd skipaclchk, cr) == 0) { 2788168404Spjd /* 2789168404Spjd * Remove setuid/setgid for non-privileged users 2790168404Spjd */ 2791185029Spjd secpolicy_setid_clear(vap, vp, cr); 2792168404Spjd trim_mask = (mask & (AT_UID|AT_GID)); 2793168404Spjd } else { 2794168404Spjd need_policy = TRUE; 2795168404Spjd } 2796168404Spjd } else { 2797168404Spjd need_policy = TRUE; 2798168404Spjd } 2799168404Spjd } 2800168404Spjd 2801168404Spjd mutex_enter(&zp->z_lock); 2802168404Spjd oldva.va_mode = pzp->zp_mode; 2803185029Spjd zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 2804185029Spjd if (mask & AT_XVATTR) { 2805209962Smm /* 2806209962Smm * Update xvattr mask to include only those attributes 2807209962Smm * that are actually changing. 2808209962Smm * 2809209962Smm * the bits will be restored prior to actually setting 2810209962Smm * the attributes so the caller thinks they were set. 
2811209962Smm */ 2812209962Smm if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2813209962Smm if (xoap->xoa_appendonly != 2814209962Smm ((pzp->zp_flags & ZFS_APPENDONLY) != 0)) { 2815209962Smm need_policy = TRUE; 2816209962Smm } else { 2817209962Smm XVA_CLR_REQ(xvap, XAT_APPENDONLY); 2818209962Smm XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 2819209962Smm } 2820209962Smm } 2821209962Smm 2822209962Smm if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2823209962Smm if (xoap->xoa_nounlink != 2824209962Smm ((pzp->zp_flags & ZFS_NOUNLINK) != 0)) { 2825209962Smm need_policy = TRUE; 2826209962Smm } else { 2827209962Smm XVA_CLR_REQ(xvap, XAT_NOUNLINK); 2828209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 2829209962Smm } 2830209962Smm } 2831209962Smm 2832209962Smm if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2833209962Smm if (xoap->xoa_immutable != 2834209962Smm ((pzp->zp_flags & ZFS_IMMUTABLE) != 0)) { 2835209962Smm need_policy = TRUE; 2836209962Smm } else { 2837209962Smm XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 2838209962Smm XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 2839209962Smm } 2840209962Smm } 2841209962Smm 2842209962Smm if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2843209962Smm if (xoap->xoa_nodump != 2844209962Smm ((pzp->zp_flags & ZFS_NODUMP) != 0)) { 2845209962Smm need_policy = TRUE; 2846209962Smm } else { 2847209962Smm XVA_CLR_REQ(xvap, XAT_NODUMP); 2848209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 2849209962Smm } 2850209962Smm } 2851209962Smm 2852209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2853209962Smm if (xoap->xoa_av_modified != 2854209962Smm ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0)) { 2855209962Smm need_policy = TRUE; 2856209962Smm } else { 2857209962Smm XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 2858209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 2859209962Smm } 2860209962Smm } 2861209962Smm 2862209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2863209962Smm if ((vp->v_type != VREG && 2864209962Smm xoap->xoa_av_quarantined) || 2865209962Smm xoap->xoa_av_quarantined != 
2866209962Smm ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0)) { 2867209962Smm need_policy = TRUE; 2868209962Smm } else { 2869209962Smm XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 2870209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 2871209962Smm } 2872209962Smm } 2873209962Smm 2874209962Smm if (need_policy == FALSE && 2875209962Smm (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 2876209962Smm XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 2877185029Spjd need_policy = TRUE; 2878185029Spjd } 2879185029Spjd } 2880185029Spjd 2881168404Spjd mutex_exit(&zp->z_lock); 2882168404Spjd 2883168404Spjd if (mask & AT_MODE) { 2884185029Spjd if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 2885168962Spjd err = secpolicy_setid_setsticky_clear(vp, vap, 2886168962Spjd &oldva, cr); 2887168962Spjd if (err) { 2888168962Spjd ZFS_EXIT(zfsvfs); 2889168962Spjd return (err); 2890168962Spjd } 2891168404Spjd trim_mask |= AT_MODE; 2892168404Spjd } else { 2893168404Spjd need_policy = TRUE; 2894168404Spjd } 2895168404Spjd } 2896168404Spjd 2897168404Spjd if (need_policy) { 2898168404Spjd /* 2899168404Spjd * If trim_mask is set then take ownership 2900168404Spjd * has been granted or write_acl is present and user 2901168404Spjd * has the ability to modify mode. In that case remove 2902168404Spjd * UID|GID and or MODE from mask so that 2903168404Spjd * secpolicy_vnode_setattr() doesn't revoke it. 2904168404Spjd */ 2905168404Spjd 2906168404Spjd if (trim_mask) { 2907168404Spjd saved_mask = vap->va_mask; 2908168404Spjd vap->va_mask &= ~trim_mask; 2909197831Spjd if (trim_mask & AT_MODE) { 2910197831Spjd /* 2911197831Spjd * Save the mode, as secpolicy_vnode_setattr() 2912197831Spjd * will overwrite it with ova.va_mode. 
2913197831Spjd */ 2914197831Spjd saved_mode = vap->va_mode; 2915197831Spjd } 2916168404Spjd } 2917168404Spjd err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 2918185029Spjd (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 2919168404Spjd if (err) { 2920168404Spjd ZFS_EXIT(zfsvfs); 2921168404Spjd return (err); 2922168404Spjd } 2923168404Spjd 2924197831Spjd if (trim_mask) { 2925168404Spjd vap->va_mask |= saved_mask; 2926197831Spjd if (trim_mask & AT_MODE) { 2927197831Spjd /* 2928197831Spjd * Recover the mode after 2929197831Spjd * secpolicy_vnode_setattr(). 2930197831Spjd */ 2931197831Spjd vap->va_mode = saved_mode; 2932197831Spjd } 2933197831Spjd } 2934168404Spjd } 2935168404Spjd 2936168404Spjd /* 2937168404Spjd * secpolicy_vnode_setattr, or take ownership may have 2938168404Spjd * changed va_mask 2939168404Spjd */ 2940168404Spjd mask = vap->va_mask; 2941168404Spjd 2942168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2943168404Spjd dmu_tx_hold_bonus(tx, zp->z_id); 2944168404Spjd 2945168404Spjd if (mask & AT_MODE) { 2946168404Spjd uint64_t pmode = pzp->zp_mode; 2947168404Spjd 2948168404Spjd new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 2949168404Spjd 2950209962Smm if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 2951209962Smm goto out; 2952185029Spjd if (pzp->zp_acl.z_acl_extern_obj) { 2953185029Spjd /* Are we upgrading ACL from old V0 format to new V1 */ 2954185029Spjd if (zfsvfs->z_version <= ZPL_VERSION_FUID && 2955185029Spjd pzp->zp_acl.z_acl_version == 2956185029Spjd ZFS_ACL_VERSION_INITIAL) { 2957185029Spjd dmu_tx_hold_free(tx, 2958185029Spjd pzp->zp_acl.z_acl_extern_obj, 0, 2959185029Spjd DMU_OBJECT_END); 2960185029Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2961185029Spjd 0, aclp->z_acl_bytes); 2962185029Spjd } else { 2963185029Spjd dmu_tx_hold_write(tx, 2964185029Spjd pzp->zp_acl.z_acl_extern_obj, 0, 2965185029Spjd aclp->z_acl_bytes); 2966185029Spjd } 2967185029Spjd } else if (aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2968168404Spjd 
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2969185029Spjd 0, aclp->z_acl_bytes); 2970185029Spjd } 2971168404Spjd } 2972168404Spjd 2973209962Smm if (mask & (AT_UID | AT_GID)) { 2974209962Smm if (pzp->zp_xattr) { 2975209962Smm err = zfs_zget(zp->z_zfsvfs, pzp->zp_xattr, &attrzp); 2976209962Smm if (err) 2977209962Smm goto out; 2978209962Smm dmu_tx_hold_bonus(tx, attrzp->z_id); 2979168404Spjd } 2980209962Smm if (mask & AT_UID) { 2981209962Smm new_uid = zfs_fuid_create(zfsvfs, 2982209962Smm (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 2983209962Smm if (new_uid != pzp->zp_uid && 2984209962Smm zfs_usergroup_overquota(zfsvfs, B_FALSE, new_uid)) { 2985209962Smm err = EDQUOT; 2986209962Smm goto out; 2987209962Smm } 2988209962Smm } 2989209962Smm 2990209962Smm if (mask & AT_GID) { 2991209962Smm new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 2992209962Smm cr, ZFS_GROUP, &fuidp); 2993209962Smm if (new_gid != pzp->zp_gid && 2994209962Smm zfs_usergroup_overquota(zfsvfs, B_TRUE, new_gid)) { 2995209962Smm err = EDQUOT; 2996209962Smm goto out; 2997209962Smm } 2998209962Smm } 2999209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 3000209962Smm if (fuid_dirtied) { 3001209962Smm if (zfsvfs->z_fuid_obj == 0) { 3002209962Smm dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 3003209962Smm dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 3004209962Smm FUID_SIZE_ESTIMATE(zfsvfs)); 3005209962Smm dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 3006209962Smm FALSE, NULL); 3007209962Smm } else { 3008209962Smm dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj); 3009209962Smm dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0, 3010209962Smm FUID_SIZE_ESTIMATE(zfsvfs)); 3011209962Smm } 3012209962Smm } 3013168404Spjd } 3014168404Spjd 3015209962Smm err = dmu_tx_assign(tx, TXG_NOWAIT); 3016168404Spjd if (err) { 3017209962Smm if (err == ERESTART) 3018168404Spjd dmu_tx_wait(tx); 3019209962Smm goto out; 3020168404Spjd } 3021168404Spjd 3022168404Spjd dmu_buf_will_dirty(zp->z_dbuf, tx); 3023168404Spjd 3024168404Spjd /* 3025168404Spjd * Set each 
attribute requested. 3026168404Spjd * We group settings according to the locks they need to acquire. 3027168404Spjd * 3028168404Spjd * Note: you cannot set ctime directly, although it will be 3029168404Spjd * updated as a side-effect of calling this function. 3030168404Spjd */ 3031168404Spjd 3032168404Spjd mutex_enter(&zp->z_lock); 3033168404Spjd 3034168404Spjd if (mask & AT_MODE) { 3035185029Spjd mutex_enter(&zp->z_acl_lock); 3036185029Spjd zp->z_phys->zp_mode = new_mode; 3037209962Smm err = zfs_aclset_common(zp, aclp, cr, tx); 3038168404Spjd ASSERT3U(err, ==, 0); 3039185029Spjd mutex_exit(&zp->z_acl_lock); 3040168404Spjd } 3041168404Spjd 3042168404Spjd if (attrzp) 3043168404Spjd mutex_enter(&attrzp->z_lock); 3044168404Spjd 3045168404Spjd if (mask & AT_UID) { 3046209962Smm pzp->zp_uid = new_uid; 3047209962Smm if (attrzp) 3048209962Smm attrzp->z_phys->zp_uid = new_uid; 3049168404Spjd } 3050168404Spjd 3051168404Spjd if (mask & AT_GID) { 3052209962Smm pzp->zp_gid = new_gid; 3053168404Spjd if (attrzp) 3054209962Smm attrzp->z_phys->zp_gid = new_gid; 3055168404Spjd } 3056168404Spjd 3057168404Spjd if (attrzp) 3058168404Spjd mutex_exit(&attrzp->z_lock); 3059168404Spjd 3060168404Spjd if (mask & AT_ATIME) 3061168404Spjd ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime); 3062168404Spjd 3063168404Spjd if (mask & AT_MTIME) 3064168404Spjd ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime); 3065168404Spjd 3066185029Spjd /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 3067168404Spjd if (mask & AT_SIZE) 3068168404Spjd zfs_time_stamper_locked(zp, CONTENT_MODIFIED, tx); 3069168404Spjd else if (mask != 0) 3070168404Spjd zfs_time_stamper_locked(zp, STATE_CHANGED, tx); 3071185029Spjd /* 3072185029Spjd * Do this after setting timestamps to prevent timestamp 3073185029Spjd * update from toggling bit 3074185029Spjd */ 3075168404Spjd 3076185029Spjd if (xoap && (mask & AT_XVATTR)) { 3077209962Smm 3078209962Smm /* 3079209962Smm * restore trimmed off masks 3080209962Smm * so that return masks can be set for caller. 3081209962Smm */ 3082209962Smm 3083209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3084209962Smm XVA_SET_REQ(xvap, XAT_APPENDONLY); 3085209962Smm } 3086209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3087209962Smm XVA_SET_REQ(xvap, XAT_NOUNLINK); 3088209962Smm } 3089209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3090209962Smm XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3091209962Smm } 3092209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3093209962Smm XVA_SET_REQ(xvap, XAT_NODUMP); 3094209962Smm } 3095209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3096209962Smm XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3097209962Smm } 3098209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3099209962Smm XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3100209962Smm } 3101209962Smm 3102185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { 3103185029Spjd size_t len; 3104185029Spjd dmu_object_info_t doi; 3105185029Spjd 3106185029Spjd ASSERT(vp->v_type == VREG); 3107185029Spjd 3108185029Spjd /* Grow the bonus buffer if necessary. 
*/ 3109185029Spjd dmu_object_info_from_db(zp->z_dbuf, &doi); 3110185029Spjd len = sizeof (xoap->xoa_av_scanstamp) + 3111185029Spjd sizeof (znode_phys_t); 3112185029Spjd if (len > doi.doi_bonus_size) 3113185029Spjd VERIFY(dmu_set_bonus(zp->z_dbuf, len, tx) == 0); 3114185029Spjd } 3115185029Spjd zfs_xvattr_set(zp, xvap); 3116185029Spjd } 3117185029Spjd 3118209962Smm if (fuid_dirtied) 3119209962Smm zfs_fuid_sync(zfsvfs, tx); 3120209962Smm 3121168404Spjd if (mask != 0) 3122185029Spjd zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3123168404Spjd 3124168404Spjd mutex_exit(&zp->z_lock); 3125168404Spjd 3126209962Smmout: 3127168404Spjd if (attrzp) 3128168404Spjd VN_RELE(ZTOV(attrzp)); 3129168404Spjd 3130209962Smm if (aclp) { 3131209962Smm zfs_acl_free(aclp); 3132209962Smm aclp = NULL; 3133209962Smm } 3134168404Spjd 3135209962Smm if (fuidp) { 3136209962Smm zfs_fuid_info_free(fuidp); 3137209962Smm fuidp = NULL; 3138209962Smm } 3139209962Smm 3140209962Smm if (err) 3141209962Smm dmu_tx_abort(tx); 3142209962Smm else 3143209962Smm dmu_tx_commit(tx); 3144209962Smm 3145209962Smm if (err == ERESTART) 3146209962Smm goto top; 3147209962Smm 3148168404Spjd ZFS_EXIT(zfsvfs); 3149168404Spjd return (err); 3150168404Spjd} 3151168404Spjd 3152168404Spjdtypedef struct zfs_zlock { 3153168404Spjd krwlock_t *zl_rwlock; /* lock we acquired */ 3154168404Spjd znode_t *zl_znode; /* znode we held */ 3155168404Spjd struct zfs_zlock *zl_next; /* next in list */ 3156168404Spjd} zfs_zlock_t; 3157168404Spjd 3158168404Spjd/* 3159168404Spjd * Drop locks and release vnodes that were held by zfs_rename_lock(). 
3160168404Spjd */ 3161168404Spjdstatic void 3162168404Spjdzfs_rename_unlock(zfs_zlock_t **zlpp) 3163168404Spjd{ 3164168404Spjd zfs_zlock_t *zl; 3165168404Spjd 3166168404Spjd while ((zl = *zlpp) != NULL) { 3167168404Spjd if (zl->zl_znode != NULL) 3168168404Spjd VN_RELE(ZTOV(zl->zl_znode)); 3169168404Spjd rw_exit(zl->zl_rwlock); 3170168404Spjd *zlpp = zl->zl_next; 3171168404Spjd kmem_free(zl, sizeof (*zl)); 3172168404Spjd } 3173168404Spjd} 3174168404Spjd 3175168404Spjd/* 3176168404Spjd * Search back through the directory tree, using the ".." entries. 3177168404Spjd * Lock each directory in the chain to prevent concurrent renames. 3178168404Spjd * Fail any attempt to move a directory into one of its own descendants. 3179168404Spjd * XXX - z_parent_lock can overlap with map or grow locks 3180168404Spjd */ 3181168404Spjdstatic int 3182168404Spjdzfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 3183168404Spjd{ 3184168404Spjd zfs_zlock_t *zl; 3185168404Spjd znode_t *zp = tdzp; 3186168404Spjd uint64_t rootid = zp->z_zfsvfs->z_root; 3187168404Spjd uint64_t *oidp = &zp->z_id; 3188168404Spjd krwlock_t *rwlp = &szp->z_parent_lock; 3189168404Spjd krw_t rw = RW_WRITER; 3190168404Spjd 3191168404Spjd /* 3192168404Spjd * First pass write-locks szp and compares to zp->z_id. 3193168404Spjd * Later passes read-lock zp and compare to zp->z_parent. 3194168404Spjd */ 3195168404Spjd do { 3196168404Spjd if (!rw_tryenter(rwlp, rw)) { 3197168404Spjd /* 3198168404Spjd * Another thread is renaming in this path. 3199168404Spjd * Note that if we are a WRITER, we don't have any 3200168404Spjd * parent_locks held yet. 
3201168404Spjd */ 3202168404Spjd if (rw == RW_READER && zp->z_id > szp->z_id) { 3203168404Spjd /* 3204168404Spjd * Drop our locks and restart 3205168404Spjd */ 3206168404Spjd zfs_rename_unlock(&zl); 3207168404Spjd *zlpp = NULL; 3208168404Spjd zp = tdzp; 3209168404Spjd oidp = &zp->z_id; 3210168404Spjd rwlp = &szp->z_parent_lock; 3211168404Spjd rw = RW_WRITER; 3212168404Spjd continue; 3213168404Spjd } else { 3214168404Spjd /* 3215168404Spjd * Wait for other thread to drop its locks 3216168404Spjd */ 3217168404Spjd rw_enter(rwlp, rw); 3218168404Spjd } 3219168404Spjd } 3220168404Spjd 3221168404Spjd zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3222168404Spjd zl->zl_rwlock = rwlp; 3223168404Spjd zl->zl_znode = NULL; 3224168404Spjd zl->zl_next = *zlpp; 3225168404Spjd *zlpp = zl; 3226168404Spjd 3227168404Spjd if (*oidp == szp->z_id) /* We're a descendant of szp */ 3228168404Spjd return (EINVAL); 3229168404Spjd 3230168404Spjd if (*oidp == rootid) /* We've hit the top */ 3231168404Spjd return (0); 3232168404Spjd 3233168404Spjd if (rw == RW_READER) { /* i.e. not the first pass */ 3234168404Spjd int error = zfs_zget(zp->z_zfsvfs, *oidp, &zp); 3235168404Spjd if (error) 3236168404Spjd return (error); 3237168404Spjd zl->zl_znode = zp; 3238168404Spjd } 3239168404Spjd oidp = &zp->z_phys->zp_parent; 3240168404Spjd rwlp = &zp->z_parent_lock; 3241168404Spjd rw = RW_READER; 3242168404Spjd 3243168404Spjd } while (zp->z_id != sdzp->z_id); 3244168404Spjd 3245168404Spjd return (0); 3246168404Spjd} 3247168404Spjd 3248168404Spjd/* 3249168404Spjd * Move an entry from the provided source directory to the target 3250168404Spjd * directory. Change the entry name as indicated. 3251168404Spjd * 3252168404Spjd * IN: sdvp - Source directory containing the "old entry". 3253168404Spjd * snm - Old entry name. 3254168404Spjd * tdvp - Target directory to contain the "new entry". 3255168404Spjd * tnm - New entry name. 3256168404Spjd * cr - credentials of caller. 
3257185029Spjd * ct - caller context 3258185029Spjd * flags - case flags 3259168404Spjd * 3260168404Spjd * RETURN: 0 if success 3261168404Spjd * error code if failure 3262168404Spjd * 3263168404Spjd * Timestamps: 3264168404Spjd * sdvp,tdvp - ctime|mtime updated 3265168404Spjd */ 3266185029Spjd/*ARGSUSED*/ 3267168404Spjdstatic int 3268185029Spjdzfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, 3269185029Spjd caller_context_t *ct, int flags) 3270168404Spjd{ 3271168404Spjd znode_t *tdzp, *szp, *tzp; 3272168404Spjd znode_t *sdzp = VTOZ(sdvp); 3273168404Spjd zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; 3274185029Spjd zilog_t *zilog; 3275168962Spjd vnode_t *realvp; 3276168404Spjd zfs_dirlock_t *sdl, *tdl; 3277168404Spjd dmu_tx_t *tx; 3278168404Spjd zfs_zlock_t *zl; 3279185029Spjd int cmp, serr, terr; 3280185029Spjd int error = 0; 3281185029Spjd int zflg = 0; 3282168404Spjd 3283168404Spjd ZFS_ENTER(zfsvfs); 3284185029Spjd ZFS_VERIFY_ZP(sdzp); 3285185029Spjd zilog = zfsvfs->z_log; 3286168404Spjd 3287168962Spjd /* 3288168962Spjd * Make sure we have the real vp for the target directory. 
3289168962Spjd */ 3290185029Spjd if (VOP_REALVP(tdvp, &realvp, ct) == 0) 3291168962Spjd tdvp = realvp; 3292168962Spjd 3293168404Spjd if (tdvp->v_vfsp != sdvp->v_vfsp) { 3294168404Spjd ZFS_EXIT(zfsvfs); 3295168962Spjd return (EXDEV); 3296168404Spjd } 3297168404Spjd 3298168404Spjd tdzp = VTOZ(tdvp); 3299185029Spjd ZFS_VERIFY_ZP(tdzp); 3300185029Spjd if (zfsvfs->z_utf8 && u8_validate(tnm, 3301185029Spjd strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3302185029Spjd ZFS_EXIT(zfsvfs); 3303185029Spjd return (EILSEQ); 3304185029Spjd } 3305185029Spjd 3306185029Spjd if (flags & FIGNORECASE) 3307185029Spjd zflg |= ZCILOOK; 3308185029Spjd 3309168404Spjdtop: 3310168404Spjd szp = NULL; 3311168404Spjd tzp = NULL; 3312168404Spjd zl = NULL; 3313168404Spjd 3314168404Spjd /* 3315168404Spjd * This is to prevent the creation of links into attribute space 3316168404Spjd * by renaming a linked file into/outof an attribute directory. 3317168404Spjd * See the comment in zfs_link() for why this is considered bad. 3318168404Spjd */ 3319168404Spjd if ((tdzp->z_phys->zp_flags & ZFS_XATTR) != 3320168404Spjd (sdzp->z_phys->zp_flags & ZFS_XATTR)) { 3321168962Spjd ZFS_EXIT(zfsvfs); 3322168962Spjd return (EINVAL); 3323168404Spjd } 3324168404Spjd 3325168404Spjd /* 3326168404Spjd * Lock source and target directory entries. To prevent deadlock, 3327168404Spjd * a lock ordering must be defined. We lock the directory with 3328168404Spjd * the smallest object id first, or if it's a tie, the one with 3329168404Spjd * the lexically first name. 3330168404Spjd */ 3331168404Spjd if (sdzp->z_id < tdzp->z_id) { 3332168962Spjd cmp = -1; 3333168962Spjd } else if (sdzp->z_id > tdzp->z_id) { 3334168962Spjd cmp = 1; 3335168962Spjd } else { 3336185029Spjd /* 3337185029Spjd * First compare the two name arguments without 3338185029Spjd * considering any case folding. 
3339185029Spjd */ 3340185029Spjd int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); 3341185029Spjd 3342185029Spjd cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); 3343185029Spjd ASSERT(error == 0 || !zfsvfs->z_utf8); 3344168962Spjd if (cmp == 0) { 3345168962Spjd /* 3346168962Spjd * POSIX: "If the old argument and the new argument 3347168962Spjd * both refer to links to the same existing file, 3348168962Spjd * the rename() function shall return successfully 3349168962Spjd * and perform no other action." 3350168962Spjd */ 3351168962Spjd ZFS_EXIT(zfsvfs); 3352168962Spjd return (0); 3353168962Spjd } 3354185029Spjd /* 3355185029Spjd * If the file system is case-folding, then we may 3356185029Spjd * have some more checking to do. A case-folding file 3357185029Spjd * system is either supporting mixed case sensitivity 3358185029Spjd * access or is completely case-insensitive. Note 3359185029Spjd * that the file system is always case preserving. 3360185029Spjd * 3361185029Spjd * In mixed sensitivity mode case sensitive behavior 3362185029Spjd * is the default. FIGNORECASE must be used to 3363185029Spjd * explicitly request case insensitive behavior. 3364185029Spjd * 3365185029Spjd * If the source and target names provided differ only 3366185029Spjd * by case (e.g., a request to rename 'tim' to 'Tim'), 3367185029Spjd * we will treat this as a special case in the 3368185029Spjd * case-insensitive mode: as long as the source name 3369185029Spjd * is an exact match, we will allow this to proceed as 3370185029Spjd * a name-change request. 
3371185029Spjd */ 3372185029Spjd if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 3373185029Spjd (zfsvfs->z_case == ZFS_CASE_MIXED && 3374185029Spjd flags & FIGNORECASE)) && 3375185029Spjd u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, 3376185029Spjd &error) == 0) { 3377185029Spjd /* 3378185029Spjd * case preserving rename request, require exact 3379185029Spjd * name matches 3380185029Spjd */ 3381185029Spjd zflg |= ZCIEXACT; 3382185029Spjd zflg &= ~ZCILOOK; 3383185029Spjd } 3384168962Spjd } 3385185029Spjd 3386208131Smm /* 3387208131Smm * If the source and destination directories are the same, we should 3388208131Smm * grab the z_name_lock of that directory only once. 3389208131Smm */ 3390208131Smm if (sdzp == tdzp) { 3391208131Smm zflg |= ZHAVELOCK; 3392208131Smm rw_enter(&sdzp->z_name_lock, RW_READER); 3393208131Smm } 3394208131Smm 3395168962Spjd if (cmp < 0) { 3396185029Spjd serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, 3397185029Spjd ZEXISTS | zflg, NULL, NULL); 3398185029Spjd terr = zfs_dirent_lock(&tdl, 3399185029Spjd tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); 3400168962Spjd } else { 3401185029Spjd terr = zfs_dirent_lock(&tdl, 3402185029Spjd tdzp, tnm, &tzp, zflg, NULL, NULL); 3403185029Spjd serr = zfs_dirent_lock(&sdl, 3404185029Spjd sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, 3405185029Spjd NULL, NULL); 3406168404Spjd } 3407168404Spjd 3408168962Spjd if (serr) { 3409168404Spjd /* 3410168404Spjd * Source entry invalid or not there. 
3411168404Spjd */ 3412168962Spjd if (!terr) { 3413168404Spjd zfs_dirent_unlock(tdl); 3414168962Spjd if (tzp) 3415168962Spjd VN_RELE(ZTOV(tzp)); 3416168962Spjd } 3417208131Smm 3418208131Smm if (sdzp == tdzp) 3419208131Smm rw_exit(&sdzp->z_name_lock); 3420208131Smm 3421168404Spjd if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0) 3422168404Spjd serr = EINVAL; 3423168962Spjd ZFS_EXIT(zfsvfs); 3424168962Spjd return (serr); 3425168404Spjd } 3426168404Spjd if (terr) { 3427168404Spjd zfs_dirent_unlock(sdl); 3428168962Spjd VN_RELE(ZTOV(szp)); 3429208131Smm 3430208131Smm if (sdzp == tdzp) 3431208131Smm rw_exit(&sdzp->z_name_lock); 3432208131Smm 3433168404Spjd if (strcmp(tnm, "..") == 0) 3434168404Spjd terr = EINVAL; 3435168962Spjd ZFS_EXIT(zfsvfs); 3436168962Spjd return (terr); 3437168404Spjd } 3438168404Spjd 3439168404Spjd /* 3440168404Spjd * Must have write access at the source to remove the old entry 3441168404Spjd * and write access at the target to create the new entry. 3442168404Spjd * Note that if target and source are the same, this can be 3443168404Spjd * done in a single check. 3444168404Spjd */ 3445168404Spjd 3446168404Spjd if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3447168404Spjd goto out; 3448168404Spjd 3449168962Spjd if (ZTOV(szp)->v_type == VDIR) { 3450168404Spjd /* 3451168404Spjd * Check to make sure rename is valid. 3452168404Spjd * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3453168404Spjd */ 3454168404Spjd if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) 3455168404Spjd goto out; 3456168404Spjd } 3457168404Spjd 3458168404Spjd /* 3459168404Spjd * Does target exist? 3460168404Spjd */ 3461168404Spjd if (tzp) { 3462168404Spjd /* 3463168404Spjd * Source and target must be the same type. 
3464168404Spjd */ 3465168962Spjd if (ZTOV(szp)->v_type == VDIR) { 3466168962Spjd if (ZTOV(tzp)->v_type != VDIR) { 3467168404Spjd error = ENOTDIR; 3468168404Spjd goto out; 3469168404Spjd } 3470168404Spjd } else { 3471168962Spjd if (ZTOV(tzp)->v_type == VDIR) { 3472168404Spjd error = EISDIR; 3473168404Spjd goto out; 3474168404Spjd } 3475168404Spjd } 3476168404Spjd /* 3477168404Spjd * POSIX dictates that when the source and target 3478168404Spjd * entries refer to the same file object, rename 3479168404Spjd * must do nothing and exit without error. 3480168404Spjd */ 3481168404Spjd if (szp->z_id == tzp->z_id) { 3482168404Spjd error = 0; 3483168404Spjd goto out; 3484168404Spjd } 3485168404Spjd } 3486168404Spjd 3487185029Spjd vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); 3488168962Spjd if (tzp) 3489185029Spjd vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 3490168962Spjd 3491185029Spjd /* 3492185029Spjd * notify the target directory if it is not the same 3493185029Spjd * as source directory. 3494185029Spjd */ 3495185029Spjd if (tdvp != sdvp) { 3496185029Spjd vnevent_rename_dest_dir(tdvp, ct); 3497185029Spjd } 3498185029Spjd 3499168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 3500168404Spjd dmu_tx_hold_bonus(tx, szp->z_id); /* nlink changes */ 3501168404Spjd dmu_tx_hold_bonus(tx, sdzp->z_id); /* nlink changes */ 3502168404Spjd dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3503168404Spjd dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 3504168404Spjd if (sdzp != tdzp) 3505168404Spjd dmu_tx_hold_bonus(tx, tdzp->z_id); /* nlink changes */ 3506168404Spjd if (tzp) 3507168404Spjd dmu_tx_hold_bonus(tx, tzp->z_id); /* parent changes */ 3508168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3509209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 3510168404Spjd if (error) { 3511168404Spjd if (zl != NULL) 3512168404Spjd zfs_rename_unlock(&zl); 3513168404Spjd zfs_dirent_unlock(sdl); 3514168404Spjd zfs_dirent_unlock(tdl); 3515208131Smm 3516208131Smm if (sdzp == tdzp) 3517208131Smm 
rw_exit(&sdzp->z_name_lock); 3518208131Smm 3519168962Spjd VN_RELE(ZTOV(szp)); 3520168962Spjd if (tzp) 3521168962Spjd VN_RELE(ZTOV(tzp)); 3522209962Smm if (error == ERESTART) { 3523168404Spjd dmu_tx_wait(tx); 3524168404Spjd dmu_tx_abort(tx); 3525168404Spjd goto top; 3526168404Spjd } 3527168404Spjd dmu_tx_abort(tx); 3528168962Spjd ZFS_EXIT(zfsvfs); 3529168962Spjd return (error); 3530168404Spjd } 3531168404Spjd 3532168404Spjd if (tzp) /* Attempt to remove the existing target */ 3533185029Spjd error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); 3534168404Spjd 3535168404Spjd if (error == 0) { 3536168404Spjd error = zfs_link_create(tdl, szp, tx, ZRENAMING); 3537168404Spjd if (error == 0) { 3538185029Spjd szp->z_phys->zp_flags |= ZFS_AV_MODIFIED; 3539185029Spjd 3540168404Spjd error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); 3541168404Spjd ASSERT(error == 0); 3542185029Spjd 3543185029Spjd zfs_log_rename(zilog, tx, 3544185029Spjd TX_RENAME | (flags & FIGNORECASE ? TX_CI : 0), 3545185029Spjd sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp); 3546185029Spjd 3547185029Spjd /* Update path information for the target vnode */ 3548185029Spjd vn_renamepath(tdvp, ZTOV(szp), tnm, strlen(tnm)); 3549168404Spjd } 3550168404Spjd#ifdef FREEBSD_NAMECACHE 3551168404Spjd if (error == 0) { 3552168404Spjd cache_purge(sdvp); 3553168404Spjd cache_purge(tdvp); 3554168404Spjd } 3555168404Spjd#endif 3556168404Spjd } 3557168404Spjd 3558168404Spjd dmu_tx_commit(tx); 3559168404Spjdout: 3560168404Spjd if (zl != NULL) 3561168404Spjd zfs_rename_unlock(&zl); 3562168404Spjd 3563168404Spjd zfs_dirent_unlock(sdl); 3564168404Spjd zfs_dirent_unlock(tdl); 3565168404Spjd 3566208131Smm if (sdzp == tdzp) 3567208131Smm rw_exit(&sdzp->z_name_lock); 3568208131Smm 3569168962Spjd VN_RELE(ZTOV(szp)); 3570168404Spjd if (tzp) 3571168962Spjd VN_RELE(ZTOV(tzp)); 3572168404Spjd 3573168404Spjd ZFS_EXIT(zfsvfs); 3574168404Spjd 3575168404Spjd return (error); 3576168404Spjd} 3577168404Spjd 3578168404Spjd/* 3579168404Spjd * 
Insert the indicated symbolic reference entry into the directory. 3580168404Spjd * 3581168404Spjd * IN: dvp - Directory to contain new symbolic link. 3582168404Spjd * link - Name for new symlink entry. 3583168404Spjd * vap - Attributes of new entry. 3584168404Spjd * target - Target path of new symlink. 3585168404Spjd * cr - credentials of caller. 3586185029Spjd * ct - caller context 3587185029Spjd * flags - case flags 3588168404Spjd * 3589168404Spjd * RETURN: 0 if success 3590168404Spjd * error code if failure 3591168404Spjd * 3592168404Spjd * Timestamps: 3593168404Spjd * dvp - ctime|mtime updated 3594168404Spjd */ 3595185029Spjd/*ARGSUSED*/ 3596168404Spjdstatic int 3597185029Spjdzfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 3598185029Spjd cred_t *cr, kthread_t *td) 3599168404Spjd{ 3600168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 3601168404Spjd zfs_dirlock_t *dl; 3602168404Spjd dmu_tx_t *tx; 3603168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3604185029Spjd zilog_t *zilog; 3605168404Spjd int len = strlen(link); 3606168404Spjd int error; 3607185029Spjd int zflg = ZNEW; 3608209962Smm zfs_acl_ids_t acl_ids; 3609209962Smm boolean_t fuid_dirtied; 3610185029Spjd int flags = 0; 3611168404Spjd 3612168962Spjd ASSERT(vap->va_type == VLNK); 3613168404Spjd 3614168404Spjd ZFS_ENTER(zfsvfs); 3615185029Spjd ZFS_VERIFY_ZP(dzp); 3616185029Spjd zilog = zfsvfs->z_log; 3617185029Spjd 3618185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 3619185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3620185029Spjd ZFS_EXIT(zfsvfs); 3621185029Spjd return (EILSEQ); 3622185029Spjd } 3623185029Spjd if (flags & FIGNORECASE) 3624185029Spjd zflg |= ZCILOOK; 3625168404Spjdtop: 3626185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3627168404Spjd ZFS_EXIT(zfsvfs); 3628168404Spjd return (error); 3629168404Spjd } 3630168404Spjd 3631168404Spjd if (len > MAXPATHLEN) { 3632168404Spjd ZFS_EXIT(zfsvfs); 3633168404Spjd return (ENAMETOOLONG); 
3634168404Spjd } 3635168404Spjd 3636168404Spjd /* 3637168404Spjd * Attempt to lock directory; fail if entry already exists. 3638168404Spjd */ 3639185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 3640185029Spjd if (error) { 3641168404Spjd ZFS_EXIT(zfsvfs); 3642168404Spjd return (error); 3643168404Spjd } 3644168404Spjd 3645209962Smm VERIFY(0 == zfs_acl_ids_create(dzp, 0, vap, cr, NULL, &acl_ids)); 3646209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 3647209962Smm zfs_acl_ids_free(&acl_ids); 3648209962Smm zfs_dirent_unlock(dl); 3649209962Smm ZFS_EXIT(zfsvfs); 3650209962Smm return (EDQUOT); 3651209962Smm } 3652168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 3653209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 3654168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 3655168404Spjd dmu_tx_hold_bonus(tx, dzp->z_id); 3656168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 3657209962Smm if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) 3658168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE); 3659209962Smm if (fuid_dirtied) 3660209962Smm zfs_fuid_txhold(zfsvfs, tx); 3661209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 3662168404Spjd if (error) { 3663209962Smm zfs_acl_ids_free(&acl_ids); 3664168404Spjd zfs_dirent_unlock(dl); 3665209962Smm if (error == ERESTART) { 3666168404Spjd dmu_tx_wait(tx); 3667168404Spjd dmu_tx_abort(tx); 3668168404Spjd goto top; 3669168404Spjd } 3670168404Spjd dmu_tx_abort(tx); 3671168404Spjd ZFS_EXIT(zfsvfs); 3672168404Spjd return (error); 3673168404Spjd } 3674168404Spjd 3675168404Spjd dmu_buf_will_dirty(dzp->z_dbuf, tx); 3676168404Spjd 3677168404Spjd /* 3678168404Spjd * Create a new object for the symlink. 3679168404Spjd * Put the link content into bonus buffer if it will fit; 3680168404Spjd * otherwise, store it just like any other file data. 
3681168404Spjd */ 3682168404Spjd if (sizeof (znode_phys_t) + len <= dmu_bonus_max()) { 3683209962Smm zfs_mknode(dzp, vap, tx, cr, 0, &zp, len, &acl_ids); 3684168404Spjd if (len != 0) 3685168404Spjd bcopy(link, zp->z_phys + 1, len); 3686168404Spjd } else { 3687168404Spjd dmu_buf_t *dbp; 3688168404Spjd 3689209962Smm zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids); 3690209962Smm 3691209962Smm if (fuid_dirtied) 3692209962Smm zfs_fuid_sync(zfsvfs, tx); 3693168404Spjd /* 3694168404Spjd * Nothing can access the znode yet so no locking needed 3695168404Spjd * for growing the znode's blocksize. 3696168404Spjd */ 3697168404Spjd zfs_grow_blocksize(zp, len, tx); 3698168404Spjd 3699185029Spjd VERIFY(0 == dmu_buf_hold(zfsvfs->z_os, 3700185029Spjd zp->z_id, 0, FTAG, &dbp)); 3701168404Spjd dmu_buf_will_dirty(dbp, tx); 3702168404Spjd 3703168404Spjd ASSERT3U(len, <=, dbp->db_size); 3704168404Spjd bcopy(link, dbp->db_data, len); 3705168404Spjd dmu_buf_rele(dbp, FTAG); 3706168404Spjd } 3707168404Spjd zp->z_phys->zp_size = len; 3708168404Spjd 3709168404Spjd /* 3710168404Spjd * Insert the new object into the directory. 3711168404Spjd */ 3712168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 3713168404Spjd if (error == 0) { 3714185029Spjd uint64_t txtype = TX_SYMLINK; 3715185029Spjd if (flags & FIGNORECASE) 3716185029Spjd txtype |= TX_CI; 3717185029Spjd zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 3718168962Spjd *vpp = ZTOV(zp); 3719168404Spjd } 3720168404Spjd 3721209962Smm zfs_acl_ids_free(&acl_ids); 3722209962Smm 3723168404Spjd dmu_tx_commit(tx); 3724168404Spjd 3725168404Spjd zfs_dirent_unlock(dl); 3726168404Spjd 3727168404Spjd ZFS_EXIT(zfsvfs); 3728168404Spjd return (error); 3729168404Spjd} 3730168404Spjd 3731168404Spjd/* 3732168404Spjd * Return, in the buffer contained in the provided uio structure, 3733168404Spjd * the symbolic path referred to by vp. 3734168404Spjd * 3735168404Spjd * IN: vp - vnode of symbolic link. 
3736168404Spjd * uoip - structure to contain the link path. 3737168404Spjd * cr - credentials of caller. 3738185029Spjd * ct - caller context 3739168404Spjd * 3740168404Spjd * OUT: uio - structure to contain the link path. 3741168404Spjd * 3742168404Spjd * RETURN: 0 if success 3743168404Spjd * error code if failure 3744168404Spjd * 3745168404Spjd * Timestamps: 3746168404Spjd * vp - atime updated 3747168404Spjd */ 3748168404Spjd/* ARGSUSED */ 3749168404Spjdstatic int 3750185029Spjdzfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 3751168404Spjd{ 3752168404Spjd znode_t *zp = VTOZ(vp); 3753168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3754168404Spjd size_t bufsz; 3755168404Spjd int error; 3756168404Spjd 3757168404Spjd ZFS_ENTER(zfsvfs); 3758185029Spjd ZFS_VERIFY_ZP(zp); 3759168404Spjd 3760168404Spjd bufsz = (size_t)zp->z_phys->zp_size; 3761168404Spjd if (bufsz + sizeof (znode_phys_t) <= zp->z_dbuf->db_size) { 3762168404Spjd error = uiomove(zp->z_phys + 1, 3763168404Spjd MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); 3764168404Spjd } else { 3765168404Spjd dmu_buf_t *dbp; 3766168404Spjd error = dmu_buf_hold(zfsvfs->z_os, zp->z_id, 0, FTAG, &dbp); 3767168404Spjd if (error) { 3768168404Spjd ZFS_EXIT(zfsvfs); 3769168404Spjd return (error); 3770168404Spjd } 3771168404Spjd error = uiomove(dbp->db_data, 3772168404Spjd MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); 3773168404Spjd dmu_buf_rele(dbp, FTAG); 3774168404Spjd } 3775168404Spjd 3776168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 3777168404Spjd ZFS_EXIT(zfsvfs); 3778168404Spjd return (error); 3779168404Spjd} 3780168404Spjd 3781168404Spjd/* 3782168404Spjd * Insert a new entry into directory tdvp referencing svp. 3783168404Spjd * 3784168404Spjd * IN: tdvp - Directory to contain new entry. 3785168404Spjd * svp - vnode of new entry. 3786168404Spjd * name - name of new entry. 3787168404Spjd * cr - credentials of caller. 
3788185029Spjd * ct - caller context 3789168404Spjd * 3790168404Spjd * RETURN: 0 if success 3791168404Spjd * error code if failure 3792168404Spjd * 3793168404Spjd * Timestamps: 3794168404Spjd * tdvp - ctime|mtime updated 3795168404Spjd * svp - ctime updated 3796168404Spjd */ 3797168404Spjd/* ARGSUSED */ 3798168404Spjdstatic int 3799185029Spjdzfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 3800185029Spjd caller_context_t *ct, int flags) 3801168404Spjd{ 3802168404Spjd znode_t *dzp = VTOZ(tdvp); 3803168404Spjd znode_t *tzp, *szp; 3804168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3805185029Spjd zilog_t *zilog; 3806168404Spjd zfs_dirlock_t *dl; 3807168404Spjd dmu_tx_t *tx; 3808168962Spjd vnode_t *realvp; 3809168404Spjd int error; 3810185029Spjd int zf = ZNEW; 3811185029Spjd uid_t owner; 3812168404Spjd 3813168404Spjd ASSERT(tdvp->v_type == VDIR); 3814168404Spjd 3815168404Spjd ZFS_ENTER(zfsvfs); 3816185029Spjd ZFS_VERIFY_ZP(dzp); 3817185029Spjd zilog = zfsvfs->z_log; 3818168404Spjd 3819185029Spjd if (VOP_REALVP(svp, &realvp, ct) == 0) 3820168962Spjd svp = realvp; 3821168962Spjd 3822168404Spjd if (svp->v_vfsp != tdvp->v_vfsp) { 3823168404Spjd ZFS_EXIT(zfsvfs); 3824168404Spjd return (EXDEV); 3825168404Spjd } 3826185029Spjd szp = VTOZ(svp); 3827185029Spjd ZFS_VERIFY_ZP(szp); 3828168404Spjd 3829185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, 3830185029Spjd strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3831185029Spjd ZFS_EXIT(zfsvfs); 3832185029Spjd return (EILSEQ); 3833185029Spjd } 3834185029Spjd if (flags & FIGNORECASE) 3835185029Spjd zf |= ZCILOOK; 3836185029Spjd 3837168404Spjdtop: 3838168404Spjd /* 3839168404Spjd * We do not support links between attributes and non-attributes 3840168404Spjd * because of the potential security risk of creating links 3841168404Spjd * into "normal" file space in order to circumvent restrictions 3842168404Spjd * imposed in attribute space. 
3843168404Spjd */ 3844168404Spjd if ((szp->z_phys->zp_flags & ZFS_XATTR) != 3845168404Spjd (dzp->z_phys->zp_flags & ZFS_XATTR)) { 3846168404Spjd ZFS_EXIT(zfsvfs); 3847168404Spjd return (EINVAL); 3848168404Spjd } 3849168404Spjd 3850168404Spjd /* 3851168404Spjd * POSIX dictates that we return EPERM here. 3852168404Spjd * Better choices include ENOTSUP or EISDIR. 3853168404Spjd */ 3854168404Spjd if (svp->v_type == VDIR) { 3855168404Spjd ZFS_EXIT(zfsvfs); 3856168404Spjd return (EPERM); 3857168404Spjd } 3858168404Spjd 3859185029Spjd owner = zfs_fuid_map_id(zfsvfs, szp->z_phys->zp_uid, cr, ZFS_OWNER); 3860185029Spjd if (owner != crgetuid(cr) && 3861185029Spjd secpolicy_basic_link(svp, cr) != 0) { 3862168404Spjd ZFS_EXIT(zfsvfs); 3863168404Spjd return (EPERM); 3864168404Spjd } 3865168404Spjd 3866185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3867168404Spjd ZFS_EXIT(zfsvfs); 3868168404Spjd return (error); 3869168404Spjd } 3870168404Spjd 3871168404Spjd /* 3872168404Spjd * Attempt to lock directory; fail if entry already exists. 
3873168404Spjd */ 3874185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 3875185029Spjd if (error) { 3876168404Spjd ZFS_EXIT(zfsvfs); 3877168404Spjd return (error); 3878168404Spjd } 3879168404Spjd 3880168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 3881168404Spjd dmu_tx_hold_bonus(tx, szp->z_id); 3882168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 3883209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 3884168404Spjd if (error) { 3885168404Spjd zfs_dirent_unlock(dl); 3886209962Smm if (error == ERESTART) { 3887168404Spjd dmu_tx_wait(tx); 3888168404Spjd dmu_tx_abort(tx); 3889168404Spjd goto top; 3890168404Spjd } 3891168404Spjd dmu_tx_abort(tx); 3892168404Spjd ZFS_EXIT(zfsvfs); 3893168404Spjd return (error); 3894168404Spjd } 3895168404Spjd 3896168404Spjd error = zfs_link_create(dl, szp, tx, 0); 3897168404Spjd 3898185029Spjd if (error == 0) { 3899185029Spjd uint64_t txtype = TX_LINK; 3900185029Spjd if (flags & FIGNORECASE) 3901185029Spjd txtype |= TX_CI; 3902185029Spjd zfs_log_link(zilog, tx, txtype, dzp, szp, name); 3903185029Spjd } 3904168404Spjd 3905168404Spjd dmu_tx_commit(tx); 3906168404Spjd 3907168404Spjd zfs_dirent_unlock(dl); 3908168404Spjd 3909185029Spjd if (error == 0) { 3910185029Spjd vnevent_link(svp, ct); 3911185029Spjd } 3912185029Spjd 3913168404Spjd ZFS_EXIT(zfsvfs); 3914168404Spjd return (error); 3915168404Spjd} 3916168404Spjd 3917185029Spjd/*ARGSUSED*/ 3918168962Spjdvoid 3919185029Spjdzfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 3920168404Spjd{ 3921168962Spjd znode_t *zp = VTOZ(vp); 3922168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3923168962Spjd int error; 3924168404Spjd 3925185029Spjd rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 3926185029Spjd if (zp->z_dbuf == NULL) { 3927185029Spjd /* 3928185029Spjd * The fs has been unmounted, or we did a 3929185029Spjd * suspend/resume and this file no longer exists. 
3930185029Spjd */ 3931168404Spjd VI_LOCK(vp); 3932168404Spjd vp->v_count = 0; /* count arrives as 1 */ 3933196299Spjd VI_UNLOCK(vp); 3934196299Spjd vrecycle(vp, curthread); 3935185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 3936168962Spjd return; 3937168404Spjd } 3938168404Spjd 3939168404Spjd if (zp->z_atime_dirty && zp->z_unlinked == 0) { 3940168404Spjd dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 3941168404Spjd 3942168404Spjd dmu_tx_hold_bonus(tx, zp->z_id); 3943168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 3944168404Spjd if (error) { 3945168404Spjd dmu_tx_abort(tx); 3946168404Spjd } else { 3947168404Spjd dmu_buf_will_dirty(zp->z_dbuf, tx); 3948168404Spjd mutex_enter(&zp->z_lock); 3949168404Spjd zp->z_atime_dirty = 0; 3950168404Spjd mutex_exit(&zp->z_lock); 3951168404Spjd dmu_tx_commit(tx); 3952168404Spjd } 3953168404Spjd } 3954168404Spjd 3955168404Spjd zfs_zinactive(zp); 3956185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 3957168404Spjd} 3958168404Spjd 3959168404SpjdCTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 3960168404SpjdCTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 3961168404Spjd 3962185029Spjd/*ARGSUSED*/ 3963168404Spjdstatic int 3964185029Spjdzfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 3965168404Spjd{ 3966168404Spjd znode_t *zp = VTOZ(vp); 3967168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3968185029Spjd uint32_t gen; 3969168404Spjd uint64_t object = zp->z_id; 3970168404Spjd zfid_short_t *zfid; 3971168404Spjd int size, i; 3972168404Spjd 3973168404Spjd ZFS_ENTER(zfsvfs); 3974185029Spjd ZFS_VERIFY_ZP(zp); 3975185029Spjd gen = (uint32_t)zp->z_gen; 3976168404Spjd 3977168404Spjd size = (zfsvfs->z_parent != zfsvfs) ? 
LONG_FID_LEN : SHORT_FID_LEN; 3978168404Spjd fidp->fid_len = size; 3979168404Spjd 3980168404Spjd zfid = (zfid_short_t *)fidp; 3981168404Spjd 3982168404Spjd zfid->zf_len = size; 3983168404Spjd 3984168404Spjd for (i = 0; i < sizeof (zfid->zf_object); i++) 3985168404Spjd zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 3986168404Spjd 3987168404Spjd /* Must have a non-zero generation number to distinguish from .zfs */ 3988168404Spjd if (gen == 0) 3989168404Spjd gen = 1; 3990168404Spjd for (i = 0; i < sizeof (zfid->zf_gen); i++) 3991168404Spjd zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 3992168404Spjd 3993168404Spjd if (size == LONG_FID_LEN) { 3994168404Spjd uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 3995169023Spjd zfid_long_t *zlfid; 3996168404Spjd 3997168404Spjd zlfid = (zfid_long_t *)fidp; 3998168404Spjd 3999168404Spjd for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4000168404Spjd zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4001168404Spjd 4002168404Spjd /* XXX - this should be the generation number for the objset */ 4003168404Spjd for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 4004168404Spjd zlfid->zf_setgen[i] = 0; 4005168404Spjd } 4006168404Spjd 4007168404Spjd ZFS_EXIT(zfsvfs); 4008168404Spjd return (0); 4009168404Spjd} 4010168404Spjd 4011168404Spjdstatic int 4012185029Spjdzfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 4013185029Spjd caller_context_t *ct) 4014168404Spjd{ 4015168404Spjd znode_t *zp, *xzp; 4016168404Spjd zfsvfs_t *zfsvfs; 4017168404Spjd zfs_dirlock_t *dl; 4018168404Spjd int error; 4019168404Spjd 4020168404Spjd switch (cmd) { 4021168404Spjd case _PC_LINK_MAX: 4022168404Spjd *valp = INT_MAX; 4023168404Spjd return (0); 4024168404Spjd 4025168404Spjd case _PC_FILESIZEBITS: 4026168404Spjd *valp = 64; 4027168404Spjd return (0); 4028168404Spjd 4029168404Spjd#if 0 4030168404Spjd case _PC_XATTR_EXISTS: 4031168404Spjd zp = VTOZ(vp); 4032168404Spjd zfsvfs = zp->z_zfsvfs; 4033168404Spjd ZFS_ENTER(zfsvfs); 4034185029Spjd 
ZFS_VERIFY_ZP(zp); 4035168404Spjd *valp = 0; 4036168404Spjd error = zfs_dirent_lock(&dl, zp, "", &xzp, 4037185029Spjd ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 4038168404Spjd if (error == 0) { 4039168404Spjd zfs_dirent_unlock(dl); 4040168404Spjd if (!zfs_dirempty(xzp)) 4041168404Spjd *valp = 1; 4042168404Spjd VN_RELE(ZTOV(xzp)); 4043168404Spjd } else if (error == ENOENT) { 4044168404Spjd /* 4045168404Spjd * If there aren't extended attributes, it's the 4046168404Spjd * same as having zero of them. 4047168404Spjd */ 4048168404Spjd error = 0; 4049168404Spjd } 4050168404Spjd ZFS_EXIT(zfsvfs); 4051168404Spjd return (error); 4052168404Spjd#endif 4053168404Spjd 4054168404Spjd case _PC_ACL_EXTENDED: 4055196949Strasz *valp = 0; 4056168404Spjd return (0); 4057168404Spjd 4058196949Strasz case _PC_ACL_NFS4: 4059196949Strasz *valp = 1; 4060196949Strasz return (0); 4061196949Strasz 4062196949Strasz case _PC_ACL_PATH_MAX: 4063196949Strasz *valp = ACL_MAX_ENTRIES; 4064196949Strasz return (0); 4065196949Strasz 4066168404Spjd case _PC_MIN_HOLE_SIZE: 4067168404Spjd *valp = (int)SPA_MINBLOCKSIZE; 4068168404Spjd return (0); 4069168404Spjd 4070168404Spjd default: 4071168962Spjd return (EOPNOTSUPP); 4072168404Spjd } 4073168404Spjd} 4074168404Spjd 4075168404Spjd/*ARGSUSED*/ 4076168404Spjdstatic int 4077185029Spjdzfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4078185029Spjd caller_context_t *ct) 4079168404Spjd{ 4080168404Spjd znode_t *zp = VTOZ(vp); 4081168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4082168404Spjd int error; 4083185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 4084168404Spjd 4085168404Spjd ZFS_ENTER(zfsvfs); 4086185029Spjd ZFS_VERIFY_ZP(zp); 4087185029Spjd error = zfs_getacl(zp, vsecp, skipaclchk, cr); 4088168404Spjd ZFS_EXIT(zfsvfs); 4089168404Spjd 4090168404Spjd return (error); 4091168404Spjd} 4092168404Spjd 4093168404Spjd/*ARGSUSED*/ 4094168404Spjdstatic int 4095185029Spjdzfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4096185029Spjd caller_context_t *ct) 4097168404Spjd{ 4098168404Spjd znode_t *zp = VTOZ(vp); 4099168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4100168404Spjd int error; 4101185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 4102168404Spjd 4103168404Spjd ZFS_ENTER(zfsvfs); 4104185029Spjd ZFS_VERIFY_ZP(zp); 4105185029Spjd error = zfs_setacl(zp, vsecp, skipaclchk, cr); 4106168404Spjd ZFS_EXIT(zfsvfs); 4107168404Spjd return (error); 4108168404Spjd} 4109168404Spjd 4110168962Spjdstatic int 4111168962Spjdzfs_freebsd_open(ap) 4112168962Spjd struct vop_open_args /* { 4113168962Spjd struct vnode *a_vp; 4114168962Spjd int a_mode; 4115168962Spjd struct ucred *a_cred; 4116168962Spjd struct thread *a_td; 4117168962Spjd } */ *ap; 4118168962Spjd{ 4119168962Spjd vnode_t *vp = ap->a_vp; 4120168962Spjd znode_t *zp = VTOZ(vp); 4121168962Spjd int error; 4122168962Spjd 4123185029Spjd error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); 4124168962Spjd if (error == 0) 4125168962Spjd vnode_create_vobject(vp, zp->z_phys->zp_size, ap->a_td); 4126168962Spjd return (error); 4127168962Spjd} 4128168962Spjd 4129168962Spjdstatic int 4130168962Spjdzfs_freebsd_close(ap) 4131168962Spjd struct vop_close_args /* { 4132168962Spjd struct vnode *a_vp; 4133168962Spjd int a_fflag; 4134168962Spjd struct ucred *a_cred; 4135168962Spjd struct thread *a_td; 4136168962Spjd } */ *ap; 4137168962Spjd{ 4138168962Spjd 4139185029Spjd return (zfs_close(ap->a_vp, ap->a_fflag, 0, 0, ap->a_cred, NULL)); 4140168962Spjd} 4141168962Spjd 4142168962Spjdstatic int 4143168962Spjdzfs_freebsd_ioctl(ap) 
4144168962Spjd struct vop_ioctl_args /* { 4145168962Spjd struct vnode *a_vp; 4146168962Spjd u_long a_command; 4147168962Spjd caddr_t a_data; 4148168962Spjd int a_fflag; 4149168962Spjd struct ucred *cred; 4150168962Spjd struct thread *td; 4151168962Spjd } */ *ap; 4152168962Spjd{ 4153168962Spjd 4154168978Spjd return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 4155185029Spjd ap->a_fflag, ap->a_cred, NULL, NULL)); 4156168962Spjd} 4157168962Spjd 4158168962Spjdstatic int 4159168962Spjdzfs_freebsd_read(ap) 4160168962Spjd struct vop_read_args /* { 4161168962Spjd struct vnode *a_vp; 4162168962Spjd struct uio *a_uio; 4163168962Spjd int a_ioflag; 4164168962Spjd struct ucred *a_cred; 4165168962Spjd } */ *ap; 4166168962Spjd{ 4167168962Spjd 4168168962Spjd return (zfs_read(ap->a_vp, ap->a_uio, ap->a_ioflag, ap->a_cred, NULL)); 4169168962Spjd} 4170168962Spjd 4171168962Spjdstatic int 4172168962Spjdzfs_freebsd_write(ap) 4173168962Spjd struct vop_write_args /* { 4174168962Spjd struct vnode *a_vp; 4175168962Spjd struct uio *a_uio; 4176168962Spjd int a_ioflag; 4177168962Spjd struct ucred *a_cred; 4178168962Spjd } */ *ap; 4179168962Spjd{ 4180168962Spjd 4181207745Strasz if (vn_rlimit_fsize(ap->a_vp, ap->a_uio, ap->a_uio->uio_td)) 4182207745Strasz return (EFBIG); 4183207745Strasz 4184168962Spjd return (zfs_write(ap->a_vp, ap->a_uio, ap->a_ioflag, ap->a_cred, NULL)); 4185168962Spjd} 4186168962Spjd 4187168962Spjdstatic int 4188168962Spjdzfs_freebsd_access(ap) 4189168962Spjd struct vop_access_args /* { 4190168962Spjd struct vnode *a_vp; 4191192689Strasz accmode_t a_accmode; 4192168962Spjd struct ucred *a_cred; 4193168962Spjd struct thread *a_td; 4194168962Spjd } */ *ap; 4195168962Spjd{ 4196198703Spjd accmode_t accmode; 4197198703Spjd int error = 0; 4198168962Spjd 4199185172Spjd /* 4200198703Spjd * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND, 4201185172Spjd */ 4202198703Spjd accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 4203198703Spjd if (accmode != 
0) 4204198703Spjd error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); 4205185172Spjd 4206198703Spjd /* 4207198703Spjd * VADMIN has to be handled by vaccess(). 4208198703Spjd */ 4209198703Spjd if (error == 0) { 4210198703Spjd accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 4211198703Spjd if (accmode != 0) { 4212198703Spjd vnode_t *vp = ap->a_vp; 4213198703Spjd znode_t *zp = VTOZ(vp); 4214198703Spjd znode_phys_t *zphys = zp->z_phys; 4215198703Spjd 4216198703Spjd error = vaccess(vp->v_type, zphys->zp_mode, 4217198703Spjd zphys->zp_uid, zphys->zp_gid, accmode, ap->a_cred, 4218198703Spjd NULL); 4219198703Spjd } 4220185172Spjd } 4221185172Spjd 4222198703Spjd return (error); 4223168962Spjd} 4224168962Spjd 4225168962Spjdstatic int 4226168962Spjdzfs_freebsd_lookup(ap) 4227168962Spjd struct vop_lookup_args /* { 4228168962Spjd struct vnode *a_dvp; 4229168962Spjd struct vnode **a_vpp; 4230168962Spjd struct componentname *a_cnp; 4231168962Spjd } */ *ap; 4232168962Spjd{ 4233168962Spjd struct componentname *cnp = ap->a_cnp; 4234168962Spjd char nm[NAME_MAX + 1]; 4235168962Spjd 4236168962Spjd ASSERT(cnp->cn_namelen < sizeof(nm)); 4237168962Spjd strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 4238168962Spjd 4239168962Spjd return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 4240185029Spjd cnp->cn_cred, cnp->cn_thread, 0)); 4241168962Spjd} 4242168962Spjd 4243168962Spjdstatic int 4244168962Spjdzfs_freebsd_create(ap) 4245168962Spjd struct vop_create_args /* { 4246168962Spjd struct vnode *a_dvp; 4247168962Spjd struct vnode **a_vpp; 4248168962Spjd struct componentname *a_cnp; 4249168962Spjd struct vattr *a_vap; 4250168962Spjd } */ *ap; 4251168962Spjd{ 4252168962Spjd struct componentname *cnp = ap->a_cnp; 4253168962Spjd vattr_t *vap = ap->a_vap; 4254168962Spjd int mode; 4255168962Spjd 4256168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 4257168962Spjd 4258168962Spjd vattr_init_mask(vap); 4259168962Spjd mode = vap->va_mode & ALLPERMS; 
4260168962Spjd 4261168962Spjd return (zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 4262185029Spjd ap->a_vpp, cnp->cn_cred, cnp->cn_thread)); 4263168962Spjd} 4264168962Spjd 4265168962Spjdstatic int 4266168962Spjdzfs_freebsd_remove(ap) 4267168962Spjd struct vop_remove_args /* { 4268168962Spjd struct vnode *a_dvp; 4269168962Spjd struct vnode *a_vp; 4270168962Spjd struct componentname *a_cnp; 4271168962Spjd } */ *ap; 4272168962Spjd{ 4273168962Spjd 4274168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 4275168962Spjd 4276168962Spjd return (zfs_remove(ap->a_dvp, ap->a_cnp->cn_nameptr, 4277185029Spjd ap->a_cnp->cn_cred, NULL, 0)); 4278168962Spjd} 4279168962Spjd 4280168962Spjdstatic int 4281168962Spjdzfs_freebsd_mkdir(ap) 4282168962Spjd struct vop_mkdir_args /* { 4283168962Spjd struct vnode *a_dvp; 4284168962Spjd struct vnode **a_vpp; 4285168962Spjd struct componentname *a_cnp; 4286168962Spjd struct vattr *a_vap; 4287168962Spjd } */ *ap; 4288168962Spjd{ 4289168962Spjd vattr_t *vap = ap->a_vap; 4290168962Spjd 4291168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 4292168962Spjd 4293168962Spjd vattr_init_mask(vap); 4294168962Spjd 4295168962Spjd return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 4296185029Spjd ap->a_cnp->cn_cred, NULL, 0, NULL)); 4297168962Spjd} 4298168962Spjd 4299168962Spjdstatic int 4300168962Spjdzfs_freebsd_rmdir(ap) 4301168962Spjd struct vop_rmdir_args /* { 4302168962Spjd struct vnode *a_dvp; 4303168962Spjd struct vnode *a_vp; 4304168962Spjd struct componentname *a_cnp; 4305168962Spjd } */ *ap; 4306168962Spjd{ 4307168962Spjd struct componentname *cnp = ap->a_cnp; 4308168962Spjd 4309168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 4310168962Spjd 4311185029Spjd return (zfs_rmdir(ap->a_dvp, cnp->cn_nameptr, NULL, cnp->cn_cred, NULL, 0)); 4312168962Spjd} 4313168962Spjd 4314168962Spjdstatic int 4315168962Spjdzfs_freebsd_readdir(ap) 4316168962Spjd struct vop_readdir_args /* { 4317168962Spjd struct vnode *a_vp; 4318168962Spjd struct uio 
*a_uio; 4319168962Spjd struct ucred *a_cred; 4320168962Spjd int *a_eofflag; 4321168962Spjd int *a_ncookies; 4322168962Spjd u_long **a_cookies; 4323168962Spjd } */ *ap; 4324168962Spjd{ 4325168962Spjd 4326168962Spjd return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 4327168962Spjd ap->a_ncookies, ap->a_cookies)); 4328168962Spjd} 4329168962Spjd 4330168962Spjdstatic int 4331168962Spjdzfs_freebsd_fsync(ap) 4332168962Spjd struct vop_fsync_args /* { 4333168962Spjd struct vnode *a_vp; 4334168962Spjd int a_waitfor; 4335168962Spjd struct thread *a_td; 4336168962Spjd } */ *ap; 4337168962Spjd{ 4338168962Spjd 4339168962Spjd vop_stdfsync(ap); 4340185029Spjd return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 4341168962Spjd} 4342168962Spjd 4343168962Spjdstatic int 4344168962Spjdzfs_freebsd_getattr(ap) 4345168962Spjd struct vop_getattr_args /* { 4346168962Spjd struct vnode *a_vp; 4347168962Spjd struct vattr *a_vap; 4348168962Spjd struct ucred *a_cred; 4349185029Spjd struct thread *a_td; 4350168962Spjd } */ *ap; 4351168962Spjd{ 4352185029Spjd vattr_t *vap = ap->a_vap; 4353185029Spjd xvattr_t xvap; 4354185029Spjd u_long fflags = 0; 4355185029Spjd int error; 4356168962Spjd 4357185029Spjd xva_init(&xvap); 4358185029Spjd xvap.xva_vattr = *vap; 4359185029Spjd xvap.xva_vattr.va_mask |= AT_XVATTR; 4360185029Spjd 4361185029Spjd /* Convert chflags into ZFS-type flags. */ 4362185029Spjd /* XXX: what about SF_SETTABLE?. */ 4363185029Spjd XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 4364185029Spjd XVA_SET_REQ(&xvap, XAT_APPENDONLY); 4365185029Spjd XVA_SET_REQ(&xvap, XAT_NOUNLINK); 4366185029Spjd XVA_SET_REQ(&xvap, XAT_NODUMP); 4367185029Spjd error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 4368185029Spjd if (error != 0) 4369185029Spjd return (error); 4370185029Spjd 4371185029Spjd /* Convert ZFS xattr into chflags. 
*/ 4372185029Spjd#define FLAG_CHECK(fflag, xflag, xfield) do { \ 4373185029Spjd if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 4374185029Spjd fflags |= (fflag); \ 4375185029Spjd} while (0) 4376185029Spjd FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 4377185029Spjd xvap.xva_xoptattrs.xoa_immutable); 4378185029Spjd FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 4379185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 4380185029Spjd FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 4381185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 4382185029Spjd FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 4383185029Spjd xvap.xva_xoptattrs.xoa_nodump); 4384185029Spjd#undef FLAG_CHECK 4385185029Spjd *vap = xvap.xva_vattr; 4386185029Spjd vap->va_flags = fflags; 4387185029Spjd return (0); 4388168962Spjd} 4389168962Spjd 4390168962Spjdstatic int 4391168962Spjdzfs_freebsd_setattr(ap) 4392168962Spjd struct vop_setattr_args /* { 4393168962Spjd struct vnode *a_vp; 4394168962Spjd struct vattr *a_vap; 4395168962Spjd struct ucred *a_cred; 4396185029Spjd struct thread *a_td; 4397168962Spjd } */ *ap; 4398168962Spjd{ 4399185172Spjd vnode_t *vp = ap->a_vp; 4400168962Spjd vattr_t *vap = ap->a_vap; 4401185172Spjd cred_t *cred = ap->a_cred; 4402185029Spjd xvattr_t xvap; 4403185029Spjd u_long fflags; 4404185029Spjd uint64_t zflags; 4405168962Spjd 4406168962Spjd vattr_init_mask(vap); 4407170044Spjd vap->va_mask &= ~AT_NOSET; 4408168962Spjd 4409185029Spjd xva_init(&xvap); 4410185029Spjd xvap.xva_vattr = *vap; 4411185029Spjd 4412185172Spjd zflags = VTOZ(vp)->z_phys->zp_flags; 4413185172Spjd 4414185029Spjd if (vap->va_flags != VNOVAL) { 4415197683Sdelphij zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 4416185172Spjd int error; 4417185172Spjd 4418197683Sdelphij if (zfsvfs->z_use_fuids == B_FALSE) 4419197683Sdelphij return (EOPNOTSUPP); 4420197683Sdelphij 4421185029Spjd fflags = vap->va_flags; 4422185029Spjd if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_NODUMP)) != 0) 4423185029Spjd return (EOPNOTSUPP); 4424185172Spjd /* 4425185172Spjd * 
Unprivileged processes are not permitted to unset system 4426185172Spjd * flags, or modify flags if any system flags are set. 4427185172Spjd * Privileged non-jail processes may not modify system flags 4428185172Spjd * if securelevel > 0 and any existing system flags are set. 4429185172Spjd * Privileged jail processes behave like privileged non-jail 4430185172Spjd * processes if the security.jail.chflags_allowed sysctl is 4431185172Spjd * is non-zero; otherwise, they behave like unprivileged 4432185172Spjd * processes. 4433185172Spjd */ 4434197861Spjd if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 4435197861Spjd priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) { 4436185172Spjd if (zflags & 4437185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 4438185172Spjd error = securelevel_gt(cred, 0); 4439197861Spjd if (error != 0) 4440185172Spjd return (error); 4441185172Spjd } 4442185172Spjd } else { 4443197861Spjd /* 4444197861Spjd * Callers may only modify the file flags on objects they 4445197861Spjd * have VADMIN rights for. 4446197861Spjd */ 4447197861Spjd if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0) 4448197861Spjd return (error); 4449185172Spjd if (zflags & 4450185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 4451185172Spjd return (EPERM); 4452185172Spjd } 4453185172Spjd if (fflags & 4454185172Spjd (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 4455185172Spjd return (EPERM); 4456185172Spjd } 4457185172Spjd } 4458185029Spjd 4459185029Spjd#define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 4460185029Spjd if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 4461185029Spjd ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 4462185029Spjd XVA_SET_REQ(&xvap, (xflag)); \ 4463185029Spjd (xfield) = ((fflags & (fflag)) != 0); \ 4464185029Spjd } \ 4465185029Spjd} while (0) 4466185029Spjd /* Convert chflags into ZFS-type flags. */ 4467185029Spjd /* XXX: what about SF_SETTABLE?. 
*/ 4468185029Spjd FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 4469185029Spjd xvap.xva_xoptattrs.xoa_immutable); 4470185029Spjd FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 4471185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 4472185029Spjd FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 4473185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 4474185029Spjd FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 4475185172Spjd xvap.xva_xoptattrs.xoa_nodump); 4476185029Spjd#undef FLAG_CHANGE 4477185029Spjd } 4478185172Spjd return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL)); 4479168962Spjd} 4480168962Spjd 4481168962Spjdstatic int 4482168962Spjdzfs_freebsd_rename(ap) 4483168962Spjd struct vop_rename_args /* { 4484168962Spjd struct vnode *a_fdvp; 4485168962Spjd struct vnode *a_fvp; 4486168962Spjd struct componentname *a_fcnp; 4487168962Spjd struct vnode *a_tdvp; 4488168962Spjd struct vnode *a_tvp; 4489168962Spjd struct componentname *a_tcnp; 4490168962Spjd } */ *ap; 4491168962Spjd{ 4492168962Spjd vnode_t *fdvp = ap->a_fdvp; 4493168962Spjd vnode_t *fvp = ap->a_fvp; 4494168962Spjd vnode_t *tdvp = ap->a_tdvp; 4495168962Spjd vnode_t *tvp = ap->a_tvp; 4496168962Spjd int error; 4497168962Spjd 4498192237Skmacy ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); 4499192237Skmacy ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); 4500168962Spjd 4501168962Spjd error = zfs_rename(fdvp, ap->a_fcnp->cn_nameptr, tdvp, 4502185029Spjd ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred, NULL, 0); 4503168962Spjd 4504168962Spjd if (tdvp == tvp) 4505168962Spjd VN_RELE(tdvp); 4506168962Spjd else 4507168962Spjd VN_URELE(tdvp); 4508168962Spjd if (tvp) 4509168962Spjd VN_URELE(tvp); 4510168962Spjd VN_RELE(fdvp); 4511168962Spjd VN_RELE(fvp); 4512168962Spjd 4513168962Spjd return (error); 4514168962Spjd} 4515168962Spjd 4516168962Spjdstatic int 4517168962Spjdzfs_freebsd_symlink(ap) 4518168962Spjd struct vop_symlink_args /* { 4519168962Spjd struct vnode *a_dvp; 4520168962Spjd struct 
vnode **a_vpp; 4521168962Spjd struct componentname *a_cnp; 4522168962Spjd struct vattr *a_vap; 4523168962Spjd char *a_target; 4524168962Spjd } */ *ap; 4525168962Spjd{ 4526168962Spjd struct componentname *cnp = ap->a_cnp; 4527168962Spjd vattr_t *vap = ap->a_vap; 4528168962Spjd 4529168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 4530168962Spjd 4531168962Spjd vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. */ 4532168962Spjd vattr_init_mask(vap); 4533168962Spjd 4534168962Spjd return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 4535168962Spjd ap->a_target, cnp->cn_cred, cnp->cn_thread)); 4536168962Spjd} 4537168962Spjd 4538168962Spjdstatic int 4539168962Spjdzfs_freebsd_readlink(ap) 4540168962Spjd struct vop_readlink_args /* { 4541168962Spjd struct vnode *a_vp; 4542168962Spjd struct uio *a_uio; 4543168962Spjd struct ucred *a_cred; 4544168962Spjd } */ *ap; 4545168962Spjd{ 4546168962Spjd 4547185029Spjd return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 4548168962Spjd} 4549168962Spjd 4550168962Spjdstatic int 4551168962Spjdzfs_freebsd_link(ap) 4552168962Spjd struct vop_link_args /* { 4553168962Spjd struct vnode *a_tdvp; 4554168962Spjd struct vnode *a_vp; 4555168962Spjd struct componentname *a_cnp; 4556168962Spjd } */ *ap; 4557168962Spjd{ 4558168962Spjd struct componentname *cnp = ap->a_cnp; 4559168962Spjd 4560168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 4561168962Spjd 4562185029Spjd return (zfs_link(ap->a_tdvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 4563168962Spjd} 4564168962Spjd 4565168962Spjdstatic int 4566168962Spjdzfs_freebsd_inactive(ap) 4567169170Spjd struct vop_inactive_args /* { 4568169170Spjd struct vnode *a_vp; 4569169170Spjd struct thread *a_td; 4570169170Spjd } */ *ap; 4571168962Spjd{ 4572168962Spjd vnode_t *vp = ap->a_vp; 4573168962Spjd 4574185029Spjd zfs_inactive(vp, ap->a_td->td_ucred, NULL); 4575168962Spjd return (0); 4576168962Spjd} 4577168962Spjd 4578185029Spjdstatic void 4579185029Spjdzfs_reclaim_complete(void 
*arg, int pending) 4580185029Spjd{ 4581185029Spjd znode_t *zp = arg; 4582185029Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4583185029Spjd 4584197133Spjd rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4585197133Spjd if (zp->z_dbuf != NULL) { 4586197133Spjd ZFS_OBJ_HOLD_ENTER(zfsvfs, zp->z_id); 4587197133Spjd zfs_znode_dmu_fini(zp); 4588197133Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); 4589197133Spjd } 4590185029Spjd zfs_znode_free(zp); 4591197133Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4592197133Spjd /* 4593197133Spjd * If the file system is being unmounted, there is a process waiting 4594197133Spjd * for us, wake it up. 4595197133Spjd */ 4596197133Spjd if (zfsvfs->z_unmounted) 4597197133Spjd wakeup_one(zfsvfs); 4598185029Spjd} 4599185029Spjd 4600168962Spjdstatic int 4601168962Spjdzfs_freebsd_reclaim(ap) 4602168962Spjd struct vop_reclaim_args /* { 4603168962Spjd struct vnode *a_vp; 4604168962Spjd struct thread *a_td; 4605168962Spjd } */ *ap; 4606168962Spjd{ 4607169170Spjd vnode_t *vp = ap->a_vp; 4608168962Spjd znode_t *zp = VTOZ(vp); 4609197133Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4610168962Spjd 4611197133Spjd rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4612197133Spjd 4613169025Spjd ASSERT(zp != NULL); 4614169025Spjd 4615168962Spjd /* 4616168962Spjd * Destroy the vm object and flush associated pages. 4617168962Spjd */ 4618168962Spjd vnode_destroy_vobject(vp); 4619169025Spjd 4620169025Spjd mutex_enter(&zp->z_lock); 4621196301Spjd ASSERT(zp->z_phys != NULL); 4622197153Spjd zp->z_vnode = NULL; 4623196301Spjd mutex_exit(&zp->z_lock); 4624196301Spjd 4625196301Spjd if (zp->z_unlinked) 4626196301Spjd ; /* Do nothing. */ 4627196301Spjd else if (zp->z_dbuf == NULL) 4628196301Spjd zfs_znode_free(zp); 4629196301Spjd else /* if (!zp->z_unlinked && zp->z_dbuf != NULL) */ { 4630185029Spjd int locked; 4631185029Spjd 4632185029Spjd locked = MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)) ? 
2 : 4633185029Spjd ZFS_OBJ_HOLD_TRYENTER(zfsvfs, zp->z_id); 4634185029Spjd if (locked == 0) { 4635185029Spjd /* 4636185029Spjd * Lock can't be obtained due to deadlock possibility, 4637185029Spjd * so defer znode destruction. 4638185029Spjd */ 4639185029Spjd TASK_INIT(&zp->z_task, 0, zfs_reclaim_complete, zp); 4640185029Spjd taskqueue_enqueue(taskqueue_thread, &zp->z_task); 4641185029Spjd } else { 4642185029Spjd zfs_znode_dmu_fini(zp); 4643185029Spjd if (locked == 1) 4644185029Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); 4645185029Spjd zfs_znode_free(zp); 4646185029Spjd } 4647169025Spjd } 4648168962Spjd VI_LOCK(vp); 4649168962Spjd vp->v_data = NULL; 4650171567Spjd ASSERT(vp->v_holdcnt >= 1); 4651171316Sdfr VI_UNLOCK(vp); 4652197133Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4653168962Spjd return (0); 4654168962Spjd} 4655168962Spjd 4656168962Spjdstatic int 4657168962Spjdzfs_freebsd_fid(ap) 4658168962Spjd struct vop_fid_args /* { 4659168962Spjd struct vnode *a_vp; 4660168962Spjd struct fid *a_fid; 4661168962Spjd } */ *ap; 4662168962Spjd{ 4663168962Spjd 4664185029Spjd return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 4665168962Spjd} 4666168962Spjd 4667168962Spjdstatic int 4668168962Spjdzfs_freebsd_pathconf(ap) 4669168962Spjd struct vop_pathconf_args /* { 4670168962Spjd struct vnode *a_vp; 4671168962Spjd int a_name; 4672168962Spjd register_t *a_retval; 4673168962Spjd } */ *ap; 4674168962Spjd{ 4675168962Spjd ulong_t val; 4676168962Spjd int error; 4677168962Spjd 4678185029Spjd error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 4679168962Spjd if (error == 0) 4680168962Spjd *ap->a_retval = val; 4681168962Spjd else if (error == EOPNOTSUPP) 4682168962Spjd error = vop_stdpathconf(ap); 4683168962Spjd return (error); 4684168962Spjd} 4685168962Spjd 4686196949Straszstatic int 4687196949Straszzfs_freebsd_fifo_pathconf(ap) 4688196949Strasz struct vop_pathconf_args /* { 4689196949Strasz struct vnode *a_vp; 4690196949Strasz int a_name; 4691196949Strasz 
register_t *a_retval; 4692196949Strasz } */ *ap; 4693196949Strasz{ 4694196949Strasz 4695196949Strasz switch (ap->a_name) { 4696196949Strasz case _PC_ACL_EXTENDED: 4697196949Strasz case _PC_ACL_NFS4: 4698196949Strasz case _PC_ACL_PATH_MAX: 4699196949Strasz case _PC_MAC_PRESENT: 4700196949Strasz return (zfs_freebsd_pathconf(ap)); 4701196949Strasz default: 4702196949Strasz return (fifo_specops.vop_pathconf(ap)); 4703196949Strasz } 4704196949Strasz} 4705196949Strasz 4706185029Spjd/* 4707185029Spjd * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 4708185029Spjd * extended attribute name: 4709185029Spjd * 4710185029Spjd * NAMESPACE PREFIX 4711185029Spjd * system freebsd:system: 4712185029Spjd * user (none, can be used to access ZFS fsattr(5) attributes 4713185029Spjd * created on Solaris) 4714185029Spjd */ 4715185029Spjdstatic int 4716185029Spjdzfs_create_attrname(int attrnamespace, const char *name, char *attrname, 4717185029Spjd size_t size) 4718185029Spjd{ 4719185029Spjd const char *namespace, *prefix, *suffix; 4720185029Spjd 4721185029Spjd /* We don't allow '/' character in attribute name. */ 4722185029Spjd if (strchr(name, '/') != NULL) 4723185029Spjd return (EINVAL); 4724185029Spjd /* We don't allow attribute names that start with "freebsd:" string. */ 4725185029Spjd if (strncmp(name, "freebsd:", 8) == 0) 4726185029Spjd return (EINVAL); 4727185029Spjd 4728185029Spjd bzero(attrname, size); 4729185029Spjd 4730185029Spjd switch (attrnamespace) { 4731185029Spjd case EXTATTR_NAMESPACE_USER: 4732185029Spjd#if 0 4733185029Spjd prefix = "freebsd:"; 4734185029Spjd namespace = EXTATTR_NAMESPACE_USER_STRING; 4735185029Spjd suffix = ":"; 4736185029Spjd#else 4737185029Spjd /* 4738185029Spjd * This is the default namespace by which we can access all 4739185029Spjd * attributes created on Solaris. 
4740185029Spjd */ 4741185029Spjd prefix = namespace = suffix = ""; 4742185029Spjd#endif 4743185029Spjd break; 4744185029Spjd case EXTATTR_NAMESPACE_SYSTEM: 4745185029Spjd prefix = "freebsd:"; 4746185029Spjd namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 4747185029Spjd suffix = ":"; 4748185029Spjd break; 4749185029Spjd case EXTATTR_NAMESPACE_EMPTY: 4750185029Spjd default: 4751185029Spjd return (EINVAL); 4752185029Spjd } 4753185029Spjd if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 4754185029Spjd name) >= size) { 4755185029Spjd return (ENAMETOOLONG); 4756185029Spjd } 4757185029Spjd return (0); 4758185029Spjd} 4759185029Spjd 4760185029Spjd/* 4761185029Spjd * Vnode operating to retrieve a named extended attribute. 4762185029Spjd */ 4763185029Spjdstatic int 4764185029Spjdzfs_getextattr(struct vop_getextattr_args *ap) 4765185029Spjd/* 4766185029Spjdvop_getextattr { 4767185029Spjd IN struct vnode *a_vp; 4768185029Spjd IN int a_attrnamespace; 4769185029Spjd IN const char *a_name; 4770185029Spjd INOUT struct uio *a_uio; 4771185029Spjd OUT size_t *a_size; 4772185029Spjd IN struct ucred *a_cred; 4773185029Spjd IN struct thread *a_td; 4774185029Spjd}; 4775185029Spjd*/ 4776185029Spjd{ 4777185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 4778185029Spjd struct thread *td = ap->a_td; 4779185029Spjd struct nameidata nd; 4780185029Spjd char attrname[255]; 4781185029Spjd struct vattr va; 4782185029Spjd vnode_t *xvp = NULL, *vp; 4783185029Spjd int error, flags; 4784185029Spjd 4785195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 4786195785Strasz ap->a_cred, ap->a_td, VREAD); 4787195785Strasz if (error != 0) 4788195785Strasz return (error); 4789195785Strasz 4790185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 4791185029Spjd sizeof(attrname)); 4792185029Spjd if (error != 0) 4793185029Spjd return (error); 4794185029Spjd 4795185029Spjd ZFS_ENTER(zfsvfs); 4796185029Spjd 4797185029Spjd error = 
zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 4798185029Spjd LOOKUP_XATTR); 4799185029Spjd if (error != 0) { 4800185029Spjd ZFS_EXIT(zfsvfs); 4801185029Spjd return (error); 4802185029Spjd } 4803185029Spjd 4804185029Spjd flags = FREAD; 4805185029Spjd NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, attrname, 4806185029Spjd xvp, td); 4807194586Skib error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL); 4808185029Spjd vp = nd.ni_vp; 4809185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 4810185029Spjd if (error != 0) { 4811196303Spjd ZFS_EXIT(zfsvfs); 4812195785Strasz if (error == ENOENT) 4813195785Strasz error = ENOATTR; 4814185029Spjd return (error); 4815185029Spjd } 4816185029Spjd 4817185029Spjd if (ap->a_size != NULL) { 4818185029Spjd error = VOP_GETATTR(vp, &va, ap->a_cred); 4819185029Spjd if (error == 0) 4820185029Spjd *ap->a_size = (size_t)va.va_size; 4821185029Spjd } else if (ap->a_uio != NULL) 4822185029Spjd error = VOP_READ(vp, ap->a_uio, IO_UNIT | IO_SYNC, ap->a_cred); 4823185029Spjd 4824185029Spjd VOP_UNLOCK(vp, 0); 4825185029Spjd vn_close(vp, flags, ap->a_cred, td); 4826185029Spjd ZFS_EXIT(zfsvfs); 4827185029Spjd 4828185029Spjd return (error); 4829185029Spjd} 4830185029Spjd 4831185029Spjd/* 4832185029Spjd * Vnode operation to remove a named attribute. 
4833185029Spjd */ 4834185029Spjdint 4835185029Spjdzfs_deleteextattr(struct vop_deleteextattr_args *ap) 4836185029Spjd/* 4837185029Spjdvop_deleteextattr { 4838185029Spjd IN struct vnode *a_vp; 4839185029Spjd IN int a_attrnamespace; 4840185029Spjd IN const char *a_name; 4841185029Spjd IN struct ucred *a_cred; 4842185029Spjd IN struct thread *a_td; 4843185029Spjd}; 4844185029Spjd*/ 4845185029Spjd{ 4846185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 4847185029Spjd struct thread *td = ap->a_td; 4848185029Spjd struct nameidata nd; 4849185029Spjd char attrname[255]; 4850185029Spjd struct vattr va; 4851185029Spjd vnode_t *xvp = NULL, *vp; 4852185029Spjd int error, flags; 4853185029Spjd 4854195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 4855195785Strasz ap->a_cred, ap->a_td, VWRITE); 4856195785Strasz if (error != 0) 4857195785Strasz return (error); 4858195785Strasz 4859185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 4860185029Spjd sizeof(attrname)); 4861185029Spjd if (error != 0) 4862185029Spjd return (error); 4863185029Spjd 4864185029Spjd ZFS_ENTER(zfsvfs); 4865185029Spjd 4866185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 4867185029Spjd LOOKUP_XATTR); 4868185029Spjd if (error != 0) { 4869185029Spjd ZFS_EXIT(zfsvfs); 4870185029Spjd return (error); 4871185029Spjd } 4872185029Spjd 4873185029Spjd NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF | MPSAFE, 4874185029Spjd UIO_SYSSPACE, attrname, xvp, td); 4875185029Spjd error = namei(&nd); 4876185029Spjd vp = nd.ni_vp; 4877185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 4878185029Spjd if (error != 0) { 4879196303Spjd ZFS_EXIT(zfsvfs); 4880195785Strasz if (error == ENOENT) 4881195785Strasz error = ENOATTR; 4882185029Spjd return (error); 4883185029Spjd } 4884185029Spjd error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 4885185029Spjd 4886185029Spjd vput(nd.ni_dvp); 4887185029Spjd if (vp == nd.ni_dvp) 4888185029Spjd vrele(vp); 4889185029Spjd 
else 4890185029Spjd vput(vp); 4891185029Spjd ZFS_EXIT(zfsvfs); 4892185029Spjd 4893185029Spjd return (error); 4894185029Spjd} 4895185029Spjd 4896185029Spjd/* 4897185029Spjd * Vnode operation to set a named attribute. 4898185029Spjd */ 4899185029Spjdstatic int 4900185029Spjdzfs_setextattr(struct vop_setextattr_args *ap) 4901185029Spjd/* 4902185029Spjdvop_setextattr { 4903185029Spjd IN struct vnode *a_vp; 4904185029Spjd IN int a_attrnamespace; 4905185029Spjd IN const char *a_name; 4906185029Spjd INOUT struct uio *a_uio; 4907185029Spjd IN struct ucred *a_cred; 4908185029Spjd IN struct thread *a_td; 4909185029Spjd}; 4910185029Spjd*/ 4911185029Spjd{ 4912185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 4913185029Spjd struct thread *td = ap->a_td; 4914185029Spjd struct nameidata nd; 4915185029Spjd char attrname[255]; 4916185029Spjd struct vattr va; 4917185029Spjd vnode_t *xvp = NULL, *vp; 4918185029Spjd int error, flags; 4919185029Spjd 4920195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 4921195785Strasz ap->a_cred, ap->a_td, VWRITE); 4922195785Strasz if (error != 0) 4923195785Strasz return (error); 4924195785Strasz 4925185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 4926185029Spjd sizeof(attrname)); 4927185029Spjd if (error != 0) 4928185029Spjd return (error); 4929185029Spjd 4930185029Spjd ZFS_ENTER(zfsvfs); 4931185029Spjd 4932185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 4933195785Strasz LOOKUP_XATTR | CREATE_XATTR_DIR); 4934185029Spjd if (error != 0) { 4935185029Spjd ZFS_EXIT(zfsvfs); 4936185029Spjd return (error); 4937185029Spjd } 4938185029Spjd 4939185029Spjd flags = FFLAGS(O_WRONLY | O_CREAT); 4940185029Spjd NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, attrname, 4941185029Spjd xvp, td); 4942194586Skib error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); 4943185029Spjd vp = nd.ni_vp; 4944185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 4945185029Spjd if (error != 
0) { 4946185029Spjd ZFS_EXIT(zfsvfs); 4947185029Spjd return (error); 4948185029Spjd } 4949185029Spjd 4950185029Spjd VATTR_NULL(&va); 4951185029Spjd va.va_size = 0; 4952185029Spjd error = VOP_SETATTR(vp, &va, ap->a_cred); 4953185029Spjd if (error == 0) 4954185029Spjd VOP_WRITE(vp, ap->a_uio, IO_UNIT | IO_SYNC, ap->a_cred); 4955185029Spjd 4956185029Spjd VOP_UNLOCK(vp, 0); 4957185029Spjd vn_close(vp, flags, ap->a_cred, td); 4958185029Spjd ZFS_EXIT(zfsvfs); 4959185029Spjd 4960185029Spjd return (error); 4961185029Spjd} 4962185029Spjd 4963185029Spjd/* 4964185029Spjd * Vnode operation to retrieve extended attributes on a vnode. 4965185029Spjd */ 4966185029Spjdstatic int 4967185029Spjdzfs_listextattr(struct vop_listextattr_args *ap) 4968185029Spjd/* 4969185029Spjdvop_listextattr { 4970185029Spjd IN struct vnode *a_vp; 4971185029Spjd IN int a_attrnamespace; 4972185029Spjd INOUT struct uio *a_uio; 4973185029Spjd OUT size_t *a_size; 4974185029Spjd IN struct ucred *a_cred; 4975185029Spjd IN struct thread *a_td; 4976185029Spjd}; 4977185029Spjd*/ 4978185029Spjd{ 4979185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 4980185029Spjd struct thread *td = ap->a_td; 4981185029Spjd struct nameidata nd; 4982185029Spjd char attrprefix[16]; 4983185029Spjd u_char dirbuf[sizeof(struct dirent)]; 4984185029Spjd struct dirent *dp; 4985185029Spjd struct iovec aiov; 4986185029Spjd struct uio auio, *uio = ap->a_uio; 4987185029Spjd size_t *sizep = ap->a_size; 4988185029Spjd size_t plen; 4989185029Spjd vnode_t *xvp = NULL, *vp; 4990185029Spjd int done, error, eof, pos; 4991185029Spjd 4992195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 4993195785Strasz ap->a_cred, ap->a_td, VREAD); 4994196303Spjd if (error != 0) 4995195785Strasz return (error); 4996195785Strasz 4997185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 4998185029Spjd sizeof(attrprefix)); 4999185029Spjd if (error != 0) 5000185029Spjd return (error); 5001185029Spjd plen = 
strlen(attrprefix); 5002185029Spjd 5003185029Spjd ZFS_ENTER(zfsvfs); 5004185029Spjd 5005195822Strasz if (sizep != NULL) 5006195822Strasz *sizep = 0; 5007195822Strasz 5008185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5009185029Spjd LOOKUP_XATTR); 5010185029Spjd if (error != 0) { 5011196303Spjd ZFS_EXIT(zfsvfs); 5012195785Strasz /* 5013195785Strasz * ENOATTR means that the EA directory does not yet exist, 5014195785Strasz * i.e. there are no extended attributes there. 5015195785Strasz */ 5016195785Strasz if (error == ENOATTR) 5017195785Strasz error = 0; 5018185029Spjd return (error); 5019185029Spjd } 5020185029Spjd 5021188588Sjhb NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE, 5022188588Sjhb UIO_SYSSPACE, ".", xvp, td); 5023185029Spjd error = namei(&nd); 5024185029Spjd vp = nd.ni_vp; 5025185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 5026185029Spjd if (error != 0) { 5027185029Spjd ZFS_EXIT(zfsvfs); 5028185029Spjd return (error); 5029185029Spjd } 5030185029Spjd 5031185029Spjd auio.uio_iov = &aiov; 5032185029Spjd auio.uio_iovcnt = 1; 5033185029Spjd auio.uio_segflg = UIO_SYSSPACE; 5034185029Spjd auio.uio_td = td; 5035185029Spjd auio.uio_rw = UIO_READ; 5036185029Spjd auio.uio_offset = 0; 5037185029Spjd 5038185029Spjd do { 5039185029Spjd u_char nlen; 5040185029Spjd 5041185029Spjd aiov.iov_base = (void *)dirbuf; 5042185029Spjd aiov.iov_len = sizeof(dirbuf); 5043185029Spjd auio.uio_resid = sizeof(dirbuf); 5044185029Spjd error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 5045185029Spjd done = sizeof(dirbuf) - auio.uio_resid; 5046185029Spjd if (error != 0) 5047185029Spjd break; 5048185029Spjd for (pos = 0; pos < done;) { 5049185029Spjd dp = (struct dirent *)(dirbuf + pos); 5050185029Spjd pos += dp->d_reclen; 5051185029Spjd /* 5052185029Spjd * XXX: Temporarily we also accept DT_UNKNOWN, as this 5053185029Spjd * is what we get when attribute was created on Solaris. 
5054185029Spjd */ 5055185029Spjd if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 5056185029Spjd continue; 5057185029Spjd if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) 5058185029Spjd continue; 5059185029Spjd else if (strncmp(dp->d_name, attrprefix, plen) != 0) 5060185029Spjd continue; 5061185029Spjd nlen = dp->d_namlen - plen; 5062185029Spjd if (sizep != NULL) 5063185029Spjd *sizep += 1 + nlen; 5064185029Spjd else if (uio != NULL) { 5065185029Spjd /* 5066185029Spjd * Format of extattr name entry is one byte for 5067185029Spjd * length and the rest for name. 5068185029Spjd */ 5069185029Spjd error = uiomove(&nlen, 1, uio->uio_rw, uio); 5070185029Spjd if (error == 0) { 5071185029Spjd error = uiomove(dp->d_name + plen, nlen, 5072185029Spjd uio->uio_rw, uio); 5073185029Spjd } 5074185029Spjd if (error != 0) 5075185029Spjd break; 5076185029Spjd } 5077185029Spjd } 5078185029Spjd } while (!eof && error == 0); 5079185029Spjd 5080185029Spjd vput(vp); 5081185029Spjd ZFS_EXIT(zfsvfs); 5082185029Spjd 5083185029Spjd return (error); 5084185029Spjd} 5085185029Spjd 5086192800Straszint 5087192800Straszzfs_freebsd_getacl(ap) 5088192800Strasz struct vop_getacl_args /* { 5089192800Strasz struct vnode *vp; 5090192800Strasz acl_type_t type; 5091192800Strasz struct acl *aclp; 5092192800Strasz struct ucred *cred; 5093192800Strasz struct thread *td; 5094192800Strasz } */ *ap; 5095192800Strasz{ 5096192800Strasz int error; 5097192800Strasz vsecattr_t vsecattr; 5098192800Strasz 5099192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 5100197435Strasz return (EINVAL); 5101192800Strasz 5102192800Strasz vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 5103192800Strasz if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)) 5104192800Strasz return (error); 5105192800Strasz 5106192800Strasz error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt); 5107196303Spjd if (vsecattr.vsa_aclentp != NULL) 5108196303Spjd kmem_free(vsecattr.vsa_aclentp, 
vsecattr.vsa_aclentsz); 5109192800Strasz 5110196303Spjd return (error); 5111192800Strasz} 5112192800Strasz 5113192800Straszint 5114192800Straszzfs_freebsd_setacl(ap) 5115192800Strasz struct vop_setacl_args /* { 5116192800Strasz struct vnode *vp; 5117192800Strasz acl_type_t type; 5118192800Strasz struct acl *aclp; 5119192800Strasz struct ucred *cred; 5120192800Strasz struct thread *td; 5121192800Strasz } */ *ap; 5122192800Strasz{ 5123192800Strasz int error; 5124192800Strasz vsecattr_t vsecattr; 5125192800Strasz int aclbsize; /* size of acl list in bytes */ 5126192800Strasz aclent_t *aaclp; 5127192800Strasz 5128192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 5129197435Strasz return (EINVAL); 5130192800Strasz 5131192800Strasz if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 5132192800Strasz return (EINVAL); 5133192800Strasz 5134192800Strasz /* 5135196949Strasz * With NFSv4 ACLs, chmod(2) may need to add additional entries, 5136192800Strasz * splitting every entry into two and appending "canonical six" 5137192800Strasz * entries at the end. Don't allow for setting an ACL that would 5138192800Strasz * cause chmod(2) to run out of ACL entries. 
5139192800Strasz */ 5140192800Strasz if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 5141192800Strasz return (ENOSPC); 5142192800Strasz 5143208030Strasz error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 5144208030Strasz if (error != 0) 5145208030Strasz return (error); 5146208030Strasz 5147192800Strasz vsecattr.vsa_mask = VSA_ACE; 5148192800Strasz aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t); 5149192800Strasz vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 5150192800Strasz aaclp = vsecattr.vsa_aclentp; 5151192800Strasz vsecattr.vsa_aclentsz = aclbsize; 5152192800Strasz 5153192800Strasz aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 5154192800Strasz error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL); 5155192800Strasz kmem_free(aaclp, aclbsize); 5156192800Strasz 5157192800Strasz return (error); 5158192800Strasz} 5159192800Strasz 5160192800Straszint 5161192800Straszzfs_freebsd_aclcheck(ap) 5162192800Strasz struct vop_aclcheck_args /* { 5163192800Strasz struct vnode *vp; 5164192800Strasz acl_type_t type; 5165192800Strasz struct acl *aclp; 5166192800Strasz struct ucred *cred; 5167192800Strasz struct thread *td; 5168192800Strasz } */ *ap; 5169192800Strasz{ 5170192800Strasz 5171192800Strasz return (EOPNOTSUPP); 5172192800Strasz} 5173192800Strasz 5174168404Spjdstruct vop_vector zfs_vnodeops; 5175168404Spjdstruct vop_vector zfs_fifoops; 5176209962Smmstruct vop_vector zfs_shareops; 5177168404Spjd 5178168404Spjdstruct vop_vector zfs_vnodeops = { 5179185029Spjd .vop_default = &default_vnodeops, 5180185029Spjd .vop_inactive = zfs_freebsd_inactive, 5181185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 5182185029Spjd .vop_access = zfs_freebsd_access, 5183168404Spjd#ifdef FREEBSD_NAMECACHE 5184185029Spjd .vop_lookup = vfs_cache_lookup, 5185185029Spjd .vop_cachedlookup = zfs_freebsd_lookup, 5186168404Spjd#else 5187185029Spjd .vop_lookup = zfs_freebsd_lookup, 5188168404Spjd#endif 5189185029Spjd .vop_getattr = 
zfs_freebsd_getattr, 5190185029Spjd .vop_setattr = zfs_freebsd_setattr, 5191185029Spjd .vop_create = zfs_freebsd_create, 5192185029Spjd .vop_mknod = zfs_freebsd_create, 5193185029Spjd .vop_mkdir = zfs_freebsd_mkdir, 5194185029Spjd .vop_readdir = zfs_freebsd_readdir, 5195185029Spjd .vop_fsync = zfs_freebsd_fsync, 5196185029Spjd .vop_open = zfs_freebsd_open, 5197185029Spjd .vop_close = zfs_freebsd_close, 5198185029Spjd .vop_rmdir = zfs_freebsd_rmdir, 5199185029Spjd .vop_ioctl = zfs_freebsd_ioctl, 5200185029Spjd .vop_link = zfs_freebsd_link, 5201185029Spjd .vop_symlink = zfs_freebsd_symlink, 5202185029Spjd .vop_readlink = zfs_freebsd_readlink, 5203185029Spjd .vop_read = zfs_freebsd_read, 5204185029Spjd .vop_write = zfs_freebsd_write, 5205185029Spjd .vop_remove = zfs_freebsd_remove, 5206185029Spjd .vop_rename = zfs_freebsd_rename, 5207185029Spjd .vop_pathconf = zfs_freebsd_pathconf, 5208185029Spjd .vop_bmap = VOP_EOPNOTSUPP, 5209185029Spjd .vop_fid = zfs_freebsd_fid, 5210185029Spjd .vop_getextattr = zfs_getextattr, 5211185029Spjd .vop_deleteextattr = zfs_deleteextattr, 5212185029Spjd .vop_setextattr = zfs_setextattr, 5213185029Spjd .vop_listextattr = zfs_listextattr, 5214192800Strasz .vop_getacl = zfs_freebsd_getacl, 5215192800Strasz .vop_setacl = zfs_freebsd_setacl, 5216192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 5217168404Spjd}; 5218168404Spjd 5219169170Spjdstruct vop_vector zfs_fifoops = { 5220185029Spjd .vop_default = &fifo_specops, 5221200162Skib .vop_fsync = zfs_freebsd_fsync, 5222185029Spjd .vop_access = zfs_freebsd_access, 5223185029Spjd .vop_getattr = zfs_freebsd_getattr, 5224185029Spjd .vop_inactive = zfs_freebsd_inactive, 5225185029Spjd .vop_read = VOP_PANIC, 5226185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 5227185029Spjd .vop_setattr = zfs_freebsd_setattr, 5228185029Spjd .vop_write = VOP_PANIC, 5229196949Strasz .vop_pathconf = zfs_freebsd_fifo_pathconf, 5230185029Spjd .vop_fid = zfs_freebsd_fid, 5231192800Strasz .vop_getacl = zfs_freebsd_getacl, 
5232192800Strasz .vop_setacl = zfs_freebsd_setacl, 5233192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 5234168404Spjd}; 5235209962Smm 5236209962Smm/* 5237209962Smm * special share hidden files vnode operations template 5238209962Smm */ 5239209962Smmstruct vop_vector zfs_shareops = { 5240209962Smm .vop_default = &default_vnodeops, 5241209962Smm .vop_access = zfs_freebsd_access, 5242209962Smm .vop_inactive = zfs_freebsd_inactive, 5243209962Smm .vop_reclaim = zfs_freebsd_reclaim, 5244209962Smm .vop_fid = zfs_freebsd_fid, 5245209962Smm .vop_pathconf = zfs_freebsd_pathconf, 5246209962Smm}; 5247