zfs_vnops.c revision 185029
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22185029Spjd * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23168404Spjd * Use is subject to license terms. 
24168404Spjd */ 25168404Spjd 26169195Spjd/* Portions Copyright 2007 Jeremy Teo */ 27169195Spjd 28168404Spjd#include <sys/types.h> 29168404Spjd#include <sys/param.h> 30168404Spjd#include <sys/time.h> 31168404Spjd#include <sys/systm.h> 32168404Spjd#include <sys/sysmacros.h> 33168404Spjd#include <sys/resource.h> 34168404Spjd#include <sys/vfs.h> 35168404Spjd#include <sys/vnode.h> 36168404Spjd#include <sys/file.h> 37168404Spjd#include <sys/stat.h> 38168404Spjd#include <sys/kmem.h> 39168404Spjd#include <sys/taskq.h> 40168404Spjd#include <sys/uio.h> 41168404Spjd#include <sys/atomic.h> 42168404Spjd#include <sys/namei.h> 43168404Spjd#include <sys/mman.h> 44168404Spjd#include <sys/cmn_err.h> 45168404Spjd#include <sys/errno.h> 46168404Spjd#include <sys/unistd.h> 47168404Spjd#include <sys/zfs_dir.h> 48168404Spjd#include <sys/zfs_acl.h> 49168404Spjd#include <sys/zfs_ioctl.h> 50168404Spjd#include <sys/fs/zfs.h> 51168404Spjd#include <sys/dmu.h> 52168404Spjd#include <sys/spa.h> 53168404Spjd#include <sys/txg.h> 54168404Spjd#include <sys/dbuf.h> 55168404Spjd#include <sys/zap.h> 56168404Spjd#include <sys/dirent.h> 57168962Spjd#include <sys/policy.h> 58168962Spjd#include <sys/sunddi.h> 59168404Spjd#include <sys/filio.h> 60168404Spjd#include <sys/zfs_ctldir.h> 61185029Spjd#include <sys/zfs_fuid.h> 62168404Spjd#include <sys/dnlc.h> 63168404Spjd#include <sys/zfs_rlock.h> 64185029Spjd#include <sys/extdirent.h> 65185029Spjd#include <sys/kidmap.h> 66168404Spjd#include <sys/bio.h> 67168404Spjd#include <sys/buf.h> 68168404Spjd#include <sys/sf_buf.h> 69168404Spjd#include <sys/sched.h> 70168404Spjd 71168404Spjd/* 72168404Spjd * Programming rules. 73168404Spjd * 74168404Spjd * Each vnode op performs some logical unit of work. 
To do this, the ZPL must
 * properly lock its in-core state, create a DMU transaction, do the work,
 * record this work in the intent log (ZIL), commit the DMU transaction,
 * and wait for the intent log to commit if it is a synchronous operation.
 * Moreover, the vnode ops must work in both normal and log replay context.
 * The ordering of events is important to avoid deadlocks and references
 * to freed memory.  The example below illustrates the following Big Rules:
 *
 *  (1)	A check must be made in each zfs thread for a mounted file system.
 *	This is done, avoiding races, by using ZFS_ENTER(zfsvfs).
 *	A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
 *	must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
 *	can return EIO from the calling function.
 *
 *  (2)	VN_RELE() should always be the last thing except for zil_commit()
 *	(if necessary) and ZFS_EXIT().  This is for 3 reasons:
 *	First, if it's the last reference, the vnode/znode
 *	can be freed, so the zp may point to freed memory.  Second, the last
 *	reference will call zfs_zinactive(), which may induce a lot of work --
 *	pushing cached pages (which acquires range locks) and syncing out
 *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
 *	which could deadlock the system if you were already holding one.
 *
 *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
 *	as they can span dmu_tx_assign() calls.
 *
 *  (4)	Always pass zfsvfs->z_assign as the second argument to dmu_tx_assign().
 *	In normal operation, this will be TXG_NOWAIT.  During ZIL replay,
 *	it will be a specific txg.  Either way, dmu_tx_assign() never blocks.
 *	This is critical because we don't want to block while holding locks.
 *	Note, in particular, that if a lock is sometimes acquired before
 *	the tx assigns, and sometimes after (e.g. z_lock), then failing to
 *	use a non-blocking assign can deadlock the system.  The scenario:
 *
 *	Thread A has grabbed a lock before calling dmu_tx_assign().
 *	Thread B is in an already-assigned tx, and blocks for this lock.
 *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
 *	forever, because the previous txg can't quiesce until B's tx commits.
 *
 *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
 *	then drop all locks, call dmu_tx_wait(), and try again.
 *
 *  (5)	If the operation succeeded, generate the intent log entry for it
 *	before dropping locks.  This ensures that the ordering of events
 *	in the intent log matches the order in which they actually occurred.
 *
 *  (6)	At the end of each vnode op, the DMU tx must always commit,
 *	regardless of whether there were any errors.
 *
 *  (7)	After dropping all locks, invoke zil_commit(zilog, seq, foid)
 *	to ensure that synchronous semantics are provided when necessary.
 *
 * In general, this is how things should be ordered in each vnode op:
 *
 *	ZFS_ENTER(zfsvfs);		// exit if unmounted
 * top:
 *	zfs_dirent_lock(&dl, ...)
// lock directory entry (may VN_HOLD()) 131168404Spjd * rw_enter(...); // grab any other locks you need 132168404Spjd * tx = dmu_tx_create(...); // get DMU tx 133168404Spjd * dmu_tx_hold_*(); // hold each object you might modify 134168404Spjd * error = dmu_tx_assign(tx, zfsvfs->z_assign); // try to assign 135168404Spjd * if (error) { 136168404Spjd * rw_exit(...); // drop locks 137168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 138168404Spjd * VN_RELE(...); // release held vnodes 139168404Spjd * if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { 140168404Spjd * dmu_tx_wait(tx); 141168404Spjd * dmu_tx_abort(tx); 142168404Spjd * goto top; 143168404Spjd * } 144168404Spjd * dmu_tx_abort(tx); // abort DMU tx 145168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 146168404Spjd * return (error); // really out of space 147168404Spjd * } 148168404Spjd * error = do_real_work(); // do whatever this VOP does 149168404Spjd * if (error == 0) 150168404Spjd * zfs_log_*(...); // on success, make ZIL entry 151168404Spjd * dmu_tx_commit(tx); // commit DMU tx -- error or not 152168404Spjd * rw_exit(...); // drop locks 153168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 154168404Spjd * VN_RELE(...); // release held vnodes 155168404Spjd * zil_commit(zilog, seq, foid); // synchronous when necessary 156168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 157168404Spjd * return (error); // done, report error 158168404Spjd */ 159185029Spjd 160168404Spjd/* ARGSUSED */ 161168404Spjdstatic int 162185029Spjdzfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 163168404Spjd{ 164168962Spjd znode_t *zp = VTOZ(*vpp); 165168404Spjd 166185029Spjd if ((flag & FWRITE) && (zp->z_phys->zp_flags & ZFS_APPENDONLY) && 167185029Spjd ((flag & FAPPEND) == 0)) { 168185029Spjd return (EPERM); 169185029Spjd } 170185029Spjd 171185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 172185029Spjd ZTOV(zp)->v_type == VREG && 173185029Spjd !(zp->z_phys->zp_flags & 
ZFS_AV_QUARANTINED) && 174185029Spjd zp->z_phys->zp_size > 0) 175185029Spjd if (fs_vscan(*vpp, cr, 0) != 0) 176185029Spjd return (EACCES); 177185029Spjd 178168404Spjd /* Keep a count of the synchronous opens in the znode */ 179168962Spjd if (flag & (FSYNC | FDSYNC)) 180168404Spjd atomic_inc_32(&zp->z_sync_cnt); 181185029Spjd 182168404Spjd return (0); 183168404Spjd} 184168404Spjd 185168404Spjd/* ARGSUSED */ 186168404Spjdstatic int 187185029Spjdzfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 188185029Spjd caller_context_t *ct) 189168404Spjd{ 190168962Spjd znode_t *zp = VTOZ(vp); 191168404Spjd 192168404Spjd /* Decrement the synchronous opens in the znode */ 193185029Spjd if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 194168404Spjd atomic_dec_32(&zp->z_sync_cnt); 195168404Spjd 196168962Spjd /* 197168962Spjd * Clean up any locks held by this process on the vp. 198168962Spjd */ 199168962Spjd cleanlocks(vp, ddi_get_pid(), 0); 200168962Spjd cleanshares(vp, ddi_get_pid()); 201168962Spjd 202185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 203185029Spjd ZTOV(zp)->v_type == VREG && 204185029Spjd !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) && 205185029Spjd zp->z_phys->zp_size > 0) 206185029Spjd VERIFY(fs_vscan(vp, cr, 1) == 0); 207185029Spjd 208168404Spjd return (0); 209168404Spjd} 210168404Spjd 211168404Spjd/* 212168404Spjd * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 213168404Spjd * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 
214168404Spjd */ 215168404Spjdstatic int 216168978Spjdzfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 217168404Spjd{ 218168404Spjd znode_t *zp = VTOZ(vp); 219168404Spjd uint64_t noff = (uint64_t)*off; /* new offset */ 220168404Spjd uint64_t file_sz; 221168404Spjd int error; 222168404Spjd boolean_t hole; 223168404Spjd 224168404Spjd file_sz = zp->z_phys->zp_size; 225168404Spjd if (noff >= file_sz) { 226168404Spjd return (ENXIO); 227168404Spjd } 228168404Spjd 229168962Spjd if (cmd == _FIO_SEEK_HOLE) 230168404Spjd hole = B_TRUE; 231168404Spjd else 232168404Spjd hole = B_FALSE; 233168404Spjd 234168404Spjd error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 235168404Spjd 236168404Spjd /* end of file? */ 237168404Spjd if ((error == ESRCH) || (noff > file_sz)) { 238168404Spjd /* 239168404Spjd * Handle the virtual hole at the end of file. 240168404Spjd */ 241168404Spjd if (hole) { 242168404Spjd *off = file_sz; 243168404Spjd return (0); 244168404Spjd } 245168404Spjd return (ENXIO); 246168404Spjd } 247168404Spjd 248168404Spjd if (noff < *off) 249168404Spjd return (error); 250168404Spjd *off = noff; 251168404Spjd return (error); 252168404Spjd} 253168404Spjd 254168404Spjd/* ARGSUSED */ 255168404Spjdstatic int 256168978Spjdzfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 257185029Spjd int *rvalp, caller_context_t *ct) 258168404Spjd{ 259168962Spjd offset_t off; 260168962Spjd int error; 261168962Spjd zfsvfs_t *zfsvfs; 262185029Spjd znode_t *zp; 263168404Spjd 264168404Spjd switch (com) { 265185029Spjd case _FIOFFS: 266168962Spjd return (0); 267168404Spjd 268168962Spjd /* 269168962Spjd * The following two ioctls are used by bfu. Faking out, 270168962Spjd * necessary to avoid bfu errors. 
271168962Spjd */ 272185029Spjd case _FIOGDIO: 273185029Spjd case _FIOSDIO: 274168962Spjd return (0); 275168962Spjd 276185029Spjd case _FIO_SEEK_DATA: 277185029Spjd case _FIO_SEEK_HOLE: 278168962Spjd if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 279168962Spjd return (EFAULT); 280168962Spjd 281185029Spjd zp = VTOZ(vp); 282185029Spjd zfsvfs = zp->z_zfsvfs; 283168404Spjd ZFS_ENTER(zfsvfs); 284185029Spjd ZFS_VERIFY_ZP(zp); 285168404Spjd 286168404Spjd /* offset parameter is in/out */ 287168404Spjd error = zfs_holey(vp, com, &off); 288168404Spjd ZFS_EXIT(zfsvfs); 289168404Spjd if (error) 290168404Spjd return (error); 291168962Spjd if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 292168962Spjd return (EFAULT); 293168404Spjd return (0); 294168404Spjd } 295168404Spjd return (ENOTTY); 296168404Spjd} 297168404Spjd 298168404Spjd/* 299168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 300168404Spjd * between the DMU cache and the memory mapped pages. What this means: 301168404Spjd * 302168404Spjd * On Write: If we find a memory mapped page, we write to *both* 303168404Spjd * the page and the dmu buffer. 304168404Spjd * 305168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 306168404Spjd * the file is memory mapped. 
307168404Spjd */ 308168404Spjdstatic int 309168404Spjdmappedwrite(vnode_t *vp, int nbytes, uio_t *uio, dmu_tx_t *tx) 310168404Spjd{ 311168404Spjd znode_t *zp = VTOZ(vp); 312168404Spjd objset_t *os = zp->z_zfsvfs->z_os; 313168404Spjd vm_object_t obj; 314168404Spjd vm_page_t m; 315168404Spjd struct sf_buf *sf; 316168404Spjd int64_t start, off; 317168404Spjd int len = nbytes; 318168404Spjd int error = 0; 319169059Spjd uint64_t dirbytes; 320168404Spjd 321168404Spjd ASSERT(vp->v_mount != NULL); 322168404Spjd obj = vp->v_object; 323168404Spjd ASSERT(obj != NULL); 324168404Spjd 325168404Spjd start = uio->uio_loffset; 326168404Spjd off = start & PAGEOFFSET; 327169059Spjd dirbytes = 0; 328168404Spjd VM_OBJECT_LOCK(obj); 329168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 330168404Spjd uint64_t bytes = MIN(PAGESIZE - off, len); 331169059Spjd uint64_t fsize; 332168404Spjd 333168404Spjdagain: 334168404Spjd if ((m = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 335168404Spjd vm_page_is_valid(m, (vm_offset_t)off, bytes)) { 336169059Spjd uint64_t woff; 337168404Spjd caddr_t va; 338168404Spjd 339168404Spjd if (vm_page_sleep_if_busy(m, FALSE, "zfsmwb")) 340168404Spjd goto again; 341169059Spjd fsize = obj->un_pager.vnp.vnp_size; 342168404Spjd vm_page_busy(m); 343169057Spjd vm_page_lock_queues(); 344169057Spjd vm_page_undirty(m); 345169057Spjd vm_page_unlock_queues(); 346168404Spjd VM_OBJECT_UNLOCK(obj); 347169059Spjd if (dirbytes > 0) { 348169059Spjd error = dmu_write_uio(os, zp->z_id, uio, 349169059Spjd dirbytes, tx); 350169059Spjd dirbytes = 0; 351169059Spjd } 352169059Spjd if (error == 0) { 353169059Spjd sched_pin(); 354169059Spjd sf = sf_buf_alloc(m, SFB_CPUPRIVATE); 355169059Spjd va = (caddr_t)sf_buf_kva(sf); 356169059Spjd woff = uio->uio_loffset - off; 357169059Spjd error = uiomove(va + off, bytes, UIO_WRITE, uio); 358169167Spjd /* 359169167Spjd * The uiomove() above could have been partially 360169167Spjd * successful, that's why we call dmu_write() 
361169167Spjd * below unconditionally. The page was marked 362169167Spjd * non-dirty above and we would lose the changes 363169167Spjd * without doing so. If the uiomove() failed 364169167Spjd * entirely, well, we just write what we got 365169167Spjd * before one more time. 366169167Spjd */ 367169059Spjd dmu_write(os, zp->z_id, woff, 368169059Spjd MIN(PAGESIZE, fsize - woff), va, tx); 369169059Spjd sf_buf_free(sf); 370169059Spjd sched_unpin(); 371169059Spjd } 372168404Spjd VM_OBJECT_LOCK(obj); 373168404Spjd vm_page_wakeup(m); 374168404Spjd } else { 375177230Spjd if (__predict_false(obj->cache != NULL)) { 376177230Spjd vm_page_cache_free(obj, OFF_TO_IDX(start), 377177230Spjd OFF_TO_IDX(start) + 1); 378177230Spjd } 379169059Spjd dirbytes += bytes; 380168404Spjd } 381168404Spjd len -= bytes; 382168404Spjd off = 0; 383168404Spjd if (error) 384168404Spjd break; 385168404Spjd } 386168404Spjd VM_OBJECT_UNLOCK(obj); 387169059Spjd if (error == 0 && dirbytes > 0) 388169059Spjd error = dmu_write_uio(os, zp->z_id, uio, dirbytes, tx); 389168404Spjd return (error); 390168404Spjd} 391168404Spjd 392168404Spjd/* 393168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 394168404Spjd * between the DMU cache and the memory mapped pages. What this means: 395168404Spjd * 396168404Spjd * On Read: We "read" preferentially from memory mapped pages, 397168404Spjd * else we default from the dmu buffer. 398168404Spjd * 399168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 400168404Spjd * the file is memory mapped. 
401168404Spjd */ 402168404Spjdstatic int 403168404Spjdmappedread(vnode_t *vp, int nbytes, uio_t *uio) 404168404Spjd{ 405168404Spjd znode_t *zp = VTOZ(vp); 406168404Spjd objset_t *os = zp->z_zfsvfs->z_os; 407168404Spjd vm_object_t obj; 408168404Spjd vm_page_t m; 409168404Spjd struct sf_buf *sf; 410168404Spjd int64_t start, off; 411168926Spjd caddr_t va; 412168404Spjd int len = nbytes; 413168404Spjd int error = 0; 414169059Spjd uint64_t dirbytes; 415168404Spjd 416168404Spjd ASSERT(vp->v_mount != NULL); 417168404Spjd obj = vp->v_object; 418168404Spjd ASSERT(obj != NULL); 419168404Spjd 420168404Spjd start = uio->uio_loffset; 421168404Spjd off = start & PAGEOFFSET; 422169059Spjd dirbytes = 0; 423168404Spjd VM_OBJECT_LOCK(obj); 424168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 425168404Spjd uint64_t bytes = MIN(PAGESIZE - off, len); 426168404Spjd 427168404Spjdagain: 428168404Spjd if ((m = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 429168404Spjd vm_page_is_valid(m, (vm_offset_t)off, bytes)) { 430168404Spjd if (vm_page_sleep_if_busy(m, FALSE, "zfsmrb")) 431168404Spjd goto again; 432168404Spjd vm_page_busy(m); 433168404Spjd VM_OBJECT_UNLOCK(obj); 434169059Spjd if (dirbytes > 0) { 435169059Spjd error = dmu_read_uio(os, zp->z_id, uio, 436169059Spjd dirbytes); 437169059Spjd dirbytes = 0; 438169059Spjd } 439169059Spjd if (error == 0) { 440169059Spjd sched_pin(); 441169059Spjd sf = sf_buf_alloc(m, SFB_CPUPRIVATE); 442169059Spjd va = (caddr_t)sf_buf_kva(sf); 443169059Spjd error = uiomove(va + off, bytes, UIO_READ, uio); 444169059Spjd sf_buf_free(sf); 445169059Spjd sched_unpin(); 446169059Spjd } 447168404Spjd VM_OBJECT_LOCK(obj); 448168404Spjd vm_page_wakeup(m); 449168926Spjd } else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) { 450168962Spjd /* 451168962Spjd * The code below is here to make sendfile(2) work 452168962Spjd * correctly with ZFS. 
As pointed out by ups@ 453168962Spjd * sendfile(2) should be changed to use VOP_GETPAGES(), 454168962Spjd * but it pessimize performance of sendfile/UFS, that's 455168962Spjd * why I handle this special case in ZFS code. 456168962Spjd */ 457168926Spjd if (vm_page_sleep_if_busy(m, FALSE, "zfsmrb")) 458168926Spjd goto again; 459168926Spjd vm_page_busy(m); 460168926Spjd VM_OBJECT_UNLOCK(obj); 461169059Spjd if (dirbytes > 0) { 462169059Spjd error = dmu_read_uio(os, zp->z_id, uio, 463169059Spjd dirbytes); 464169059Spjd dirbytes = 0; 465169059Spjd } 466169059Spjd if (error == 0) { 467169059Spjd sched_pin(); 468169059Spjd sf = sf_buf_alloc(m, SFB_CPUPRIVATE); 469169059Spjd va = (caddr_t)sf_buf_kva(sf); 470169059Spjd error = dmu_read(os, zp->z_id, start + off, 471169059Spjd bytes, (void *)(va + off)); 472169059Spjd sf_buf_free(sf); 473169059Spjd sched_unpin(); 474169059Spjd } 475168926Spjd VM_OBJECT_LOCK(obj); 476168926Spjd vm_page_wakeup(m); 477169059Spjd if (error == 0) 478169059Spjd uio->uio_resid -= bytes; 479168404Spjd } else { 480169059Spjd dirbytes += bytes; 481168404Spjd } 482168404Spjd len -= bytes; 483168404Spjd off = 0; 484168404Spjd if (error) 485168404Spjd break; 486168404Spjd } 487168404Spjd VM_OBJECT_UNLOCK(obj); 488169059Spjd if (error == 0 && dirbytes > 0) 489169059Spjd error = dmu_read_uio(os, zp->z_id, uio, dirbytes); 490168404Spjd return (error); 491168404Spjd} 492168404Spjd 493168404Spjdoffset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 494168404Spjd 495168404Spjd/* 496168404Spjd * Read bytes from specified file into supplied buffer. 497168404Spjd * 498168404Spjd * IN: vp - vnode of file to be read from. 499168404Spjd * uio - structure supplying read location, range info, 500168404Spjd * and return buffer. 501168404Spjd * ioflag - SYNC flags; used to provide FRSYNC semantics. 502168404Spjd * cr - credentials of caller. 503185029Spjd * ct - caller context 504168404Spjd * 505168404Spjd * OUT: uio - updated offset and range, buffer filled. 
506168404Spjd * 507168404Spjd * RETURN: 0 if success 508168404Spjd * error code if failure 509168404Spjd * 510168404Spjd * Side Effects: 511168404Spjd * vp - atime updated if byte count > 0 512168404Spjd */ 513168404Spjd/* ARGSUSED */ 514168404Spjdstatic int 515168962Spjdzfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 516168404Spjd{ 517168404Spjd znode_t *zp = VTOZ(vp); 518168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 519185029Spjd objset_t *os; 520168404Spjd ssize_t n, nbytes; 521168404Spjd int error; 522168404Spjd rl_t *rl; 523168404Spjd 524168404Spjd ZFS_ENTER(zfsvfs); 525185029Spjd ZFS_VERIFY_ZP(zp); 526185029Spjd os = zfsvfs->z_os; 527168404Spjd 528185029Spjd if (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) { 529185029Spjd ZFS_EXIT(zfsvfs); 530185029Spjd return (EACCES); 531185029Spjd } 532185029Spjd 533168404Spjd /* 534168404Spjd * Validate file offset 535168404Spjd */ 536168404Spjd if (uio->uio_loffset < (offset_t)0) { 537168404Spjd ZFS_EXIT(zfsvfs); 538168404Spjd return (EINVAL); 539168404Spjd } 540168404Spjd 541168404Spjd /* 542168404Spjd * Fasttrack empty reads 543168404Spjd */ 544168404Spjd if (uio->uio_resid == 0) { 545168404Spjd ZFS_EXIT(zfsvfs); 546168404Spjd return (0); 547168404Spjd } 548168404Spjd 549168404Spjd /* 550168962Spjd * Check for mandatory locks 551168962Spjd */ 552168962Spjd if (MANDMODE((mode_t)zp->z_phys->zp_mode)) { 553168962Spjd if (error = chklock(vp, FREAD, 554168962Spjd uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 555168962Spjd ZFS_EXIT(zfsvfs); 556168962Spjd return (error); 557168962Spjd } 558168962Spjd } 559168962Spjd 560168962Spjd /* 561168404Spjd * If we're in FRSYNC mode, sync out this znode before reading it. 562168404Spjd */ 563168962Spjd if (ioflag & FRSYNC) 564168404Spjd zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); 565168404Spjd 566168404Spjd /* 567168404Spjd * Lock the range against changes. 
568168404Spjd */ 569168404Spjd rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 570168404Spjd 571168404Spjd /* 572168404Spjd * If we are reading past end-of-file we can skip 573168404Spjd * to the end; but we might still need to set atime. 574168404Spjd */ 575168404Spjd if (uio->uio_loffset >= zp->z_phys->zp_size) { 576168404Spjd error = 0; 577168404Spjd goto out; 578168404Spjd } 579168404Spjd 580168404Spjd ASSERT(uio->uio_loffset < zp->z_phys->zp_size); 581168404Spjd n = MIN(uio->uio_resid, zp->z_phys->zp_size - uio->uio_loffset); 582168404Spjd 583168404Spjd while (n > 0) { 584168404Spjd nbytes = MIN(n, zfs_read_chunk_size - 585168404Spjd P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 586168404Spjd 587168404Spjd if (vn_has_cached_data(vp)) 588168404Spjd error = mappedread(vp, nbytes, uio); 589168404Spjd else 590168404Spjd error = dmu_read_uio(os, zp->z_id, uio, nbytes); 591185029Spjd if (error) { 592185029Spjd /* convert checksum errors into IO errors */ 593185029Spjd if (error == ECKSUM) 594185029Spjd error = EIO; 595168404Spjd break; 596185029Spjd } 597168962Spjd 598168404Spjd n -= nbytes; 599168404Spjd } 600168404Spjd 601168404Spjdout: 602168404Spjd zfs_range_unlock(rl); 603168404Spjd 604168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 605168404Spjd ZFS_EXIT(zfsvfs); 606168404Spjd return (error); 607168404Spjd} 608168404Spjd 609168404Spjd/* 610168404Spjd * Fault in the pages of the first n bytes specified by the uio structure. 611168404Spjd * 1 byte in each page is touched and the uio struct is unmodified. 612168404Spjd * Any error will exit this routine as this is only a best 613168404Spjd * attempt to get the pages resident. This is a copy of ufs_trans_touch(). 
614168404Spjd */ 615168404Spjdstatic void 616168404Spjdzfs_prefault_write(ssize_t n, struct uio *uio) 617168404Spjd{ 618168404Spjd struct iovec *iov; 619168404Spjd ulong_t cnt, incr; 620168404Spjd caddr_t p; 621168404Spjd 622168404Spjd if (uio->uio_segflg != UIO_USERSPACE) 623168404Spjd return; 624168404Spjd 625168404Spjd iov = uio->uio_iov; 626168404Spjd 627168404Spjd while (n) { 628168404Spjd cnt = MIN(iov->iov_len, n); 629168404Spjd if (cnt == 0) { 630168404Spjd /* empty iov entry */ 631168404Spjd iov++; 632168404Spjd continue; 633168404Spjd } 634168404Spjd n -= cnt; 635168404Spjd /* 636168404Spjd * touch each page in this segment. 637168404Spjd */ 638168404Spjd p = iov->iov_base; 639168404Spjd while (cnt) { 640168404Spjd if (fubyte(p) == -1) 641168404Spjd return; 642168404Spjd incr = MIN(cnt, PAGESIZE); 643168404Spjd p += incr; 644168404Spjd cnt -= incr; 645168404Spjd } 646168404Spjd /* 647168404Spjd * touch the last byte in case it straddles a page. 648168404Spjd */ 649168404Spjd p--; 650168404Spjd if (fubyte(p) == -1) 651168404Spjd return; 652168404Spjd iov++; 653168404Spjd } 654168404Spjd} 655168404Spjd 656168404Spjd/* 657168404Spjd * Write the bytes to a file. 658168404Spjd * 659168404Spjd * IN: vp - vnode of file to be written to. 660168404Spjd * uio - structure supplying write location, range info, 661168404Spjd * and data buffer. 662168404Spjd * ioflag - IO_APPEND flag set if in append mode. 663168404Spjd * cr - credentials of caller. 664185029Spjd * ct - caller context (NFS/CIFS fem monitor only) 665168404Spjd * 666168404Spjd * OUT: uio - updated offset and range. 
667168404Spjd * 668168404Spjd * RETURN: 0 if success 669168404Spjd * error code if failure 670168404Spjd * 671168404Spjd * Timestamps: 672168404Spjd * vp - ctime|mtime updated if byte count > 0 673168404Spjd */ 674168404Spjd/* ARGSUSED */ 675168404Spjdstatic int 676168962Spjdzfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 677168404Spjd{ 678168404Spjd znode_t *zp = VTOZ(vp); 679168962Spjd rlim64_t limit = MAXOFFSET_T; 680168404Spjd ssize_t start_resid = uio->uio_resid; 681168404Spjd ssize_t tx_bytes; 682168404Spjd uint64_t end_size; 683168404Spjd dmu_tx_t *tx; 684168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 685185029Spjd zilog_t *zilog; 686168404Spjd offset_t woff; 687168404Spjd ssize_t n, nbytes; 688168404Spjd rl_t *rl; 689168404Spjd int max_blksz = zfsvfs->z_max_blksz; 690185029Spjd uint64_t pflags; 691168404Spjd int error; 692168404Spjd 693168404Spjd /* 694168404Spjd * Fasttrack empty write 695168404Spjd */ 696168404Spjd n = start_resid; 697168404Spjd if (n == 0) 698168404Spjd return (0); 699168404Spjd 700168962Spjd if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 701168962Spjd limit = MAXOFFSET_T; 702168962Spjd 703168404Spjd ZFS_ENTER(zfsvfs); 704185029Spjd ZFS_VERIFY_ZP(zp); 705168404Spjd 706168404Spjd /* 707185029Spjd * If immutable or not appending then return EPERM 708185029Spjd */ 709185029Spjd pflags = zp->z_phys->zp_flags; 710185029Spjd if ((pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 711185029Spjd ((pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 712185029Spjd (uio->uio_loffset < zp->z_phys->zp_size))) { 713185029Spjd ZFS_EXIT(zfsvfs); 714185029Spjd return (EPERM); 715185029Spjd } 716185029Spjd 717185029Spjd zilog = zfsvfs->z_log; 718185029Spjd 719185029Spjd /* 720168404Spjd * Pre-fault the pages to ensure slow (eg NFS) pages 721168404Spjd * don't hold up txg. 722168404Spjd */ 723168404Spjd zfs_prefault_write(n, uio); 724168404Spjd 725168404Spjd /* 726168404Spjd * If in append mode, set the io offset pointer to eof. 
727168404Spjd */ 728168404Spjd if (ioflag & IO_APPEND) { 729168404Spjd /* 730168404Spjd * Range lock for a file append: 731168404Spjd * The value for the start of range will be determined by 732168404Spjd * zfs_range_lock() (to guarantee append semantics). 733168404Spjd * If this write will cause the block size to increase, 734168404Spjd * zfs_range_lock() will lock the entire file, so we must 735168404Spjd * later reduce the range after we grow the block size. 736168404Spjd */ 737168404Spjd rl = zfs_range_lock(zp, 0, n, RL_APPEND); 738168404Spjd if (rl->r_len == UINT64_MAX) { 739168404Spjd /* overlocked, zp_size can't change */ 740168404Spjd woff = uio->uio_loffset = zp->z_phys->zp_size; 741168404Spjd } else { 742168404Spjd woff = uio->uio_loffset = rl->r_off; 743168404Spjd } 744168404Spjd } else { 745168404Spjd woff = uio->uio_loffset; 746168404Spjd /* 747168404Spjd * Validate file offset 748168404Spjd */ 749168404Spjd if (woff < 0) { 750168404Spjd ZFS_EXIT(zfsvfs); 751168404Spjd return (EINVAL); 752168404Spjd } 753168404Spjd 754168404Spjd /* 755168404Spjd * If we need to grow the block size then zfs_range_lock() 756168404Spjd * will lock a wider range than we request here. 757168404Spjd * Later after growing the block size we reduce the range. 
758168404Spjd */ 759168404Spjd rl = zfs_range_lock(zp, woff, n, RL_WRITER); 760168404Spjd } 761168404Spjd 762168962Spjd if (woff >= limit) { 763168962Spjd zfs_range_unlock(rl); 764168962Spjd ZFS_EXIT(zfsvfs); 765168962Spjd return (EFBIG); 766168962Spjd } 767168962Spjd 768168962Spjd if ((woff + n) > limit || woff > (limit - n)) 769168962Spjd n = limit - woff; 770168962Spjd 771168962Spjd /* 772168962Spjd * Check for mandatory locks 773168962Spjd */ 774168962Spjd if (MANDMODE((mode_t)zp->z_phys->zp_mode) && 775168962Spjd (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 776168962Spjd zfs_range_unlock(rl); 777168962Spjd ZFS_EXIT(zfsvfs); 778168962Spjd return (error); 779168962Spjd } 780168404Spjd end_size = MAX(zp->z_phys->zp_size, woff + n); 781168404Spjd 782168404Spjd /* 783168404Spjd * Write the file in reasonable size chunks. Each chunk is written 784168404Spjd * in a separate transaction; this keeps the intent log records small 785168404Spjd * and allows us to do more fine-grained space accounting. 786168404Spjd */ 787168404Spjd while (n > 0) { 788168404Spjd /* 789168404Spjd * Start a transaction. 790168404Spjd */ 791168404Spjd woff = uio->uio_loffset; 792168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 793168404Spjd dmu_tx_hold_bonus(tx, zp->z_id); 794168404Spjd dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 795168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 796168404Spjd if (error) { 797168404Spjd if (error == ERESTART && 798168404Spjd zfsvfs->z_assign == TXG_NOWAIT) { 799168404Spjd dmu_tx_wait(tx); 800168404Spjd dmu_tx_abort(tx); 801168404Spjd continue; 802168404Spjd } 803168404Spjd dmu_tx_abort(tx); 804168404Spjd break; 805168404Spjd } 806168404Spjd 807168404Spjd /* 808168404Spjd * If zfs_range_lock() over-locked we grow the blocksize 809168404Spjd * and then reduce the lock range. This will only happen 810168404Spjd * on the first iteration since zfs_range_reduce() will 811168404Spjd * shrink down r_len to the appropriate size. 
812168404Spjd */ 813168404Spjd if (rl->r_len == UINT64_MAX) { 814168404Spjd uint64_t new_blksz; 815168404Spjd 816168404Spjd if (zp->z_blksz > max_blksz) { 817168404Spjd ASSERT(!ISP2(zp->z_blksz)); 818168404Spjd new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); 819168404Spjd } else { 820168404Spjd new_blksz = MIN(end_size, max_blksz); 821168404Spjd } 822168404Spjd zfs_grow_blocksize(zp, new_blksz, tx); 823168404Spjd zfs_range_reduce(rl, woff, n); 824168404Spjd } 825168404Spjd 826168404Spjd /* 827168404Spjd * XXX - should we really limit each write to z_max_blksz? 828168404Spjd * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 829168404Spjd */ 830168404Spjd nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 831168404Spjd 832168404Spjd if (woff + nbytes > zp->z_phys->zp_size) 833168404Spjd vnode_pager_setsize(vp, woff + nbytes); 834168404Spjd 835169302Spjd rw_enter(&zp->z_map_lock, RW_READER); 836169302Spjd 837168962Spjd tx_bytes = uio->uio_resid; 838168404Spjd if (vn_has_cached_data(vp)) { 839168404Spjd rw_exit(&zp->z_map_lock); 840168404Spjd error = mappedwrite(vp, nbytes, uio, tx); 841168404Spjd } else { 842168404Spjd error = dmu_write_uio(zfsvfs->z_os, zp->z_id, 843168404Spjd uio, nbytes, tx); 844168404Spjd rw_exit(&zp->z_map_lock); 845168404Spjd } 846168404Spjd tx_bytes -= uio->uio_resid; 847168404Spjd 848168404Spjd /* 849168404Spjd * If we made no progress, we're done. If we made even 850168404Spjd * partial progress, update the znode and ZIL accordingly. 851168404Spjd */ 852168404Spjd if (tx_bytes == 0) { 853168404Spjd dmu_tx_commit(tx); 854168404Spjd ASSERT(error != 0); 855168404Spjd break; 856168404Spjd } 857168404Spjd 858168404Spjd /* 859168404Spjd * Clear Set-UID/Set-GID bits on successful write if not 860168404Spjd * privileged and at least one of the excute bits is set. 
861168404Spjd * 862168404Spjd * It would be nice to to this after all writes have 863168404Spjd * been done, but that would still expose the ISUID/ISGID 864168404Spjd * to another app after the partial write is committed. 865185029Spjd * 866185029Spjd * Note: we don't call zfs_fuid_map_id() here because 867185029Spjd * user 0 is not an ephemeral uid. 868168404Spjd */ 869168404Spjd mutex_enter(&zp->z_acl_lock); 870168404Spjd if ((zp->z_phys->zp_mode & (S_IXUSR | (S_IXUSR >> 3) | 871168404Spjd (S_IXUSR >> 6))) != 0 && 872168404Spjd (zp->z_phys->zp_mode & (S_ISUID | S_ISGID)) != 0 && 873185029Spjd secpolicy_vnode_setid_retain(vp, cr, 874168404Spjd (zp->z_phys->zp_mode & S_ISUID) != 0 && 875168404Spjd zp->z_phys->zp_uid == 0) != 0) { 876185029Spjd zp->z_phys->zp_mode &= ~(S_ISUID | S_ISGID); 877168404Spjd } 878168404Spjd mutex_exit(&zp->z_acl_lock); 879168404Spjd 880168404Spjd /* 881168404Spjd * Update time stamp. NOTE: This marks the bonus buffer as 882168404Spjd * dirty, so we don't have to do it again for zp_size. 883168404Spjd */ 884168404Spjd zfs_time_stamper(zp, CONTENT_MODIFIED, tx); 885168404Spjd 886168404Spjd /* 887168404Spjd * Update the file size (zp_size) if it has changed; 888168404Spjd * account for possible concurrent updates. 889168404Spjd */ 890168404Spjd while ((end_size = zp->z_phys->zp_size) < uio->uio_loffset) 891168404Spjd (void) atomic_cas_64(&zp->z_phys->zp_size, end_size, 892168404Spjd uio->uio_loffset); 893168404Spjd zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 894168404Spjd dmu_tx_commit(tx); 895168404Spjd 896168404Spjd if (error != 0) 897168404Spjd break; 898168404Spjd ASSERT(tx_bytes == nbytes); 899168404Spjd n -= nbytes; 900168404Spjd } 901168404Spjd 902168404Spjd zfs_range_unlock(rl); 903168404Spjd 904168404Spjd /* 905168404Spjd * If we're in replay mode, or we made no progress, return error. 906168404Spjd * Otherwise, it's at least a partial write, so it's successful. 
907168404Spjd */ 908168404Spjd if (zfsvfs->z_assign >= TXG_INITIAL || uio->uio_resid == start_resid) { 909168404Spjd ZFS_EXIT(zfsvfs); 910168404Spjd return (error); 911168404Spjd } 912168404Spjd 913168962Spjd if (ioflag & (FSYNC | FDSYNC)) 914168404Spjd zil_commit(zilog, zp->z_last_itx, zp->z_id); 915168404Spjd 916168404Spjd ZFS_EXIT(zfsvfs); 917168404Spjd return (0); 918168404Spjd} 919168404Spjd 920168404Spjdvoid 921168404Spjdzfs_get_done(dmu_buf_t *db, void *vzgd) 922168404Spjd{ 923168404Spjd zgd_t *zgd = (zgd_t *)vzgd; 924168404Spjd rl_t *rl = zgd->zgd_rl; 925168404Spjd vnode_t *vp = ZTOV(rl->r_zp); 926168404Spjd int vfslocked; 927168404Spjd 928168404Spjd vfslocked = VFS_LOCK_GIANT(vp->v_vfsp); 929168404Spjd dmu_buf_rele(db, vzgd); 930168404Spjd zfs_range_unlock(rl); 931168404Spjd VN_RELE(vp); 932185029Spjd zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 933168404Spjd kmem_free(zgd, sizeof (zgd_t)); 934168404Spjd VFS_UNLOCK_GIANT(vfslocked); 935168404Spjd} 936168404Spjd 937168404Spjd/* 938168404Spjd * Get data to generate a TX_WRITE intent log record. 
939168404Spjd */ 940168404Spjdint 941168404Spjdzfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 942168404Spjd{ 943168404Spjd zfsvfs_t *zfsvfs = arg; 944168404Spjd objset_t *os = zfsvfs->z_os; 945168404Spjd znode_t *zp; 946168404Spjd uint64_t off = lr->lr_offset; 947168404Spjd dmu_buf_t *db; 948168404Spjd rl_t *rl; 949168404Spjd zgd_t *zgd; 950168404Spjd int dlen = lr->lr_length; /* length of user data */ 951168404Spjd int error = 0; 952168404Spjd 953168404Spjd ASSERT(zio); 954168404Spjd ASSERT(dlen != 0); 955168404Spjd 956168404Spjd /* 957168404Spjd * Nothing to do if the file has been removed 958168404Spjd */ 959168404Spjd if (zfs_zget(zfsvfs, lr->lr_foid, &zp) != 0) 960168404Spjd return (ENOENT); 961168404Spjd if (zp->z_unlinked) { 962168404Spjd VN_RELE(ZTOV(zp)); 963168404Spjd return (ENOENT); 964168404Spjd } 965168404Spjd 966168404Spjd /* 967168404Spjd * Write records come in two flavors: immediate and indirect. 968168404Spjd * For small writes it's cheaper to store the data with the 969168404Spjd * log record (immediate); for large writes it's cheaper to 970168404Spjd * sync the data and get a pointer to it (indirect) so that 971168404Spjd * we don't have to write the data twice. 972168404Spjd */ 973168404Spjd if (buf != NULL) { /* immediate write */ 974168404Spjd rl = zfs_range_lock(zp, off, dlen, RL_READER); 975168404Spjd /* test for truncation needs to be done while range locked */ 976168404Spjd if (off >= zp->z_phys->zp_size) { 977168404Spjd error = ENOENT; 978168404Spjd goto out; 979168404Spjd } 980168404Spjd VERIFY(0 == dmu_read(os, lr->lr_foid, off, dlen, buf)); 981168404Spjd } else { /* indirect write */ 982168404Spjd uint64_t boff; /* block starting offset */ 983168404Spjd 984168404Spjd /* 985168404Spjd * Have to lock the whole block to ensure when it's 986168404Spjd * written out and it's checksum is being calculated 987168404Spjd * that no one can change the data. 
We need to re-check 988168404Spjd * blocksize after we get the lock in case it's changed! 989168404Spjd */ 990168404Spjd for (;;) { 991168404Spjd if (ISP2(zp->z_blksz)) { 992168404Spjd boff = P2ALIGN_TYPED(off, zp->z_blksz, 993168404Spjd uint64_t); 994168404Spjd } else { 995168404Spjd boff = 0; 996168404Spjd } 997168404Spjd dlen = zp->z_blksz; 998168404Spjd rl = zfs_range_lock(zp, boff, dlen, RL_READER); 999168404Spjd if (zp->z_blksz == dlen) 1000168404Spjd break; 1001168404Spjd zfs_range_unlock(rl); 1002168404Spjd } 1003168404Spjd /* test for truncation needs to be done while range locked */ 1004168404Spjd if (off >= zp->z_phys->zp_size) { 1005168404Spjd error = ENOENT; 1006168404Spjd goto out; 1007168404Spjd } 1008168404Spjd zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP); 1009168404Spjd zgd->zgd_rl = rl; 1010168404Spjd zgd->zgd_zilog = zfsvfs->z_log; 1011168404Spjd zgd->zgd_bp = &lr->lr_blkptr; 1012168404Spjd VERIFY(0 == dmu_buf_hold(os, lr->lr_foid, boff, zgd, &db)); 1013168404Spjd ASSERT(boff == db->db_offset); 1014168404Spjd lr->lr_blkoff = off - boff; 1015168404Spjd error = dmu_sync(zio, db, &lr->lr_blkptr, 1016168404Spjd lr->lr_common.lrc_txg, zfs_get_done, zgd); 1017185029Spjd ASSERT((error && error != EINPROGRESS) || 1018185029Spjd lr->lr_length <= zp->z_blksz); 1019185029Spjd if (error == 0) 1020185029Spjd zil_add_block(zfsvfs->z_log, &lr->lr_blkptr); 1021168404Spjd /* 1022168404Spjd * If we get EINPROGRESS, then we need to wait for a 1023168404Spjd * write IO initiated by dmu_sync() to complete before 1024168404Spjd * we can release this dbuf. We will finish everything 1025168404Spjd * up in the zfs_get_done() callback. 
1026168404Spjd */ 1027168404Spjd if (error == EINPROGRESS) 1028168404Spjd return (0); 1029168404Spjd dmu_buf_rele(db, zgd); 1030168404Spjd kmem_free(zgd, sizeof (zgd_t)); 1031168404Spjd } 1032168404Spjdout: 1033168404Spjd zfs_range_unlock(rl); 1034168404Spjd VN_RELE(ZTOV(zp)); 1035168404Spjd return (error); 1036168404Spjd} 1037168404Spjd 1038168404Spjd/*ARGSUSED*/ 1039168404Spjdstatic int 1040185029Spjdzfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1041185029Spjd caller_context_t *ct) 1042168404Spjd{ 1043168404Spjd znode_t *zp = VTOZ(vp); 1044168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1045168404Spjd int error; 1046168404Spjd 1047168404Spjd ZFS_ENTER(zfsvfs); 1048185029Spjd ZFS_VERIFY_ZP(zp); 1049185029Spjd 1050185029Spjd if (flag & V_ACE_MASK) 1051185029Spjd error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1052185029Spjd else 1053185029Spjd error = zfs_zaccess_rwx(zp, mode, flag, cr); 1054185029Spjd 1055168404Spjd ZFS_EXIT(zfsvfs); 1056168404Spjd return (error); 1057168404Spjd} 1058168404Spjd 1059168404Spjd/* 1060168404Spjd * Lookup an entry in a directory, or an extended attribute directory. 1061168404Spjd * If it exists, return a held vnode reference for it. 1062168404Spjd * 1063168404Spjd * IN: dvp - vnode of directory to search. 1064168404Spjd * nm - name of entry to lookup. 1065168404Spjd * pnp - full pathname to lookup [UNUSED]. 1066168404Spjd * flags - LOOKUP_XATTR set if looking for an attribute. 1067168404Spjd * rdir - root directory vnode [UNUSED]. 1068168404Spjd * cr - credentials of caller. 1069185029Spjd * ct - caller context 1070185029Spjd * direntflags - directory lookup flags 1071185029Spjd * realpnp - returned pathname. 1072168404Spjd * 1073168404Spjd * OUT: vpp - vnode of located entry, NULL if not found. 
1074168404Spjd * 1075168404Spjd * RETURN: 0 if success 1076168404Spjd * error code if failure 1077168404Spjd * 1078168404Spjd * Timestamps: 1079168404Spjd * NA 1080168404Spjd */ 1081168404Spjd/* ARGSUSED */ 1082168962Spjdstatic int 1083168962Spjdzfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1084185029Spjd int nameiop, cred_t *cr, kthread_t *td, int flags) 1085168404Spjd{ 1086168962Spjd znode_t *zdp = VTOZ(dvp); 1087168962Spjd zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1088168962Spjd int error; 1089185029Spjd int *direntflags = NULL; 1090185029Spjd void *realpnp = NULL; 1091168404Spjd 1092168404Spjd ZFS_ENTER(zfsvfs); 1093185029Spjd ZFS_VERIFY_ZP(zdp); 1094168404Spjd 1095168404Spjd *vpp = NULL; 1096168404Spjd 1097185029Spjd if (flags & LOOKUP_XATTR) { 1098168404Spjd#ifdef TODO 1099168404Spjd /* 1100168404Spjd * If the xattr property is off, refuse the lookup request. 1101168404Spjd */ 1102168404Spjd if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1103168404Spjd ZFS_EXIT(zfsvfs); 1104168404Spjd return (EINVAL); 1105168404Spjd } 1106185029Spjd#endif 1107168404Spjd 1108168404Spjd /* 1109168404Spjd * We don't allow recursive attributes.. 1110168404Spjd * Maybe someday we will. 1111168404Spjd */ 1112168404Spjd if (zdp->z_phys->zp_flags & ZFS_XATTR) { 1113168404Spjd ZFS_EXIT(zfsvfs); 1114168404Spjd return (EINVAL); 1115168404Spjd } 1116168404Spjd 1117168404Spjd if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1118168404Spjd ZFS_EXIT(zfsvfs); 1119168404Spjd return (error); 1120168404Spjd } 1121168404Spjd 1122168404Spjd /* 1123168404Spjd * Do we have permission to get into attribute directory? 
1124168404Spjd */ 1125168404Spjd 1126185029Spjd if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1127185029Spjd B_FALSE, cr)) { 1128168404Spjd VN_RELE(*vpp); 1129185029Spjd *vpp = NULL; 1130168404Spjd } 1131168404Spjd 1132168404Spjd ZFS_EXIT(zfsvfs); 1133168404Spjd return (error); 1134168404Spjd } 1135168404Spjd 1136168404Spjd if (dvp->v_type != VDIR) { 1137168404Spjd ZFS_EXIT(zfsvfs); 1138168404Spjd return (ENOTDIR); 1139168404Spjd } 1140168404Spjd 1141168404Spjd /* 1142168404Spjd * Check accessibility of directory. 1143168404Spjd */ 1144168404Spjd 1145185029Spjd if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1146168404Spjd ZFS_EXIT(zfsvfs); 1147168404Spjd return (error); 1148168404Spjd } 1149168404Spjd 1150185029Spjd if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1151185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1152185029Spjd ZFS_EXIT(zfsvfs); 1153185029Spjd return (EILSEQ); 1154185029Spjd } 1155168404Spjd 1156185029Spjd error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1157185029Spjd if (error == 0) { 1158168962Spjd /* 1159168962Spjd * Convert device special files 1160168962Spjd */ 1161168962Spjd if (IS_DEVVP(*vpp)) { 1162168962Spjd vnode_t *svp; 1163168962Spjd 1164168962Spjd svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1165168962Spjd VN_RELE(*vpp); 1166168962Spjd if (svp == NULL) 1167168962Spjd error = ENOSYS; 1168168962Spjd else 1169168962Spjd *vpp = svp; 1170168962Spjd } 1171168962Spjd } 1172168962Spjd 1173168404Spjd ZFS_EXIT(zfsvfs); 1174168404Spjd 1175168404Spjd /* Translate errors and add SAVENAME when needed. 
*/ 1176168404Spjd if (cnp->cn_flags & ISLASTCN) { 1177168404Spjd switch (nameiop) { 1178168404Spjd case CREATE: 1179168404Spjd case RENAME: 1180168404Spjd if (error == ENOENT) { 1181168404Spjd error = EJUSTRETURN; 1182168404Spjd cnp->cn_flags |= SAVENAME; 1183168404Spjd break; 1184168404Spjd } 1185168404Spjd /* FALLTHROUGH */ 1186168404Spjd case DELETE: 1187168404Spjd if (error == 0) 1188168404Spjd cnp->cn_flags |= SAVENAME; 1189168404Spjd break; 1190168404Spjd } 1191168404Spjd } 1192168404Spjd if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) { 1193169198Spjd int ltype = 0; 1194169198Spjd 1195169198Spjd if (cnp->cn_flags & ISDOTDOT) { 1196176559Sattilio ltype = VOP_ISLOCKED(dvp); 1197175294Sattilio VOP_UNLOCK(dvp, 0); 1198169198Spjd } 1199175202Sattilio error = vn_lock(*vpp, cnp->cn_lkflags); 1200168962Spjd if (cnp->cn_flags & ISDOTDOT) 1201175202Sattilio vn_lock(dvp, ltype | LK_RETRY); 1202169172Spjd if (error != 0) { 1203169172Spjd VN_RELE(*vpp); 1204169172Spjd *vpp = NULL; 1205169172Spjd return (error); 1206169172Spjd } 1207168404Spjd } 1208168404Spjd 1209168404Spjd#ifdef FREEBSD_NAMECACHE 1210168404Spjd /* 1211168404Spjd * Insert name into cache (as non-existent) if appropriate. 1212168404Spjd */ 1213168404Spjd if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) 1214168404Spjd cache_enter(dvp, *vpp, cnp); 1215169170Spjd /* 1216169170Spjd * Insert name into cache if appropriate. 1217169170Spjd */ 1218168404Spjd if (error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1219168404Spjd if (!(cnp->cn_flags & ISLASTCN) || 1220168404Spjd (nameiop != DELETE && nameiop != RENAME)) { 1221168404Spjd cache_enter(dvp, *vpp, cnp); 1222168404Spjd } 1223168404Spjd } 1224168404Spjd#endif 1225168404Spjd 1226168404Spjd return (error); 1227168404Spjd} 1228168404Spjd 1229168404Spjd/* 1230168404Spjd * Attempt to create a new entry in a directory. If the entry 1231168404Spjd * already exists, truncate the file if permissible, else return 1232168404Spjd * an error. 
Return the vp of the created or trunc'd file. 1233168404Spjd * 1234168404Spjd * IN: dvp - vnode of directory to put new file entry in. 1235168404Spjd * name - name of new file entry. 1236168404Spjd * vap - attributes of new file. 1237168404Spjd * excl - flag indicating exclusive or non-exclusive mode. 1238168404Spjd * mode - mode to open file with. 1239168404Spjd * cr - credentials of caller. 1240168404Spjd * flag - large file flag [UNUSED]. 1241185029Spjd * ct - caller context 1242185029Spjd * vsecp - ACL to be set 1243168404Spjd * 1244168404Spjd * OUT: vpp - vnode of created or trunc'd entry. 1245168404Spjd * 1246168404Spjd * RETURN: 0 if success 1247168404Spjd * error code if failure 1248168404Spjd * 1249168404Spjd * Timestamps: 1250168404Spjd * dvp - ctime|mtime updated if new entry created 1251168404Spjd * vp - ctime|mtime always, atime if new 1252168404Spjd */ 1253185029Spjd 1254168404Spjd/* ARGSUSED */ 1255168404Spjdstatic int 1256168962Spjdzfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 1257185029Spjd vnode_t **vpp, cred_t *cr, kthread_t *td) 1258168404Spjd{ 1259168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1260168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1261185029Spjd zilog_t *zilog; 1262185029Spjd objset_t *os; 1263168404Spjd zfs_dirlock_t *dl; 1264168404Spjd dmu_tx_t *tx; 1265168404Spjd int error; 1266185029Spjd zfs_acl_t *aclp = NULL; 1267185029Spjd zfs_fuid_info_t *fuidp = NULL; 1268185029Spjd void *vsecp = NULL; 1269185029Spjd int flag = 0; 1270168404Spjd 1271185029Spjd /* 1272185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 1273185029Spjd * make sure file system is at proper version 1274185029Spjd */ 1275185029Spjd 1276185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 1277185029Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 1278185029Spjd IS_EPHEMERAL(crgetuid(cr)) || IS_EPHEMERAL(crgetgid(cr)))) 1279185029Spjd return (EINVAL); 1280185029Spjd 1281168404Spjd ZFS_ENTER(zfsvfs); 1282185029Spjd ZFS_VERIFY_ZP(dzp); 
1283185029Spjd os = zfsvfs->z_os; 1284185029Spjd zilog = zfsvfs->z_log; 1285168404Spjd 1286185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1287185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1288185029Spjd ZFS_EXIT(zfsvfs); 1289185029Spjd return (EILSEQ); 1290185029Spjd } 1291185029Spjd 1292185029Spjd if (vap->va_mask & AT_XVATTR) { 1293185029Spjd if ((error = secpolicy_xvattr((xvattr_t *)vap, 1294185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 1295185029Spjd ZFS_EXIT(zfsvfs); 1296185029Spjd return (error); 1297185029Spjd } 1298185029Spjd } 1299168404Spjdtop: 1300168404Spjd *vpp = NULL; 1301168404Spjd 1302182905Strasz if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 1303182905Strasz vap->va_mode &= ~S_ISVTX; 1304168404Spjd 1305168404Spjd if (*name == '\0') { 1306168404Spjd /* 1307168404Spjd * Null component name refers to the directory itself. 1308168404Spjd */ 1309168404Spjd VN_HOLD(dvp); 1310168404Spjd zp = dzp; 1311168404Spjd dl = NULL; 1312168404Spjd error = 0; 1313168404Spjd } else { 1314168404Spjd /* possible VN_HOLD(zp) */ 1315185029Spjd int zflg = 0; 1316185029Spjd 1317185029Spjd if (flag & FIGNORECASE) 1318185029Spjd zflg |= ZCILOOK; 1319185029Spjd 1320185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1321185029Spjd NULL, NULL); 1322185029Spjd if (error) { 1323168404Spjd if (strcmp(name, "..") == 0) 1324168404Spjd error = EISDIR; 1325168404Spjd ZFS_EXIT(zfsvfs); 1326185029Spjd if (aclp) 1327185029Spjd zfs_acl_free(aclp); 1328168404Spjd return (error); 1329168404Spjd } 1330168404Spjd } 1331185029Spjd if (vsecp && aclp == NULL) { 1332185029Spjd error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, &aclp); 1333185029Spjd if (error) { 1334185029Spjd ZFS_EXIT(zfsvfs); 1335185029Spjd if (dl) 1336185029Spjd zfs_dirent_unlock(dl); 1337185029Spjd return (error); 1338185029Spjd } 1339185029Spjd } 1340168404Spjd 1341185029Spjd if (zp == NULL) { 1342185029Spjd uint64_t txtype; 1343168404Spjd 1344168404Spjd /* 
1345168404Spjd * Create a new file object and update the directory 1346168404Spjd * to reference it. 1347168404Spjd */ 1348185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1349168404Spjd goto out; 1350168404Spjd } 1351168404Spjd 1352168404Spjd /* 1353168404Spjd * We only support the creation of regular files in 1354168404Spjd * extended attribute directories. 1355168404Spjd */ 1356168404Spjd if ((dzp->z_phys->zp_flags & ZFS_XATTR) && 1357168404Spjd (vap->va_type != VREG)) { 1358168404Spjd error = EINVAL; 1359168404Spjd goto out; 1360168404Spjd } 1361168404Spjd 1362168404Spjd tx = dmu_tx_create(os); 1363168404Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1364185029Spjd if ((aclp && aclp->z_has_fuids) || IS_EPHEMERAL(crgetuid(cr)) || 1365185029Spjd IS_EPHEMERAL(crgetgid(cr))) { 1366185029Spjd if (zfsvfs->z_fuid_obj == 0) { 1367185029Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1368185029Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 1369185029Spjd FUID_SIZE_ESTIMATE(zfsvfs)); 1370185029Spjd dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 1371185029Spjd FALSE, NULL); 1372185029Spjd } else { 1373185029Spjd dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj); 1374185029Spjd dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0, 1375185029Spjd FUID_SIZE_ESTIMATE(zfsvfs)); 1376185029Spjd } 1377185029Spjd } 1378168404Spjd dmu_tx_hold_bonus(tx, dzp->z_id); 1379168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 1380185029Spjd if ((dzp->z_phys->zp_flags & ZFS_INHERIT_ACE) || aclp) { 1381168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1382168404Spjd 0, SPA_MAXBLOCKSIZE); 1383185029Spjd } 1384168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 1385168404Spjd if (error) { 1386168404Spjd zfs_dirent_unlock(dl); 1387168404Spjd if (error == ERESTART && 1388168404Spjd zfsvfs->z_assign == TXG_NOWAIT) { 1389168404Spjd dmu_tx_wait(tx); 1390168404Spjd dmu_tx_abort(tx); 1391168404Spjd goto top; 1392168404Spjd } 1393168404Spjd dmu_tx_abort(tx); 1394168404Spjd ZFS_EXIT(zfsvfs); 1395185029Spjd 
if (aclp) 1396185029Spjd zfs_acl_free(aclp); 1397168404Spjd return (error); 1398168404Spjd } 1399185029Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, aclp, &fuidp); 1400168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 1401185029Spjd txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1402185029Spjd if (flag & FIGNORECASE) 1403185029Spjd txtype |= TX_CI; 1404185029Spjd zfs_log_create(zilog, tx, txtype, dzp, zp, name, 1405185029Spjd vsecp, fuidp, vap); 1406185029Spjd if (fuidp) 1407185029Spjd zfs_fuid_info_free(fuidp); 1408168404Spjd dmu_tx_commit(tx); 1409168404Spjd } else { 1410185029Spjd int aflags = (flag & FAPPEND) ? V_APPEND : 0; 1411185029Spjd 1412168404Spjd /* 1413168404Spjd * A directory entry already exists for this name. 1414168404Spjd */ 1415168404Spjd /* 1416168962Spjd * Can't truncate an existing file if in exclusive mode. 1417168962Spjd */ 1418168962Spjd if (excl == EXCL) { 1419168962Spjd error = EEXIST; 1420168962Spjd goto out; 1421168962Spjd } 1422168962Spjd /* 1423168404Spjd * Can't open a directory for writing. 1424168404Spjd */ 1425168404Spjd if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1426168404Spjd error = EISDIR; 1427168404Spjd goto out; 1428168404Spjd } 1429168404Spjd /* 1430168404Spjd * Verify requested access to file. 1431168404Spjd */ 1432185029Spjd if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 1433168404Spjd goto out; 1434168404Spjd } 1435168404Spjd 1436168404Spjd mutex_enter(&dzp->z_lock); 1437168404Spjd dzp->z_seq++; 1438168404Spjd mutex_exit(&dzp->z_lock); 1439168404Spjd 1440168404Spjd /* 1441168404Spjd * Truncate regular files if requested. 
1442168404Spjd */ 1443168404Spjd if ((ZTOV(zp)->v_type == VREG) && 1444168404Spjd (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 1445185029Spjd /* we can't hold any locks when calling zfs_freesp() */ 1446185029Spjd zfs_dirent_unlock(dl); 1447185029Spjd dl = NULL; 1448168404Spjd error = zfs_freesp(zp, 0, 0, mode, TRUE); 1449185029Spjd if (error == 0) { 1450185029Spjd vnevent_create(ZTOV(zp), ct); 1451168404Spjd } 1452168404Spjd } 1453168404Spjd } 1454168404Spjdout: 1455168404Spjd if (dl) 1456168404Spjd zfs_dirent_unlock(dl); 1457168404Spjd 1458168404Spjd if (error) { 1459168404Spjd if (zp) 1460168404Spjd VN_RELE(ZTOV(zp)); 1461168962Spjd } else { 1462168962Spjd *vpp = ZTOV(zp); 1463168962Spjd /* 1464168962Spjd * If vnode is for a device return a specfs vnode instead. 1465168962Spjd */ 1466168962Spjd if (IS_DEVVP(*vpp)) { 1467168962Spjd struct vnode *svp; 1468168962Spjd 1469168962Spjd svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1470168962Spjd VN_RELE(*vpp); 1471168962Spjd if (svp == NULL) { 1472168962Spjd error = ENOSYS; 1473168962Spjd } 1474168962Spjd *vpp = svp; 1475168962Spjd } 1476168404Spjd } 1477185029Spjd if (aclp) 1478185029Spjd zfs_acl_free(aclp); 1479168404Spjd 1480168404Spjd ZFS_EXIT(zfsvfs); 1481168404Spjd return (error); 1482168404Spjd} 1483168404Spjd 1484168404Spjd/* 1485168404Spjd * Remove an entry from a directory. 1486168404Spjd * 1487168404Spjd * IN: dvp - vnode of directory to remove entry from. 1488168404Spjd * name - name of entry to remove. 1489168404Spjd * cr - credentials of caller. 
1490185029Spjd * ct - caller context 1491185029Spjd * flags - case flags 1492168404Spjd * 1493168404Spjd * RETURN: 0 if success 1494168404Spjd * error code if failure 1495168404Spjd * 1496168404Spjd * Timestamps: 1497168404Spjd * dvp - ctime|mtime 1498168404Spjd * vp - ctime (if nlink > 0) 1499168404Spjd */ 1500185029Spjd/*ARGSUSED*/ 1501168404Spjdstatic int 1502185029Spjdzfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 1503185029Spjd int flags) 1504168404Spjd{ 1505168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1506168404Spjd znode_t *xzp = NULL; 1507168404Spjd vnode_t *vp; 1508168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1509185029Spjd zilog_t *zilog; 1510168962Spjd uint64_t acl_obj, xattr_obj; 1511168404Spjd zfs_dirlock_t *dl; 1512168404Spjd dmu_tx_t *tx; 1513168962Spjd boolean_t may_delete_now, delete_now = FALSE; 1514185029Spjd boolean_t unlinked, toobig = FALSE; 1515185029Spjd uint64_t txtype; 1516185029Spjd pathname_t *realnmp = NULL; 1517185029Spjd pathname_t realnm; 1518168404Spjd int error; 1519185029Spjd int zflg = ZEXISTS; 1520168404Spjd 1521168404Spjd ZFS_ENTER(zfsvfs); 1522185029Spjd ZFS_VERIFY_ZP(dzp); 1523185029Spjd zilog = zfsvfs->z_log; 1524168404Spjd 1525185029Spjd if (flags & FIGNORECASE) { 1526185029Spjd zflg |= ZCILOOK; 1527185029Spjd pn_alloc(&realnm); 1528185029Spjd realnmp = &realnm; 1529185029Spjd } 1530185029Spjd 1531168404Spjdtop: 1532168404Spjd /* 1533168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 
1534168404Spjd */ 1535185029Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1536185029Spjd NULL, realnmp)) { 1537185029Spjd if (realnmp) 1538185029Spjd pn_free(realnmp); 1539168404Spjd ZFS_EXIT(zfsvfs); 1540168404Spjd return (error); 1541168404Spjd } 1542168404Spjd 1543168404Spjd vp = ZTOV(zp); 1544168404Spjd 1545168962Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1546168404Spjd goto out; 1547168962Spjd } 1548168404Spjd 1549168962Spjd /* 1550168962Spjd * Need to use rmdir for removing directories. 1551168962Spjd */ 1552168962Spjd if (vp->v_type == VDIR) { 1553168962Spjd error = EPERM; 1554168962Spjd goto out; 1555168962Spjd } 1556168962Spjd 1557185029Spjd vnevent_remove(vp, dvp, name, ct); 1558168962Spjd 1559185029Spjd if (realnmp) 1560185029Spjd dnlc_remove(dvp, realnmp->pn_buf); 1561185029Spjd else 1562185029Spjd dnlc_remove(dvp, name); 1563168404Spjd 1564168962Spjd may_delete_now = FALSE; 1565168962Spjd 1566168404Spjd /* 1567168404Spjd * We may delete the znode now, or we may put it in the unlinked set; 1568168404Spjd * it depends on whether we're the last link, and on whether there are 1569168404Spjd * other holds on the vnode. So we dmu_tx_hold() the right things to 1570168404Spjd * allow for either case. 1571168404Spjd */ 1572168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1573168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1574168404Spjd dmu_tx_hold_bonus(tx, zp->z_id); 1575185029Spjd if (may_delete_now) { 1576185029Spjd toobig = 1577185029Spjd zp->z_phys->zp_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 1578185029Spjd /* if the file is too big, only hold_free a token amount */ 1579185029Spjd dmu_tx_hold_free(tx, zp->z_id, 0, 1580185029Spjd (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); 1581185029Spjd } 1582168404Spjd 1583168404Spjd /* are there any extended attributes? */ 1584168404Spjd if ((xattr_obj = zp->z_phys->zp_xattr) != 0) { 1585168404Spjd /* XXX - do we need this if we are deleting? 
*/ 1586168404Spjd dmu_tx_hold_bonus(tx, xattr_obj); 1587168404Spjd } 1588168404Spjd 1589168962Spjd /* are there any additional acls */ 1590168962Spjd if ((acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj) != 0 && 1591168962Spjd may_delete_now) 1592168962Spjd dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 1593168962Spjd 1594168404Spjd /* charge as an update -- would be nice not to charge at all */ 1595168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1596168404Spjd 1597168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 1598168404Spjd if (error) { 1599168404Spjd zfs_dirent_unlock(dl); 1600168962Spjd VN_RELE(vp); 1601168404Spjd if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { 1602168404Spjd dmu_tx_wait(tx); 1603168404Spjd dmu_tx_abort(tx); 1604168404Spjd goto top; 1605168404Spjd } 1606185029Spjd if (realnmp) 1607185029Spjd pn_free(realnmp); 1608168404Spjd dmu_tx_abort(tx); 1609168404Spjd ZFS_EXIT(zfsvfs); 1610168404Spjd return (error); 1611168404Spjd } 1612168404Spjd 1613168404Spjd /* 1614168404Spjd * Remove the directory entry. 
1615168404Spjd */ 1616185029Spjd error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 1617168404Spjd 1618168404Spjd if (error) { 1619168404Spjd dmu_tx_commit(tx); 1620168404Spjd goto out; 1621168404Spjd } 1622168404Spjd 1623168962Spjd if (0 && unlinked) { 1624168962Spjd VI_LOCK(vp); 1625185029Spjd delete_now = may_delete_now && !toobig && 1626168962Spjd vp->v_count == 1 && !vn_has_cached_data(vp) && 1627168962Spjd zp->z_phys->zp_xattr == xattr_obj && 1628168962Spjd zp->z_phys->zp_acl.z_acl_extern_obj == acl_obj; 1629168962Spjd VI_UNLOCK(vp); 1630168962Spjd } 1631168962Spjd 1632168962Spjd if (delete_now) { 1633168962Spjd if (zp->z_phys->zp_xattr) { 1634168962Spjd error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp); 1635168962Spjd ASSERT3U(error, ==, 0); 1636168962Spjd ASSERT3U(xzp->z_phys->zp_links, ==, 2); 1637168962Spjd dmu_buf_will_dirty(xzp->z_dbuf, tx); 1638168962Spjd mutex_enter(&xzp->z_lock); 1639168962Spjd xzp->z_unlinked = 1; 1640168962Spjd xzp->z_phys->zp_links = 0; 1641168962Spjd mutex_exit(&xzp->z_lock); 1642168962Spjd zfs_unlinked_add(xzp, tx); 1643168962Spjd zp->z_phys->zp_xattr = 0; /* probably unnecessary */ 1644168962Spjd } 1645168962Spjd mutex_enter(&zp->z_lock); 1646168962Spjd VI_LOCK(vp); 1647168962Spjd vp->v_count--; 1648168962Spjd ASSERT3U(vp->v_count, ==, 0); 1649168962Spjd VI_UNLOCK(vp); 1650168962Spjd mutex_exit(&zp->z_lock); 1651168962Spjd zfs_znode_delete(zp, tx); 1652168962Spjd } else if (unlinked) { 1653168404Spjd zfs_unlinked_add(zp, tx); 1654168962Spjd } 1655168404Spjd 1656185029Spjd txtype = TX_REMOVE; 1657185029Spjd if (flags & FIGNORECASE) 1658185029Spjd txtype |= TX_CI; 1659185029Spjd zfs_log_remove(zilog, tx, txtype, dzp, name); 1660168404Spjd 1661168404Spjd dmu_tx_commit(tx); 1662168404Spjdout: 1663185029Spjd if (realnmp) 1664185029Spjd pn_free(realnmp); 1665185029Spjd 1666168404Spjd zfs_dirent_unlock(dl); 1667168404Spjd 1668168962Spjd if (!delete_now) { 1669168962Spjd VN_RELE(vp); 1670168962Spjd } else if (xzp) { 
1671185029Spjd /* this rele is delayed to prevent nesting transactions */ 1672168962Spjd VN_RELE(ZTOV(xzp)); 1673168962Spjd } 1674168962Spjd 1675168404Spjd ZFS_EXIT(zfsvfs); 1676168404Spjd return (error); 1677168404Spjd} 1678168404Spjd 1679168404Spjd/* 1680168404Spjd * Create a new directory and insert it into dvp using the name 1681168404Spjd * provided. Return a pointer to the inserted directory. 1682168404Spjd * 1683168404Spjd * IN: dvp - vnode of directory to add subdir to. 1684168404Spjd * dirname - name of new directory. 1685168404Spjd * vap - attributes of new directory. 1686168404Spjd * cr - credentials of caller. 1687185029Spjd * ct - caller context 1688185029Spjd * vsecp - ACL to be set 1689168404Spjd * 1690168404Spjd * OUT: vpp - vnode of created directory. 1691168404Spjd * 1692168404Spjd * RETURN: 0 if success 1693168404Spjd * error code if failure 1694168404Spjd * 1695168404Spjd * Timestamps: 1696168404Spjd * dvp - ctime|mtime updated 1697168404Spjd * vp - ctime|mtime|atime updated 1698168404Spjd */ 1699185029Spjd/*ARGSUSED*/ 1700168404Spjdstatic int 1701185029Spjdzfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 1702185029Spjd caller_context_t *ct, int flags, vsecattr_t *vsecp) 1703168404Spjd{ 1704168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1705168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1706185029Spjd zilog_t *zilog; 1707168404Spjd zfs_dirlock_t *dl; 1708185029Spjd uint64_t txtype; 1709168404Spjd dmu_tx_t *tx; 1710168404Spjd int error; 1711185029Spjd zfs_acl_t *aclp = NULL; 1712185029Spjd zfs_fuid_info_t *fuidp = NULL; 1713185029Spjd int zf = ZNEW; 1714168404Spjd 1715168404Spjd ASSERT(vap->va_type == VDIR); 1716168404Spjd 1717185029Spjd /* 1718185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 1719185029Spjd * make sure file system is at proper version 1720185029Spjd */ 1721185029Spjd 1722185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 1723185029Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 
IS_EPHEMERAL(crgetuid(cr))|| 1724185029Spjd IS_EPHEMERAL(crgetgid(cr)))) 1725185029Spjd return (EINVAL); 1726185029Spjd 1727168404Spjd ZFS_ENTER(zfsvfs); 1728185029Spjd ZFS_VERIFY_ZP(dzp); 1729185029Spjd zilog = zfsvfs->z_log; 1730168404Spjd 1731168404Spjd if (dzp->z_phys->zp_flags & ZFS_XATTR) { 1732168404Spjd ZFS_EXIT(zfsvfs); 1733168404Spjd return (EINVAL); 1734168404Spjd } 1735168404Spjd 1736185029Spjd if (zfsvfs->z_utf8 && u8_validate(dirname, 1737185029Spjd strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1738185029Spjd ZFS_EXIT(zfsvfs); 1739185029Spjd return (EILSEQ); 1740185029Spjd } 1741185029Spjd if (flags & FIGNORECASE) 1742185029Spjd zf |= ZCILOOK; 1743185029Spjd 1744185029Spjd if (vap->va_mask & AT_XVATTR) 1745185029Spjd if ((error = secpolicy_xvattr((xvattr_t *)vap, 1746185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 1747185029Spjd ZFS_EXIT(zfsvfs); 1748185029Spjd return (error); 1749185029Spjd } 1750185029Spjd 1751168404Spjd /* 1752168404Spjd * First make sure the new directory doesn't exist. 1753168404Spjd */ 1754185029Spjdtop: 1755185029Spjd *vpp = NULL; 1756185029Spjd 1757185029Spjd if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 1758185029Spjd NULL, NULL)) { 1759168404Spjd ZFS_EXIT(zfsvfs); 1760168404Spjd return (error); 1761168404Spjd } 1762168404Spjd 1763185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 1764168404Spjd zfs_dirent_unlock(dl); 1765168404Spjd ZFS_EXIT(zfsvfs); 1766168404Spjd return (error); 1767168404Spjd } 1768168404Spjd 1769185029Spjd if (vsecp && aclp == NULL) { 1770185029Spjd error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, &aclp); 1771185029Spjd if (error) { 1772185029Spjd zfs_dirent_unlock(dl); 1773185029Spjd ZFS_EXIT(zfsvfs); 1774185029Spjd return (error); 1775185029Spjd } 1776185029Spjd } 1777168404Spjd /* 1778168404Spjd * Add a new entry to the directory. 
1779168404Spjd */ 1780168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1781168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 1782168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 1783185029Spjd if ((aclp && aclp->z_has_fuids) || IS_EPHEMERAL(crgetuid(cr)) || 1784185029Spjd IS_EPHEMERAL(crgetgid(cr))) { 1785185029Spjd if (zfsvfs->z_fuid_obj == 0) { 1786185029Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1787185029Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 1788185029Spjd FUID_SIZE_ESTIMATE(zfsvfs)); 1789185029Spjd dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL); 1790185029Spjd } else { 1791185029Spjd dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj); 1792185029Spjd dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0, 1793185029Spjd FUID_SIZE_ESTIMATE(zfsvfs)); 1794185029Spjd } 1795185029Spjd } 1796185029Spjd if ((dzp->z_phys->zp_flags & ZFS_INHERIT_ACE) || aclp) 1797168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1798168404Spjd 0, SPA_MAXBLOCKSIZE); 1799168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 1800168404Spjd if (error) { 1801168404Spjd zfs_dirent_unlock(dl); 1802168404Spjd if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { 1803168404Spjd dmu_tx_wait(tx); 1804168404Spjd dmu_tx_abort(tx); 1805168404Spjd goto top; 1806168404Spjd } 1807168404Spjd dmu_tx_abort(tx); 1808168404Spjd ZFS_EXIT(zfsvfs); 1809185029Spjd if (aclp) 1810185029Spjd zfs_acl_free(aclp); 1811168404Spjd return (error); 1812168404Spjd } 1813168404Spjd 1814168404Spjd /* 1815168404Spjd * Create new node. 1816168404Spjd */ 1817185029Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, aclp, &fuidp); 1818168404Spjd 1819185029Spjd if (aclp) 1820185029Spjd zfs_acl_free(aclp); 1821185029Spjd 1822168404Spjd /* 1823168404Spjd * Now put new name in parent dir. 
1824168404Spjd */ 1825168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 1826168404Spjd 1827168404Spjd *vpp = ZTOV(zp); 1828168404Spjd 1829185029Spjd txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 1830185029Spjd if (flags & FIGNORECASE) 1831185029Spjd txtype |= TX_CI; 1832185029Spjd zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, fuidp, vap); 1833185029Spjd 1834185029Spjd if (fuidp) 1835185029Spjd zfs_fuid_info_free(fuidp); 1836168404Spjd dmu_tx_commit(tx); 1837168404Spjd 1838168404Spjd zfs_dirent_unlock(dl); 1839168404Spjd 1840168404Spjd ZFS_EXIT(zfsvfs); 1841168404Spjd return (0); 1842168404Spjd} 1843168404Spjd 1844168404Spjd/* 1845168404Spjd * Remove a directory subdir entry. If the current working 1846168404Spjd * directory is the same as the subdir to be removed, the 1847168404Spjd * remove will fail. 1848168404Spjd * 1849168404Spjd * IN: dvp - vnode of directory to remove from. 1850168404Spjd * name - name of directory to be removed. 1851168404Spjd * cwd - vnode of current working directory. 1852168404Spjd * cr - credentials of caller. 
1853185029Spjd * ct - caller context 1854185029Spjd * flags - case flags 1855168404Spjd * 1856168404Spjd * RETURN: 0 if success 1857168404Spjd * error code if failure 1858168404Spjd * 1859168404Spjd * Timestamps: 1860168404Spjd * dvp - ctime|mtime updated 1861168404Spjd */ 1862185029Spjd/*ARGSUSED*/ 1863168404Spjdstatic int 1864185029Spjdzfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 1865185029Spjd caller_context_t *ct, int flags) 1866168404Spjd{ 1867168404Spjd znode_t *dzp = VTOZ(dvp); 1868168404Spjd znode_t *zp; 1869168404Spjd vnode_t *vp; 1870168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1871185029Spjd zilog_t *zilog; 1872168404Spjd zfs_dirlock_t *dl; 1873168404Spjd dmu_tx_t *tx; 1874168404Spjd int error; 1875185029Spjd int zflg = ZEXISTS; 1876168404Spjd 1877168962Spjd ZFS_ENTER(zfsvfs); 1878185029Spjd ZFS_VERIFY_ZP(dzp); 1879185029Spjd zilog = zfsvfs->z_log; 1880168404Spjd 1881185029Spjd if (flags & FIGNORECASE) 1882185029Spjd zflg |= ZCILOOK; 1883168404Spjdtop: 1884168404Spjd zp = NULL; 1885168404Spjd 1886168404Spjd /* 1887168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 1888168404Spjd */ 1889185029Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1890185029Spjd NULL, NULL)) { 1891168404Spjd ZFS_EXIT(zfsvfs); 1892168404Spjd return (error); 1893168404Spjd } 1894168404Spjd 1895168404Spjd vp = ZTOV(zp); 1896168404Spjd 1897168404Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1898168404Spjd goto out; 1899168404Spjd } 1900168404Spjd 1901168962Spjd if (vp->v_type != VDIR) { 1902168962Spjd error = ENOTDIR; 1903168962Spjd goto out; 1904168962Spjd } 1905168962Spjd 1906168962Spjd if (vp == cwd) { 1907168962Spjd error = EINVAL; 1908168962Spjd goto out; 1909168962Spjd } 1910168962Spjd 1911185029Spjd vnevent_rmdir(vp, dvp, name, ct); 1912168962Spjd 1913168404Spjd /* 1914168404Spjd * Grab a lock on the directory to make sure that noone is 1915168404Spjd * trying to add (or lookup) entries while we are removing it. 
1916168404Spjd */ 1917168404Spjd rw_enter(&zp->z_name_lock, RW_WRITER); 1918168404Spjd 1919168404Spjd /* 1920168404Spjd * Grab a lock on the parent pointer to make sure we play well 1921168404Spjd * with the treewalk and directory rename code. 1922168404Spjd */ 1923168404Spjd rw_enter(&zp->z_parent_lock, RW_WRITER); 1924168404Spjd 1925168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1926168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1927168404Spjd dmu_tx_hold_bonus(tx, zp->z_id); 1928168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1929168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 1930168404Spjd if (error) { 1931168404Spjd rw_exit(&zp->z_parent_lock); 1932168404Spjd rw_exit(&zp->z_name_lock); 1933168404Spjd zfs_dirent_unlock(dl); 1934168962Spjd VN_RELE(vp); 1935168404Spjd if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { 1936168404Spjd dmu_tx_wait(tx); 1937168404Spjd dmu_tx_abort(tx); 1938168404Spjd goto top; 1939168404Spjd } 1940168404Spjd dmu_tx_abort(tx); 1941168404Spjd ZFS_EXIT(zfsvfs); 1942168404Spjd return (error); 1943168404Spjd } 1944168404Spjd 1945168404Spjd#ifdef FREEBSD_NAMECACHE 1946168404Spjd cache_purge(dvp); 1947168404Spjd#endif 1948168404Spjd 1949185029Spjd error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 1950168404Spjd 1951185029Spjd if (error == 0) { 1952185029Spjd uint64_t txtype = TX_RMDIR; 1953185029Spjd if (flags & FIGNORECASE) 1954185029Spjd txtype |= TX_CI; 1955185029Spjd zfs_log_remove(zilog, tx, txtype, dzp, name); 1956185029Spjd } 1957168404Spjd 1958168404Spjd dmu_tx_commit(tx); 1959168404Spjd 1960168404Spjd rw_exit(&zp->z_parent_lock); 1961168404Spjd rw_exit(&zp->z_name_lock); 1962168404Spjd#ifdef FREEBSD_NAMECACHE 1963168404Spjd cache_purge(vp); 1964168404Spjd#endif 1965168404Spjdout: 1966168404Spjd zfs_dirent_unlock(dl); 1967168404Spjd 1968168962Spjd VN_RELE(vp); 1969168962Spjd 1970168404Spjd ZFS_EXIT(zfsvfs); 1971168404Spjd return (error); 1972168404Spjd} 1973168404Spjd 1974168404Spjd/* 
1975168404Spjd * Read as many directory entries as will fit into the provided 1976168404Spjd * buffer from the given directory cursor position (specified in 1977168404Spjd * the uio structure. 1978168404Spjd * 1979168404Spjd * IN: vp - vnode of directory to read. 1980168404Spjd * uio - structure supplying read location, range info, 1981168404Spjd * and return buffer. 1982168404Spjd * cr - credentials of caller. 1983185029Spjd * ct - caller context 1984185029Spjd * flags - case flags 1985168404Spjd * 1986168404Spjd * OUT: uio - updated offset and range, buffer filled. 1987168404Spjd * eofp - set to true if end-of-file detected. 1988168404Spjd * 1989168404Spjd * RETURN: 0 if success 1990168404Spjd * error code if failure 1991168404Spjd * 1992168404Spjd * Timestamps: 1993168404Spjd * vp - atime updated 1994168404Spjd * 1995168404Spjd * Note that the low 4 bits of the cookie returned by zap is always zero. 1996168404Spjd * This allows us to use the low range for "special" directory entries: 1997168404Spjd * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 1998168404Spjd * we use the offset 2 for the '.zfs' directory. 
1999168404Spjd */ 2000168404Spjd/* ARGSUSED */ 2001168404Spjdstatic int 2002168962Spjdzfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) 2003168404Spjd{ 2004168404Spjd znode_t *zp = VTOZ(vp); 2005168404Spjd iovec_t *iovp; 2006185029Spjd edirent_t *eodp; 2007168404Spjd dirent64_t *odp; 2008168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2009168404Spjd objset_t *os; 2010168404Spjd caddr_t outbuf; 2011168404Spjd size_t bufsize; 2012168404Spjd zap_cursor_t zc; 2013168404Spjd zap_attribute_t zap; 2014168404Spjd uint_t bytes_wanted; 2015168404Spjd uint64_t offset; /* must be unsigned; checks for < 1 */ 2016168404Spjd int local_eof; 2017168404Spjd int outcount; 2018168404Spjd int error; 2019168404Spjd uint8_t prefetch; 2020185029Spjd boolean_t check_sysattrs; 2021168404Spjd uint8_t type; 2022168962Spjd int ncooks; 2023168962Spjd u_long *cooks = NULL; 2024185029Spjd int flags = 0; 2025168404Spjd 2026168404Spjd ZFS_ENTER(zfsvfs); 2027185029Spjd ZFS_VERIFY_ZP(zp); 2028168404Spjd 2029168404Spjd /* 2030168404Spjd * If we are not given an eof variable, 2031168404Spjd * use a local one. 2032168404Spjd */ 2033168404Spjd if (eofp == NULL) 2034168404Spjd eofp = &local_eof; 2035168404Spjd 2036168404Spjd /* 2037168404Spjd * Check for valid iov_len. 2038168404Spjd */ 2039168404Spjd if (uio->uio_iov->iov_len <= 0) { 2040168404Spjd ZFS_EXIT(zfsvfs); 2041168404Spjd return (EINVAL); 2042168404Spjd } 2043168404Spjd 2044168404Spjd /* 2045168404Spjd * Quit if directory has been removed (posix) 2046168404Spjd */ 2047168404Spjd if ((*eofp = zp->z_unlinked) != 0) { 2048168404Spjd ZFS_EXIT(zfsvfs); 2049168404Spjd return (0); 2050168404Spjd } 2051168404Spjd 2052168404Spjd error = 0; 2053168404Spjd os = zfsvfs->z_os; 2054168404Spjd offset = uio->uio_loffset; 2055168404Spjd prefetch = zp->z_zn_prefetch; 2056168404Spjd 2057168404Spjd /* 2058168404Spjd * Initialize the iterator cursor. 
2059168404Spjd */ 2060168404Spjd if (offset <= 3) { 2061168404Spjd /* 2062168404Spjd * Start iteration from the beginning of the directory. 2063168404Spjd */ 2064168404Spjd zap_cursor_init(&zc, os, zp->z_id); 2065168404Spjd } else { 2066168404Spjd /* 2067168404Spjd * The offset is a serialized cursor. 2068168404Spjd */ 2069168404Spjd zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2070168404Spjd } 2071168404Spjd 2072168404Spjd /* 2073168404Spjd * Get space to change directory entries into fs independent format. 2074168404Spjd */ 2075168404Spjd iovp = uio->uio_iov; 2076168404Spjd bytes_wanted = iovp->iov_len; 2077168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2078168404Spjd bufsize = bytes_wanted; 2079168404Spjd outbuf = kmem_alloc(bufsize, KM_SLEEP); 2080168404Spjd odp = (struct dirent64 *)outbuf; 2081168404Spjd } else { 2082168404Spjd bufsize = bytes_wanted; 2083168404Spjd odp = (struct dirent64 *)iovp->iov_base; 2084168404Spjd } 2085185029Spjd eodp = (struct edirent *)odp; 2086168404Spjd 2087169170Spjd if (ncookies != NULL) { 2088168404Spjd /* 2089168404Spjd * Minimum entry size is dirent size and 1 byte for a file name. 2090168404Spjd */ 2091168962Spjd ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2092168962Spjd cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2093169170Spjd *cookies = cooks; 2094168962Spjd *ncookies = ncooks; 2095168404Spjd } 2096185029Spjd /* 2097185029Spjd * If this VFS supports the system attribute view interface; and 2098185029Spjd * we're looking at an extended attribute directory; and we care 2099185029Spjd * about normalization conflicts on this vfs; then we must check 2100185029Spjd * for normalization conflicts with the sysattr name space. 
2101185029Spjd */ 2102185029Spjd#ifdef TODO 2103185029Spjd check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2104185029Spjd (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2105185029Spjd (flags & V_RDDIR_ENTFLAGS); 2106185029Spjd#else 2107185029Spjd check_sysattrs = 0; 2108185029Spjd#endif 2109168404Spjd 2110168404Spjd /* 2111168404Spjd * Transform to file-system independent format 2112168404Spjd */ 2113168404Spjd outcount = 0; 2114168404Spjd while (outcount < bytes_wanted) { 2115168404Spjd ino64_t objnum; 2116168404Spjd ushort_t reclen; 2117185029Spjd off64_t *next; 2118168404Spjd 2119168404Spjd /* 2120168404Spjd * Special case `.', `..', and `.zfs'. 2121168404Spjd */ 2122168404Spjd if (offset == 0) { 2123168404Spjd (void) strcpy(zap.za_name, "."); 2124185029Spjd zap.za_normalization_conflict = 0; 2125168404Spjd objnum = zp->z_id; 2126169108Spjd type = DT_DIR; 2127168404Spjd } else if (offset == 1) { 2128168404Spjd (void) strcpy(zap.za_name, ".."); 2129185029Spjd zap.za_normalization_conflict = 0; 2130168404Spjd objnum = zp->z_phys->zp_parent; 2131169108Spjd type = DT_DIR; 2132168404Spjd } else if (offset == 2 && zfs_show_ctldir(zp)) { 2133168404Spjd (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2134185029Spjd zap.za_normalization_conflict = 0; 2135168404Spjd objnum = ZFSCTL_INO_ROOT; 2136169108Spjd type = DT_DIR; 2137168404Spjd } else { 2138168404Spjd /* 2139168404Spjd * Grab next entry. 
2140168404Spjd */ 2141168404Spjd if (error = zap_cursor_retrieve(&zc, &zap)) { 2142168404Spjd if ((*eofp = (error == ENOENT)) != 0) 2143168404Spjd break; 2144168404Spjd else 2145168404Spjd goto update; 2146168404Spjd } 2147168404Spjd 2148168404Spjd if (zap.za_integer_length != 8 || 2149168404Spjd zap.za_num_integers != 1) { 2150168404Spjd cmn_err(CE_WARN, "zap_readdir: bad directory " 2151168404Spjd "entry, obj = %lld, offset = %lld\n", 2152168404Spjd (u_longlong_t)zp->z_id, 2153168404Spjd (u_longlong_t)offset); 2154168404Spjd error = ENXIO; 2155168404Spjd goto update; 2156168404Spjd } 2157168404Spjd 2158168404Spjd objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2159168404Spjd /* 2160168404Spjd * MacOS X can extract the object type here such as: 2161168404Spjd * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2162168404Spjd */ 2163168404Spjd type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2164185029Spjd 2165185029Spjd if (check_sysattrs && !zap.za_normalization_conflict) { 2166185029Spjd#ifdef TODO 2167185029Spjd zap.za_normalization_conflict = 2168185029Spjd xattr_sysattr_casechk(zap.za_name); 2169185029Spjd#else 2170185029Spjd panic("%s:%u: TODO", __func__, __LINE__); 2171185029Spjd#endif 2172185029Spjd } 2173168404Spjd } 2174168404Spjd 2175185029Spjd if (flags & V_RDDIR_ENTFLAGS) 2176185029Spjd reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2177185029Spjd else 2178185029Spjd reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2179185029Spjd 2180168404Spjd /* 2181168404Spjd * Will this entry fit in the buffer? 2182168404Spjd */ 2183168404Spjd if (outcount + reclen > bufsize) { 2184168404Spjd /* 2185168404Spjd * Did we manage to fit anything in the buffer? 
2186168404Spjd */ 2187168404Spjd if (!outcount) { 2188168404Spjd error = EINVAL; 2189168404Spjd goto update; 2190168404Spjd } 2191168404Spjd break; 2192168404Spjd } 2193185029Spjd if (flags & V_RDDIR_ENTFLAGS) { 2194185029Spjd /* 2195185029Spjd * Add extended flag entry: 2196185029Spjd */ 2197185029Spjd eodp->ed_ino = objnum; 2198185029Spjd eodp->ed_reclen = reclen; 2199185029Spjd /* NOTE: ed_off is the offset for the *next* entry */ 2200185029Spjd next = &(eodp->ed_off); 2201185029Spjd eodp->ed_eflags = zap.za_normalization_conflict ? 2202185029Spjd ED_CASE_CONFLICT : 0; 2203185029Spjd (void) strncpy(eodp->ed_name, zap.za_name, 2204185029Spjd EDIRENT_NAMELEN(reclen)); 2205185029Spjd eodp = (edirent_t *)((intptr_t)eodp + reclen); 2206185029Spjd } else { 2207185029Spjd /* 2208185029Spjd * Add normal entry: 2209185029Spjd */ 2210185029Spjd odp->d_ino = objnum; 2211185029Spjd odp->d_reclen = reclen; 2212185029Spjd odp->d_namlen = strlen(zap.za_name); 2213185029Spjd (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2214185029Spjd odp->d_type = type; 2215185029Spjd odp = (dirent64_t *)((intptr_t)odp + reclen); 2216185029Spjd } 2217168404Spjd outcount += reclen; 2218168404Spjd 2219168404Spjd ASSERT(outcount <= bufsize); 2220168404Spjd 2221168404Spjd /* Prefetch znode */ 2222168404Spjd if (prefetch) 2223168404Spjd dmu_prefetch(os, objnum, 0, 0); 2224168404Spjd 2225168404Spjd /* 2226168404Spjd * Move to the next entry, fill in the previous offset. 
2227168404Spjd */ 2228168404Spjd if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2229168404Spjd zap_cursor_advance(&zc); 2230168404Spjd offset = zap_cursor_serialize(&zc); 2231168404Spjd } else { 2232168404Spjd offset += 1; 2233168404Spjd } 2234168404Spjd 2235168962Spjd if (cooks != NULL) { 2236168962Spjd *cooks++ = offset; 2237168962Spjd ncooks--; 2238168962Spjd KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2239168404Spjd } 2240168404Spjd } 2241168404Spjd zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2242168404Spjd 2243168404Spjd /* Subtract unused cookies */ 2244168962Spjd if (ncookies != NULL) 2245168962Spjd *ncookies -= ncooks; 2246168404Spjd 2247168404Spjd if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2248168404Spjd iovp->iov_base += outcount; 2249168404Spjd iovp->iov_len -= outcount; 2250168404Spjd uio->uio_resid -= outcount; 2251168404Spjd } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2252168404Spjd /* 2253168404Spjd * Reset the pointer. 
2254168404Spjd */ 2255168404Spjd offset = uio->uio_loffset; 2256168404Spjd } 2257168404Spjd 2258168404Spjdupdate: 2259168404Spjd zap_cursor_fini(&zc); 2260168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2261168404Spjd kmem_free(outbuf, bufsize); 2262168404Spjd 2263168404Spjd if (error == ENOENT) 2264168404Spjd error = 0; 2265168404Spjd 2266168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2267168404Spjd 2268168404Spjd uio->uio_loffset = offset; 2269168404Spjd ZFS_EXIT(zfsvfs); 2270169107Spjd if (error != 0 && cookies != NULL) { 2271168962Spjd free(*cookies, M_TEMP); 2272168962Spjd *cookies = NULL; 2273168962Spjd *ncookies = 0; 2274168404Spjd } 2275168404Spjd return (error); 2276168404Spjd} 2277168404Spjd 2278185029Spjdulong_t zfs_fsync_sync_cnt = 4; 2279185029Spjd 2280168404Spjdstatic int 2281185029Spjdzfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2282168404Spjd{ 2283168962Spjd znode_t *zp = VTOZ(vp); 2284168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2285168404Spjd 2286185029Spjd (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2287185029Spjd 2288168404Spjd ZFS_ENTER(zfsvfs); 2289185029Spjd ZFS_VERIFY_ZP(zp); 2290168404Spjd zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); 2291168404Spjd ZFS_EXIT(zfsvfs); 2292168404Spjd return (0); 2293168404Spjd} 2294168404Spjd 2295185029Spjd 2296168404Spjd/* 2297168404Spjd * Get the requested file attributes and place them in the provided 2298168404Spjd * vattr structure. 2299168404Spjd * 2300168404Spjd * IN: vp - vnode of file. 2301168404Spjd * vap - va_mask identifies requested attributes. 2302185029Spjd * If AT_XVATTR set, then optional attrs are requested 2303185029Spjd * flags - ATTR_NOACLCHECK (CIFS server context) 2304168404Spjd * cr - credentials of caller. 2305185029Spjd * ct - caller context 2306168404Spjd * 2307168404Spjd * OUT: vap - attribute values. 
2308168404Spjd * 2309168404Spjd * RETURN: 0 (always succeeds) 2310168404Spjd */ 2311168404Spjd/* ARGSUSED */ 2312168404Spjdstatic int 2313185029Spjdzfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2314185029Spjd caller_context_t *ct) 2315168404Spjd{ 2316168962Spjd znode_t *zp = VTOZ(vp); 2317168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2318185029Spjd znode_phys_t *pzp; 2319185029Spjd int error = 0; 2320168962Spjd uint32_t blksize; 2321168962Spjd u_longlong_t nblocks; 2322185029Spjd uint64_t links; 2323185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2324185029Spjd xoptattr_t *xoap = NULL; 2325185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2326168404Spjd 2327168404Spjd ZFS_ENTER(zfsvfs); 2328185029Spjd ZFS_VERIFY_ZP(zp); 2329185029Spjd pzp = zp->z_phys; 2330168404Spjd 2331185029Spjd mutex_enter(&zp->z_lock); 2332185029Spjd 2333168404Spjd /* 2334185029Spjd * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2335185029Spjd * Also, if we are the owner don't bother, since owner should 2336185029Spjd * always be allowed to read basic attributes of file. 2337185029Spjd */ 2338185029Spjd if (!(pzp->zp_flags & ZFS_ACL_TRIVIAL) && 2339185029Spjd (pzp->zp_uid != crgetuid(cr))) { 2340185029Spjd if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2341185029Spjd skipaclchk, cr)) { 2342185029Spjd mutex_exit(&zp->z_lock); 2343185029Spjd ZFS_EXIT(zfsvfs); 2344185029Spjd return (error); 2345185029Spjd } 2346185029Spjd } 2347185029Spjd 2348185029Spjd /* 2349168404Spjd * Return all attributes. It's cheaper to provide the answer 2350168404Spjd * than to determine whether we were asked the question. 
2351168404Spjd */ 2352168404Spjd 2353168404Spjd vap->va_type = IFTOVT(pzp->zp_mode); 2354168404Spjd vap->va_mode = pzp->zp_mode & ~S_IFMT; 2355185029Spjd zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2356185029Spjd// vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2357168404Spjd vap->va_nodeid = zp->z_id; 2358185029Spjd if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2359185029Spjd links = pzp->zp_links + 1; 2360185029Spjd else 2361185029Spjd links = pzp->zp_links; 2362185029Spjd vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */ 2363168404Spjd vap->va_size = pzp->zp_size; 2364168404Spjd vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 2365168958Spjd vap->va_rdev = zfs_cmpldev(pzp->zp_rdev); 2366168404Spjd vap->va_seq = zp->z_seq; 2367168404Spjd vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2368168404Spjd 2369185029Spjd /* 2370185029Spjd * Add in any requested optional attributes and the create time. 2371185029Spjd * Also set the corresponding bits in the returned attribute bitmap. 
2372185029Spjd */ 2373185029Spjd if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2374185029Spjd if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2375185029Spjd xoap->xoa_archive = 2376185029Spjd ((pzp->zp_flags & ZFS_ARCHIVE) != 0); 2377185029Spjd XVA_SET_RTN(xvap, XAT_ARCHIVE); 2378185029Spjd } 2379185029Spjd 2380185029Spjd if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2381185029Spjd xoap->xoa_readonly = 2382185029Spjd ((pzp->zp_flags & ZFS_READONLY) != 0); 2383185029Spjd XVA_SET_RTN(xvap, XAT_READONLY); 2384185029Spjd } 2385185029Spjd 2386185029Spjd if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2387185029Spjd xoap->xoa_system = 2388185029Spjd ((pzp->zp_flags & ZFS_SYSTEM) != 0); 2389185029Spjd XVA_SET_RTN(xvap, XAT_SYSTEM); 2390185029Spjd } 2391185029Spjd 2392185029Spjd if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2393185029Spjd xoap->xoa_hidden = 2394185029Spjd ((pzp->zp_flags & ZFS_HIDDEN) != 0); 2395185029Spjd XVA_SET_RTN(xvap, XAT_HIDDEN); 2396185029Spjd } 2397185029Spjd 2398185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2399185029Spjd xoap->xoa_nounlink = 2400185029Spjd ((pzp->zp_flags & ZFS_NOUNLINK) != 0); 2401185029Spjd XVA_SET_RTN(xvap, XAT_NOUNLINK); 2402185029Spjd } 2403185029Spjd 2404185029Spjd if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2405185029Spjd xoap->xoa_immutable = 2406185029Spjd ((pzp->zp_flags & ZFS_IMMUTABLE) != 0); 2407185029Spjd XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2408185029Spjd } 2409185029Spjd 2410185029Spjd if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2411185029Spjd xoap->xoa_appendonly = 2412185029Spjd ((pzp->zp_flags & ZFS_APPENDONLY) != 0); 2413185029Spjd XVA_SET_RTN(xvap, XAT_APPENDONLY); 2414185029Spjd } 2415185029Spjd 2416185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2417185029Spjd xoap->xoa_nodump = 2418185029Spjd ((pzp->zp_flags & ZFS_NODUMP) != 0); 2419185029Spjd XVA_SET_RTN(xvap, XAT_NODUMP); 2420185029Spjd } 2421185029Spjd 2422185029Spjd if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2423185029Spjd xoap->xoa_opaque = 2424185029Spjd 
((pzp->zp_flags & ZFS_OPAQUE) != 0); 2425185029Spjd XVA_SET_RTN(xvap, XAT_OPAQUE); 2426185029Spjd } 2427185029Spjd 2428185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2429185029Spjd xoap->xoa_av_quarantined = 2430185029Spjd ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0); 2431185029Spjd XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2432185029Spjd } 2433185029Spjd 2434185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2435185029Spjd xoap->xoa_av_modified = 2436185029Spjd ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0); 2437185029Spjd XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2438185029Spjd } 2439185029Spjd 2440185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2441185029Spjd vp->v_type == VREG && 2442185029Spjd (pzp->zp_flags & ZFS_BONUS_SCANSTAMP)) { 2443185029Spjd size_t len; 2444185029Spjd dmu_object_info_t doi; 2445185029Spjd 2446185029Spjd /* 2447185029Spjd * Only VREG files have anti-virus scanstamps, so we 2448185029Spjd * won't conflict with symlinks in the bonus buffer. 2449185029Spjd */ 2450185029Spjd dmu_object_info_from_db(zp->z_dbuf, &doi); 2451185029Spjd len = sizeof (xoap->xoa_av_scanstamp) + 2452185029Spjd sizeof (znode_phys_t); 2453185029Spjd if (len <= doi.doi_bonus_size) { 2454185029Spjd /* 2455185029Spjd * pzp points to the start of the 2456185029Spjd * znode_phys_t. pzp + 1 points to the 2457185029Spjd * first byte after the znode_phys_t. 
2458185029Spjd */ 2459185029Spjd (void) memcpy(xoap->xoa_av_scanstamp, 2460185029Spjd pzp + 1, 2461185029Spjd sizeof (xoap->xoa_av_scanstamp)); 2462185029Spjd XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); 2463185029Spjd } 2464185029Spjd } 2465185029Spjd 2466185029Spjd if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 2467185029Spjd ZFS_TIME_DECODE(&xoap->xoa_createtime, pzp->zp_crtime); 2468185029Spjd XVA_SET_RTN(xvap, XAT_CREATETIME); 2469185029Spjd } 2470185029Spjd } 2471185029Spjd 2472168404Spjd ZFS_TIME_DECODE(&vap->va_atime, pzp->zp_atime); 2473168404Spjd ZFS_TIME_DECODE(&vap->va_mtime, pzp->zp_mtime); 2474168404Spjd ZFS_TIME_DECODE(&vap->va_ctime, pzp->zp_ctime); 2475168404Spjd ZFS_TIME_DECODE(&vap->va_birthtime, pzp->zp_crtime); 2476168404Spjd 2477168404Spjd mutex_exit(&zp->z_lock); 2478168404Spjd 2479168404Spjd dmu_object_size_from_db(zp->z_dbuf, &blksize, &nblocks); 2480168404Spjd vap->va_blksize = blksize; 2481168404Spjd vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 2482168404Spjd 2483168404Spjd if (zp->z_blksz == 0) { 2484168404Spjd /* 2485168404Spjd * Block size hasn't been set; suggest maximal I/O transfers. 2486168404Spjd */ 2487168404Spjd vap->va_blksize = zfsvfs->z_max_blksz; 2488168404Spjd } 2489168404Spjd 2490168404Spjd ZFS_EXIT(zfsvfs); 2491168404Spjd return (0); 2492168404Spjd} 2493168404Spjd 2494168404Spjd/* 2495168404Spjd * Set the file attributes to the values contained in the 2496168404Spjd * vattr structure. 2497168404Spjd * 2498168404Spjd * IN: vp - vnode of file to be modified. 2499168404Spjd * vap - new attribute values. 2500185029Spjd * If AT_XVATTR set, then optional attrs are being set 2501168404Spjd * flags - ATTR_UTIME set if non-default time values provided. 2502185029Spjd * - ATTR_NOACLCHECK (CIFS context only). 2503168404Spjd * cr - credentials of caller. 
2504185029Spjd * ct - caller context 2505168404Spjd * 2506168404Spjd * RETURN: 0 if success 2507168404Spjd * error code if failure 2508168404Spjd * 2509168404Spjd * Timestamps: 2510168404Spjd * vp - ctime updated, mtime updated if size changed. 2511168404Spjd */ 2512168404Spjd/* ARGSUSED */ 2513168404Spjdstatic int 2514168962Spjdzfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2515168962Spjd caller_context_t *ct) 2516168404Spjd{ 2517185029Spjd znode_t *zp = VTOZ(vp); 2518185029Spjd znode_phys_t *pzp; 2519168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2520185029Spjd zilog_t *zilog; 2521168404Spjd dmu_tx_t *tx; 2522168404Spjd vattr_t oldva; 2523168962Spjd uint_t mask = vap->va_mask; 2524168404Spjd uint_t saved_mask; 2525168404Spjd int trim_mask = 0; 2526168404Spjd uint64_t new_mode; 2527168404Spjd znode_t *attrzp; 2528168404Spjd int need_policy = FALSE; 2529168404Spjd int err; 2530185029Spjd zfs_fuid_info_t *fuidp = NULL; 2531185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2532185029Spjd xoptattr_t *xoap; 2533185029Spjd zfs_acl_t *aclp = NULL; 2534185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 2535168404Spjd 2536168404Spjd if (mask == 0) 2537168404Spjd return (0); 2538168404Spjd 2539168962Spjd if (mask & AT_NOSET) 2540168962Spjd return (EINVAL); 2541168962Spjd 2542185029Spjd ZFS_ENTER(zfsvfs); 2543185029Spjd ZFS_VERIFY_ZP(zp); 2544185029Spjd 2545185029Spjd pzp = zp->z_phys; 2546185029Spjd zilog = zfsvfs->z_log; 2547185029Spjd 2548185029Spjd /* 2549185029Spjd * Make sure that if we have ephemeral uid/gid or xvattr specified 2550185029Spjd * that file system is at proper version level 2551185029Spjd */ 2552185029Spjd 2553185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2554185029Spjd (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2555185029Spjd ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 2556185029Spjd (mask & AT_XVATTR))) { 2557185029Spjd ZFS_EXIT(zfsvfs); 2558185029Spjd return (EINVAL); 2559185029Spjd } 2560185029Spjd 2561185029Spjd if (mask & AT_SIZE && vp->v_type == VDIR) { 2562185029Spjd ZFS_EXIT(zfsvfs); 2563168404Spjd return (EISDIR); 2564185029Spjd } 2565168404Spjd 2566185029Spjd if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 2567185029Spjd ZFS_EXIT(zfsvfs); 2568168404Spjd return (EINVAL); 2569185029Spjd } 2570168404Spjd 2571185029Spjd /* 2572185029Spjd * If this is an xvattr_t, then get a pointer to the structure of 2573185029Spjd * optional attributes. If this is NULL, then we have a vattr_t. 
2574185029Spjd */ 2575185029Spjd xoap = xva_getxoptattr(xvap); 2576168404Spjd 2577185029Spjd /* 2578185029Spjd * Immutable files can only alter immutable bit and atime 2579185029Spjd */ 2580185029Spjd if ((pzp->zp_flags & ZFS_IMMUTABLE) && 2581185029Spjd ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 2582185029Spjd ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 2583185029Spjd ZFS_EXIT(zfsvfs); 2584185029Spjd return (EPERM); 2585185029Spjd } 2586185029Spjd 2587185029Spjd if ((mask & AT_SIZE) && (pzp->zp_flags & ZFS_READONLY)) { 2588185029Spjd ZFS_EXIT(zfsvfs); 2589185029Spjd return (EPERM); 2590185029Spjd } 2591185029Spjd 2592185029Spjd /* 2593185029Spjd * Verify timestamps doesn't overflow 32 bits. 2594185029Spjd * ZFS can handle large timestamps, but 32bit syscalls can't 2595185029Spjd * handle times greater than 2039. This check should be removed 2596185029Spjd * once large timestamps are fully supported. 2597185029Spjd */ 2598185029Spjd if (mask & (AT_ATIME | AT_MTIME)) { 2599185029Spjd if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 2600185029Spjd ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 2601185029Spjd ZFS_EXIT(zfsvfs); 2602185029Spjd return (EOVERFLOW); 2603185029Spjd } 2604185029Spjd } 2605185029Spjd 2606168404Spjdtop: 2607168404Spjd attrzp = NULL; 2608168404Spjd 2609168404Spjd if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2610168404Spjd ZFS_EXIT(zfsvfs); 2611168404Spjd return (EROFS); 2612168404Spjd } 2613168404Spjd 2614168404Spjd /* 2615168404Spjd * First validate permissions 2616168404Spjd */ 2617168404Spjd 2618168404Spjd if (mask & AT_SIZE) { 2619185029Spjd err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr); 2620168404Spjd if (err) { 2621168404Spjd ZFS_EXIT(zfsvfs); 2622168404Spjd return (err); 2623168404Spjd } 2624168404Spjd /* 2625168404Spjd * XXX - Note, we are not providing any open 2626168404Spjd * mode flags here (like FNDELAY), so we may 2627168404Spjd * block if there are locks 
present... this 2628168404Spjd * should be addressed in openat(). 2629168404Spjd */ 2630185029Spjd /* XXX - would it be OK to generate a log record here? */ 2631185029Spjd err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 2632168404Spjd if (err) { 2633168404Spjd ZFS_EXIT(zfsvfs); 2634168404Spjd return (err); 2635168404Spjd } 2636168404Spjd } 2637168404Spjd 2638185029Spjd if (mask & (AT_ATIME|AT_MTIME) || 2639185029Spjd ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2640185029Spjd XVA_ISSET_REQ(xvap, XAT_READONLY) || 2641185029Spjd XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 2642185029Spjd XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 2643185029Spjd XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) 2644185029Spjd need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 2645185029Spjd skipaclchk, cr); 2646168404Spjd 2647168404Spjd if (mask & (AT_UID|AT_GID)) { 2648168404Spjd int idmask = (mask & (AT_UID|AT_GID)); 2649168404Spjd int take_owner; 2650168404Spjd int take_group; 2651168404Spjd 2652168404Spjd /* 2653168404Spjd * NOTE: even if a new mode is being set, 2654168404Spjd * we may clear S_ISUID/S_ISGID bits. 2655168404Spjd */ 2656168404Spjd 2657168404Spjd if (!(mask & AT_MODE)) 2658168404Spjd vap->va_mode = pzp->zp_mode; 2659168404Spjd 2660168404Spjd /* 2661168404Spjd * Take ownership or chgrp to group we are a member of 2662168404Spjd */ 2663168404Spjd 2664168404Spjd take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 2665185029Spjd take_group = (mask & AT_GID) && 2666185029Spjd zfs_groupmember(zfsvfs, vap->va_gid, cr); 2667168404Spjd 2668168404Spjd /* 2669168404Spjd * If both AT_UID and AT_GID are set then take_owner and 2670168404Spjd * take_group must both be set in order to allow taking 2671168404Spjd * ownership. 
2672168404Spjd * 2673168404Spjd * Otherwise, send the check through secpolicy_vnode_setattr() 2674168404Spjd * 2675168404Spjd */ 2676168404Spjd 2677168404Spjd if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 2678168404Spjd ((idmask == AT_UID) && take_owner) || 2679168404Spjd ((idmask == AT_GID) && take_group)) { 2680185029Spjd if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 2681185029Spjd skipaclchk, cr) == 0) { 2682168404Spjd /* 2683168404Spjd * Remove setuid/setgid for non-privileged users 2684168404Spjd */ 2685185029Spjd secpolicy_setid_clear(vap, vp, cr); 2686168404Spjd trim_mask = (mask & (AT_UID|AT_GID)); 2687168404Spjd } else { 2688168404Spjd need_policy = TRUE; 2689168404Spjd } 2690168404Spjd } else { 2691168404Spjd need_policy = TRUE; 2692168404Spjd } 2693168404Spjd } 2694168404Spjd 2695168404Spjd mutex_enter(&zp->z_lock); 2696168404Spjd oldva.va_mode = pzp->zp_mode; 2697185029Spjd zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 2698185029Spjd if (mask & AT_XVATTR) { 2699185029Spjd if ((need_policy == FALSE) && 2700185029Spjd (XVA_ISSET_REQ(xvap, XAT_APPENDONLY) && 2701185029Spjd xoap->xoa_appendonly != 2702185029Spjd ((pzp->zp_flags & ZFS_APPENDONLY) != 0)) || 2703185029Spjd (XVA_ISSET_REQ(xvap, XAT_NOUNLINK) && 2704185029Spjd xoap->xoa_nounlink != 2705185029Spjd ((pzp->zp_flags & ZFS_NOUNLINK) != 0)) || 2706185029Spjd (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE) && 2707185029Spjd xoap->xoa_immutable != 2708185029Spjd ((pzp->zp_flags & ZFS_IMMUTABLE) != 0)) || 2709185029Spjd (XVA_ISSET_REQ(xvap, XAT_NODUMP) && 2710185029Spjd xoap->xoa_nodump != 2711185029Spjd ((pzp->zp_flags & ZFS_NODUMP) != 0)) || 2712185029Spjd (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED) && 2713185029Spjd xoap->xoa_av_modified != 2714185029Spjd ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0)) || 2715185029Spjd ((XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED) && 2716185029Spjd ((vp->v_type != VREG && xoap->xoa_av_quarantined) || 2717185029Spjd xoap->xoa_av_quarantined != 2718185029Spjd 
((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0)))) || 2719185029Spjd (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) || 2720185029Spjd (XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 2721185029Spjd need_policy = TRUE; 2722185029Spjd } 2723185029Spjd } 2724185029Spjd 2725168404Spjd mutex_exit(&zp->z_lock); 2726168404Spjd 2727168404Spjd if (mask & AT_MODE) { 2728185029Spjd if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 2729168962Spjd err = secpolicy_setid_setsticky_clear(vp, vap, 2730168962Spjd &oldva, cr); 2731168962Spjd if (err) { 2732168962Spjd ZFS_EXIT(zfsvfs); 2733168962Spjd return (err); 2734168962Spjd } 2735168404Spjd trim_mask |= AT_MODE; 2736168404Spjd } else { 2737168404Spjd need_policy = TRUE; 2738168404Spjd } 2739168404Spjd } 2740168404Spjd 2741168404Spjd if (need_policy) { 2742168404Spjd /* 2743168404Spjd * If trim_mask is set then take ownership 2744168404Spjd * has been granted or write_acl is present and user 2745168404Spjd * has the ability to modify mode. In that case remove 2746168404Spjd * UID|GID and or MODE from mask so that 2747168404Spjd * secpolicy_vnode_setattr() doesn't revoke it. 
2748168404Spjd */ 2749168404Spjd 2750168404Spjd if (trim_mask) { 2751168404Spjd saved_mask = vap->va_mask; 2752168404Spjd vap->va_mask &= ~trim_mask; 2753168404Spjd } 2754168404Spjd err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 2755185029Spjd (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 2756168404Spjd if (err) { 2757168404Spjd ZFS_EXIT(zfsvfs); 2758168404Spjd return (err); 2759168404Spjd } 2760168404Spjd 2761168404Spjd if (trim_mask) 2762168404Spjd vap->va_mask |= saved_mask; 2763168404Spjd } 2764168404Spjd 2765168404Spjd /* 2766168404Spjd * secpolicy_vnode_setattr, or take ownership may have 2767168404Spjd * changed va_mask 2768168404Spjd */ 2769168404Spjd mask = vap->va_mask; 2770168404Spjd 2771168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2772168404Spjd dmu_tx_hold_bonus(tx, zp->z_id); 2773185029Spjd if (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2774185029Spjd ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid))) { 2775185029Spjd if (zfsvfs->z_fuid_obj == 0) { 2776185029Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 2777185029Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2778185029Spjd FUID_SIZE_ESTIMATE(zfsvfs)); 2779185029Spjd dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL); 2780185029Spjd } else { 2781185029Spjd dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj); 2782185029Spjd dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0, 2783185029Spjd FUID_SIZE_ESTIMATE(zfsvfs)); 2784185029Spjd } 2785185029Spjd } 2786168404Spjd 2787168404Spjd if (mask & AT_MODE) { 2788168404Spjd uint64_t pmode = pzp->zp_mode; 2789168404Spjd 2790168404Spjd new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 2791168404Spjd 2792185029Spjd if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) { 2793185029Spjd dmu_tx_abort(tx); 2794185029Spjd ZFS_EXIT(zfsvfs); 2795185029Spjd return (err); 2796185029Spjd } 2797185029Spjd if (pzp->zp_acl.z_acl_extern_obj) { 2798185029Spjd /* Are we upgrading ACL from old V0 format to new V1 */ 2799185029Spjd if (zfsvfs->z_version <= 
ZPL_VERSION_FUID && 2800185029Spjd pzp->zp_acl.z_acl_version == 2801185029Spjd ZFS_ACL_VERSION_INITIAL) { 2802185029Spjd dmu_tx_hold_free(tx, 2803185029Spjd pzp->zp_acl.z_acl_extern_obj, 0, 2804185029Spjd DMU_OBJECT_END); 2805185029Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2806185029Spjd 0, aclp->z_acl_bytes); 2807185029Spjd } else { 2808185029Spjd dmu_tx_hold_write(tx, 2809185029Spjd pzp->zp_acl.z_acl_extern_obj, 0, 2810185029Spjd aclp->z_acl_bytes); 2811185029Spjd } 2812185029Spjd } else if (aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2813168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2814185029Spjd 0, aclp->z_acl_bytes); 2815185029Spjd } 2816168404Spjd } 2817168404Spjd 2818185029Spjd if ((mask & (AT_UID | AT_GID)) && pzp->zp_xattr != 0) { 2819185029Spjd err = zfs_zget(zp->z_zfsvfs, pzp->zp_xattr, &attrzp); 2820168404Spjd if (err) { 2821168404Spjd dmu_tx_abort(tx); 2822168404Spjd ZFS_EXIT(zfsvfs); 2823185029Spjd if (aclp) 2824185029Spjd zfs_acl_free(aclp); 2825168404Spjd return (err); 2826168404Spjd } 2827168404Spjd dmu_tx_hold_bonus(tx, attrzp->z_id); 2828168404Spjd } 2829168404Spjd 2830168404Spjd err = dmu_tx_assign(tx, zfsvfs->z_assign); 2831168404Spjd if (err) { 2832168404Spjd if (attrzp) 2833168404Spjd VN_RELE(ZTOV(attrzp)); 2834185029Spjd 2835185029Spjd if (aclp) { 2836185029Spjd zfs_acl_free(aclp); 2837185029Spjd aclp = NULL; 2838185029Spjd } 2839185029Spjd 2840168404Spjd if (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { 2841168404Spjd dmu_tx_wait(tx); 2842168404Spjd dmu_tx_abort(tx); 2843168404Spjd goto top; 2844168404Spjd } 2845168404Spjd dmu_tx_abort(tx); 2846168404Spjd ZFS_EXIT(zfsvfs); 2847168404Spjd return (err); 2848168404Spjd } 2849168404Spjd 2850168404Spjd dmu_buf_will_dirty(zp->z_dbuf, tx); 2851168404Spjd 2852168404Spjd /* 2853168404Spjd * Set each attribute requested. 2854168404Spjd * We group settings according to the locks they need to acquire. 
2855168404Spjd * 2856168404Spjd * Note: you cannot set ctime directly, although it will be 2857168404Spjd * updated as a side-effect of calling this function. 2858168404Spjd */ 2859168404Spjd 2860168404Spjd mutex_enter(&zp->z_lock); 2861168404Spjd 2862168404Spjd if (mask & AT_MODE) { 2863185029Spjd mutex_enter(&zp->z_acl_lock); 2864185029Spjd zp->z_phys->zp_mode = new_mode; 2865185029Spjd err = zfs_aclset_common(zp, aclp, cr, &fuidp, tx); 2866168404Spjd ASSERT3U(err, ==, 0); 2867185029Spjd mutex_exit(&zp->z_acl_lock); 2868168404Spjd } 2869168404Spjd 2870168404Spjd if (attrzp) 2871168404Spjd mutex_enter(&attrzp->z_lock); 2872168404Spjd 2873168404Spjd if (mask & AT_UID) { 2874185029Spjd pzp->zp_uid = zfs_fuid_create(zfsvfs, 2875185029Spjd vap->va_uid, cr, ZFS_OWNER, tx, &fuidp); 2876168404Spjd if (attrzp) { 2877185029Spjd attrzp->z_phys->zp_uid = zfs_fuid_create(zfsvfs, 2878185029Spjd vap->va_uid, cr, ZFS_OWNER, tx, &fuidp); 2879168404Spjd } 2880168404Spjd } 2881168404Spjd 2882168404Spjd if (mask & AT_GID) { 2883185029Spjd pzp->zp_gid = zfs_fuid_create(zfsvfs, vap->va_gid, 2884185029Spjd cr, ZFS_GROUP, tx, &fuidp); 2885168404Spjd if (attrzp) 2886185029Spjd attrzp->z_phys->zp_gid = zfs_fuid_create(zfsvfs, 2887185029Spjd vap->va_gid, cr, ZFS_GROUP, tx, &fuidp); 2888168404Spjd } 2889168404Spjd 2890185029Spjd if (aclp) 2891185029Spjd zfs_acl_free(aclp); 2892185029Spjd 2893168404Spjd if (attrzp) 2894168404Spjd mutex_exit(&attrzp->z_lock); 2895168404Spjd 2896168404Spjd if (mask & AT_ATIME) 2897168404Spjd ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime); 2898168404Spjd 2899168404Spjd if (mask & AT_MTIME) 2900168404Spjd ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime); 2901168404Spjd 2902185029Spjd /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 2903168404Spjd if (mask & AT_SIZE) 2904168404Spjd zfs_time_stamper_locked(zp, CONTENT_MODIFIED, tx); 2905168404Spjd else if (mask != 0) 2906168404Spjd zfs_time_stamper_locked(zp, STATE_CHANGED, tx); 2907185029Spjd /* 2908185029Spjd * Do this after setting timestamps to prevent timestamp 2909185029Spjd * update from toggling bit 2910185029Spjd */ 2911168404Spjd 2912185029Spjd if (xoap && (mask & AT_XVATTR)) { 2913185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { 2914185029Spjd size_t len; 2915185029Spjd dmu_object_info_t doi; 2916185029Spjd 2917185029Spjd ASSERT(vp->v_type == VREG); 2918185029Spjd 2919185029Spjd /* Grow the bonus buffer if necessary. */ 2920185029Spjd dmu_object_info_from_db(zp->z_dbuf, &doi); 2921185029Spjd len = sizeof (xoap->xoa_av_scanstamp) + 2922185029Spjd sizeof (znode_phys_t); 2923185029Spjd if (len > doi.doi_bonus_size) 2924185029Spjd VERIFY(dmu_set_bonus(zp->z_dbuf, len, tx) == 0); 2925185029Spjd } 2926185029Spjd zfs_xvattr_set(zp, xvap); 2927185029Spjd } 2928185029Spjd 2929168404Spjd if (mask != 0) 2930185029Spjd zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 2931168404Spjd 2932185029Spjd if (fuidp) 2933185029Spjd zfs_fuid_info_free(fuidp); 2934168404Spjd mutex_exit(&zp->z_lock); 2935168404Spjd 2936168404Spjd if (attrzp) 2937168404Spjd VN_RELE(ZTOV(attrzp)); 2938168404Spjd 2939168404Spjd dmu_tx_commit(tx); 2940168404Spjd 2941168404Spjd ZFS_EXIT(zfsvfs); 2942168404Spjd return (err); 2943168404Spjd} 2944168404Spjd 2945168404Spjdtypedef struct zfs_zlock { 2946168404Spjd krwlock_t *zl_rwlock; /* lock we acquired */ 2947168404Spjd znode_t *zl_znode; /* znode we held */ 2948168404Spjd struct zfs_zlock *zl_next; /* next in list */ 2949168404Spjd} zfs_zlock_t; 2950168404Spjd 2951168404Spjd/* 2952168404Spjd * Drop locks and release vnodes that were held by zfs_rename_lock(). 
2953168404Spjd */ 2954168404Spjdstatic void 2955168404Spjdzfs_rename_unlock(zfs_zlock_t **zlpp) 2956168404Spjd{ 2957168404Spjd zfs_zlock_t *zl; 2958168404Spjd 2959168404Spjd while ((zl = *zlpp) != NULL) { 2960168404Spjd if (zl->zl_znode != NULL) 2961168404Spjd VN_RELE(ZTOV(zl->zl_znode)); 2962168404Spjd rw_exit(zl->zl_rwlock); 2963168404Spjd *zlpp = zl->zl_next; 2964168404Spjd kmem_free(zl, sizeof (*zl)); 2965168404Spjd } 2966168404Spjd} 2967168404Spjd 2968168404Spjd/* 2969168404Spjd * Search back through the directory tree, using the ".." entries. 2970168404Spjd * Lock each directory in the chain to prevent concurrent renames. 2971168404Spjd * Fail any attempt to move a directory into one of its own descendants. 2972168404Spjd * XXX - z_parent_lock can overlap with map or grow locks 2973168404Spjd */ 2974168404Spjdstatic int 2975168404Spjdzfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 2976168404Spjd{ 2977168404Spjd zfs_zlock_t *zl; 2978168404Spjd znode_t *zp = tdzp; 2979168404Spjd uint64_t rootid = zp->z_zfsvfs->z_root; 2980168404Spjd uint64_t *oidp = &zp->z_id; 2981168404Spjd krwlock_t *rwlp = &szp->z_parent_lock; 2982168404Spjd krw_t rw = RW_WRITER; 2983168404Spjd 2984168404Spjd /* 2985168404Spjd * First pass write-locks szp and compares to zp->z_id. 2986168404Spjd * Later passes read-lock zp and compare to zp->z_parent. 2987168404Spjd */ 2988168404Spjd do { 2989168404Spjd if (!rw_tryenter(rwlp, rw)) { 2990168404Spjd /* 2991168404Spjd * Another thread is renaming in this path. 2992168404Spjd * Note that if we are a WRITER, we don't have any 2993168404Spjd * parent_locks held yet. 
2994168404Spjd */ 2995168404Spjd if (rw == RW_READER && zp->z_id > szp->z_id) { 2996168404Spjd /* 2997168404Spjd * Drop our locks and restart 2998168404Spjd */ 2999168404Spjd zfs_rename_unlock(&zl); 3000168404Spjd *zlpp = NULL; 3001168404Spjd zp = tdzp; 3002168404Spjd oidp = &zp->z_id; 3003168404Spjd rwlp = &szp->z_parent_lock; 3004168404Spjd rw = RW_WRITER; 3005168404Spjd continue; 3006168404Spjd } else { 3007168404Spjd /* 3008168404Spjd * Wait for other thread to drop its locks 3009168404Spjd */ 3010168404Spjd rw_enter(rwlp, rw); 3011168404Spjd } 3012168404Spjd } 3013168404Spjd 3014168404Spjd zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3015168404Spjd zl->zl_rwlock = rwlp; 3016168404Spjd zl->zl_znode = NULL; 3017168404Spjd zl->zl_next = *zlpp; 3018168404Spjd *zlpp = zl; 3019168404Spjd 3020168404Spjd if (*oidp == szp->z_id) /* We're a descendant of szp */ 3021168404Spjd return (EINVAL); 3022168404Spjd 3023168404Spjd if (*oidp == rootid) /* We've hit the top */ 3024168404Spjd return (0); 3025168404Spjd 3026168404Spjd if (rw == RW_READER) { /* i.e. not the first pass */ 3027168404Spjd int error = zfs_zget(zp->z_zfsvfs, *oidp, &zp); 3028168404Spjd if (error) 3029168404Spjd return (error); 3030168404Spjd zl->zl_znode = zp; 3031168404Spjd } 3032168404Spjd oidp = &zp->z_phys->zp_parent; 3033168404Spjd rwlp = &zp->z_parent_lock; 3034168404Spjd rw = RW_READER; 3035168404Spjd 3036168404Spjd } while (zp->z_id != sdzp->z_id); 3037168404Spjd 3038168404Spjd return (0); 3039168404Spjd} 3040168404Spjd 3041168404Spjd/* 3042168404Spjd * Move an entry from the provided source directory to the target 3043168404Spjd * directory. Change the entry name as indicated. 3044168404Spjd * 3045168404Spjd * IN: sdvp - Source directory containing the "old entry". 3046168404Spjd * snm - Old entry name. 3047168404Spjd * tdvp - Target directory to contain the "new entry". 3048168404Spjd * tnm - New entry name. 3049168404Spjd * cr - credentials of caller. 
3050185029Spjd * ct - caller context 3051185029Spjd * flags - case flags 3052168404Spjd * 3053168404Spjd * RETURN: 0 if success 3054168404Spjd * error code if failure 3055168404Spjd * 3056168404Spjd * Timestamps: 3057168404Spjd * sdvp,tdvp - ctime|mtime updated 3058168404Spjd */ 3059185029Spjd/*ARGSUSED*/ 3060168404Spjdstatic int 3061185029Spjdzfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, 3062185029Spjd caller_context_t *ct, int flags) 3063168404Spjd{ 3064168404Spjd znode_t *tdzp, *szp, *tzp; 3065168404Spjd znode_t *sdzp = VTOZ(sdvp); 3066168404Spjd zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; 3067185029Spjd zilog_t *zilog; 3068168962Spjd vnode_t *realvp; 3069168404Spjd zfs_dirlock_t *sdl, *tdl; 3070168404Spjd dmu_tx_t *tx; 3071168404Spjd zfs_zlock_t *zl; 3072185029Spjd int cmp, serr, terr; 3073185029Spjd int error = 0; 3074185029Spjd int zflg = 0; 3075168404Spjd 3076168404Spjd ZFS_ENTER(zfsvfs); 3077185029Spjd ZFS_VERIFY_ZP(sdzp); 3078185029Spjd zilog = zfsvfs->z_log; 3079168404Spjd 3080168962Spjd /* 3081168962Spjd * Make sure we have the real vp for the target directory. 
3082168962Spjd */ 3083185029Spjd if (VOP_REALVP(tdvp, &realvp, ct) == 0) 3084168962Spjd tdvp = realvp; 3085168962Spjd 3086168404Spjd if (tdvp->v_vfsp != sdvp->v_vfsp) { 3087168404Spjd ZFS_EXIT(zfsvfs); 3088168962Spjd return (EXDEV); 3089168404Spjd } 3090168404Spjd 3091168404Spjd tdzp = VTOZ(tdvp); 3092185029Spjd ZFS_VERIFY_ZP(tdzp); 3093185029Spjd if (zfsvfs->z_utf8 && u8_validate(tnm, 3094185029Spjd strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3095185029Spjd ZFS_EXIT(zfsvfs); 3096185029Spjd return (EILSEQ); 3097185029Spjd } 3098185029Spjd 3099185029Spjd if (flags & FIGNORECASE) 3100185029Spjd zflg |= ZCILOOK; 3101185029Spjd 3102168404Spjdtop: 3103168404Spjd szp = NULL; 3104168404Spjd tzp = NULL; 3105168404Spjd zl = NULL; 3106168404Spjd 3107168404Spjd /* 3108168404Spjd * This is to prevent the creation of links into attribute space 3109168404Spjd * by renaming a linked file into/outof an attribute directory. 3110168404Spjd * See the comment in zfs_link() for why this is considered bad. 3111168404Spjd */ 3112168404Spjd if ((tdzp->z_phys->zp_flags & ZFS_XATTR) != 3113168404Spjd (sdzp->z_phys->zp_flags & ZFS_XATTR)) { 3114168962Spjd ZFS_EXIT(zfsvfs); 3115168962Spjd return (EINVAL); 3116168404Spjd } 3117168404Spjd 3118168404Spjd /* 3119168404Spjd * Lock source and target directory entries. To prevent deadlock, 3120168404Spjd * a lock ordering must be defined. We lock the directory with 3121168404Spjd * the smallest object id first, or if it's a tie, the one with 3122168404Spjd * the lexically first name. 3123168404Spjd */ 3124168404Spjd if (sdzp->z_id < tdzp->z_id) { 3125168962Spjd cmp = -1; 3126168962Spjd } else if (sdzp->z_id > tdzp->z_id) { 3127168962Spjd cmp = 1; 3128168962Spjd } else { 3129185029Spjd /* 3130185029Spjd * First compare the two name arguments without 3131185029Spjd * considering any case folding. 
3132185029Spjd */ 3133185029Spjd int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); 3134185029Spjd 3135185029Spjd cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); 3136185029Spjd ASSERT(error == 0 || !zfsvfs->z_utf8); 3137168962Spjd if (cmp == 0) { 3138168962Spjd /* 3139168962Spjd * POSIX: "If the old argument and the new argument 3140168962Spjd * both refer to links to the same existing file, 3141168962Spjd * the rename() function shall return successfully 3142168962Spjd * and perform no other action." 3143168962Spjd */ 3144168962Spjd ZFS_EXIT(zfsvfs); 3145168962Spjd return (0); 3146168962Spjd } 3147185029Spjd /* 3148185029Spjd * If the file system is case-folding, then we may 3149185029Spjd * have some more checking to do. A case-folding file 3150185029Spjd * system is either supporting mixed case sensitivity 3151185029Spjd * access or is completely case-insensitive. Note 3152185029Spjd * that the file system is always case preserving. 3153185029Spjd * 3154185029Spjd * In mixed sensitivity mode case sensitive behavior 3155185029Spjd * is the default. FIGNORECASE must be used to 3156185029Spjd * explicitly request case insensitive behavior. 3157185029Spjd * 3158185029Spjd * If the source and target names provided differ only 3159185029Spjd * by case (e.g., a request to rename 'tim' to 'Tim'), 3160185029Spjd * we will treat this as a special case in the 3161185029Spjd * case-insensitive mode: as long as the source name 3162185029Spjd * is an exact match, we will allow this to proceed as 3163185029Spjd * a name-change request. 
3164185029Spjd */ 3165185029Spjd if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 3166185029Spjd (zfsvfs->z_case == ZFS_CASE_MIXED && 3167185029Spjd flags & FIGNORECASE)) && 3168185029Spjd u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, 3169185029Spjd &error) == 0) { 3170185029Spjd /* 3171185029Spjd * case preserving rename request, require exact 3172185029Spjd * name matches 3173185029Spjd */ 3174185029Spjd zflg |= ZCIEXACT; 3175185029Spjd zflg &= ~ZCILOOK; 3176185029Spjd } 3177168962Spjd } 3178185029Spjd 3179168962Spjd if (cmp < 0) { 3180185029Spjd serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, 3181185029Spjd ZEXISTS | zflg, NULL, NULL); 3182185029Spjd terr = zfs_dirent_lock(&tdl, 3183185029Spjd tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); 3184168962Spjd } else { 3185185029Spjd terr = zfs_dirent_lock(&tdl, 3186185029Spjd tdzp, tnm, &tzp, zflg, NULL, NULL); 3187185029Spjd serr = zfs_dirent_lock(&sdl, 3188185029Spjd sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, 3189185029Spjd NULL, NULL); 3190168404Spjd } 3191168404Spjd 3192168962Spjd if (serr) { 3193168404Spjd /* 3194168404Spjd * Source entry invalid or not there. 3195168404Spjd */ 3196168962Spjd if (!terr) { 3197168404Spjd zfs_dirent_unlock(tdl); 3198168962Spjd if (tzp) 3199168962Spjd VN_RELE(ZTOV(tzp)); 3200168962Spjd } 3201168404Spjd if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0) 3202168404Spjd serr = EINVAL; 3203168962Spjd ZFS_EXIT(zfsvfs); 3204168962Spjd return (serr); 3205168404Spjd } 3206168404Spjd if (terr) { 3207168404Spjd zfs_dirent_unlock(sdl); 3208168962Spjd VN_RELE(ZTOV(szp)); 3209168404Spjd if (strcmp(tnm, "..") == 0) 3210168404Spjd terr = EINVAL; 3211168962Spjd ZFS_EXIT(zfsvfs); 3212168962Spjd return (terr); 3213168404Spjd } 3214168404Spjd 3215168404Spjd /* 3216168404Spjd * Must have write access at the source to remove the old entry 3217168404Spjd * and write access at the target to create the new entry. 
3218168404Spjd * Note that if target and source are the same, this can be 3219168404Spjd * done in a single check. 3220168404Spjd */ 3221168404Spjd 3222168404Spjd if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3223168404Spjd goto out; 3224168404Spjd 3225168962Spjd if (ZTOV(szp)->v_type == VDIR) { 3226168404Spjd /* 3227168404Spjd * Check to make sure rename is valid. 3228168404Spjd * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3229168404Spjd */ 3230168404Spjd if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) 3231168404Spjd goto out; 3232168404Spjd } 3233168404Spjd 3234168404Spjd /* 3235168404Spjd * Does target exist? 3236168404Spjd */ 3237168404Spjd if (tzp) { 3238168404Spjd /* 3239168404Spjd * Source and target must be the same type. 3240168404Spjd */ 3241168962Spjd if (ZTOV(szp)->v_type == VDIR) { 3242168962Spjd if (ZTOV(tzp)->v_type != VDIR) { 3243168404Spjd error = ENOTDIR; 3244168404Spjd goto out; 3245168404Spjd } 3246168404Spjd } else { 3247168962Spjd if (ZTOV(tzp)->v_type == VDIR) { 3248168404Spjd error = EISDIR; 3249168404Spjd goto out; 3250168404Spjd } 3251168404Spjd } 3252168404Spjd /* 3253168404Spjd * POSIX dictates that when the source and target 3254168404Spjd * entries refer to the same file object, rename 3255168404Spjd * must do nothing and exit without error. 3256168404Spjd */ 3257168404Spjd if (szp->z_id == tzp->z_id) { 3258168404Spjd error = 0; 3259168404Spjd goto out; 3260168404Spjd } 3261168404Spjd } 3262168404Spjd 3263185029Spjd vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); 3264168962Spjd if (tzp) 3265185029Spjd vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 3266168962Spjd 3267185029Spjd /* 3268185029Spjd * notify the target directory if it is not the same 3269185029Spjd * as source directory. 
3270185029Spjd */ 3271185029Spjd if (tdvp != sdvp) { 3272185029Spjd vnevent_rename_dest_dir(tdvp, ct); 3273185029Spjd } 3274185029Spjd 3275168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 3276168404Spjd dmu_tx_hold_bonus(tx, szp->z_id); /* nlink changes */ 3277168404Spjd dmu_tx_hold_bonus(tx, sdzp->z_id); /* nlink changes */ 3278168404Spjd dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3279168404Spjd dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 3280168404Spjd if (sdzp != tdzp) 3281168404Spjd dmu_tx_hold_bonus(tx, tdzp->z_id); /* nlink changes */ 3282168404Spjd if (tzp) 3283168404Spjd dmu_tx_hold_bonus(tx, tzp->z_id); /* parent changes */ 3284168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3285168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 3286168404Spjd if (error) { 3287168404Spjd if (zl != NULL) 3288168404Spjd zfs_rename_unlock(&zl); 3289168404Spjd zfs_dirent_unlock(sdl); 3290168404Spjd zfs_dirent_unlock(tdl); 3291168962Spjd VN_RELE(ZTOV(szp)); 3292168962Spjd if (tzp) 3293168962Spjd VN_RELE(ZTOV(tzp)); 3294168404Spjd if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { 3295168404Spjd dmu_tx_wait(tx); 3296168404Spjd dmu_tx_abort(tx); 3297168404Spjd goto top; 3298168404Spjd } 3299168404Spjd dmu_tx_abort(tx); 3300168962Spjd ZFS_EXIT(zfsvfs); 3301168962Spjd return (error); 3302168404Spjd } 3303168404Spjd 3304168404Spjd if (tzp) /* Attempt to remove the existing target */ 3305185029Spjd error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); 3306168404Spjd 3307168404Spjd if (error == 0) { 3308168404Spjd error = zfs_link_create(tdl, szp, tx, ZRENAMING); 3309168404Spjd if (error == 0) { 3310185029Spjd szp->z_phys->zp_flags |= ZFS_AV_MODIFIED; 3311185029Spjd 3312168404Spjd error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); 3313168404Spjd ASSERT(error == 0); 3314185029Spjd 3315185029Spjd zfs_log_rename(zilog, tx, 3316185029Spjd TX_RENAME | (flags & FIGNORECASE ? 
TX_CI : 0), 3317185029Spjd sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp); 3318185029Spjd 3319185029Spjd /* Update path information for the target vnode */ 3320185029Spjd vn_renamepath(tdvp, ZTOV(szp), tnm, strlen(tnm)); 3321168404Spjd } 3322168404Spjd#ifdef FREEBSD_NAMECACHE 3323168404Spjd if (error == 0) { 3324168404Spjd cache_purge(sdvp); 3325168404Spjd cache_purge(tdvp); 3326168404Spjd } 3327168404Spjd#endif 3328168404Spjd } 3329168404Spjd 3330168404Spjd dmu_tx_commit(tx); 3331168404Spjdout: 3332168404Spjd if (zl != NULL) 3333168404Spjd zfs_rename_unlock(&zl); 3334168404Spjd 3335168404Spjd zfs_dirent_unlock(sdl); 3336168404Spjd zfs_dirent_unlock(tdl); 3337168404Spjd 3338168962Spjd VN_RELE(ZTOV(szp)); 3339168404Spjd if (tzp) 3340168962Spjd VN_RELE(ZTOV(tzp)); 3341168404Spjd 3342168404Spjd ZFS_EXIT(zfsvfs); 3343168404Spjd 3344168404Spjd return (error); 3345168404Spjd} 3346168404Spjd 3347168404Spjd/* 3348168404Spjd * Insert the indicated symbolic reference entry into the directory. 3349168404Spjd * 3350168404Spjd * IN: dvp - Directory to contain new symbolic link. 3351168404Spjd * link - Name for new symlink entry. 3352168404Spjd * vap - Attributes of new entry. 3353168404Spjd * target - Target path of new symlink. 3354168404Spjd * cr - credentials of caller. 
3355185029Spjd * ct - caller context 3356185029Spjd * flags - case flags 3357168404Spjd * 3358168404Spjd * RETURN: 0 if success 3359168404Spjd * error code if failure 3360168404Spjd * 3361168404Spjd * Timestamps: 3362168404Spjd * dvp - ctime|mtime updated 3363168404Spjd */ 3364185029Spjd/*ARGSUSED*/ 3365168404Spjdstatic int 3366185029Spjdzfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 3367185029Spjd cred_t *cr, kthread_t *td) 3368168404Spjd{ 3369168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 3370168404Spjd zfs_dirlock_t *dl; 3371168404Spjd dmu_tx_t *tx; 3372168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3373185029Spjd zilog_t *zilog; 3374168404Spjd int len = strlen(link); 3375168404Spjd int error; 3376185029Spjd int zflg = ZNEW; 3377185029Spjd zfs_fuid_info_t *fuidp = NULL; 3378185029Spjd int flags = 0; 3379168404Spjd 3380168962Spjd ASSERT(vap->va_type == VLNK); 3381168404Spjd 3382168404Spjd ZFS_ENTER(zfsvfs); 3383185029Spjd ZFS_VERIFY_ZP(dzp); 3384185029Spjd zilog = zfsvfs->z_log; 3385185029Spjd 3386185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 3387185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3388185029Spjd ZFS_EXIT(zfsvfs); 3389185029Spjd return (EILSEQ); 3390185029Spjd } 3391185029Spjd if (flags & FIGNORECASE) 3392185029Spjd zflg |= ZCILOOK; 3393168404Spjdtop: 3394185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3395168404Spjd ZFS_EXIT(zfsvfs); 3396168404Spjd return (error); 3397168404Spjd } 3398168404Spjd 3399168404Spjd if (len > MAXPATHLEN) { 3400168404Spjd ZFS_EXIT(zfsvfs); 3401168404Spjd return (ENAMETOOLONG); 3402168404Spjd } 3403168404Spjd 3404168404Spjd /* 3405168404Spjd * Attempt to lock directory; fail if entry already exists. 
3406168404Spjd */ 3407185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 3408185029Spjd if (error) { 3409168404Spjd ZFS_EXIT(zfsvfs); 3410168404Spjd return (error); 3411168404Spjd } 3412168404Spjd 3413168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 3414168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 3415168404Spjd dmu_tx_hold_bonus(tx, dzp->z_id); 3416168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 3417168404Spjd if (dzp->z_phys->zp_flags & ZFS_INHERIT_ACE) 3418168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE); 3419185029Spjd if (IS_EPHEMERAL(crgetuid(cr)) || IS_EPHEMERAL(crgetgid(cr))) { 3420185029Spjd if (zfsvfs->z_fuid_obj == 0) { 3421185029Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 3422185029Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 3423185029Spjd FUID_SIZE_ESTIMATE(zfsvfs)); 3424185029Spjd dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL); 3425185029Spjd } else { 3426185029Spjd dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj); 3427185029Spjd dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0, 3428185029Spjd FUID_SIZE_ESTIMATE(zfsvfs)); 3429185029Spjd } 3430185029Spjd } 3431168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 3432168404Spjd if (error) { 3433168404Spjd zfs_dirent_unlock(dl); 3434168404Spjd if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { 3435168404Spjd dmu_tx_wait(tx); 3436168404Spjd dmu_tx_abort(tx); 3437168404Spjd goto top; 3438168404Spjd } 3439168404Spjd dmu_tx_abort(tx); 3440168404Spjd ZFS_EXIT(zfsvfs); 3441168404Spjd return (error); 3442168404Spjd } 3443168404Spjd 3444168404Spjd dmu_buf_will_dirty(dzp->z_dbuf, tx); 3445168404Spjd 3446168404Spjd /* 3447168404Spjd * Create a new object for the symlink. 3448168404Spjd * Put the link content into bonus buffer if it will fit; 3449168404Spjd * otherwise, store it just like any other file data. 
3450168404Spjd */ 3451168404Spjd if (sizeof (znode_phys_t) + len <= dmu_bonus_max()) { 3452185029Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, len, NULL, &fuidp); 3453168404Spjd if (len != 0) 3454168404Spjd bcopy(link, zp->z_phys + 1, len); 3455168404Spjd } else { 3456168404Spjd dmu_buf_t *dbp; 3457168404Spjd 3458185029Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, NULL, &fuidp); 3459168404Spjd /* 3460168404Spjd * Nothing can access the znode yet so no locking needed 3461168404Spjd * for growing the znode's blocksize. 3462168404Spjd */ 3463168404Spjd zfs_grow_blocksize(zp, len, tx); 3464168404Spjd 3465185029Spjd VERIFY(0 == dmu_buf_hold(zfsvfs->z_os, 3466185029Spjd zp->z_id, 0, FTAG, &dbp)); 3467168404Spjd dmu_buf_will_dirty(dbp, tx); 3468168404Spjd 3469168404Spjd ASSERT3U(len, <=, dbp->db_size); 3470168404Spjd bcopy(link, dbp->db_data, len); 3471168404Spjd dmu_buf_rele(dbp, FTAG); 3472168404Spjd } 3473168404Spjd zp->z_phys->zp_size = len; 3474168404Spjd 3475168404Spjd /* 3476168404Spjd * Insert the new object into the directory. 3477168404Spjd */ 3478168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 3479168962Spjdout: 3480168404Spjd if (error == 0) { 3481185029Spjd uint64_t txtype = TX_SYMLINK; 3482185029Spjd if (flags & FIGNORECASE) 3483185029Spjd txtype |= TX_CI; 3484185029Spjd zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 3485168962Spjd *vpp = ZTOV(zp); 3486168404Spjd } 3487185029Spjd if (fuidp) 3488185029Spjd zfs_fuid_info_free(fuidp); 3489168404Spjd 3490168404Spjd dmu_tx_commit(tx); 3491168404Spjd 3492168404Spjd zfs_dirent_unlock(dl); 3493168404Spjd 3494168404Spjd ZFS_EXIT(zfsvfs); 3495168404Spjd return (error); 3496168404Spjd} 3497168404Spjd 3498168404Spjd/* 3499168404Spjd * Return, in the buffer contained in the provided uio structure, 3500168404Spjd * the symbolic path referred to by vp. 3501168404Spjd * 3502168404Spjd * IN: vp - vnode of symbolic link. 3503168404Spjd * uoip - structure to contain the link path. 
3504168404Spjd * cr - credentials of caller. 3505185029Spjd * ct - caller context 3506168404Spjd * 3507168404Spjd * OUT: uio - structure to contain the link path. 3508168404Spjd * 3509168404Spjd * RETURN: 0 if success 3510168404Spjd * error code if failure 3511168404Spjd * 3512168404Spjd * Timestamps: 3513168404Spjd * vp - atime updated 3514168404Spjd */ 3515168404Spjd/* ARGSUSED */ 3516168404Spjdstatic int 3517185029Spjdzfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 3518168404Spjd{ 3519168404Spjd znode_t *zp = VTOZ(vp); 3520168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3521168404Spjd size_t bufsz; 3522168404Spjd int error; 3523168404Spjd 3524168404Spjd ZFS_ENTER(zfsvfs); 3525185029Spjd ZFS_VERIFY_ZP(zp); 3526168404Spjd 3527168404Spjd bufsz = (size_t)zp->z_phys->zp_size; 3528168404Spjd if (bufsz + sizeof (znode_phys_t) <= zp->z_dbuf->db_size) { 3529168404Spjd error = uiomove(zp->z_phys + 1, 3530168404Spjd MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); 3531168404Spjd } else { 3532168404Spjd dmu_buf_t *dbp; 3533168404Spjd error = dmu_buf_hold(zfsvfs->z_os, zp->z_id, 0, FTAG, &dbp); 3534168404Spjd if (error) { 3535168404Spjd ZFS_EXIT(zfsvfs); 3536168404Spjd return (error); 3537168404Spjd } 3538168404Spjd error = uiomove(dbp->db_data, 3539168404Spjd MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); 3540168404Spjd dmu_buf_rele(dbp, FTAG); 3541168404Spjd } 3542168404Spjd 3543168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 3544168404Spjd ZFS_EXIT(zfsvfs); 3545168404Spjd return (error); 3546168404Spjd} 3547168404Spjd 3548168404Spjd/* 3549168404Spjd * Insert a new entry into directory tdvp referencing svp. 3550168404Spjd * 3551168404Spjd * IN: tdvp - Directory to contain new entry. 3552168404Spjd * svp - vnode of new entry. 3553168404Spjd * name - name of new entry. 3554168404Spjd * cr - credentials of caller. 
3555185029Spjd * ct - caller context 3556168404Spjd * 3557168404Spjd * RETURN: 0 if success 3558168404Spjd * error code if failure 3559168404Spjd * 3560168404Spjd * Timestamps: 3561168404Spjd * tdvp - ctime|mtime updated 3562168404Spjd * svp - ctime updated 3563168404Spjd */ 3564168404Spjd/* ARGSUSED */ 3565168404Spjdstatic int 3566185029Spjdzfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 3567185029Spjd caller_context_t *ct, int flags) 3568168404Spjd{ 3569168404Spjd znode_t *dzp = VTOZ(tdvp); 3570168404Spjd znode_t *tzp, *szp; 3571168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3572185029Spjd zilog_t *zilog; 3573168404Spjd zfs_dirlock_t *dl; 3574168404Spjd dmu_tx_t *tx; 3575168962Spjd vnode_t *realvp; 3576168404Spjd int error; 3577185029Spjd int zf = ZNEW; 3578185029Spjd uid_t owner; 3579168404Spjd 3580168404Spjd ASSERT(tdvp->v_type == VDIR); 3581168404Spjd 3582168404Spjd ZFS_ENTER(zfsvfs); 3583185029Spjd ZFS_VERIFY_ZP(dzp); 3584185029Spjd zilog = zfsvfs->z_log; 3585168404Spjd 3586185029Spjd if (VOP_REALVP(svp, &realvp, ct) == 0) 3587168962Spjd svp = realvp; 3588168962Spjd 3589168404Spjd if (svp->v_vfsp != tdvp->v_vfsp) { 3590168404Spjd ZFS_EXIT(zfsvfs); 3591168404Spjd return (EXDEV); 3592168404Spjd } 3593185029Spjd szp = VTOZ(svp); 3594185029Spjd ZFS_VERIFY_ZP(szp); 3595168404Spjd 3596185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, 3597185029Spjd strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3598185029Spjd ZFS_EXIT(zfsvfs); 3599185029Spjd return (EILSEQ); 3600185029Spjd } 3601185029Spjd if (flags & FIGNORECASE) 3602185029Spjd zf |= ZCILOOK; 3603185029Spjd 3604168404Spjdtop: 3605168404Spjd /* 3606168404Spjd * We do not support links between attributes and non-attributes 3607168404Spjd * because of the potential security risk of creating links 3608168404Spjd * into "normal" file space in order to circumvent restrictions 3609168404Spjd * imposed in attribute space. 
3610168404Spjd */ 3611168404Spjd if ((szp->z_phys->zp_flags & ZFS_XATTR) != 3612168404Spjd (dzp->z_phys->zp_flags & ZFS_XATTR)) { 3613168404Spjd ZFS_EXIT(zfsvfs); 3614168404Spjd return (EINVAL); 3615168404Spjd } 3616168404Spjd 3617168404Spjd /* 3618168404Spjd * POSIX dictates that we return EPERM here. 3619168404Spjd * Better choices include ENOTSUP or EISDIR. 3620168404Spjd */ 3621168404Spjd if (svp->v_type == VDIR) { 3622168404Spjd ZFS_EXIT(zfsvfs); 3623168404Spjd return (EPERM); 3624168404Spjd } 3625168404Spjd 3626185029Spjd owner = zfs_fuid_map_id(zfsvfs, szp->z_phys->zp_uid, cr, ZFS_OWNER); 3627185029Spjd if (owner != crgetuid(cr) && 3628185029Spjd secpolicy_basic_link(svp, cr) != 0) { 3629168404Spjd ZFS_EXIT(zfsvfs); 3630168404Spjd return (EPERM); 3631168404Spjd } 3632168404Spjd 3633185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3634168404Spjd ZFS_EXIT(zfsvfs); 3635168404Spjd return (error); 3636168404Spjd } 3637168404Spjd 3638168404Spjd /* 3639168404Spjd * Attempt to lock directory; fail if entry already exists. 
3640168404Spjd */ 3641185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 3642185029Spjd if (error) { 3643168404Spjd ZFS_EXIT(zfsvfs); 3644168404Spjd return (error); 3645168404Spjd } 3646168404Spjd 3647168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 3648168404Spjd dmu_tx_hold_bonus(tx, szp->z_id); 3649168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 3650168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 3651168404Spjd if (error) { 3652168404Spjd zfs_dirent_unlock(dl); 3653168404Spjd if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { 3654168404Spjd dmu_tx_wait(tx); 3655168404Spjd dmu_tx_abort(tx); 3656168404Spjd goto top; 3657168404Spjd } 3658168404Spjd dmu_tx_abort(tx); 3659168404Spjd ZFS_EXIT(zfsvfs); 3660168404Spjd return (error); 3661168404Spjd } 3662168404Spjd 3663168404Spjd error = zfs_link_create(dl, szp, tx, 0); 3664168404Spjd 3665185029Spjd if (error == 0) { 3666185029Spjd uint64_t txtype = TX_LINK; 3667185029Spjd if (flags & FIGNORECASE) 3668185029Spjd txtype |= TX_CI; 3669185029Spjd zfs_log_link(zilog, tx, txtype, dzp, szp, name); 3670185029Spjd } 3671168404Spjd 3672168404Spjd dmu_tx_commit(tx); 3673168404Spjd 3674168404Spjd zfs_dirent_unlock(dl); 3675168404Spjd 3676185029Spjd if (error == 0) { 3677185029Spjd vnevent_link(svp, ct); 3678185029Spjd } 3679185029Spjd 3680168404Spjd ZFS_EXIT(zfsvfs); 3681168404Spjd return (error); 3682168404Spjd} 3683168404Spjd 3684185029Spjd/*ARGSUSED*/ 3685168962Spjdvoid 3686185029Spjdzfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 3687168404Spjd{ 3688168962Spjd znode_t *zp = VTOZ(vp); 3689168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3690168962Spjd int error; 3691168404Spjd 3692185029Spjd rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 3693185029Spjd if (zp->z_dbuf == NULL) { 3694185029Spjd /* 3695185029Spjd * The fs has been unmounted, or we did a 3696185029Spjd * suspend/resume and this file no longer exists. 
3697185029Spjd */ 3698168404Spjd mutex_enter(&zp->z_lock); 3699168404Spjd VI_LOCK(vp); 3700168404Spjd vp->v_count = 0; /* count arrives as 1 */ 3701185029Spjd mutex_exit(&zp->z_lock); 3702185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 3703185029Spjd zfs_znode_free(zp); 3704168962Spjd return; 3705168404Spjd } 3706168404Spjd 3707168404Spjd if (zp->z_atime_dirty && zp->z_unlinked == 0) { 3708168404Spjd dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 3709168404Spjd 3710168404Spjd dmu_tx_hold_bonus(tx, zp->z_id); 3711168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 3712168404Spjd if (error) { 3713168404Spjd dmu_tx_abort(tx); 3714168404Spjd } else { 3715168404Spjd dmu_buf_will_dirty(zp->z_dbuf, tx); 3716168404Spjd mutex_enter(&zp->z_lock); 3717168404Spjd zp->z_atime_dirty = 0; 3718168404Spjd mutex_exit(&zp->z_lock); 3719168404Spjd dmu_tx_commit(tx); 3720168404Spjd } 3721168404Spjd } 3722168404Spjd 3723168404Spjd zfs_zinactive(zp); 3724185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 3725168404Spjd} 3726168404Spjd 3727168404SpjdCTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 3728168404SpjdCTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 3729168404Spjd 3730185029Spjd/*ARGSUSED*/ 3731168404Spjdstatic int 3732185029Spjdzfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 3733168404Spjd{ 3734168404Spjd znode_t *zp = VTOZ(vp); 3735168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3736185029Spjd uint32_t gen; 3737168404Spjd uint64_t object = zp->z_id; 3738168404Spjd zfid_short_t *zfid; 3739168404Spjd int size, i; 3740168404Spjd 3741168404Spjd ZFS_ENTER(zfsvfs); 3742185029Spjd ZFS_VERIFY_ZP(zp); 3743185029Spjd gen = (uint32_t)zp->z_gen; 3744168404Spjd 3745168404Spjd size = (zfsvfs->z_parent != zfsvfs) ? 
LONG_FID_LEN : SHORT_FID_LEN; 3746168404Spjd fidp->fid_len = size; 3747168404Spjd 3748168404Spjd zfid = (zfid_short_t *)fidp; 3749168404Spjd 3750168404Spjd zfid->zf_len = size; 3751168404Spjd 3752168404Spjd for (i = 0; i < sizeof (zfid->zf_object); i++) 3753168404Spjd zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 3754168404Spjd 3755168404Spjd /* Must have a non-zero generation number to distinguish from .zfs */ 3756168404Spjd if (gen == 0) 3757168404Spjd gen = 1; 3758168404Spjd for (i = 0; i < sizeof (zfid->zf_gen); i++) 3759168404Spjd zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 3760168404Spjd 3761168404Spjd if (size == LONG_FID_LEN) { 3762168404Spjd uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 3763169023Spjd zfid_long_t *zlfid; 3764168404Spjd 3765168404Spjd zlfid = (zfid_long_t *)fidp; 3766168404Spjd 3767168404Spjd for (i = 0; i < sizeof (zlfid->zf_setid); i++) 3768168404Spjd zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 3769168404Spjd 3770168404Spjd /* XXX - this should be the generation number for the objset */ 3771168404Spjd for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 3772168404Spjd zlfid->zf_setgen[i] = 0; 3773168404Spjd } 3774168404Spjd 3775168404Spjd ZFS_EXIT(zfsvfs); 3776168404Spjd return (0); 3777168404Spjd} 3778168404Spjd 3779168404Spjdstatic int 3780185029Spjdzfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 3781185029Spjd caller_context_t *ct) 3782168404Spjd{ 3783168404Spjd znode_t *zp, *xzp; 3784168404Spjd zfsvfs_t *zfsvfs; 3785168404Spjd zfs_dirlock_t *dl; 3786168404Spjd int error; 3787168404Spjd 3788168404Spjd switch (cmd) { 3789168404Spjd case _PC_LINK_MAX: 3790168404Spjd *valp = INT_MAX; 3791168404Spjd return (0); 3792168404Spjd 3793168404Spjd case _PC_FILESIZEBITS: 3794168404Spjd *valp = 64; 3795168404Spjd return (0); 3796168404Spjd 3797168404Spjd#if 0 3798168404Spjd case _PC_XATTR_EXISTS: 3799168404Spjd zp = VTOZ(vp); 3800168404Spjd zfsvfs = zp->z_zfsvfs; 3801168404Spjd ZFS_ENTER(zfsvfs); 3802185029Spjd 
ZFS_VERIFY_ZP(zp); 3803168404Spjd *valp = 0; 3804168404Spjd error = zfs_dirent_lock(&dl, zp, "", &xzp, 3805185029Spjd ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 3806168404Spjd if (error == 0) { 3807168404Spjd zfs_dirent_unlock(dl); 3808168404Spjd if (!zfs_dirempty(xzp)) 3809168404Spjd *valp = 1; 3810168404Spjd VN_RELE(ZTOV(xzp)); 3811168404Spjd } else if (error == ENOENT) { 3812168404Spjd /* 3813168404Spjd * If there aren't extended attributes, it's the 3814168404Spjd * same as having zero of them. 3815168404Spjd */ 3816168404Spjd error = 0; 3817168404Spjd } 3818168404Spjd ZFS_EXIT(zfsvfs); 3819168404Spjd return (error); 3820168404Spjd#endif 3821168404Spjd 3822168404Spjd case _PC_ACL_EXTENDED: 3823168404Spjd *valp = 0; /* TODO */ 3824168404Spjd return (0); 3825168404Spjd 3826168404Spjd case _PC_MIN_HOLE_SIZE: 3827168404Spjd *valp = (int)SPA_MINBLOCKSIZE; 3828168404Spjd return (0); 3829168404Spjd 3830168404Spjd default: 3831168962Spjd return (EOPNOTSUPP); 3832168404Spjd } 3833168404Spjd} 3834168404Spjd 3835168404Spjd#ifdef TODO 3836168404Spjd/*ARGSUSED*/ 3837168404Spjdstatic int 3838185029Spjdzfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 3839185029Spjd caller_context_t *ct) 3840168404Spjd{ 3841168404Spjd znode_t *zp = VTOZ(vp); 3842168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3843168404Spjd int error; 3844185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 3845168404Spjd 3846168404Spjd ZFS_ENTER(zfsvfs); 3847185029Spjd ZFS_VERIFY_ZP(zp); 3848185029Spjd error = zfs_getacl(zp, vsecp, skipaclchk, cr); 3849168404Spjd ZFS_EXIT(zfsvfs); 3850168404Spjd 3851168404Spjd return (error); 3852168404Spjd} 3853168404Spjd#endif /* TODO */ 3854168404Spjd 3855168404Spjd#ifdef TODO 3856168404Spjd/*ARGSUSED*/ 3857168404Spjdstatic int 3858185029Spjdzfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 3859185029Spjd caller_context_t *ct) 3860168404Spjd{ 3861168404Spjd znode_t *zp = VTOZ(vp); 3862168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3863168404Spjd int error; 3864185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 3865168404Spjd 3866168404Spjd ZFS_ENTER(zfsvfs); 3867185029Spjd ZFS_VERIFY_ZP(zp); 3868185029Spjd error = zfs_setacl(zp, vsecp, skipaclchk, cr); 3869168404Spjd ZFS_EXIT(zfsvfs); 3870168404Spjd return (error); 3871168404Spjd} 3872168404Spjd#endif /* TODO */ 3873168404Spjd 3874168962Spjdstatic int 3875168962Spjdzfs_freebsd_open(ap) 3876168962Spjd struct vop_open_args /* { 3877168962Spjd struct vnode *a_vp; 3878168962Spjd int a_mode; 3879168962Spjd struct ucred *a_cred; 3880168962Spjd struct thread *a_td; 3881168962Spjd } */ *ap; 3882168962Spjd{ 3883168962Spjd vnode_t *vp = ap->a_vp; 3884168962Spjd znode_t *zp = VTOZ(vp); 3885168962Spjd int error; 3886168962Spjd 3887185029Spjd error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); 3888168962Spjd if (error == 0) 3889168962Spjd vnode_create_vobject(vp, zp->z_phys->zp_size, ap->a_td); 3890168962Spjd return (error); 3891168962Spjd} 3892168962Spjd 3893168962Spjdstatic int 3894168962Spjdzfs_freebsd_close(ap) 3895168962Spjd struct vop_close_args /* { 3896168962Spjd struct vnode *a_vp; 3897168962Spjd int a_fflag; 3898168962Spjd struct ucred *a_cred; 3899168962Spjd struct thread *a_td; 3900168962Spjd } */ *ap; 3901168962Spjd{ 3902168962Spjd 3903185029Spjd return (zfs_close(ap->a_vp, ap->a_fflag, 0, 0, ap->a_cred, NULL)); 
3904168962Spjd} 3905168962Spjd 3906168962Spjdstatic int 3907168962Spjdzfs_freebsd_ioctl(ap) 3908168962Spjd struct vop_ioctl_args /* { 3909168962Spjd struct vnode *a_vp; 3910168962Spjd u_long a_command; 3911168962Spjd caddr_t a_data; 3912168962Spjd int a_fflag; 3913168962Spjd struct ucred *cred; 3914168962Spjd struct thread *td; 3915168962Spjd } */ *ap; 3916168962Spjd{ 3917168962Spjd 3918168978Spjd return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 3919185029Spjd ap->a_fflag, ap->a_cred, NULL, NULL)); 3920168962Spjd} 3921168962Spjd 3922168962Spjdstatic int 3923168962Spjdzfs_freebsd_read(ap) 3924168962Spjd struct vop_read_args /* { 3925168962Spjd struct vnode *a_vp; 3926168962Spjd struct uio *a_uio; 3927168962Spjd int a_ioflag; 3928168962Spjd struct ucred *a_cred; 3929168962Spjd } */ *ap; 3930168962Spjd{ 3931168962Spjd 3932168962Spjd return (zfs_read(ap->a_vp, ap->a_uio, ap->a_ioflag, ap->a_cred, NULL)); 3933168962Spjd} 3934168962Spjd 3935168962Spjdstatic int 3936168962Spjdzfs_freebsd_write(ap) 3937168962Spjd struct vop_write_args /* { 3938168962Spjd struct vnode *a_vp; 3939168962Spjd struct uio *a_uio; 3940168962Spjd int a_ioflag; 3941168962Spjd struct ucred *a_cred; 3942168962Spjd } */ *ap; 3943168962Spjd{ 3944168962Spjd 3945168962Spjd return (zfs_write(ap->a_vp, ap->a_uio, ap->a_ioflag, ap->a_cred, NULL)); 3946168962Spjd} 3947168962Spjd 3948168962Spjdstatic int 3949168962Spjdzfs_freebsd_access(ap) 3950168962Spjd struct vop_access_args /* { 3951168962Spjd struct vnode *a_vp; 3952185029Spjd int a_accmode; 3953168962Spjd struct ucred *a_cred; 3954168962Spjd struct thread *a_td; 3955168962Spjd } */ *ap; 3956168962Spjd{ 3957168962Spjd 3958185029Spjd return (zfs_access(ap->a_vp, ap->a_accmode, 0, ap->a_cred, NULL)); 3959168962Spjd} 3960168962Spjd 3961168962Spjdstatic int 3962168962Spjdzfs_freebsd_lookup(ap) 3963168962Spjd struct vop_lookup_args /* { 3964168962Spjd struct vnode *a_dvp; 3965168962Spjd struct vnode **a_vpp; 3966168962Spjd struct componentname 
*a_cnp; 3967168962Spjd } */ *ap; 3968168962Spjd{ 3969168962Spjd struct componentname *cnp = ap->a_cnp; 3970168962Spjd char nm[NAME_MAX + 1]; 3971168962Spjd 3972168962Spjd ASSERT(cnp->cn_namelen < sizeof(nm)); 3973168962Spjd strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 3974168962Spjd 3975168962Spjd return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 3976185029Spjd cnp->cn_cred, cnp->cn_thread, 0)); 3977168962Spjd} 3978168962Spjd 3979168962Spjdstatic int 3980168962Spjdzfs_freebsd_create(ap) 3981168962Spjd struct vop_create_args /* { 3982168962Spjd struct vnode *a_dvp; 3983168962Spjd struct vnode **a_vpp; 3984168962Spjd struct componentname *a_cnp; 3985168962Spjd struct vattr *a_vap; 3986168962Spjd } */ *ap; 3987168962Spjd{ 3988168962Spjd struct componentname *cnp = ap->a_cnp; 3989168962Spjd vattr_t *vap = ap->a_vap; 3990168962Spjd int mode; 3991168962Spjd 3992168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 3993168962Spjd 3994168962Spjd vattr_init_mask(vap); 3995168962Spjd mode = vap->va_mode & ALLPERMS; 3996168962Spjd 3997168962Spjd return (zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 3998185029Spjd ap->a_vpp, cnp->cn_cred, cnp->cn_thread)); 3999168962Spjd} 4000168962Spjd 4001168962Spjdstatic int 4002168962Spjdzfs_freebsd_remove(ap) 4003168962Spjd struct vop_remove_args /* { 4004168962Spjd struct vnode *a_dvp; 4005168962Spjd struct vnode *a_vp; 4006168962Spjd struct componentname *a_cnp; 4007168962Spjd } */ *ap; 4008168962Spjd{ 4009168962Spjd 4010168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 4011168962Spjd 4012168962Spjd return (zfs_remove(ap->a_dvp, ap->a_cnp->cn_nameptr, 4013185029Spjd ap->a_cnp->cn_cred, NULL, 0)); 4014168962Spjd} 4015168962Spjd 4016168962Spjdstatic int 4017168962Spjdzfs_freebsd_mkdir(ap) 4018168962Spjd struct vop_mkdir_args /* { 4019168962Spjd struct vnode *a_dvp; 4020168962Spjd struct vnode **a_vpp; 4021168962Spjd struct componentname *a_cnp; 4022168962Spjd struct vattr *a_vap; 4023168962Spjd } 
*/ *ap; 4024168962Spjd{ 4025168962Spjd vattr_t *vap = ap->a_vap; 4026168962Spjd 4027168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 4028168962Spjd 4029168962Spjd vattr_init_mask(vap); 4030168962Spjd 4031168962Spjd return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 4032185029Spjd ap->a_cnp->cn_cred, NULL, 0, NULL)); 4033168962Spjd} 4034168962Spjd 4035168962Spjdstatic int 4036168962Spjdzfs_freebsd_rmdir(ap) 4037168962Spjd struct vop_rmdir_args /* { 4038168962Spjd struct vnode *a_dvp; 4039168962Spjd struct vnode *a_vp; 4040168962Spjd struct componentname *a_cnp; 4041168962Spjd } */ *ap; 4042168962Spjd{ 4043168962Spjd struct componentname *cnp = ap->a_cnp; 4044168962Spjd 4045168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 4046168962Spjd 4047185029Spjd return (zfs_rmdir(ap->a_dvp, cnp->cn_nameptr, NULL, cnp->cn_cred, NULL, 0)); 4048168962Spjd} 4049168962Spjd 4050168962Spjdstatic int 4051168962Spjdzfs_freebsd_readdir(ap) 4052168962Spjd struct vop_readdir_args /* { 4053168962Spjd struct vnode *a_vp; 4054168962Spjd struct uio *a_uio; 4055168962Spjd struct ucred *a_cred; 4056168962Spjd int *a_eofflag; 4057168962Spjd int *a_ncookies; 4058168962Spjd u_long **a_cookies; 4059168962Spjd } */ *ap; 4060168962Spjd{ 4061168962Spjd 4062168962Spjd return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 4063168962Spjd ap->a_ncookies, ap->a_cookies)); 4064168962Spjd} 4065168962Spjd 4066168962Spjdstatic int 4067168962Spjdzfs_freebsd_fsync(ap) 4068168962Spjd struct vop_fsync_args /* { 4069168962Spjd struct vnode *a_vp; 4070168962Spjd int a_waitfor; 4071168962Spjd struct thread *a_td; 4072168962Spjd } */ *ap; 4073168962Spjd{ 4074168962Spjd 4075168962Spjd vop_stdfsync(ap); 4076185029Spjd return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 4077168962Spjd} 4078168962Spjd 4079168962Spjdstatic int 4080168962Spjdzfs_freebsd_getattr(ap) 4081168962Spjd struct vop_getattr_args /* { 4082168962Spjd struct vnode *a_vp; 4083168962Spjd struct vattr *a_vap; 
4084168962Spjd struct ucred *a_cred; 4085185029Spjd struct thread *a_td; 4086168962Spjd } */ *ap; 4087168962Spjd{ 4088185029Spjd vattr_t *vap = ap->a_vap; 4089185029Spjd xvattr_t xvap; 4090185029Spjd u_long fflags = 0; 4091185029Spjd int error; 4092168962Spjd 4093185029Spjd xva_init(&xvap); 4094185029Spjd xvap.xva_vattr = *vap; 4095185029Spjd xvap.xva_vattr.va_mask |= AT_XVATTR; 4096185029Spjd 4097185029Spjd /* Convert chflags into ZFS-type flags. */ 4098185029Spjd /* XXX: what about SF_SETTABLE?. */ 4099185029Spjd XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 4100185029Spjd XVA_SET_REQ(&xvap, XAT_APPENDONLY); 4101185029Spjd XVA_SET_REQ(&xvap, XAT_NOUNLINK); 4102185029Spjd XVA_SET_REQ(&xvap, XAT_NODUMP); 4103185029Spjd error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 4104185029Spjd if (error != 0) 4105185029Spjd return (error); 4106185029Spjd 4107185029Spjd /* Convert ZFS xattr into chflags. */ 4108185029Spjd#define FLAG_CHECK(fflag, xflag, xfield) do { \ 4109185029Spjd if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 4110185029Spjd fflags |= (fflag); \ 4111185029Spjd} while (0) 4112185029Spjd FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 4113185029Spjd xvap.xva_xoptattrs.xoa_immutable); 4114185029Spjd FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 4115185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 4116185029Spjd FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 4117185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 4118185029Spjd FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 4119185029Spjd xvap.xva_xoptattrs.xoa_nodump); 4120185029Spjd#undef FLAG_CHECK 4121185029Spjd *vap = xvap.xva_vattr; 4122185029Spjd vap->va_flags = fflags; 4123185029Spjd return (0); 4124168962Spjd} 4125168962Spjd 4126168962Spjdstatic int 4127168962Spjdzfs_freebsd_setattr(ap) 4128168962Spjd struct vop_setattr_args /* { 4129168962Spjd struct vnode *a_vp; 4130168962Spjd struct vattr *a_vap; 4131168962Spjd struct ucred *a_cred; 4132185029Spjd struct thread *a_td; 4133168962Spjd } */ *ap; 4134168962Spjd{ 4135168962Spjd vattr_t 
*vap = ap->a_vap; 4136185029Spjd xvattr_t xvap; 4137185029Spjd u_long fflags; 4138185029Spjd uint64_t zflags; 4139168962Spjd 4140168962Spjd vattr_init_mask(vap); 4141170044Spjd vap->va_mask &= ~AT_NOSET; 4142168962Spjd 4143185029Spjd xva_init(&xvap); 4144185029Spjd xvap.xva_vattr = *vap; 4145185029Spjd 4146185029Spjd if (vap->va_flags != VNOVAL) { 4147185029Spjd fflags = vap->va_flags; 4148185029Spjd if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_NODUMP)) != 0) 4149185029Spjd return (EOPNOTSUPP); 4150185029Spjd zflags = VTOZ(ap->a_vp)->z_phys->zp_flags; 4151185029Spjd 4152185029Spjd#define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 4153185029Spjd if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 4154185029Spjd ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 4155185029Spjd XVA_SET_REQ(&xvap, (xflag)); \ 4156185029Spjd (xfield) = ((fflags & (fflag)) != 0); \ 4157185029Spjd } \ 4158185029Spjd} while (0) 4159185029Spjd /* Convert chflags into ZFS-type flags. */ 4160185029Spjd /* XXX: what about SF_SETTABLE?. 
*/ 4161185029Spjd FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 4162185029Spjd xvap.xva_xoptattrs.xoa_immutable); 4163185029Spjd FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 4164185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 4165185029Spjd FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 4166185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 4167185029Spjd FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 4168185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 4169185029Spjd#undef FLAG_CHANGE 4170185029Spjd } 4171185029Spjd return (zfs_setattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL)); 4172168962Spjd} 4173168962Spjd 4174168962Spjdstatic int 4175168962Spjdzfs_freebsd_rename(ap) 4176168962Spjd struct vop_rename_args /* { 4177168962Spjd struct vnode *a_fdvp; 4178168962Spjd struct vnode *a_fvp; 4179168962Spjd struct componentname *a_fcnp; 4180168962Spjd struct vnode *a_tdvp; 4181168962Spjd struct vnode *a_tvp; 4182168962Spjd struct componentname *a_tcnp; 4183168962Spjd } */ *ap; 4184168962Spjd{ 4185168962Spjd vnode_t *fdvp = ap->a_fdvp; 4186168962Spjd vnode_t *fvp = ap->a_fvp; 4187168962Spjd vnode_t *tdvp = ap->a_tdvp; 4188168962Spjd vnode_t *tvp = ap->a_tvp; 4189168962Spjd int error; 4190168962Spjd 4191168962Spjd ASSERT(ap->a_fcnp->cn_flags & SAVENAME); 4192168962Spjd ASSERT(ap->a_tcnp->cn_flags & SAVENAME); 4193168962Spjd 4194168962Spjd error = zfs_rename(fdvp, ap->a_fcnp->cn_nameptr, tdvp, 4195185029Spjd ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred, NULL, 0); 4196168962Spjd 4197168962Spjd if (tdvp == tvp) 4198168962Spjd VN_RELE(tdvp); 4199168962Spjd else 4200168962Spjd VN_URELE(tdvp); 4201168962Spjd if (tvp) 4202168962Spjd VN_URELE(tvp); 4203168962Spjd VN_RELE(fdvp); 4204168962Spjd VN_RELE(fvp); 4205168962Spjd 4206168962Spjd return (error); 4207168962Spjd} 4208168962Spjd 4209168962Spjdstatic int 4210168962Spjdzfs_freebsd_symlink(ap) 4211168962Spjd struct vop_symlink_args /* { 4212168962Spjd struct vnode *a_dvp; 4213168962Spjd struct vnode **a_vpp; 
4214168962Spjd struct componentname *a_cnp; 4215168962Spjd struct vattr *a_vap; 4216168962Spjd char *a_target; 4217168962Spjd } */ *ap; 4218168962Spjd{ 4219168962Spjd struct componentname *cnp = ap->a_cnp; 4220168962Spjd vattr_t *vap = ap->a_vap; 4221168962Spjd 4222168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 4223168962Spjd 4224168962Spjd vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. */ 4225168962Spjd vattr_init_mask(vap); 4226168962Spjd 4227168962Spjd return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 4228168962Spjd ap->a_target, cnp->cn_cred, cnp->cn_thread)); 4229168962Spjd} 4230168962Spjd 4231168962Spjdstatic int 4232168962Spjdzfs_freebsd_readlink(ap) 4233168962Spjd struct vop_readlink_args /* { 4234168962Spjd struct vnode *a_vp; 4235168962Spjd struct uio *a_uio; 4236168962Spjd struct ucred *a_cred; 4237168962Spjd } */ *ap; 4238168962Spjd{ 4239168962Spjd 4240185029Spjd return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 4241168962Spjd} 4242168962Spjd 4243168962Spjdstatic int 4244168962Spjdzfs_freebsd_link(ap) 4245168962Spjd struct vop_link_args /* { 4246168962Spjd struct vnode *a_tdvp; 4247168962Spjd struct vnode *a_vp; 4248168962Spjd struct componentname *a_cnp; 4249168962Spjd } */ *ap; 4250168962Spjd{ 4251168962Spjd struct componentname *cnp = ap->a_cnp; 4252168962Spjd 4253168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 4254168962Spjd 4255185029Spjd return (zfs_link(ap->a_tdvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 4256168962Spjd} 4257168962Spjd 4258168962Spjdstatic int 4259168962Spjdzfs_freebsd_inactive(ap) 4260169170Spjd struct vop_inactive_args /* { 4261169170Spjd struct vnode *a_vp; 4262169170Spjd struct thread *a_td; 4263169170Spjd } */ *ap; 4264168962Spjd{ 4265168962Spjd vnode_t *vp = ap->a_vp; 4266168962Spjd 4267185029Spjd zfs_inactive(vp, ap->a_td->td_ucred, NULL); 4268168962Spjd return (0); 4269168962Spjd} 4270168962Spjd 4271185029Spjdstatic void 4272185029Spjdzfs_reclaim_complete(void *arg, int 
pending) 4273185029Spjd{ 4274185029Spjd znode_t *zp = arg; 4275185029Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4276185029Spjd 4277185029Spjd ZFS_LOG(1, "zp=%p", zp); 4278185029Spjd ZFS_OBJ_HOLD_ENTER(zfsvfs, zp->z_id); 4279185029Spjd zfs_znode_dmu_fini(zp); 4280185029Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); 4281185029Spjd zfs_znode_free(zp); 4282185029Spjd} 4283185029Spjd 4284168962Spjdstatic int 4285168962Spjdzfs_freebsd_reclaim(ap) 4286168962Spjd struct vop_reclaim_args /* { 4287168962Spjd struct vnode *a_vp; 4288168962Spjd struct thread *a_td; 4289168962Spjd } */ *ap; 4290168962Spjd{ 4291169170Spjd vnode_t *vp = ap->a_vp; 4292168962Spjd znode_t *zp = VTOZ(vp); 4293169025Spjd zfsvfs_t *zfsvfs; 4294168962Spjd 4295169025Spjd ASSERT(zp != NULL); 4296169025Spjd 4297168962Spjd /* 4298168962Spjd * Destroy the vm object and flush associated pages. 4299168962Spjd */ 4300168962Spjd vnode_destroy_vobject(vp); 4301169025Spjd 4302169025Spjd mutex_enter(&zp->z_lock); 4303169025Spjd ASSERT(zp->z_phys); 4304185029Spjd ZTOV(zp) = NULL; 4305169025Spjd if (!zp->z_unlinked) { 4306185029Spjd int locked; 4307185029Spjd 4308185029Spjd zfsvfs = zp->z_zfsvfs; 4309169025Spjd mutex_exit(&zp->z_lock); 4310185029Spjd locked = MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)) ? 2 : 4311185029Spjd ZFS_OBJ_HOLD_TRYENTER(zfsvfs, zp->z_id); 4312185029Spjd if (locked == 0) { 4313185029Spjd /* 4314185029Spjd * Lock can't be obtained due to deadlock possibility, 4315185029Spjd * so defer znode destruction. 
4316185029Spjd */ 4317185029Spjd TASK_INIT(&zp->z_task, 0, zfs_reclaim_complete, zp); 4318185029Spjd taskqueue_enqueue(taskqueue_thread, &zp->z_task); 4319185029Spjd } else { 4320185029Spjd zfs_znode_dmu_fini(zp); 4321185029Spjd if (locked == 1) 4322185029Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); 4323185029Spjd zfs_znode_free(zp); 4324185029Spjd } 4325169025Spjd } else { 4326169025Spjd mutex_exit(&zp->z_lock); 4327169025Spjd } 4328168962Spjd VI_LOCK(vp); 4329168962Spjd vp->v_data = NULL; 4330171567Spjd ASSERT(vp->v_holdcnt >= 1); 4331171316Sdfr VI_UNLOCK(vp); 4332168962Spjd return (0); 4333168962Spjd} 4334168962Spjd 4335168962Spjdstatic int 4336168962Spjdzfs_freebsd_fid(ap) 4337168962Spjd struct vop_fid_args /* { 4338168962Spjd struct vnode *a_vp; 4339168962Spjd struct fid *a_fid; 4340168962Spjd } */ *ap; 4341168962Spjd{ 4342168962Spjd 4343185029Spjd return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 4344168962Spjd} 4345168962Spjd 4346168962Spjdstatic int 4347168962Spjdzfs_freebsd_pathconf(ap) 4348168962Spjd struct vop_pathconf_args /* { 4349168962Spjd struct vnode *a_vp; 4350168962Spjd int a_name; 4351168962Spjd register_t *a_retval; 4352168962Spjd } */ *ap; 4353168962Spjd{ 4354168962Spjd ulong_t val; 4355168962Spjd int error; 4356168962Spjd 4357185029Spjd error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 4358168962Spjd if (error == 0) 4359168962Spjd *ap->a_retval = val; 4360168962Spjd else if (error == EOPNOTSUPP) 4361168962Spjd error = vop_stdpathconf(ap); 4362168962Spjd return (error); 4363168962Spjd} 4364168962Spjd 4365185029Spjd/* 4366185029Spjd * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 4367185029Spjd * extended attribute name: 4368185029Spjd * 4369185029Spjd * NAMESPACE PREFIX 4370185029Spjd * system freebsd:system: 4371185029Spjd * user (none, can be used to access ZFS fsattr(5) attributes 4372185029Spjd * created on Solaris) 4373185029Spjd */ 4374185029Spjdstatic int 
4375185029Spjdzfs_create_attrname(int attrnamespace, const char *name, char *attrname, 4376185029Spjd size_t size) 4377185029Spjd{ 4378185029Spjd const char *namespace, *prefix, *suffix; 4379185029Spjd 4380185029Spjd /* We don't allow '/' character in attribute name. */ 4381185029Spjd if (strchr(name, '/') != NULL) 4382185029Spjd return (EINVAL); 4383185029Spjd /* We don't allow attribute names that start with "freebsd:" string. */ 4384185029Spjd if (strncmp(name, "freebsd:", 8) == 0) 4385185029Spjd return (EINVAL); 4386185029Spjd 4387185029Spjd bzero(attrname, size); 4388185029Spjd 4389185029Spjd switch (attrnamespace) { 4390185029Spjd case EXTATTR_NAMESPACE_USER: 4391185029Spjd#if 0 4392185029Spjd prefix = "freebsd:"; 4393185029Spjd namespace = EXTATTR_NAMESPACE_USER_STRING; 4394185029Spjd suffix = ":"; 4395185029Spjd#else 4396185029Spjd /* 4397185029Spjd * This is the default namespace by which we can access all 4398185029Spjd * attributes created on Solaris. 4399185029Spjd */ 4400185029Spjd prefix = namespace = suffix = ""; 4401185029Spjd#endif 4402185029Spjd break; 4403185029Spjd case EXTATTR_NAMESPACE_SYSTEM: 4404185029Spjd prefix = "freebsd:"; 4405185029Spjd namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 4406185029Spjd suffix = ":"; 4407185029Spjd break; 4408185029Spjd case EXTATTR_NAMESPACE_EMPTY: 4409185029Spjd default: 4410185029Spjd return (EINVAL); 4411185029Spjd } 4412185029Spjd if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 4413185029Spjd name) >= size) { 4414185029Spjd return (ENAMETOOLONG); 4415185029Spjd } 4416185029Spjd return (0); 4417185029Spjd} 4418185029Spjd 4419185029Spjd/* 4420185029Spjd * Vnode operating to retrieve a named extended attribute. 
4421185029Spjd */ 4422185029Spjdstatic int 4423185029Spjdzfs_getextattr(struct vop_getextattr_args *ap) 4424185029Spjd/* 4425185029Spjdvop_getextattr { 4426185029Spjd IN struct vnode *a_vp; 4427185029Spjd IN int a_attrnamespace; 4428185029Spjd IN const char *a_name; 4429185029Spjd INOUT struct uio *a_uio; 4430185029Spjd OUT size_t *a_size; 4431185029Spjd IN struct ucred *a_cred; 4432185029Spjd IN struct thread *a_td; 4433185029Spjd}; 4434185029Spjd*/ 4435185029Spjd{ 4436185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 4437185029Spjd struct thread *td = ap->a_td; 4438185029Spjd struct nameidata nd; 4439185029Spjd char attrname[255]; 4440185029Spjd struct vattr va; 4441185029Spjd vnode_t *xvp = NULL, *vp; 4442185029Spjd int error, flags; 4443185029Spjd 4444185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 4445185029Spjd sizeof(attrname)); 4446185029Spjd if (error != 0) 4447185029Spjd return (error); 4448185029Spjd 4449185029Spjd ZFS_ENTER(zfsvfs); 4450185029Spjd 4451185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 4452185029Spjd LOOKUP_XATTR); 4453185029Spjd if (error != 0) { 4454185029Spjd ZFS_EXIT(zfsvfs); 4455185029Spjd return (error); 4456185029Spjd } 4457185029Spjd 4458185029Spjd flags = FREAD; 4459185029Spjd NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, attrname, 4460185029Spjd xvp, td); 4461185029Spjd error = vn_open_cred(&nd, &flags, 0, ap->a_cred, NULL); 4462185029Spjd vp = nd.ni_vp; 4463185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 4464185029Spjd if (error != 0) { 4465185029Spjd ZFS_EXIT(zfsvfs); 4466185029Spjd return (error); 4467185029Spjd } 4468185029Spjd 4469185029Spjd if (ap->a_size != NULL) { 4470185029Spjd error = VOP_GETATTR(vp, &va, ap->a_cred); 4471185029Spjd if (error == 0) 4472185029Spjd *ap->a_size = (size_t)va.va_size; 4473185029Spjd } else if (ap->a_uio != NULL) 4474185029Spjd error = VOP_READ(vp, ap->a_uio, IO_UNIT | IO_SYNC, ap->a_cred); 4475185029Spjd 4476185029Spjd 
VOP_UNLOCK(vp, 0); 4477185029Spjd vn_close(vp, flags, ap->a_cred, td); 4478185029Spjd ZFS_EXIT(zfsvfs); 4479185029Spjd 4480185029Spjd return (error); 4481185029Spjd} 4482185029Spjd 4483185029Spjd/* 4484185029Spjd * Vnode operation to remove a named attribute. 4485185029Spjd */ 4486185029Spjdint 4487185029Spjdzfs_deleteextattr(struct vop_deleteextattr_args *ap) 4488185029Spjd/* 4489185029Spjdvop_deleteextattr { 4490185029Spjd IN struct vnode *a_vp; 4491185029Spjd IN int a_attrnamespace; 4492185029Spjd IN const char *a_name; 4493185029Spjd IN struct ucred *a_cred; 4494185029Spjd IN struct thread *a_td; 4495185029Spjd}; 4496185029Spjd*/ 4497185029Spjd{ 4498185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 4499185029Spjd struct thread *td = ap->a_td; 4500185029Spjd struct nameidata nd; 4501185029Spjd char attrname[255]; 4502185029Spjd struct vattr va; 4503185029Spjd vnode_t *xvp = NULL, *vp; 4504185029Spjd int error, flags; 4505185029Spjd 4506185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 4507185029Spjd sizeof(attrname)); 4508185029Spjd if (error != 0) 4509185029Spjd return (error); 4510185029Spjd 4511185029Spjd ZFS_ENTER(zfsvfs); 4512185029Spjd 4513185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 4514185029Spjd LOOKUP_XATTR); 4515185029Spjd if (error != 0) { 4516185029Spjd ZFS_EXIT(zfsvfs); 4517185029Spjd return (error); 4518185029Spjd } 4519185029Spjd 4520185029Spjd NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF | MPSAFE, 4521185029Spjd UIO_SYSSPACE, attrname, xvp, td); 4522185029Spjd error = namei(&nd); 4523185029Spjd vp = nd.ni_vp; 4524185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 4525185029Spjd if (error != 0) { 4526185029Spjd ZFS_EXIT(zfsvfs); 4527185029Spjd return (error); 4528185029Spjd } 4529185029Spjd VOP_LEASE(nd.ni_dvp, td, ap->a_cred, LEASE_WRITE); 4530185029Spjd error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 4531185029Spjd 4532185029Spjd vput(nd.ni_dvp); 4533185029Spjd if (vp == 
nd.ni_dvp) 4534185029Spjd vrele(vp); 4535185029Spjd else 4536185029Spjd vput(vp); 4537185029Spjd ZFS_EXIT(zfsvfs); 4538185029Spjd 4539185029Spjd return (error); 4540185029Spjd} 4541185029Spjd 4542185029Spjd/* 4543185029Spjd * Vnode operation to set a named attribute. 4544185029Spjd */ 4545185029Spjdstatic int 4546185029Spjdzfs_setextattr(struct vop_setextattr_args *ap) 4547185029Spjd/* 4548185029Spjdvop_setextattr { 4549185029Spjd IN struct vnode *a_vp; 4550185029Spjd IN int a_attrnamespace; 4551185029Spjd IN const char *a_name; 4552185029Spjd INOUT struct uio *a_uio; 4553185029Spjd IN struct ucred *a_cred; 4554185029Spjd IN struct thread *a_td; 4555185029Spjd}; 4556185029Spjd*/ 4557185029Spjd{ 4558185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 4559185029Spjd struct thread *td = ap->a_td; 4560185029Spjd struct nameidata nd; 4561185029Spjd char attrname[255]; 4562185029Spjd struct vattr va; 4563185029Spjd vnode_t *xvp = NULL, *vp; 4564185029Spjd int error, flags; 4565185029Spjd 4566185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 4567185029Spjd sizeof(attrname)); 4568185029Spjd if (error != 0) 4569185029Spjd return (error); 4570185029Spjd 4571185029Spjd ZFS_ENTER(zfsvfs); 4572185029Spjd 4573185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 4574185029Spjd LOOKUP_XATTR); 4575185029Spjd if (error != 0) { 4576185029Spjd ZFS_EXIT(zfsvfs); 4577185029Spjd return (error); 4578185029Spjd } 4579185029Spjd 4580185029Spjd flags = FFLAGS(O_WRONLY | O_CREAT); 4581185029Spjd NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, attrname, 4582185029Spjd xvp, td); 4583185029Spjd error = vn_open_cred(&nd, &flags, 0600, ap->a_cred, NULL); 4584185029Spjd vp = nd.ni_vp; 4585185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 4586185029Spjd if (error != 0) { 4587185029Spjd ZFS_EXIT(zfsvfs); 4588185029Spjd return (error); 4589185029Spjd } 4590185029Spjd 4591185029Spjd VOP_LEASE(vp, td, ap->a_cred, LEASE_WRITE); 4592185029Spjd 
VATTR_NULL(&va); 4593185029Spjd va.va_size = 0; 4594185029Spjd error = VOP_SETATTR(vp, &va, ap->a_cred); 4595185029Spjd if (error == 0) 4596185029Spjd VOP_WRITE(vp, ap->a_uio, IO_UNIT | IO_SYNC, ap->a_cred); 4597185029Spjd 4598185029Spjd VOP_UNLOCK(vp, 0); 4599185029Spjd vn_close(vp, flags, ap->a_cred, td); 4600185029Spjd ZFS_EXIT(zfsvfs); 4601185029Spjd 4602185029Spjd return (error); 4603185029Spjd} 4604185029Spjd 4605185029Spjd/* 4606185029Spjd * Vnode operation to retrieve extended attributes on a vnode. 4607185029Spjd */ 4608185029Spjdstatic int 4609185029Spjdzfs_listextattr(struct vop_listextattr_args *ap) 4610185029Spjd/* 4611185029Spjdvop_listextattr { 4612185029Spjd IN struct vnode *a_vp; 4613185029Spjd IN int a_attrnamespace; 4614185029Spjd INOUT struct uio *a_uio; 4615185029Spjd OUT size_t *a_size; 4616185029Spjd IN struct ucred *a_cred; 4617185029Spjd IN struct thread *a_td; 4618185029Spjd}; 4619185029Spjd*/ 4620185029Spjd{ 4621185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 4622185029Spjd struct thread *td = ap->a_td; 4623185029Spjd struct nameidata nd; 4624185029Spjd char attrprefix[16]; 4625185029Spjd u_char dirbuf[sizeof(struct dirent)]; 4626185029Spjd struct dirent *dp; 4627185029Spjd struct iovec aiov; 4628185029Spjd struct uio auio, *uio = ap->a_uio; 4629185029Spjd size_t *sizep = ap->a_size; 4630185029Spjd size_t plen; 4631185029Spjd vnode_t *xvp = NULL, *vp; 4632185029Spjd int done, error, eof, pos; 4633185029Spjd 4634185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 4635185029Spjd sizeof(attrprefix)); 4636185029Spjd if (error != 0) 4637185029Spjd return (error); 4638185029Spjd plen = strlen(attrprefix); 4639185029Spjd 4640185029Spjd ZFS_ENTER(zfsvfs); 4641185029Spjd 4642185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 4643185029Spjd LOOKUP_XATTR); 4644185029Spjd if (error != 0) { 4645185029Spjd ZFS_EXIT(zfsvfs); 4646185029Spjd return (error); 4647185029Spjd } 4648185029Spjd 
4649185029Spjd NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE, UIO_SYSSPACE, 4650185029Spjd ".", xvp, td); 4651185029Spjd error = namei(&nd); 4652185029Spjd vp = nd.ni_vp; 4653185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 4654185029Spjd if (error != 0) { 4655185029Spjd ZFS_EXIT(zfsvfs); 4656185029Spjd return (error); 4657185029Spjd } 4658185029Spjd 4659185029Spjd auio.uio_iov = &aiov; 4660185029Spjd auio.uio_iovcnt = 1; 4661185029Spjd auio.uio_segflg = UIO_SYSSPACE; 4662185029Spjd auio.uio_td = td; 4663185029Spjd auio.uio_rw = UIO_READ; 4664185029Spjd auio.uio_offset = 0; 4665185029Spjd 4666185029Spjd if (sizep != NULL) 4667185029Spjd *sizep = 0; 4668185029Spjd 4669185029Spjd do { 4670185029Spjd u_char nlen; 4671185029Spjd 4672185029Spjd aiov.iov_base = (void *)dirbuf; 4673185029Spjd aiov.iov_len = sizeof(dirbuf); 4674185029Spjd auio.uio_resid = sizeof(dirbuf); 4675185029Spjd error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 4676185029Spjd done = sizeof(dirbuf) - auio.uio_resid; 4677185029Spjd if (error != 0) 4678185029Spjd break; 4679185029Spjd for (pos = 0; pos < done;) { 4680185029Spjd dp = (struct dirent *)(dirbuf + pos); 4681185029Spjd pos += dp->d_reclen; 4682185029Spjd /* 4683185029Spjd * XXX: Temporarily we also accept DT_UNKNOWN, as this 4684185029Spjd * is what we get when attribute was created on Solaris. 4685185029Spjd */ 4686185029Spjd if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 4687185029Spjd continue; 4688185029Spjd if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) 4689185029Spjd continue; 4690185029Spjd else if (strncmp(dp->d_name, attrprefix, plen) != 0) 4691185029Spjd continue; 4692185029Spjd nlen = dp->d_namlen - plen; 4693185029Spjd if (sizep != NULL) 4694185029Spjd *sizep += 1 + nlen; 4695185029Spjd else if (uio != NULL) { 4696185029Spjd /* 4697185029Spjd * Format of extattr name entry is one byte for 4698185029Spjd * length and the rest for name. 
4699185029Spjd */ 4700185029Spjd error = uiomove(&nlen, 1, uio->uio_rw, uio); 4701185029Spjd if (error == 0) { 4702185029Spjd error = uiomove(dp->d_name + plen, nlen, 4703185029Spjd uio->uio_rw, uio); 4704185029Spjd } 4705185029Spjd if (error != 0) 4706185029Spjd break; 4707185029Spjd } 4708185029Spjd } 4709185029Spjd } while (!eof && error == 0); 4710185029Spjd 4711185029Spjd vput(vp); 4712185029Spjd ZFS_EXIT(zfsvfs); 4713185029Spjd 4714185029Spjd return (error); 4715185029Spjd} 4716185029Spjd 4717168404Spjdstruct vop_vector zfs_vnodeops; 4718168404Spjdstruct vop_vector zfs_fifoops; 4719168404Spjd 4720168404Spjdstruct vop_vector zfs_vnodeops = { 4721185029Spjd .vop_default = &default_vnodeops, 4722185029Spjd .vop_inactive = zfs_freebsd_inactive, 4723185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 4724185029Spjd .vop_access = zfs_freebsd_access, 4725168404Spjd#ifdef FREEBSD_NAMECACHE 4726185029Spjd .vop_lookup = vfs_cache_lookup, 4727185029Spjd .vop_cachedlookup = zfs_freebsd_lookup, 4728168404Spjd#else 4729185029Spjd .vop_lookup = zfs_freebsd_lookup, 4730168404Spjd#endif 4731185029Spjd .vop_getattr = zfs_freebsd_getattr, 4732185029Spjd .vop_setattr = zfs_freebsd_setattr, 4733185029Spjd .vop_create = zfs_freebsd_create, 4734185029Spjd .vop_mknod = zfs_freebsd_create, 4735185029Spjd .vop_mkdir = zfs_freebsd_mkdir, 4736185029Spjd .vop_readdir = zfs_freebsd_readdir, 4737185029Spjd .vop_fsync = zfs_freebsd_fsync, 4738185029Spjd .vop_open = zfs_freebsd_open, 4739185029Spjd .vop_close = zfs_freebsd_close, 4740185029Spjd .vop_rmdir = zfs_freebsd_rmdir, 4741185029Spjd .vop_ioctl = zfs_freebsd_ioctl, 4742185029Spjd .vop_link = zfs_freebsd_link, 4743185029Spjd .vop_symlink = zfs_freebsd_symlink, 4744185029Spjd .vop_readlink = zfs_freebsd_readlink, 4745185029Spjd .vop_read = zfs_freebsd_read, 4746185029Spjd .vop_write = zfs_freebsd_write, 4747185029Spjd .vop_remove = zfs_freebsd_remove, 4748185029Spjd .vop_rename = zfs_freebsd_rename, 4749185029Spjd .vop_pathconf = 
zfs_freebsd_pathconf, 4750185029Spjd .vop_bmap = VOP_EOPNOTSUPP, 4751185029Spjd .vop_fid = zfs_freebsd_fid, 4752185029Spjd .vop_getextattr = zfs_getextattr, 4753185029Spjd .vop_deleteextattr = zfs_deleteextattr, 4754185029Spjd .vop_setextattr = zfs_setextattr, 4755185029Spjd .vop_listextattr = zfs_listextattr, 4756168404Spjd}; 4757168404Spjd 4758169170Spjdstruct vop_vector zfs_fifoops = { 4759185029Spjd .vop_default = &fifo_specops, 4760185029Spjd .vop_fsync = VOP_PANIC, 4761185029Spjd .vop_access = zfs_freebsd_access, 4762185029Spjd .vop_getattr = zfs_freebsd_getattr, 4763185029Spjd .vop_inactive = zfs_freebsd_inactive, 4764185029Spjd .vop_read = VOP_PANIC, 4765185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 4766185029Spjd .vop_setattr = zfs_freebsd_setattr, 4767185029Spjd .vop_write = VOP_PANIC, 4768185029Spjd .vop_fid = zfs_freebsd_fid, 4769168404Spjd}; 4770