zfs_vnops.c revision 177230
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22168404Spjd * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23168404Spjd * Use is subject to license terms. 
24168404Spjd */ 25168404Spjd 26169195Spjd/* Portions Copyright 2007 Jeremy Teo */ 27169195Spjd 28168404Spjd#pragma ident "%Z%%M% %I% %E% SMI" 29168404Spjd 30168404Spjd#include <sys/types.h> 31168404Spjd#include <sys/param.h> 32168404Spjd#include <sys/time.h> 33168404Spjd#include <sys/systm.h> 34168404Spjd#include <sys/sysmacros.h> 35168404Spjd#include <sys/resource.h> 36168404Spjd#include <sys/vfs.h> 37168404Spjd#include <sys/vnode.h> 38168404Spjd#include <sys/file.h> 39168404Spjd#include <sys/stat.h> 40168404Spjd#include <sys/kmem.h> 41168404Spjd#include <sys/taskq.h> 42168404Spjd#include <sys/uio.h> 43168404Spjd#include <sys/atomic.h> 44168404Spjd#include <sys/namei.h> 45168404Spjd#include <sys/mman.h> 46168404Spjd#include <sys/cmn_err.h> 47168404Spjd#include <sys/errno.h> 48168404Spjd#include <sys/unistd.h> 49168404Spjd#include <sys/zfs_vfsops.h> 50168404Spjd#include <sys/zfs_dir.h> 51168404Spjd#include <sys/zfs_acl.h> 52168404Spjd#include <sys/zfs_ioctl.h> 53168404Spjd#include <sys/fs/zfs.h> 54168404Spjd#include <sys/dmu.h> 55168404Spjd#include <sys/spa.h> 56168404Spjd#include <sys/txg.h> 57168404Spjd#include <sys/dbuf.h> 58168404Spjd#include <sys/zap.h> 59168404Spjd#include <sys/dirent.h> 60168962Spjd#include <sys/policy.h> 61168962Spjd#include <sys/sunddi.h> 62168404Spjd#include <sys/filio.h> 63168404Spjd#include <sys/zfs_ctldir.h> 64168404Spjd#include <sys/dnlc.h> 65168404Spjd#include <sys/zfs_rlock.h> 66168404Spjd#include <sys/bio.h> 67168404Spjd#include <sys/buf.h> 68168404Spjd#include <sys/sf_buf.h> 69168404Spjd#include <sys/sched.h> 70168404Spjd 71168404Spjd/* 72168404Spjd * Programming rules. 73168404Spjd * 74168404Spjd * Each vnode op performs some logical unit of work. To do this, the ZPL must 75168404Spjd * properly lock its in-core state, create a DMU transaction, do the work, 76168404Spjd * record this work in the intent log (ZIL), commit the DMU transaction, 77168404Spjd * and wait for the intent log to commit if it's a synchronous operation. 
78168404Spjd * Moreover, the vnode ops must work in both normal and log replay context. 79168404Spjd * The ordering of events is important to avoid deadlocks and references 80168404Spjd * to freed memory. The example below illustrates the following Big Rules: 81168404Spjd * 82168404Spjd * (1) A check must be made in each zfs thread for a mounted file system. 83168404Spjd * This is done avoiding races using ZFS_ENTER(zfsvfs). 84168404Spjd * A ZFS_EXIT(zfsvfs) is needed before all returns. 85168404Spjd * 86168404Spjd * (2) VN_RELE() should always be the last thing except for zil_commit() 87168404Spjd * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 88168404Spjd * First, if it's the last reference, the vnode/znode 89168404Spjd * can be freed, so the zp may point to freed memory. Second, the last 90168404Spjd * reference will call zfs_zinactive(), which may induce a lot of work -- 91168404Spjd * pushing cached pages (which acquires range locks) and syncing out 92168404Spjd * cached atime changes. Third, zfs_zinactive() may require a new tx, 93168404Spjd * which could deadlock the system if you were already holding one. 94168404Spjd * 95168404Spjd * (3) All range locks must be grabbed before calling dmu_tx_assign(), 96168404Spjd * as they can span dmu_tx_assign() calls. 97168404Spjd * 98168404Spjd * (4) Always pass zfsvfs->z_assign as the second argument to dmu_tx_assign(). 99168404Spjd * In normal operation, this will be TXG_NOWAIT. During ZIL replay, 100168404Spjd * it will be a specific txg. Either way, dmu_tx_assign() never blocks. 101168404Spjd * This is critical because we don't want to block while holding locks. 102168404Spjd * Note, in particular, that if a lock is sometimes acquired before 103168404Spjd * the tx assigns, and sometimes after (e.g. z_lock), then failing to 104168404Spjd * use a non-blocking assign can deadlock the system. The scenario: 105168404Spjd * 106168404Spjd * Thread A has grabbed a lock before calling dmu_tx_assign(). 
107168404Spjd * Thread B is in an already-assigned tx, and blocks for this lock. 108168404Spjd * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 109168404Spjd * forever, because the previous txg can't quiesce until B's tx commits. 110168404Spjd * 111168404Spjd * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 112168404Spjd * then drop all locks, call dmu_tx_wait(), and try again. 113168404Spjd * 114168404Spjd * (5) If the operation succeeded, generate the intent log entry for it 115168404Spjd * before dropping locks. This ensures that the ordering of events 116168404Spjd * in the intent log matches the order in which they actually occurred. 117168404Spjd * 118168404Spjd * (6) At the end of each vnode op, the DMU tx must always commit, 119168404Spjd * regardless of whether there were any errors. 120168404Spjd * 121168404Spjd * (7) After dropping all locks, invoke zil_commit(zilog, seq, foid) 122168404Spjd * to ensure that synchronous semantics are provided when necessary. 123168404Spjd * 124168404Spjd * In general, this is how things should be ordered in each vnode op: 125168404Spjd * 126168404Spjd * ZFS_ENTER(zfsvfs); // exit if unmounted 127168404Spjd * top: 128168404Spjd * zfs_dirent_lock(&dl, ...) 
// lock directory entry (may VN_HOLD()) 129168404Spjd * rw_enter(...); // grab any other locks you need 130168404Spjd * tx = dmu_tx_create(...); // get DMU tx 131168404Spjd * dmu_tx_hold_*(); // hold each object you might modify 132168404Spjd * error = dmu_tx_assign(tx, zfsvfs->z_assign); // try to assign 133168404Spjd * if (error) { 134168404Spjd * rw_exit(...); // drop locks 135168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 136168404Spjd * VN_RELE(...); // release held vnodes 137168404Spjd * if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { 138168404Spjd * dmu_tx_wait(tx); 139168404Spjd * dmu_tx_abort(tx); 140168404Spjd * goto top; 141168404Spjd * } 142168404Spjd * dmu_tx_abort(tx); // abort DMU tx 143168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 144168404Spjd * return (error); // really out of space 145168404Spjd * } 146168404Spjd * error = do_real_work(); // do whatever this VOP does 147168404Spjd * if (error == 0) 148168404Spjd * zfs_log_*(...); // on success, make ZIL entry 149168404Spjd * dmu_tx_commit(tx); // commit DMU tx -- error or not 150168404Spjd * rw_exit(...); // drop locks 151168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 152168404Spjd * VN_RELE(...); // release held vnodes 153168404Spjd * zil_commit(zilog, seq, foid); // synchronous when necessary 154168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 155168404Spjd * return (error); // done, report error 156168404Spjd */ 157168404Spjd/* ARGSUSED */ 158168404Spjdstatic int 159168962Spjdzfs_open(vnode_t **vpp, int flag, cred_t *cr) 160168404Spjd{ 161168962Spjd znode_t *zp = VTOZ(*vpp); 162168404Spjd 163168404Spjd /* Keep a count of the synchronous opens in the znode */ 164168962Spjd if (flag & (FSYNC | FDSYNC)) 165168404Spjd atomic_inc_32(&zp->z_sync_cnt); 166168404Spjd return (0); 167168404Spjd} 168168404Spjd 169168404Spjd/* ARGSUSED */ 170168404Spjdstatic int 171168962Spjdzfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr) 
172168404Spjd{ 173168962Spjd znode_t *zp = VTOZ(vp); 174168404Spjd 175168404Spjd /* Decrement the synchronous opens in the znode */ 176168962Spjd if (flag & (FSYNC | FDSYNC)) 177168404Spjd atomic_dec_32(&zp->z_sync_cnt); 178168404Spjd 179168962Spjd /* 180168962Spjd * Clean up any locks held by this process on the vp. 181168962Spjd */ 182168962Spjd cleanlocks(vp, ddi_get_pid(), 0); 183168962Spjd cleanshares(vp, ddi_get_pid()); 184168962Spjd 185168404Spjd return (0); 186168404Spjd} 187168404Spjd 188168404Spjd/* 189168404Spjd * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 190168404Spjd * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 191168404Spjd */ 192168404Spjdstatic int 193168978Spjdzfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 194168404Spjd{ 195168404Spjd znode_t *zp = VTOZ(vp); 196168404Spjd uint64_t noff = (uint64_t)*off; /* new offset */ 197168404Spjd uint64_t file_sz; 198168404Spjd int error; 199168404Spjd boolean_t hole; 200168404Spjd 201168404Spjd file_sz = zp->z_phys->zp_size; 202168404Spjd if (noff >= file_sz) { 203168404Spjd return (ENXIO); 204168404Spjd } 205168404Spjd 206168962Spjd if (cmd == _FIO_SEEK_HOLE) 207168404Spjd hole = B_TRUE; 208168404Spjd else 209168404Spjd hole = B_FALSE; 210168404Spjd 211168404Spjd error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 212168404Spjd 213168404Spjd /* end of file? */ 214168404Spjd if ((error == ESRCH) || (noff > file_sz)) { 215168404Spjd /* 216168404Spjd * Handle the virtual hole at the end of file. 
217168404Spjd */ 218168404Spjd if (hole) { 219168404Spjd *off = file_sz; 220168404Spjd return (0); 221168404Spjd } 222168404Spjd return (ENXIO); 223168404Spjd } 224168404Spjd 225168404Spjd if (noff < *off) 226168404Spjd return (error); 227168404Spjd *off = noff; 228168404Spjd return (error); 229168404Spjd} 230168404Spjd 231168404Spjd/* ARGSUSED */ 232168404Spjdstatic int 233168978Spjdzfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 234168962Spjd int *rvalp) 235168404Spjd{ 236168962Spjd offset_t off; 237168962Spjd int error; 238168962Spjd zfsvfs_t *zfsvfs; 239168404Spjd 240168404Spjd switch (com) { 241168962Spjd case _FIOFFS: 242168962Spjd return (0); 243168404Spjd 244168962Spjd /* 245168962Spjd * The following two ioctls are used by bfu. Faking out, 246168962Spjd * necessary to avoid bfu errors. 247168962Spjd */ 248168962Spjd case _FIOGDIO: 249168962Spjd case _FIOSDIO: 250168962Spjd return (0); 251168962Spjd 252168962Spjd case _FIO_SEEK_DATA: 253168962Spjd case _FIO_SEEK_HOLE: 254168962Spjd if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 255168962Spjd return (EFAULT); 256168962Spjd 257168404Spjd zfsvfs = VTOZ(vp)->z_zfsvfs; 258168404Spjd ZFS_ENTER(zfsvfs); 259168404Spjd 260168404Spjd /* offset parameter is in/out */ 261168404Spjd error = zfs_holey(vp, com, &off); 262168404Spjd ZFS_EXIT(zfsvfs); 263168404Spjd if (error) 264168404Spjd return (error); 265168962Spjd if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 266168962Spjd return (EFAULT); 267168404Spjd return (0); 268168404Spjd } 269168404Spjd return (ENOTTY); 270168404Spjd} 271168404Spjd 272168404Spjd/* 273168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 274168404Spjd * between the DMU cache and the memory mapped pages. What this means: 275168404Spjd * 276168404Spjd * On Write: If we find a memory mapped page, we write to *both* 277168404Spjd * the page and the dmu buffer. 
278168404Spjd * 279168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 280168404Spjd * the file is memory mapped. 281168404Spjd */ 282168404Spjdstatic int 283168404Spjdmappedwrite(vnode_t *vp, int nbytes, uio_t *uio, dmu_tx_t *tx) 284168404Spjd{ 285168404Spjd znode_t *zp = VTOZ(vp); 286168404Spjd objset_t *os = zp->z_zfsvfs->z_os; 287168404Spjd vm_object_t obj; 288168404Spjd vm_page_t m; 289168404Spjd struct sf_buf *sf; 290168404Spjd int64_t start, off; 291168404Spjd int len = nbytes; 292168404Spjd int error = 0; 293169059Spjd uint64_t dirbytes; 294168404Spjd 295168404Spjd ASSERT(vp->v_mount != NULL); 296168404Spjd obj = vp->v_object; 297168404Spjd ASSERT(obj != NULL); 298168404Spjd 299168404Spjd start = uio->uio_loffset; 300168404Spjd off = start & PAGEOFFSET; 301169059Spjd dirbytes = 0; 302168404Spjd VM_OBJECT_LOCK(obj); 303168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 304168404Spjd uint64_t bytes = MIN(PAGESIZE - off, len); 305169059Spjd uint64_t fsize; 306168404Spjd 307168404Spjdagain: 308168404Spjd if ((m = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 309168404Spjd vm_page_is_valid(m, (vm_offset_t)off, bytes)) { 310169059Spjd uint64_t woff; 311168404Spjd caddr_t va; 312168404Spjd 313168404Spjd if (vm_page_sleep_if_busy(m, FALSE, "zfsmwb")) 314168404Spjd goto again; 315169059Spjd fsize = obj->un_pager.vnp.vnp_size; 316168404Spjd vm_page_busy(m); 317169057Spjd vm_page_lock_queues(); 318169057Spjd vm_page_undirty(m); 319169057Spjd vm_page_unlock_queues(); 320168404Spjd VM_OBJECT_UNLOCK(obj); 321169059Spjd if (dirbytes > 0) { 322169059Spjd error = dmu_write_uio(os, zp->z_id, uio, 323169059Spjd dirbytes, tx); 324169059Spjd dirbytes = 0; 325169059Spjd } 326169059Spjd if (error == 0) { 327169059Spjd sched_pin(); 328169059Spjd sf = sf_buf_alloc(m, SFB_CPUPRIVATE); 329169059Spjd va = (caddr_t)sf_buf_kva(sf); 330169059Spjd woff = uio->uio_loffset - off; 331169059Spjd error = uiomove(va + off, bytes, UIO_WRITE, uio); 
332169167Spjd /* 333169167Spjd * The uiomove() above could have been partially 334169167Spjd * successful, that's why we call dmu_write() 335169167Spjd * below unconditionally. The page was marked 336169167Spjd * non-dirty above and we would lose the changes 337169167Spjd * without doing so. If the uiomove() failed 338169167Spjd * entirely, well, we just write what we got 339169167Spjd * before one more time. 340169167Spjd */ 341169059Spjd dmu_write(os, zp->z_id, woff, 342169059Spjd MIN(PAGESIZE, fsize - woff), va, tx); 343169059Spjd sf_buf_free(sf); 344169059Spjd sched_unpin(); 345169059Spjd } 346168404Spjd VM_OBJECT_LOCK(obj); 347168404Spjd vm_page_wakeup(m); 348168404Spjd } else { 349177230Spjd if (__predict_false(obj->cache != NULL)) { 350177230Spjd vm_page_cache_free(obj, OFF_TO_IDX(start), 351177230Spjd OFF_TO_IDX(start) + 1); 352177230Spjd } 353169059Spjd dirbytes += bytes; 354168404Spjd } 355168404Spjd len -= bytes; 356168404Spjd off = 0; 357168404Spjd if (error) 358168404Spjd break; 359168404Spjd } 360168404Spjd VM_OBJECT_UNLOCK(obj); 361169059Spjd if (error == 0 && dirbytes > 0) 362169059Spjd error = dmu_write_uio(os, zp->z_id, uio, dirbytes, tx); 363168404Spjd return (error); 364168404Spjd} 365168404Spjd 366168404Spjd/* 367168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 368168404Spjd * between the DMU cache and the memory mapped pages. What this means: 369168404Spjd * 370168404Spjd * On Read: We "read" preferentially from memory mapped pages, 371168404Spjd * else we default from the dmu buffer. 372168404Spjd * 373168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 374168404Spjd * the file is memory mapped. 
375168404Spjd */ 376168404Spjdstatic int 377168404Spjdmappedread(vnode_t *vp, int nbytes, uio_t *uio) 378168404Spjd{ 379168404Spjd znode_t *zp = VTOZ(vp); 380168404Spjd objset_t *os = zp->z_zfsvfs->z_os; 381168404Spjd vm_object_t obj; 382168404Spjd vm_page_t m; 383168404Spjd struct sf_buf *sf; 384168404Spjd int64_t start, off; 385168926Spjd caddr_t va; 386168404Spjd int len = nbytes; 387168404Spjd int error = 0; 388169059Spjd uint64_t dirbytes; 389168404Spjd 390168404Spjd ASSERT(vp->v_mount != NULL); 391168404Spjd obj = vp->v_object; 392168404Spjd ASSERT(obj != NULL); 393168404Spjd 394168404Spjd start = uio->uio_loffset; 395168404Spjd off = start & PAGEOFFSET; 396169059Spjd dirbytes = 0; 397168404Spjd VM_OBJECT_LOCK(obj); 398168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 399168404Spjd uint64_t bytes = MIN(PAGESIZE - off, len); 400168404Spjd 401168404Spjdagain: 402168404Spjd if ((m = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 403168404Spjd vm_page_is_valid(m, (vm_offset_t)off, bytes)) { 404168404Spjd if (vm_page_sleep_if_busy(m, FALSE, "zfsmrb")) 405168404Spjd goto again; 406168404Spjd vm_page_busy(m); 407168404Spjd VM_OBJECT_UNLOCK(obj); 408169059Spjd if (dirbytes > 0) { 409169059Spjd error = dmu_read_uio(os, zp->z_id, uio, 410169059Spjd dirbytes); 411169059Spjd dirbytes = 0; 412169059Spjd } 413169059Spjd if (error == 0) { 414169059Spjd sched_pin(); 415169059Spjd sf = sf_buf_alloc(m, SFB_CPUPRIVATE); 416169059Spjd va = (caddr_t)sf_buf_kva(sf); 417169059Spjd error = uiomove(va + off, bytes, UIO_READ, uio); 418169059Spjd sf_buf_free(sf); 419169059Spjd sched_unpin(); 420169059Spjd } 421168404Spjd VM_OBJECT_LOCK(obj); 422168404Spjd vm_page_wakeup(m); 423168926Spjd } else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) { 424168962Spjd /* 425168962Spjd * The code below is here to make sendfile(2) work 426168962Spjd * correctly with ZFS. 
As pointed out by ups@ 427168962Spjd * sendfile(2) should be changed to use VOP_GETPAGES(), 428168962Spjd * but it pessimize performance of sendfile/UFS, that's 429168962Spjd * why I handle this special case in ZFS code. 430168962Spjd */ 431168926Spjd if (vm_page_sleep_if_busy(m, FALSE, "zfsmrb")) 432168926Spjd goto again; 433168926Spjd vm_page_busy(m); 434168926Spjd VM_OBJECT_UNLOCK(obj); 435169059Spjd if (dirbytes > 0) { 436169059Spjd error = dmu_read_uio(os, zp->z_id, uio, 437169059Spjd dirbytes); 438169059Spjd dirbytes = 0; 439169059Spjd } 440169059Spjd if (error == 0) { 441169059Spjd sched_pin(); 442169059Spjd sf = sf_buf_alloc(m, SFB_CPUPRIVATE); 443169059Spjd va = (caddr_t)sf_buf_kva(sf); 444169059Spjd error = dmu_read(os, zp->z_id, start + off, 445169059Spjd bytes, (void *)(va + off)); 446169059Spjd sf_buf_free(sf); 447169059Spjd sched_unpin(); 448169059Spjd } 449168926Spjd VM_OBJECT_LOCK(obj); 450168926Spjd vm_page_wakeup(m); 451169059Spjd if (error == 0) 452169059Spjd uio->uio_resid -= bytes; 453168404Spjd } else { 454169059Spjd dirbytes += bytes; 455168404Spjd } 456168404Spjd len -= bytes; 457168404Spjd off = 0; 458168404Spjd if (error) 459168404Spjd break; 460168404Spjd } 461168404Spjd VM_OBJECT_UNLOCK(obj); 462169059Spjd if (error == 0 && dirbytes > 0) 463169059Spjd error = dmu_read_uio(os, zp->z_id, uio, dirbytes); 464168404Spjd return (error); 465168404Spjd} 466168404Spjd 467168404Spjdoffset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 468168404Spjd 469168404Spjd/* 470168404Spjd * Read bytes from specified file into supplied buffer. 471168404Spjd * 472168404Spjd * IN: vp - vnode of file to be read from. 473168404Spjd * uio - structure supplying read location, range info, 474168404Spjd * and return buffer. 475168404Spjd * ioflag - SYNC flags; used to provide FRSYNC semantics. 476168404Spjd * cr - credentials of caller. 477168404Spjd * 478168404Spjd * OUT: uio - updated offset and range, buffer filled. 
479168404Spjd * 480168404Spjd * RETURN: 0 if success 481168404Spjd * error code if failure 482168404Spjd * 483168404Spjd * Side Effects: 484168404Spjd * vp - atime updated if byte count > 0 485168404Spjd */ 486168404Spjd/* ARGSUSED */ 487168404Spjdstatic int 488168962Spjdzfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 489168404Spjd{ 490168404Spjd znode_t *zp = VTOZ(vp); 491168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 492168404Spjd objset_t *os = zfsvfs->z_os; 493168404Spjd ssize_t n, nbytes; 494168404Spjd int error; 495168404Spjd rl_t *rl; 496168404Spjd 497168404Spjd ZFS_ENTER(zfsvfs); 498168404Spjd 499168404Spjd /* 500168404Spjd * Validate file offset 501168404Spjd */ 502168404Spjd if (uio->uio_loffset < (offset_t)0) { 503168404Spjd ZFS_EXIT(zfsvfs); 504168404Spjd return (EINVAL); 505168404Spjd } 506168404Spjd 507168404Spjd /* 508168404Spjd * Fasttrack empty reads 509168404Spjd */ 510168404Spjd if (uio->uio_resid == 0) { 511168404Spjd ZFS_EXIT(zfsvfs); 512168404Spjd return (0); 513168404Spjd } 514168404Spjd 515168404Spjd /* 516168962Spjd * Check for mandatory locks 517168962Spjd */ 518168962Spjd if (MANDMODE((mode_t)zp->z_phys->zp_mode)) { 519168962Spjd if (error = chklock(vp, FREAD, 520168962Spjd uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 521168962Spjd ZFS_EXIT(zfsvfs); 522168962Spjd return (error); 523168962Spjd } 524168962Spjd } 525168962Spjd 526168962Spjd /* 527168404Spjd * If we're in FRSYNC mode, sync out this znode before reading it. 528168404Spjd */ 529168962Spjd if (ioflag & FRSYNC) 530168404Spjd zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); 531168404Spjd 532168404Spjd /* 533168404Spjd * Lock the range against changes. 534168404Spjd */ 535168404Spjd rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 536168404Spjd 537168404Spjd /* 538168404Spjd * If we are reading past end-of-file we can skip 539168404Spjd * to the end; but we might still need to set atime. 
540168404Spjd */ 541168404Spjd if (uio->uio_loffset >= zp->z_phys->zp_size) { 542168404Spjd error = 0; 543168404Spjd goto out; 544168404Spjd } 545168404Spjd 546168404Spjd ASSERT(uio->uio_loffset < zp->z_phys->zp_size); 547168404Spjd n = MIN(uio->uio_resid, zp->z_phys->zp_size - uio->uio_loffset); 548168404Spjd 549168404Spjd while (n > 0) { 550168404Spjd nbytes = MIN(n, zfs_read_chunk_size - 551168404Spjd P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 552168404Spjd 553168404Spjd if (vn_has_cached_data(vp)) 554168404Spjd error = mappedread(vp, nbytes, uio); 555168404Spjd else 556168404Spjd error = dmu_read_uio(os, zp->z_id, uio, nbytes); 557168404Spjd if (error) 558168404Spjd break; 559168962Spjd 560168404Spjd n -= nbytes; 561168404Spjd } 562168404Spjd 563168404Spjdout: 564168404Spjd zfs_range_unlock(rl); 565168404Spjd 566168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 567168404Spjd ZFS_EXIT(zfsvfs); 568168404Spjd return (error); 569168404Spjd} 570168404Spjd 571168404Spjd/* 572168404Spjd * Fault in the pages of the first n bytes specified by the uio structure. 573168404Spjd * 1 byte in each page is touched and the uio struct is unmodified. 574168404Spjd * Any error will exit this routine as this is only a best 575168404Spjd * attempt to get the pages resident. This is a copy of ufs_trans_touch(). 576168404Spjd */ 577168404Spjdstatic void 578168404Spjdzfs_prefault_write(ssize_t n, struct uio *uio) 579168404Spjd{ 580168404Spjd struct iovec *iov; 581168404Spjd ulong_t cnt, incr; 582168404Spjd caddr_t p; 583168404Spjd 584168404Spjd if (uio->uio_segflg != UIO_USERSPACE) 585168404Spjd return; 586168404Spjd 587168404Spjd iov = uio->uio_iov; 588168404Spjd 589168404Spjd while (n) { 590168404Spjd cnt = MIN(iov->iov_len, n); 591168404Spjd if (cnt == 0) { 592168404Spjd /* empty iov entry */ 593168404Spjd iov++; 594168404Spjd continue; 595168404Spjd } 596168404Spjd n -= cnt; 597168404Spjd /* 598168404Spjd * touch each page in this segment. 
599168404Spjd */ 600168404Spjd p = iov->iov_base; 601168404Spjd while (cnt) { 602168404Spjd if (fubyte(p) == -1) 603168404Spjd return; 604168404Spjd incr = MIN(cnt, PAGESIZE); 605168404Spjd p += incr; 606168404Spjd cnt -= incr; 607168404Spjd } 608168404Spjd /* 609168404Spjd * touch the last byte in case it straddles a page. 610168404Spjd */ 611168404Spjd p--; 612168404Spjd if (fubyte(p) == -1) 613168404Spjd return; 614168404Spjd iov++; 615168404Spjd } 616168404Spjd} 617168404Spjd 618168404Spjd/* 619168404Spjd * Write the bytes to a file. 620168404Spjd * 621168404Spjd * IN: vp - vnode of file to be written to. 622168404Spjd * uio - structure supplying write location, range info, 623168404Spjd * and data buffer. 624168404Spjd * ioflag - IO_APPEND flag set if in append mode. 625168404Spjd * cr - credentials of caller. 626168404Spjd * 627168404Spjd * OUT: uio - updated offset and range. 628168404Spjd * 629168404Spjd * RETURN: 0 if success 630168404Spjd * error code if failure 631168404Spjd * 632168404Spjd * Timestamps: 633168404Spjd * vp - ctime|mtime updated if byte count > 0 634168404Spjd */ 635168404Spjd/* ARGSUSED */ 636168404Spjdstatic int 637168962Spjdzfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 638168404Spjd{ 639168404Spjd znode_t *zp = VTOZ(vp); 640168962Spjd rlim64_t limit = MAXOFFSET_T; 641168404Spjd ssize_t start_resid = uio->uio_resid; 642168404Spjd ssize_t tx_bytes; 643168404Spjd uint64_t end_size; 644168404Spjd dmu_tx_t *tx; 645168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 646168404Spjd zilog_t *zilog = zfsvfs->z_log; 647168404Spjd offset_t woff; 648168404Spjd ssize_t n, nbytes; 649168404Spjd rl_t *rl; 650168404Spjd int max_blksz = zfsvfs->z_max_blksz; 651168404Spjd int error; 652168404Spjd 653168404Spjd /* 654168404Spjd * Fasttrack empty write 655168404Spjd */ 656168404Spjd n = start_resid; 657168404Spjd if (n == 0) 658168404Spjd return (0); 659168404Spjd 660168962Spjd if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 
661168962Spjd limit = MAXOFFSET_T; 662168962Spjd 663168404Spjd ZFS_ENTER(zfsvfs); 664168404Spjd 665168404Spjd /* 666168404Spjd * Pre-fault the pages to ensure slow (eg NFS) pages 667168404Spjd * don't hold up txg. 668168404Spjd */ 669168404Spjd zfs_prefault_write(n, uio); 670168404Spjd 671168404Spjd /* 672168404Spjd * If in append mode, set the io offset pointer to eof. 673168404Spjd */ 674168404Spjd if (ioflag & IO_APPEND) { 675168404Spjd /* 676168404Spjd * Range lock for a file append: 677168404Spjd * The value for the start of range will be determined by 678168404Spjd * zfs_range_lock() (to guarantee append semantics). 679168404Spjd * If this write will cause the block size to increase, 680168404Spjd * zfs_range_lock() will lock the entire file, so we must 681168404Spjd * later reduce the range after we grow the block size. 682168404Spjd */ 683168404Spjd rl = zfs_range_lock(zp, 0, n, RL_APPEND); 684168404Spjd if (rl->r_len == UINT64_MAX) { 685168404Spjd /* overlocked, zp_size can't change */ 686168404Spjd woff = uio->uio_loffset = zp->z_phys->zp_size; 687168404Spjd } else { 688168404Spjd woff = uio->uio_loffset = rl->r_off; 689168404Spjd } 690168404Spjd } else { 691168404Spjd woff = uio->uio_loffset; 692168404Spjd /* 693168404Spjd * Validate file offset 694168404Spjd */ 695168404Spjd if (woff < 0) { 696168404Spjd ZFS_EXIT(zfsvfs); 697168404Spjd return (EINVAL); 698168404Spjd } 699168404Spjd 700168404Spjd /* 701168404Spjd * If we need to grow the block size then zfs_range_lock() 702168404Spjd * will lock a wider range than we request here. 703168404Spjd * Later after growing the block size we reduce the range. 
704168404Spjd */ 705168404Spjd rl = zfs_range_lock(zp, woff, n, RL_WRITER); 706168404Spjd } 707168404Spjd 708168962Spjd if (woff >= limit) { 709168962Spjd zfs_range_unlock(rl); 710168962Spjd ZFS_EXIT(zfsvfs); 711168962Spjd return (EFBIG); 712168962Spjd } 713168962Spjd 714168962Spjd if ((woff + n) > limit || woff > (limit - n)) 715168962Spjd n = limit - woff; 716168962Spjd 717168962Spjd /* 718168962Spjd * Check for mandatory locks 719168962Spjd */ 720168962Spjd if (MANDMODE((mode_t)zp->z_phys->zp_mode) && 721168962Spjd (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 722168962Spjd zfs_range_unlock(rl); 723168962Spjd ZFS_EXIT(zfsvfs); 724168962Spjd return (error); 725168962Spjd } 726168404Spjd end_size = MAX(zp->z_phys->zp_size, woff + n); 727168404Spjd 728168404Spjd /* 729168404Spjd * Write the file in reasonable size chunks. Each chunk is written 730168404Spjd * in a separate transaction; this keeps the intent log records small 731168404Spjd * and allows us to do more fine-grained space accounting. 732168404Spjd */ 733168404Spjd while (n > 0) { 734168404Spjd /* 735168404Spjd * Start a transaction. 736168404Spjd */ 737168404Spjd woff = uio->uio_loffset; 738168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 739168404Spjd dmu_tx_hold_bonus(tx, zp->z_id); 740168404Spjd dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 741168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 742168404Spjd if (error) { 743168404Spjd if (error == ERESTART && 744168404Spjd zfsvfs->z_assign == TXG_NOWAIT) { 745168404Spjd dmu_tx_wait(tx); 746168404Spjd dmu_tx_abort(tx); 747168404Spjd continue; 748168404Spjd } 749168404Spjd dmu_tx_abort(tx); 750168404Spjd break; 751168404Spjd } 752168404Spjd 753168404Spjd /* 754168404Spjd * If zfs_range_lock() over-locked we grow the blocksize 755168404Spjd * and then reduce the lock range. This will only happen 756168404Spjd * on the first iteration since zfs_range_reduce() will 757168404Spjd * shrink down r_len to the appropriate size. 
758168404Spjd */ 759168404Spjd if (rl->r_len == UINT64_MAX) { 760168404Spjd uint64_t new_blksz; 761168404Spjd 762168404Spjd if (zp->z_blksz > max_blksz) { 763168404Spjd ASSERT(!ISP2(zp->z_blksz)); 764168404Spjd new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); 765168404Spjd } else { 766168404Spjd new_blksz = MIN(end_size, max_blksz); 767168404Spjd } 768168404Spjd zfs_grow_blocksize(zp, new_blksz, tx); 769168404Spjd zfs_range_reduce(rl, woff, n); 770168404Spjd } 771168404Spjd 772168404Spjd /* 773168404Spjd * XXX - should we really limit each write to z_max_blksz? 774168404Spjd * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 775168404Spjd */ 776168404Spjd nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 777168404Spjd 778168404Spjd if (woff + nbytes > zp->z_phys->zp_size) 779168404Spjd vnode_pager_setsize(vp, woff + nbytes); 780168404Spjd 781169302Spjd rw_enter(&zp->z_map_lock, RW_READER); 782169302Spjd 783168962Spjd tx_bytes = uio->uio_resid; 784168404Spjd if (vn_has_cached_data(vp)) { 785168404Spjd rw_exit(&zp->z_map_lock); 786168404Spjd error = mappedwrite(vp, nbytes, uio, tx); 787168404Spjd } else { 788168404Spjd error = dmu_write_uio(zfsvfs->z_os, zp->z_id, 789168404Spjd uio, nbytes, tx); 790168404Spjd rw_exit(&zp->z_map_lock); 791168404Spjd } 792168404Spjd tx_bytes -= uio->uio_resid; 793168404Spjd 794168404Spjd /* 795168404Spjd * If we made no progress, we're done. If we made even 796168404Spjd * partial progress, update the znode and ZIL accordingly. 797168404Spjd */ 798168404Spjd if (tx_bytes == 0) { 799168404Spjd dmu_tx_commit(tx); 800168404Spjd ASSERT(error != 0); 801168404Spjd break; 802168404Spjd } 803168404Spjd 804168404Spjd /* 805168404Spjd * Clear Set-UID/Set-GID bits on successful write if not 806168404Spjd * privileged and at least one of the excute bits is set. 
807168404Spjd * 808168404Spjd * It would be nice to to this after all writes have 809168404Spjd * been done, but that would still expose the ISUID/ISGID 810168404Spjd * to another app after the partial write is committed. 811168404Spjd */ 812168404Spjd mutex_enter(&zp->z_acl_lock); 813168404Spjd if ((zp->z_phys->zp_mode & (S_IXUSR | (S_IXUSR >> 3) | 814168404Spjd (S_IXUSR >> 6))) != 0 && 815168404Spjd (zp->z_phys->zp_mode & (S_ISUID | S_ISGID)) != 0 && 816168404Spjd secpolicy_vnode_setid_retain(cr, 817168404Spjd (zp->z_phys->zp_mode & S_ISUID) != 0 && 818168404Spjd zp->z_phys->zp_uid == 0) != 0) { 819168404Spjd zp->z_phys->zp_mode &= ~(S_ISUID | S_ISGID); 820168404Spjd } 821168404Spjd mutex_exit(&zp->z_acl_lock); 822168404Spjd 823168404Spjd /* 824168404Spjd * Update time stamp. NOTE: This marks the bonus buffer as 825168404Spjd * dirty, so we don't have to do it again for zp_size. 826168404Spjd */ 827168404Spjd zfs_time_stamper(zp, CONTENT_MODIFIED, tx); 828168404Spjd 829168404Spjd /* 830168404Spjd * Update the file size (zp_size) if it has changed; 831168404Spjd * account for possible concurrent updates. 832168404Spjd */ 833168404Spjd while ((end_size = zp->z_phys->zp_size) < uio->uio_loffset) 834168404Spjd (void) atomic_cas_64(&zp->z_phys->zp_size, end_size, 835168404Spjd uio->uio_loffset); 836168404Spjd zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 837168404Spjd dmu_tx_commit(tx); 838168404Spjd 839168404Spjd if (error != 0) 840168404Spjd break; 841168404Spjd ASSERT(tx_bytes == nbytes); 842168404Spjd n -= nbytes; 843168404Spjd } 844168404Spjd 845168404Spjd zfs_range_unlock(rl); 846168404Spjd 847168404Spjd /* 848168404Spjd * If we're in replay mode, or we made no progress, return error. 849168404Spjd * Otherwise, it's at least a partial write, so it's successful. 
850168404Spjd */ 851168404Spjd if (zfsvfs->z_assign >= TXG_INITIAL || uio->uio_resid == start_resid) { 852168404Spjd ZFS_EXIT(zfsvfs); 853168404Spjd return (error); 854168404Spjd } 855168404Spjd 856168962Spjd if (ioflag & (FSYNC | FDSYNC)) 857168404Spjd zil_commit(zilog, zp->z_last_itx, zp->z_id); 858168404Spjd 859168404Spjd ZFS_EXIT(zfsvfs); 860168404Spjd return (0); 861168404Spjd} 862168404Spjd 863168404Spjdvoid 864168404Spjdzfs_get_done(dmu_buf_t *db, void *vzgd) 865168404Spjd{ 866168404Spjd zgd_t *zgd = (zgd_t *)vzgd; 867168404Spjd rl_t *rl = zgd->zgd_rl; 868168404Spjd vnode_t *vp = ZTOV(rl->r_zp); 869168404Spjd int vfslocked; 870168404Spjd 871168404Spjd vfslocked = VFS_LOCK_GIANT(vp->v_vfsp); 872168404Spjd dmu_buf_rele(db, vzgd); 873168404Spjd zfs_range_unlock(rl); 874168404Spjd VN_RELE(vp); 875168404Spjd zil_add_vdev(zgd->zgd_zilog, DVA_GET_VDEV(BP_IDENTITY(zgd->zgd_bp))); 876168404Spjd kmem_free(zgd, sizeof (zgd_t)); 877168404Spjd VFS_UNLOCK_GIANT(vfslocked); 878168404Spjd} 879168404Spjd 880168404Spjd/* 881168404Spjd * Get data to generate a TX_WRITE intent log record. 
882168404Spjd */ 883168404Spjdint 884168404Spjdzfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 885168404Spjd{ 886168404Spjd zfsvfs_t *zfsvfs = arg; 887168404Spjd objset_t *os = zfsvfs->z_os; 888168404Spjd znode_t *zp; 889168404Spjd uint64_t off = lr->lr_offset; 890168404Spjd dmu_buf_t *db; 891168404Spjd rl_t *rl; 892168404Spjd zgd_t *zgd; 893168404Spjd int dlen = lr->lr_length; /* length of user data */ 894168404Spjd int error = 0; 895168404Spjd 896168404Spjd ASSERT(zio); 897168404Spjd ASSERT(dlen != 0); 898168404Spjd 899168404Spjd /* 900168404Spjd * Nothing to do if the file has been removed 901168404Spjd */ 902168404Spjd if (zfs_zget(zfsvfs, lr->lr_foid, &zp) != 0) 903168404Spjd return (ENOENT); 904168404Spjd if (zp->z_unlinked) { 905168404Spjd VN_RELE(ZTOV(zp)); 906168404Spjd return (ENOENT); 907168404Spjd } 908168404Spjd 909168404Spjd /* 910168404Spjd * Write records come in two flavors: immediate and indirect. 911168404Spjd * For small writes it's cheaper to store the data with the 912168404Spjd * log record (immediate); for large writes it's cheaper to 913168404Spjd * sync the data and get a pointer to it (indirect) so that 914168404Spjd * we don't have to write the data twice. 915168404Spjd */ 916168404Spjd if (buf != NULL) { /* immediate write */ 917168404Spjd rl = zfs_range_lock(zp, off, dlen, RL_READER); 918168404Spjd /* test for truncation needs to be done while range locked */ 919168404Spjd if (off >= zp->z_phys->zp_size) { 920168404Spjd error = ENOENT; 921168404Spjd goto out; 922168404Spjd } 923168404Spjd VERIFY(0 == dmu_read(os, lr->lr_foid, off, dlen, buf)); 924168404Spjd } else { /* indirect write */ 925168404Spjd uint64_t boff; /* block starting offset */ 926168404Spjd 927168404Spjd /* 928168404Spjd * Have to lock the whole block to ensure when it's 929168404Spjd * written out and it's checksum is being calculated 930168404Spjd * that no one can change the data. 
We need to re-check 931168404Spjd * blocksize after we get the lock in case it's changed! 932168404Spjd */ 933168404Spjd for (;;) { 934168404Spjd if (ISP2(zp->z_blksz)) { 935168404Spjd boff = P2ALIGN_TYPED(off, zp->z_blksz, 936168404Spjd uint64_t); 937168404Spjd } else { 938168404Spjd boff = 0; 939168404Spjd } 940168404Spjd dlen = zp->z_blksz; 941168404Spjd rl = zfs_range_lock(zp, boff, dlen, RL_READER); 942168404Spjd if (zp->z_blksz == dlen) 943168404Spjd break; 944168404Spjd zfs_range_unlock(rl); 945168404Spjd } 946168404Spjd /* test for truncation needs to be done while range locked */ 947168404Spjd if (off >= zp->z_phys->zp_size) { 948168404Spjd error = ENOENT; 949168404Spjd goto out; 950168404Spjd } 951168404Spjd zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP); 952168404Spjd zgd->zgd_rl = rl; 953168404Spjd zgd->zgd_zilog = zfsvfs->z_log; 954168404Spjd zgd->zgd_bp = &lr->lr_blkptr; 955168404Spjd VERIFY(0 == dmu_buf_hold(os, lr->lr_foid, boff, zgd, &db)); 956168404Spjd ASSERT(boff == db->db_offset); 957168404Spjd lr->lr_blkoff = off - boff; 958168404Spjd error = dmu_sync(zio, db, &lr->lr_blkptr, 959168404Spjd lr->lr_common.lrc_txg, zfs_get_done, zgd); 960168404Spjd ASSERT(error == EEXIST || lr->lr_length <= zp->z_blksz); 961168404Spjd if (error == 0) { 962168404Spjd zil_add_vdev(zfsvfs->z_log, 963168404Spjd DVA_GET_VDEV(BP_IDENTITY(&lr->lr_blkptr))); 964168404Spjd } 965168404Spjd /* 966168404Spjd * If we get EINPROGRESS, then we need to wait for a 967168404Spjd * write IO initiated by dmu_sync() to complete before 968168404Spjd * we can release this dbuf. We will finish everything 969168404Spjd * up in the zfs_get_done() callback. 
970168404Spjd */ 971168404Spjd if (error == EINPROGRESS) 972168404Spjd return (0); 973168404Spjd dmu_buf_rele(db, zgd); 974168404Spjd kmem_free(zgd, sizeof (zgd_t)); 975168404Spjd } 976168404Spjdout: 977168404Spjd zfs_range_unlock(rl); 978168404Spjd VN_RELE(ZTOV(zp)); 979168404Spjd return (error); 980168404Spjd} 981168404Spjd 982168404Spjd/*ARGSUSED*/ 983168404Spjdstatic int 984168962Spjdzfs_access(vnode_t *vp, int mode, int flags, cred_t *cr) 985168404Spjd{ 986168404Spjd znode_t *zp = VTOZ(vp); 987168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 988168404Spjd int error; 989168404Spjd 990168404Spjd ZFS_ENTER(zfsvfs); 991168962Spjd error = zfs_zaccess_rwx(zp, mode, cr); 992168404Spjd ZFS_EXIT(zfsvfs); 993168404Spjd return (error); 994168404Spjd} 995168404Spjd 996168404Spjd/* 997168404Spjd * Lookup an entry in a directory, or an extended attribute directory. 998168404Spjd * If it exists, return a held vnode reference for it. 999168404Spjd * 1000168404Spjd * IN: dvp - vnode of directory to search. 1001168404Spjd * nm - name of entry to lookup. 1002168404Spjd * pnp - full pathname to lookup [UNUSED]. 1003168404Spjd * flags - LOOKUP_XATTR set if looking for an attribute. 1004168404Spjd * rdir - root directory vnode [UNUSED]. 1005168404Spjd * cr - credentials of caller. 1006168404Spjd * 1007168404Spjd * OUT: vpp - vnode of located entry, NULL if not found. 
1008168404Spjd * 1009168404Spjd * RETURN: 0 if success 1010168404Spjd * error code if failure 1011168404Spjd * 1012168404Spjd * Timestamps: 1013168404Spjd * NA 1014168404Spjd */ 1015168404Spjd/* ARGSUSED */ 1016168962Spjdstatic int 1017168962Spjdzfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1018169170Spjd int nameiop, cred_t *cr, kthread_t *td) 1019168404Spjd{ 1020168404Spjd 1021168962Spjd znode_t *zdp = VTOZ(dvp); 1022168962Spjd zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1023168962Spjd int error; 1024168404Spjd 1025168404Spjd ZFS_ENTER(zfsvfs); 1026168404Spjd 1027168404Spjd *vpp = NULL; 1028168404Spjd 1029168404Spjd#ifdef TODO 1030168404Spjd if (flags & LOOKUP_XATTR) { 1031168404Spjd /* 1032168404Spjd * If the xattr property is off, refuse the lookup request. 1033168404Spjd */ 1034168404Spjd if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1035168404Spjd ZFS_EXIT(zfsvfs); 1036168404Spjd return (EINVAL); 1037168404Spjd } 1038168404Spjd 1039168404Spjd /* 1040168404Spjd * We don't allow recursive attributes.. 1041168404Spjd * Maybe someday we will. 1042168404Spjd */ 1043168404Spjd if (zdp->z_phys->zp_flags & ZFS_XATTR) { 1044168404Spjd ZFS_EXIT(zfsvfs); 1045168404Spjd return (EINVAL); 1046168404Spjd } 1047168404Spjd 1048168404Spjd if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1049168404Spjd ZFS_EXIT(zfsvfs); 1050168404Spjd return (error); 1051168404Spjd } 1052168404Spjd 1053168404Spjd /* 1054168404Spjd * Do we have permission to get into attribute directory? 
1055168404Spjd */ 1056168404Spjd 1057168404Spjd if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, cr)) { 1058168404Spjd VN_RELE(*vpp); 1059168404Spjd } 1060168404Spjd 1061168404Spjd ZFS_EXIT(zfsvfs); 1062168404Spjd return (error); 1063168404Spjd } 1064168404Spjd#endif /* TODO */ 1065168404Spjd 1066168404Spjd if (dvp->v_type != VDIR) { 1067168404Spjd ZFS_EXIT(zfsvfs); 1068168404Spjd return (ENOTDIR); 1069168404Spjd } 1070168404Spjd 1071168404Spjd /* 1072168404Spjd * Check accessibility of directory. 1073168404Spjd */ 1074168404Spjd 1075168404Spjd if (error = zfs_zaccess(zdp, ACE_EXECUTE, cr)) { 1076168404Spjd ZFS_EXIT(zfsvfs); 1077168404Spjd return (error); 1078168404Spjd } 1079168404Spjd 1080168962Spjd if ((error = zfs_dirlook(zdp, nm, vpp)) == 0) { 1081168404Spjd 1082168962Spjd /* 1083168962Spjd * Convert device special files 1084168962Spjd */ 1085168962Spjd if (IS_DEVVP(*vpp)) { 1086168962Spjd vnode_t *svp; 1087168962Spjd 1088168962Spjd svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1089168962Spjd VN_RELE(*vpp); 1090168962Spjd if (svp == NULL) 1091168962Spjd error = ENOSYS; 1092168962Spjd else 1093168962Spjd *vpp = svp; 1094168962Spjd } 1095168962Spjd } 1096168962Spjd 1097168404Spjd ZFS_EXIT(zfsvfs); 1098168404Spjd 1099168404Spjd /* Translate errors and add SAVENAME when needed. */ 1100168404Spjd if (cnp->cn_flags & ISLASTCN) { 1101168404Spjd switch (nameiop) { 1102168404Spjd case CREATE: 1103168404Spjd case RENAME: 1104168404Spjd if (error == ENOENT) { 1105168404Spjd error = EJUSTRETURN; 1106168404Spjd cnp->cn_flags |= SAVENAME; 1107168404Spjd break; 1108168404Spjd } 1109168404Spjd /* FALLTHROUGH */ 1110168404Spjd case DELETE: 1111168404Spjd if (error == 0) 1112168404Spjd cnp->cn_flags |= SAVENAME; 1113168404Spjd break; 1114168404Spjd } 1115168404Spjd } 1116168404Spjd if (error == 0 && (nm[0] != '.' 
|| nm[1] != '\0')) { 1117169198Spjd int ltype = 0; 1118169198Spjd 1119169198Spjd if (cnp->cn_flags & ISDOTDOT) { 1120176559Sattilio ltype = VOP_ISLOCKED(dvp); 1121175294Sattilio VOP_UNLOCK(dvp, 0); 1122169198Spjd } 1123175202Sattilio error = vn_lock(*vpp, cnp->cn_lkflags); 1124168962Spjd if (cnp->cn_flags & ISDOTDOT) 1125175202Sattilio vn_lock(dvp, ltype | LK_RETRY); 1126169172Spjd if (error != 0) { 1127169172Spjd VN_RELE(*vpp); 1128169172Spjd *vpp = NULL; 1129169172Spjd return (error); 1130169172Spjd } 1131168404Spjd } 1132168404Spjd 1133168404Spjd#ifdef FREEBSD_NAMECACHE 1134168404Spjd /* 1135168404Spjd * Insert name into cache (as non-existent) if appropriate. 1136168404Spjd */ 1137168404Spjd if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) 1138168404Spjd cache_enter(dvp, *vpp, cnp); 1139169170Spjd /* 1140169170Spjd * Insert name into cache if appropriate. 1141169170Spjd */ 1142168404Spjd if (error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1143168404Spjd if (!(cnp->cn_flags & ISLASTCN) || 1144168404Spjd (nameiop != DELETE && nameiop != RENAME)) { 1145168404Spjd cache_enter(dvp, *vpp, cnp); 1146168404Spjd } 1147168404Spjd } 1148168404Spjd#endif 1149168404Spjd 1150168404Spjd return (error); 1151168404Spjd} 1152168404Spjd 1153168404Spjd/* 1154168404Spjd * Attempt to create a new entry in a directory. If the entry 1155168404Spjd * already exists, truncate the file if permissible, else return 1156168404Spjd * an error. Return the vp of the created or trunc'd file. 1157168404Spjd * 1158168404Spjd * IN: dvp - vnode of directory to put new file entry in. 1159168404Spjd * name - name of new file entry. 1160168404Spjd * vap - attributes of new file. 1161168404Spjd * excl - flag indicating exclusive or non-exclusive mode. 1162168404Spjd * mode - mode to open file with. 1163168404Spjd * cr - credentials of caller. 1164168404Spjd * flag - large file flag [UNUSED]. 1165168404Spjd * 1166168404Spjd * OUT: vpp - vnode of created or trunc'd entry. 
1167168404Spjd * 1168168404Spjd * RETURN: 0 if success 1169168404Spjd * error code if failure 1170168404Spjd * 1171168404Spjd * Timestamps: 1172168404Spjd * dvp - ctime|mtime updated if new entry created 1173168404Spjd * vp - ctime|mtime always, atime if new 1174168404Spjd */ 1175168404Spjd/* ARGSUSED */ 1176168404Spjdstatic int 1177168962Spjdzfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 1178176559Sattilio vnode_t **vpp, cred_t *cr) 1179168404Spjd{ 1180168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1181168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1182168404Spjd zilog_t *zilog = zfsvfs->z_log; 1183168404Spjd objset_t *os = zfsvfs->z_os; 1184168404Spjd zfs_dirlock_t *dl; 1185168404Spjd dmu_tx_t *tx; 1186168404Spjd int error; 1187168404Spjd uint64_t zoid; 1188168404Spjd 1189168404Spjd ZFS_ENTER(zfsvfs); 1190168404Spjd 1191168404Spjdtop: 1192168404Spjd *vpp = NULL; 1193168404Spjd 1194168404Spjd if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr)) 1195168404Spjd vap->va_mode &= ~VSVTX; 1196168404Spjd 1197168404Spjd if (*name == '\0') { 1198168404Spjd /* 1199168404Spjd * Null component name refers to the directory itself. 1200168404Spjd */ 1201168404Spjd VN_HOLD(dvp); 1202168404Spjd zp = dzp; 1203168404Spjd dl = NULL; 1204168404Spjd error = 0; 1205168404Spjd } else { 1206168404Spjd /* possible VN_HOLD(zp) */ 1207168404Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, 0)) { 1208168404Spjd if (strcmp(name, "..") == 0) 1209168404Spjd error = EISDIR; 1210168404Spjd ZFS_EXIT(zfsvfs); 1211168404Spjd return (error); 1212168404Spjd } 1213168404Spjd } 1214168404Spjd 1215168404Spjd zoid = zp ? zp->z_id : -1ULL; 1216168404Spjd 1217168404Spjd if (zp == NULL) { 1218168404Spjd /* 1219168404Spjd * Create a new file object and update the directory 1220168404Spjd * to reference it. 
1221168404Spjd */ 1222168404Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, cr)) { 1223168404Spjd goto out; 1224168404Spjd } 1225168404Spjd 1226168404Spjd /* 1227168404Spjd * We only support the creation of regular files in 1228168404Spjd * extended attribute directories. 1229168404Spjd */ 1230168404Spjd if ((dzp->z_phys->zp_flags & ZFS_XATTR) && 1231168404Spjd (vap->va_type != VREG)) { 1232168404Spjd error = EINVAL; 1233168404Spjd goto out; 1234168404Spjd } 1235168404Spjd 1236168404Spjd tx = dmu_tx_create(os); 1237168404Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1238168404Spjd dmu_tx_hold_bonus(tx, dzp->z_id); 1239168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 1240168404Spjd if (dzp->z_phys->zp_flags & ZFS_INHERIT_ACE) 1241168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1242168404Spjd 0, SPA_MAXBLOCKSIZE); 1243168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 1244168404Spjd if (error) { 1245168404Spjd zfs_dirent_unlock(dl); 1246168404Spjd if (error == ERESTART && 1247168404Spjd zfsvfs->z_assign == TXG_NOWAIT) { 1248168404Spjd dmu_tx_wait(tx); 1249168404Spjd dmu_tx_abort(tx); 1250168404Spjd goto top; 1251168404Spjd } 1252168404Spjd dmu_tx_abort(tx); 1253168404Spjd ZFS_EXIT(zfsvfs); 1254168404Spjd return (error); 1255168404Spjd } 1256168404Spjd zfs_mknode(dzp, vap, &zoid, tx, cr, 0, &zp, 0); 1257168404Spjd ASSERT(zp->z_id == zoid); 1258168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 1259168404Spjd zfs_log_create(zilog, tx, TX_CREATE, dzp, zp, name); 1260168404Spjd dmu_tx_commit(tx); 1261168404Spjd } else { 1262168404Spjd /* 1263168404Spjd * A directory entry already exists for this name. 1264168404Spjd */ 1265168404Spjd /* 1266168962Spjd * Can't truncate an existing file if in exclusive mode. 1267168962Spjd */ 1268168962Spjd if (excl == EXCL) { 1269168962Spjd error = EEXIST; 1270168962Spjd goto out; 1271168962Spjd } 1272168962Spjd /* 1273168404Spjd * Can't open a directory for writing. 
1274168404Spjd */ 1275168404Spjd if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1276168404Spjd error = EISDIR; 1277168404Spjd goto out; 1278168404Spjd } 1279168404Spjd /* 1280168404Spjd * Verify requested access to file. 1281168404Spjd */ 1282168404Spjd if (mode && (error = zfs_zaccess_rwx(zp, mode, cr))) { 1283168404Spjd goto out; 1284168404Spjd } 1285168404Spjd 1286168404Spjd mutex_enter(&dzp->z_lock); 1287168404Spjd dzp->z_seq++; 1288168404Spjd mutex_exit(&dzp->z_lock); 1289168404Spjd 1290168404Spjd /* 1291168404Spjd * Truncate regular files if requested. 1292168404Spjd */ 1293168404Spjd if ((ZTOV(zp)->v_type == VREG) && 1294168404Spjd (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 1295168404Spjd error = zfs_freesp(zp, 0, 0, mode, TRUE); 1296168404Spjd if (error == ERESTART && 1297168404Spjd zfsvfs->z_assign == TXG_NOWAIT) { 1298168404Spjd /* NB: we already did dmu_tx_wait() */ 1299168404Spjd zfs_dirent_unlock(dl); 1300168404Spjd VN_RELE(ZTOV(zp)); 1301168404Spjd goto top; 1302168404Spjd } 1303168404Spjd } 1304168404Spjd } 1305168404Spjdout: 1306168404Spjd 1307168404Spjd if (error == 0) { 1308168404Spjd *vpp = ZTOV(zp); 1309175202Sattilio vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 1310168404Spjd } 1311168404Spjd 1312168404Spjd if (dl) 1313168404Spjd zfs_dirent_unlock(dl); 1314168404Spjd 1315168404Spjd if (error) { 1316168404Spjd if (zp) 1317168404Spjd VN_RELE(ZTOV(zp)); 1318168962Spjd } else { 1319168962Spjd *vpp = ZTOV(zp); 1320168962Spjd /* 1321168962Spjd * If vnode is for a device return a specfs vnode instead. 
1322168962Spjd */ 1323168962Spjd if (IS_DEVVP(*vpp)) { 1324168962Spjd struct vnode *svp; 1325168962Spjd 1326168962Spjd svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1327168962Spjd VN_RELE(*vpp); 1328168962Spjd if (svp == NULL) { 1329168962Spjd error = ENOSYS; 1330168962Spjd } 1331168962Spjd *vpp = svp; 1332168962Spjd } 1333168404Spjd } 1334168404Spjd 1335168404Spjd ZFS_EXIT(zfsvfs); 1336168404Spjd return (error); 1337168404Spjd} 1338168404Spjd 1339168404Spjd/* 1340168404Spjd * Remove an entry from a directory. 1341168404Spjd * 1342168404Spjd * IN: dvp - vnode of directory to remove entry from. 1343168404Spjd * name - name of entry to remove. 1344168404Spjd * cr - credentials of caller. 1345168404Spjd * 1346168404Spjd * RETURN: 0 if success 1347168404Spjd * error code if failure 1348168404Spjd * 1349168404Spjd * Timestamps: 1350168404Spjd * dvp - ctime|mtime 1351168404Spjd * vp - ctime (if nlink > 0) 1352168404Spjd */ 1353168404Spjdstatic int 1354168962Spjdzfs_remove(vnode_t *dvp, char *name, cred_t *cr) 1355168404Spjd{ 1356168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1357168404Spjd znode_t *xzp = NULL; 1358168404Spjd vnode_t *vp; 1359168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1360168404Spjd zilog_t *zilog = zfsvfs->z_log; 1361168962Spjd uint64_t acl_obj, xattr_obj; 1362168404Spjd zfs_dirlock_t *dl; 1363168404Spjd dmu_tx_t *tx; 1364168962Spjd boolean_t may_delete_now, delete_now = FALSE; 1365168404Spjd boolean_t unlinked; 1366168404Spjd int error; 1367168404Spjd 1368168404Spjd ZFS_ENTER(zfsvfs); 1369168404Spjd 1370168404Spjdtop: 1371168404Spjd /* 1372168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 
1373168404Spjd */ 1374168404Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, ZEXISTS)) { 1375168404Spjd ZFS_EXIT(zfsvfs); 1376168404Spjd return (error); 1377168404Spjd } 1378168404Spjd 1379168404Spjd vp = ZTOV(zp); 1380168404Spjd 1381168962Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1382168404Spjd goto out; 1383168962Spjd } 1384168404Spjd 1385168962Spjd /* 1386168962Spjd * Need to use rmdir for removing directories. 1387168962Spjd */ 1388168962Spjd if (vp->v_type == VDIR) { 1389168962Spjd error = EPERM; 1390168962Spjd goto out; 1391168962Spjd } 1392168962Spjd 1393168962Spjd vnevent_remove(vp); 1394168962Spjd 1395168404Spjd dnlc_remove(dvp, name); 1396168404Spjd 1397168962Spjd may_delete_now = FALSE; 1398168962Spjd 1399168404Spjd /* 1400168404Spjd * We may delete the znode now, or we may put it in the unlinked set; 1401168404Spjd * it depends on whether we're the last link, and on whether there are 1402168404Spjd * other holds on the vnode. So we dmu_tx_hold() the right things to 1403168404Spjd * allow for either case. 1404168404Spjd */ 1405168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1406168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1407168404Spjd dmu_tx_hold_bonus(tx, zp->z_id); 1408168962Spjd if (may_delete_now) 1409168962Spjd dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END); 1410168404Spjd 1411168404Spjd /* are there any extended attributes? */ 1412168404Spjd if ((xattr_obj = zp->z_phys->zp_xattr) != 0) { 1413168404Spjd /* XXX - do we need this if we are deleting? 
*/ 1414168404Spjd dmu_tx_hold_bonus(tx, xattr_obj); 1415168404Spjd } 1416168404Spjd 1417168962Spjd /* are there any additional acls */ 1418168962Spjd if ((acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj) != 0 && 1419168962Spjd may_delete_now) 1420168962Spjd dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 1421168962Spjd 1422168404Spjd /* charge as an update -- would be nice not to charge at all */ 1423168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1424168404Spjd 1425168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 1426168404Spjd if (error) { 1427168404Spjd zfs_dirent_unlock(dl); 1428168962Spjd VN_RELE(vp); 1429168404Spjd if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { 1430168404Spjd dmu_tx_wait(tx); 1431168404Spjd dmu_tx_abort(tx); 1432168404Spjd goto top; 1433168404Spjd } 1434168404Spjd dmu_tx_abort(tx); 1435168404Spjd ZFS_EXIT(zfsvfs); 1436168404Spjd return (error); 1437168404Spjd } 1438168404Spjd 1439168404Spjd /* 1440168404Spjd * Remove the directory entry. 
1441168404Spjd */ 1442168404Spjd error = zfs_link_destroy(dl, zp, tx, 0, &unlinked); 1443168404Spjd 1444168404Spjd if (error) { 1445168404Spjd dmu_tx_commit(tx); 1446168404Spjd goto out; 1447168404Spjd } 1448168404Spjd 1449168962Spjd if (0 && unlinked) { 1450168962Spjd VI_LOCK(vp); 1451168962Spjd delete_now = may_delete_now && 1452168962Spjd vp->v_count == 1 && !vn_has_cached_data(vp) && 1453168962Spjd zp->z_phys->zp_xattr == xattr_obj && 1454168962Spjd zp->z_phys->zp_acl.z_acl_extern_obj == acl_obj; 1455168962Spjd VI_UNLOCK(vp); 1456168962Spjd } 1457168962Spjd 1458168962Spjd if (delete_now) { 1459168962Spjd if (zp->z_phys->zp_xattr) { 1460168962Spjd error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp); 1461168962Spjd ASSERT3U(error, ==, 0); 1462168962Spjd ASSERT3U(xzp->z_phys->zp_links, ==, 2); 1463168962Spjd dmu_buf_will_dirty(xzp->z_dbuf, tx); 1464168962Spjd mutex_enter(&xzp->z_lock); 1465168962Spjd xzp->z_unlinked = 1; 1466168962Spjd xzp->z_phys->zp_links = 0; 1467168962Spjd mutex_exit(&xzp->z_lock); 1468168962Spjd zfs_unlinked_add(xzp, tx); 1469168962Spjd zp->z_phys->zp_xattr = 0; /* probably unnecessary */ 1470168962Spjd } 1471168962Spjd mutex_enter(&zp->z_lock); 1472168962Spjd VI_LOCK(vp); 1473168962Spjd vp->v_count--; 1474168962Spjd ASSERT3U(vp->v_count, ==, 0); 1475168962Spjd VI_UNLOCK(vp); 1476168962Spjd mutex_exit(&zp->z_lock); 1477168962Spjd zfs_znode_delete(zp, tx); 1478168962Spjd VFS_RELE(zfsvfs->z_vfs); 1479168962Spjd } else if (unlinked) { 1480168404Spjd zfs_unlinked_add(zp, tx); 1481168962Spjd } 1482168404Spjd 1483168404Spjd zfs_log_remove(zilog, tx, TX_REMOVE, dzp, name); 1484168404Spjd 1485168404Spjd dmu_tx_commit(tx); 1486168404Spjdout: 1487168404Spjd zfs_dirent_unlock(dl); 1488168404Spjd 1489168962Spjd if (!delete_now) { 1490168962Spjd VN_RELE(vp); 1491168962Spjd } else if (xzp) { 1492168962Spjd /* this rele delayed to prevent nesting transactions */ 1493168962Spjd VN_RELE(ZTOV(xzp)); 1494168962Spjd } 1495168962Spjd 1496168404Spjd 
ZFS_EXIT(zfsvfs); 1497168404Spjd return (error); 1498168404Spjd} 1499168404Spjd 1500168404Spjd/* 1501168404Spjd * Create a new directory and insert it into dvp using the name 1502168404Spjd * provided. Return a pointer to the inserted directory. 1503168404Spjd * 1504168404Spjd * IN: dvp - vnode of directory to add subdir to. 1505168404Spjd * dirname - name of new directory. 1506168404Spjd * vap - attributes of new directory. 1507168404Spjd * cr - credentials of caller. 1508168404Spjd * 1509168404Spjd * OUT: vpp - vnode of created directory. 1510168404Spjd * 1511168404Spjd * RETURN: 0 if success 1512168404Spjd * error code if failure 1513168404Spjd * 1514168404Spjd * Timestamps: 1515168404Spjd * dvp - ctime|mtime updated 1516168404Spjd * vp - ctime|mtime|atime updated 1517168404Spjd */ 1518168404Spjdstatic int 1519168962Spjdzfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr) 1520168404Spjd{ 1521168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1522168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1523168404Spjd zilog_t *zilog = zfsvfs->z_log; 1524168404Spjd zfs_dirlock_t *dl; 1525168404Spjd uint64_t zoid = 0; 1526168404Spjd dmu_tx_t *tx; 1527168404Spjd int error; 1528168404Spjd 1529168404Spjd ASSERT(vap->va_type == VDIR); 1530168404Spjd 1531168404Spjd ZFS_ENTER(zfsvfs); 1532168404Spjd 1533168404Spjd if (dzp->z_phys->zp_flags & ZFS_XATTR) { 1534168404Spjd ZFS_EXIT(zfsvfs); 1535168404Spjd return (EINVAL); 1536168404Spjd } 1537168404Spjdtop: 1538168404Spjd *vpp = NULL; 1539168404Spjd 1540168404Spjd /* 1541168404Spjd * First make sure the new directory doesn't exist. 
1542168404Spjd */ 1543168404Spjd if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, ZNEW)) { 1544168404Spjd ZFS_EXIT(zfsvfs); 1545168404Spjd return (error); 1546168404Spjd } 1547168404Spjd 1548168404Spjd if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, cr)) { 1549168404Spjd zfs_dirent_unlock(dl); 1550168404Spjd ZFS_EXIT(zfsvfs); 1551168404Spjd return (error); 1552168404Spjd } 1553168404Spjd 1554168404Spjd /* 1555168404Spjd * Add a new entry to the directory. 1556168404Spjd */ 1557168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1558168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 1559168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 1560168404Spjd if (dzp->z_phys->zp_flags & ZFS_INHERIT_ACE) 1561168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1562168404Spjd 0, SPA_MAXBLOCKSIZE); 1563168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 1564168404Spjd if (error) { 1565168404Spjd zfs_dirent_unlock(dl); 1566168404Spjd if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { 1567168404Spjd dmu_tx_wait(tx); 1568168404Spjd dmu_tx_abort(tx); 1569168404Spjd goto top; 1570168404Spjd } 1571168404Spjd dmu_tx_abort(tx); 1572168404Spjd ZFS_EXIT(zfsvfs); 1573168404Spjd return (error); 1574168404Spjd } 1575168404Spjd 1576168404Spjd /* 1577168404Spjd * Create new node. 1578168404Spjd */ 1579168404Spjd zfs_mknode(dzp, vap, &zoid, tx, cr, 0, &zp, 0); 1580168404Spjd 1581168404Spjd /* 1582168404Spjd * Now put new name in parent dir. 
1583168404Spjd */ 1584168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 1585168404Spjd 1586168404Spjd *vpp = ZTOV(zp); 1587168404Spjd 1588168404Spjd zfs_log_create(zilog, tx, TX_MKDIR, dzp, zp, dirname); 1589168404Spjd dmu_tx_commit(tx); 1590168404Spjd 1591175202Sattilio vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 1592168404Spjd 1593168404Spjd zfs_dirent_unlock(dl); 1594168404Spjd 1595168404Spjd ZFS_EXIT(zfsvfs); 1596168404Spjd return (0); 1597168404Spjd} 1598168404Spjd 1599168404Spjd/* 1600168404Spjd * Remove a directory subdir entry. If the current working 1601168404Spjd * directory is the same as the subdir to be removed, the 1602168404Spjd * remove will fail. 1603168404Spjd * 1604168404Spjd * IN: dvp - vnode of directory to remove from. 1605168404Spjd * name - name of directory to be removed. 1606168404Spjd * cwd - vnode of current working directory. 1607168404Spjd * cr - credentials of caller. 1608168404Spjd * 1609168404Spjd * RETURN: 0 if success 1610168404Spjd * error code if failure 1611168404Spjd * 1612168404Spjd * Timestamps: 1613168404Spjd * dvp - ctime|mtime updated 1614168404Spjd */ 1615168404Spjdstatic int 1616168962Spjdzfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr) 1617168404Spjd{ 1618168404Spjd znode_t *dzp = VTOZ(dvp); 1619168404Spjd znode_t *zp; 1620168404Spjd vnode_t *vp; 1621168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1622168404Spjd zilog_t *zilog = zfsvfs->z_log; 1623168404Spjd zfs_dirlock_t *dl; 1624168404Spjd dmu_tx_t *tx; 1625168404Spjd int error; 1626168404Spjd 1627168962Spjd ZFS_ENTER(zfsvfs); 1628168404Spjd 1629168404Spjdtop: 1630168404Spjd zp = NULL; 1631168404Spjd 1632168404Spjd /* 1633168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 
1634168404Spjd */ 1635168404Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, ZEXISTS)) { 1636168404Spjd ZFS_EXIT(zfsvfs); 1637168404Spjd return (error); 1638168404Spjd } 1639168404Spjd 1640168404Spjd vp = ZTOV(zp); 1641168404Spjd 1642168404Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1643168404Spjd goto out; 1644168404Spjd } 1645168404Spjd 1646168962Spjd if (vp->v_type != VDIR) { 1647168962Spjd error = ENOTDIR; 1648168962Spjd goto out; 1649168962Spjd } 1650168962Spjd 1651168962Spjd if (vp == cwd) { 1652168962Spjd error = EINVAL; 1653168962Spjd goto out; 1654168962Spjd } 1655168962Spjd 1656168962Spjd vnevent_rmdir(vp); 1657168962Spjd 1658168404Spjd /* 1659168404Spjd * Grab a lock on the directory to make sure that noone is 1660168404Spjd * trying to add (or lookup) entries while we are removing it. 1661168404Spjd */ 1662168404Spjd rw_enter(&zp->z_name_lock, RW_WRITER); 1663168404Spjd 1664168404Spjd /* 1665168404Spjd * Grab a lock on the parent pointer to make sure we play well 1666168404Spjd * with the treewalk and directory rename code. 
1667168404Spjd */ 1668168404Spjd rw_enter(&zp->z_parent_lock, RW_WRITER); 1669168404Spjd 1670168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1671168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1672168404Spjd dmu_tx_hold_bonus(tx, zp->z_id); 1673168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1674168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 1675168404Spjd if (error) { 1676168404Spjd rw_exit(&zp->z_parent_lock); 1677168404Spjd rw_exit(&zp->z_name_lock); 1678168404Spjd zfs_dirent_unlock(dl); 1679168962Spjd VN_RELE(vp); 1680168404Spjd if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { 1681168404Spjd dmu_tx_wait(tx); 1682168404Spjd dmu_tx_abort(tx); 1683168404Spjd goto top; 1684168404Spjd } 1685168404Spjd dmu_tx_abort(tx); 1686168404Spjd ZFS_EXIT(zfsvfs); 1687168404Spjd return (error); 1688168404Spjd } 1689168404Spjd 1690168404Spjd#ifdef FREEBSD_NAMECACHE 1691168404Spjd cache_purge(dvp); 1692168404Spjd#endif 1693168404Spjd 1694168404Spjd error = zfs_link_destroy(dl, zp, tx, 0, NULL); 1695168404Spjd 1696168404Spjd if (error == 0) 1697168404Spjd zfs_log_remove(zilog, tx, TX_RMDIR, dzp, name); 1698168404Spjd 1699168404Spjd dmu_tx_commit(tx); 1700168404Spjd 1701168404Spjd rw_exit(&zp->z_parent_lock); 1702168404Spjd rw_exit(&zp->z_name_lock); 1703168404Spjd#ifdef FREEBSD_NAMECACHE 1704168404Spjd cache_purge(vp); 1705168404Spjd#endif 1706168404Spjdout: 1707168404Spjd zfs_dirent_unlock(dl); 1708168404Spjd 1709168962Spjd VN_RELE(vp); 1710168962Spjd 1711168404Spjd ZFS_EXIT(zfsvfs); 1712168404Spjd return (error); 1713168404Spjd} 1714168404Spjd 1715168404Spjd/* 1716168404Spjd * Read as many directory entries as will fit into the provided 1717168404Spjd * buffer from the given directory cursor position (specified in 1718168404Spjd * the uio structure. 1719168404Spjd * 1720168404Spjd * IN: vp - vnode of directory to read. 1721168404Spjd * uio - structure supplying read location, range info, 1722168404Spjd * and return buffer. 
1723168404Spjd * cr - credentials of caller. 1724168404Spjd * 1725168404Spjd * OUT: uio - updated offset and range, buffer filled. 1726168404Spjd * eofp - set to true if end-of-file detected. 1727168404Spjd * 1728168404Spjd * RETURN: 0 if success 1729168404Spjd * error code if failure 1730168404Spjd * 1731168404Spjd * Timestamps: 1732168404Spjd * vp - atime updated 1733168404Spjd * 1734168404Spjd * Note that the low 4 bits of the cookie returned by zap is always zero. 1735168404Spjd * This allows us to use the low range for "special" directory entries: 1736168404Spjd * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 1737168404Spjd * we use the offset 2 for the '.zfs' directory. 1738168404Spjd */ 1739168404Spjd/* ARGSUSED */ 1740168404Spjdstatic int 1741168962Spjdzfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) 1742168404Spjd{ 1743168404Spjd znode_t *zp = VTOZ(vp); 1744168404Spjd iovec_t *iovp; 1745168404Spjd dirent64_t *odp; 1746168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1747168404Spjd objset_t *os; 1748168404Spjd caddr_t outbuf; 1749168404Spjd size_t bufsize; 1750168404Spjd zap_cursor_t zc; 1751168404Spjd zap_attribute_t zap; 1752168404Spjd uint_t bytes_wanted; 1753168404Spjd uint64_t offset; /* must be unsigned; checks for < 1 */ 1754168404Spjd int local_eof; 1755168404Spjd int outcount; 1756168404Spjd int error; 1757168404Spjd uint8_t prefetch; 1758168404Spjd uint8_t type; 1759168962Spjd int ncooks; 1760168962Spjd u_long *cooks = NULL; 1761168404Spjd 1762168404Spjd ZFS_ENTER(zfsvfs); 1763168404Spjd 1764168404Spjd /* 1765168404Spjd * If we are not given an eof variable, 1766168404Spjd * use a local one. 1767168404Spjd */ 1768168404Spjd if (eofp == NULL) 1769168404Spjd eofp = &local_eof; 1770168404Spjd 1771168404Spjd /* 1772168404Spjd * Check for valid iov_len. 
1773168404Spjd */ 1774168404Spjd if (uio->uio_iov->iov_len <= 0) { 1775168404Spjd ZFS_EXIT(zfsvfs); 1776168404Spjd return (EINVAL); 1777168404Spjd } 1778168404Spjd 1779168404Spjd /* 1780168404Spjd * Quit if directory has been removed (posix) 1781168404Spjd */ 1782168404Spjd if ((*eofp = zp->z_unlinked) != 0) { 1783168404Spjd ZFS_EXIT(zfsvfs); 1784168404Spjd return (0); 1785168404Spjd } 1786168404Spjd 1787168404Spjd error = 0; 1788168404Spjd os = zfsvfs->z_os; 1789168404Spjd offset = uio->uio_loffset; 1790168404Spjd prefetch = zp->z_zn_prefetch; 1791168404Spjd 1792168404Spjd /* 1793168404Spjd * Initialize the iterator cursor. 1794168404Spjd */ 1795168404Spjd if (offset <= 3) { 1796168404Spjd /* 1797168404Spjd * Start iteration from the beginning of the directory. 1798168404Spjd */ 1799168404Spjd zap_cursor_init(&zc, os, zp->z_id); 1800168404Spjd } else { 1801168404Spjd /* 1802168404Spjd * The offset is a serialized cursor. 1803168404Spjd */ 1804168404Spjd zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 1805168404Spjd } 1806168404Spjd 1807168404Spjd /* 1808168404Spjd * Get space to change directory entries into fs independent format. 1809168404Spjd */ 1810168404Spjd iovp = uio->uio_iov; 1811168404Spjd bytes_wanted = iovp->iov_len; 1812168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 1813168404Spjd bufsize = bytes_wanted; 1814168404Spjd outbuf = kmem_alloc(bufsize, KM_SLEEP); 1815168404Spjd odp = (struct dirent64 *)outbuf; 1816168404Spjd } else { 1817168404Spjd bufsize = bytes_wanted; 1818168404Spjd odp = (struct dirent64 *)iovp->iov_base; 1819168404Spjd } 1820168404Spjd 1821169170Spjd if (ncookies != NULL) { 1822168404Spjd /* 1823168404Spjd * Minimum entry size is dirent size and 1 byte for a file name. 
1824168404Spjd */ 1825168962Spjd ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 1826168962Spjd cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 1827169170Spjd *cookies = cooks; 1828168962Spjd *ncookies = ncooks; 1829168404Spjd } 1830168404Spjd 1831168404Spjd /* 1832168404Spjd * Transform to file-system independent format 1833168404Spjd */ 1834168404Spjd outcount = 0; 1835168404Spjd while (outcount < bytes_wanted) { 1836168404Spjd ino64_t objnum; 1837168404Spjd ushort_t reclen; 1838168404Spjd 1839168404Spjd /* 1840168404Spjd * Special case `.', `..', and `.zfs'. 1841168404Spjd */ 1842168404Spjd if (offset == 0) { 1843168404Spjd (void) strcpy(zap.za_name, "."); 1844168404Spjd objnum = zp->z_id; 1845169108Spjd type = DT_DIR; 1846168404Spjd } else if (offset == 1) { 1847168404Spjd (void) strcpy(zap.za_name, ".."); 1848168404Spjd objnum = zp->z_phys->zp_parent; 1849169108Spjd type = DT_DIR; 1850168404Spjd } else if (offset == 2 && zfs_show_ctldir(zp)) { 1851168404Spjd (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 1852168404Spjd objnum = ZFSCTL_INO_ROOT; 1853169108Spjd type = DT_DIR; 1854168404Spjd } else { 1855168404Spjd /* 1856168404Spjd * Grab next entry. 
1857168404Spjd */ 1858168404Spjd if (error = zap_cursor_retrieve(&zc, &zap)) { 1859168404Spjd if ((*eofp = (error == ENOENT)) != 0) 1860168404Spjd break; 1861168404Spjd else 1862168404Spjd goto update; 1863168404Spjd } 1864168404Spjd 1865168404Spjd if (zap.za_integer_length != 8 || 1866168404Spjd zap.za_num_integers != 1) { 1867168404Spjd cmn_err(CE_WARN, "zap_readdir: bad directory " 1868168404Spjd "entry, obj = %lld, offset = %lld\n", 1869168404Spjd (u_longlong_t)zp->z_id, 1870168404Spjd (u_longlong_t)offset); 1871168404Spjd error = ENXIO; 1872168404Spjd goto update; 1873168404Spjd } 1874168404Spjd 1875168404Spjd objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 1876168404Spjd /* 1877168404Spjd * MacOS X can extract the object type here such as: 1878168404Spjd * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 1879168404Spjd */ 1880168404Spjd type = ZFS_DIRENT_TYPE(zap.za_first_integer); 1881168404Spjd } 1882168404Spjd reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 1883168404Spjd 1884168404Spjd /* 1885168404Spjd * Will this entry fit in the buffer? 1886168404Spjd */ 1887168404Spjd if (outcount + reclen > bufsize) { 1888168404Spjd /* 1889168404Spjd * Did we manage to fit anything in the buffer? 
1890168404Spjd */ 1891168404Spjd if (!outcount) { 1892168404Spjd error = EINVAL; 1893168404Spjd goto update; 1894168404Spjd } 1895168404Spjd break; 1896168404Spjd } 1897168404Spjd /* 1898168404Spjd * Add this entry: 1899168404Spjd */ 1900168404Spjd odp->d_ino = objnum; 1901168404Spjd odp->d_reclen = reclen; 1902168404Spjd odp->d_namlen = strlen(zap.za_name); 1903168404Spjd (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 1904168404Spjd odp->d_type = type; 1905168404Spjd outcount += reclen; 1906168404Spjd odp = (dirent64_t *)((intptr_t)odp + reclen); 1907168404Spjd 1908168404Spjd ASSERT(outcount <= bufsize); 1909168404Spjd 1910168404Spjd /* Prefetch znode */ 1911168404Spjd if (prefetch) 1912168404Spjd dmu_prefetch(os, objnum, 0, 0); 1913168404Spjd 1914168404Spjd /* 1915168404Spjd * Move to the next entry, fill in the previous offset. 1916168404Spjd */ 1917168404Spjd if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 1918168404Spjd zap_cursor_advance(&zc); 1919168404Spjd offset = zap_cursor_serialize(&zc); 1920168404Spjd } else { 1921168404Spjd offset += 1; 1922168404Spjd } 1923168404Spjd 1924168962Spjd if (cooks != NULL) { 1925168962Spjd *cooks++ = offset; 1926168962Spjd ncooks--; 1927168962Spjd KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 1928168404Spjd } 1929168404Spjd } 1930168404Spjd zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 1931168404Spjd 1932168404Spjd /* Subtract unused cookies */ 1933168962Spjd if (ncookies != NULL) 1934168962Spjd *ncookies -= ncooks; 1935168404Spjd 1936168404Spjd if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 1937168404Spjd iovp->iov_base += outcount; 1938168404Spjd iovp->iov_len -= outcount; 1939168404Spjd uio->uio_resid -= outcount; 1940168404Spjd } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 1941168404Spjd /* 1942168404Spjd * Reset the pointer. 
1943168404Spjd */ 1944168404Spjd offset = uio->uio_loffset; 1945168404Spjd } 1946168404Spjd 1947168404Spjdupdate: 1948168404Spjd zap_cursor_fini(&zc); 1949168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 1950168404Spjd kmem_free(outbuf, bufsize); 1951168404Spjd 1952168404Spjd if (error == ENOENT) 1953168404Spjd error = 0; 1954168404Spjd 1955168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 1956168404Spjd 1957168404Spjd uio->uio_loffset = offset; 1958168404Spjd ZFS_EXIT(zfsvfs); 1959169107Spjd if (error != 0 && cookies != NULL) { 1960168962Spjd free(*cookies, M_TEMP); 1961168962Spjd *cookies = NULL; 1962168962Spjd *ncookies = 0; 1963168404Spjd } 1964168404Spjd return (error); 1965168404Spjd} 1966168404Spjd 1967168404Spjdstatic int 1968168962Spjdzfs_fsync(vnode_t *vp, int syncflag, cred_t *cr) 1969168404Spjd{ 1970168962Spjd znode_t *zp = VTOZ(vp); 1971168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1972168404Spjd 1973168404Spjd ZFS_ENTER(zfsvfs); 1974168404Spjd zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); 1975168404Spjd ZFS_EXIT(zfsvfs); 1976168404Spjd return (0); 1977168404Spjd} 1978168404Spjd 1979168404Spjd/* 1980168404Spjd * Get the requested file attributes and place them in the provided 1981168404Spjd * vattr structure. 1982168404Spjd * 1983168404Spjd * IN: vp - vnode of file. 1984168404Spjd * vap - va_mask identifies requested attributes. 1985168404Spjd * flags - [UNUSED] 1986168404Spjd * cr - credentials of caller. 1987168404Spjd * 1988168404Spjd * OUT: vap - attribute values. 
1989168404Spjd * 1990168404Spjd * RETURN: 0 (always succeeds) 1991168404Spjd */ 1992168404Spjd/* ARGSUSED */ 1993168404Spjdstatic int 1994168962Spjdzfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr) 1995168404Spjd{ 1996168962Spjd znode_t *zp = VTOZ(vp); 1997168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1998168962Spjd znode_phys_t *pzp = zp->z_phys; 1999168962Spjd uint32_t blksize; 2000168962Spjd u_longlong_t nblocks; 2001168962Spjd int error; 2002168404Spjd 2003168404Spjd ZFS_ENTER(zfsvfs); 2004168404Spjd 2005168404Spjd /* 2006168404Spjd * Return all attributes. It's cheaper to provide the answer 2007168404Spjd * than to determine whether we were asked the question. 2008168404Spjd */ 2009168404Spjd mutex_enter(&zp->z_lock); 2010168404Spjd 2011168404Spjd vap->va_type = IFTOVT(pzp->zp_mode); 2012168404Spjd vap->va_mode = pzp->zp_mode & ~S_IFMT; 2013168404Spjd vap->va_uid = zp->z_phys->zp_uid; 2014168404Spjd vap->va_gid = zp->z_phys->zp_gid; 2015168404Spjd vap->va_nodeid = zp->z_id; 2016168404Spjd vap->va_nlink = MIN(pzp->zp_links, UINT32_MAX); /* nlink_t limit! */ 2017168404Spjd vap->va_size = pzp->zp_size; 2018168404Spjd vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 2019168958Spjd vap->va_rdev = zfs_cmpldev(pzp->zp_rdev); 2020168404Spjd vap->va_seq = zp->z_seq; 2021168404Spjd vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2022168404Spjd 2023168404Spjd ZFS_TIME_DECODE(&vap->va_atime, pzp->zp_atime); 2024168404Spjd ZFS_TIME_DECODE(&vap->va_mtime, pzp->zp_mtime); 2025168404Spjd ZFS_TIME_DECODE(&vap->va_ctime, pzp->zp_ctime); 2026168404Spjd ZFS_TIME_DECODE(&vap->va_birthtime, pzp->zp_crtime); 2027168404Spjd 2028168404Spjd /* 2029168404Spjd * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2030168404Spjd * Also, if we are the owner don't bother, since owner should 2031168404Spjd * always be allowed to read basic attributes of file. 
2032168404Spjd */ 2033168404Spjd if (!(zp->z_phys->zp_flags & ZFS_ACL_TRIVIAL) && 2034168404Spjd (zp->z_phys->zp_uid != crgetuid(cr))) { 2035168404Spjd if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, cr)) { 2036168404Spjd mutex_exit(&zp->z_lock); 2037168404Spjd ZFS_EXIT(zfsvfs); 2038168404Spjd return (error); 2039168404Spjd } 2040168404Spjd } 2041168404Spjd 2042168404Spjd mutex_exit(&zp->z_lock); 2043168404Spjd 2044168404Spjd dmu_object_size_from_db(zp->z_dbuf, &blksize, &nblocks); 2045168404Spjd vap->va_blksize = blksize; 2046168404Spjd vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 2047168404Spjd 2048168404Spjd if (zp->z_blksz == 0) { 2049168404Spjd /* 2050168404Spjd * Block size hasn't been set; suggest maximal I/O transfers. 2051168404Spjd */ 2052168404Spjd vap->va_blksize = zfsvfs->z_max_blksz; 2053168404Spjd } 2054168404Spjd 2055168404Spjd ZFS_EXIT(zfsvfs); 2056168404Spjd return (0); 2057168404Spjd} 2058168404Spjd 2059168404Spjd/* 2060168404Spjd * Set the file attributes to the values contained in the 2061168404Spjd * vattr structure. 2062168404Spjd * 2063168404Spjd * IN: vp - vnode of file to be modified. 2064168404Spjd * vap - new attribute values. 2065168404Spjd * flags - ATTR_UTIME set if non-default time values provided. 2066168404Spjd * cr - credentials of caller. 2067168404Spjd * 2068168404Spjd * RETURN: 0 if success 2069168404Spjd * error code if failure 2070168404Spjd * 2071168404Spjd * Timestamps: 2072168404Spjd * vp - ctime updated, mtime updated if size changed. 
2073168404Spjd */ 2074168404Spjd/* ARGSUSED */ 2075168404Spjdstatic int 2076168962Spjdzfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2077168962Spjd caller_context_t *ct) 2078168404Spjd{ 2079168962Spjd struct znode *zp = VTOZ(vp); 2080168404Spjd znode_phys_t *pzp = zp->z_phys; 2081168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2082168404Spjd zilog_t *zilog = zfsvfs->z_log; 2083168404Spjd dmu_tx_t *tx; 2084168404Spjd vattr_t oldva; 2085168962Spjd uint_t mask = vap->va_mask; 2086168404Spjd uint_t saved_mask; 2087168404Spjd int trim_mask = 0; 2088168404Spjd uint64_t new_mode; 2089168404Spjd znode_t *attrzp; 2090168404Spjd int need_policy = FALSE; 2091168404Spjd int err; 2092168404Spjd 2093168404Spjd if (mask == 0) 2094168404Spjd return (0); 2095168404Spjd 2096168962Spjd if (mask & AT_NOSET) 2097168962Spjd return (EINVAL); 2098168962Spjd 2099168404Spjd if (mask & AT_SIZE && vp->v_type == VDIR) 2100168404Spjd return (EISDIR); 2101168404Spjd 2102168404Spjd if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) 2103168404Spjd return (EINVAL); 2104168404Spjd 2105168404Spjd ZFS_ENTER(zfsvfs); 2106168404Spjd 2107168404Spjdtop: 2108168404Spjd attrzp = NULL; 2109168404Spjd 2110168404Spjd if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2111168404Spjd ZFS_EXIT(zfsvfs); 2112168404Spjd return (EROFS); 2113168404Spjd } 2114168404Spjd 2115168404Spjd /* 2116168404Spjd * First validate permissions 2117168404Spjd */ 2118168404Spjd 2119168404Spjd if (mask & AT_SIZE) { 2120168404Spjd err = zfs_zaccess(zp, ACE_WRITE_DATA, cr); 2121168404Spjd if (err) { 2122168404Spjd ZFS_EXIT(zfsvfs); 2123168404Spjd return (err); 2124168404Spjd } 2125168404Spjd /* 2126168404Spjd * XXX - Note, we are not providing any open 2127168404Spjd * mode flags here (like FNDELAY), so we may 2128168404Spjd * block if there are locks present... this 2129168404Spjd * should be addressed in openat(). 
2130168404Spjd */ 2131168404Spjd do { 2132168404Spjd err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 2133168404Spjd /* NB: we already did dmu_tx_wait() if necessary */ 2134168404Spjd } while (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT); 2135168404Spjd if (err) { 2136168404Spjd ZFS_EXIT(zfsvfs); 2137168404Spjd return (err); 2138168404Spjd } 2139168404Spjd } 2140168404Spjd 2141168404Spjd if (mask & (AT_ATIME|AT_MTIME)) 2142168404Spjd need_policy = zfs_zaccess_v4_perm(zp, ACE_WRITE_ATTRIBUTES, cr); 2143168404Spjd 2144168404Spjd if (mask & (AT_UID|AT_GID)) { 2145168404Spjd int idmask = (mask & (AT_UID|AT_GID)); 2146168404Spjd int take_owner; 2147168404Spjd int take_group; 2148168404Spjd 2149168404Spjd /* 2150168404Spjd * NOTE: even if a new mode is being set, 2151168404Spjd * we may clear S_ISUID/S_ISGID bits. 2152168404Spjd */ 2153168404Spjd 2154168404Spjd if (!(mask & AT_MODE)) 2155168404Spjd vap->va_mode = pzp->zp_mode; 2156168404Spjd 2157168404Spjd /* 2158168404Spjd * Take ownership or chgrp to group we are a member of 2159168404Spjd */ 2160168404Spjd 2161168404Spjd take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 2162168404Spjd take_group = (mask & AT_GID) && groupmember(vap->va_gid, cr); 2163168404Spjd 2164168404Spjd /* 2165168404Spjd * If both AT_UID and AT_GID are set then take_owner and 2166168404Spjd * take_group must both be set in order to allow taking 2167168404Spjd * ownership. 
2168168404Spjd * 2169168404Spjd * Otherwise, send the check through secpolicy_vnode_setattr() 2170168404Spjd * 2171168404Spjd */ 2172168404Spjd 2173168404Spjd if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 2174168404Spjd ((idmask == AT_UID) && take_owner) || 2175168404Spjd ((idmask == AT_GID) && take_group)) { 2176168404Spjd if (zfs_zaccess_v4_perm(zp, ACE_WRITE_OWNER, cr) == 0) { 2177168404Spjd /* 2178168404Spjd * Remove setuid/setgid for non-privileged users 2179168404Spjd */ 2180168404Spjd secpolicy_setid_clear(vap, cr); 2181168404Spjd trim_mask = (mask & (AT_UID|AT_GID)); 2182168404Spjd } else { 2183168404Spjd need_policy = TRUE; 2184168404Spjd } 2185168404Spjd } else { 2186168404Spjd need_policy = TRUE; 2187168404Spjd } 2188168404Spjd } 2189168404Spjd 2190168404Spjd mutex_enter(&zp->z_lock); 2191168404Spjd oldva.va_mode = pzp->zp_mode; 2192168404Spjd oldva.va_uid = zp->z_phys->zp_uid; 2193168404Spjd oldva.va_gid = zp->z_phys->zp_gid; 2194168404Spjd mutex_exit(&zp->z_lock); 2195168404Spjd 2196168404Spjd if (mask & AT_MODE) { 2197168404Spjd if (zfs_zaccess_v4_perm(zp, ACE_WRITE_ACL, cr) == 0) { 2198168962Spjd err = secpolicy_setid_setsticky_clear(vp, vap, 2199168962Spjd &oldva, cr); 2200168962Spjd if (err) { 2201168962Spjd ZFS_EXIT(zfsvfs); 2202168962Spjd return (err); 2203168962Spjd } 2204168404Spjd trim_mask |= AT_MODE; 2205168404Spjd } else { 2206168404Spjd need_policy = TRUE; 2207168404Spjd } 2208168404Spjd } 2209168404Spjd 2210168404Spjd if (need_policy) { 2211168404Spjd /* 2212168404Spjd * If trim_mask is set then take ownership 2213168404Spjd * has been granted or write_acl is present and user 2214168404Spjd * has the ability to modify mode. In that case remove 2215168404Spjd * UID|GID and or MODE from mask so that 2216168404Spjd * secpolicy_vnode_setattr() doesn't revoke it. 
2217168404Spjd */ 2218168404Spjd 2219168404Spjd if (trim_mask) { 2220168404Spjd saved_mask = vap->va_mask; 2221168404Spjd vap->va_mask &= ~trim_mask; 2222168404Spjd 2223168404Spjd } 2224168404Spjd err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 2225168404Spjd (int (*)(void *, int, cred_t *))zfs_zaccess_rwx, zp); 2226168404Spjd if (err) { 2227168404Spjd ZFS_EXIT(zfsvfs); 2228168404Spjd return (err); 2229168404Spjd } 2230168404Spjd 2231168404Spjd if (trim_mask) 2232168404Spjd vap->va_mask |= saved_mask; 2233168404Spjd } 2234168404Spjd 2235168404Spjd /* 2236168404Spjd * secpolicy_vnode_setattr, or take ownership may have 2237168404Spjd * changed va_mask 2238168404Spjd */ 2239168404Spjd mask = vap->va_mask; 2240168404Spjd 2241168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2242168404Spjd dmu_tx_hold_bonus(tx, zp->z_id); 2243168404Spjd 2244168404Spjd if (mask & AT_MODE) { 2245168404Spjd uint64_t pmode = pzp->zp_mode; 2246168404Spjd 2247168404Spjd new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 2248168404Spjd 2249168404Spjd if (zp->z_phys->zp_acl.z_acl_extern_obj) 2250168404Spjd dmu_tx_hold_write(tx, 2251168404Spjd pzp->zp_acl.z_acl_extern_obj, 0, SPA_MAXBLOCKSIZE); 2252168404Spjd else 2253168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2254168404Spjd 0, ZFS_ACL_SIZE(MAX_ACL_SIZE)); 2255168404Spjd } 2256168404Spjd 2257168404Spjd if ((mask & (AT_UID | AT_GID)) && zp->z_phys->zp_xattr != 0) { 2258168404Spjd err = zfs_zget(zp->z_zfsvfs, zp->z_phys->zp_xattr, &attrzp); 2259168404Spjd if (err) { 2260168404Spjd dmu_tx_abort(tx); 2261168404Spjd ZFS_EXIT(zfsvfs); 2262168404Spjd return (err); 2263168404Spjd } 2264168404Spjd dmu_tx_hold_bonus(tx, attrzp->z_id); 2265168404Spjd } 2266168404Spjd 2267168404Spjd err = dmu_tx_assign(tx, zfsvfs->z_assign); 2268168404Spjd if (err) { 2269168404Spjd if (attrzp) 2270168404Spjd VN_RELE(ZTOV(attrzp)); 2271168404Spjd if (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { 2272168404Spjd dmu_tx_wait(tx); 2273168404Spjd 
dmu_tx_abort(tx); 2274168404Spjd goto top; 2275168404Spjd } 2276168404Spjd dmu_tx_abort(tx); 2277168404Spjd ZFS_EXIT(zfsvfs); 2278168404Spjd return (err); 2279168404Spjd } 2280168404Spjd 2281168404Spjd dmu_buf_will_dirty(zp->z_dbuf, tx); 2282168404Spjd 2283168404Spjd /* 2284168404Spjd * Set each attribute requested. 2285168404Spjd * We group settings according to the locks they need to acquire. 2286168404Spjd * 2287168404Spjd * Note: you cannot set ctime directly, although it will be 2288168404Spjd * updated as a side-effect of calling this function. 2289168404Spjd */ 2290168404Spjd 2291168404Spjd mutex_enter(&zp->z_lock); 2292168404Spjd 2293168404Spjd if (mask & AT_MODE) { 2294168404Spjd err = zfs_acl_chmod_setattr(zp, new_mode, tx); 2295168404Spjd ASSERT3U(err, ==, 0); 2296168404Spjd } 2297168404Spjd 2298168404Spjd if (attrzp) 2299168404Spjd mutex_enter(&attrzp->z_lock); 2300168404Spjd 2301168404Spjd if (mask & AT_UID) { 2302168404Spjd zp->z_phys->zp_uid = (uint64_t)vap->va_uid; 2303168404Spjd if (attrzp) { 2304168404Spjd attrzp->z_phys->zp_uid = (uint64_t)vap->va_uid; 2305168404Spjd } 2306168404Spjd } 2307168404Spjd 2308168404Spjd if (mask & AT_GID) { 2309168404Spjd zp->z_phys->zp_gid = (uint64_t)vap->va_gid; 2310168404Spjd if (attrzp) 2311168404Spjd attrzp->z_phys->zp_gid = (uint64_t)vap->va_gid; 2312168404Spjd } 2313168404Spjd 2314168404Spjd if (attrzp) 2315168404Spjd mutex_exit(&attrzp->z_lock); 2316168404Spjd 2317168404Spjd if (mask & AT_ATIME) 2318168404Spjd ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime); 2319168404Spjd 2320168404Spjd if (mask & AT_MTIME) 2321168404Spjd ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime); 2322168404Spjd 2323168404Spjd if (mask & AT_SIZE) 2324168404Spjd zfs_time_stamper_locked(zp, CONTENT_MODIFIED, tx); 2325168404Spjd else if (mask != 0) 2326168404Spjd zfs_time_stamper_locked(zp, STATE_CHANGED, tx); 2327168404Spjd 2328168404Spjd if (mask != 0) 2329168404Spjd zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask); 2330168404Spjd 
2331168404Spjd mutex_exit(&zp->z_lock); 2332168404Spjd 2333168404Spjd if (attrzp) 2334168404Spjd VN_RELE(ZTOV(attrzp)); 2335168404Spjd 2336168404Spjd dmu_tx_commit(tx); 2337168404Spjd 2338168404Spjd ZFS_EXIT(zfsvfs); 2339168404Spjd return (err); 2340168404Spjd} 2341168404Spjd 2342168404Spjdtypedef struct zfs_zlock { 2343168404Spjd krwlock_t *zl_rwlock; /* lock we acquired */ 2344168404Spjd znode_t *zl_znode; /* znode we held */ 2345168404Spjd struct zfs_zlock *zl_next; /* next in list */ 2346168404Spjd} zfs_zlock_t; 2347168404Spjd 2348168404Spjd/* 2349168404Spjd * Drop locks and release vnodes that were held by zfs_rename_lock(). 2350168404Spjd */ 2351168404Spjdstatic void 2352168404Spjdzfs_rename_unlock(zfs_zlock_t **zlpp) 2353168404Spjd{ 2354168404Spjd zfs_zlock_t *zl; 2355168404Spjd 2356168404Spjd while ((zl = *zlpp) != NULL) { 2357168404Spjd if (zl->zl_znode != NULL) 2358168404Spjd VN_RELE(ZTOV(zl->zl_znode)); 2359168404Spjd rw_exit(zl->zl_rwlock); 2360168404Spjd *zlpp = zl->zl_next; 2361168404Spjd kmem_free(zl, sizeof (*zl)); 2362168404Spjd } 2363168404Spjd} 2364168404Spjd 2365168404Spjd/* 2366168404Spjd * Search back through the directory tree, using the ".." entries. 2367168404Spjd * Lock each directory in the chain to prevent concurrent renames. 2368168404Spjd * Fail any attempt to move a directory into one of its own descendants. 2369168404Spjd * XXX - z_parent_lock can overlap with map or grow locks 2370168404Spjd */ 2371168404Spjdstatic int 2372168404Spjdzfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 2373168404Spjd{ 2374168404Spjd zfs_zlock_t *zl; 2375168404Spjd znode_t *zp = tdzp; 2376168404Spjd uint64_t rootid = zp->z_zfsvfs->z_root; 2377168404Spjd uint64_t *oidp = &zp->z_id; 2378168404Spjd krwlock_t *rwlp = &szp->z_parent_lock; 2379168404Spjd krw_t rw = RW_WRITER; 2380168404Spjd 2381168404Spjd /* 2382168404Spjd * First pass write-locks szp and compares to zp->z_id. 
2383168404Spjd * Later passes read-lock zp and compare to zp->z_parent. 2384168404Spjd */ 2385168404Spjd do { 2386168404Spjd if (!rw_tryenter(rwlp, rw)) { 2387168404Spjd /* 2388168404Spjd * Another thread is renaming in this path. 2389168404Spjd * Note that if we are a WRITER, we don't have any 2390168404Spjd * parent_locks held yet. 2391168404Spjd */ 2392168404Spjd if (rw == RW_READER && zp->z_id > szp->z_id) { 2393168404Spjd /* 2394168404Spjd * Drop our locks and restart 2395168404Spjd */ 2396168404Spjd zfs_rename_unlock(&zl); 2397168404Spjd *zlpp = NULL; 2398168404Spjd zp = tdzp; 2399168404Spjd oidp = &zp->z_id; 2400168404Spjd rwlp = &szp->z_parent_lock; 2401168404Spjd rw = RW_WRITER; 2402168404Spjd continue; 2403168404Spjd } else { 2404168404Spjd /* 2405168404Spjd * Wait for other thread to drop its locks 2406168404Spjd */ 2407168404Spjd rw_enter(rwlp, rw); 2408168404Spjd } 2409168404Spjd } 2410168404Spjd 2411168404Spjd zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 2412168404Spjd zl->zl_rwlock = rwlp; 2413168404Spjd zl->zl_znode = NULL; 2414168404Spjd zl->zl_next = *zlpp; 2415168404Spjd *zlpp = zl; 2416168404Spjd 2417168404Spjd if (*oidp == szp->z_id) /* We're a descendant of szp */ 2418168404Spjd return (EINVAL); 2419168404Spjd 2420168404Spjd if (*oidp == rootid) /* We've hit the top */ 2421168404Spjd return (0); 2422168404Spjd 2423168404Spjd if (rw == RW_READER) { /* i.e. not the first pass */ 2424168404Spjd int error = zfs_zget(zp->z_zfsvfs, *oidp, &zp); 2425168404Spjd if (error) 2426168404Spjd return (error); 2427168404Spjd zl->zl_znode = zp; 2428168404Spjd } 2429168404Spjd oidp = &zp->z_phys->zp_parent; 2430168404Spjd rwlp = &zp->z_parent_lock; 2431168404Spjd rw = RW_READER; 2432168404Spjd 2433168404Spjd } while (zp->z_id != sdzp->z_id); 2434168404Spjd 2435168404Spjd return (0); 2436168404Spjd} 2437168404Spjd 2438168404Spjd/* 2439168404Spjd * Move an entry from the provided source directory to the target 2440168404Spjd * directory. 
Change the entry name as indicated. 2441168404Spjd * 2442168404Spjd * IN: sdvp - Source directory containing the "old entry". 2443168404Spjd * snm - Old entry name. 2444168404Spjd * tdvp - Target directory to contain the "new entry". 2445168404Spjd * tnm - New entry name. 2446168404Spjd * cr - credentials of caller. 2447168404Spjd * 2448168404Spjd * RETURN: 0 if success 2449168404Spjd * error code if failure 2450168404Spjd * 2451168404Spjd * Timestamps: 2452168404Spjd * sdvp,tdvp - ctime|mtime updated 2453168404Spjd */ 2454168404Spjdstatic int 2455168962Spjdzfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr) 2456168404Spjd{ 2457168404Spjd znode_t *tdzp, *szp, *tzp; 2458168404Spjd znode_t *sdzp = VTOZ(sdvp); 2459168404Spjd zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; 2460168404Spjd zilog_t *zilog = zfsvfs->z_log; 2461168962Spjd vnode_t *realvp; 2462168404Spjd zfs_dirlock_t *sdl, *tdl; 2463168404Spjd dmu_tx_t *tx; 2464168404Spjd zfs_zlock_t *zl; 2465168962Spjd int cmp, serr, terr, error; 2466168404Spjd 2467168404Spjd ZFS_ENTER(zfsvfs); 2468168404Spjd 2469168962Spjd /* 2470168962Spjd * Make sure we have the real vp for the target directory. 2471168962Spjd */ 2472168962Spjd if (VOP_REALVP(tdvp, &realvp) == 0) 2473168962Spjd tdvp = realvp; 2474168962Spjd 2475168404Spjd if (tdvp->v_vfsp != sdvp->v_vfsp) { 2476168404Spjd ZFS_EXIT(zfsvfs); 2477168962Spjd return (EXDEV); 2478168404Spjd } 2479168404Spjd 2480168404Spjd tdzp = VTOZ(tdvp); 2481168404Spjdtop: 2482168404Spjd szp = NULL; 2483168404Spjd tzp = NULL; 2484168404Spjd zl = NULL; 2485168404Spjd 2486168404Spjd /* 2487168404Spjd * This is to prevent the creation of links into attribute space 2488168404Spjd * by renaming a linked file into/outof an attribute directory. 2489168404Spjd * See the comment in zfs_link() for why this is considered bad. 
2490168404Spjd */ 2491168404Spjd if ((tdzp->z_phys->zp_flags & ZFS_XATTR) != 2492168404Spjd (sdzp->z_phys->zp_flags & ZFS_XATTR)) { 2493168962Spjd ZFS_EXIT(zfsvfs); 2494168962Spjd return (EINVAL); 2495168404Spjd } 2496168404Spjd 2497168404Spjd /* 2498168404Spjd * Lock source and target directory entries. To prevent deadlock, 2499168404Spjd * a lock ordering must be defined. We lock the directory with 2500168404Spjd * the smallest object id first, or if it's a tie, the one with 2501168404Spjd * the lexically first name. 2502168404Spjd */ 2503168404Spjd if (sdzp->z_id < tdzp->z_id) { 2504168962Spjd cmp = -1; 2505168962Spjd } else if (sdzp->z_id > tdzp->z_id) { 2506168962Spjd cmp = 1; 2507168962Spjd } else { 2508168962Spjd cmp = strcmp(snm, tnm); 2509168962Spjd if (cmp == 0) { 2510168962Spjd /* 2511168962Spjd * POSIX: "If the old argument and the new argument 2512168962Spjd * both refer to links to the same existing file, 2513168962Spjd * the rename() function shall return successfully 2514168962Spjd * and perform no other action." 2515168962Spjd */ 2516168962Spjd ZFS_EXIT(zfsvfs); 2517168962Spjd return (0); 2518168962Spjd } 2519168962Spjd } 2520168962Spjd if (cmp < 0) { 2521168404Spjd serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, ZEXISTS); 2522168404Spjd terr = zfs_dirent_lock(&tdl, tdzp, tnm, &tzp, 0); 2523168962Spjd } else { 2524168404Spjd terr = zfs_dirent_lock(&tdl, tdzp, tnm, &tzp, 0); 2525168404Spjd serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, ZEXISTS); 2526168404Spjd } 2527168404Spjd 2528168962Spjd if (serr) { 2529168404Spjd /* 2530168404Spjd * Source entry invalid or not there. 
2531168404Spjd */ 2532168962Spjd if (!terr) { 2533168404Spjd zfs_dirent_unlock(tdl); 2534168962Spjd if (tzp) 2535168962Spjd VN_RELE(ZTOV(tzp)); 2536168962Spjd } 2537168404Spjd if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0) 2538168404Spjd serr = EINVAL; 2539168962Spjd ZFS_EXIT(zfsvfs); 2540168962Spjd return (serr); 2541168404Spjd } 2542168404Spjd if (terr) { 2543168404Spjd zfs_dirent_unlock(sdl); 2544168962Spjd VN_RELE(ZTOV(szp)); 2545168404Spjd if (strcmp(tnm, "..") == 0) 2546168404Spjd terr = EINVAL; 2547168962Spjd ZFS_EXIT(zfsvfs); 2548168962Spjd return (terr); 2549168404Spjd } 2550168404Spjd 2551168404Spjd /* 2552168404Spjd * Must have write access at the source to remove the old entry 2553168404Spjd * and write access at the target to create the new entry. 2554168404Spjd * Note that if target and source are the same, this can be 2555168404Spjd * done in a single check. 2556168404Spjd */ 2557168404Spjd 2558168404Spjd if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 2559168404Spjd goto out; 2560168404Spjd 2561168962Spjd if (ZTOV(szp)->v_type == VDIR) { 2562168404Spjd /* 2563168404Spjd * Check to make sure rename is valid. 2564168404Spjd * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 2565168404Spjd */ 2566168404Spjd if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) 2567168404Spjd goto out; 2568168404Spjd } 2569168404Spjd 2570168404Spjd /* 2571168404Spjd * Does target exist? 2572168404Spjd */ 2573168404Spjd if (tzp) { 2574168404Spjd /* 2575168404Spjd * Source and target must be the same type. 
2576168404Spjd */ 2577168962Spjd if (ZTOV(szp)->v_type == VDIR) { 2578168962Spjd if (ZTOV(tzp)->v_type != VDIR) { 2579168404Spjd error = ENOTDIR; 2580168404Spjd goto out; 2581168404Spjd } 2582168404Spjd } else { 2583168962Spjd if (ZTOV(tzp)->v_type == VDIR) { 2584168404Spjd error = EISDIR; 2585168404Spjd goto out; 2586168404Spjd } 2587168404Spjd } 2588168404Spjd /* 2589168404Spjd * POSIX dictates that when the source and target 2590168404Spjd * entries refer to the same file object, rename 2591168404Spjd * must do nothing and exit without error. 2592168404Spjd */ 2593168404Spjd if (szp->z_id == tzp->z_id) { 2594168404Spjd error = 0; 2595168404Spjd goto out; 2596168404Spjd } 2597168404Spjd } 2598168404Spjd 2599168962Spjd vnevent_rename_src(ZTOV(szp)); 2600168962Spjd if (tzp) 2601168962Spjd vnevent_rename_dest(ZTOV(tzp)); 2602168962Spjd 2603168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2604168404Spjd dmu_tx_hold_bonus(tx, szp->z_id); /* nlink changes */ 2605168404Spjd dmu_tx_hold_bonus(tx, sdzp->z_id); /* nlink changes */ 2606168404Spjd dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 2607168404Spjd dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 2608168404Spjd if (sdzp != tdzp) 2609168404Spjd dmu_tx_hold_bonus(tx, tdzp->z_id); /* nlink changes */ 2610168404Spjd if (tzp) 2611168404Spjd dmu_tx_hold_bonus(tx, tzp->z_id); /* parent changes */ 2612168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2613168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 2614168404Spjd if (error) { 2615168404Spjd if (zl != NULL) 2616168404Spjd zfs_rename_unlock(&zl); 2617168404Spjd zfs_dirent_unlock(sdl); 2618168404Spjd zfs_dirent_unlock(tdl); 2619168962Spjd VN_RELE(ZTOV(szp)); 2620168962Spjd if (tzp) 2621168962Spjd VN_RELE(ZTOV(tzp)); 2622168404Spjd if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { 2623168404Spjd dmu_tx_wait(tx); 2624168404Spjd dmu_tx_abort(tx); 2625168404Spjd goto top; 2626168404Spjd } 2627168404Spjd dmu_tx_abort(tx); 2628168962Spjd ZFS_EXIT(zfsvfs); 
2629168962Spjd return (error); 2630168404Spjd } 2631168404Spjd 2632168404Spjd if (tzp) /* Attempt to remove the existing target */ 2633168404Spjd error = zfs_link_destroy(tdl, tzp, tx, 0, NULL); 2634168404Spjd 2635168404Spjd if (error == 0) { 2636168404Spjd error = zfs_link_create(tdl, szp, tx, ZRENAMING); 2637168404Spjd if (error == 0) { 2638168404Spjd error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); 2639168404Spjd ASSERT(error == 0); 2640168404Spjd zfs_log_rename(zilog, tx, TX_RENAME, sdzp, 2641168404Spjd sdl->dl_name, tdzp, tdl->dl_name, szp); 2642168404Spjd } 2643168404Spjd#ifdef FREEBSD_NAMECACHE 2644168404Spjd if (error == 0) { 2645168404Spjd cache_purge(sdvp); 2646168404Spjd cache_purge(tdvp); 2647168404Spjd } 2648168404Spjd#endif 2649168404Spjd } 2650168404Spjd 2651168404Spjd dmu_tx_commit(tx); 2652168404Spjdout: 2653168404Spjd if (zl != NULL) 2654168404Spjd zfs_rename_unlock(&zl); 2655168404Spjd 2656168404Spjd zfs_dirent_unlock(sdl); 2657168404Spjd zfs_dirent_unlock(tdl); 2658168404Spjd 2659168962Spjd VN_RELE(ZTOV(szp)); 2660168404Spjd if (tzp) 2661168962Spjd VN_RELE(ZTOV(tzp)); 2662168404Spjd 2663168404Spjd ZFS_EXIT(zfsvfs); 2664168404Spjd 2665168404Spjd return (error); 2666168404Spjd} 2667168404Spjd 2668168404Spjd/* 2669168404Spjd * Insert the indicated symbolic reference entry into the directory. 2670168404Spjd * 2671168404Spjd * IN: dvp - Directory to contain new symbolic link. 2672168404Spjd * link - Name for new symlink entry. 2673168404Spjd * vap - Attributes of new entry. 2674168404Spjd * target - Target path of new symlink. 2675168404Spjd * cr - credentials of caller. 
2676168404Spjd * 2677168404Spjd * RETURN: 0 if success 2678168404Spjd * error code if failure 2679168404Spjd * 2680168404Spjd * Timestamps: 2681168404Spjd * dvp - ctime|mtime updated 2682168404Spjd */ 2683168404Spjdstatic int 2684168962Spjdzfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, cred_t *cr, kthread_t *td) 2685168404Spjd{ 2686168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 2687168404Spjd zfs_dirlock_t *dl; 2688168404Spjd dmu_tx_t *tx; 2689168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2690168404Spjd zilog_t *zilog = zfsvfs->z_log; 2691168404Spjd uint64_t zoid; 2692168404Spjd int len = strlen(link); 2693168404Spjd int error; 2694168404Spjd 2695168962Spjd ASSERT(vap->va_type == VLNK); 2696168404Spjd 2697168404Spjd ZFS_ENTER(zfsvfs); 2698168404Spjdtop: 2699168404Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, cr)) { 2700168404Spjd ZFS_EXIT(zfsvfs); 2701168404Spjd return (error); 2702168404Spjd } 2703168404Spjd 2704168404Spjd if (len > MAXPATHLEN) { 2705168404Spjd ZFS_EXIT(zfsvfs); 2706168404Spjd return (ENAMETOOLONG); 2707168404Spjd } 2708168404Spjd 2709168404Spjd /* 2710168404Spjd * Attempt to lock directory; fail if entry already exists. 
2711168404Spjd */ 2712168404Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, ZNEW)) { 2713168404Spjd ZFS_EXIT(zfsvfs); 2714168404Spjd return (error); 2715168404Spjd } 2716168404Spjd 2717168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2718168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 2719168404Spjd dmu_tx_hold_bonus(tx, dzp->z_id); 2720168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 2721168404Spjd if (dzp->z_phys->zp_flags & ZFS_INHERIT_ACE) 2722168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE); 2723168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 2724168404Spjd if (error) { 2725168404Spjd zfs_dirent_unlock(dl); 2726168404Spjd if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { 2727168404Spjd dmu_tx_wait(tx); 2728168404Spjd dmu_tx_abort(tx); 2729168404Spjd goto top; 2730168404Spjd } 2731168404Spjd dmu_tx_abort(tx); 2732168404Spjd ZFS_EXIT(zfsvfs); 2733168404Spjd return (error); 2734168404Spjd } 2735168404Spjd 2736168404Spjd dmu_buf_will_dirty(dzp->z_dbuf, tx); 2737168404Spjd 2738168404Spjd /* 2739168404Spjd * Create a new object for the symlink. 2740168404Spjd * Put the link content into bonus buffer if it will fit; 2741168404Spjd * otherwise, store it just like any other file data. 2742168404Spjd */ 2743168404Spjd zoid = 0; 2744168404Spjd if (sizeof (znode_phys_t) + len <= dmu_bonus_max()) { 2745168404Spjd zfs_mknode(dzp, vap, &zoid, tx, cr, 0, &zp, len); 2746168404Spjd if (len != 0) 2747168404Spjd bcopy(link, zp->z_phys + 1, len); 2748168404Spjd } else { 2749168404Spjd dmu_buf_t *dbp; 2750168404Spjd 2751168404Spjd zfs_mknode(dzp, vap, &zoid, tx, cr, 0, &zp, 0); 2752168404Spjd 2753168404Spjd /* 2754168404Spjd * Nothing can access the znode yet so no locking needed 2755168404Spjd * for growing the znode's blocksize. 
2756168404Spjd */ 2757168404Spjd zfs_grow_blocksize(zp, len, tx); 2758168404Spjd 2759168404Spjd VERIFY(0 == dmu_buf_hold(zfsvfs->z_os, zoid, 0, FTAG, &dbp)); 2760168404Spjd dmu_buf_will_dirty(dbp, tx); 2761168404Spjd 2762168404Spjd ASSERT3U(len, <=, dbp->db_size); 2763168404Spjd bcopy(link, dbp->db_data, len); 2764168404Spjd dmu_buf_rele(dbp, FTAG); 2765168404Spjd } 2766168404Spjd zp->z_phys->zp_size = len; 2767168404Spjd 2768168404Spjd /* 2769168404Spjd * Insert the new object into the directory. 2770168404Spjd */ 2771168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 2772168962Spjdout: 2773168404Spjd if (error == 0) { 2774168404Spjd zfs_log_symlink(zilog, tx, TX_SYMLINK, dzp, zp, name, link); 2775168962Spjd *vpp = ZTOV(zp); 2776175202Sattilio vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 2777168404Spjd } 2778168404Spjd 2779168404Spjd dmu_tx_commit(tx); 2780168404Spjd 2781168404Spjd zfs_dirent_unlock(dl); 2782168404Spjd 2783168404Spjd ZFS_EXIT(zfsvfs); 2784168404Spjd return (error); 2785168404Spjd} 2786168404Spjd 2787168404Spjd/* 2788168404Spjd * Return, in the buffer contained in the provided uio structure, 2789168404Spjd * the symbolic path referred to by vp. 2790168404Spjd * 2791168404Spjd * IN: vp - vnode of symbolic link. 2792168404Spjd * uoip - structure to contain the link path. 2793168404Spjd * cr - credentials of caller. 2794168404Spjd * 2795168404Spjd * OUT: uio - structure to contain the link path. 
2796168404Spjd * 2797168404Spjd * RETURN: 0 if success 2798168404Spjd * error code if failure 2799168404Spjd * 2800168404Spjd * Timestamps: 2801168404Spjd * vp - atime updated 2802168404Spjd */ 2803168404Spjd/* ARGSUSED */ 2804168404Spjdstatic int 2805168962Spjdzfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr) 2806168404Spjd{ 2807168404Spjd znode_t *zp = VTOZ(vp); 2808168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2809168404Spjd size_t bufsz; 2810168404Spjd int error; 2811168404Spjd 2812168404Spjd ZFS_ENTER(zfsvfs); 2813168404Spjd 2814168404Spjd bufsz = (size_t)zp->z_phys->zp_size; 2815168404Spjd if (bufsz + sizeof (znode_phys_t) <= zp->z_dbuf->db_size) { 2816168404Spjd error = uiomove(zp->z_phys + 1, 2817168404Spjd MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); 2818168404Spjd } else { 2819168404Spjd dmu_buf_t *dbp; 2820168404Spjd error = dmu_buf_hold(zfsvfs->z_os, zp->z_id, 0, FTAG, &dbp); 2821168404Spjd if (error) { 2822168404Spjd ZFS_EXIT(zfsvfs); 2823168404Spjd return (error); 2824168404Spjd } 2825168404Spjd error = uiomove(dbp->db_data, 2826168404Spjd MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); 2827168404Spjd dmu_buf_rele(dbp, FTAG); 2828168404Spjd } 2829168404Spjd 2830168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2831168404Spjd ZFS_EXIT(zfsvfs); 2832168404Spjd return (error); 2833168404Spjd} 2834168404Spjd 2835168404Spjd/* 2836168404Spjd * Insert a new entry into directory tdvp referencing svp. 2837168404Spjd * 2838168404Spjd * IN: tdvp - Directory to contain new entry. 2839168404Spjd * svp - vnode of new entry. 2840168404Spjd * name - name of new entry. 2841168404Spjd * cr - credentials of caller. 
2842168404Spjd * 2843168404Spjd * RETURN: 0 if success 2844168404Spjd * error code if failure 2845168404Spjd * 2846168404Spjd * Timestamps: 2847168404Spjd * tdvp - ctime|mtime updated 2848168404Spjd * svp - ctime updated 2849168404Spjd */ 2850168404Spjd/* ARGSUSED */ 2851168404Spjdstatic int 2852168962Spjdzfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr) 2853168404Spjd{ 2854168404Spjd znode_t *dzp = VTOZ(tdvp); 2855168404Spjd znode_t *tzp, *szp; 2856168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2857168404Spjd zilog_t *zilog = zfsvfs->z_log; 2858168404Spjd zfs_dirlock_t *dl; 2859168404Spjd dmu_tx_t *tx; 2860168962Spjd vnode_t *realvp; 2861168404Spjd int error; 2862168404Spjd 2863168404Spjd ASSERT(tdvp->v_type == VDIR); 2864168404Spjd 2865168404Spjd ZFS_ENTER(zfsvfs); 2866168404Spjd 2867168962Spjd if (VOP_REALVP(svp, &realvp) == 0) 2868168962Spjd svp = realvp; 2869168962Spjd 2870168404Spjd if (svp->v_vfsp != tdvp->v_vfsp) { 2871168404Spjd ZFS_EXIT(zfsvfs); 2872168404Spjd return (EXDEV); 2873168404Spjd } 2874168404Spjd 2875168404Spjd szp = VTOZ(svp); 2876168404Spjdtop: 2877168404Spjd /* 2878168404Spjd * We do not support links between attributes and non-attributes 2879168404Spjd * because of the potential security risk of creating links 2880168404Spjd * into "normal" file space in order to circumvent restrictions 2881168404Spjd * imposed in attribute space. 2882168404Spjd */ 2883168404Spjd if ((szp->z_phys->zp_flags & ZFS_XATTR) != 2884168404Spjd (dzp->z_phys->zp_flags & ZFS_XATTR)) { 2885168404Spjd ZFS_EXIT(zfsvfs); 2886168404Spjd return (EINVAL); 2887168404Spjd } 2888168404Spjd 2889168404Spjd /* 2890168404Spjd * POSIX dictates that we return EPERM here. 2891168404Spjd * Better choices include ENOTSUP or EISDIR. 
2892168404Spjd */ 2893168404Spjd if (svp->v_type == VDIR) { 2894168404Spjd ZFS_EXIT(zfsvfs); 2895168404Spjd return (EPERM); 2896168404Spjd } 2897168404Spjd 2898168404Spjd if ((uid_t)szp->z_phys->zp_uid != crgetuid(cr) && 2899168404Spjd secpolicy_basic_link(cr) != 0) { 2900168404Spjd ZFS_EXIT(zfsvfs); 2901168404Spjd return (EPERM); 2902168404Spjd } 2903168404Spjd 2904168404Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, cr)) { 2905168404Spjd ZFS_EXIT(zfsvfs); 2906168404Spjd return (error); 2907168404Spjd } 2908168404Spjd 2909168404Spjd /* 2910168404Spjd * Attempt to lock directory; fail if entry already exists. 2911168404Spjd */ 2912168404Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &tzp, ZNEW)) { 2913168404Spjd ZFS_EXIT(zfsvfs); 2914168404Spjd return (error); 2915168404Spjd } 2916168404Spjd 2917168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2918168404Spjd dmu_tx_hold_bonus(tx, szp->z_id); 2919168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 2920168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 2921168404Spjd if (error) { 2922168404Spjd zfs_dirent_unlock(dl); 2923168404Spjd if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { 2924168404Spjd dmu_tx_wait(tx); 2925168404Spjd dmu_tx_abort(tx); 2926168404Spjd goto top; 2927168404Spjd } 2928168404Spjd dmu_tx_abort(tx); 2929168404Spjd ZFS_EXIT(zfsvfs); 2930168404Spjd return (error); 2931168404Spjd } 2932168404Spjd 2933168404Spjd error = zfs_link_create(dl, szp, tx, 0); 2934168404Spjd 2935168404Spjd if (error == 0) 2936168404Spjd zfs_log_link(zilog, tx, TX_LINK, dzp, szp, name); 2937168404Spjd 2938168404Spjd dmu_tx_commit(tx); 2939168404Spjd 2940168404Spjd zfs_dirent_unlock(dl); 2941168404Spjd 2942168404Spjd ZFS_EXIT(zfsvfs); 2943168404Spjd return (error); 2944168404Spjd} 2945168404Spjd 2946168962Spjdvoid 2947168962Spjdzfs_inactive(vnode_t *vp, cred_t *cr) 2948168404Spjd{ 2949168962Spjd znode_t *zp = VTOZ(vp); 2950168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2951168962Spjd int error; 2952168404Spjd 
2953168404Spjd rw_enter(&zfsvfs->z_um_lock, RW_READER); 2954168404Spjd if (zfsvfs->z_unmounted2) { 2955168404Spjd ASSERT(zp->z_dbuf_held == 0); 2956168404Spjd 2957168404Spjd mutex_enter(&zp->z_lock); 2958168404Spjd VI_LOCK(vp); 2959168404Spjd vp->v_count = 0; /* count arrives as 1 */ 2960168404Spjd VI_UNLOCK(vp); 2961168404Spjd if (zp->z_dbuf == NULL) { 2962168404Spjd mutex_exit(&zp->z_lock); 2963168404Spjd zfs_znode_free(zp); 2964168404Spjd } else { 2965168404Spjd mutex_exit(&zp->z_lock); 2966168404Spjd } 2967168404Spjd rw_exit(&zfsvfs->z_um_lock); 2968168404Spjd VFS_RELE(zfsvfs->z_vfs); 2969168962Spjd return; 2970168404Spjd } 2971168404Spjd 2972168404Spjd if (zp->z_atime_dirty && zp->z_unlinked == 0) { 2973168404Spjd dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 2974168404Spjd 2975168404Spjd dmu_tx_hold_bonus(tx, zp->z_id); 2976168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 2977168404Spjd if (error) { 2978168404Spjd dmu_tx_abort(tx); 2979168404Spjd } else { 2980168404Spjd dmu_buf_will_dirty(zp->z_dbuf, tx); 2981168404Spjd mutex_enter(&zp->z_lock); 2982168404Spjd zp->z_atime_dirty = 0; 2983168404Spjd mutex_exit(&zp->z_lock); 2984168404Spjd dmu_tx_commit(tx); 2985168404Spjd } 2986168404Spjd } 2987168404Spjd 2988168404Spjd zfs_zinactive(zp); 2989168404Spjd rw_exit(&zfsvfs->z_um_lock); 2990168404Spjd} 2991168404Spjd 2992168404SpjdCTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 2993168404SpjdCTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 2994168404Spjd 2995168404Spjdstatic int 2996168962Spjdzfs_fid(vnode_t *vp, fid_t *fidp) 2997168404Spjd{ 2998168404Spjd znode_t *zp = VTOZ(vp); 2999168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3000168404Spjd uint32_t gen = (uint32_t)zp->z_phys->zp_gen; 3001168404Spjd uint64_t object = zp->z_id; 3002168404Spjd zfid_short_t *zfid; 3003168404Spjd int size, i; 3004168404Spjd 3005168404Spjd ZFS_ENTER(zfsvfs); 3006168404Spjd 3007168404Spjd size = (zfsvfs->z_parent != zfsvfs) ? 
LONG_FID_LEN : SHORT_FID_LEN; 3008168404Spjd fidp->fid_len = size; 3009168404Spjd 3010168404Spjd zfid = (zfid_short_t *)fidp; 3011168404Spjd 3012168404Spjd zfid->zf_len = size; 3013168404Spjd 3014168404Spjd for (i = 0; i < sizeof (zfid->zf_object); i++) 3015168404Spjd zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 3016168404Spjd 3017168404Spjd /* Must have a non-zero generation number to distinguish from .zfs */ 3018168404Spjd if (gen == 0) 3019168404Spjd gen = 1; 3020168404Spjd for (i = 0; i < sizeof (zfid->zf_gen); i++) 3021168404Spjd zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 3022168404Spjd 3023168404Spjd if (size == LONG_FID_LEN) { 3024168404Spjd uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 3025169023Spjd zfid_long_t *zlfid; 3026168404Spjd 3027168404Spjd zlfid = (zfid_long_t *)fidp; 3028168404Spjd 3029168404Spjd for (i = 0; i < sizeof (zlfid->zf_setid); i++) 3030168404Spjd zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 3031168404Spjd 3032168404Spjd /* XXX - this should be the generation number for the objset */ 3033168404Spjd for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 3034168404Spjd zlfid->zf_setgen[i] = 0; 3035168404Spjd } 3036168404Spjd 3037168404Spjd ZFS_EXIT(zfsvfs); 3038168404Spjd return (0); 3039168404Spjd} 3040168404Spjd 3041168404Spjdstatic int 3042168962Spjdzfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr) 3043168404Spjd{ 3044168404Spjd znode_t *zp, *xzp; 3045168404Spjd zfsvfs_t *zfsvfs; 3046168404Spjd zfs_dirlock_t *dl; 3047168404Spjd int error; 3048168404Spjd 3049168404Spjd switch (cmd) { 3050168404Spjd case _PC_LINK_MAX: 3051168404Spjd *valp = INT_MAX; 3052168404Spjd return (0); 3053168404Spjd 3054168404Spjd case _PC_FILESIZEBITS: 3055168404Spjd *valp = 64; 3056168404Spjd return (0); 3057168404Spjd 3058168404Spjd#if 0 3059168404Spjd case _PC_XATTR_EXISTS: 3060168404Spjd zp = VTOZ(vp); 3061168404Spjd zfsvfs = zp->z_zfsvfs; 3062168404Spjd ZFS_ENTER(zfsvfs); 3063168404Spjd *valp = 0; 3064168404Spjd error = 
zfs_dirent_lock(&dl, zp, "", &xzp, 3065168404Spjd ZXATTR | ZEXISTS | ZSHARED); 3066168404Spjd if (error == 0) { 3067168404Spjd zfs_dirent_unlock(dl); 3068168404Spjd if (!zfs_dirempty(xzp)) 3069168404Spjd *valp = 1; 3070168404Spjd VN_RELE(ZTOV(xzp)); 3071168404Spjd } else if (error == ENOENT) { 3072168404Spjd /* 3073168404Spjd * If there aren't extended attributes, it's the 3074168404Spjd * same as having zero of them. 3075168404Spjd */ 3076168404Spjd error = 0; 3077168404Spjd } 3078168404Spjd ZFS_EXIT(zfsvfs); 3079168404Spjd return (error); 3080168404Spjd#endif 3081168404Spjd 3082168404Spjd case _PC_ACL_EXTENDED: 3083168404Spjd *valp = 0; /* TODO */ 3084168404Spjd return (0); 3085168404Spjd 3086168404Spjd case _PC_MIN_HOLE_SIZE: 3087168404Spjd *valp = (int)SPA_MINBLOCKSIZE; 3088168404Spjd return (0); 3089168404Spjd 3090168404Spjd default: 3091168962Spjd return (EOPNOTSUPP); 3092168404Spjd } 3093168404Spjd} 3094168404Spjd 3095168404Spjd#ifdef TODO 3096168404Spjd/*ARGSUSED*/ 3097168404Spjdstatic int 3098168404Spjdzfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr) 3099168404Spjd{ 3100168404Spjd znode_t *zp = VTOZ(vp); 3101168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3102168404Spjd int error; 3103168404Spjd 3104168404Spjd ZFS_ENTER(zfsvfs); 3105168404Spjd error = zfs_getacl(zp, vsecp, cr); 3106168404Spjd ZFS_EXIT(zfsvfs); 3107168404Spjd 3108168404Spjd return (error); 3109168404Spjd} 3110168404Spjd#endif /* TODO */ 3111168404Spjd 3112168404Spjd#ifdef TODO 3113168404Spjd/*ARGSUSED*/ 3114168404Spjdstatic int 3115168404Spjdzfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr) 3116168404Spjd{ 3117168404Spjd znode_t *zp = VTOZ(vp); 3118168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3119168404Spjd int error; 3120168404Spjd 3121168404Spjd ZFS_ENTER(zfsvfs); 3122168404Spjd error = zfs_setacl(zp, vsecp, cr); 3123168404Spjd ZFS_EXIT(zfsvfs); 3124168404Spjd return (error); 3125168404Spjd} 3126168404Spjd#endif /* TODO */ 3127168404Spjd 
3128168962Spjdstatic int 3129168962Spjdzfs_freebsd_open(ap) 3130168962Spjd struct vop_open_args /* { 3131168962Spjd struct vnode *a_vp; 3132168962Spjd int a_mode; 3133168962Spjd struct ucred *a_cred; 3134168962Spjd struct thread *a_td; 3135168962Spjd } */ *ap; 3136168962Spjd{ 3137168962Spjd vnode_t *vp = ap->a_vp; 3138168962Spjd znode_t *zp = VTOZ(vp); 3139168962Spjd int error; 3140168962Spjd 3141168962Spjd error = zfs_open(&vp, ap->a_mode, ap->a_cred); 3142168962Spjd if (error == 0) 3143168962Spjd vnode_create_vobject(vp, zp->z_phys->zp_size, ap->a_td); 3144168962Spjd return (error); 3145168962Spjd} 3146168962Spjd 3147168962Spjdstatic int 3148168962Spjdzfs_freebsd_close(ap) 3149168962Spjd struct vop_close_args /* { 3150168962Spjd struct vnode *a_vp; 3151168962Spjd int a_fflag; 3152168962Spjd struct ucred *a_cred; 3153168962Spjd struct thread *a_td; 3154168962Spjd } */ *ap; 3155168962Spjd{ 3156168962Spjd 3157168962Spjd return (zfs_close(ap->a_vp, ap->a_fflag, 0, 0, ap->a_cred)); 3158168962Spjd} 3159168962Spjd 3160168962Spjdstatic int 3161168962Spjdzfs_freebsd_ioctl(ap) 3162168962Spjd struct vop_ioctl_args /* { 3163168962Spjd struct vnode *a_vp; 3164168962Spjd u_long a_command; 3165168962Spjd caddr_t a_data; 3166168962Spjd int a_fflag; 3167168962Spjd struct ucred *cred; 3168168962Spjd struct thread *td; 3169168962Spjd } */ *ap; 3170168962Spjd{ 3171168962Spjd 3172168978Spjd return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 3173168962Spjd ap->a_fflag, ap->a_cred, NULL)); 3174168962Spjd} 3175168962Spjd 3176168962Spjdstatic int 3177168962Spjdzfs_freebsd_read(ap) 3178168962Spjd struct vop_read_args /* { 3179168962Spjd struct vnode *a_vp; 3180168962Spjd struct uio *a_uio; 3181168962Spjd int a_ioflag; 3182168962Spjd struct ucred *a_cred; 3183168962Spjd } */ *ap; 3184168962Spjd{ 3185168962Spjd 3186168962Spjd return (zfs_read(ap->a_vp, ap->a_uio, ap->a_ioflag, ap->a_cred, NULL)); 3187168962Spjd} 3188168962Spjd 3189168962Spjdstatic int 
3190168962Spjdzfs_freebsd_write(ap) 3191168962Spjd struct vop_write_args /* { 3192168962Spjd struct vnode *a_vp; 3193168962Spjd struct uio *a_uio; 3194168962Spjd int a_ioflag; 3195168962Spjd struct ucred *a_cred; 3196168962Spjd } */ *ap; 3197168962Spjd{ 3198168962Spjd 3199168962Spjd return (zfs_write(ap->a_vp, ap->a_uio, ap->a_ioflag, ap->a_cred, NULL)); 3200168962Spjd} 3201168962Spjd 3202168962Spjdstatic int 3203168962Spjdzfs_freebsd_access(ap) 3204168962Spjd struct vop_access_args /* { 3205168962Spjd struct vnode *a_vp; 3206168962Spjd int a_mode; 3207168962Spjd struct ucred *a_cred; 3208168962Spjd struct thread *a_td; 3209168962Spjd } */ *ap; 3210168962Spjd{ 3211168962Spjd 3212168962Spjd return (zfs_access(ap->a_vp, ap->a_mode, 0, ap->a_cred)); 3213168962Spjd} 3214168962Spjd 3215168962Spjdstatic int 3216168962Spjdzfs_freebsd_lookup(ap) 3217168962Spjd struct vop_lookup_args /* { 3218168962Spjd struct vnode *a_dvp; 3219168962Spjd struct vnode **a_vpp; 3220168962Spjd struct componentname *a_cnp; 3221168962Spjd } */ *ap; 3222168962Spjd{ 3223168962Spjd struct componentname *cnp = ap->a_cnp; 3224168962Spjd char nm[NAME_MAX + 1]; 3225168962Spjd 3226168962Spjd ASSERT(cnp->cn_namelen < sizeof(nm)); 3227168962Spjd strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 3228168962Spjd 3229168962Spjd return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 3230168962Spjd cnp->cn_cred, cnp->cn_thread)); 3231168962Spjd} 3232168962Spjd 3233168962Spjdstatic int 3234168962Spjdzfs_freebsd_create(ap) 3235168962Spjd struct vop_create_args /* { 3236168962Spjd struct vnode *a_dvp; 3237168962Spjd struct vnode **a_vpp; 3238168962Spjd struct componentname *a_cnp; 3239168962Spjd struct vattr *a_vap; 3240168962Spjd } */ *ap; 3241168962Spjd{ 3242168962Spjd struct componentname *cnp = ap->a_cnp; 3243168962Spjd vattr_t *vap = ap->a_vap; 3244168962Spjd int mode; 3245168962Spjd 3246168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 3247168962Spjd 3248168962Spjd 
vattr_init_mask(vap); 3249168962Spjd mode = vap->va_mode & ALLPERMS; 3250168962Spjd 3251168962Spjd return (zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 3252176559Sattilio ap->a_vpp, cnp->cn_cred)); 3253168962Spjd} 3254168962Spjd 3255168962Spjdstatic int 3256168962Spjdzfs_freebsd_remove(ap) 3257168962Spjd struct vop_remove_args /* { 3258168962Spjd struct vnode *a_dvp; 3259168962Spjd struct vnode *a_vp; 3260168962Spjd struct componentname *a_cnp; 3261168962Spjd } */ *ap; 3262168962Spjd{ 3263168962Spjd 3264168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 3265168962Spjd 3266168962Spjd return (zfs_remove(ap->a_dvp, ap->a_cnp->cn_nameptr, 3267168962Spjd ap->a_cnp->cn_cred)); 3268168962Spjd} 3269168962Spjd 3270168962Spjdstatic int 3271168962Spjdzfs_freebsd_mkdir(ap) 3272168962Spjd struct vop_mkdir_args /* { 3273168962Spjd struct vnode *a_dvp; 3274168962Spjd struct vnode **a_vpp; 3275168962Spjd struct componentname *a_cnp; 3276168962Spjd struct vattr *a_vap; 3277168962Spjd } */ *ap; 3278168962Spjd{ 3279168962Spjd vattr_t *vap = ap->a_vap; 3280168962Spjd 3281168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 3282168962Spjd 3283168962Spjd vattr_init_mask(vap); 3284168962Spjd 3285168962Spjd return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 3286168962Spjd ap->a_cnp->cn_cred)); 3287168962Spjd} 3288168962Spjd 3289168962Spjdstatic int 3290168962Spjdzfs_freebsd_rmdir(ap) 3291168962Spjd struct vop_rmdir_args /* { 3292168962Spjd struct vnode *a_dvp; 3293168962Spjd struct vnode *a_vp; 3294168962Spjd struct componentname *a_cnp; 3295168962Spjd } */ *ap; 3296168962Spjd{ 3297168962Spjd struct componentname *cnp = ap->a_cnp; 3298168962Spjd 3299168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 3300168962Spjd 3301168962Spjd return (zfs_rmdir(ap->a_dvp, cnp->cn_nameptr, NULL, cnp->cn_cred)); 3302168962Spjd} 3303168962Spjd 3304168962Spjdstatic int 3305168962Spjdzfs_freebsd_readdir(ap) 3306168962Spjd struct vop_readdir_args /* { 3307168962Spjd struct vnode *a_vp; 
3308168962Spjd struct uio *a_uio; 3309168962Spjd struct ucred *a_cred; 3310168962Spjd int *a_eofflag; 3311168962Spjd int *a_ncookies; 3312168962Spjd u_long **a_cookies; 3313168962Spjd } */ *ap; 3314168962Spjd{ 3315168962Spjd 3316168962Spjd return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 3317168962Spjd ap->a_ncookies, ap->a_cookies)); 3318168962Spjd} 3319168962Spjd 3320168962Spjdstatic int 3321168962Spjdzfs_freebsd_fsync(ap) 3322168962Spjd struct vop_fsync_args /* { 3323168962Spjd struct vnode *a_vp; 3324168962Spjd int a_waitfor; 3325168962Spjd struct thread *a_td; 3326168962Spjd } */ *ap; 3327168962Spjd{ 3328168962Spjd 3329168962Spjd vop_stdfsync(ap); 3330168962Spjd return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred)); 3331168962Spjd} 3332168962Spjd 3333168962Spjdstatic int 3334168962Spjdzfs_freebsd_getattr(ap) 3335168962Spjd struct vop_getattr_args /* { 3336168962Spjd struct vnode *a_vp; 3337168962Spjd struct vattr *a_vap; 3338168962Spjd struct ucred *a_cred; 3339168962Spjd struct thread *a_td; 3340168962Spjd } */ *ap; 3341168962Spjd{ 3342168962Spjd 3343168962Spjd return (zfs_getattr(ap->a_vp, ap->a_vap, 0, ap->a_cred)); 3344168962Spjd} 3345168962Spjd 3346168962Spjdstatic int 3347168962Spjdzfs_freebsd_setattr(ap) 3348168962Spjd struct vop_setattr_args /* { 3349168962Spjd struct vnode *a_vp; 3350168962Spjd struct vattr *a_vap; 3351168962Spjd struct ucred *a_cred; 3352168962Spjd struct thread *a_td; 3353168962Spjd } */ *ap; 3354168962Spjd{ 3355168962Spjd vattr_t *vap = ap->a_vap; 3356168962Spjd 3357168962Spjd /* No support for FreeBSD's chflags(2). 
*/ 3358168962Spjd if (vap->va_flags != VNOVAL) 3359168962Spjd return (EOPNOTSUPP); 3360168962Spjd 3361168962Spjd vattr_init_mask(vap); 3362170044Spjd vap->va_mask &= ~AT_NOSET; 3363168962Spjd 3364168962Spjd return (zfs_setattr(ap->a_vp, vap, 0, ap->a_cred, NULL)); 3365168962Spjd} 3366168962Spjd 3367168962Spjdstatic int 3368168962Spjdzfs_freebsd_rename(ap) 3369168962Spjd struct vop_rename_args /* { 3370168962Spjd struct vnode *a_fdvp; 3371168962Spjd struct vnode *a_fvp; 3372168962Spjd struct componentname *a_fcnp; 3373168962Spjd struct vnode *a_tdvp; 3374168962Spjd struct vnode *a_tvp; 3375168962Spjd struct componentname *a_tcnp; 3376168962Spjd } */ *ap; 3377168962Spjd{ 3378168962Spjd vnode_t *fdvp = ap->a_fdvp; 3379168962Spjd vnode_t *fvp = ap->a_fvp; 3380168962Spjd vnode_t *tdvp = ap->a_tdvp; 3381168962Spjd vnode_t *tvp = ap->a_tvp; 3382168962Spjd int error; 3383168962Spjd 3384168962Spjd ASSERT(ap->a_fcnp->cn_flags & SAVENAME); 3385168962Spjd ASSERT(ap->a_tcnp->cn_flags & SAVENAME); 3386168962Spjd 3387168962Spjd error = zfs_rename(fdvp, ap->a_fcnp->cn_nameptr, tdvp, 3388168962Spjd ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred); 3389168962Spjd 3390168962Spjd if (tdvp == tvp) 3391168962Spjd VN_RELE(tdvp); 3392168962Spjd else 3393168962Spjd VN_URELE(tdvp); 3394168962Spjd if (tvp) 3395168962Spjd VN_URELE(tvp); 3396168962Spjd VN_RELE(fdvp); 3397168962Spjd VN_RELE(fvp); 3398168962Spjd 3399168962Spjd return (error); 3400168962Spjd} 3401168962Spjd 3402168962Spjdstatic int 3403168962Spjdzfs_freebsd_symlink(ap) 3404168962Spjd struct vop_symlink_args /* { 3405168962Spjd struct vnode *a_dvp; 3406168962Spjd struct vnode **a_vpp; 3407168962Spjd struct componentname *a_cnp; 3408168962Spjd struct vattr *a_vap; 3409168962Spjd char *a_target; 3410168962Spjd } */ *ap; 3411168962Spjd{ 3412168962Spjd struct componentname *cnp = ap->a_cnp; 3413168962Spjd vattr_t *vap = ap->a_vap; 3414168962Spjd 3415168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 3416168962Spjd 3417168962Spjd vap->va_type = 
VLNK; /* FreeBSD: Syscall only sets va_mode. */ 3418168962Spjd vattr_init_mask(vap); 3419168962Spjd 3420168962Spjd return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 3421168962Spjd ap->a_target, cnp->cn_cred, cnp->cn_thread)); 3422168962Spjd} 3423168962Spjd 3424168962Spjdstatic int 3425168962Spjdzfs_freebsd_readlink(ap) 3426168962Spjd struct vop_readlink_args /* { 3427168962Spjd struct vnode *a_vp; 3428168962Spjd struct uio *a_uio; 3429168962Spjd struct ucred *a_cred; 3430168962Spjd } */ *ap; 3431168962Spjd{ 3432168962Spjd 3433168962Spjd return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred)); 3434168962Spjd} 3435168962Spjd 3436168962Spjdstatic int 3437168962Spjdzfs_freebsd_link(ap) 3438168962Spjd struct vop_link_args /* { 3439168962Spjd struct vnode *a_tdvp; 3440168962Spjd struct vnode *a_vp; 3441168962Spjd struct componentname *a_cnp; 3442168962Spjd } */ *ap; 3443168962Spjd{ 3444168962Spjd struct componentname *cnp = ap->a_cnp; 3445168962Spjd 3446168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 3447168962Spjd 3448168962Spjd return (zfs_link(ap->a_tdvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred)); 3449168962Spjd} 3450168962Spjd 3451168962Spjdstatic int 3452168962Spjdzfs_freebsd_inactive(ap) 3453169170Spjd struct vop_inactive_args /* { 3454169170Spjd struct vnode *a_vp; 3455169170Spjd struct thread *a_td; 3456169170Spjd } */ *ap; 3457168962Spjd{ 3458168962Spjd vnode_t *vp = ap->a_vp; 3459168962Spjd 3460168962Spjd zfs_inactive(vp, ap->a_td->td_ucred); 3461168962Spjd return (0); 3462168962Spjd} 3463168962Spjd 3464168962Spjdstatic int 3465168962Spjdzfs_freebsd_reclaim(ap) 3466168962Spjd struct vop_reclaim_args /* { 3467168962Spjd struct vnode *a_vp; 3468168962Spjd struct thread *a_td; 3469168962Spjd } */ *ap; 3470168962Spjd{ 3471169170Spjd vnode_t *vp = ap->a_vp; 3472168962Spjd znode_t *zp = VTOZ(vp); 3473169025Spjd zfsvfs_t *zfsvfs; 3474169025Spjd int rele = 1; 3475168962Spjd 3476169025Spjd ASSERT(zp != NULL); 3477169025Spjd 3478168962Spjd /* 3479168962Spjd * 
Destroy the vm object and flush associated pages. 3480168962Spjd */ 3481168962Spjd vnode_destroy_vobject(vp); 3482169025Spjd 3483169025Spjd mutex_enter(&zp->z_lock); 3484169025Spjd ASSERT(zp->z_phys); 3485169025Spjd ASSERT(zp->z_dbuf_held); 3486169025Spjd zfsvfs = zp->z_zfsvfs; 3487169025Spjd if (!zp->z_unlinked) { 3488169025Spjd zp->z_dbuf_held = 0; 3489169025Spjd ZTOV(zp) = NULL; 3490169025Spjd mutex_exit(&zp->z_lock); 3491169025Spjd dmu_buf_rele(zp->z_dbuf, NULL); 3492169025Spjd } else { 3493169025Spjd mutex_exit(&zp->z_lock); 3494169025Spjd } 3495168962Spjd VI_LOCK(vp); 3496169025Spjd if (vp->v_count > 0) 3497169025Spjd rele = 0; 3498168962Spjd vp->v_data = NULL; 3499171567Spjd ASSERT(vp->v_holdcnt >= 1); 3500171316Sdfr VI_UNLOCK(vp); 3501169025Spjd if (!zp->z_unlinked && rele) 3502169025Spjd VFS_RELE(zfsvfs->z_vfs); 3503168962Spjd return (0); 3504168962Spjd} 3505168962Spjd 3506168962Spjdstatic int 3507168962Spjdzfs_freebsd_fid(ap) 3508168962Spjd struct vop_fid_args /* { 3509168962Spjd struct vnode *a_vp; 3510168962Spjd struct fid *a_fid; 3511168962Spjd } */ *ap; 3512168962Spjd{ 3513168962Spjd 3514168962Spjd return (zfs_fid(ap->a_vp, (void *)ap->a_fid)); 3515168962Spjd} 3516168962Spjd 3517168962Spjdstatic int 3518168962Spjdzfs_freebsd_pathconf(ap) 3519168962Spjd struct vop_pathconf_args /* { 3520168962Spjd struct vnode *a_vp; 3521168962Spjd int a_name; 3522168962Spjd register_t *a_retval; 3523168962Spjd } */ *ap; 3524168962Spjd{ 3525168962Spjd ulong_t val; 3526168962Spjd int error; 3527168962Spjd 3528168962Spjd error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred); 3529168962Spjd if (error == 0) 3530168962Spjd *ap->a_retval = val; 3531168962Spjd else if (error == EOPNOTSUPP) 3532168962Spjd error = vop_stdpathconf(ap); 3533168962Spjd return (error); 3534168962Spjd} 3535168962Spjd 3536168404Spjd/* 3537168404Spjd * Advisory record locking support 3538168404Spjd */ 3539168404Spjdstatic int 3540168962Spjdzfs_freebsd_advlock(ap) 3541168404Spjd struct 
vop_advlock_args /* { 3542168404Spjd struct vnode *a_vp; 3543168404Spjd caddr_t a_id; 3544168404Spjd int a_op; 3545168404Spjd struct flock *a_fl; 3546168404Spjd int a_flags; 3547168404Spjd } */ *ap; 3548168404Spjd{ 3549168404Spjd znode_t *zp = VTOZ(ap->a_vp); 3550168404Spjd 3551168404Spjd return (lf_advlock(ap, &(zp->z_lockf), zp->z_phys->zp_size)); 3552168404Spjd} 3553168404Spjd 3554168404Spjdstruct vop_vector zfs_vnodeops; 3555168404Spjdstruct vop_vector zfs_fifoops; 3556168404Spjd 3557168404Spjdstruct vop_vector zfs_vnodeops = { 3558168404Spjd .vop_default = &default_vnodeops, 3559168962Spjd .vop_inactive = zfs_freebsd_inactive, 3560168962Spjd .vop_reclaim = zfs_freebsd_reclaim, 3561168962Spjd .vop_access = zfs_freebsd_access, 3562168404Spjd#ifdef FREEBSD_NAMECACHE 3563168404Spjd .vop_lookup = vfs_cache_lookup, 3564168962Spjd .vop_cachedlookup = zfs_freebsd_lookup, 3565168404Spjd#else 3566168962Spjd .vop_lookup = zfs_freebsd_lookup, 3567168404Spjd#endif 3568168962Spjd .vop_getattr = zfs_freebsd_getattr, 3569168962Spjd .vop_setattr = zfs_freebsd_setattr, 3570168962Spjd .vop_create = zfs_freebsd_create, 3571168962Spjd .vop_mknod = zfs_freebsd_create, 3572168962Spjd .vop_mkdir = zfs_freebsd_mkdir, 3573168962Spjd .vop_readdir = zfs_freebsd_readdir, 3574168962Spjd .vop_fsync = zfs_freebsd_fsync, 3575168962Spjd .vop_open = zfs_freebsd_open, 3576168962Spjd .vop_close = zfs_freebsd_close, 3577168962Spjd .vop_rmdir = zfs_freebsd_rmdir, 3578168962Spjd .vop_ioctl = zfs_freebsd_ioctl, 3579168962Spjd .vop_link = zfs_freebsd_link, 3580168962Spjd .vop_symlink = zfs_freebsd_symlink, 3581168962Spjd .vop_readlink = zfs_freebsd_readlink, 3582168962Spjd .vop_read = zfs_freebsd_read, 3583168962Spjd .vop_write = zfs_freebsd_write, 3584168962Spjd .vop_remove = zfs_freebsd_remove, 3585168962Spjd .vop_rename = zfs_freebsd_rename, 3586168962Spjd .vop_advlock = zfs_freebsd_advlock, 3587168962Spjd .vop_pathconf = zfs_freebsd_pathconf, 3588168404Spjd .vop_bmap = VOP_EOPNOTSUPP, 
3589168962Spjd .vop_fid = zfs_freebsd_fid, 3590168404Spjd}; 3591168404Spjd 3592169170Spjdstruct vop_vector zfs_fifoops = { 3593168404Spjd .vop_default = &fifo_specops, 3594168404Spjd .vop_fsync = VOP_PANIC, 3595168962Spjd .vop_access = zfs_freebsd_access, 3596168962Spjd .vop_getattr = zfs_freebsd_getattr, 3597168962Spjd .vop_inactive = zfs_freebsd_inactive, 3598168404Spjd .vop_read = VOP_PANIC, 3599168962Spjd .vop_reclaim = zfs_freebsd_reclaim, 3600168962Spjd .vop_setattr = zfs_freebsd_setattr, 3601168404Spjd .vop_write = VOP_PANIC, 3602168962Spjd .vop_fid = zfs_freebsd_fid, 3603168404Spjd}; 3604