zfs_vnops.c revision 240829
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22212694Smm * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23240415Smm * Copyright (c) 2012 by Delphix. All rights reserved. 
24168404Spjd */ 25168404Spjd 26169195Spjd/* Portions Copyright 2007 Jeremy Teo */ 27219089Spjd/* Portions Copyright 2010 Robert Milkowski */ 28169195Spjd 29168404Spjd#include <sys/types.h> 30168404Spjd#include <sys/param.h> 31168404Spjd#include <sys/time.h> 32168404Spjd#include <sys/systm.h> 33168404Spjd#include <sys/sysmacros.h> 34168404Spjd#include <sys/resource.h> 35168404Spjd#include <sys/vfs.h> 36168404Spjd#include <sys/vnode.h> 37168404Spjd#include <sys/file.h> 38168404Spjd#include <sys/stat.h> 39168404Spjd#include <sys/kmem.h> 40168404Spjd#include <sys/taskq.h> 41168404Spjd#include <sys/uio.h> 42168404Spjd#include <sys/atomic.h> 43168404Spjd#include <sys/namei.h> 44168404Spjd#include <sys/mman.h> 45168404Spjd#include <sys/cmn_err.h> 46168404Spjd#include <sys/errno.h> 47168404Spjd#include <sys/unistd.h> 48168404Spjd#include <sys/zfs_dir.h> 49168404Spjd#include <sys/zfs_ioctl.h> 50168404Spjd#include <sys/fs/zfs.h> 51168404Spjd#include <sys/dmu.h> 52219089Spjd#include <sys/dmu_objset.h> 53168404Spjd#include <sys/spa.h> 54168404Spjd#include <sys/txg.h> 55168404Spjd#include <sys/dbuf.h> 56168404Spjd#include <sys/zap.h> 57219089Spjd#include <sys/sa.h> 58168404Spjd#include <sys/dirent.h> 59168962Spjd#include <sys/policy.h> 60168962Spjd#include <sys/sunddi.h> 61168404Spjd#include <sys/filio.h> 62209962Smm#include <sys/sid.h> 63168404Spjd#include <sys/zfs_ctldir.h> 64185029Spjd#include <sys/zfs_fuid.h> 65219089Spjd#include <sys/zfs_sa.h> 66168404Spjd#include <sys/dnlc.h> 67168404Spjd#include <sys/zfs_rlock.h> 68185029Spjd#include <sys/extdirent.h> 69185029Spjd#include <sys/kidmap.h> 70168404Spjd#include <sys/bio.h> 71168404Spjd#include <sys/buf.h> 72168404Spjd#include <sys/sf_buf.h> 73168404Spjd#include <sys/sched.h> 74192800Strasz#include <sys/acl.h> 75239077Smarius#include <vm/vm_param.h> 76215401Savg#include <vm/vm_pageout.h> 77168404Spjd 78168404Spjd/* 79168404Spjd * Programming rules. 
80168404Spjd * 81168404Spjd * Each vnode op performs some logical unit of work. To do this, the ZPL must 82168404Spjd * properly lock its in-core state, create a DMU transaction, do the work, 83168404Spjd * record this work in the intent log (ZIL), commit the DMU transaction, 84185029Spjd * and wait for the intent log to commit if it is a synchronous operation. 85185029Spjd * Moreover, the vnode ops must work in both normal and log replay context. 86168404Spjd * The ordering of events is important to avoid deadlocks and references 87168404Spjd * to freed memory. The example below illustrates the following Big Rules: 88168404Spjd * 89168404Spjd * (1) A check must be made in each zfs thread for a mounted file system. 90168404Spjd * This is done avoiding races using ZFS_ENTER(zfsvfs). 91185029Spjd * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 92185029Spjd * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 93185029Spjd * can return EIO from the calling function. 94168404Spjd * 95168404Spjd * (2) VN_RELE() should always be the last thing except for zil_commit() 96168404Spjd * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 97168404Spjd * First, if it's the last reference, the vnode/znode 98168404Spjd * can be freed, so the zp may point to freed memory. Second, the last 99168404Spjd * reference will call zfs_zinactive(), which may induce a lot of work -- 100168404Spjd * pushing cached pages (which acquires range locks) and syncing out 101168404Spjd * cached atime changes. Third, zfs_zinactive() may require a new tx, 102168404Spjd * which could deadlock the system if you were already holding one. 103191900Skmacy * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 104168404Spjd * 105168404Spjd * (3) All range locks must be grabbed before calling dmu_tx_assign(), 106168404Spjd * as they can span dmu_tx_assign() calls. 107168404Spjd * 108209962Smm * (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign(). 
109168404Spjd * This is critical because we don't want to block while holding locks. 110168404Spjd * Note, in particular, that if a lock is sometimes acquired before 111168404Spjd * the tx assigns, and sometimes after (e.g. z_lock), then failing to 112168404Spjd * use a non-blocking assign can deadlock the system. The scenario: 113168404Spjd * 114168404Spjd * Thread A has grabbed a lock before calling dmu_tx_assign(). 115168404Spjd * Thread B is in an already-assigned tx, and blocks for this lock. 116168404Spjd * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 117168404Spjd * forever, because the previous txg can't quiesce until B's tx commits. 118168404Spjd * 119168404Spjd * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 120168404Spjd * then drop all locks, call dmu_tx_wait(), and try again. 121168404Spjd * 122168404Spjd * (5) If the operation succeeded, generate the intent log entry for it 123168404Spjd * before dropping locks. This ensures that the ordering of events 124168404Spjd * in the intent log matches the order in which they actually occurred. 125209962Smm * During ZIL replay the zfs_log_* functions will update the sequence 126209962Smm * number to indicate the zil transaction has replayed. 127168404Spjd * 128168404Spjd * (6) At the end of each vnode op, the DMU tx must always commit, 129168404Spjd * regardless of whether there were any errors. 130168404Spjd * 131219089Spjd * (7) After dropping all locks, invoke zil_commit(zilog, foid) 132168404Spjd * to ensure that synchronous semantics are provided when necessary. 133168404Spjd * 134168404Spjd * In general, this is how things should be ordered in each vnode op: 135168404Spjd * 136168404Spjd * ZFS_ENTER(zfsvfs); // exit if unmounted 137168404Spjd * top: 138168404Spjd * zfs_dirent_lock(&dl, ...) 
// lock directory entry (may VN_HOLD()) 139168404Spjd * rw_enter(...); // grab any other locks you need 140168404Spjd * tx = dmu_tx_create(...); // get DMU tx 141168404Spjd * dmu_tx_hold_*(); // hold each object you might modify 142209962Smm * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign 143168404Spjd * if (error) { 144168404Spjd * rw_exit(...); // drop locks 145168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 146168404Spjd * VN_RELE(...); // release held vnodes 147209962Smm * if (error == ERESTART) { 148168404Spjd * dmu_tx_wait(tx); 149168404Spjd * dmu_tx_abort(tx); 150168404Spjd * goto top; 151168404Spjd * } 152168404Spjd * dmu_tx_abort(tx); // abort DMU tx 153168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 154168404Spjd * return (error); // really out of space 155168404Spjd * } 156168404Spjd * error = do_real_work(); // do whatever this VOP does 157168404Spjd * if (error == 0) 158168404Spjd * zfs_log_*(...); // on success, make ZIL entry 159168404Spjd * dmu_tx_commit(tx); // commit DMU tx -- error or not 160168404Spjd * rw_exit(...); // drop locks 161168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 162168404Spjd * VN_RELE(...); // release held vnodes 163219089Spjd * zil_commit(zilog, foid); // synchronous when necessary 164168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 165168404Spjd * return (error); // done, report error 166168404Spjd */ 167185029Spjd 168168404Spjd/* ARGSUSED */ 169168404Spjdstatic int 170185029Spjdzfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 171168404Spjd{ 172168962Spjd znode_t *zp = VTOZ(*vpp); 173209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 174168404Spjd 175209962Smm ZFS_ENTER(zfsvfs); 176209962Smm ZFS_VERIFY_ZP(zp); 177209962Smm 178219089Spjd if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 179185029Spjd ((flag & FAPPEND) == 0)) { 180209962Smm ZFS_EXIT(zfsvfs); 181185029Spjd return (EPERM); 182185029Spjd } 183185029Spjd 184185029Spjd if (!zfs_has_ctldir(zp) && 
zp->z_zfsvfs->z_vscan && 185185029Spjd ZTOV(zp)->v_type == VREG && 186219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 187209962Smm if (fs_vscan(*vpp, cr, 0) != 0) { 188209962Smm ZFS_EXIT(zfsvfs); 189185029Spjd return (EACCES); 190209962Smm } 191209962Smm } 192185029Spjd 193168404Spjd /* Keep a count of the synchronous opens in the znode */ 194168962Spjd if (flag & (FSYNC | FDSYNC)) 195168404Spjd atomic_inc_32(&zp->z_sync_cnt); 196185029Spjd 197209962Smm ZFS_EXIT(zfsvfs); 198168404Spjd return (0); 199168404Spjd} 200168404Spjd 201168404Spjd/* ARGSUSED */ 202168404Spjdstatic int 203185029Spjdzfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 204185029Spjd caller_context_t *ct) 205168404Spjd{ 206168962Spjd znode_t *zp = VTOZ(vp); 207209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 208168404Spjd 209210470Smm /* 210210470Smm * Clean up any locks held by this process on the vp. 211210470Smm */ 212210470Smm cleanlocks(vp, ddi_get_pid(), 0); 213210470Smm cleanshares(vp, ddi_get_pid()); 214210470Smm 215209962Smm ZFS_ENTER(zfsvfs); 216209962Smm ZFS_VERIFY_ZP(zp); 217209962Smm 218168404Spjd /* Decrement the synchronous opens in the znode */ 219185029Spjd if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 220168404Spjd atomic_dec_32(&zp->z_sync_cnt); 221168404Spjd 222185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 223185029Spjd ZTOV(zp)->v_type == VREG && 224219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 225185029Spjd VERIFY(fs_vscan(vp, cr, 1) == 0); 226185029Spjd 227209962Smm ZFS_EXIT(zfsvfs); 228168404Spjd return (0); 229168404Spjd} 230168404Spjd 231168404Spjd/* 232168404Spjd * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 233168404Spjd * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 
234168404Spjd */ 235168404Spjdstatic int 236168978Spjdzfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 237168404Spjd{ 238168404Spjd znode_t *zp = VTOZ(vp); 239168404Spjd uint64_t noff = (uint64_t)*off; /* new offset */ 240168404Spjd uint64_t file_sz; 241168404Spjd int error; 242168404Spjd boolean_t hole; 243168404Spjd 244219089Spjd file_sz = zp->z_size; 245168404Spjd if (noff >= file_sz) { 246168404Spjd return (ENXIO); 247168404Spjd } 248168404Spjd 249168962Spjd if (cmd == _FIO_SEEK_HOLE) 250168404Spjd hole = B_TRUE; 251168404Spjd else 252168404Spjd hole = B_FALSE; 253168404Spjd 254168404Spjd error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 255168404Spjd 256168404Spjd /* end of file? */ 257168404Spjd if ((error == ESRCH) || (noff > file_sz)) { 258168404Spjd /* 259168404Spjd * Handle the virtual hole at the end of file. 260168404Spjd */ 261168404Spjd if (hole) { 262168404Spjd *off = file_sz; 263168404Spjd return (0); 264168404Spjd } 265168404Spjd return (ENXIO); 266168404Spjd } 267168404Spjd 268168404Spjd if (noff < *off) 269168404Spjd return (error); 270168404Spjd *off = noff; 271168404Spjd return (error); 272168404Spjd} 273168404Spjd 274168404Spjd/* ARGSUSED */ 275168404Spjdstatic int 276168978Spjdzfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 277185029Spjd int *rvalp, caller_context_t *ct) 278168404Spjd{ 279168962Spjd offset_t off; 280168962Spjd int error; 281168962Spjd zfsvfs_t *zfsvfs; 282185029Spjd znode_t *zp; 283168404Spjd 284168404Spjd switch (com) { 285185029Spjd case _FIOFFS: 286168962Spjd return (0); 287168404Spjd 288168962Spjd /* 289168962Spjd * The following two ioctls are used by bfu. Faking out, 290168962Spjd * necessary to avoid bfu errors. 
291168962Spjd */ 292185029Spjd case _FIOGDIO: 293185029Spjd case _FIOSDIO: 294168962Spjd return (0); 295168962Spjd 296185029Spjd case _FIO_SEEK_DATA: 297185029Spjd case _FIO_SEEK_HOLE: 298233918Savg#ifdef sun 299168962Spjd if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 300168962Spjd return (EFAULT); 301233918Savg#else 302233918Savg off = *(offset_t *)data; 303233918Savg#endif 304185029Spjd zp = VTOZ(vp); 305185029Spjd zfsvfs = zp->z_zfsvfs; 306168404Spjd ZFS_ENTER(zfsvfs); 307185029Spjd ZFS_VERIFY_ZP(zp); 308168404Spjd 309168404Spjd /* offset parameter is in/out */ 310168404Spjd error = zfs_holey(vp, com, &off); 311168404Spjd ZFS_EXIT(zfsvfs); 312168404Spjd if (error) 313168404Spjd return (error); 314233918Savg#ifdef sun 315168962Spjd if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 316168962Spjd return (EFAULT); 317233918Savg#else 318233918Savg *(offset_t *)data = off; 319233918Savg#endif 320168404Spjd return (0); 321168404Spjd } 322168404Spjd return (ENOTTY); 323168404Spjd} 324168404Spjd 325209962Smmstatic vm_page_t 326209962Smmpage_lookup(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 327209962Smm{ 328209962Smm vm_object_t obj; 329209962Smm vm_page_t pp; 330209962Smm 331209962Smm obj = vp->v_object; 332209962Smm VM_OBJECT_LOCK_ASSERT(obj, MA_OWNED); 333209962Smm 334209962Smm for (;;) { 335209962Smm if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 336209962Smm vm_page_is_valid(pp, (vm_offset_t)off, nbytes)) { 337212652Savg if ((pp->oflags & VPO_BUSY) != 0) { 338212652Savg /* 339212652Savg * Reference the page before unlocking and 340212652Savg * sleeping so that the page daemon is less 341212652Savg * likely to reclaim it. 
342212652Savg */ 343225418Skib vm_page_reference(pp); 344212652Savg vm_page_sleep(pp, "zfsmwb"); 345209962Smm continue; 346212652Savg } 347209962Smm vm_page_busy(pp); 348209962Smm vm_page_undirty(pp); 349209962Smm } else { 350234064Sattilio if (vm_page_is_cached(obj, OFF_TO_IDX(start))) 351209962Smm vm_page_cache_free(obj, OFF_TO_IDX(start), 352209962Smm OFF_TO_IDX(start) + 1); 353209962Smm pp = NULL; 354209962Smm } 355209962Smm break; 356209962Smm } 357209962Smm return (pp); 358209962Smm} 359209962Smm 360209962Smmstatic void 361209962Smmpage_unlock(vm_page_t pp) 362209962Smm{ 363209962Smm 364209962Smm vm_page_wakeup(pp); 365209962Smm} 366209962Smm 367209962Smmstatic caddr_t 368209962Smmzfs_map_page(vm_page_t pp, struct sf_buf **sfp) 369209962Smm{ 370209962Smm 371212951Savg *sfp = sf_buf_alloc(pp, 0); 372209962Smm return ((caddr_t)sf_buf_kva(*sfp)); 373209962Smm} 374209962Smm 375209962Smmstatic void 376209962Smmzfs_unmap_page(struct sf_buf *sf) 377209962Smm{ 378209962Smm 379209962Smm sf_buf_free(sf); 380209962Smm} 381209962Smm 382168404Spjd/* 383168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 384168404Spjd * between the DMU cache and the memory mapped pages. What this means: 385168404Spjd * 386168404Spjd * On Write: If we find a memory mapped page, we write to *both* 387168404Spjd * the page and the dmu buffer. 
388168404Spjd */ 389209962Smmstatic void 390209962Smmupdate_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 391209962Smm int segflg, dmu_tx_t *tx) 392168404Spjd{ 393168404Spjd vm_object_t obj; 394168404Spjd struct sf_buf *sf; 395212655Savg int off; 396168404Spjd 397168404Spjd ASSERT(vp->v_mount != NULL); 398168404Spjd obj = vp->v_object; 399168404Spjd ASSERT(obj != NULL); 400168404Spjd 401168404Spjd off = start & PAGEOFFSET; 402168404Spjd VM_OBJECT_LOCK(obj); 403168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 404209962Smm vm_page_t pp; 405212655Savg int nbytes = MIN(PAGESIZE - off, len); 406168404Spjd 407209962Smm if ((pp = page_lookup(vp, start, off, nbytes)) != NULL) { 408168404Spjd caddr_t va; 409168404Spjd 410168404Spjd VM_OBJECT_UNLOCK(obj); 411209962Smm va = zfs_map_page(pp, &sf); 412209962Smm if (segflg == UIO_NOCOPY) { 413209962Smm (void) dmu_write(os, oid, start+off, nbytes, 414209962Smm va+off, tx); 415209962Smm } else { 416209962Smm (void) dmu_read(os, oid, start+off, nbytes, 417216378Spjd va+off, DMU_READ_PREFETCH); 418169059Spjd } 419209962Smm zfs_unmap_page(sf); 420168404Spjd VM_OBJECT_LOCK(obj); 421209962Smm page_unlock(pp); 422168404Spjd } 423209962Smm len -= nbytes; 424168404Spjd off = 0; 425168404Spjd } 426168404Spjd VM_OBJECT_UNLOCK(obj); 427168404Spjd} 428168404Spjd 429168404Spjd/* 430219089Spjd * Read with UIO_NOCOPY flag means that sendfile(2) requests 431219089Spjd * ZFS to populate a range of page cache pages with data. 432219089Spjd * 433219089Spjd * NOTE: this function could be optimized to pre-allocate 434219089Spjd * all pages in advance, drain VPO_BUSY on all of them, 435219089Spjd * map them into contiguous KVA region and populate them 436219089Spjd * in one single dmu_read() call. 
437219089Spjd */ 438219089Spjdstatic int 439219089Spjdmappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) 440219089Spjd{ 441219089Spjd znode_t *zp = VTOZ(vp); 442219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 443219089Spjd struct sf_buf *sf; 444219089Spjd vm_object_t obj; 445219089Spjd vm_page_t pp; 446219089Spjd int64_t start; 447219089Spjd caddr_t va; 448219089Spjd int len = nbytes; 449219089Spjd int off; 450219089Spjd int error = 0; 451219089Spjd 452219089Spjd ASSERT(uio->uio_segflg == UIO_NOCOPY); 453219089Spjd ASSERT(vp->v_mount != NULL); 454219089Spjd obj = vp->v_object; 455219089Spjd ASSERT(obj != NULL); 456219089Spjd ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); 457219089Spjd 458219089Spjd VM_OBJECT_LOCK(obj); 459219089Spjd for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { 460219089Spjd int bytes = MIN(PAGESIZE, len); 461219089Spjd 462219089Spjd pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_NOBUSY | 463219089Spjd VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_IGN_SBUSY); 464219089Spjd if (pp->valid == 0) { 465219089Spjd vm_page_io_start(pp); 466219089Spjd VM_OBJECT_UNLOCK(obj); 467219089Spjd va = zfs_map_page(pp, &sf); 468219089Spjd error = dmu_read(os, zp->z_id, start, bytes, va, 469219089Spjd DMU_READ_PREFETCH); 470219089Spjd if (bytes != PAGESIZE && error == 0) 471219089Spjd bzero(va + bytes, PAGESIZE - bytes); 472219089Spjd zfs_unmap_page(sf); 473219089Spjd VM_OBJECT_LOCK(obj); 474219089Spjd vm_page_io_finish(pp); 475219089Spjd vm_page_lock(pp); 476219089Spjd if (error) { 477219089Spjd vm_page_free(pp); 478219089Spjd } else { 479219089Spjd pp->valid = VM_PAGE_BITS_ALL; 480219089Spjd vm_page_activate(pp); 481219089Spjd } 482219089Spjd vm_page_unlock(pp); 483219089Spjd } 484219089Spjd if (error) 485219089Spjd break; 486219089Spjd uio->uio_resid -= bytes; 487219089Spjd uio->uio_offset += bytes; 488219089Spjd len -= bytes; 489219089Spjd } 490219089Spjd VM_OBJECT_UNLOCK(obj); 491219089Spjd return (error); 492219089Spjd} 493219089Spjd 
494219089Spjd/* 495168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 496168404Spjd * between the DMU cache and the memory mapped pages. What this means: 497168404Spjd * 498168404Spjd * On Read: We "read" preferentially from memory mapped pages, 499168404Spjd * else we default from the dmu buffer. 500168404Spjd * 501168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 502168404Spjd * the file is memory mapped. 503168404Spjd */ 504168404Spjdstatic int 505168404Spjdmappedread(vnode_t *vp, int nbytes, uio_t *uio) 506168404Spjd{ 507168404Spjd znode_t *zp = VTOZ(vp); 508168404Spjd objset_t *os = zp->z_zfsvfs->z_os; 509168404Spjd vm_object_t obj; 510212655Savg int64_t start; 511168926Spjd caddr_t va; 512168404Spjd int len = nbytes; 513212655Savg int off; 514168404Spjd int error = 0; 515168404Spjd 516168404Spjd ASSERT(vp->v_mount != NULL); 517168404Spjd obj = vp->v_object; 518168404Spjd ASSERT(obj != NULL); 519168404Spjd 520168404Spjd start = uio->uio_loffset; 521168404Spjd off = start & PAGEOFFSET; 522168404Spjd VM_OBJECT_LOCK(obj); 523168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 524219089Spjd vm_page_t pp; 525219089Spjd uint64_t bytes = MIN(PAGESIZE - off, len); 526168404Spjd 527219089Spjd if (pp = page_lookup(vp, start, off, bytes)) { 528219089Spjd struct sf_buf *sf; 529219089Spjd caddr_t va; 530212652Savg 531168404Spjd VM_OBJECT_UNLOCK(obj); 532219089Spjd va = zfs_map_page(pp, &sf); 533219089Spjd error = uiomove(va + off, bytes, UIO_READ, uio); 534219089Spjd zfs_unmap_page(sf); 535168404Spjd VM_OBJECT_LOCK(obj); 536219089Spjd page_unlock(pp); 537219089Spjd } else { 538168926Spjd VM_OBJECT_UNLOCK(obj); 539219089Spjd error = dmu_read_uio(os, zp->z_id, uio, bytes); 540168926Spjd VM_OBJECT_LOCK(obj); 541168404Spjd } 542168404Spjd len -= bytes; 543168404Spjd off = 0; 544168404Spjd if (error) 545168404Spjd break; 546168404Spjd } 547168404Spjd VM_OBJECT_UNLOCK(obj); 548168404Spjd return (error); 
549168404Spjd} 550168404Spjd 551168404Spjdoffset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 552168404Spjd 553168404Spjd/* 554168404Spjd * Read bytes from specified file into supplied buffer. 555168404Spjd * 556168404Spjd * IN: vp - vnode of file to be read from. 557168404Spjd * uio - structure supplying read location, range info, 558168404Spjd * and return buffer. 559168404Spjd * ioflag - SYNC flags; used to provide FRSYNC semantics. 560168404Spjd * cr - credentials of caller. 561185029Spjd * ct - caller context 562168404Spjd * 563168404Spjd * OUT: uio - updated offset and range, buffer filled. 564168404Spjd * 565168404Spjd * RETURN: 0 if success 566168404Spjd * error code if failure 567168404Spjd * 568168404Spjd * Side Effects: 569168404Spjd * vp - atime updated if byte count > 0 570168404Spjd */ 571168404Spjd/* ARGSUSED */ 572168404Spjdstatic int 573168962Spjdzfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 574168404Spjd{ 575168404Spjd znode_t *zp = VTOZ(vp); 576168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 577185029Spjd objset_t *os; 578168404Spjd ssize_t n, nbytes; 579168404Spjd int error; 580168404Spjd rl_t *rl; 581219089Spjd xuio_t *xuio = NULL; 582168404Spjd 583168404Spjd ZFS_ENTER(zfsvfs); 584185029Spjd ZFS_VERIFY_ZP(zp); 585185029Spjd os = zfsvfs->z_os; 586168404Spjd 587219089Spjd if (zp->z_pflags & ZFS_AV_QUARANTINED) { 588185029Spjd ZFS_EXIT(zfsvfs); 589185029Spjd return (EACCES); 590185029Spjd } 591185029Spjd 592168404Spjd /* 593168404Spjd * Validate file offset 594168404Spjd */ 595168404Spjd if (uio->uio_loffset < (offset_t)0) { 596168404Spjd ZFS_EXIT(zfsvfs); 597168404Spjd return (EINVAL); 598168404Spjd } 599168404Spjd 600168404Spjd /* 601168404Spjd * Fasttrack empty reads 602168404Spjd */ 603168404Spjd if (uio->uio_resid == 0) { 604168404Spjd ZFS_EXIT(zfsvfs); 605168404Spjd return (0); 606168404Spjd } 607168404Spjd 608168404Spjd /* 609168962Spjd * Check for mandatory locks 610168962Spjd */ 611219089Spjd if 
(MANDMODE(zp->z_mode)) { 612168962Spjd if (error = chklock(vp, FREAD, 613168962Spjd uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 614168962Spjd ZFS_EXIT(zfsvfs); 615168962Spjd return (error); 616168962Spjd } 617168962Spjd } 618168962Spjd 619168962Spjd /* 620168404Spjd * If we're in FRSYNC mode, sync out this znode before reading it. 621168404Spjd */ 622224605Smm if (zfsvfs->z_log && 623224605Smm (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) 624219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 625168404Spjd 626168404Spjd /* 627168404Spjd * Lock the range against changes. 628168404Spjd */ 629168404Spjd rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 630168404Spjd 631168404Spjd /* 632168404Spjd * If we are reading past end-of-file we can skip 633168404Spjd * to the end; but we might still need to set atime. 634168404Spjd */ 635219089Spjd if (uio->uio_loffset >= zp->z_size) { 636168404Spjd error = 0; 637168404Spjd goto out; 638168404Spjd } 639168404Spjd 640219089Spjd ASSERT(uio->uio_loffset < zp->z_size); 641219089Spjd n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 642168404Spjd 643219089Spjd#ifdef sun 644219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 645219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 646219089Spjd int nblk; 647219089Spjd int blksz = zp->z_blksz; 648219089Spjd uint64_t offset = uio->uio_loffset; 649219089Spjd 650219089Spjd xuio = (xuio_t *)uio; 651219089Spjd if ((ISP2(blksz))) { 652219089Spjd nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 653219089Spjd blksz)) / blksz; 654219089Spjd } else { 655219089Spjd ASSERT(offset + n <= blksz); 656219089Spjd nblk = 1; 657219089Spjd } 658219089Spjd (void) dmu_xuio_init(xuio, nblk); 659219089Spjd 660219089Spjd if (vn_has_cached_data(vp)) { 661219089Spjd /* 662219089Spjd * For simplicity, we always allocate a full buffer 663219089Spjd * even if we only expect to read a portion of a block. 
664219089Spjd */ 665219089Spjd while (--nblk >= 0) { 666219089Spjd (void) dmu_xuio_add(xuio, 667219089Spjd dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 668219089Spjd blksz), 0, blksz); 669219089Spjd } 670219089Spjd } 671219089Spjd } 672219089Spjd#endif /* sun */ 673219089Spjd 674168404Spjd while (n > 0) { 675168404Spjd nbytes = MIN(n, zfs_read_chunk_size - 676168404Spjd P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 677168404Spjd 678219089Spjd#ifdef __FreeBSD__ 679219089Spjd if (uio->uio_segflg == UIO_NOCOPY) 680219089Spjd error = mappedread_sf(vp, nbytes, uio); 681219089Spjd else 682219089Spjd#endif /* __FreeBSD__ */ 683168404Spjd if (vn_has_cached_data(vp)) 684168404Spjd error = mappedread(vp, nbytes, uio); 685168404Spjd else 686168404Spjd error = dmu_read_uio(os, zp->z_id, uio, nbytes); 687185029Spjd if (error) { 688185029Spjd /* convert checksum errors into IO errors */ 689185029Spjd if (error == ECKSUM) 690185029Spjd error = EIO; 691168404Spjd break; 692185029Spjd } 693168962Spjd 694168404Spjd n -= nbytes; 695168404Spjd } 696168404Spjdout: 697168404Spjd zfs_range_unlock(rl); 698168404Spjd 699168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 700168404Spjd ZFS_EXIT(zfsvfs); 701168404Spjd return (error); 702168404Spjd} 703168404Spjd 704168404Spjd/* 705168404Spjd * Write the bytes to a file. 706168404Spjd * 707168404Spjd * IN: vp - vnode of file to be written to. 708168404Spjd * uio - structure supplying write location, range info, 709168404Spjd * and data buffer. 710213673Spjd * ioflag - FAPPEND flag set if in append mode. 711168404Spjd * cr - credentials of caller. 712185029Spjd * ct - caller context (NFS/CIFS fem monitor only) 713168404Spjd * 714168404Spjd * OUT: uio - updated offset and range. 
715168404Spjd * 716168404Spjd * RETURN: 0 if success 717168404Spjd * error code if failure 718168404Spjd * 719168404Spjd * Timestamps: 720168404Spjd * vp - ctime|mtime updated if byte count > 0 721168404Spjd */ 722219089Spjd 723168404Spjd/* ARGSUSED */ 724168404Spjdstatic int 725168962Spjdzfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 726168404Spjd{ 727168404Spjd znode_t *zp = VTOZ(vp); 728168962Spjd rlim64_t limit = MAXOFFSET_T; 729168404Spjd ssize_t start_resid = uio->uio_resid; 730168404Spjd ssize_t tx_bytes; 731168404Spjd uint64_t end_size; 732168404Spjd dmu_tx_t *tx; 733168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 734185029Spjd zilog_t *zilog; 735168404Spjd offset_t woff; 736168404Spjd ssize_t n, nbytes; 737168404Spjd rl_t *rl; 738168404Spjd int max_blksz = zfsvfs->z_max_blksz; 739168404Spjd int error; 740209962Smm arc_buf_t *abuf; 741219089Spjd iovec_t *aiov; 742219089Spjd xuio_t *xuio = NULL; 743219089Spjd int i_iov = 0; 744219089Spjd int iovcnt = uio->uio_iovcnt; 745219089Spjd iovec_t *iovp = uio->uio_iov; 746219089Spjd int write_eof; 747219089Spjd int count = 0; 748219089Spjd sa_bulk_attr_t bulk[4]; 749219089Spjd uint64_t mtime[2], ctime[2]; 750168404Spjd 751168404Spjd /* 752168404Spjd * Fasttrack empty write 753168404Spjd */ 754168404Spjd n = start_resid; 755168404Spjd if (n == 0) 756168404Spjd return (0); 757168404Spjd 758168962Spjd if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 759168962Spjd limit = MAXOFFSET_T; 760168962Spjd 761168404Spjd ZFS_ENTER(zfsvfs); 762185029Spjd ZFS_VERIFY_ZP(zp); 763168404Spjd 764219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 765219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 766219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 767219089Spjd &zp->z_size, 8); 768219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 769219089Spjd &zp->z_pflags, 8); 770219089Spjd 771168404Spjd /* 
772185029Spjd * If immutable or not appending then return EPERM 773185029Spjd */ 774219089Spjd if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 775219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 776219089Spjd (uio->uio_loffset < zp->z_size))) { 777185029Spjd ZFS_EXIT(zfsvfs); 778185029Spjd return (EPERM); 779185029Spjd } 780185029Spjd 781185029Spjd zilog = zfsvfs->z_log; 782185029Spjd 783185029Spjd /* 784219089Spjd * Validate file offset 785219089Spjd */ 786219089Spjd woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 787219089Spjd if (woff < 0) { 788219089Spjd ZFS_EXIT(zfsvfs); 789219089Spjd return (EINVAL); 790219089Spjd } 791219089Spjd 792219089Spjd /* 793219089Spjd * Check for mandatory locks before calling zfs_range_lock() 794219089Spjd * in order to prevent a deadlock with locks set via fcntl(). 795219089Spjd */ 796219089Spjd if (MANDMODE((mode_t)zp->z_mode) && 797219089Spjd (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 798219089Spjd ZFS_EXIT(zfsvfs); 799219089Spjd return (error); 800219089Spjd } 801219089Spjd 802219089Spjd#ifdef sun 803219089Spjd /* 804168404Spjd * Pre-fault the pages to ensure slow (eg NFS) pages 805168404Spjd * don't hold up txg. 806219089Spjd * Skip this if uio contains loaned arc_buf. 807168404Spjd */ 808219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 809219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 810219089Spjd xuio = (xuio_t *)uio; 811219089Spjd else 812219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 813219089Spjd#endif /* sun */ 814168404Spjd 815168404Spjd /* 816168404Spjd * If in append mode, set the io offset pointer to eof. 817168404Spjd */ 818213673Spjd if (ioflag & FAPPEND) { 819168404Spjd /* 820219089Spjd * Obtain an appending range lock to guarantee file append 821219089Spjd * semantics. We reset the write offset once we have the lock. 
822168404Spjd */ 823168404Spjd rl = zfs_range_lock(zp, 0, n, RL_APPEND); 824219089Spjd woff = rl->r_off; 825168404Spjd if (rl->r_len == UINT64_MAX) { 826219089Spjd /* 827219089Spjd * We overlocked the file because this write will cause 828219089Spjd * the file block size to increase. 829219089Spjd * Note that zp_size cannot change with this lock held. 830219089Spjd */ 831219089Spjd woff = zp->z_size; 832168404Spjd } 833219089Spjd uio->uio_loffset = woff; 834168404Spjd } else { 835168404Spjd /* 836219089Spjd * Note that if the file block size will change as a result of 837219089Spjd * this write, then this range lock will lock the entire file 838219089Spjd * so that we can re-write the block safely. 839168404Spjd */ 840168404Spjd rl = zfs_range_lock(zp, woff, n, RL_WRITER); 841168404Spjd } 842168404Spjd 843235781Strasz if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { 844235781Strasz zfs_range_unlock(rl); 845235781Strasz ZFS_EXIT(zfsvfs); 846235781Strasz return (EFBIG); 847235781Strasz } 848235781Strasz 849168962Spjd if (woff >= limit) { 850168962Spjd zfs_range_unlock(rl); 851168962Spjd ZFS_EXIT(zfsvfs); 852168962Spjd return (EFBIG); 853168962Spjd } 854168962Spjd 855168962Spjd if ((woff + n) > limit || woff > (limit - n)) 856168962Spjd n = limit - woff; 857168962Spjd 858219089Spjd /* Will this write extend the file length? */ 859219089Spjd write_eof = (woff + n > zp->z_size); 860168404Spjd 861219089Spjd end_size = MAX(zp->z_size, woff + n); 862219089Spjd 863168404Spjd /* 864168404Spjd * Write the file in reasonable size chunks. Each chunk is written 865168404Spjd * in a separate transaction; this keeps the intent log records small 866168404Spjd * and allows us to do more fine-grained space accounting. 
867168404Spjd */ 868168404Spjd while (n > 0) { 869209962Smm abuf = NULL; 870209962Smm woff = uio->uio_loffset; 871209962Smmagain: 872219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 873219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 874209962Smm if (abuf != NULL) 875209962Smm dmu_return_arcbuf(abuf); 876209962Smm error = EDQUOT; 877209962Smm break; 878209962Smm } 879209962Smm 880219089Spjd if (xuio && abuf == NULL) { 881219089Spjd ASSERT(i_iov < iovcnt); 882219089Spjd aiov = &iovp[i_iov]; 883219089Spjd abuf = dmu_xuio_arcbuf(xuio, i_iov); 884219089Spjd dmu_xuio_clear(xuio, i_iov); 885219089Spjd DTRACE_PROBE3(zfs_cp_write, int, i_iov, 886219089Spjd iovec_t *, aiov, arc_buf_t *, abuf); 887219089Spjd ASSERT((aiov->iov_base == abuf->b_data) || 888219089Spjd ((char *)aiov->iov_base - (char *)abuf->b_data + 889219089Spjd aiov->iov_len == arc_buf_size(abuf))); 890219089Spjd i_iov++; 891219089Spjd } else if (abuf == NULL && n >= max_blksz && 892219089Spjd woff >= zp->z_size && 893209962Smm P2PHASE(woff, max_blksz) == 0 && 894209962Smm zp->z_blksz == max_blksz) { 895219089Spjd /* 896219089Spjd * This write covers a full block. "Borrow" a buffer 897219089Spjd * from the dmu so that we can fill it before we enter 898219089Spjd * a transaction. This avoids the possibility of 899219089Spjd * holding up the transaction if the data copy hangs 900219089Spjd * up on a pagefault (e.g., from an NFS server mapping). 901219089Spjd */ 902209962Smm size_t cbytes; 903209962Smm 904219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 905219089Spjd max_blksz); 906209962Smm ASSERT(abuf != NULL); 907209962Smm ASSERT(arc_buf_size(abuf) == max_blksz); 908209962Smm if (error = uiocopy(abuf->b_data, max_blksz, 909209962Smm UIO_WRITE, uio, &cbytes)) { 910209962Smm dmu_return_arcbuf(abuf); 911209962Smm break; 912209962Smm } 913209962Smm ASSERT(cbytes == max_blksz); 914209962Smm } 915209962Smm 916209962Smm /* 917168404Spjd * Start a transaction. 
918168404Spjd */ 919168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 920219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 921168404Spjd dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 922219089Spjd zfs_sa_upgrade_txholds(tx, zp); 923209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 924168404Spjd if (error) { 925209962Smm if (error == ERESTART) { 926168404Spjd dmu_tx_wait(tx); 927168404Spjd dmu_tx_abort(tx); 928209962Smm goto again; 929168404Spjd } 930168404Spjd dmu_tx_abort(tx); 931209962Smm if (abuf != NULL) 932209962Smm dmu_return_arcbuf(abuf); 933168404Spjd break; 934168404Spjd } 935168404Spjd 936168404Spjd /* 937168404Spjd * If zfs_range_lock() over-locked we grow the blocksize 938168404Spjd * and then reduce the lock range. This will only happen 939168404Spjd * on the first iteration since zfs_range_reduce() will 940168404Spjd * shrink down r_len to the appropriate size. 941168404Spjd */ 942168404Spjd if (rl->r_len == UINT64_MAX) { 943168404Spjd uint64_t new_blksz; 944168404Spjd 945168404Spjd if (zp->z_blksz > max_blksz) { 946168404Spjd ASSERT(!ISP2(zp->z_blksz)); 947168404Spjd new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); 948168404Spjd } else { 949168404Spjd new_blksz = MIN(end_size, max_blksz); 950168404Spjd } 951168404Spjd zfs_grow_blocksize(zp, new_blksz, tx); 952168404Spjd zfs_range_reduce(rl, woff, n); 953168404Spjd } 954168404Spjd 955168404Spjd /* 956168404Spjd * XXX - should we really limit each write to z_max_blksz? 957168404Spjd * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 
958168404Spjd */ 959168404Spjd nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 960168404Spjd 961219089Spjd if (woff + nbytes > zp->z_size) 962168404Spjd vnode_pager_setsize(vp, woff + nbytes); 963168404Spjd 964209962Smm if (abuf == NULL) { 965209962Smm tx_bytes = uio->uio_resid; 966219089Spjd error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 967219089Spjd uio, nbytes, tx); 968209962Smm tx_bytes -= uio->uio_resid; 969168404Spjd } else { 970209962Smm tx_bytes = nbytes; 971219089Spjd ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 972219089Spjd /* 973219089Spjd * If this is not a full block write, but we are 974219089Spjd * extending the file past EOF and this data starts 975219089Spjd * block-aligned, use assign_arcbuf(). Otherwise, 976219089Spjd * write via dmu_write(). 977219089Spjd */ 978219089Spjd if (tx_bytes < max_blksz && (!write_eof || 979219089Spjd aiov->iov_base != abuf->b_data)) { 980219089Spjd ASSERT(xuio); 981219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, woff, 982219089Spjd aiov->iov_len, aiov->iov_base, tx); 983219089Spjd dmu_return_arcbuf(abuf); 984219089Spjd xuio_stat_wbuf_copied(); 985219089Spjd } else { 986219089Spjd ASSERT(xuio || tx_bytes == max_blksz); 987219089Spjd dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 988219089Spjd woff, abuf, tx); 989219089Spjd } 990209962Smm ASSERT(tx_bytes <= uio->uio_resid); 991209962Smm uioskip(uio, tx_bytes); 992168404Spjd } 993212657Savg if (tx_bytes && vn_has_cached_data(vp)) { 994209962Smm update_pages(vp, woff, tx_bytes, zfsvfs->z_os, 995209962Smm zp->z_id, uio->uio_segflg, tx); 996209962Smm } 997209962Smm 998209962Smm /* 999168404Spjd * If we made no progress, we're done. If we made even 1000168404Spjd * partial progress, update the znode and ZIL accordingly. 
1001168404Spjd */ 1002168404Spjd if (tx_bytes == 0) { 1003219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 1004219089Spjd (void *)&zp->z_size, sizeof (uint64_t), tx); 1005168404Spjd dmu_tx_commit(tx); 1006168404Spjd ASSERT(error != 0); 1007168404Spjd break; 1008168404Spjd } 1009168404Spjd 1010168404Spjd /* 1011168404Spjd * Clear Set-UID/Set-GID bits on successful write if not 1012168404Spjd * privileged and at least one of the excute bits is set. 1013168404Spjd * 1014168404Spjd * It would be nice to to this after all writes have 1015168404Spjd * been done, but that would still expose the ISUID/ISGID 1016168404Spjd * to another app after the partial write is committed. 1017185029Spjd * 1018185029Spjd * Note: we don't call zfs_fuid_map_id() here because 1019185029Spjd * user 0 is not an ephemeral uid. 1020168404Spjd */ 1021168404Spjd mutex_enter(&zp->z_acl_lock); 1022219089Spjd if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 1023168404Spjd (S_IXUSR >> 6))) != 0 && 1024219089Spjd (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 1025185029Spjd secpolicy_vnode_setid_retain(vp, cr, 1026219089Spjd (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 1027219089Spjd uint64_t newmode; 1028219089Spjd zp->z_mode &= ~(S_ISUID | S_ISGID); 1029219089Spjd newmode = zp->z_mode; 1030219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 1031219089Spjd (void *)&newmode, sizeof (uint64_t), tx); 1032168404Spjd } 1033168404Spjd mutex_exit(&zp->z_acl_lock); 1034168404Spjd 1035219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 1036219089Spjd B_TRUE); 1037168404Spjd 1038168404Spjd /* 1039168404Spjd * Update the file size (zp_size) if it has changed; 1040168404Spjd * account for possible concurrent updates. 
1041168404Spjd */ 1042219089Spjd while ((end_size = zp->z_size) < uio->uio_loffset) { 1043219089Spjd (void) atomic_cas_64(&zp->z_size, end_size, 1044168404Spjd uio->uio_loffset); 1045219089Spjd ASSERT(error == 0); 1046219089Spjd } 1047219089Spjd /* 1048219089Spjd * If we are replaying and eof is non zero then force 1049219089Spjd * the file size to the specified eof. Note, there's no 1050219089Spjd * concurrency during replay. 1051219089Spjd */ 1052219089Spjd if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 1053219089Spjd zp->z_size = zfsvfs->z_replay_eof; 1054219089Spjd 1055219089Spjd error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1056219089Spjd 1057168404Spjd zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 1058168404Spjd dmu_tx_commit(tx); 1059168404Spjd 1060168404Spjd if (error != 0) 1061168404Spjd break; 1062168404Spjd ASSERT(tx_bytes == nbytes); 1063168404Spjd n -= nbytes; 1064219089Spjd 1065219089Spjd#ifdef sun 1066219089Spjd if (!xuio && n > 0) 1067219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 1068219089Spjd#endif /* sun */ 1069168404Spjd } 1070168404Spjd 1071168404Spjd zfs_range_unlock(rl); 1072168404Spjd 1073168404Spjd /* 1074168404Spjd * If we're in replay mode, or we made no progress, return error. 1075168404Spjd * Otherwise, it's at least a partial write, so it's successful. 
1076168404Spjd */ 1077209962Smm if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1078168404Spjd ZFS_EXIT(zfsvfs); 1079168404Spjd return (error); 1080168404Spjd } 1081168404Spjd 1082219089Spjd if (ioflag & (FSYNC | FDSYNC) || 1083219089Spjd zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1084219089Spjd zil_commit(zilog, zp->z_id); 1085168404Spjd 1086168404Spjd ZFS_EXIT(zfsvfs); 1087168404Spjd return (0); 1088168404Spjd} 1089168404Spjd 1090168404Spjdvoid 1091219089Spjdzfs_get_done(zgd_t *zgd, int error) 1092168404Spjd{ 1093219089Spjd znode_t *zp = zgd->zgd_private; 1094219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 1095168404Spjd int vfslocked; 1096168404Spjd 1097219089Spjd if (zgd->zgd_db) 1098219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1099219089Spjd 1100219089Spjd zfs_range_unlock(zgd->zgd_rl); 1101219089Spjd 1102219089Spjd vfslocked = VFS_LOCK_GIANT(zp->z_zfsvfs->z_vfs); 1103191900Skmacy /* 1104191900Skmacy * Release the vnode asynchronously as we currently have the 1105191900Skmacy * txg stopped from syncing. 1106191900Skmacy */ 1107219089Spjd VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1108219089Spjd 1109219089Spjd if (error == 0 && zgd->zgd_bp) 1110219089Spjd zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1111219089Spjd 1112168404Spjd kmem_free(zgd, sizeof (zgd_t)); 1113168404Spjd VFS_UNLOCK_GIANT(vfslocked); 1114168404Spjd} 1115168404Spjd 1116214378Smm#ifdef DEBUG 1117214378Smmstatic int zil_fault_io = 0; 1118214378Smm#endif 1119214378Smm 1120168404Spjd/* 1121168404Spjd * Get data to generate a TX_WRITE intent log record. 
1122168404Spjd */ 1123168404Spjdint 1124168404Spjdzfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1125168404Spjd{ 1126168404Spjd zfsvfs_t *zfsvfs = arg; 1127168404Spjd objset_t *os = zfsvfs->z_os; 1128168404Spjd znode_t *zp; 1129219089Spjd uint64_t object = lr->lr_foid; 1130219089Spjd uint64_t offset = lr->lr_offset; 1131219089Spjd uint64_t size = lr->lr_length; 1132219089Spjd blkptr_t *bp = &lr->lr_blkptr; 1133168404Spjd dmu_buf_t *db; 1134168404Spjd zgd_t *zgd; 1135168404Spjd int error = 0; 1136168404Spjd 1137219089Spjd ASSERT(zio != NULL); 1138219089Spjd ASSERT(size != 0); 1139168404Spjd 1140168404Spjd /* 1141168404Spjd * Nothing to do if the file has been removed 1142168404Spjd */ 1143219089Spjd if (zfs_zget(zfsvfs, object, &zp) != 0) 1144168404Spjd return (ENOENT); 1145168404Spjd if (zp->z_unlinked) { 1146191900Skmacy /* 1147191900Skmacy * Release the vnode asynchronously as we currently have the 1148191900Skmacy * txg stopped from syncing. 1149191900Skmacy */ 1150196307Spjd VN_RELE_ASYNC(ZTOV(zp), 1151196307Spjd dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1152168404Spjd return (ENOENT); 1153168404Spjd } 1154168404Spjd 1155219089Spjd zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1156219089Spjd zgd->zgd_zilog = zfsvfs->z_log; 1157219089Spjd zgd->zgd_private = zp; 1158219089Spjd 1159168404Spjd /* 1160168404Spjd * Write records come in two flavors: immediate and indirect. 1161168404Spjd * For small writes it's cheaper to store the data with the 1162168404Spjd * log record (immediate); for large writes it's cheaper to 1163168404Spjd * sync the data and get a pointer to it (indirect) so that 1164168404Spjd * we don't have to write the data twice. 
1165168404Spjd */ 1166168404Spjd if (buf != NULL) { /* immediate write */ 1167219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1168168404Spjd /* test for truncation needs to be done while range locked */ 1169219089Spjd if (offset >= zp->z_size) { 1170168404Spjd error = ENOENT; 1171219089Spjd } else { 1172219089Spjd error = dmu_read(os, object, offset, size, buf, 1173219089Spjd DMU_READ_NO_PREFETCH); 1174168404Spjd } 1175219089Spjd ASSERT(error == 0 || error == ENOENT); 1176168404Spjd } else { /* indirect write */ 1177168404Spjd /* 1178168404Spjd * Have to lock the whole block to ensure when it's 1179168404Spjd * written out and it's checksum is being calculated 1180168404Spjd * that no one can change the data. We need to re-check 1181168404Spjd * blocksize after we get the lock in case it's changed! 1182168404Spjd */ 1183168404Spjd for (;;) { 1184219089Spjd uint64_t blkoff; 1185219089Spjd size = zp->z_blksz; 1186219089Spjd blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; 1187219089Spjd offset -= blkoff; 1188219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1189219089Spjd RL_READER); 1190219089Spjd if (zp->z_blksz == size) 1191168404Spjd break; 1192219089Spjd offset += blkoff; 1193219089Spjd zfs_range_unlock(zgd->zgd_rl); 1194168404Spjd } 1195168404Spjd /* test for truncation needs to be done while range locked */ 1196219089Spjd if (lr->lr_offset >= zp->z_size) 1197168404Spjd error = ENOENT; 1198214378Smm#ifdef DEBUG 1199214378Smm if (zil_fault_io) { 1200214378Smm error = EIO; 1201214378Smm zil_fault_io = 0; 1202214378Smm } 1203214378Smm#endif 1204219089Spjd if (error == 0) 1205219089Spjd error = dmu_buf_hold(os, object, offset, zgd, &db, 1206219089Spjd DMU_READ_NO_PREFETCH); 1207214378Smm 1208209962Smm if (error == 0) { 1209219089Spjd zgd->zgd_db = db; 1210219089Spjd zgd->zgd_bp = bp; 1211219089Spjd 1212219089Spjd ASSERT(db->db_offset == offset); 1213219089Spjd ASSERT(db->db_size == size); 1214219089Spjd 1215219089Spjd error = 
dmu_sync(zio, lr->lr_common.lrc_txg, 1216219089Spjd zfs_get_done, zgd); 1217219089Spjd ASSERT(error || lr->lr_length <= zp->z_blksz); 1218219089Spjd 1219209962Smm /* 1220219089Spjd * On success, we need to wait for the write I/O 1221219089Spjd * initiated by dmu_sync() to complete before we can 1222219089Spjd * release this dbuf. We will finish everything up 1223219089Spjd * in the zfs_get_done() callback. 1224209962Smm */ 1225219089Spjd if (error == 0) 1226219089Spjd return (0); 1227209962Smm 1228219089Spjd if (error == EALREADY) { 1229219089Spjd lr->lr_common.lrc_txtype = TX_WRITE2; 1230219089Spjd error = 0; 1231219089Spjd } 1232209962Smm } 1233168404Spjd } 1234219089Spjd 1235219089Spjd zfs_get_done(zgd, error); 1236219089Spjd 1237168404Spjd return (error); 1238168404Spjd} 1239168404Spjd 1240168404Spjd/*ARGSUSED*/ 1241168404Spjdstatic int 1242185029Spjdzfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1243185029Spjd caller_context_t *ct) 1244168404Spjd{ 1245168404Spjd znode_t *zp = VTOZ(vp); 1246168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1247168404Spjd int error; 1248168404Spjd 1249168404Spjd ZFS_ENTER(zfsvfs); 1250185029Spjd ZFS_VERIFY_ZP(zp); 1251185029Spjd 1252185029Spjd if (flag & V_ACE_MASK) 1253185029Spjd error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1254185029Spjd else 1255185029Spjd error = zfs_zaccess_rwx(zp, mode, flag, cr); 1256185029Spjd 1257168404Spjd ZFS_EXIT(zfsvfs); 1258168404Spjd return (error); 1259168404Spjd} 1260168404Spjd 1261168404Spjd/* 1262211932Smm * If vnode is for a device return a specfs vnode instead. 
1263211932Smm */ 1264211932Smmstatic int 1265211932Smmspecvp_check(vnode_t **vpp, cred_t *cr) 1266211932Smm{ 1267211932Smm int error = 0; 1268211932Smm 1269211932Smm if (IS_DEVVP(*vpp)) { 1270211932Smm struct vnode *svp; 1271211932Smm 1272211932Smm svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1273211932Smm VN_RELE(*vpp); 1274211932Smm if (svp == NULL) 1275211932Smm error = ENOSYS; 1276211932Smm *vpp = svp; 1277211932Smm } 1278211932Smm return (error); 1279211932Smm} 1280211932Smm 1281211932Smm 1282211932Smm/* 1283168404Spjd * Lookup an entry in a directory, or an extended attribute directory. 1284168404Spjd * If it exists, return a held vnode reference for it. 1285168404Spjd * 1286168404Spjd * IN: dvp - vnode of directory to search. 1287168404Spjd * nm - name of entry to lookup. 1288168404Spjd * pnp - full pathname to lookup [UNUSED]. 1289168404Spjd * flags - LOOKUP_XATTR set if looking for an attribute. 1290168404Spjd * rdir - root directory vnode [UNUSED]. 1291168404Spjd * cr - credentials of caller. 1292185029Spjd * ct - caller context 1293185029Spjd * direntflags - directory lookup flags 1294185029Spjd * realpnp - returned pathname. 1295168404Spjd * 1296168404Spjd * OUT: vpp - vnode of located entry, NULL if not found. 
1297168404Spjd * 1298168404Spjd * RETURN: 0 if success 1299168404Spjd * error code if failure 1300168404Spjd * 1301168404Spjd * Timestamps: 1302168404Spjd * NA 1303168404Spjd */ 1304168404Spjd/* ARGSUSED */ 1305168962Spjdstatic int 1306168962Spjdzfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1307185029Spjd int nameiop, cred_t *cr, kthread_t *td, int flags) 1308168404Spjd{ 1309168962Spjd znode_t *zdp = VTOZ(dvp); 1310168962Spjd zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1311211932Smm int error = 0; 1312185029Spjd int *direntflags = NULL; 1313185029Spjd void *realpnp = NULL; 1314168404Spjd 1315211932Smm /* fast path */ 1316211932Smm if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { 1317211932Smm 1318211932Smm if (dvp->v_type != VDIR) { 1319211932Smm return (ENOTDIR); 1320219089Spjd } else if (zdp->z_sa_hdl == NULL) { 1321211932Smm return (EIO); 1322211932Smm } 1323211932Smm 1324211932Smm if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { 1325211932Smm error = zfs_fastaccesschk_execute(zdp, cr); 1326211932Smm if (!error) { 1327211932Smm *vpp = dvp; 1328211932Smm VN_HOLD(*vpp); 1329211932Smm return (0); 1330211932Smm } 1331211932Smm return (error); 1332211932Smm } else { 1333211932Smm vnode_t *tvp = dnlc_lookup(dvp, nm); 1334211932Smm 1335211932Smm if (tvp) { 1336211932Smm error = zfs_fastaccesschk_execute(zdp, cr); 1337211932Smm if (error) { 1338211932Smm VN_RELE(tvp); 1339211932Smm return (error); 1340211932Smm } 1341211932Smm if (tvp == DNLC_NO_VNODE) { 1342211932Smm VN_RELE(tvp); 1343211932Smm return (ENOENT); 1344211932Smm } else { 1345211932Smm *vpp = tvp; 1346211932Smm return (specvp_check(vpp, cr)); 1347211932Smm } 1348211932Smm } 1349211932Smm } 1350211932Smm } 1351211932Smm 1352211932Smm DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1353211932Smm 1354168404Spjd ZFS_ENTER(zfsvfs); 1355185029Spjd ZFS_VERIFY_ZP(zdp); 1356168404Spjd 1357168404Spjd *vpp = NULL; 1358168404Spjd 1359185029Spjd if (flags & LOOKUP_XATTR) { 
1360168404Spjd#ifdef TODO 1361168404Spjd /* 1362168404Spjd * If the xattr property is off, refuse the lookup request. 1363168404Spjd */ 1364168404Spjd if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1365168404Spjd ZFS_EXIT(zfsvfs); 1366168404Spjd return (EINVAL); 1367168404Spjd } 1368185029Spjd#endif 1369168404Spjd 1370168404Spjd /* 1371168404Spjd * We don't allow recursive attributes.. 1372168404Spjd * Maybe someday we will. 1373168404Spjd */ 1374219089Spjd if (zdp->z_pflags & ZFS_XATTR) { 1375168404Spjd ZFS_EXIT(zfsvfs); 1376168404Spjd return (EINVAL); 1377168404Spjd } 1378168404Spjd 1379168404Spjd if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1380168404Spjd ZFS_EXIT(zfsvfs); 1381168404Spjd return (error); 1382168404Spjd } 1383168404Spjd 1384168404Spjd /* 1385168404Spjd * Do we have permission to get into attribute directory? 1386168404Spjd */ 1387168404Spjd 1388185029Spjd if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1389185029Spjd B_FALSE, cr)) { 1390168404Spjd VN_RELE(*vpp); 1391185029Spjd *vpp = NULL; 1392168404Spjd } 1393168404Spjd 1394168404Spjd ZFS_EXIT(zfsvfs); 1395168404Spjd return (error); 1396168404Spjd } 1397168404Spjd 1398168404Spjd if (dvp->v_type != VDIR) { 1399168404Spjd ZFS_EXIT(zfsvfs); 1400168404Spjd return (ENOTDIR); 1401168404Spjd } 1402168404Spjd 1403168404Spjd /* 1404168404Spjd * Check accessibility of directory. 
1405168404Spjd */ 1406168404Spjd 1407185029Spjd if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1408168404Spjd ZFS_EXIT(zfsvfs); 1409168404Spjd return (error); 1410168404Spjd } 1411168404Spjd 1412185029Spjd if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1413185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1414185029Spjd ZFS_EXIT(zfsvfs); 1415185029Spjd return (EILSEQ); 1416185029Spjd } 1417168404Spjd 1418185029Spjd error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1419211932Smm if (error == 0) 1420211932Smm error = specvp_check(vpp, cr); 1421168962Spjd 1422168404Spjd /* Translate errors and add SAVENAME when needed. */ 1423168404Spjd if (cnp->cn_flags & ISLASTCN) { 1424168404Spjd switch (nameiop) { 1425168404Spjd case CREATE: 1426168404Spjd case RENAME: 1427168404Spjd if (error == ENOENT) { 1428168404Spjd error = EJUSTRETURN; 1429168404Spjd cnp->cn_flags |= SAVENAME; 1430168404Spjd break; 1431168404Spjd } 1432168404Spjd /* FALLTHROUGH */ 1433168404Spjd case DELETE: 1434168404Spjd if (error == 0) 1435168404Spjd cnp->cn_flags |= SAVENAME; 1436168404Spjd break; 1437168404Spjd } 1438168404Spjd } 1439168404Spjd if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) { 1440169198Spjd int ltype = 0; 1441169198Spjd 1442169198Spjd if (cnp->cn_flags & ISDOTDOT) { 1443176559Sattilio ltype = VOP_ISLOCKED(dvp); 1444175294Sattilio VOP_UNLOCK(dvp, 0); 1445169198Spjd } 1446206667Spjd ZFS_EXIT(zfsvfs); 1447219089Spjd error = zfs_vnode_lock(*vpp, cnp->cn_lkflags); 1448168962Spjd if (cnp->cn_flags & ISDOTDOT) 1449175202Sattilio vn_lock(dvp, ltype | LK_RETRY); 1450169172Spjd if (error != 0) { 1451169172Spjd VN_RELE(*vpp); 1452169172Spjd *vpp = NULL; 1453169172Spjd return (error); 1454169172Spjd } 1455206667Spjd } else { 1456206667Spjd ZFS_EXIT(zfsvfs); 1457168404Spjd } 1458168404Spjd 1459168404Spjd#ifdef FREEBSD_NAMECACHE 1460168404Spjd /* 1461168404Spjd * Insert name into cache (as non-existent) if appropriate. 
1462168404Spjd */ 1463168404Spjd if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) 1464168404Spjd cache_enter(dvp, *vpp, cnp); 1465169170Spjd /* 1466169170Spjd * Insert name into cache if appropriate. 1467169170Spjd */ 1468168404Spjd if (error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1469168404Spjd if (!(cnp->cn_flags & ISLASTCN) || 1470168404Spjd (nameiop != DELETE && nameiop != RENAME)) { 1471168404Spjd cache_enter(dvp, *vpp, cnp); 1472168404Spjd } 1473168404Spjd } 1474168404Spjd#endif 1475168404Spjd 1476168404Spjd return (error); 1477168404Spjd} 1478168404Spjd 1479168404Spjd/* 1480168404Spjd * Attempt to create a new entry in a directory. If the entry 1481168404Spjd * already exists, truncate the file if permissible, else return 1482168404Spjd * an error. Return the vp of the created or trunc'd file. 1483168404Spjd * 1484168404Spjd * IN: dvp - vnode of directory to put new file entry in. 1485168404Spjd * name - name of new file entry. 1486168404Spjd * vap - attributes of new file. 1487168404Spjd * excl - flag indicating exclusive or non-exclusive mode. 1488168404Spjd * mode - mode to open file with. 1489168404Spjd * cr - credentials of caller. 1490168404Spjd * flag - large file flag [UNUSED]. 1491185029Spjd * ct - caller context 1492185029Spjd * vsecp - ACL to be set 1493168404Spjd * 1494168404Spjd * OUT: vpp - vnode of created or trunc'd entry. 
1495168404Spjd * 1496168404Spjd * RETURN: 0 if success 1497168404Spjd * error code if failure 1498168404Spjd * 1499168404Spjd * Timestamps: 1500168404Spjd * dvp - ctime|mtime updated if new entry created 1501168404Spjd * vp - ctime|mtime always, atime if new 1502168404Spjd */ 1503185029Spjd 1504168404Spjd/* ARGSUSED */ 1505168404Spjdstatic int 1506168962Spjdzfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 1507185029Spjd vnode_t **vpp, cred_t *cr, kthread_t *td) 1508168404Spjd{ 1509168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1510168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1511185029Spjd zilog_t *zilog; 1512185029Spjd objset_t *os; 1513168404Spjd zfs_dirlock_t *dl; 1514168404Spjd dmu_tx_t *tx; 1515168404Spjd int error; 1516209962Smm ksid_t *ksid; 1517209962Smm uid_t uid; 1518209962Smm gid_t gid = crgetgid(cr); 1519219089Spjd zfs_acl_ids_t acl_ids; 1520209962Smm boolean_t fuid_dirtied; 1521219089Spjd boolean_t have_acl = B_FALSE; 1522185029Spjd void *vsecp = NULL; 1523185029Spjd int flag = 0; 1524168404Spjd 1525185029Spjd /* 1526185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 1527185029Spjd * make sure file system is at proper version 1528185029Spjd */ 1529185029Spjd 1530209962Smm ksid = crgetsid(cr, KSID_OWNER); 1531209962Smm if (ksid) 1532209962Smm uid = ksid_getid(ksid); 1533209962Smm else 1534209962Smm uid = crgetuid(cr); 1535219089Spjd 1536185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 1537185029Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 1538219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1539185029Spjd return (EINVAL); 1540185029Spjd 1541168404Spjd ZFS_ENTER(zfsvfs); 1542185029Spjd ZFS_VERIFY_ZP(dzp); 1543185029Spjd os = zfsvfs->z_os; 1544185029Spjd zilog = zfsvfs->z_log; 1545168404Spjd 1546185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1547185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1548185029Spjd ZFS_EXIT(zfsvfs); 1549185029Spjd return (EILSEQ); 1550185029Spjd } 1551185029Spjd 1552185029Spjd 
if (vap->va_mask & AT_XVATTR) { 1553197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 1554185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 1555185029Spjd ZFS_EXIT(zfsvfs); 1556185029Spjd return (error); 1557185029Spjd } 1558185029Spjd } 1559168404Spjdtop: 1560168404Spjd *vpp = NULL; 1561168404Spjd 1562182905Strasz if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 1563182905Strasz vap->va_mode &= ~S_ISVTX; 1564168404Spjd 1565168404Spjd if (*name == '\0') { 1566168404Spjd /* 1567168404Spjd * Null component name refers to the directory itself. 1568168404Spjd */ 1569168404Spjd VN_HOLD(dvp); 1570168404Spjd zp = dzp; 1571168404Spjd dl = NULL; 1572168404Spjd error = 0; 1573168404Spjd } else { 1574168404Spjd /* possible VN_HOLD(zp) */ 1575185029Spjd int zflg = 0; 1576185029Spjd 1577185029Spjd if (flag & FIGNORECASE) 1578185029Spjd zflg |= ZCILOOK; 1579185029Spjd 1580185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1581185029Spjd NULL, NULL); 1582185029Spjd if (error) { 1583219089Spjd if (have_acl) 1584219089Spjd zfs_acl_ids_free(&acl_ids); 1585168404Spjd if (strcmp(name, "..") == 0) 1586168404Spjd error = EISDIR; 1587168404Spjd ZFS_EXIT(zfsvfs); 1588168404Spjd return (error); 1589168404Spjd } 1590168404Spjd } 1591219089Spjd 1592185029Spjd if (zp == NULL) { 1593185029Spjd uint64_t txtype; 1594168404Spjd 1595168404Spjd /* 1596168404Spjd * Create a new file object and update the directory 1597168404Spjd * to reference it. 1598168404Spjd */ 1599185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1600219089Spjd if (have_acl) 1601219089Spjd zfs_acl_ids_free(&acl_ids); 1602168404Spjd goto out; 1603168404Spjd } 1604168404Spjd 1605168404Spjd /* 1606168404Spjd * We only support the creation of regular files in 1607168404Spjd * extended attribute directories. 
1608168404Spjd */ 1609219089Spjd 1610219089Spjd if ((dzp->z_pflags & ZFS_XATTR) && 1611168404Spjd (vap->va_type != VREG)) { 1612219089Spjd if (have_acl) 1613219089Spjd zfs_acl_ids_free(&acl_ids); 1614168404Spjd error = EINVAL; 1615168404Spjd goto out; 1616168404Spjd } 1617168404Spjd 1618219089Spjd if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, 1619219089Spjd cr, vsecp, &acl_ids)) != 0) 1620219089Spjd goto out; 1621219089Spjd have_acl = B_TRUE; 1622209962Smm 1623209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 1624211932Smm zfs_acl_ids_free(&acl_ids); 1625209962Smm error = EDQUOT; 1626209962Smm goto out; 1627209962Smm } 1628209962Smm 1629168404Spjd tx = dmu_tx_create(os); 1630219089Spjd 1631219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 1632219089Spjd ZFS_SA_BASE_ATTR_SIZE); 1633219089Spjd 1634209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 1635209962Smm if (fuid_dirtied) 1636209962Smm zfs_fuid_txhold(zfsvfs, tx); 1637168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 1638219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 1639219089Spjd if (!zfsvfs->z_use_sa && 1640219089Spjd acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1641168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1642219089Spjd 0, acl_ids.z_aclp->z_acl_bytes); 1643185029Spjd } 1644209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 1645168404Spjd if (error) { 1646168404Spjd zfs_dirent_unlock(dl); 1647209962Smm if (error == ERESTART) { 1648168404Spjd dmu_tx_wait(tx); 1649168404Spjd dmu_tx_abort(tx); 1650168404Spjd goto top; 1651168404Spjd } 1652219089Spjd zfs_acl_ids_free(&acl_ids); 1653168404Spjd dmu_tx_abort(tx); 1654168404Spjd ZFS_EXIT(zfsvfs); 1655168404Spjd return (error); 1656168404Spjd } 1657219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1658209962Smm 1659209962Smm if (fuid_dirtied) 1660209962Smm zfs_fuid_sync(zfsvfs, tx); 1661209962Smm 1662168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 1663185029Spjd txtype = zfs_log_create_txtype(Z_FILE, 
vsecp, vap); 1664185029Spjd if (flag & FIGNORECASE) 1665185029Spjd txtype |= TX_CI; 1666185029Spjd zfs_log_create(zilog, tx, txtype, dzp, zp, name, 1667209962Smm vsecp, acl_ids.z_fuidp, vap); 1668209962Smm zfs_acl_ids_free(&acl_ids); 1669168404Spjd dmu_tx_commit(tx); 1670168404Spjd } else { 1671185029Spjd int aflags = (flag & FAPPEND) ? V_APPEND : 0; 1672185029Spjd 1673219089Spjd if (have_acl) 1674219089Spjd zfs_acl_ids_free(&acl_ids); 1675219089Spjd have_acl = B_FALSE; 1676219089Spjd 1677168404Spjd /* 1678168404Spjd * A directory entry already exists for this name. 1679168404Spjd */ 1680168404Spjd /* 1681168962Spjd * Can't truncate an existing file if in exclusive mode. 1682168962Spjd */ 1683168962Spjd if (excl == EXCL) { 1684168962Spjd error = EEXIST; 1685168962Spjd goto out; 1686168962Spjd } 1687168962Spjd /* 1688168404Spjd * Can't open a directory for writing. 1689168404Spjd */ 1690168404Spjd if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1691168404Spjd error = EISDIR; 1692168404Spjd goto out; 1693168404Spjd } 1694168404Spjd /* 1695168404Spjd * Verify requested access to file. 1696168404Spjd */ 1697185029Spjd if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 1698168404Spjd goto out; 1699168404Spjd } 1700168404Spjd 1701168404Spjd mutex_enter(&dzp->z_lock); 1702168404Spjd dzp->z_seq++; 1703168404Spjd mutex_exit(&dzp->z_lock); 1704168404Spjd 1705168404Spjd /* 1706168404Spjd * Truncate regular files if requested. 
1707168404Spjd */ 1708168404Spjd if ((ZTOV(zp)->v_type == VREG) && 1709168404Spjd (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 1710185029Spjd /* we can't hold any locks when calling zfs_freesp() */ 1711185029Spjd zfs_dirent_unlock(dl); 1712185029Spjd dl = NULL; 1713168404Spjd error = zfs_freesp(zp, 0, 0, mode, TRUE); 1714185029Spjd if (error == 0) { 1715185029Spjd vnevent_create(ZTOV(zp), ct); 1716168404Spjd } 1717168404Spjd } 1718168404Spjd } 1719168404Spjdout: 1720168404Spjd if (dl) 1721168404Spjd zfs_dirent_unlock(dl); 1722168404Spjd 1723168404Spjd if (error) { 1724168404Spjd if (zp) 1725168404Spjd VN_RELE(ZTOV(zp)); 1726168962Spjd } else { 1727168962Spjd *vpp = ZTOV(zp); 1728211932Smm error = specvp_check(vpp, cr); 1729168404Spjd } 1730168404Spjd 1731219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1732219089Spjd zil_commit(zilog, 0); 1733219089Spjd 1734168404Spjd ZFS_EXIT(zfsvfs); 1735168404Spjd return (error); 1736168404Spjd} 1737168404Spjd 1738168404Spjd/* 1739168404Spjd * Remove an entry from a directory. 1740168404Spjd * 1741168404Spjd * IN: dvp - vnode of directory to remove entry from. 1742168404Spjd * name - name of entry to remove. 1743168404Spjd * cr - credentials of caller. 
1744185029Spjd * ct - caller context 1745185029Spjd * flags - case flags 1746168404Spjd * 1747168404Spjd * RETURN: 0 if success 1748168404Spjd * error code if failure 1749168404Spjd * 1750168404Spjd * Timestamps: 1751168404Spjd * dvp - ctime|mtime 1752168404Spjd * vp - ctime (if nlink > 0) 1753168404Spjd */ 1754219089Spjd 1755219089Spjduint64_t null_xattr = 0; 1756219089Spjd 1757185029Spjd/*ARGSUSED*/ 1758168404Spjdstatic int 1759185029Spjdzfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 1760185029Spjd int flags) 1761168404Spjd{ 1762168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1763219089Spjd znode_t *xzp; 1764168404Spjd vnode_t *vp; 1765168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1766185029Spjd zilog_t *zilog; 1767168962Spjd uint64_t acl_obj, xattr_obj; 1768219089Spjd uint64_t xattr_obj_unlinked = 0; 1769219089Spjd uint64_t obj = 0; 1770168404Spjd zfs_dirlock_t *dl; 1771168404Spjd dmu_tx_t *tx; 1772168962Spjd boolean_t may_delete_now, delete_now = FALSE; 1773185029Spjd boolean_t unlinked, toobig = FALSE; 1774185029Spjd uint64_t txtype; 1775185029Spjd pathname_t *realnmp = NULL; 1776185029Spjd pathname_t realnm; 1777168404Spjd int error; 1778185029Spjd int zflg = ZEXISTS; 1779168404Spjd 1780168404Spjd ZFS_ENTER(zfsvfs); 1781185029Spjd ZFS_VERIFY_ZP(dzp); 1782185029Spjd zilog = zfsvfs->z_log; 1783168404Spjd 1784185029Spjd if (flags & FIGNORECASE) { 1785185029Spjd zflg |= ZCILOOK; 1786185029Spjd pn_alloc(&realnm); 1787185029Spjd realnmp = &realnm; 1788185029Spjd } 1789185029Spjd 1790168404Spjdtop: 1791219089Spjd xattr_obj = 0; 1792219089Spjd xzp = NULL; 1793168404Spjd /* 1794168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 
1795168404Spjd */ 1796185029Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1797185029Spjd NULL, realnmp)) { 1798185029Spjd if (realnmp) 1799185029Spjd pn_free(realnmp); 1800168404Spjd ZFS_EXIT(zfsvfs); 1801168404Spjd return (error); 1802168404Spjd } 1803168404Spjd 1804168404Spjd vp = ZTOV(zp); 1805168404Spjd 1806168962Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1807168404Spjd goto out; 1808168962Spjd } 1809168404Spjd 1810168962Spjd /* 1811168962Spjd * Need to use rmdir for removing directories. 1812168962Spjd */ 1813168962Spjd if (vp->v_type == VDIR) { 1814168962Spjd error = EPERM; 1815168962Spjd goto out; 1816168962Spjd } 1817168962Spjd 1818185029Spjd vnevent_remove(vp, dvp, name, ct); 1819168962Spjd 1820185029Spjd if (realnmp) 1821185029Spjd dnlc_remove(dvp, realnmp->pn_buf); 1822185029Spjd else 1823185029Spjd dnlc_remove(dvp, name); 1824168404Spjd 1825219089Spjd VI_LOCK(vp); 1826219089Spjd may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); 1827219089Spjd VI_UNLOCK(vp); 1828168962Spjd 1829168404Spjd /* 1830168404Spjd * We may delete the znode now, or we may put it in the unlinked set; 1831168404Spjd * it depends on whether we're the last link, and on whether there are 1832168404Spjd * other holds on the vnode. So we dmu_tx_hold() the right things to 1833168404Spjd * allow for either case. 1834168404Spjd */ 1835219089Spjd obj = zp->z_id; 1836168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1837168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1838219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1839219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1840219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 1841185029Spjd if (may_delete_now) { 1842185029Spjd toobig = 1843219089Spjd zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 1844185029Spjd /* if the file is too big, only hold_free a token amount */ 1845185029Spjd dmu_tx_hold_free(tx, zp->z_id, 0, 1846185029Spjd (toobig ? 
DMU_MAX_ACCESS : DMU_OBJECT_END)); 1847185029Spjd } 1848168404Spjd 1849168404Spjd /* are there any extended attributes? */ 1850219089Spjd error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1851219089Spjd &xattr_obj, sizeof (xattr_obj)); 1852219089Spjd if (error == 0 && xattr_obj) { 1853219089Spjd error = zfs_zget(zfsvfs, xattr_obj, &xzp); 1854240415Smm ASSERT0(error); 1855219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 1856219089Spjd dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1857168404Spjd } 1858168404Spjd 1859219089Spjd mutex_enter(&zp->z_lock); 1860219089Spjd if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) 1861168962Spjd dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 1862219089Spjd mutex_exit(&zp->z_lock); 1863168962Spjd 1864168404Spjd /* charge as an update -- would be nice not to charge at all */ 1865168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1866168404Spjd 1867209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 1868168404Spjd if (error) { 1869168404Spjd zfs_dirent_unlock(dl); 1870168962Spjd VN_RELE(vp); 1871219089Spjd if (xzp) 1872219089Spjd VN_RELE(ZTOV(xzp)); 1873209962Smm if (error == ERESTART) { 1874168404Spjd dmu_tx_wait(tx); 1875168404Spjd dmu_tx_abort(tx); 1876168404Spjd goto top; 1877168404Spjd } 1878185029Spjd if (realnmp) 1879185029Spjd pn_free(realnmp); 1880168404Spjd dmu_tx_abort(tx); 1881168404Spjd ZFS_EXIT(zfsvfs); 1882168404Spjd return (error); 1883168404Spjd } 1884168404Spjd 1885168404Spjd /* 1886168404Spjd * Remove the directory entry. 1887168404Spjd */ 1888185029Spjd error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 1889168404Spjd 1890168404Spjd if (error) { 1891168404Spjd dmu_tx_commit(tx); 1892168404Spjd goto out; 1893168404Spjd } 1894168404Spjd 1895219089Spjd if (unlinked) { 1896219089Spjd 1897219089Spjd /* 1898219089Spjd * Hold z_lock so that we can make sure that the ACL obj 1899219089Spjd * hasn't changed. Could have been deleted due to 1900219089Spjd * zfs_sa_upgrade(). 
1901219089Spjd */ 1902219089Spjd mutex_enter(&zp->z_lock); 1903168962Spjd VI_LOCK(vp); 1904219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1905219089Spjd &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); 1906185029Spjd delete_now = may_delete_now && !toobig && 1907168962Spjd vp->v_count == 1 && !vn_has_cached_data(vp) && 1908219089Spjd xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == 1909219089Spjd acl_obj; 1910168962Spjd VI_UNLOCK(vp); 1911168962Spjd } 1912168962Spjd 1913168962Spjd if (delete_now) { 1914219089Spjd if (xattr_obj_unlinked) { 1915219089Spjd ASSERT3U(xzp->z_links, ==, 2); 1916168962Spjd mutex_enter(&xzp->z_lock); 1917168962Spjd xzp->z_unlinked = 1; 1918219089Spjd xzp->z_links = 0; 1919219089Spjd error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 1920219089Spjd &xzp->z_links, sizeof (xzp->z_links), tx); 1921219089Spjd ASSERT3U(error, ==, 0); 1922168962Spjd mutex_exit(&xzp->z_lock); 1923168962Spjd zfs_unlinked_add(xzp, tx); 1924219089Spjd 1925219089Spjd if (zp->z_is_sa) 1926219089Spjd error = sa_remove(zp->z_sa_hdl, 1927219089Spjd SA_ZPL_XATTR(zfsvfs), tx); 1928219089Spjd else 1929219089Spjd error = sa_update(zp->z_sa_hdl, 1930219089Spjd SA_ZPL_XATTR(zfsvfs), &null_xattr, 1931219089Spjd sizeof (uint64_t), tx); 1932240415Smm ASSERT0(error); 1933168962Spjd } 1934168962Spjd VI_LOCK(vp); 1935168962Spjd vp->v_count--; 1936240415Smm ASSERT0(vp->v_count); 1937168962Spjd VI_UNLOCK(vp); 1938168962Spjd mutex_exit(&zp->z_lock); 1939168962Spjd zfs_znode_delete(zp, tx); 1940168962Spjd } else if (unlinked) { 1941219089Spjd mutex_exit(&zp->z_lock); 1942168404Spjd zfs_unlinked_add(zp, tx); 1943168962Spjd } 1944168404Spjd 1945185029Spjd txtype = TX_REMOVE; 1946185029Spjd if (flags & FIGNORECASE) 1947185029Spjd txtype |= TX_CI; 1948219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 1949168404Spjd 1950168404Spjd dmu_tx_commit(tx); 1951168404Spjdout: 1952185029Spjd if (realnmp) 1953185029Spjd pn_free(realnmp); 1954185029Spjd 
1955168404Spjd zfs_dirent_unlock(dl); 1956168404Spjd 1957219089Spjd if (!delete_now) 1958168962Spjd VN_RELE(vp); 1959219089Spjd if (xzp) 1960168962Spjd VN_RELE(ZTOV(xzp)); 1961168962Spjd 1962219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1963219089Spjd zil_commit(zilog, 0); 1964219089Spjd 1965168404Spjd ZFS_EXIT(zfsvfs); 1966168404Spjd return (error); 1967168404Spjd} 1968168404Spjd 1969168404Spjd/* 1970168404Spjd * Create a new directory and insert it into dvp using the name 1971168404Spjd * provided. Return a pointer to the inserted directory. 1972168404Spjd * 1973168404Spjd * IN: dvp - vnode of directory to add subdir to. 1974168404Spjd * dirname - name of new directory. 1975168404Spjd * vap - attributes of new directory. 1976168404Spjd * cr - credentials of caller. 1977185029Spjd * ct - caller context 1978185029Spjd * vsecp - ACL to be set 1979168404Spjd * 1980168404Spjd * OUT: vpp - vnode of created directory. 1981168404Spjd * 1982168404Spjd * RETURN: 0 if success 1983168404Spjd * error code if failure 1984168404Spjd * 1985168404Spjd * Timestamps: 1986168404Spjd * dvp - ctime|mtime updated 1987168404Spjd * vp - ctime|mtime|atime updated 1988168404Spjd */ 1989185029Spjd/*ARGSUSED*/ 1990168404Spjdstatic int 1991185029Spjdzfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 1992185029Spjd caller_context_t *ct, int flags, vsecattr_t *vsecp) 1993168404Spjd{ 1994168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1995168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1996185029Spjd zilog_t *zilog; 1997168404Spjd zfs_dirlock_t *dl; 1998185029Spjd uint64_t txtype; 1999168404Spjd dmu_tx_t *tx; 2000168404Spjd int error; 2001185029Spjd int zf = ZNEW; 2002209962Smm ksid_t *ksid; 2003209962Smm uid_t uid; 2004209962Smm gid_t gid = crgetgid(cr); 2005219089Spjd zfs_acl_ids_t acl_ids; 2006209962Smm boolean_t fuid_dirtied; 2007168404Spjd 2008168404Spjd ASSERT(vap->va_type == VDIR); 2009168404Spjd 2010185029Spjd /* 2011185029Spjd * If we have an ephemeral id, 
ACL, or XVATTR then 2012185029Spjd * make sure file system is at proper version 2013185029Spjd */ 2014185029Spjd 2015209962Smm ksid = crgetsid(cr, KSID_OWNER); 2016209962Smm if (ksid) 2017209962Smm uid = ksid_getid(ksid); 2018209962Smm else 2019209962Smm uid = crgetuid(cr); 2020185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2021219089Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 2022219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2023185029Spjd return (EINVAL); 2024185029Spjd 2025168404Spjd ZFS_ENTER(zfsvfs); 2026185029Spjd ZFS_VERIFY_ZP(dzp); 2027185029Spjd zilog = zfsvfs->z_log; 2028168404Spjd 2029219089Spjd if (dzp->z_pflags & ZFS_XATTR) { 2030168404Spjd ZFS_EXIT(zfsvfs); 2031168404Spjd return (EINVAL); 2032168404Spjd } 2033168404Spjd 2034185029Spjd if (zfsvfs->z_utf8 && u8_validate(dirname, 2035185029Spjd strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2036185029Spjd ZFS_EXIT(zfsvfs); 2037185029Spjd return (EILSEQ); 2038185029Spjd } 2039185029Spjd if (flags & FIGNORECASE) 2040185029Spjd zf |= ZCILOOK; 2041185029Spjd 2042219089Spjd if (vap->va_mask & AT_XVATTR) { 2043197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2044185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 2045185029Spjd ZFS_EXIT(zfsvfs); 2046185029Spjd return (error); 2047185029Spjd } 2048219089Spjd } 2049185029Spjd 2050219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 2051219089Spjd vsecp, &acl_ids)) != 0) { 2052219089Spjd ZFS_EXIT(zfsvfs); 2053219089Spjd return (error); 2054219089Spjd } 2055168404Spjd /* 2056168404Spjd * First make sure the new directory doesn't exist. 2057219089Spjd * 2058219089Spjd * Existence is checked first to make sure we don't return 2059219089Spjd * EACCES instead of EEXIST which can cause some applications 2060219089Spjd * to fail. 
2061168404Spjd */ 2062185029Spjdtop: 2063185029Spjd *vpp = NULL; 2064185029Spjd 2065185029Spjd if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 2066185029Spjd NULL, NULL)) { 2067219089Spjd zfs_acl_ids_free(&acl_ids); 2068168404Spjd ZFS_EXIT(zfsvfs); 2069168404Spjd return (error); 2070168404Spjd } 2071168404Spjd 2072185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 2073219089Spjd zfs_acl_ids_free(&acl_ids); 2074168404Spjd zfs_dirent_unlock(dl); 2075168404Spjd ZFS_EXIT(zfsvfs); 2076168404Spjd return (error); 2077168404Spjd } 2078168404Spjd 2079209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2080211932Smm zfs_acl_ids_free(&acl_ids); 2081209962Smm zfs_dirent_unlock(dl); 2082209962Smm ZFS_EXIT(zfsvfs); 2083209962Smm return (EDQUOT); 2084209962Smm } 2085209962Smm 2086168404Spjd /* 2087168404Spjd * Add a new entry to the directory. 2088168404Spjd */ 2089168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2090168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 2091168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2092209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 2093209962Smm if (fuid_dirtied) 2094209962Smm zfs_fuid_txhold(zfsvfs, tx); 2095219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2096219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2097219089Spjd acl_ids.z_aclp->z_acl_bytes); 2098219089Spjd } 2099219089Spjd 2100219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2101219089Spjd ZFS_SA_BASE_ATTR_SIZE); 2102219089Spjd 2103209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 2104168404Spjd if (error) { 2105168404Spjd zfs_dirent_unlock(dl); 2106209962Smm if (error == ERESTART) { 2107168404Spjd dmu_tx_wait(tx); 2108168404Spjd dmu_tx_abort(tx); 2109168404Spjd goto top; 2110168404Spjd } 2111219089Spjd zfs_acl_ids_free(&acl_ids); 2112168404Spjd dmu_tx_abort(tx); 2113168404Spjd ZFS_EXIT(zfsvfs); 2114168404Spjd return (error); 2115168404Spjd } 2116168404Spjd 
2117168404Spjd /* 2118168404Spjd * Create new node. 2119168404Spjd */ 2120219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2121168404Spjd 2122209962Smm if (fuid_dirtied) 2123209962Smm zfs_fuid_sync(zfsvfs, tx); 2124219089Spjd 2125168404Spjd /* 2126168404Spjd * Now put new name in parent dir. 2127168404Spjd */ 2128168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 2129168404Spjd 2130168404Spjd *vpp = ZTOV(zp); 2131168404Spjd 2132185029Spjd txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 2133185029Spjd if (flags & FIGNORECASE) 2134185029Spjd txtype |= TX_CI; 2135209962Smm zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 2136209962Smm acl_ids.z_fuidp, vap); 2137185029Spjd 2138209962Smm zfs_acl_ids_free(&acl_ids); 2139219089Spjd 2140168404Spjd dmu_tx_commit(tx); 2141168404Spjd 2142168404Spjd zfs_dirent_unlock(dl); 2143168404Spjd 2144219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2145219089Spjd zil_commit(zilog, 0); 2146219089Spjd 2147168404Spjd ZFS_EXIT(zfsvfs); 2148168404Spjd return (0); 2149168404Spjd} 2150168404Spjd 2151168404Spjd/* 2152168404Spjd * Remove a directory subdir entry. If the current working 2153168404Spjd * directory is the same as the subdir to be removed, the 2154168404Spjd * remove will fail. 2155168404Spjd * 2156168404Spjd * IN: dvp - vnode of directory to remove from. 2157168404Spjd * name - name of directory to be removed. 2158168404Spjd * cwd - vnode of current working directory. 2159168404Spjd * cr - credentials of caller. 
2160185029Spjd * ct - caller context 2161185029Spjd * flags - case flags 2162168404Spjd * 2163168404Spjd * RETURN: 0 if success 2164168404Spjd * error code if failure 2165168404Spjd * 2166168404Spjd * Timestamps: 2167168404Spjd * dvp - ctime|mtime updated 2168168404Spjd */ 2169185029Spjd/*ARGSUSED*/ 2170168404Spjdstatic int 2171185029Spjdzfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 2172185029Spjd caller_context_t *ct, int flags) 2173168404Spjd{ 2174168404Spjd znode_t *dzp = VTOZ(dvp); 2175168404Spjd znode_t *zp; 2176168404Spjd vnode_t *vp; 2177168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2178185029Spjd zilog_t *zilog; 2179168404Spjd zfs_dirlock_t *dl; 2180168404Spjd dmu_tx_t *tx; 2181168404Spjd int error; 2182185029Spjd int zflg = ZEXISTS; 2183168404Spjd 2184168962Spjd ZFS_ENTER(zfsvfs); 2185185029Spjd ZFS_VERIFY_ZP(dzp); 2186185029Spjd zilog = zfsvfs->z_log; 2187168404Spjd 2188185029Spjd if (flags & FIGNORECASE) 2189185029Spjd zflg |= ZCILOOK; 2190168404Spjdtop: 2191168404Spjd zp = NULL; 2192168404Spjd 2193168404Spjd /* 2194168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 2195168404Spjd */ 2196185029Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 2197185029Spjd NULL, NULL)) { 2198168404Spjd ZFS_EXIT(zfsvfs); 2199168404Spjd return (error); 2200168404Spjd } 2201168404Spjd 2202168404Spjd vp = ZTOV(zp); 2203168404Spjd 2204168404Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2205168404Spjd goto out; 2206168404Spjd } 2207168404Spjd 2208168962Spjd if (vp->v_type != VDIR) { 2209168962Spjd error = ENOTDIR; 2210168962Spjd goto out; 2211168962Spjd } 2212168962Spjd 2213168962Spjd if (vp == cwd) { 2214168962Spjd error = EINVAL; 2215168962Spjd goto out; 2216168962Spjd } 2217168962Spjd 2218185029Spjd vnevent_rmdir(vp, dvp, name, ct); 2219168962Spjd 2220168404Spjd /* 2221168404Spjd * Grab a lock on the directory to make sure that noone is 2222168404Spjd * trying to add (or lookup) entries while we are removing it. 
2223168404Spjd */ 2224168404Spjd rw_enter(&zp->z_name_lock, RW_WRITER); 2225168404Spjd 2226168404Spjd /* 2227168404Spjd * Grab a lock on the parent pointer to make sure we play well 2228168404Spjd * with the treewalk and directory rename code. 2229168404Spjd */ 2230168404Spjd rw_enter(&zp->z_parent_lock, RW_WRITER); 2231168404Spjd 2232168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2233168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2234219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2235168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2236219089Spjd zfs_sa_upgrade_txholds(tx, zp); 2237219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 2238209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 2239168404Spjd if (error) { 2240168404Spjd rw_exit(&zp->z_parent_lock); 2241168404Spjd rw_exit(&zp->z_name_lock); 2242168404Spjd zfs_dirent_unlock(dl); 2243168962Spjd VN_RELE(vp); 2244209962Smm if (error == ERESTART) { 2245168404Spjd dmu_tx_wait(tx); 2246168404Spjd dmu_tx_abort(tx); 2247168404Spjd goto top; 2248168404Spjd } 2249168404Spjd dmu_tx_abort(tx); 2250168404Spjd ZFS_EXIT(zfsvfs); 2251168404Spjd return (error); 2252168404Spjd } 2253168404Spjd 2254168404Spjd#ifdef FREEBSD_NAMECACHE 2255168404Spjd cache_purge(dvp); 2256168404Spjd#endif 2257168404Spjd 2258185029Spjd error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 2259168404Spjd 2260185029Spjd if (error == 0) { 2261185029Spjd uint64_t txtype = TX_RMDIR; 2262185029Spjd if (flags & FIGNORECASE) 2263185029Spjd txtype |= TX_CI; 2264219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2265185029Spjd } 2266168404Spjd 2267168404Spjd dmu_tx_commit(tx); 2268168404Spjd 2269168404Spjd rw_exit(&zp->z_parent_lock); 2270168404Spjd rw_exit(&zp->z_name_lock); 2271168404Spjd#ifdef FREEBSD_NAMECACHE 2272168404Spjd cache_purge(vp); 2273168404Spjd#endif 2274168404Spjdout: 2275168404Spjd zfs_dirent_unlock(dl); 2276168404Spjd 2277168962Spjd VN_RELE(vp); 2278168962Spjd 2279219089Spjd if 
(zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2280219089Spjd zil_commit(zilog, 0); 2281219089Spjd 2282168404Spjd ZFS_EXIT(zfsvfs); 2283168404Spjd return (error); 2284168404Spjd} 2285168404Spjd 2286168404Spjd/* 2287168404Spjd * Read as many directory entries as will fit into the provided 2288168404Spjd * buffer from the given directory cursor position (specified in 2289168404Spjd * the uio structure. 2290168404Spjd * 2291168404Spjd * IN: vp - vnode of directory to read. 2292168404Spjd * uio - structure supplying read location, range info, 2293168404Spjd * and return buffer. 2294168404Spjd * cr - credentials of caller. 2295185029Spjd * ct - caller context 2296185029Spjd * flags - case flags 2297168404Spjd * 2298168404Spjd * OUT: uio - updated offset and range, buffer filled. 2299168404Spjd * eofp - set to true if end-of-file detected. 2300168404Spjd * 2301168404Spjd * RETURN: 0 if success 2302168404Spjd * error code if failure 2303168404Spjd * 2304168404Spjd * Timestamps: 2305168404Spjd * vp - atime updated 2306168404Spjd * 2307168404Spjd * Note that the low 4 bits of the cookie returned by zap is always zero. 2308168404Spjd * This allows us to use the low range for "special" directory entries: 2309168404Spjd * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2310168404Spjd * we use the offset 2 for the '.zfs' directory. 
2311168404Spjd */ 2312168404Spjd/* ARGSUSED */ 2313168404Spjdstatic int 2314168962Spjdzfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) 2315168404Spjd{ 2316168404Spjd znode_t *zp = VTOZ(vp); 2317168404Spjd iovec_t *iovp; 2318185029Spjd edirent_t *eodp; 2319168404Spjd dirent64_t *odp; 2320168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2321168404Spjd objset_t *os; 2322168404Spjd caddr_t outbuf; 2323168404Spjd size_t bufsize; 2324168404Spjd zap_cursor_t zc; 2325168404Spjd zap_attribute_t zap; 2326168404Spjd uint_t bytes_wanted; 2327168404Spjd uint64_t offset; /* must be unsigned; checks for < 1 */ 2328219089Spjd uint64_t parent; 2329168404Spjd int local_eof; 2330168404Spjd int outcount; 2331168404Spjd int error; 2332168404Spjd uint8_t prefetch; 2333185029Spjd boolean_t check_sysattrs; 2334168404Spjd uint8_t type; 2335168962Spjd int ncooks; 2336168962Spjd u_long *cooks = NULL; 2337185029Spjd int flags = 0; 2338168404Spjd 2339168404Spjd ZFS_ENTER(zfsvfs); 2340185029Spjd ZFS_VERIFY_ZP(zp); 2341168404Spjd 2342219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 2343219089Spjd &parent, sizeof (parent))) != 0) { 2344219089Spjd ZFS_EXIT(zfsvfs); 2345219089Spjd return (error); 2346219089Spjd } 2347219089Spjd 2348168404Spjd /* 2349168404Spjd * If we are not given an eof variable, 2350168404Spjd * use a local one. 2351168404Spjd */ 2352168404Spjd if (eofp == NULL) 2353168404Spjd eofp = &local_eof; 2354168404Spjd 2355168404Spjd /* 2356168404Spjd * Check for valid iov_len. 
2357168404Spjd */ 2358168404Spjd if (uio->uio_iov->iov_len <= 0) { 2359168404Spjd ZFS_EXIT(zfsvfs); 2360168404Spjd return (EINVAL); 2361168404Spjd } 2362168404Spjd 2363168404Spjd /* 2364168404Spjd * Quit if directory has been removed (posix) 2365168404Spjd */ 2366168404Spjd if ((*eofp = zp->z_unlinked) != 0) { 2367168404Spjd ZFS_EXIT(zfsvfs); 2368168404Spjd return (0); 2369168404Spjd } 2370168404Spjd 2371168404Spjd error = 0; 2372168404Spjd os = zfsvfs->z_os; 2373168404Spjd offset = uio->uio_loffset; 2374168404Spjd prefetch = zp->z_zn_prefetch; 2375168404Spjd 2376168404Spjd /* 2377168404Spjd * Initialize the iterator cursor. 2378168404Spjd */ 2379168404Spjd if (offset <= 3) { 2380168404Spjd /* 2381168404Spjd * Start iteration from the beginning of the directory. 2382168404Spjd */ 2383168404Spjd zap_cursor_init(&zc, os, zp->z_id); 2384168404Spjd } else { 2385168404Spjd /* 2386168404Spjd * The offset is a serialized cursor. 2387168404Spjd */ 2388168404Spjd zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2389168404Spjd } 2390168404Spjd 2391168404Spjd /* 2392168404Spjd * Get space to change directory entries into fs independent format. 2393168404Spjd */ 2394168404Spjd iovp = uio->uio_iov; 2395168404Spjd bytes_wanted = iovp->iov_len; 2396168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2397168404Spjd bufsize = bytes_wanted; 2398168404Spjd outbuf = kmem_alloc(bufsize, KM_SLEEP); 2399168404Spjd odp = (struct dirent64 *)outbuf; 2400168404Spjd } else { 2401168404Spjd bufsize = bytes_wanted; 2402168404Spjd odp = (struct dirent64 *)iovp->iov_base; 2403168404Spjd } 2404185029Spjd eodp = (struct edirent *)odp; 2405168404Spjd 2406169170Spjd if (ncookies != NULL) { 2407168404Spjd /* 2408168404Spjd * Minimum entry size is dirent size and 1 byte for a file name. 
2409168404Spjd */ 2410168962Spjd ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2411219404Spjd cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2412219404Spjd *cookies = cooks; 2413168962Spjd *ncookies = ncooks; 2414168404Spjd } 2415185029Spjd /* 2416185029Spjd * If this VFS supports the system attribute view interface; and 2417185029Spjd * we're looking at an extended attribute directory; and we care 2418185029Spjd * about normalization conflicts on this vfs; then we must check 2419185029Spjd * for normalization conflicts with the sysattr name space. 2420185029Spjd */ 2421185029Spjd#ifdef TODO 2422185029Spjd check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2423185029Spjd (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2424185029Spjd (flags & V_RDDIR_ENTFLAGS); 2425185029Spjd#else 2426185029Spjd check_sysattrs = 0; 2427185029Spjd#endif 2428168404Spjd 2429168404Spjd /* 2430168404Spjd * Transform to file-system independent format 2431168404Spjd */ 2432168404Spjd outcount = 0; 2433168404Spjd while (outcount < bytes_wanted) { 2434168404Spjd ino64_t objnum; 2435168404Spjd ushort_t reclen; 2436219089Spjd off64_t *next = NULL; 2437168404Spjd 2438168404Spjd /* 2439168404Spjd * Special case `.', `..', and `.zfs'. 
2440168404Spjd */ 2441168404Spjd if (offset == 0) { 2442168404Spjd (void) strcpy(zap.za_name, "."); 2443185029Spjd zap.za_normalization_conflict = 0; 2444168404Spjd objnum = zp->z_id; 2445169108Spjd type = DT_DIR; 2446168404Spjd } else if (offset == 1) { 2447168404Spjd (void) strcpy(zap.za_name, ".."); 2448185029Spjd zap.za_normalization_conflict = 0; 2449219089Spjd objnum = parent; 2450169108Spjd type = DT_DIR; 2451168404Spjd } else if (offset == 2 && zfs_show_ctldir(zp)) { 2452168404Spjd (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2453185029Spjd zap.za_normalization_conflict = 0; 2454168404Spjd objnum = ZFSCTL_INO_ROOT; 2455169108Spjd type = DT_DIR; 2456168404Spjd } else { 2457168404Spjd /* 2458168404Spjd * Grab next entry. 2459168404Spjd */ 2460168404Spjd if (error = zap_cursor_retrieve(&zc, &zap)) { 2461168404Spjd if ((*eofp = (error == ENOENT)) != 0) 2462168404Spjd break; 2463168404Spjd else 2464168404Spjd goto update; 2465168404Spjd } 2466168404Spjd 2467168404Spjd if (zap.za_integer_length != 8 || 2468168404Spjd zap.za_num_integers != 1) { 2469168404Spjd cmn_err(CE_WARN, "zap_readdir: bad directory " 2470168404Spjd "entry, obj = %lld, offset = %lld\n", 2471168404Spjd (u_longlong_t)zp->z_id, 2472168404Spjd (u_longlong_t)offset); 2473168404Spjd error = ENXIO; 2474168404Spjd goto update; 2475168404Spjd } 2476168404Spjd 2477168404Spjd objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2478168404Spjd /* 2479168404Spjd * MacOS X can extract the object type here such as: 2480168404Spjd * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2481168404Spjd */ 2482168404Spjd type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2483185029Spjd 2484185029Spjd if (check_sysattrs && !zap.za_normalization_conflict) { 2485185029Spjd#ifdef TODO 2486185029Spjd zap.za_normalization_conflict = 2487185029Spjd xattr_sysattr_casechk(zap.za_name); 2488185029Spjd#else 2489185029Spjd panic("%s:%u: TODO", __func__, __LINE__); 2490185029Spjd#endif 2491185029Spjd } 2492168404Spjd } 2493168404Spjd 
2494211932Smm if (flags & V_RDDIR_ACCFILTER) { 2495211932Smm /* 2496211932Smm * If we have no access at all, don't include 2497211932Smm * this entry in the returned information 2498211932Smm */ 2499211932Smm znode_t *ezp; 2500211932Smm if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2501211932Smm goto skip_entry; 2502211932Smm if (!zfs_has_access(ezp, cr)) { 2503211932Smm VN_RELE(ZTOV(ezp)); 2504211932Smm goto skip_entry; 2505211932Smm } 2506211932Smm VN_RELE(ZTOV(ezp)); 2507211932Smm } 2508211932Smm 2509185029Spjd if (flags & V_RDDIR_ENTFLAGS) 2510185029Spjd reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2511185029Spjd else 2512185029Spjd reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2513185029Spjd 2514168404Spjd /* 2515168404Spjd * Will this entry fit in the buffer? 2516168404Spjd */ 2517168404Spjd if (outcount + reclen > bufsize) { 2518168404Spjd /* 2519168404Spjd * Did we manage to fit anything in the buffer? 2520168404Spjd */ 2521168404Spjd if (!outcount) { 2522168404Spjd error = EINVAL; 2523168404Spjd goto update; 2524168404Spjd } 2525168404Spjd break; 2526168404Spjd } 2527185029Spjd if (flags & V_RDDIR_ENTFLAGS) { 2528185029Spjd /* 2529185029Spjd * Add extended flag entry: 2530185029Spjd */ 2531185029Spjd eodp->ed_ino = objnum; 2532185029Spjd eodp->ed_reclen = reclen; 2533185029Spjd /* NOTE: ed_off is the offset for the *next* entry */ 2534185029Spjd next = &(eodp->ed_off); 2535185029Spjd eodp->ed_eflags = zap.za_normalization_conflict ? 
2536185029Spjd ED_CASE_CONFLICT : 0; 2537185029Spjd (void) strncpy(eodp->ed_name, zap.za_name, 2538185029Spjd EDIRENT_NAMELEN(reclen)); 2539185029Spjd eodp = (edirent_t *)((intptr_t)eodp + reclen); 2540185029Spjd } else { 2541185029Spjd /* 2542185029Spjd * Add normal entry: 2543185029Spjd */ 2544185029Spjd odp->d_ino = objnum; 2545185029Spjd odp->d_reclen = reclen; 2546185029Spjd odp->d_namlen = strlen(zap.za_name); 2547185029Spjd (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2548185029Spjd odp->d_type = type; 2549185029Spjd odp = (dirent64_t *)((intptr_t)odp + reclen); 2550185029Spjd } 2551168404Spjd outcount += reclen; 2552168404Spjd 2553168404Spjd ASSERT(outcount <= bufsize); 2554168404Spjd 2555168404Spjd /* Prefetch znode */ 2556168404Spjd if (prefetch) 2557168404Spjd dmu_prefetch(os, objnum, 0, 0); 2558168404Spjd 2559211932Smm skip_entry: 2560168404Spjd /* 2561168404Spjd * Move to the next entry, fill in the previous offset. 2562168404Spjd */ 2563168404Spjd if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2564168404Spjd zap_cursor_advance(&zc); 2565168404Spjd offset = zap_cursor_serialize(&zc); 2566168404Spjd } else { 2567168404Spjd offset += 1; 2568168404Spjd } 2569219404Spjd 2570219404Spjd if (cooks != NULL) { 2571219404Spjd *cooks++ = offset; 2572219404Spjd ncooks--; 2573219404Spjd KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2574219404Spjd } 2575168404Spjd } 2576168404Spjd zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2577168404Spjd 2578168404Spjd /* Subtract unused cookies */ 2579168962Spjd if (ncookies != NULL) 2580168962Spjd *ncookies -= ncooks; 2581168404Spjd 2582168404Spjd if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2583168404Spjd iovp->iov_base += outcount; 2584168404Spjd iovp->iov_len -= outcount; 2585168404Spjd uio->uio_resid -= outcount; 2586168404Spjd } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2587168404Spjd /* 2588168404Spjd * Reset the pointer. 
2589168404Spjd */ 2590168404Spjd offset = uio->uio_loffset; 2591168404Spjd } 2592168404Spjd 2593168404Spjdupdate: 2594168404Spjd zap_cursor_fini(&zc); 2595168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2596168404Spjd kmem_free(outbuf, bufsize); 2597168404Spjd 2598168404Spjd if (error == ENOENT) 2599168404Spjd error = 0; 2600168404Spjd 2601168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2602168404Spjd 2603168404Spjd uio->uio_loffset = offset; 2604168404Spjd ZFS_EXIT(zfsvfs); 2605169107Spjd if (error != 0 && cookies != NULL) { 2606168962Spjd free(*cookies, M_TEMP); 2607168962Spjd *cookies = NULL; 2608168962Spjd *ncookies = 0; 2609168404Spjd } 2610168404Spjd return (error); 2611168404Spjd} 2612168404Spjd 2613185029Spjdulong_t zfs_fsync_sync_cnt = 4; 2614185029Spjd 2615168404Spjdstatic int 2616185029Spjdzfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2617168404Spjd{ 2618168962Spjd znode_t *zp = VTOZ(vp); 2619168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2620168404Spjd 2621185029Spjd (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2622185029Spjd 2623219089Spjd if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 2624219089Spjd ZFS_ENTER(zfsvfs); 2625219089Spjd ZFS_VERIFY_ZP(zp); 2626219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 2627219089Spjd ZFS_EXIT(zfsvfs); 2628219089Spjd } 2629168404Spjd return (0); 2630168404Spjd} 2631168404Spjd 2632185029Spjd 2633168404Spjd/* 2634168404Spjd * Get the requested file attributes and place them in the provided 2635168404Spjd * vattr structure. 2636168404Spjd * 2637168404Spjd * IN: vp - vnode of file. 2638168404Spjd * vap - va_mask identifies requested attributes. 2639185029Spjd * If AT_XVATTR set, then optional attrs are requested 2640185029Spjd * flags - ATTR_NOACLCHECK (CIFS server context) 2641168404Spjd * cr - credentials of caller. 2642185029Spjd * ct - caller context 2643168404Spjd * 2644168404Spjd * OUT: vap - attribute values. 
2645168404Spjd * 2646168404Spjd * RETURN: 0 (always succeeds) 2647168404Spjd */ 2648168404Spjd/* ARGSUSED */ 2649168404Spjdstatic int 2650185029Spjdzfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2651185029Spjd caller_context_t *ct) 2652168404Spjd{ 2653168962Spjd znode_t *zp = VTOZ(vp); 2654168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2655185029Spjd int error = 0; 2656168962Spjd uint32_t blksize; 2657168962Spjd u_longlong_t nblocks; 2658185029Spjd uint64_t links; 2659224251Sdelphij uint64_t mtime[2], ctime[2], crtime[2], rdev; 2660185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2661185029Spjd xoptattr_t *xoap = NULL; 2662185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2663224251Sdelphij sa_bulk_attr_t bulk[4]; 2664219089Spjd int count = 0; 2665168404Spjd 2666168404Spjd ZFS_ENTER(zfsvfs); 2667185029Spjd ZFS_VERIFY_ZP(zp); 2668168404Spjd 2669219089Spjd zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2670219089Spjd 2671219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 2672219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 2673219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &crtime, 16); 2674224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2675224251Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 2676224251Sdelphij &rdev, 8); 2677219089Spjd 2678219089Spjd if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 2679219089Spjd ZFS_EXIT(zfsvfs); 2680219089Spjd return (error); 2681219089Spjd } 2682219089Spjd 2683168404Spjd /* 2684185029Spjd * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2685185029Spjd * Also, if we are the owner don't bother, since owner should 2686185029Spjd * always be allowed to read basic attributes of file. 
2687185029Spjd */ 2688219089Spjd if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2689219089Spjd (vap->va_uid != crgetuid(cr))) { 2690185029Spjd if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2691185029Spjd skipaclchk, cr)) { 2692185029Spjd ZFS_EXIT(zfsvfs); 2693185029Spjd return (error); 2694185029Spjd } 2695185029Spjd } 2696185029Spjd 2697185029Spjd /* 2698168404Spjd * Return all attributes. It's cheaper to provide the answer 2699168404Spjd * than to determine whether we were asked the question. 2700168404Spjd */ 2701168404Spjd 2702209097Smm mutex_enter(&zp->z_lock); 2703219089Spjd vap->va_type = IFTOVT(zp->z_mode); 2704219089Spjd vap->va_mode = zp->z_mode & ~S_IFMT; 2705224252Sdelphij#ifdef sun 2706224252Sdelphij vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2707224252Sdelphij#else 2708224252Sdelphij vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 2709224252Sdelphij#endif 2710168404Spjd vap->va_nodeid = zp->z_id; 2711185029Spjd if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2712219089Spjd links = zp->z_links + 1; 2713185029Spjd else 2714219089Spjd links = zp->z_links; 2715229425Sdim vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! */ 2716219089Spjd vap->va_size = zp->z_size; 2717224252Sdelphij#ifdef sun 2718224252Sdelphij vap->va_rdev = vp->v_rdev; 2719224252Sdelphij#else 2720224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2721224251Sdelphij vap->va_rdev = zfs_cmpldev(rdev); 2722224252Sdelphij#endif 2723168404Spjd vap->va_seq = zp->z_seq; 2724168404Spjd vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2725168404Spjd 2726185029Spjd /* 2727185029Spjd * Add in any requested optional attributes and the create time. 2728185029Spjd * Also set the corresponding bits in the returned attribute bitmap. 
2729185029Spjd */ 2730185029Spjd if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2731185029Spjd if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2732185029Spjd xoap->xoa_archive = 2733219089Spjd ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2734185029Spjd XVA_SET_RTN(xvap, XAT_ARCHIVE); 2735185029Spjd } 2736185029Spjd 2737185029Spjd if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2738185029Spjd xoap->xoa_readonly = 2739219089Spjd ((zp->z_pflags & ZFS_READONLY) != 0); 2740185029Spjd XVA_SET_RTN(xvap, XAT_READONLY); 2741185029Spjd } 2742185029Spjd 2743185029Spjd if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2744185029Spjd xoap->xoa_system = 2745219089Spjd ((zp->z_pflags & ZFS_SYSTEM) != 0); 2746185029Spjd XVA_SET_RTN(xvap, XAT_SYSTEM); 2747185029Spjd } 2748185029Spjd 2749185029Spjd if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2750185029Spjd xoap->xoa_hidden = 2751219089Spjd ((zp->z_pflags & ZFS_HIDDEN) != 0); 2752185029Spjd XVA_SET_RTN(xvap, XAT_HIDDEN); 2753185029Spjd } 2754185029Spjd 2755185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2756185029Spjd xoap->xoa_nounlink = 2757219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2758185029Spjd XVA_SET_RTN(xvap, XAT_NOUNLINK); 2759185029Spjd } 2760185029Spjd 2761185029Spjd if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2762185029Spjd xoap->xoa_immutable = 2763219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2764185029Spjd XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2765185029Spjd } 2766185029Spjd 2767185029Spjd if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2768185029Spjd xoap->xoa_appendonly = 2769219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2770185029Spjd XVA_SET_RTN(xvap, XAT_APPENDONLY); 2771185029Spjd } 2772185029Spjd 2773185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2774185029Spjd xoap->xoa_nodump = 2775219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0); 2776185029Spjd XVA_SET_RTN(xvap, XAT_NODUMP); 2777185029Spjd } 2778185029Spjd 2779185029Spjd if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2780185029Spjd xoap->xoa_opaque = 2781219089Spjd 
((zp->z_pflags & ZFS_OPAQUE) != 0); 2782185029Spjd XVA_SET_RTN(xvap, XAT_OPAQUE); 2783185029Spjd } 2784185029Spjd 2785185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2786185029Spjd xoap->xoa_av_quarantined = 2787219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2788185029Spjd XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2789185029Spjd } 2790185029Spjd 2791185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2792185029Spjd xoap->xoa_av_modified = 2793219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2794185029Spjd XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2795185029Spjd } 2796185029Spjd 2797185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2798219089Spjd vp->v_type == VREG) { 2799219089Spjd zfs_sa_get_scanstamp(zp, xvap); 2800185029Spjd } 2801185029Spjd 2802185029Spjd if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 2803219089Spjd uint64_t times[2]; 2804219089Spjd 2805219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 2806219089Spjd times, sizeof (times)); 2807219089Spjd ZFS_TIME_DECODE(&xoap->xoa_createtime, times); 2808185029Spjd XVA_SET_RTN(xvap, XAT_CREATETIME); 2809185029Spjd } 2810219089Spjd 2811219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2812219089Spjd xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 2813219089Spjd XVA_SET_RTN(xvap, XAT_REPARSE); 2814219089Spjd } 2815219089Spjd if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 2816219089Spjd xoap->xoa_generation = zp->z_gen; 2817219089Spjd XVA_SET_RTN(xvap, XAT_GEN); 2818219089Spjd } 2819219089Spjd 2820219089Spjd if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2821219089Spjd xoap->xoa_offline = 2822219089Spjd ((zp->z_pflags & ZFS_OFFLINE) != 0); 2823219089Spjd XVA_SET_RTN(xvap, XAT_OFFLINE); 2824219089Spjd } 2825219089Spjd 2826219089Spjd if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2827219089Spjd xoap->xoa_sparse = 2828219089Spjd ((zp->z_pflags & ZFS_SPARSE) != 0); 2829219089Spjd XVA_SET_RTN(xvap, XAT_SPARSE); 2830219089Spjd } 2831185029Spjd } 2832185029Spjd 2833219089Spjd 
ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 2834219089Spjd ZFS_TIME_DECODE(&vap->va_mtime, mtime); 2835219089Spjd ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2836219089Spjd ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 2837168404Spjd 2838168404Spjd mutex_exit(&zp->z_lock); 2839168404Spjd 2840219089Spjd sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 2841168404Spjd vap->va_blksize = blksize; 2842168404Spjd vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 2843168404Spjd 2844168404Spjd if (zp->z_blksz == 0) { 2845168404Spjd /* 2846168404Spjd * Block size hasn't been set; suggest maximal I/O transfers. 2847168404Spjd */ 2848168404Spjd vap->va_blksize = zfsvfs->z_max_blksz; 2849168404Spjd } 2850168404Spjd 2851168404Spjd ZFS_EXIT(zfsvfs); 2852168404Spjd return (0); 2853168404Spjd} 2854168404Spjd 2855168404Spjd/* 2856168404Spjd * Set the file attributes to the values contained in the 2857168404Spjd * vattr structure. 2858168404Spjd * 2859168404Spjd * IN: vp - vnode of file to be modified. 2860168404Spjd * vap - new attribute values. 2861185029Spjd * If AT_XVATTR set, then optional attrs are being set 2862168404Spjd * flags - ATTR_UTIME set if non-default time values provided. 2863185029Spjd * - ATTR_NOACLCHECK (CIFS context only). 2864168404Spjd * cr - credentials of caller. 2865185029Spjd * ct - caller context 2866168404Spjd * 2867168404Spjd * RETURN: 0 if success 2868168404Spjd * error code if failure 2869168404Spjd * 2870168404Spjd * Timestamps: 2871168404Spjd * vp - ctime updated, mtime updated if size changed. 
*/
/* ARGSUSED */
static int
zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
	caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	zilog_t		*zilog;
	dmu_tx_t	*tx;
	vattr_t		oldva;
	xvattr_t	tmpxvattr;
	uint_t		mask = vap->va_mask;
	uint_t		saved_mask;
	uint64_t	saved_mode;
	int		trim_mask = 0;
	uint64_t	new_mode;
	uint64_t	new_uid, new_gid;
	uint64_t	xattr_obj;
	uint64_t	mtime[2], ctime[2];
	znode_t		*attrzp;
	int		need_policy = FALSE;
	int		err, err2;
	zfs_fuid_info_t *fuidp = NULL;
	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
	xoptattr_t	*xoap;
	zfs_acl_t	*aclp;
	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
	boolean_t	fuid_dirtied = B_FALSE;
	sa_bulk_attr_t	bulk[7], xattr_bulk[7];
	int		count = 0, xattr_count = 0;

	/* Nothing requested: succeed without entering the file system. */
	if (mask == 0)
		return (0);

	if (mask & AT_NOSET)
		return (EINVAL);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	zilog = zfsvfs->z_log;

	/*
	 * Make sure that if we have ephemeral uid/gid or xvattr specified
	 * that file system is at proper version level
	 */

	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
	    ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
	    (mask & AT_XVATTR))) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/* A size change is only accepted for regular files and FIFOs. */
	if (mask & AT_SIZE && vp->v_type == VDIR) {
		ZFS_EXIT(zfsvfs);
		return (EISDIR);
	}

	if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * If this is an xvattr_t, then get a pointer to the structure of
	 * optional attributes.  If this is NULL, then we have a vattr_t.
	 */
	xoap = xva_getxoptattr(xvap);

	/* tmpxvattr remembers request bits that are trimmed from xvap below. */
	xva_init(&tmpxvattr);

	/*
	 * Immutable files can only alter immutable bit and atime
	 */
	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
	    ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
	    ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	/*
	 * Verify that the timestamps don't overflow 32 bits.
	 * ZFS can handle large timestamps, but 32bit syscalls can't
	 * handle times greater than 2039.  This check should be removed
	 * once large timestamps are fully supported.
	 */
	if (mask & (AT_ATIME | AT_MTIME)) {
		if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
		    ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
			ZFS_EXIT(zfsvfs);
			return (EOVERFLOW);
		}
	}

	/*
	 * Retry point: control returns here from the bottom of the function
	 * when the transaction could not be assigned (err == ERESTART).
	 */
top:
	attrzp = NULL;
	aclp = NULL;

	/* Can this be moved to before the top label? */
	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
		ZFS_EXIT(zfsvfs);
		return (EROFS);
	}

	/*
	 * First validate permissions
	 */

	if (mask & AT_SIZE) {
		/*
		 * XXX - Note, we are not providing any open
		 * mode flags here (like FNDELAY), so we may
		 * block if there are locks present... this
		 * should be addressed in openat().
		 */
		/* XXX - would it be OK to generate a log record here? */
		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
		if (err) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}
	}

	/*
	 * A non-zero result from zfs_zaccess() is kept as need_policy, so a
	 * failed ACE_WRITE_ATTRIBUTES check defers to the policy check below.
	 */
	if (mask & (AT_ATIME|AT_MTIME) ||
	    ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
	    XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
	    XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
	    XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
		    skipaclchk, cr);
	}

	if (mask & (AT_UID|AT_GID)) {
		int	idmask = (mask & (AT_UID|AT_GID));
		int	take_owner;
		int	take_group;

		/*
		 * NOTE: even if a new mode is being set,
		 * we may clear S_ISUID/S_ISGID bits.
		 */

		if (!(mask & AT_MODE))
			vap->va_mode = zp->z_mode;

		/*
		 * Take ownership or chgrp to group we are a member of
		 */

		take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
		take_group = (mask & AT_GID) &&
		    zfs_groupmember(zfsvfs, vap->va_gid, cr);

		/*
		 * If both AT_UID and AT_GID are set then take_owner and
		 * take_group must both be set in order to allow taking
		 * ownership.
		 *
		 * Otherwise, send the check through secpolicy_vnode_setattr()
		 *
		 */

		if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
		    ((idmask == AT_UID) && take_owner) ||
		    ((idmask == AT_GID) && take_group)) {
			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
			    skipaclchk, cr) == 0) {
				/*
				 * Remove setuid/setgid for non-privileged users
				 */
				secpolicy_setid_clear(vap, vp, cr);
				trim_mask = (mask & (AT_UID|AT_GID));
			} else {
				need_policy =  TRUE;
			}
		} else {
			need_policy =  TRUE;
		}
	}

	mutex_enter(&zp->z_lock);
	oldva.va_mode = zp->z_mode;
	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
	if (mask & AT_XVATTR) {
		/*
		 * Update xvattr mask to include only those attributes
		 * that are actually changing.
		 *
		 * the bits will be restored prior to actually setting
		 * the attributes so the caller thinks they were set.
		 */
		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
			if (xoap->xoa_appendonly !=
			    ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
				XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
			if (xoap->xoa_nounlink !=
			    ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
				XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
			if (xoap->xoa_immutable !=
			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
				XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
			if (xoap->xoa_nodump !=
			    ((zp->z_pflags & ZFS_NODUMP) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_NODUMP);
				XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
			if (xoap->xoa_av_modified !=
			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
				XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
			if ((vp->v_type != VREG &&
			    xoap->xoa_av_quarantined) ||
			    xoap->xoa_av_quarantined !=
			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
				XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
			}
		}

		/* The reparse attribute cannot be set through this path. */
		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
			mutex_exit(&zp->z_lock);
			ZFS_EXIT(zfsvfs);
			return (EPERM);
		}

		if (need_policy == FALSE &&
		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
		    XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
			need_policy = TRUE;
		}
	}

	mutex_exit(&zp->z_lock);

	if (mask & AT_MODE) {
		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
			err = secpolicy_setid_setsticky_clear(vp, vap,
			    &oldva, cr);
			if (err) {
				ZFS_EXIT(zfsvfs);
				return (err);
			}
			trim_mask |= AT_MODE;
		} else {
			need_policy = TRUE;
		}
	}

	if (need_policy) {
		/*
		 * If trim_mask is set then take ownership
		 * has been granted or write_acl is present and user
		 * has the ability to modify mode.  In that case remove
		 * UID|GID and or MODE from mask so that
		 * secpolicy_vnode_setattr() doesn't revoke it.
		 */

		if (trim_mask) {
			saved_mask = vap->va_mask;
			vap->va_mask &= ~trim_mask;
			if (trim_mask & AT_MODE) {
				/*
				 * Save the mode, as secpolicy_vnode_setattr()
				 * will overwrite it with ova.va_mode.
				 */
				saved_mode = vap->va_mode;
			}
		}
		err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
		if (err) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}

		if (trim_mask) {
			vap->va_mask |= saved_mask;
			if (trim_mask & AT_MODE) {
				/*
				 * Recover the mode after
				 * secpolicy_vnode_setattr().
				 */
				vap->va_mode = saved_mode;
			}
		}
	}

	/*
	 * secpolicy_vnode_setattr, or take ownership may have
	 * changed va_mask
	 */
	mask = vap->va_mask;

	if ((mask & (AT_UID | AT_GID))) {
		/*
		 * When the file has an xattr object, hold its znode too so
		 * the ownership change can be applied to it further down.
		 */
		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
		    &xattr_obj, sizeof (xattr_obj));

		if (err == 0 && xattr_obj) {
			err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
			if (err)
				goto out2;
		}
		if (mask & AT_UID) {
			new_uid = zfs_fuid_create(zfsvfs,
			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
			if (new_uid != zp->z_uid &&
			    zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) {
				/* out2 skips the attrzp release, so do it. */
				if (attrzp)
					VN_RELE(ZTOV(attrzp));
				err = EDQUOT;
				goto out2;
			}
		}

		if (mask & AT_GID) {
			new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
			    cr, ZFS_GROUP, &fuidp);
			if (new_gid != zp->z_gid &&
			    zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) {
				if (attrzp)
					VN_RELE(ZTOV(attrzp));
				err = EDQUOT;
				goto out2;
			}
		}
	}
	/* No tx exists before this point; earlier failures use out2. */
	tx = dmu_tx_create(zfsvfs->z_os);

	if (mask & AT_MODE) {
		uint64_t pmode = zp->z_mode;
		uint64_t acl_obj;
		/* Keep the file-type bits; take everything else from vap. */
		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);

		if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))
			goto out;

		mutex_enter(&zp->z_lock);
		if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
			/*
			 * Are we upgrading ACL from old V0 format
			 * to V1 format?
			 */
			if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
			    zfs_znode_acl_version(zp) ==
			    ZFS_ACL_VERSION_INITIAL) {
				dmu_tx_hold_free(tx, acl_obj, 0,
				    DMU_OBJECT_END);
				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
				    0, aclp->z_acl_bytes);
			} else {
				dmu_tx_hold_write(tx, acl_obj, 0,
				    aclp->z_acl_bytes);
			}
		} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
			    0, aclp->z_acl_bytes);
		}
		mutex_exit(&zp->z_lock);
		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
	} else {
		if ((mask & AT_XVATTR) &&
		    XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
		else
			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	}

	if (attrzp) {
		dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
	}

	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);

	zfs_sa_upgrade_txholds(tx, zp);

	err = dmu_tx_assign(tx, TXG_NOWAIT);
	if (err) {
		/* On ERESTART, wait here; the out: path then jumps to top. */
		if (err == ERESTART)
			dmu_tx_wait(tx);
		goto out;
	}

	count = 0;
	/*
	 * Set each attribute requested.
	 * We group settings according to the locks they need to acquire.
	 *
	 * Note: you cannot set ctime directly, although it will be
	 * updated as a side-effect of calling this function.
	 */


	if (mask & (AT_UID|AT_GID|AT_MODE))
		mutex_enter(&zp->z_acl_lock);
	mutex_enter(&zp->z_lock);

	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));

	if (attrzp) {
		if (mask & (AT_UID|AT_GID|AT_MODE))
			mutex_enter(&attrzp->z_acl_lock);
		mutex_enter(&attrzp->z_lock);
		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
		    SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
		    sizeof (attrzp->z_pflags));
	}

	if (mask & (AT_UID|AT_GID)) {

		if (mask & AT_UID) {
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
			    &new_uid, sizeof (new_uid));
			zp->z_uid = new_uid;
			if (attrzp) {
				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
				    SA_ZPL_UID(zfsvfs), NULL, &new_uid,
				    sizeof (new_uid));
				attrzp->z_uid = new_uid;
			}
		}

		if (mask & AT_GID) {
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
			    NULL, &new_gid, sizeof (new_gid));
			zp->z_gid = new_gid;
			if (attrzp) {
				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
				    SA_ZPL_GID(zfsvfs), NULL, &new_gid,
				    sizeof (new_gid));
				attrzp->z_gid = new_gid;
			}
		}
		if (!(mask & AT_MODE)) {
			/*
			 * new_mode is passed by address and only assigned on
			 * the next statement.
			 * NOTE(review): presumably the macro just records the
			 * pointer and the value is read by sa_bulk_update()
			 * at the end of the function - confirm.
			 */
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
			    NULL, &new_mode, sizeof (new_mode));
			new_mode = zp->z_mode;
		}
		err = zfs_acl_chown_setattr(zp);
		ASSERT(err == 0);
		if (attrzp) {
			err = zfs_acl_chown_setattr(attrzp);
			ASSERT(err == 0);
		}
	}

	if (mask & AT_MODE) {
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
		    &new_mode, sizeof (new_mode));
		zp->z_mode = new_mode;
		ASSERT3U((uintptr_t)aclp, !=, 0);
		err = zfs_aclset_common(zp, aclp, cr, tx);
		ASSERT0(err);
		/* aclp now belongs to the znode's cache; clear the local. */
		if (zp->z_acl_cached)
			zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = aclp;
		aclp = NULL;
	}


	if (mask & AT_ATIME) {
		ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
		    &zp->z_atime, sizeof (zp->z_atime));
	}

	if (mask & AT_MTIME) {
		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
		    mtime, sizeof (mtime));
	}

	/* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
	if (mask & AT_SIZE && !(mask & AT_MTIME)) {
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
		    NULL, mtime, sizeof (mtime));
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
		    &ctime, sizeof (ctime));
		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
		    B_TRUE);
	} else if (mask != 0) {
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
		    &ctime, sizeof (ctime));
		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
		    B_TRUE);
		if (attrzp) {
			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
			    SA_ZPL_CTIME(zfsvfs), NULL,
			    &ctime, sizeof (ctime));
			zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
			    mtime, ctime, B_TRUE);
		}
	}
	/*
	 * Do this after setting timestamps to prevent timestamp
	 * update from toggling bit
	 */

	if (xoap && (mask & AT_XVATTR)) {

		/*
		 * restore trimmed off masks
		 * so that return masks can be set for caller.
		 */

		if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
			XVA_SET_REQ(xvap, XAT_APPENDONLY);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
			XVA_SET_REQ(xvap, XAT_NOUNLINK);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
			XVA_SET_REQ(xvap, XAT_IMMUTABLE);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
			XVA_SET_REQ(xvap, XAT_NODUMP);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
			XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
			ASSERT(vp->v_type == VREG);

		zfs_xvattr_set(zp, xvap, tx);
	}

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	if (mask != 0)
		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);

	mutex_exit(&zp->z_lock);
	if (mask & (AT_UID|AT_GID|AT_MODE))
		mutex_exit(&zp->z_acl_lock);

	if (attrzp) {
		if (mask & (AT_UID|AT_GID|AT_MODE))
			mutex_exit(&attrzp->z_acl_lock);
		mutex_exit(&attrzp->z_lock);
	}
	/*
	 * Common exit once a tx exists: reached on success and on failures
	 * after dmu_tx_create().  No znode locks are held here.
	 */
out:
	if (err == 0 && attrzp) {
		err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
		    xattr_count, tx);
		ASSERT(err2 == 0);
	}

	if (attrzp)
		VN_RELE(ZTOV(attrzp));
	if (aclp)
		zfs_acl_free(aclp);

	if (fuidp) {
		zfs_fuid_info_free(fuidp);
		fuidp = NULL;
	}

	if (err) {
		dmu_tx_abort(tx);
		if (err == ERESTART)
			goto top;
	} else {
		/* NOTE(review): err2 is not examined after this update. */
		err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
		dmu_tx_commit(tx);
	}

	/* Exit for failures that occur before any tx has been created. */
out2:
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (err);
}

/*
 * One node of the list built by zfs_rename_lock(): a parent lock that was
 * acquired and, when set, the znode held along with it.
 */
typedef struct zfs_zlock {
	krwlock_t	*zl_rwlock;	/* lock we acquired */
	znode_t		*zl_znode;	/* znode we held */
	struct zfs_zlock *zl_next;	/* next in list */
} zfs_zlock_t;

/*
 * Drop locks and release vnodes that were held by zfs_rename_lock().
 * Every node is freed and *zlpp is NULL when the loop finishes.
 */
static void
zfs_rename_unlock(zfs_zlock_t **zlpp)
{
	zfs_zlock_t *zl;

	while ((zl = *zlpp) != NULL) {
		if (zl->zl_znode != NULL)
			VN_RELE(ZTOV(zl->zl_znode));
		rw_exit(zl->zl_rwlock);
		*zlpp = zl->zl_next;
		kmem_free(zl, sizeof (*zl));
	}
}

/*
 * Search back through the directory tree, using the ".." entries.
 * Lock each directory in the chain to prevent concurrent renames.
 * Fail any attempt to move a directory into one of its own descendants.
* XXX - z_parent_lock can overlap with map or grow locks
 *
 * Each lock taken is pushed onto the list at *zlpp before any return, so
 * locks acquired so far stay on that list on the error returns as well.
 * NOTE(review): the caller presumably releases them with
 * zfs_rename_unlock() - confirm against zfs_rename().
 *
 *	RETURN:	0 if success
 *		EINVAL if tdzp is szp or one of its descendants
 *		error code from zfs_zget() if a parent cannot be obtained
 */
static int
zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
{
	zfs_zlock_t	*zl;
	znode_t		*zp = tdzp;
	uint64_t	rootid = zp->z_zfsvfs->z_root;
	uint64_t	oidp = zp->z_id;
	krwlock_t	*rwlp = &szp->z_parent_lock;
	krw_t		rw = RW_WRITER;

	/*
	 * First pass write-locks szp and compares to zp->z_id.
	 * Later passes read-lock zp and compare to zp->z_parent.
	 */
	do {
		if (!rw_tryenter(rwlp, rw)) {
			/*
			 * Another thread is renaming in this path.
			 * Note that if we are a WRITER, we don't have any
			 * parent_locks held yet.
			 */
			if (rw == RW_READER && zp->z_id > szp->z_id) {
				/*
				 * Drop our locks and restart
				 *
				 * rw == RW_READER means at least one pass
				 * has completed, so zl is the most recently
				 * pushed node, i.e. the head of the list.
				 *
				 * NOTE(review): "continue" in a do/while
				 * evaluates the loop condition with zp reset
				 * to tdzp, so the loop ends without taking
				 * any lock when tdzp and sdzp have the same
				 * id - confirm this is intended.
				 */
				zfs_rename_unlock(&zl);
				*zlpp = NULL;
				zp = tdzp;
				oidp = zp->z_id;
				rwlp = &szp->z_parent_lock;
				rw = RW_WRITER;
				continue;
			} else {
				/*
				 * Wait for other thread to drop its locks
				 */
				rw_enter(rwlp, rw);
			}
		}

		/* Record the lock just taken at the head of the list. */
		zl = kmem_alloc(sizeof (*zl), KM_SLEEP);
		zl->zl_rwlock = rwlp;
		zl->zl_znode = NULL;
		zl->zl_next = *zlpp;
		*zlpp = zl;

		if (oidp == szp->z_id)		/* We're a descendant of szp */
			return (EINVAL);

		if (oidp == rootid)		/* We've hit the top */
			return (0);

		if (rw == RW_READER) {		/* i.e. not the first pass */
			int error = zfs_zget(zp->z_zfsvfs, oidp, &zp);
			if (error)
				return (error);
			zl->zl_znode = zp;
		}
		/* Step up: oidp becomes the parent object id of zp. */
		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs),
		    &oidp, sizeof (oidp));
		rwlp = &zp->z_parent_lock;
		rw = RW_READER;

	} while (zp->z_id != sdzp->z_id);

	return (0);
}

/*
 * Move an entry from the provided source directory to the target
 * directory.  Change the entry name as indicated.
 *
 *	IN:	sdvp	- Source directory containing the "old entry".
 *		snm	- Old entry name.
 *		tdvp	- Target directory to contain the "new entry".
 *		tnm	- New entry name.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	sdvp,tdvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t		*tdzp, *szp, *tzp;
	znode_t		*sdzp = VTOZ(sdvp);
	zfsvfs_t	*zfsvfs = sdzp->z_zfsvfs;
	zilog_t		*zilog;
	vnode_t		*realvp;
	zfs_dirlock_t	*sdl, *tdl;
	dmu_tx_t	*tx;
	zfs_zlock_t	*zl;
	int		cmp, serr, terr;
	int		error = 0;
	int		zflg = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(sdzp);
	zilog = zfsvfs->z_log;

	/*
3636168962Spjd * Make sure we have the real vp for the target directory. 3637168962Spjd */ 3638185029Spjd if (VOP_REALVP(tdvp, &realvp, ct) == 0) 3639168962Spjd tdvp = realvp; 3640168962Spjd 3641212694Smm if (tdvp->v_vfsp != sdvp->v_vfsp || zfsctl_is_node(tdvp)) { 3642168404Spjd ZFS_EXIT(zfsvfs); 3643168962Spjd return (EXDEV); 3644168404Spjd } 3645168404Spjd 3646168404Spjd tdzp = VTOZ(tdvp); 3647185029Spjd ZFS_VERIFY_ZP(tdzp); 3648185029Spjd if (zfsvfs->z_utf8 && u8_validate(tnm, 3649185029Spjd strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3650185029Spjd ZFS_EXIT(zfsvfs); 3651185029Spjd return (EILSEQ); 3652185029Spjd } 3653185029Spjd 3654185029Spjd if (flags & FIGNORECASE) 3655185029Spjd zflg |= ZCILOOK; 3656185029Spjd 3657168404Spjdtop: 3658168404Spjd szp = NULL; 3659168404Spjd tzp = NULL; 3660168404Spjd zl = NULL; 3661168404Spjd 3662168404Spjd /* 3663168404Spjd * This is to prevent the creation of links into attribute space 3664168404Spjd * by renaming a linked file into/outof an attribute directory. 3665168404Spjd * See the comment in zfs_link() for why this is considered bad. 3666168404Spjd */ 3667219089Spjd if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3668168962Spjd ZFS_EXIT(zfsvfs); 3669168962Spjd return (EINVAL); 3670168404Spjd } 3671168404Spjd 3672168404Spjd /* 3673168404Spjd * Lock source and target directory entries. To prevent deadlock, 3674168404Spjd * a lock ordering must be defined. We lock the directory with 3675168404Spjd * the smallest object id first, or if it's a tie, the one with 3676168404Spjd * the lexically first name. 3677168404Spjd */ 3678168404Spjd if (sdzp->z_id < tdzp->z_id) { 3679168962Spjd cmp = -1; 3680168962Spjd } else if (sdzp->z_id > tdzp->z_id) { 3681168962Spjd cmp = 1; 3682168962Spjd } else { 3683185029Spjd /* 3684185029Spjd * First compare the two name arguments without 3685185029Spjd * considering any case folding. 
3686185029Spjd */ 3687185029Spjd int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); 3688185029Spjd 3689185029Spjd cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); 3690185029Spjd ASSERT(error == 0 || !zfsvfs->z_utf8); 3691168962Spjd if (cmp == 0) { 3692168962Spjd /* 3693168962Spjd * POSIX: "If the old argument and the new argument 3694168962Spjd * both refer to links to the same existing file, 3695168962Spjd * the rename() function shall return successfully 3696168962Spjd * and perform no other action." 3697168962Spjd */ 3698168962Spjd ZFS_EXIT(zfsvfs); 3699168962Spjd return (0); 3700168962Spjd } 3701185029Spjd /* 3702185029Spjd * If the file system is case-folding, then we may 3703185029Spjd * have some more checking to do. A case-folding file 3704185029Spjd * system is either supporting mixed case sensitivity 3705185029Spjd * access or is completely case-insensitive. Note 3706185029Spjd * that the file system is always case preserving. 3707185029Spjd * 3708185029Spjd * In mixed sensitivity mode case sensitive behavior 3709185029Spjd * is the default. FIGNORECASE must be used to 3710185029Spjd * explicitly request case insensitive behavior. 3711185029Spjd * 3712185029Spjd * If the source and target names provided differ only 3713185029Spjd * by case (e.g., a request to rename 'tim' to 'Tim'), 3714185029Spjd * we will treat this as a special case in the 3715185029Spjd * case-insensitive mode: as long as the source name 3716185029Spjd * is an exact match, we will allow this to proceed as 3717185029Spjd * a name-change request. 
3718185029Spjd */ 3719185029Spjd if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 3720185029Spjd (zfsvfs->z_case == ZFS_CASE_MIXED && 3721185029Spjd flags & FIGNORECASE)) && 3722185029Spjd u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, 3723185029Spjd &error) == 0) { 3724185029Spjd /* 3725185029Spjd * case preserving rename request, require exact 3726185029Spjd * name matches 3727185029Spjd */ 3728185029Spjd zflg |= ZCIEXACT; 3729185029Spjd zflg &= ~ZCILOOK; 3730185029Spjd } 3731168962Spjd } 3732185029Spjd 3733208131Smm /* 3734208131Smm * If the source and destination directories are the same, we should 3735208131Smm * grab the z_name_lock of that directory only once. 3736208131Smm */ 3737208131Smm if (sdzp == tdzp) { 3738208131Smm zflg |= ZHAVELOCK; 3739208131Smm rw_enter(&sdzp->z_name_lock, RW_READER); 3740208131Smm } 3741208131Smm 3742168962Spjd if (cmp < 0) { 3743185029Spjd serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, 3744185029Spjd ZEXISTS | zflg, NULL, NULL); 3745185029Spjd terr = zfs_dirent_lock(&tdl, 3746185029Spjd tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); 3747168962Spjd } else { 3748185029Spjd terr = zfs_dirent_lock(&tdl, 3749185029Spjd tdzp, tnm, &tzp, zflg, NULL, NULL); 3750185029Spjd serr = zfs_dirent_lock(&sdl, 3751185029Spjd sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, 3752185029Spjd NULL, NULL); 3753168404Spjd } 3754168404Spjd 3755168962Spjd if (serr) { 3756168404Spjd /* 3757168404Spjd * Source entry invalid or not there. 3758168404Spjd */ 3759168962Spjd if (!terr) { 3760168404Spjd zfs_dirent_unlock(tdl); 3761168962Spjd if (tzp) 3762168962Spjd VN_RELE(ZTOV(tzp)); 3763168962Spjd } 3764208131Smm 3765208131Smm if (sdzp == tdzp) 3766208131Smm rw_exit(&sdzp->z_name_lock); 3767208131Smm 3768219089Spjd /* 3769219089Spjd * FreeBSD: In OpenSolaris they only check if rename source is 3770219089Spjd * ".." here, because "." is handled in their lookup. This is 3771219089Spjd * not the case for FreeBSD, so we check for "." explicitly. 
3772219089Spjd */ 3773168404Spjd if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0) 3774168404Spjd serr = EINVAL; 3775168962Spjd ZFS_EXIT(zfsvfs); 3776168962Spjd return (serr); 3777168404Spjd } 3778168404Spjd if (terr) { 3779168404Spjd zfs_dirent_unlock(sdl); 3780168962Spjd VN_RELE(ZTOV(szp)); 3781208131Smm 3782208131Smm if (sdzp == tdzp) 3783208131Smm rw_exit(&sdzp->z_name_lock); 3784208131Smm 3785168404Spjd if (strcmp(tnm, "..") == 0) 3786168404Spjd terr = EINVAL; 3787168962Spjd ZFS_EXIT(zfsvfs); 3788168962Spjd return (terr); 3789168404Spjd } 3790168404Spjd 3791168404Spjd /* 3792168404Spjd * Must have write access at the source to remove the old entry 3793168404Spjd * and write access at the target to create the new entry. 3794168404Spjd * Note that if target and source are the same, this can be 3795168404Spjd * done in a single check. 3796168404Spjd */ 3797168404Spjd 3798168404Spjd if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3799168404Spjd goto out; 3800168404Spjd 3801168962Spjd if (ZTOV(szp)->v_type == VDIR) { 3802168404Spjd /* 3803168404Spjd * Check to make sure rename is valid. 3804168404Spjd * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3805168404Spjd */ 3806168404Spjd if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) 3807168404Spjd goto out; 3808168404Spjd } 3809168404Spjd 3810168404Spjd /* 3811168404Spjd * Does target exist? 3812168404Spjd */ 3813168404Spjd if (tzp) { 3814168404Spjd /* 3815168404Spjd * Source and target must be the same type. 
3816168404Spjd */ 3817168962Spjd if (ZTOV(szp)->v_type == VDIR) { 3818168962Spjd if (ZTOV(tzp)->v_type != VDIR) { 3819168404Spjd error = ENOTDIR; 3820168404Spjd goto out; 3821168404Spjd } 3822168404Spjd } else { 3823168962Spjd if (ZTOV(tzp)->v_type == VDIR) { 3824168404Spjd error = EISDIR; 3825168404Spjd goto out; 3826168404Spjd } 3827168404Spjd } 3828168404Spjd /* 3829168404Spjd * POSIX dictates that when the source and target 3830168404Spjd * entries refer to the same file object, rename 3831168404Spjd * must do nothing and exit without error. 3832168404Spjd */ 3833168404Spjd if (szp->z_id == tzp->z_id) { 3834168404Spjd error = 0; 3835168404Spjd goto out; 3836168404Spjd } 3837168404Spjd } 3838168404Spjd 3839185029Spjd vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); 3840168962Spjd if (tzp) 3841185029Spjd vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 3842168962Spjd 3843185029Spjd /* 3844185029Spjd * notify the target directory if it is not the same 3845185029Spjd * as source directory. 3846185029Spjd */ 3847185029Spjd if (tdvp != sdvp) { 3848185029Spjd vnevent_rename_dest_dir(tdvp, ct); 3849185029Spjd } 3850185029Spjd 3851168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 3852219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 3853219089Spjd dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3854168404Spjd dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3855168404Spjd dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 3856219089Spjd if (sdzp != tdzp) { 3857219089Spjd dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 3858219089Spjd zfs_sa_upgrade_txholds(tx, tdzp); 3859219089Spjd } 3860219089Spjd if (tzp) { 3861219089Spjd dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 3862219089Spjd zfs_sa_upgrade_txholds(tx, tzp); 3863219089Spjd } 3864219089Spjd 3865219089Spjd zfs_sa_upgrade_txholds(tx, szp); 3866168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3867209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 3868168404Spjd if (error) { 3869168404Spjd if (zl != NULL) 3870168404Spjd 
zfs_rename_unlock(&zl); 3871168404Spjd zfs_dirent_unlock(sdl); 3872168404Spjd zfs_dirent_unlock(tdl); 3873208131Smm 3874208131Smm if (sdzp == tdzp) 3875208131Smm rw_exit(&sdzp->z_name_lock); 3876208131Smm 3877168962Spjd VN_RELE(ZTOV(szp)); 3878168962Spjd if (tzp) 3879168962Spjd VN_RELE(ZTOV(tzp)); 3880209962Smm if (error == ERESTART) { 3881168404Spjd dmu_tx_wait(tx); 3882168404Spjd dmu_tx_abort(tx); 3883168404Spjd goto top; 3884168404Spjd } 3885168404Spjd dmu_tx_abort(tx); 3886168962Spjd ZFS_EXIT(zfsvfs); 3887168962Spjd return (error); 3888168404Spjd } 3889168404Spjd 3890168404Spjd if (tzp) /* Attempt to remove the existing target */ 3891185029Spjd error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); 3892168404Spjd 3893168404Spjd if (error == 0) { 3894168404Spjd error = zfs_link_create(tdl, szp, tx, ZRENAMING); 3895168404Spjd if (error == 0) { 3896219089Spjd szp->z_pflags |= ZFS_AV_MODIFIED; 3897185029Spjd 3898219089Spjd error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 3899219089Spjd (void *)&szp->z_pflags, sizeof (uint64_t), tx); 3900240415Smm ASSERT0(error); 3901219089Spjd 3902168404Spjd error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); 3903219089Spjd if (error == 0) { 3904219089Spjd zfs_log_rename(zilog, tx, TX_RENAME | 3905219089Spjd (flags & FIGNORECASE ? TX_CI : 0), sdzp, 3906219089Spjd sdl->dl_name, tdzp, tdl->dl_name, szp); 3907185029Spjd 3908219089Spjd /* 3909219089Spjd * Update path information for the target vnode 3910219089Spjd */ 3911219089Spjd vn_renamepath(tdvp, ZTOV(szp), tnm, 3912219089Spjd strlen(tnm)); 3913219089Spjd } else { 3914219089Spjd /* 3915219089Spjd * At this point, we have successfully created 3916219089Spjd * the target name, but have failed to remove 3917219089Spjd * the source name. Since the create was done 3918219089Spjd * with the ZRENAMING flag, there are 3919219089Spjd * complications; for one, the link count is 3920219089Spjd * wrong. 
The easiest way to deal with this 3921219089Spjd * is to remove the newly created target, and 3922219089Spjd * return the original error. This must 3923219089Spjd * succeed; fortunately, it is very unlikely to 3924219089Spjd * fail, since we just created it. 3925219089Spjd */ 3926219089Spjd VERIFY3U(zfs_link_destroy(tdl, szp, tx, 3927219089Spjd ZRENAMING, NULL), ==, 0); 3928219089Spjd } 3929168404Spjd } 3930168404Spjd#ifdef FREEBSD_NAMECACHE 3931168404Spjd if (error == 0) { 3932168404Spjd cache_purge(sdvp); 3933168404Spjd cache_purge(tdvp); 3934240829Spjd cache_purge(ZTOV(szp)); 3935240829Spjd if (tzp) 3936240829Spjd cache_purge(ZTOV(tzp)); 3937168404Spjd } 3938168404Spjd#endif 3939168404Spjd } 3940168404Spjd 3941168404Spjd dmu_tx_commit(tx); 3942168404Spjdout: 3943168404Spjd if (zl != NULL) 3944168404Spjd zfs_rename_unlock(&zl); 3945168404Spjd 3946168404Spjd zfs_dirent_unlock(sdl); 3947168404Spjd zfs_dirent_unlock(tdl); 3948168404Spjd 3949208131Smm if (sdzp == tdzp) 3950208131Smm rw_exit(&sdzp->z_name_lock); 3951208131Smm 3952219089Spjd 3953168962Spjd VN_RELE(ZTOV(szp)); 3954168404Spjd if (tzp) 3955168962Spjd VN_RELE(ZTOV(tzp)); 3956168404Spjd 3957219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3958219089Spjd zil_commit(zilog, 0); 3959219089Spjd 3960168404Spjd ZFS_EXIT(zfsvfs); 3961168404Spjd 3962168404Spjd return (error); 3963168404Spjd} 3964168404Spjd 3965168404Spjd/* 3966168404Spjd * Insert the indicated symbolic reference entry into the directory. 3967168404Spjd * 3968168404Spjd * IN: dvp - Directory to contain new symbolic link. 3969168404Spjd * link - Name for new symlink entry. 3970168404Spjd * vap - Attributes of new entry. 3971168404Spjd * target - Target path of new symlink. 3972168404Spjd * cr - credentials of caller. 
3973185029Spjd * ct - caller context 3974185029Spjd * flags - case flags 3975168404Spjd * 3976168404Spjd * RETURN: 0 if success 3977168404Spjd * error code if failure 3978168404Spjd * 3979168404Spjd * Timestamps: 3980168404Spjd * dvp - ctime|mtime updated 3981168404Spjd */ 3982185029Spjd/*ARGSUSED*/ 3983168404Spjdstatic int 3984185029Spjdzfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 3985185029Spjd cred_t *cr, kthread_t *td) 3986168404Spjd{ 3987168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 3988168404Spjd zfs_dirlock_t *dl; 3989168404Spjd dmu_tx_t *tx; 3990168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3991185029Spjd zilog_t *zilog; 3992219089Spjd uint64_t len = strlen(link); 3993168404Spjd int error; 3994185029Spjd int zflg = ZNEW; 3995209962Smm zfs_acl_ids_t acl_ids; 3996209962Smm boolean_t fuid_dirtied; 3997219089Spjd uint64_t txtype = TX_SYMLINK; 3998185029Spjd int flags = 0; 3999168404Spjd 4000168962Spjd ASSERT(vap->va_type == VLNK); 4001168404Spjd 4002168404Spjd ZFS_ENTER(zfsvfs); 4003185029Spjd ZFS_VERIFY_ZP(dzp); 4004185029Spjd zilog = zfsvfs->z_log; 4005185029Spjd 4006185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 4007185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4008185029Spjd ZFS_EXIT(zfsvfs); 4009185029Spjd return (EILSEQ); 4010185029Spjd } 4011185029Spjd if (flags & FIGNORECASE) 4012185029Spjd zflg |= ZCILOOK; 4013168404Spjd 4014168404Spjd if (len > MAXPATHLEN) { 4015168404Spjd ZFS_EXIT(zfsvfs); 4016168404Spjd return (ENAMETOOLONG); 4017168404Spjd } 4018168404Spjd 4019219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, 4020219089Spjd vap, cr, NULL, &acl_ids)) != 0) { 4021219089Spjd ZFS_EXIT(zfsvfs); 4022219089Spjd return (error); 4023219089Spjd } 4024219089Spjdtop: 4025168404Spjd /* 4026168404Spjd * Attempt to lock directory; fail if entry already exists. 
4027168404Spjd */ 4028185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 4029185029Spjd if (error) { 4030219089Spjd zfs_acl_ids_free(&acl_ids); 4031168404Spjd ZFS_EXIT(zfsvfs); 4032168404Spjd return (error); 4033168404Spjd } 4034168404Spjd 4035219089Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4036219089Spjd zfs_acl_ids_free(&acl_ids); 4037219089Spjd zfs_dirent_unlock(dl); 4038219089Spjd ZFS_EXIT(zfsvfs); 4039219089Spjd return (error); 4040219089Spjd } 4041219089Spjd 4042209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 4043209962Smm zfs_acl_ids_free(&acl_ids); 4044209962Smm zfs_dirent_unlock(dl); 4045209962Smm ZFS_EXIT(zfsvfs); 4046209962Smm return (EDQUOT); 4047209962Smm } 4048168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4049209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 4050168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 4051168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4052219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 4053219089Spjd ZFS_SA_BASE_ATTR_SIZE + len); 4054219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 4055219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 4056219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 4057219089Spjd acl_ids.z_aclp->z_acl_bytes); 4058219089Spjd } 4059209962Smm if (fuid_dirtied) 4060209962Smm zfs_fuid_txhold(zfsvfs, tx); 4061209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 4062168404Spjd if (error) { 4063168404Spjd zfs_dirent_unlock(dl); 4064209962Smm if (error == ERESTART) { 4065168404Spjd dmu_tx_wait(tx); 4066168404Spjd dmu_tx_abort(tx); 4067168404Spjd goto top; 4068168404Spjd } 4069219089Spjd zfs_acl_ids_free(&acl_ids); 4070168404Spjd dmu_tx_abort(tx); 4071168404Spjd ZFS_EXIT(zfsvfs); 4072168404Spjd return (error); 4073168404Spjd } 4074168404Spjd 4075168404Spjd /* 4076168404Spjd * Create a new object for the symlink. 
4077219089Spjd * for version 4 ZPL datsets the symlink will be an SA attribute 4078168404Spjd */ 4079219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 4080168404Spjd 4081219089Spjd if (fuid_dirtied) 4082219089Spjd zfs_fuid_sync(zfsvfs, tx); 4083209962Smm 4084219089Spjd mutex_enter(&zp->z_lock); 4085219089Spjd if (zp->z_is_sa) 4086219089Spjd error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 4087219089Spjd link, len, tx); 4088219089Spjd else 4089219089Spjd zfs_sa_symlink(zp, link, len, tx); 4090219089Spjd mutex_exit(&zp->z_lock); 4091168404Spjd 4092219089Spjd zp->z_size = len; 4093219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 4094219089Spjd &zp->z_size, sizeof (zp->z_size), tx); 4095168404Spjd /* 4096168404Spjd * Insert the new object into the directory. 4097168404Spjd */ 4098168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 4099168404Spjd 4100219089Spjd if (flags & FIGNORECASE) 4101219089Spjd txtype |= TX_CI; 4102219089Spjd zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 4103219089Spjd *vpp = ZTOV(zp); 4104219089Spjd 4105209962Smm zfs_acl_ids_free(&acl_ids); 4106209962Smm 4107168404Spjd dmu_tx_commit(tx); 4108168404Spjd 4109168404Spjd zfs_dirent_unlock(dl); 4110168404Spjd 4111219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4112219089Spjd zil_commit(zilog, 0); 4113219089Spjd 4114168404Spjd ZFS_EXIT(zfsvfs); 4115168404Spjd return (error); 4116168404Spjd} 4117168404Spjd 4118168404Spjd/* 4119168404Spjd * Return, in the buffer contained in the provided uio structure, 4120168404Spjd * the symbolic path referred to by vp. 4121168404Spjd * 4122168404Spjd * IN: vp - vnode of symbolic link. 4123168404Spjd * uoip - structure to contain the link path. 4124168404Spjd * cr - credentials of caller. 4125185029Spjd * ct - caller context 4126168404Spjd * 4127168404Spjd * OUT: uio - structure to contain the link path. 
4128168404Spjd * 4129168404Spjd * RETURN: 0 if success 4130168404Spjd * error code if failure 4131168404Spjd * 4132168404Spjd * Timestamps: 4133168404Spjd * vp - atime updated 4134168404Spjd */ 4135168404Spjd/* ARGSUSED */ 4136168404Spjdstatic int 4137185029Spjdzfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 4138168404Spjd{ 4139168404Spjd znode_t *zp = VTOZ(vp); 4140168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4141168404Spjd int error; 4142168404Spjd 4143168404Spjd ZFS_ENTER(zfsvfs); 4144185029Spjd ZFS_VERIFY_ZP(zp); 4145168404Spjd 4146219089Spjd mutex_enter(&zp->z_lock); 4147219089Spjd if (zp->z_is_sa) 4148219089Spjd error = sa_lookup_uio(zp->z_sa_hdl, 4149219089Spjd SA_ZPL_SYMLINK(zfsvfs), uio); 4150219089Spjd else 4151219089Spjd error = zfs_sa_readlink(zp, uio); 4152219089Spjd mutex_exit(&zp->z_lock); 4153168404Spjd 4154168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4155219089Spjd 4156168404Spjd ZFS_EXIT(zfsvfs); 4157168404Spjd return (error); 4158168404Spjd} 4159168404Spjd 4160168404Spjd/* 4161168404Spjd * Insert a new entry into directory tdvp referencing svp. 4162168404Spjd * 4163168404Spjd * IN: tdvp - Directory to contain new entry. 4164168404Spjd * svp - vnode of new entry. 4165168404Spjd * name - name of new entry. 4166168404Spjd * cr - credentials of caller. 
4167185029Spjd * ct - caller context 4168168404Spjd * 4169168404Spjd * RETURN: 0 if success 4170168404Spjd * error code if failure 4171168404Spjd * 4172168404Spjd * Timestamps: 4173168404Spjd * tdvp - ctime|mtime updated 4174168404Spjd * svp - ctime updated 4175168404Spjd */ 4176168404Spjd/* ARGSUSED */ 4177168404Spjdstatic int 4178185029Spjdzfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 4179185029Spjd caller_context_t *ct, int flags) 4180168404Spjd{ 4181168404Spjd znode_t *dzp = VTOZ(tdvp); 4182168404Spjd znode_t *tzp, *szp; 4183168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4184185029Spjd zilog_t *zilog; 4185168404Spjd zfs_dirlock_t *dl; 4186168404Spjd dmu_tx_t *tx; 4187168962Spjd vnode_t *realvp; 4188168404Spjd int error; 4189185029Spjd int zf = ZNEW; 4190212694Smm uint64_t parent; 4191185029Spjd uid_t owner; 4192168404Spjd 4193168404Spjd ASSERT(tdvp->v_type == VDIR); 4194168404Spjd 4195168404Spjd ZFS_ENTER(zfsvfs); 4196185029Spjd ZFS_VERIFY_ZP(dzp); 4197185029Spjd zilog = zfsvfs->z_log; 4198168404Spjd 4199185029Spjd if (VOP_REALVP(svp, &realvp, ct) == 0) 4200168962Spjd svp = realvp; 4201168962Spjd 4202212694Smm /* 4203212694Smm * POSIX dictates that we return EPERM here. 4204212694Smm * Better choices include ENOTSUP or EISDIR. 
4205212694Smm */ 4206212694Smm if (svp->v_type == VDIR) { 4207168404Spjd ZFS_EXIT(zfsvfs); 4208212694Smm return (EPERM); 4209212694Smm } 4210212694Smm 4211212694Smm if (svp->v_vfsp != tdvp->v_vfsp || zfsctl_is_node(svp)) { 4212212694Smm ZFS_EXIT(zfsvfs); 4213168404Spjd return (EXDEV); 4214168404Spjd } 4215212694Smm 4216185029Spjd szp = VTOZ(svp); 4217185029Spjd ZFS_VERIFY_ZP(szp); 4218168404Spjd 4219212694Smm /* Prevent links to .zfs/shares files */ 4220212694Smm 4221219089Spjd if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 4222219089Spjd &parent, sizeof (uint64_t))) != 0) { 4223212694Smm ZFS_EXIT(zfsvfs); 4224219089Spjd return (error); 4225219089Spjd } 4226219089Spjd if (parent == zfsvfs->z_shares_dir) { 4227219089Spjd ZFS_EXIT(zfsvfs); 4228212694Smm return (EPERM); 4229212694Smm } 4230212694Smm 4231185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, 4232185029Spjd strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4233185029Spjd ZFS_EXIT(zfsvfs); 4234185029Spjd return (EILSEQ); 4235185029Spjd } 4236185029Spjd if (flags & FIGNORECASE) 4237185029Spjd zf |= ZCILOOK; 4238185029Spjd 4239168404Spjd /* 4240168404Spjd * We do not support links between attributes and non-attributes 4241168404Spjd * because of the potential security risk of creating links 4242168404Spjd * into "normal" file space in order to circumvent restrictions 4243168404Spjd * imposed in attribute space. 
4244168404Spjd */ 4245219089Spjd if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4246168404Spjd ZFS_EXIT(zfsvfs); 4247168404Spjd return (EINVAL); 4248168404Spjd } 4249168404Spjd 4250168404Spjd 4251219089Spjd owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4252219089Spjd if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { 4253168404Spjd ZFS_EXIT(zfsvfs); 4254168404Spjd return (EPERM); 4255168404Spjd } 4256168404Spjd 4257185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4258168404Spjd ZFS_EXIT(zfsvfs); 4259168404Spjd return (error); 4260168404Spjd } 4261168404Spjd 4262212694Smmtop: 4263168404Spjd /* 4264168404Spjd * Attempt to lock directory; fail if entry already exists. 4265168404Spjd */ 4266185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 4267185029Spjd if (error) { 4268168404Spjd ZFS_EXIT(zfsvfs); 4269168404Spjd return (error); 4270168404Spjd } 4271168404Spjd 4272168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4273219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4274168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4275219089Spjd zfs_sa_upgrade_txholds(tx, szp); 4276219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 4277209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 4278168404Spjd if (error) { 4279168404Spjd zfs_dirent_unlock(dl); 4280209962Smm if (error == ERESTART) { 4281168404Spjd dmu_tx_wait(tx); 4282168404Spjd dmu_tx_abort(tx); 4283168404Spjd goto top; 4284168404Spjd } 4285168404Spjd dmu_tx_abort(tx); 4286168404Spjd ZFS_EXIT(zfsvfs); 4287168404Spjd return (error); 4288168404Spjd } 4289168404Spjd 4290168404Spjd error = zfs_link_create(dl, szp, tx, 0); 4291168404Spjd 4292185029Spjd if (error == 0) { 4293185029Spjd uint64_t txtype = TX_LINK; 4294185029Spjd if (flags & FIGNORECASE) 4295185029Spjd txtype |= TX_CI; 4296185029Spjd zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4297185029Spjd } 4298168404Spjd 4299168404Spjd dmu_tx_commit(tx); 4300168404Spjd 
4301168404Spjd zfs_dirent_unlock(dl); 4302168404Spjd 4303185029Spjd if (error == 0) { 4304185029Spjd vnevent_link(svp, ct); 4305185029Spjd } 4306185029Spjd 4307219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4308219089Spjd zil_commit(zilog, 0); 4309219089Spjd 4310168404Spjd ZFS_EXIT(zfsvfs); 4311168404Spjd return (error); 4312168404Spjd} 4313168404Spjd 4314219089Spjd#ifdef sun 4315219089Spjd/* 4316219089Spjd * zfs_null_putapage() is used when the file system has been force 4317219089Spjd * unmounted. It just drops the pages. 4318219089Spjd */ 4319219089Spjd/* ARGSUSED */ 4320219089Spjdstatic int 4321219089Spjdzfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4322219089Spjd size_t *lenp, int flags, cred_t *cr) 4323219089Spjd{ 4324219089Spjd pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); 4325219089Spjd return (0); 4326219089Spjd} 4327219089Spjd 4328219089Spjd/* 4329219089Spjd * Push a page out to disk, klustering if possible. 4330219089Spjd * 4331219089Spjd * IN: vp - file to push page to. 4332219089Spjd * pp - page to push. 4333219089Spjd * flags - additional flags. 4334219089Spjd * cr - credentials of caller. 4335219089Spjd * 4336219089Spjd * OUT: offp - start of range pushed. 4337219089Spjd * lenp - len of range pushed. 4338219089Spjd * 4339219089Spjd * RETURN: 0 if success 4340219089Spjd * error code if failure 4341219089Spjd * 4342219089Spjd * NOTE: callers must have locked the page to be pushed. On 4343219089Spjd * exit, the page (and all other pages in the kluster) must be 4344219089Spjd * unlocked. 
4345219089Spjd */ 4346219089Spjd/* ARGSUSED */ 4347219089Spjdstatic int 4348219089Spjdzfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4349219089Spjd size_t *lenp, int flags, cred_t *cr) 4350219089Spjd{ 4351219089Spjd znode_t *zp = VTOZ(vp); 4352219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4353219089Spjd dmu_tx_t *tx; 4354219089Spjd u_offset_t off, koff; 4355219089Spjd size_t len, klen; 4356219089Spjd int err; 4357219089Spjd 4358219089Spjd off = pp->p_offset; 4359219089Spjd len = PAGESIZE; 4360219089Spjd /* 4361219089Spjd * If our blocksize is bigger than the page size, try to kluster 4362219089Spjd * multiple pages so that we write a full block (thus avoiding 4363219089Spjd * a read-modify-write). 4364219089Spjd */ 4365219089Spjd if (off < zp->z_size && zp->z_blksz > PAGESIZE) { 4366219089Spjd klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); 4367219089Spjd koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0; 4368219089Spjd ASSERT(koff <= zp->z_size); 4369219089Spjd if (koff + klen > zp->z_size) 4370219089Spjd klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); 4371219089Spjd pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); 4372219089Spjd } 4373219089Spjd ASSERT3U(btop(len), ==, btopr(len)); 4374219089Spjd 4375219089Spjd /* 4376219089Spjd * Can't push pages past end-of-file. 
4377219089Spjd */ 4378219089Spjd if (off >= zp->z_size) { 4379219089Spjd /* ignore all pages */ 4380219089Spjd err = 0; 4381219089Spjd goto out; 4382219089Spjd } else if (off + len > zp->z_size) { 4383219089Spjd int npages = btopr(zp->z_size - off); 4384219089Spjd page_t *trunc; 4385219089Spjd 4386219089Spjd page_list_break(&pp, &trunc, npages); 4387219089Spjd /* ignore pages past end of file */ 4388219089Spjd if (trunc) 4389219089Spjd pvn_write_done(trunc, flags); 4390219089Spjd len = zp->z_size - off; 4391219089Spjd } 4392219089Spjd 4393219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 4394219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4395219089Spjd err = EDQUOT; 4396219089Spjd goto out; 4397219089Spjd } 4398219089Spjdtop: 4399219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 4400219089Spjd dmu_tx_hold_write(tx, zp->z_id, off, len); 4401219089Spjd 4402219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4403219089Spjd zfs_sa_upgrade_txholds(tx, zp); 4404219089Spjd err = dmu_tx_assign(tx, TXG_NOWAIT); 4405219089Spjd if (err != 0) { 4406219089Spjd if (err == ERESTART) { 4407219089Spjd dmu_tx_wait(tx); 4408219089Spjd dmu_tx_abort(tx); 4409219089Spjd goto top; 4410219089Spjd } 4411219089Spjd dmu_tx_abort(tx); 4412219089Spjd goto out; 4413219089Spjd } 4414219089Spjd 4415219089Spjd if (zp->z_blksz <= PAGESIZE) { 4416219089Spjd caddr_t va = zfs_map_page(pp, S_READ); 4417219089Spjd ASSERT3U(len, <=, PAGESIZE); 4418219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 4419219089Spjd zfs_unmap_page(pp, va); 4420219089Spjd } else { 4421219089Spjd err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 4422219089Spjd } 4423219089Spjd 4424219089Spjd if (err == 0) { 4425219089Spjd uint64_t mtime[2], ctime[2]; 4426219089Spjd sa_bulk_attr_t bulk[3]; 4427219089Spjd int count = 0; 4428219089Spjd 4429219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 4430219089Spjd &mtime, 16); 4431219089Spjd SA_ADD_BULK_ATTR(bulk, count, 
SA_ZPL_CTIME(zfsvfs), NULL, 4432219089Spjd &ctime, 16); 4433219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4434219089Spjd &zp->z_pflags, 8); 4435219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 4436219089Spjd B_TRUE); 4437219089Spjd zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 4438219089Spjd } 4439219089Spjd dmu_tx_commit(tx); 4440219089Spjd 4441219089Spjdout: 4442219089Spjd pvn_write_done(pp, (err ? B_ERROR : 0) | flags); 4443219089Spjd if (offp) 4444219089Spjd *offp = off; 4445219089Spjd if (lenp) 4446219089Spjd *lenp = len; 4447219089Spjd 4448219089Spjd return (err); 4449219089Spjd} 4450219089Spjd 4451219089Spjd/* 4452219089Spjd * Copy the portion of the file indicated from pages into the file. 4453219089Spjd * The pages are stored in a page list attached to the files vnode. 4454219089Spjd * 4455219089Spjd * IN: vp - vnode of file to push page data to. 4456219089Spjd * off - position in file to put data. 4457219089Spjd * len - amount of data to write. 4458219089Spjd * flags - flags to control the operation. 4459219089Spjd * cr - credentials of caller. 4460219089Spjd * ct - caller context. 
4461219089Spjd * 4462219089Spjd * RETURN: 0 if success 4463219089Spjd * error code if failure 4464219089Spjd * 4465219089Spjd * Timestamps: 4466219089Spjd * vp - ctime|mtime updated 4467219089Spjd */ 4468185029Spjd/*ARGSUSED*/ 4469219089Spjdstatic int 4470219089Spjdzfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, 4471219089Spjd caller_context_t *ct) 4472219089Spjd{ 4473219089Spjd znode_t *zp = VTOZ(vp); 4474219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4475219089Spjd page_t *pp; 4476219089Spjd size_t io_len; 4477219089Spjd u_offset_t io_off; 4478219089Spjd uint_t blksz; 4479219089Spjd rl_t *rl; 4480219089Spjd int error = 0; 4481219089Spjd 4482219089Spjd ZFS_ENTER(zfsvfs); 4483219089Spjd ZFS_VERIFY_ZP(zp); 4484219089Spjd 4485219089Spjd /* 4486219089Spjd * Align this request to the file block size in case we kluster. 4487219089Spjd * XXX - this can result in pretty aggresive locking, which can 4488219089Spjd * impact simultanious read/write access. One option might be 4489219089Spjd * to break up long requests (len == 0) into block-by-block 4490219089Spjd * operations to get narrower locking. 4491219089Spjd */ 4492219089Spjd blksz = zp->z_blksz; 4493219089Spjd if (ISP2(blksz)) 4494219089Spjd io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); 4495219089Spjd else 4496219089Spjd io_off = 0; 4497219089Spjd if (len > 0 && ISP2(blksz)) 4498219089Spjd io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); 4499219089Spjd else 4500219089Spjd io_len = 0; 4501219089Spjd 4502219089Spjd if (io_len == 0) { 4503219089Spjd /* 4504219089Spjd * Search the entire vp list for pages >= io_off. 
4505219089Spjd */ 4506219089Spjd rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); 4507219089Spjd error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); 4508219089Spjd goto out; 4509219089Spjd } 4510219089Spjd rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); 4511219089Spjd 4512219089Spjd if (off > zp->z_size) { 4513219089Spjd /* past end of file */ 4514219089Spjd zfs_range_unlock(rl); 4515219089Spjd ZFS_EXIT(zfsvfs); 4516219089Spjd return (0); 4517219089Spjd } 4518219089Spjd 4519219089Spjd len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); 4520219089Spjd 4521219089Spjd for (off = io_off; io_off < off + len; io_off += io_len) { 4522219089Spjd if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 4523219089Spjd pp = page_lookup(vp, io_off, 4524219089Spjd (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); 4525219089Spjd } else { 4526219089Spjd pp = page_lookup_nowait(vp, io_off, 4527219089Spjd (flags & B_FREE) ? SE_EXCL : SE_SHARED); 4528219089Spjd } 4529219089Spjd 4530219089Spjd if (pp != NULL && pvn_getdirty(pp, flags)) { 4531219089Spjd int err; 4532219089Spjd 4533219089Spjd /* 4534219089Spjd * Found a dirty page to push 4535219089Spjd */ 4536219089Spjd err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); 4537219089Spjd if (err) 4538219089Spjd error = err; 4539219089Spjd } else { 4540219089Spjd io_len = PAGESIZE; 4541219089Spjd } 4542219089Spjd } 4543219089Spjdout: 4544219089Spjd zfs_range_unlock(rl); 4545219089Spjd if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4546219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 4547219089Spjd ZFS_EXIT(zfsvfs); 4548219089Spjd return (error); 4549219089Spjd} 4550219089Spjd#endif /* sun */ 4551219089Spjd 4552219089Spjd/*ARGSUSED*/ 4553168962Spjdvoid 4554185029Spjdzfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4555168404Spjd{ 4556168962Spjd znode_t *zp = VTOZ(vp); 4557168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4558168962Spjd int error; 4559168404Spjd 4560185029Spjd 
rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4561219089Spjd if (zp->z_sa_hdl == NULL) { 4562185029Spjd /* 4563185029Spjd * The fs has been unmounted, or we did a 4564185029Spjd * suspend/resume and this file no longer exists. 4565185029Spjd */ 4566168404Spjd VI_LOCK(vp); 4567219089Spjd ASSERT(vp->v_count <= 1); 4568219089Spjd vp->v_count = 0; 4569196299Spjd VI_UNLOCK(vp); 4570234607Strasz vrecycle(vp); 4571185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4572168962Spjd return; 4573168404Spjd } 4574168404Spjd 4575168404Spjd if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4576168404Spjd dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4577168404Spjd 4578219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4579219089Spjd zfs_sa_upgrade_txholds(tx, zp); 4580168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 4581168404Spjd if (error) { 4582168404Spjd dmu_tx_abort(tx); 4583168404Spjd } else { 4584168404Spjd mutex_enter(&zp->z_lock); 4585219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 4586219089Spjd (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4587168404Spjd zp->z_atime_dirty = 0; 4588168404Spjd mutex_exit(&zp->z_lock); 4589168404Spjd dmu_tx_commit(tx); 4590168404Spjd } 4591168404Spjd } 4592168404Spjd 4593168404Spjd zfs_zinactive(zp); 4594185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4595168404Spjd} 4596168404Spjd 4597219089Spjd#ifdef sun 4598219089Spjd/* 4599219089Spjd * Bounds-check the seek operation. 
4600219089Spjd * 4601219089Spjd * IN: vp - vnode seeking within 4602219089Spjd * ooff - old file offset 4603219089Spjd * noffp - pointer to new file offset 4604219089Spjd * ct - caller context 4605219089Spjd * 4606219089Spjd * RETURN: 0 if success 4607219089Spjd * EINVAL if new offset invalid 4608219089Spjd */ 4609219089Spjd/* ARGSUSED */ 4610219089Spjdstatic int 4611219089Spjdzfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 4612219089Spjd caller_context_t *ct) 4613219089Spjd{ 4614219089Spjd if (vp->v_type == VDIR) 4615219089Spjd return (0); 4616219089Spjd return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 4617219089Spjd} 4618219089Spjd 4619219089Spjd/* 4620219089Spjd * Pre-filter the generic locking function to trap attempts to place 4621219089Spjd * a mandatory lock on a memory mapped file. 4622219089Spjd */ 4623219089Spjdstatic int 4624219089Spjdzfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 4625219089Spjd flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 4626219089Spjd{ 4627219089Spjd znode_t *zp = VTOZ(vp); 4628219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4629219089Spjd 4630219089Spjd ZFS_ENTER(zfsvfs); 4631219089Spjd ZFS_VERIFY_ZP(zp); 4632219089Spjd 4633219089Spjd /* 4634219089Spjd * We are following the UFS semantics with respect to mapcnt 4635219089Spjd * here: If we see that the file is mapped already, then we will 4636219089Spjd * return an error, but we don't worry about races between this 4637219089Spjd * function and zfs_map(). 4638219089Spjd */ 4639219089Spjd if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { 4640219089Spjd ZFS_EXIT(zfsvfs); 4641219089Spjd return (EAGAIN); 4642219089Spjd } 4643219089Spjd ZFS_EXIT(zfsvfs); 4644219089Spjd return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 4645219089Spjd} 4646219089Spjd 4647219089Spjd/* 4648219089Spjd * If we can't find a page in the cache, we will create a new page 4649219089Spjd * and fill it with file data. 
For efficiency, we may try to fill 4650219089Spjd * multiple pages at once (klustering) to fill up the supplied page 4651219089Spjd * list. Note that the pages to be filled are held with an exclusive 4652219089Spjd * lock to prevent access by other threads while they are being filled. 4653219089Spjd */ 4654219089Spjdstatic int 4655219089Spjdzfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, 4656219089Spjd caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) 4657219089Spjd{ 4658219089Spjd znode_t *zp = VTOZ(vp); 4659219089Spjd page_t *pp, *cur_pp; 4660219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 4661219089Spjd u_offset_t io_off, total; 4662219089Spjd size_t io_len; 4663219089Spjd int err; 4664219089Spjd 4665219089Spjd if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { 4666219089Spjd /* 4667219089Spjd * We only have a single page, don't bother klustering 4668219089Spjd */ 4669219089Spjd io_off = off; 4670219089Spjd io_len = PAGESIZE; 4671219089Spjd pp = page_create_va(vp, io_off, io_len, 4672219089Spjd PG_EXCL | PG_WAIT, seg, addr); 4673219089Spjd } else { 4674219089Spjd /* 4675219089Spjd * Try to find enough pages to fill the page list 4676219089Spjd */ 4677219089Spjd pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 4678219089Spjd &io_len, off, plsz, 0); 4679219089Spjd } 4680219089Spjd if (pp == NULL) { 4681219089Spjd /* 4682219089Spjd * The page already exists, nothing to do here. 4683219089Spjd */ 4684219089Spjd *pl = NULL; 4685219089Spjd return (0); 4686219089Spjd } 4687219089Spjd 4688219089Spjd /* 4689219089Spjd * Fill the pages in the kluster. 
4690219089Spjd */ 4691219089Spjd cur_pp = pp; 4692219089Spjd for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { 4693219089Spjd caddr_t va; 4694219089Spjd 4695219089Spjd ASSERT3U(io_off, ==, cur_pp->p_offset); 4696219089Spjd va = zfs_map_page(cur_pp, S_WRITE); 4697219089Spjd err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, 4698219089Spjd DMU_READ_PREFETCH); 4699219089Spjd zfs_unmap_page(cur_pp, va); 4700219089Spjd if (err) { 4701219089Spjd /* On error, toss the entire kluster */ 4702219089Spjd pvn_read_done(pp, B_ERROR); 4703219089Spjd /* convert checksum errors into IO errors */ 4704219089Spjd if (err == ECKSUM) 4705219089Spjd err = EIO; 4706219089Spjd return (err); 4707219089Spjd } 4708219089Spjd cur_pp = cur_pp->p_next; 4709219089Spjd } 4710219089Spjd 4711219089Spjd /* 4712219089Spjd * Fill in the page list array from the kluster starting 4713219089Spjd * from the desired offset `off'. 4714219089Spjd * NOTE: the page list will always be null terminated. 4715219089Spjd */ 4716219089Spjd pvn_plist_init(pp, pl, plsz, off, io_len, rw); 4717219089Spjd ASSERT(pl == NULL || (*pl)->p_offset == off); 4718219089Spjd 4719219089Spjd return (0); 4720219089Spjd} 4721219089Spjd 4722219089Spjd/* 4723219089Spjd * Return pointers to the pages for the file region [off, off + len] 4724219089Spjd * in the pl array. If plsz is greater than len, this function may 4725219089Spjd * also return page pointers from after the specified region 4726219089Spjd * (i.e. the region [off, off + plsz]). These additional pages are 4727219089Spjd * only returned if they are already in the cache, or were created as 4728219089Spjd * part of a klustered read. 4729219089Spjd * 4730219089Spjd * IN: vp - vnode of file to get data from. 4731219089Spjd * off - position in file to get data from. 4732219089Spjd * len - amount of data to retrieve. 4733219089Spjd * plsz - length of provided page list. 4734219089Spjd * seg - segment to obtain pages for. 
4735219089Spjd * addr - virtual address of fault. 4736219089Spjd * rw - mode of created pages. 4737219089Spjd * cr - credentials of caller. 4738219089Spjd * ct - caller context. 4739219089Spjd * 4740219089Spjd * OUT: protp - protection mode of created pages. 4741219089Spjd * pl - list of pages created. 4742219089Spjd * 4743219089Spjd * RETURN: 0 if success 4744219089Spjd * error code if failure 4745219089Spjd * 4746219089Spjd * Timestamps: 4747219089Spjd * vp - atime updated 4748219089Spjd */ 4749219089Spjd/* ARGSUSED */ 4750219089Spjdstatic int 4751219089Spjdzfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 4752219089Spjd page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, 4753219089Spjd enum seg_rw rw, cred_t *cr, caller_context_t *ct) 4754219089Spjd{ 4755219089Spjd znode_t *zp = VTOZ(vp); 4756219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4757219089Spjd page_t **pl0 = pl; 4758219089Spjd int err = 0; 4759219089Spjd 4760219089Spjd /* we do our own caching, faultahead is unnecessary */ 4761219089Spjd if (pl == NULL) 4762219089Spjd return (0); 4763219089Spjd else if (len > plsz) 4764219089Spjd len = plsz; 4765219089Spjd else 4766219089Spjd len = P2ROUNDUP(len, PAGESIZE); 4767219089Spjd ASSERT(plsz >= len); 4768219089Spjd 4769219089Spjd ZFS_ENTER(zfsvfs); 4770219089Spjd ZFS_VERIFY_ZP(zp); 4771219089Spjd 4772219089Spjd if (protp) 4773219089Spjd *protp = PROT_ALL; 4774219089Spjd 4775219089Spjd /* 4776219089Spjd * Loop through the requested range [off, off + len) looking 4777219089Spjd * for pages. If we don't find a page, we will need to create 4778219089Spjd * a new page and fill it with data from the file. 
4779219089Spjd */ 4780219089Spjd while (len > 0) { 4781219089Spjd if (*pl = page_lookup(vp, off, SE_SHARED)) 4782219089Spjd *(pl+1) = NULL; 4783219089Spjd else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) 4784219089Spjd goto out; 4785219089Spjd while (*pl) { 4786219089Spjd ASSERT3U((*pl)->p_offset, ==, off); 4787219089Spjd off += PAGESIZE; 4788219089Spjd addr += PAGESIZE; 4789219089Spjd if (len > 0) { 4790219089Spjd ASSERT3U(len, >=, PAGESIZE); 4791219089Spjd len -= PAGESIZE; 4792219089Spjd } 4793219089Spjd ASSERT3U(plsz, >=, PAGESIZE); 4794219089Spjd plsz -= PAGESIZE; 4795219089Spjd pl++; 4796219089Spjd } 4797219089Spjd } 4798219089Spjd 4799219089Spjd /* 4800219089Spjd * Fill out the page array with any pages already in the cache. 4801219089Spjd */ 4802219089Spjd while (plsz > 0 && 4803219089Spjd (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { 4804219089Spjd off += PAGESIZE; 4805219089Spjd plsz -= PAGESIZE; 4806219089Spjd } 4807219089Spjdout: 4808219089Spjd if (err) { 4809219089Spjd /* 4810219089Spjd * Release any pages we have previously locked. 4811219089Spjd */ 4812219089Spjd while (pl > pl0) 4813219089Spjd page_unlock(*--pl); 4814219089Spjd } else { 4815219089Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4816219089Spjd } 4817219089Spjd 4818219089Spjd *pl = NULL; 4819219089Spjd 4820219089Spjd ZFS_EXIT(zfsvfs); 4821219089Spjd return (err); 4822219089Spjd} 4823219089Spjd 4824219089Spjd/* 4825219089Spjd * Request a memory map for a section of a file. 
This code interacts 4826219089Spjd * with common code and the VM system as follows: 4827219089Spjd * 4828219089Spjd * common code calls mmap(), which ends up in smmap_common() 4829219089Spjd * 4830219089Spjd * this calls VOP_MAP(), which takes you into (say) zfs 4831219089Spjd * 4832219089Spjd * zfs_map() calls as_map(), passing segvn_create() as the callback 4833219089Spjd * 4834219089Spjd * segvn_create() creates the new segment and calls VOP_ADDMAP() 4835219089Spjd * 4836219089Spjd * zfs_addmap() updates z_mapcnt 4837219089Spjd */ 4838219089Spjd/*ARGSUSED*/ 4839219089Spjdstatic int 4840219089Spjdzfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 4841219089Spjd size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4842219089Spjd caller_context_t *ct) 4843219089Spjd{ 4844219089Spjd znode_t *zp = VTOZ(vp); 4845219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4846219089Spjd segvn_crargs_t vn_a; 4847219089Spjd int error; 4848219089Spjd 4849219089Spjd ZFS_ENTER(zfsvfs); 4850219089Spjd ZFS_VERIFY_ZP(zp); 4851219089Spjd 4852219089Spjd if ((prot & PROT_WRITE) && (zp->z_pflags & 4853219089Spjd (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { 4854219089Spjd ZFS_EXIT(zfsvfs); 4855219089Spjd return (EPERM); 4856219089Spjd } 4857219089Spjd 4858219089Spjd if ((prot & (PROT_READ | PROT_EXEC)) && 4859219089Spjd (zp->z_pflags & ZFS_AV_QUARANTINED)) { 4860219089Spjd ZFS_EXIT(zfsvfs); 4861219089Spjd return (EACCES); 4862219089Spjd } 4863219089Spjd 4864219089Spjd if (vp->v_flag & VNOMAP) { 4865219089Spjd ZFS_EXIT(zfsvfs); 4866219089Spjd return (ENOSYS); 4867219089Spjd } 4868219089Spjd 4869219089Spjd if (off < 0 || len > MAXOFFSET_T - off) { 4870219089Spjd ZFS_EXIT(zfsvfs); 4871219089Spjd return (ENXIO); 4872219089Spjd } 4873219089Spjd 4874219089Spjd if (vp->v_type != VREG) { 4875219089Spjd ZFS_EXIT(zfsvfs); 4876219089Spjd return (ENODEV); 4877219089Spjd } 4878219089Spjd 4879219089Spjd /* 4880219089Spjd * If file is locked, disallow mapping. 
4881219089Spjd */ 4882219089Spjd if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) { 4883219089Spjd ZFS_EXIT(zfsvfs); 4884219089Spjd return (EAGAIN); 4885219089Spjd } 4886219089Spjd 4887219089Spjd as_rangelock(as); 4888219089Spjd error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 4889219089Spjd if (error != 0) { 4890219089Spjd as_rangeunlock(as); 4891219089Spjd ZFS_EXIT(zfsvfs); 4892219089Spjd return (error); 4893219089Spjd } 4894219089Spjd 4895219089Spjd vn_a.vp = vp; 4896219089Spjd vn_a.offset = (u_offset_t)off; 4897219089Spjd vn_a.type = flags & MAP_TYPE; 4898219089Spjd vn_a.prot = prot; 4899219089Spjd vn_a.maxprot = maxprot; 4900219089Spjd vn_a.cred = cr; 4901219089Spjd vn_a.amp = NULL; 4902219089Spjd vn_a.flags = flags & ~MAP_TYPE; 4903219089Spjd vn_a.szc = 0; 4904219089Spjd vn_a.lgrp_mem_policy_flags = 0; 4905219089Spjd 4906219089Spjd error = as_map(as, *addrp, len, segvn_create, &vn_a); 4907219089Spjd 4908219089Spjd as_rangeunlock(as); 4909219089Spjd ZFS_EXIT(zfsvfs); 4910219089Spjd return (error); 4911219089Spjd} 4912219089Spjd 4913219089Spjd/* ARGSUSED */ 4914219089Spjdstatic int 4915219089Spjdzfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 4916219089Spjd size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4917219089Spjd caller_context_t *ct) 4918219089Spjd{ 4919219089Spjd uint64_t pages = btopr(len); 4920219089Spjd 4921219089Spjd atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); 4922219089Spjd return (0); 4923219089Spjd} 4924219089Spjd 4925219089Spjd/* 4926219089Spjd * The reason we push dirty pages as part of zfs_delmap() is so that we get a 4927219089Spjd * more accurate mtime for the associated file. Since we don't have a way of 4928219089Spjd * detecting when the data was actually modified, we have to resort to 4929219089Spjd * heuristics. If an explicit msync() is done, then we mark the mtime when the 4930219089Spjd * last page is pushed. 
The problem occurs when the msync() call is omitted, 4931219089Spjd * which by far the most common case: 4932219089Spjd * 4933219089Spjd * open() 4934219089Spjd * mmap() 4935219089Spjd * <modify memory> 4936219089Spjd * munmap() 4937219089Spjd * close() 4938219089Spjd * <time lapse> 4939219089Spjd * putpage() via fsflush 4940219089Spjd * 4941219089Spjd * If we wait until fsflush to come along, we can have a modification time that 4942219089Spjd * is some arbitrary point in the future. In order to prevent this in the 4943219089Spjd * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is 4944219089Spjd * torn down. 4945219089Spjd */ 4946219089Spjd/* ARGSUSED */ 4947219089Spjdstatic int 4948219089Spjdzfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 4949219089Spjd size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 4950219089Spjd caller_context_t *ct) 4951219089Spjd{ 4952219089Spjd uint64_t pages = btopr(len); 4953219089Spjd 4954219089Spjd ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 4955219089Spjd atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 4956219089Spjd 4957219089Spjd if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 4958219089Spjd vn_has_cached_data(vp)) 4959219089Spjd (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 4960219089Spjd 4961219089Spjd return (0); 4962219089Spjd} 4963219089Spjd 4964219089Spjd/* 4965219089Spjd * Free or allocate space in a file. Currently, this function only 4966219089Spjd * supports the `F_FREESP' command. However, this command is somewhat 4967219089Spjd * misnamed, as its functionality includes the ability to allocate as 4968219089Spjd * well as free space. 4969219089Spjd * 4970219089Spjd * IN: vp - vnode of file to free data in. 4971219089Spjd * cmd - action to take (only F_FREESP supported). 4972219089Spjd * bfp - section of file to free/alloc. 4973219089Spjd * flag - current file open mode flags. 4974219089Spjd * offset - current file offset. 
4975219089Spjd * cr - credentials of caller [UNUSED]. 4976219089Spjd * ct - caller context. 4977219089Spjd * 4978219089Spjd * RETURN: 0 if success 4979219089Spjd * error code if failure 4980219089Spjd * 4981219089Spjd * Timestamps: 4982219089Spjd * vp - ctime|mtime updated 4983219089Spjd */ 4984219089Spjd/* ARGSUSED */ 4985219089Spjdstatic int 4986219089Spjdzfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, 4987219089Spjd offset_t offset, cred_t *cr, caller_context_t *ct) 4988219089Spjd{ 4989219089Spjd znode_t *zp = VTOZ(vp); 4990219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4991219089Spjd uint64_t off, len; 4992219089Spjd int error; 4993219089Spjd 4994219089Spjd ZFS_ENTER(zfsvfs); 4995219089Spjd ZFS_VERIFY_ZP(zp); 4996219089Spjd 4997219089Spjd if (cmd != F_FREESP) { 4998219089Spjd ZFS_EXIT(zfsvfs); 4999219089Spjd return (EINVAL); 5000219089Spjd } 5001219089Spjd 5002219089Spjd if (error = convoff(vp, bfp, 0, offset)) { 5003219089Spjd ZFS_EXIT(zfsvfs); 5004219089Spjd return (error); 5005219089Spjd } 5006219089Spjd 5007219089Spjd if (bfp->l_len < 0) { 5008219089Spjd ZFS_EXIT(zfsvfs); 5009219089Spjd return (EINVAL); 5010219089Spjd } 5011219089Spjd 5012219089Spjd off = bfp->l_start; 5013219089Spjd len = bfp->l_len; /* 0 means from off to end of file */ 5014219089Spjd 5015219089Spjd error = zfs_freesp(zp, off, len, flag, TRUE); 5016219089Spjd 5017219089Spjd ZFS_EXIT(zfsvfs); 5018219089Spjd return (error); 5019219089Spjd} 5020219089Spjd#endif /* sun */ 5021219089Spjd 5022168404SpjdCTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 5023168404SpjdCTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 5024168404Spjd 5025185029Spjd/*ARGSUSED*/ 5026168404Spjdstatic int 5027185029Spjdzfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 5028168404Spjd{ 5029168404Spjd znode_t *zp = VTOZ(vp); 5030168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5031185029Spjd uint32_t gen; 5032219089Spjd uint64_t gen64; 5033168404Spjd uint64_t object = zp->z_id; 5034168404Spjd 
zfid_short_t *zfid; 5035219089Spjd int size, i, error; 5036168404Spjd 5037168404Spjd ZFS_ENTER(zfsvfs); 5038185029Spjd ZFS_VERIFY_ZP(zp); 5039168404Spjd 5040219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 5041219089Spjd &gen64, sizeof (uint64_t))) != 0) { 5042219089Spjd ZFS_EXIT(zfsvfs); 5043219089Spjd return (error); 5044219089Spjd } 5045219089Spjd 5046219089Spjd gen = (uint32_t)gen64; 5047219089Spjd 5048168404Spjd size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 5049168404Spjd fidp->fid_len = size; 5050168404Spjd 5051168404Spjd zfid = (zfid_short_t *)fidp; 5052168404Spjd 5053168404Spjd zfid->zf_len = size; 5054168404Spjd 5055168404Spjd for (i = 0; i < sizeof (zfid->zf_object); i++) 5056168404Spjd zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 5057168404Spjd 5058168404Spjd /* Must have a non-zero generation number to distinguish from .zfs */ 5059168404Spjd if (gen == 0) 5060168404Spjd gen = 1; 5061168404Spjd for (i = 0; i < sizeof (zfid->zf_gen); i++) 5062168404Spjd zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 5063168404Spjd 5064168404Spjd if (size == LONG_FID_LEN) { 5065168404Spjd uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 5066169023Spjd zfid_long_t *zlfid; 5067168404Spjd 5068168404Spjd zlfid = (zfid_long_t *)fidp; 5069168404Spjd 5070168404Spjd for (i = 0; i < sizeof (zlfid->zf_setid); i++) 5071168404Spjd zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 5072168404Spjd 5073168404Spjd /* XXX - this should be the generation number for the objset */ 5074168404Spjd for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 5075168404Spjd zlfid->zf_setgen[i] = 0; 5076168404Spjd } 5077168404Spjd 5078168404Spjd ZFS_EXIT(zfsvfs); 5079168404Spjd return (0); 5080168404Spjd} 5081168404Spjd 5082168404Spjdstatic int 5083185029Spjdzfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 5084185029Spjd caller_context_t *ct) 5085168404Spjd{ 5086168404Spjd znode_t *zp, *xzp; 5087168404Spjd zfsvfs_t *zfsvfs; 5088168404Spjd 
zfs_dirlock_t *dl; 5089168404Spjd int error; 5090168404Spjd 5091168404Spjd switch (cmd) { 5092168404Spjd case _PC_LINK_MAX: 5093168404Spjd *valp = INT_MAX; 5094168404Spjd return (0); 5095168404Spjd 5096168404Spjd case _PC_FILESIZEBITS: 5097168404Spjd *valp = 64; 5098168404Spjd return (0); 5099219089Spjd#ifdef sun 5100168404Spjd case _PC_XATTR_EXISTS: 5101168404Spjd zp = VTOZ(vp); 5102168404Spjd zfsvfs = zp->z_zfsvfs; 5103168404Spjd ZFS_ENTER(zfsvfs); 5104185029Spjd ZFS_VERIFY_ZP(zp); 5105168404Spjd *valp = 0; 5106168404Spjd error = zfs_dirent_lock(&dl, zp, "", &xzp, 5107185029Spjd ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 5108168404Spjd if (error == 0) { 5109168404Spjd zfs_dirent_unlock(dl); 5110168404Spjd if (!zfs_dirempty(xzp)) 5111168404Spjd *valp = 1; 5112168404Spjd VN_RELE(ZTOV(xzp)); 5113168404Spjd } else if (error == ENOENT) { 5114168404Spjd /* 5115168404Spjd * If there aren't extended attributes, it's the 5116168404Spjd * same as having zero of them. 5117168404Spjd */ 5118168404Spjd error = 0; 5119168404Spjd } 5120168404Spjd ZFS_EXIT(zfsvfs); 5121168404Spjd return (error); 5122168404Spjd 5123219089Spjd case _PC_SATTR_ENABLED: 5124219089Spjd case _PC_SATTR_EXISTS: 5125219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 5126219089Spjd (vp->v_type == VREG || vp->v_type == VDIR); 5127219089Spjd return (0); 5128219089Spjd 5129219089Spjd case _PC_ACCESS_FILTERING: 5130219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 5131219089Spjd vp->v_type == VDIR; 5132219089Spjd return (0); 5133219089Spjd 5134219089Spjd case _PC_ACL_ENABLED: 5135219089Spjd *valp = _ACL_ACE_ENABLED; 5136219089Spjd return (0); 5137219089Spjd#endif /* sun */ 5138219089Spjd case _PC_MIN_HOLE_SIZE: 5139219089Spjd *valp = (int)SPA_MINBLOCKSIZE; 5140219089Spjd return (0); 5141219089Spjd#ifdef sun 5142219089Spjd case _PC_TIMESTAMP_RESOLUTION: 5143219089Spjd /* nanosecond timestamp resolution */ 5144219089Spjd *valp = 1L; 5145219089Spjd return (0); 
5146219089Spjd#endif /* sun */ 5147168404Spjd case _PC_ACL_EXTENDED: 5148196949Strasz *valp = 0; 5149168404Spjd return (0); 5150168404Spjd 5151196949Strasz case _PC_ACL_NFS4: 5152196949Strasz *valp = 1; 5153196949Strasz return (0); 5154196949Strasz 5155196949Strasz case _PC_ACL_PATH_MAX: 5156196949Strasz *valp = ACL_MAX_ENTRIES; 5157196949Strasz return (0); 5158196949Strasz 5159168404Spjd default: 5160168962Spjd return (EOPNOTSUPP); 5161168404Spjd } 5162168404Spjd} 5163168404Spjd 5164168404Spjd/*ARGSUSED*/ 5165168404Spjdstatic int 5166185029Spjdzfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5167185029Spjd caller_context_t *ct) 5168168404Spjd{ 5169168404Spjd znode_t *zp = VTOZ(vp); 5170168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5171168404Spjd int error; 5172185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 5173168404Spjd 5174168404Spjd ZFS_ENTER(zfsvfs); 5175185029Spjd ZFS_VERIFY_ZP(zp); 5176185029Spjd error = zfs_getacl(zp, vsecp, skipaclchk, cr); 5177168404Spjd ZFS_EXIT(zfsvfs); 5178168404Spjd 5179168404Spjd return (error); 5180168404Spjd} 5181168404Spjd 5182168404Spjd/*ARGSUSED*/ 5183228685Spjdint 5184185029Spjdzfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5185185029Spjd caller_context_t *ct) 5186168404Spjd{ 5187168404Spjd znode_t *zp = VTOZ(vp); 5188168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5189168404Spjd int error; 5190185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 5191219089Spjd zilog_t *zilog = zfsvfs->z_log; 5192168404Spjd 5193168404Spjd ZFS_ENTER(zfsvfs); 5194185029Spjd ZFS_VERIFY_ZP(zp); 5195219089Spjd 5196185029Spjd error = zfs_setacl(zp, vsecp, skipaclchk, cr); 5197219089Spjd 5198219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5199219089Spjd zil_commit(zilog, 0); 5200219089Spjd 5201168404Spjd ZFS_EXIT(zfsvfs); 5202168404Spjd return (error); 5203168404Spjd} 5204168404Spjd 5205219089Spjd#ifdef sun 5206219089Spjd/* 5207219089Spjd * Tunable, both must be a power of 2. 5208219089Spjd * 5209219089Spjd * zcr_blksz_min: the smallest read we may consider to loan out an arcbuf 5210219089Spjd * zcr_blksz_max: if set to less than the file block size, allow loaning out of 5211219089Spjd * an arcbuf for a partial block read 5212219089Spjd */ 5213219089Spjdint zcr_blksz_min = (1 << 10); /* 1K */ 5214219089Spjdint zcr_blksz_max = (1 << 17); /* 128K */ 5215219089Spjd 5216219089Spjd/*ARGSUSED*/ 5217168962Spjdstatic int 5218219089Spjdzfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 5219219089Spjd caller_context_t *ct) 5220219089Spjd{ 5221219089Spjd znode_t *zp = VTOZ(vp); 5222219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5223219089Spjd int max_blksz = zfsvfs->z_max_blksz; 5224219089Spjd uio_t *uio = &xuio->xu_uio; 5225219089Spjd ssize_t size = uio->uio_resid; 5226219089Spjd offset_t offset = uio->uio_loffset; 5227219089Spjd int blksz; 5228219089Spjd int fullblk, i; 5229219089Spjd arc_buf_t *abuf; 5230219089Spjd ssize_t maxsize; 5231219089Spjd int preamble, postamble; 5232219089Spjd 5233219089Spjd if (xuio->xu_type != UIOTYPE_ZEROCOPY) 5234219089Spjd return (EINVAL); 5235219089Spjd 5236219089Spjd ZFS_ENTER(zfsvfs); 5237219089Spjd ZFS_VERIFY_ZP(zp); 5238219089Spjd switch (ioflag) { 5239219089Spjd case UIO_WRITE: 5240219089Spjd /* 5241219089Spjd * Loan out an arc_buf for write if write size is bigger than 5242219089Spjd * max_blksz, and the file's block size is also max_blksz. 
5243219089Spjd */ 5244219089Spjd blksz = max_blksz; 5245219089Spjd if (size < blksz || zp->z_blksz != blksz) { 5246219089Spjd ZFS_EXIT(zfsvfs); 5247219089Spjd return (EINVAL); 5248219089Spjd } 5249219089Spjd /* 5250219089Spjd * Caller requests buffers for write before knowing where the 5251219089Spjd * write offset might be (e.g. NFS TCP write). 5252219089Spjd */ 5253219089Spjd if (offset == -1) { 5254219089Spjd preamble = 0; 5255219089Spjd } else { 5256219089Spjd preamble = P2PHASE(offset, blksz); 5257219089Spjd if (preamble) { 5258219089Spjd preamble = blksz - preamble; 5259219089Spjd size -= preamble; 5260219089Spjd } 5261219089Spjd } 5262219089Spjd 5263219089Spjd postamble = P2PHASE(size, blksz); 5264219089Spjd size -= postamble; 5265219089Spjd 5266219089Spjd fullblk = size / blksz; 5267219089Spjd (void) dmu_xuio_init(xuio, 5268219089Spjd (preamble != 0) + fullblk + (postamble != 0)); 5269219089Spjd DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble, 5270219089Spjd int, postamble, int, 5271219089Spjd (preamble != 0) + fullblk + (postamble != 0)); 5272219089Spjd 5273219089Spjd /* 5274219089Spjd * Have to fix iov base/len for partial buffers. They 5275219089Spjd * currently represent full arc_buf's. 
5276219089Spjd */ 5277219089Spjd if (preamble) { 5278219089Spjd /* data begins in the middle of the arc_buf */ 5279219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5280219089Spjd blksz); 5281219089Spjd ASSERT(abuf); 5282219089Spjd (void) dmu_xuio_add(xuio, abuf, 5283219089Spjd blksz - preamble, preamble); 5284219089Spjd } 5285219089Spjd 5286219089Spjd for (i = 0; i < fullblk; i++) { 5287219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5288219089Spjd blksz); 5289219089Spjd ASSERT(abuf); 5290219089Spjd (void) dmu_xuio_add(xuio, abuf, 0, blksz); 5291219089Spjd } 5292219089Spjd 5293219089Spjd if (postamble) { 5294219089Spjd /* data ends in the middle of the arc_buf */ 5295219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5296219089Spjd blksz); 5297219089Spjd ASSERT(abuf); 5298219089Spjd (void) dmu_xuio_add(xuio, abuf, 0, postamble); 5299219089Spjd } 5300219089Spjd break; 5301219089Spjd case UIO_READ: 5302219089Spjd /* 5303219089Spjd * Loan out an arc_buf for read if the read size is larger than 5304219089Spjd * the current file block size. Block alignment is not 5305219089Spjd * considered. Partial arc_buf will be loaned out for read. 
5306219089Spjd */ 5307219089Spjd blksz = zp->z_blksz; 5308219089Spjd if (blksz < zcr_blksz_min) 5309219089Spjd blksz = zcr_blksz_min; 5310219089Spjd if (blksz > zcr_blksz_max) 5311219089Spjd blksz = zcr_blksz_max; 5312219089Spjd /* avoid potential complexity of dealing with it */ 5313219089Spjd if (blksz > max_blksz) { 5314219089Spjd ZFS_EXIT(zfsvfs); 5315219089Spjd return (EINVAL); 5316219089Spjd } 5317219089Spjd 5318219089Spjd maxsize = zp->z_size - uio->uio_loffset; 5319219089Spjd if (size > maxsize) 5320219089Spjd size = maxsize; 5321219089Spjd 5322219089Spjd if (size < blksz || vn_has_cached_data(vp)) { 5323219089Spjd ZFS_EXIT(zfsvfs); 5324219089Spjd return (EINVAL); 5325219089Spjd } 5326219089Spjd break; 5327219089Spjd default: 5328219089Spjd ZFS_EXIT(zfsvfs); 5329219089Spjd return (EINVAL); 5330219089Spjd } 5331219089Spjd 5332219089Spjd uio->uio_extflg = UIO_XUIO; 5333219089Spjd XUIO_XUZC_RW(xuio) = ioflag; 5334219089Spjd ZFS_EXIT(zfsvfs); 5335219089Spjd return (0); 5336219089Spjd} 5337219089Spjd 5338219089Spjd/*ARGSUSED*/ 5339219089Spjdstatic int 5340219089Spjdzfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 5341219089Spjd{ 5342219089Spjd int i; 5343219089Spjd arc_buf_t *abuf; 5344219089Spjd int ioflag = XUIO_XUZC_RW(xuio); 5345219089Spjd 5346219089Spjd ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 5347219089Spjd 5348219089Spjd i = dmu_xuio_cnt(xuio); 5349219089Spjd while (i-- > 0) { 5350219089Spjd abuf = dmu_xuio_arcbuf(xuio, i); 5351219089Spjd /* 5352219089Spjd * if abuf == NULL, it must be a write buffer 5353219089Spjd * that has been returned in zfs_write(). 
5354219089Spjd */ 5355219089Spjd if (abuf) 5356219089Spjd dmu_return_arcbuf(abuf); 5357219089Spjd ASSERT(abuf || ioflag == UIO_WRITE); 5358219089Spjd } 5359219089Spjd 5360219089Spjd dmu_xuio_fini(xuio); 5361219089Spjd return (0); 5362219089Spjd} 5363219089Spjd 5364219089Spjd/* 5365219089Spjd * Predeclare these here so that the compiler assumes that 5366219089Spjd * this is an "old style" function declaration that does 5367219089Spjd * not include arguments => we won't get type mismatch errors 5368219089Spjd * in the initializations that follow. 5369219089Spjd */ 5370219089Spjdstatic int zfs_inval(); 5371219089Spjdstatic int zfs_isdir(); 5372219089Spjd 5373219089Spjdstatic int 5374219089Spjdzfs_inval() 5375219089Spjd{ 5376219089Spjd return (EINVAL); 5377219089Spjd} 5378219089Spjd 5379219089Spjdstatic int 5380219089Spjdzfs_isdir() 5381219089Spjd{ 5382219089Spjd return (EISDIR); 5383219089Spjd} 5384219089Spjd/* 5385219089Spjd * Directory vnode operations template 5386219089Spjd */ 5387219089Spjdvnodeops_t *zfs_dvnodeops; 5388219089Spjdconst fs_operation_def_t zfs_dvnodeops_template[] = { 5389219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5390219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5391219089Spjd VOPNAME_READ, { .error = zfs_isdir }, 5392219089Spjd VOPNAME_WRITE, { .error = zfs_isdir }, 5393219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5394219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5395219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5396219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5397219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5398219089Spjd VOPNAME_CREATE, { .vop_create = zfs_create }, 5399219089Spjd VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5400219089Spjd VOPNAME_LINK, { .vop_link = zfs_link }, 5401219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5402219089Spjd VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, 5403219089Spjd VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5404219089Spjd 
/*
 * Regular file vnode operations template
 *
 * Each entry pairs a VOPNAME_* operation name with the handler used for
 * regular files.  Unlike the directory template, READ and WRITE are routed
 * to zfs_read()/zfs_write(), and the paging/mapping operations (GETPAGE,
 * PUTPAGE, MAP, ADDMAP, DELMAP), FRLOCK, SPACE and the zero-copy buffer
 * operations (REQZCBUF, RETZCBUF) are provided.  The table is terminated
 * by a NULL, NULL pair.
 *
 * NOTE(review): zfs_fvnodeops is only declared here; where it is filled in
 * from this template is not visible in this part of the file.
 */
vnodeops_t *zfs_fvnodeops;
const fs_operation_def_t zfs_fvnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = zfs_open },
	VOPNAME_CLOSE,		{ .vop_close = zfs_close },
	VOPNAME_READ,		{ .vop_read = zfs_read },
	VOPNAME_WRITE,		{ .vop_write = zfs_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = zfs_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = zfs_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = zfs_setattr },
	VOPNAME_ACCESS,		{ .vop_access = zfs_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = zfs_lookup },
	VOPNAME_RENAME,		{ .vop_rename = zfs_rename },
	VOPNAME_FSYNC,		{ .vop_fsync = zfs_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = zfs_inactive },
	VOPNAME_FID,		{ .vop_fid = zfs_fid },
	VOPNAME_SEEK,		{ .vop_seek = zfs_seek },
	VOPNAME_FRLOCK,		{ .vop_frlock = zfs_frlock },
	VOPNAME_SPACE,		{ .vop_space = zfs_space },
	VOPNAME_GETPAGE,	{ .vop_getpage = zfs_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = zfs_putpage },
	VOPNAME_MAP,		{ .vop_map = zfs_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = zfs_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = zfs_delmap },
	VOPNAME_PATHCONF,	{ .vop_pathconf = zfs_pathconf },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = zfs_getsecattr },
	VOPNAME_SETSECATTR,	{ .vop_setsecattr = zfs_setsecattr },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	VOPNAME_REQZCBUF,	{ .vop_reqzcbuf = zfs_reqzcbuf },
	VOPNAME_RETZCBUF,	{ .vop_retzcbuf = zfs_retzcbuf },
	NULL,			NULL
};
VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5480219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5481219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5482219089Spjd NULL, NULL 5483219089Spjd}; 5484219089Spjd 5485219089Spjd/* 5486219089Spjd * Extended attribute directory vnode operations template 5487219089Spjd * This template is identical to the directory vnodes 5488219089Spjd * operation template except for restricted operations: 5489219089Spjd * VOP_MKDIR() 5490219089Spjd * VOP_SYMLINK() 5491219089Spjd * Note that there are other restrictions embedded in: 5492219089Spjd * zfs_create() - restrict type to VREG 5493219089Spjd * zfs_link() - no links into/out of attribute space 5494219089Spjd * zfs_rename() - no moves into/out of attribute space 5495219089Spjd */ 5496219089Spjdvnodeops_t *zfs_xdvnodeops; 5497219089Spjdconst fs_operation_def_t zfs_xdvnodeops_template[] = { 5498219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5499219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5500219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5501219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5502219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5503219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5504219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5505219089Spjd VOPNAME_CREATE, { .vop_create = zfs_create }, 5506219089Spjd VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5507219089Spjd VOPNAME_LINK, { .vop_link = zfs_link }, 5508219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5509219089Spjd VOPNAME_MKDIR, { .error = zfs_inval }, 5510219089Spjd VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5511219089Spjd VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5512219089Spjd VOPNAME_SYMLINK, { .error = zfs_inval }, 5513219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5514219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5515219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid 
/*
 * Page-in handler: fill the single requested page m[reqpage] with file data
 * read through the DMU.
 *
 *	vp	- vnode being paged; its v_object must be the page's object.
 *	m	- array of pages handed in by the pager.
 *	count	- size of the request; rounded up to whole pages to get the
 *		  number of entries in m.
 *	reqpage	- index in m of the page that must be filled.
 *
 * Every page in m other than m[reqpage] is freed; no read-ahead into the
 * extra pages is attempted.
 *
 * Returns VM_PAGER_OK when the page is (or already was) valid,
 * VM_PAGER_BAD when the page lies at or beyond the object's size, and
 * VM_PAGER_ERROR when dmu_read() fails.
 */
static int
zfs_getpages(struct vnode *vp, vm_page_t *m, int count, int reqpage)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	objset_t *os = zp->z_zfsvfs->z_os;
	vm_page_t mreq;
	vm_object_t object;
	caddr_t va;
	struct sf_buf *sf;
	int i, error;
	int pcount, size;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	pcount = round_page(count) / PAGE_SIZE;
	mreq = m[reqpage];
	object = mreq->object;
	error = 0;

	KASSERT(vp->v_object == object, ("mismatching object"));

	VM_OBJECT_LOCK(object);

	/* Only the requested page is serviced; release all the others. */
	for (i = 0; i < pcount; i++) {
		if (i != reqpage) {
			vm_page_lock(m[i]);
			vm_page_free(m[i]);
			vm_page_unlock(m[i]);
		}
	}

	/*
	 * The page already holds some valid data: zero whatever part is
	 * still invalid and report success without touching the DMU.
	 */
	if (mreq->valid) {
		if (mreq->valid != VM_PAGE_BITS_ALL)
			vm_page_zero_invalid(mreq, TRUE);
		VM_OBJECT_UNLOCK(object);
		ZFS_EXIT(zfsvfs);
		return (VM_PAGER_OK);
	}

	PCPU_INC(cnt.v_vnodein);
	PCPU_INC(cnt.v_vnodepgsin);

	/* A page that starts at or past the object's size cannot be read. */
	if (IDX_TO_OFF(mreq->pindex) >= object->un_pager.vnp.vnp_size) {
		VM_OBJECT_UNLOCK(object);
		ZFS_EXIT(zfsvfs);
		return (VM_PAGER_BAD);
	}

	/* Clip the read to the object's size for the final, partial page. */
	size = PAGE_SIZE;
	if (IDX_TO_OFF(mreq->pindex) + size > object->un_pager.vnp.vnp_size)
		size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(mreq->pindex);

	/*
	 * The object lock is dropped for the duration of the mapping and
	 * the DMU read and re-taken afterwards (presumably because
	 * dmu_read() may sleep -- confirm).
	 */
	VM_OBJECT_UNLOCK(object);
	va = zfs_map_page(mreq, &sf);
	error = dmu_read(os, zp->z_id, IDX_TO_OFF(mreq->pindex),
	    size, va, DMU_READ_PREFETCH);
	/* Zero the tail of a partial page so no stale bytes are exposed. */
	if (size != PAGE_SIZE)
		bzero(va + size, PAGE_SIZE - size);
	zfs_unmap_page(sf);
	VM_OBJECT_LOCK(object);

	/* The page is marked fully valid only when the read succeeded. */
	if (!error)
		mreq->valid = VM_PAGE_BITS_ALL;
	KASSERT(mreq->dirty == 0, ("zfs_getpages: page %p is dirty", mreq));

	VM_OBJECT_UNLOCK(object);

	/* Note: the access time is stamped whether or not the read failed. */
	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
	ZFS_EXIT(zfsvfs);
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}
ap->a_cred, NULL, NULL)); 5684168962Spjd} 5685168962Spjd 5686168962Spjdstatic int 5687168962Spjdzfs_freebsd_read(ap) 5688168962Spjd struct vop_read_args /* { 5689168962Spjd struct vnode *a_vp; 5690168962Spjd struct uio *a_uio; 5691168962Spjd int a_ioflag; 5692168962Spjd struct ucred *a_cred; 5693168962Spjd } */ *ap; 5694168962Spjd{ 5695168962Spjd 5696213673Spjd return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 5697213673Spjd ap->a_cred, NULL)); 5698168962Spjd} 5699168962Spjd 5700168962Spjdstatic int 5701168962Spjdzfs_freebsd_write(ap) 5702168962Spjd struct vop_write_args /* { 5703168962Spjd struct vnode *a_vp; 5704168962Spjd struct uio *a_uio; 5705168962Spjd int a_ioflag; 5706168962Spjd struct ucred *a_cred; 5707168962Spjd } */ *ap; 5708168962Spjd{ 5709168962Spjd 5710213673Spjd return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 5711213673Spjd ap->a_cred, NULL)); 5712168962Spjd} 5713168962Spjd 5714168962Spjdstatic int 5715168962Spjdzfs_freebsd_access(ap) 5716168962Spjd struct vop_access_args /* { 5717168962Spjd struct vnode *a_vp; 5718192689Strasz accmode_t a_accmode; 5719168962Spjd struct ucred *a_cred; 5720168962Spjd struct thread *a_td; 5721168962Spjd } */ *ap; 5722168962Spjd{ 5723212002Sjh vnode_t *vp = ap->a_vp; 5724212002Sjh znode_t *zp = VTOZ(vp); 5725198703Spjd accmode_t accmode; 5726198703Spjd int error = 0; 5727168962Spjd 5728185172Spjd /* 5729198703Spjd * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND, 5730185172Spjd */ 5731198703Spjd accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 5732198703Spjd if (accmode != 0) 5733198703Spjd error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); 5734185172Spjd 5735198703Spjd /* 5736198703Spjd * VADMIN has to be handled by vaccess(). 
5737198703Spjd */ 5738198703Spjd if (error == 0) { 5739198703Spjd accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 5740198703Spjd if (accmode != 0) { 5741219089Spjd error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 5742219089Spjd zp->z_gid, accmode, ap->a_cred, NULL); 5743198703Spjd } 5744185172Spjd } 5745185172Spjd 5746212002Sjh /* 5747212002Sjh * For VEXEC, ensure that at least one execute bit is set for 5748212002Sjh * non-directories. 5749212002Sjh */ 5750212002Sjh if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 5751219089Spjd (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 5752212002Sjh error = EACCES; 5753219089Spjd } 5754212002Sjh 5755198703Spjd return (error); 5756168962Spjd} 5757168962Spjd 5758168962Spjdstatic int 5759168962Spjdzfs_freebsd_lookup(ap) 5760168962Spjd struct vop_lookup_args /* { 5761168962Spjd struct vnode *a_dvp; 5762168962Spjd struct vnode **a_vpp; 5763168962Spjd struct componentname *a_cnp; 5764168962Spjd } */ *ap; 5765168962Spjd{ 5766168962Spjd struct componentname *cnp = ap->a_cnp; 5767168962Spjd char nm[NAME_MAX + 1]; 5768168962Spjd 5769168962Spjd ASSERT(cnp->cn_namelen < sizeof(nm)); 5770168962Spjd strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 5771168962Spjd 5772168962Spjd return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 5773185029Spjd cnp->cn_cred, cnp->cn_thread, 0)); 5774168962Spjd} 5775168962Spjd 5776168962Spjdstatic int 5777168962Spjdzfs_freebsd_create(ap) 5778168962Spjd struct vop_create_args /* { 5779168962Spjd struct vnode *a_dvp; 5780168962Spjd struct vnode **a_vpp; 5781168962Spjd struct componentname *a_cnp; 5782168962Spjd struct vattr *a_vap; 5783168962Spjd } */ *ap; 5784168962Spjd{ 5785168962Spjd struct componentname *cnp = ap->a_cnp; 5786168962Spjd vattr_t *vap = ap->a_vap; 5787168962Spjd int mode; 5788168962Spjd 5789168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5790168962Spjd 5791168962Spjd vattr_init_mask(vap); 5792168962Spjd mode = vap->va_mode 
& ALLPERMS; 5793168962Spjd 5794168962Spjd return (zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 5795185029Spjd ap->a_vpp, cnp->cn_cred, cnp->cn_thread)); 5796168962Spjd} 5797168962Spjd 5798168962Spjdstatic int 5799168962Spjdzfs_freebsd_remove(ap) 5800168962Spjd struct vop_remove_args /* { 5801168962Spjd struct vnode *a_dvp; 5802168962Spjd struct vnode *a_vp; 5803168962Spjd struct componentname *a_cnp; 5804168962Spjd } */ *ap; 5805168962Spjd{ 5806168962Spjd 5807168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 5808168962Spjd 5809168962Spjd return (zfs_remove(ap->a_dvp, ap->a_cnp->cn_nameptr, 5810185029Spjd ap->a_cnp->cn_cred, NULL, 0)); 5811168962Spjd} 5812168962Spjd 5813168962Spjdstatic int 5814168962Spjdzfs_freebsd_mkdir(ap) 5815168962Spjd struct vop_mkdir_args /* { 5816168962Spjd struct vnode *a_dvp; 5817168962Spjd struct vnode **a_vpp; 5818168962Spjd struct componentname *a_cnp; 5819168962Spjd struct vattr *a_vap; 5820168962Spjd } */ *ap; 5821168962Spjd{ 5822168962Spjd vattr_t *vap = ap->a_vap; 5823168962Spjd 5824168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 5825168962Spjd 5826168962Spjd vattr_init_mask(vap); 5827168962Spjd 5828168962Spjd return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 5829185029Spjd ap->a_cnp->cn_cred, NULL, 0, NULL)); 5830168962Spjd} 5831168962Spjd 5832168962Spjdstatic int 5833168962Spjdzfs_freebsd_rmdir(ap) 5834168962Spjd struct vop_rmdir_args /* { 5835168962Spjd struct vnode *a_dvp; 5836168962Spjd struct vnode *a_vp; 5837168962Spjd struct componentname *a_cnp; 5838168962Spjd } */ *ap; 5839168962Spjd{ 5840168962Spjd struct componentname *cnp = ap->a_cnp; 5841168962Spjd 5842168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5843168962Spjd 5844185029Spjd return (zfs_rmdir(ap->a_dvp, cnp->cn_nameptr, NULL, cnp->cn_cred, NULL, 0)); 5845168962Spjd} 5846168962Spjd 5847168962Spjdstatic int 5848168962Spjdzfs_freebsd_readdir(ap) 5849168962Spjd struct vop_readdir_args /* { 5850168962Spjd struct vnode *a_vp; 5851168962Spjd 
struct uio *a_uio; 5852168962Spjd struct ucred *a_cred; 5853168962Spjd int *a_eofflag; 5854168962Spjd int *a_ncookies; 5855168962Spjd u_long **a_cookies; 5856168962Spjd } */ *ap; 5857168962Spjd{ 5858168962Spjd 5859168962Spjd return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 5860168962Spjd ap->a_ncookies, ap->a_cookies)); 5861168962Spjd} 5862168962Spjd 5863168962Spjdstatic int 5864168962Spjdzfs_freebsd_fsync(ap) 5865168962Spjd struct vop_fsync_args /* { 5866168962Spjd struct vnode *a_vp; 5867168962Spjd int a_waitfor; 5868168962Spjd struct thread *a_td; 5869168962Spjd } */ *ap; 5870168962Spjd{ 5871168962Spjd 5872168962Spjd vop_stdfsync(ap); 5873185029Spjd return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 5874168962Spjd} 5875168962Spjd 5876168962Spjdstatic int 5877168962Spjdzfs_freebsd_getattr(ap) 5878168962Spjd struct vop_getattr_args /* { 5879168962Spjd struct vnode *a_vp; 5880168962Spjd struct vattr *a_vap; 5881168962Spjd struct ucred *a_cred; 5882168962Spjd } */ *ap; 5883168962Spjd{ 5884185029Spjd vattr_t *vap = ap->a_vap; 5885185029Spjd xvattr_t xvap; 5886185029Spjd u_long fflags = 0; 5887185029Spjd int error; 5888168962Spjd 5889185029Spjd xva_init(&xvap); 5890185029Spjd xvap.xva_vattr = *vap; 5891185029Spjd xvap.xva_vattr.va_mask |= AT_XVATTR; 5892185029Spjd 5893185029Spjd /* Convert chflags into ZFS-type flags. */ 5894185029Spjd /* XXX: what about SF_SETTABLE?. */ 5895185029Spjd XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 5896185029Spjd XVA_SET_REQ(&xvap, XAT_APPENDONLY); 5897185029Spjd XVA_SET_REQ(&xvap, XAT_NOUNLINK); 5898185029Spjd XVA_SET_REQ(&xvap, XAT_NODUMP); 5899185029Spjd error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 5900185029Spjd if (error != 0) 5901185029Spjd return (error); 5902185029Spjd 5903185029Spjd /* Convert ZFS xattr into chflags. 
/*
 * FreeBSD VOP_SETATTR shim.
 *
 * Copies the caller's vattr into an xvattr_t and, when chflags-style file
 * flags are being set (va_flags != VNOVAL), validates them, applies the
 * privilege rules below and translates each changed flag into the matching
 * optional ZFS attribute before calling zfs_setattr().
 *
 * Returns EOPNOTSUPP when the file system does not use FUIDs or when a
 * flag other than SF_IMMUTABLE, SF_APPEND, SF_NOUNLINK or UF_NODUMP is
 * requested; EPERM or the error from securelevel_gt()/VOP_ACCESS() when
 * the caller may not change the flags; otherwise the zfs_setattr() result.
 */
static int
zfs_freebsd_setattr(ap)
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
	} */ *ap;
{
	vnode_t *vp = ap->a_vp;
	vattr_t *vap = ap->a_vap;
	cred_t *cred = ap->a_cred;
	xvattr_t xvap;
	u_long fflags;
	uint64_t zflags;

	vattr_init_mask(vap);
	/* Drop the attributes that cannot be set from the mask. */
	vap->va_mask &= ~AT_NOSET;

	xva_init(&xvap);
	xvap.xva_vattr = *vap;

	/* Current ZFS flags of the znode, compared with the request below. */
	zflags = VTOZ(vp)->z_pflags;

	if (vap->va_flags != VNOVAL) {
		zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
		int error;

		if (zfsvfs->z_use_fuids == B_FALSE)
			return (EOPNOTSUPP);

		fflags = vap->va_flags;
		/* Only these four chflags bits can be represented. */
		if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_NODUMP)) != 0)
			return (EOPNOTSUPP);
		/*
		 * Unprivileged processes are not permitted to unset system
		 * flags, or modify flags if any system flags are set.
		 * Privileged non-jail processes may not modify system flags
		 * if securelevel > 0 and any existing system flags are set.
		 * Privileged jail processes behave like privileged non-jail
		 * processes if the security.jail.chflags_allowed sysctl
		 * is non-zero; otherwise, they behave like unprivileged
		 * processes.
		 */
		if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
		    priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) {
			/* Privileged caller: only securelevel can veto. */
			if (zflags &
			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
				error = securelevel_gt(cred, 0);
				if (error != 0)
					return (error);
			}
		} else {
			/*
			 * Callers may only modify the file flags on objects they
			 * have VADMIN rights for.
			 */
			if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0)
				return (error);
			/* System flags already set: no changes allowed. */
			if (zflags &
			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
				return (EPERM);
			}
			/* Unprivileged callers may not set system flags. */
			if (fflags &
			    (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
				return (EPERM);
			}
		}

/*
 * Request a change of one optional attribute only when the chflags bit and
 * the current ZFS flag disagree; the new value is taken from the chflags bit.
 */
#define	FLAG_CHANGE(fflag, zflag, xflag, xfield)	do {		\
	if (((fflags & (fflag)) && !(zflags & (zflag))) ||		\
	    ((zflags & (zflag)) && !(fflags & (fflag)))) {		\
		XVA_SET_REQ(&xvap, (xflag));				\
		(xfield) = ((fflags & (fflag)) != 0);			\
	}								\
} while (0)
		/* Convert chflags into ZFS-type flags. */
		/* XXX: what about SF_SETTABLE?. */
		FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
		    xvap.xva_xoptattrs.xoa_immutable);
		FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
		    xvap.xva_xoptattrs.xoa_appendonly);
		FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
		    xvap.xva_xoptattrs.xoa_nounlink);
		FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
		    xvap.xva_xoptattrs.xoa_nodump);
#undef	FLAG_CHANGE
	}
	return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL));
}
vnode **a_vpp; 6052168962Spjd struct componentname *a_cnp; 6053168962Spjd struct vattr *a_vap; 6054168962Spjd char *a_target; 6055168962Spjd } */ *ap; 6056168962Spjd{ 6057168962Spjd struct componentname *cnp = ap->a_cnp; 6058168962Spjd vattr_t *vap = ap->a_vap; 6059168962Spjd 6060168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6061168962Spjd 6062168962Spjd vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. */ 6063168962Spjd vattr_init_mask(vap); 6064168962Spjd 6065168962Spjd return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 6066168962Spjd ap->a_target, cnp->cn_cred, cnp->cn_thread)); 6067168962Spjd} 6068168962Spjd 6069168962Spjdstatic int 6070168962Spjdzfs_freebsd_readlink(ap) 6071168962Spjd struct vop_readlink_args /* { 6072168962Spjd struct vnode *a_vp; 6073168962Spjd struct uio *a_uio; 6074168962Spjd struct ucred *a_cred; 6075168962Spjd } */ *ap; 6076168962Spjd{ 6077168962Spjd 6078185029Spjd return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 6079168962Spjd} 6080168962Spjd 6081168962Spjdstatic int 6082168962Spjdzfs_freebsd_link(ap) 6083168962Spjd struct vop_link_args /* { 6084168962Spjd struct vnode *a_tdvp; 6085168962Spjd struct vnode *a_vp; 6086168962Spjd struct componentname *a_cnp; 6087168962Spjd } */ *ap; 6088168962Spjd{ 6089168962Spjd struct componentname *cnp = ap->a_cnp; 6090168962Spjd 6091168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6092168962Spjd 6093185029Spjd return (zfs_link(ap->a_tdvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 6094168962Spjd} 6095168962Spjd 6096168962Spjdstatic int 6097168962Spjdzfs_freebsd_inactive(ap) 6098169170Spjd struct vop_inactive_args /* { 6099169170Spjd struct vnode *a_vp; 6100169170Spjd struct thread *a_td; 6101169170Spjd } */ *ap; 6102168962Spjd{ 6103168962Spjd vnode_t *vp = ap->a_vp; 6104168962Spjd 6105185029Spjd zfs_inactive(vp, ap->a_td->td_ucred, NULL); 6106168962Spjd return (0); 6107168962Spjd} 6108168962Spjd 6109185029Spjdstatic void 6110185029Spjdzfs_reclaim_complete(void 
*arg, int pending) 6111185029Spjd{ 6112185029Spjd znode_t *zp = arg; 6113185029Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6114185029Spjd 6115197133Spjd rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 6116219089Spjd if (zp->z_sa_hdl != NULL) { 6117197133Spjd ZFS_OBJ_HOLD_ENTER(zfsvfs, zp->z_id); 6118197133Spjd zfs_znode_dmu_fini(zp); 6119197133Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); 6120197133Spjd } 6121185029Spjd zfs_znode_free(zp); 6122197133Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 6123197133Spjd /* 6124197133Spjd * If the file system is being unmounted, there is a process waiting 6125197133Spjd * for us, wake it up. 6126197133Spjd */ 6127197133Spjd if (zfsvfs->z_unmounted) 6128197133Spjd wakeup_one(zfsvfs); 6129185029Spjd} 6130185029Spjd 6131168962Spjdstatic int 6132168962Spjdzfs_freebsd_reclaim(ap) 6133168962Spjd struct vop_reclaim_args /* { 6134168962Spjd struct vnode *a_vp; 6135168962Spjd struct thread *a_td; 6136168962Spjd } */ *ap; 6137168962Spjd{ 6138169170Spjd vnode_t *vp = ap->a_vp; 6139168962Spjd znode_t *zp = VTOZ(vp); 6140197133Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6141219089Spjd boolean_t rlocked; 6142168962Spjd 6143219089Spjd rlocked = rw_tryenter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 6144197133Spjd 6145169025Spjd ASSERT(zp != NULL); 6146169025Spjd 6147168962Spjd /* 6148168962Spjd * Destroy the vm object and flush associated pages. 6149168962Spjd */ 6150168962Spjd vnode_destroy_vobject(vp); 6151169025Spjd 6152169025Spjd mutex_enter(&zp->z_lock); 6153197153Spjd zp->z_vnode = NULL; 6154196301Spjd mutex_exit(&zp->z_lock); 6155196301Spjd 6156219089Spjd if (zp->z_unlinked) { 6157196301Spjd ; /* Do nothing. 
*/ 6158219089Spjd } else if (!rlocked) { 6159219089Spjd TASK_INIT(&zp->z_task, 0, zfs_reclaim_complete, zp); 6160219089Spjd taskqueue_enqueue(taskqueue_thread, &zp->z_task); 6161219089Spjd } else if (zp->z_sa_hdl == NULL) { 6162196301Spjd zfs_znode_free(zp); 6163219089Spjd } else /* if (!zp->z_unlinked && zp->z_dbuf != NULL) */ { 6164185029Spjd int locked; 6165185029Spjd 6166185029Spjd locked = MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)) ? 2 : 6167185029Spjd ZFS_OBJ_HOLD_TRYENTER(zfsvfs, zp->z_id); 6168185029Spjd if (locked == 0) { 6169185029Spjd /* 6170185029Spjd * Lock can't be obtained due to deadlock possibility, 6171185029Spjd * so defer znode destruction. 6172185029Spjd */ 6173185029Spjd TASK_INIT(&zp->z_task, 0, zfs_reclaim_complete, zp); 6174185029Spjd taskqueue_enqueue(taskqueue_thread, &zp->z_task); 6175185029Spjd } else { 6176185029Spjd zfs_znode_dmu_fini(zp); 6177185029Spjd if (locked == 1) 6178185029Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); 6179185029Spjd zfs_znode_free(zp); 6180185029Spjd } 6181169025Spjd } 6182168962Spjd VI_LOCK(vp); 6183168962Spjd vp->v_data = NULL; 6184171567Spjd ASSERT(vp->v_holdcnt >= 1); 6185171316Sdfr VI_UNLOCK(vp); 6186219089Spjd if (rlocked) 6187219089Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 6188168962Spjd return (0); 6189168962Spjd} 6190168962Spjd 6191168962Spjdstatic int 6192168962Spjdzfs_freebsd_fid(ap) 6193168962Spjd struct vop_fid_args /* { 6194168962Spjd struct vnode *a_vp; 6195168962Spjd struct fid *a_fid; 6196168962Spjd } */ *ap; 6197168962Spjd{ 6198168962Spjd 6199185029Spjd return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 6200168962Spjd} 6201168962Spjd 6202168962Spjdstatic int 6203168962Spjdzfs_freebsd_pathconf(ap) 6204168962Spjd struct vop_pathconf_args /* { 6205168962Spjd struct vnode *a_vp; 6206168962Spjd int a_name; 6207168962Spjd register_t *a_retval; 6208168962Spjd } */ *ap; 6209168962Spjd{ 6210168962Spjd ulong_t val; 6211168962Spjd int error; 6212168962Spjd 6213185029Spjd error = 
zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 6214168962Spjd if (error == 0) 6215168962Spjd *ap->a_retval = val; 6216168962Spjd else if (error == EOPNOTSUPP) 6217168962Spjd error = vop_stdpathconf(ap); 6218168962Spjd return (error); 6219168962Spjd} 6220168962Spjd 6221196949Straszstatic int 6222196949Straszzfs_freebsd_fifo_pathconf(ap) 6223196949Strasz struct vop_pathconf_args /* { 6224196949Strasz struct vnode *a_vp; 6225196949Strasz int a_name; 6226196949Strasz register_t *a_retval; 6227196949Strasz } */ *ap; 6228196949Strasz{ 6229196949Strasz 6230196949Strasz switch (ap->a_name) { 6231196949Strasz case _PC_ACL_EXTENDED: 6232196949Strasz case _PC_ACL_NFS4: 6233196949Strasz case _PC_ACL_PATH_MAX: 6234196949Strasz case _PC_MAC_PRESENT: 6235196949Strasz return (zfs_freebsd_pathconf(ap)); 6236196949Strasz default: 6237196949Strasz return (fifo_specops.vop_pathconf(ap)); 6238196949Strasz } 6239196949Strasz} 6240196949Strasz 6241185029Spjd/* 6242185029Spjd * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 6243185029Spjd * extended attribute name: 6244185029Spjd * 6245185029Spjd * NAMESPACE PREFIX 6246185029Spjd * system freebsd:system: 6247185029Spjd * user (none, can be used to access ZFS fsattr(5) attributes 6248185029Spjd * created on Solaris) 6249185029Spjd */ 6250185029Spjdstatic int 6251185029Spjdzfs_create_attrname(int attrnamespace, const char *name, char *attrname, 6252185029Spjd size_t size) 6253185029Spjd{ 6254185029Spjd const char *namespace, *prefix, *suffix; 6255185029Spjd 6256185029Spjd /* We don't allow '/' character in attribute name. */ 6257185029Spjd if (strchr(name, '/') != NULL) 6258185029Spjd return (EINVAL); 6259185029Spjd /* We don't allow attribute names that start with "freebsd:" string. 
*/ 6260185029Spjd if (strncmp(name, "freebsd:", 8) == 0) 6261185029Spjd return (EINVAL); 6262185029Spjd 6263185029Spjd bzero(attrname, size); 6264185029Spjd 6265185029Spjd switch (attrnamespace) { 6266185029Spjd case EXTATTR_NAMESPACE_USER: 6267185029Spjd#if 0 6268185029Spjd prefix = "freebsd:"; 6269185029Spjd namespace = EXTATTR_NAMESPACE_USER_STRING; 6270185029Spjd suffix = ":"; 6271185029Spjd#else 6272185029Spjd /* 6273185029Spjd * This is the default namespace by which we can access all 6274185029Spjd * attributes created on Solaris. 6275185029Spjd */ 6276185029Spjd prefix = namespace = suffix = ""; 6277185029Spjd#endif 6278185029Spjd break; 6279185029Spjd case EXTATTR_NAMESPACE_SYSTEM: 6280185029Spjd prefix = "freebsd:"; 6281185029Spjd namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 6282185029Spjd suffix = ":"; 6283185029Spjd break; 6284185029Spjd case EXTATTR_NAMESPACE_EMPTY: 6285185029Spjd default: 6286185029Spjd return (EINVAL); 6287185029Spjd } 6288185029Spjd if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 6289185029Spjd name) >= size) { 6290185029Spjd return (ENAMETOOLONG); 6291185029Spjd } 6292185029Spjd return (0); 6293185029Spjd} 6294185029Spjd 6295185029Spjd/* 6296185029Spjd * Vnode operating to retrieve a named extended attribute. 
6297185029Spjd */ 6298185029Spjdstatic int 6299185029Spjdzfs_getextattr(struct vop_getextattr_args *ap) 6300185029Spjd/* 6301185029Spjdvop_getextattr { 6302185029Spjd IN struct vnode *a_vp; 6303185029Spjd IN int a_attrnamespace; 6304185029Spjd IN const char *a_name; 6305185029Spjd INOUT struct uio *a_uio; 6306185029Spjd OUT size_t *a_size; 6307185029Spjd IN struct ucred *a_cred; 6308185029Spjd IN struct thread *a_td; 6309185029Spjd}; 6310185029Spjd*/ 6311185029Spjd{ 6312185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6313185029Spjd struct thread *td = ap->a_td; 6314185029Spjd struct nameidata nd; 6315185029Spjd char attrname[255]; 6316185029Spjd struct vattr va; 6317185029Spjd vnode_t *xvp = NULL, *vp; 6318185029Spjd int error, flags; 6319185029Spjd 6320195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6321195785Strasz ap->a_cred, ap->a_td, VREAD); 6322195785Strasz if (error != 0) 6323195785Strasz return (error); 6324195785Strasz 6325185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6326185029Spjd sizeof(attrname)); 6327185029Spjd if (error != 0) 6328185029Spjd return (error); 6329185029Spjd 6330185029Spjd ZFS_ENTER(zfsvfs); 6331185029Spjd 6332185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6333185029Spjd LOOKUP_XATTR); 6334185029Spjd if (error != 0) { 6335185029Spjd ZFS_EXIT(zfsvfs); 6336185029Spjd return (error); 6337185029Spjd } 6338185029Spjd 6339185029Spjd flags = FREAD; 6340185029Spjd NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, attrname, 6341185029Spjd xvp, td); 6342194586Skib error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL); 6343185029Spjd vp = nd.ni_vp; 6344185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6345185029Spjd if (error != 0) { 6346196303Spjd ZFS_EXIT(zfsvfs); 6347195785Strasz if (error == ENOENT) 6348195785Strasz error = ENOATTR; 6349185029Spjd return (error); 6350185029Spjd } 6351185029Spjd 6352185029Spjd if (ap->a_size != NULL) { 
6353185029Spjd error = VOP_GETATTR(vp, &va, ap->a_cred); 6354185029Spjd if (error == 0) 6355185029Spjd *ap->a_size = (size_t)va.va_size; 6356185029Spjd } else if (ap->a_uio != NULL) 6357224605Smm error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); 6358185029Spjd 6359185029Spjd VOP_UNLOCK(vp, 0); 6360185029Spjd vn_close(vp, flags, ap->a_cred, td); 6361185029Spjd ZFS_EXIT(zfsvfs); 6362185029Spjd 6363185029Spjd return (error); 6364185029Spjd} 6365185029Spjd 6366185029Spjd/* 6367185029Spjd * Vnode operation to remove a named attribute. 6368185029Spjd */ 6369185029Spjdint 6370185029Spjdzfs_deleteextattr(struct vop_deleteextattr_args *ap) 6371185029Spjd/* 6372185029Spjdvop_deleteextattr { 6373185029Spjd IN struct vnode *a_vp; 6374185029Spjd IN int a_attrnamespace; 6375185029Spjd IN const char *a_name; 6376185029Spjd IN struct ucred *a_cred; 6377185029Spjd IN struct thread *a_td; 6378185029Spjd}; 6379185029Spjd*/ 6380185029Spjd{ 6381185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6382185029Spjd struct thread *td = ap->a_td; 6383185029Spjd struct nameidata nd; 6384185029Spjd char attrname[255]; 6385185029Spjd struct vattr va; 6386185029Spjd vnode_t *xvp = NULL, *vp; 6387185029Spjd int error, flags; 6388185029Spjd 6389195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6390195785Strasz ap->a_cred, ap->a_td, VWRITE); 6391195785Strasz if (error != 0) 6392195785Strasz return (error); 6393195785Strasz 6394185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6395185029Spjd sizeof(attrname)); 6396185029Spjd if (error != 0) 6397185029Spjd return (error); 6398185029Spjd 6399185029Spjd ZFS_ENTER(zfsvfs); 6400185029Spjd 6401185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6402185029Spjd LOOKUP_XATTR); 6403185029Spjd if (error != 0) { 6404185029Spjd ZFS_EXIT(zfsvfs); 6405185029Spjd return (error); 6406185029Spjd } 6407185029Spjd 6408185029Spjd NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF 
| MPSAFE, 6409185029Spjd UIO_SYSSPACE, attrname, xvp, td); 6410185029Spjd error = namei(&nd); 6411185029Spjd vp = nd.ni_vp; 6412185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6413185029Spjd if (error != 0) { 6414196303Spjd ZFS_EXIT(zfsvfs); 6415195785Strasz if (error == ENOENT) 6416195785Strasz error = ENOATTR; 6417185029Spjd return (error); 6418185029Spjd } 6419185029Spjd error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 6420185029Spjd 6421185029Spjd vput(nd.ni_dvp); 6422185029Spjd if (vp == nd.ni_dvp) 6423185029Spjd vrele(vp); 6424185029Spjd else 6425185029Spjd vput(vp); 6426185029Spjd ZFS_EXIT(zfsvfs); 6427185029Spjd 6428185029Spjd return (error); 6429185029Spjd} 6430185029Spjd 6431185029Spjd/* 6432185029Spjd * Vnode operation to set a named attribute. 6433185029Spjd */ 6434185029Spjdstatic int 6435185029Spjdzfs_setextattr(struct vop_setextattr_args *ap) 6436185029Spjd/* 6437185029Spjdvop_setextattr { 6438185029Spjd IN struct vnode *a_vp; 6439185029Spjd IN int a_attrnamespace; 6440185029Spjd IN const char *a_name; 6441185029Spjd INOUT struct uio *a_uio; 6442185029Spjd IN struct ucred *a_cred; 6443185029Spjd IN struct thread *a_td; 6444185029Spjd}; 6445185029Spjd*/ 6446185029Spjd{ 6447185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6448185029Spjd struct thread *td = ap->a_td; 6449185029Spjd struct nameidata nd; 6450185029Spjd char attrname[255]; 6451185029Spjd struct vattr va; 6452185029Spjd vnode_t *xvp = NULL, *vp; 6453185029Spjd int error, flags; 6454185029Spjd 6455195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6456195785Strasz ap->a_cred, ap->a_td, VWRITE); 6457195785Strasz if (error != 0) 6458195785Strasz return (error); 6459195785Strasz 6460185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6461185029Spjd sizeof(attrname)); 6462185029Spjd if (error != 0) 6463185029Spjd return (error); 6464185029Spjd 6465185029Spjd ZFS_ENTER(zfsvfs); 6466185029Spjd 6467185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, 
NULL, 0, ap->a_cred, td, 6468195785Strasz LOOKUP_XATTR | CREATE_XATTR_DIR); 6469185029Spjd if (error != 0) { 6470185029Spjd ZFS_EXIT(zfsvfs); 6471185029Spjd return (error); 6472185029Spjd } 6473185029Spjd 6474185029Spjd flags = FFLAGS(O_WRONLY | O_CREAT); 6475185029Spjd NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, attrname, 6476185029Spjd xvp, td); 6477194586Skib error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); 6478185029Spjd vp = nd.ni_vp; 6479185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6480185029Spjd if (error != 0) { 6481185029Spjd ZFS_EXIT(zfsvfs); 6482185029Spjd return (error); 6483185029Spjd } 6484185029Spjd 6485185029Spjd VATTR_NULL(&va); 6486185029Spjd va.va_size = 0; 6487185029Spjd error = VOP_SETATTR(vp, &va, ap->a_cred); 6488185029Spjd if (error == 0) 6489185029Spjd VOP_WRITE(vp, ap->a_uio, IO_UNIT | IO_SYNC, ap->a_cred); 6490185029Spjd 6491185029Spjd VOP_UNLOCK(vp, 0); 6492185029Spjd vn_close(vp, flags, ap->a_cred, td); 6493185029Spjd ZFS_EXIT(zfsvfs); 6494185029Spjd 6495185029Spjd return (error); 6496185029Spjd} 6497185029Spjd 6498185029Spjd/* 6499185029Spjd * Vnode operation to retrieve extended attributes on a vnode. 
6500185029Spjd */ 6501185029Spjdstatic int 6502185029Spjdzfs_listextattr(struct vop_listextattr_args *ap) 6503185029Spjd/* 6504185029Spjdvop_listextattr { 6505185029Spjd IN struct vnode *a_vp; 6506185029Spjd IN int a_attrnamespace; 6507185029Spjd INOUT struct uio *a_uio; 6508185029Spjd OUT size_t *a_size; 6509185029Spjd IN struct ucred *a_cred; 6510185029Spjd IN struct thread *a_td; 6511185029Spjd}; 6512185029Spjd*/ 6513185029Spjd{ 6514185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6515185029Spjd struct thread *td = ap->a_td; 6516185029Spjd struct nameidata nd; 6517185029Spjd char attrprefix[16]; 6518185029Spjd u_char dirbuf[sizeof(struct dirent)]; 6519185029Spjd struct dirent *dp; 6520185029Spjd struct iovec aiov; 6521185029Spjd struct uio auio, *uio = ap->a_uio; 6522185029Spjd size_t *sizep = ap->a_size; 6523185029Spjd size_t plen; 6524185029Spjd vnode_t *xvp = NULL, *vp; 6525185029Spjd int done, error, eof, pos; 6526185029Spjd 6527195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6528195785Strasz ap->a_cred, ap->a_td, VREAD); 6529196303Spjd if (error != 0) 6530195785Strasz return (error); 6531195785Strasz 6532185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 6533185029Spjd sizeof(attrprefix)); 6534185029Spjd if (error != 0) 6535185029Spjd return (error); 6536185029Spjd plen = strlen(attrprefix); 6537185029Spjd 6538185029Spjd ZFS_ENTER(zfsvfs); 6539185029Spjd 6540195822Strasz if (sizep != NULL) 6541195822Strasz *sizep = 0; 6542195822Strasz 6543185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6544185029Spjd LOOKUP_XATTR); 6545185029Spjd if (error != 0) { 6546196303Spjd ZFS_EXIT(zfsvfs); 6547195785Strasz /* 6548195785Strasz * ENOATTR means that the EA directory does not yet exist, 6549195785Strasz * i.e. there are no extended attributes there. 
6550195785Strasz */ 6551195785Strasz if (error == ENOATTR) 6552195785Strasz error = 0; 6553185029Spjd return (error); 6554185029Spjd } 6555185029Spjd 6556188588Sjhb NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE, 6557188588Sjhb UIO_SYSSPACE, ".", xvp, td); 6558185029Spjd error = namei(&nd); 6559185029Spjd vp = nd.ni_vp; 6560185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6561185029Spjd if (error != 0) { 6562185029Spjd ZFS_EXIT(zfsvfs); 6563185029Spjd return (error); 6564185029Spjd } 6565185029Spjd 6566185029Spjd auio.uio_iov = &aiov; 6567185029Spjd auio.uio_iovcnt = 1; 6568185029Spjd auio.uio_segflg = UIO_SYSSPACE; 6569185029Spjd auio.uio_td = td; 6570185029Spjd auio.uio_rw = UIO_READ; 6571185029Spjd auio.uio_offset = 0; 6572185029Spjd 6573185029Spjd do { 6574185029Spjd u_char nlen; 6575185029Spjd 6576185029Spjd aiov.iov_base = (void *)dirbuf; 6577185029Spjd aiov.iov_len = sizeof(dirbuf); 6578185029Spjd auio.uio_resid = sizeof(dirbuf); 6579185029Spjd error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 6580185029Spjd done = sizeof(dirbuf) - auio.uio_resid; 6581185029Spjd if (error != 0) 6582185029Spjd break; 6583185029Spjd for (pos = 0; pos < done;) { 6584185029Spjd dp = (struct dirent *)(dirbuf + pos); 6585185029Spjd pos += dp->d_reclen; 6586185029Spjd /* 6587185029Spjd * XXX: Temporarily we also accept DT_UNKNOWN, as this 6588185029Spjd * is what we get when attribute was created on Solaris. 
6589185029Spjd */ 6590185029Spjd if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 6591185029Spjd continue; 6592185029Spjd if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) 6593185029Spjd continue; 6594185029Spjd else if (strncmp(dp->d_name, attrprefix, plen) != 0) 6595185029Spjd continue; 6596185029Spjd nlen = dp->d_namlen - plen; 6597185029Spjd if (sizep != NULL) 6598185029Spjd *sizep += 1 + nlen; 6599185029Spjd else if (uio != NULL) { 6600185029Spjd /* 6601185029Spjd * Format of extattr name entry is one byte for 6602185029Spjd * length and the rest for name. 6603185029Spjd */ 6604185029Spjd error = uiomove(&nlen, 1, uio->uio_rw, uio); 6605185029Spjd if (error == 0) { 6606185029Spjd error = uiomove(dp->d_name + plen, nlen, 6607185029Spjd uio->uio_rw, uio); 6608185029Spjd } 6609185029Spjd if (error != 0) 6610185029Spjd break; 6611185029Spjd } 6612185029Spjd } 6613185029Spjd } while (!eof && error == 0); 6614185029Spjd 6615185029Spjd vput(vp); 6616185029Spjd ZFS_EXIT(zfsvfs); 6617185029Spjd 6618185029Spjd return (error); 6619185029Spjd} 6620185029Spjd 6621192800Straszint 6622192800Straszzfs_freebsd_getacl(ap) 6623192800Strasz struct vop_getacl_args /* { 6624192800Strasz struct vnode *vp; 6625192800Strasz acl_type_t type; 6626192800Strasz struct acl *aclp; 6627192800Strasz struct ucred *cred; 6628192800Strasz struct thread *td; 6629192800Strasz } */ *ap; 6630192800Strasz{ 6631192800Strasz int error; 6632192800Strasz vsecattr_t vsecattr; 6633192800Strasz 6634192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 6635197435Strasz return (EINVAL); 6636192800Strasz 6637192800Strasz vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 6638192800Strasz if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)) 6639192800Strasz return (error); 6640192800Strasz 6641192800Strasz error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt); 6642196303Spjd if (vsecattr.vsa_aclentp != NULL) 6643196303Spjd kmem_free(vsecattr.vsa_aclentp, 
vsecattr.vsa_aclentsz); 6644192800Strasz 6645196303Spjd return (error); 6646192800Strasz} 6647192800Strasz 6648192800Straszint 6649192800Straszzfs_freebsd_setacl(ap) 6650192800Strasz struct vop_setacl_args /* { 6651192800Strasz struct vnode *vp; 6652192800Strasz acl_type_t type; 6653192800Strasz struct acl *aclp; 6654192800Strasz struct ucred *cred; 6655192800Strasz struct thread *td; 6656192800Strasz } */ *ap; 6657192800Strasz{ 6658192800Strasz int error; 6659192800Strasz vsecattr_t vsecattr; 6660192800Strasz int aclbsize; /* size of acl list in bytes */ 6661192800Strasz aclent_t *aaclp; 6662192800Strasz 6663192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 6664197435Strasz return (EINVAL); 6665192800Strasz 6666192800Strasz if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 6667192800Strasz return (EINVAL); 6668192800Strasz 6669192800Strasz /* 6670196949Strasz * With NFSv4 ACLs, chmod(2) may need to add additional entries, 6671192800Strasz * splitting every entry into two and appending "canonical six" 6672192800Strasz * entries at the end. Don't allow for setting an ACL that would 6673192800Strasz * cause chmod(2) to run out of ACL entries. 
6674192800Strasz */ 6675192800Strasz if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 6676192800Strasz return (ENOSPC); 6677192800Strasz 6678208030Strasz error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 6679208030Strasz if (error != 0) 6680208030Strasz return (error); 6681208030Strasz 6682192800Strasz vsecattr.vsa_mask = VSA_ACE; 6683192800Strasz aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t); 6684192800Strasz vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 6685192800Strasz aaclp = vsecattr.vsa_aclentp; 6686192800Strasz vsecattr.vsa_aclentsz = aclbsize; 6687192800Strasz 6688192800Strasz aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 6689192800Strasz error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL); 6690192800Strasz kmem_free(aaclp, aclbsize); 6691192800Strasz 6692192800Strasz return (error); 6693192800Strasz} 6694192800Strasz 6695192800Straszint 6696192800Straszzfs_freebsd_aclcheck(ap) 6697192800Strasz struct vop_aclcheck_args /* { 6698192800Strasz struct vnode *vp; 6699192800Strasz acl_type_t type; 6700192800Strasz struct acl *aclp; 6701192800Strasz struct ucred *cred; 6702192800Strasz struct thread *td; 6703192800Strasz } */ *ap; 6704192800Strasz{ 6705192800Strasz 6706192800Strasz return (EOPNOTSUPP); 6707192800Strasz} 6708192800Strasz 6709168404Spjdstruct vop_vector zfs_vnodeops; 6710168404Spjdstruct vop_vector zfs_fifoops; 6711209962Smmstruct vop_vector zfs_shareops; 6712168404Spjd 6713168404Spjdstruct vop_vector zfs_vnodeops = { 6714185029Spjd .vop_default = &default_vnodeops, 6715185029Spjd .vop_inactive = zfs_freebsd_inactive, 6716185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 6717185029Spjd .vop_access = zfs_freebsd_access, 6718168404Spjd#ifdef FREEBSD_NAMECACHE 6719185029Spjd .vop_lookup = vfs_cache_lookup, 6720185029Spjd .vop_cachedlookup = zfs_freebsd_lookup, 6721168404Spjd#else 6722185029Spjd .vop_lookup = zfs_freebsd_lookup, 6723168404Spjd#endif 6724185029Spjd .vop_getattr = 
zfs_freebsd_getattr, 6725185029Spjd .vop_setattr = zfs_freebsd_setattr, 6726185029Spjd .vop_create = zfs_freebsd_create, 6727185029Spjd .vop_mknod = zfs_freebsd_create, 6728185029Spjd .vop_mkdir = zfs_freebsd_mkdir, 6729185029Spjd .vop_readdir = zfs_freebsd_readdir, 6730185029Spjd .vop_fsync = zfs_freebsd_fsync, 6731185029Spjd .vop_open = zfs_freebsd_open, 6732185029Spjd .vop_close = zfs_freebsd_close, 6733185029Spjd .vop_rmdir = zfs_freebsd_rmdir, 6734185029Spjd .vop_ioctl = zfs_freebsd_ioctl, 6735185029Spjd .vop_link = zfs_freebsd_link, 6736185029Spjd .vop_symlink = zfs_freebsd_symlink, 6737185029Spjd .vop_readlink = zfs_freebsd_readlink, 6738185029Spjd .vop_read = zfs_freebsd_read, 6739185029Spjd .vop_write = zfs_freebsd_write, 6740185029Spjd .vop_remove = zfs_freebsd_remove, 6741185029Spjd .vop_rename = zfs_freebsd_rename, 6742185029Spjd .vop_pathconf = zfs_freebsd_pathconf, 6743185029Spjd .vop_bmap = VOP_EOPNOTSUPP, 6744185029Spjd .vop_fid = zfs_freebsd_fid, 6745185029Spjd .vop_getextattr = zfs_getextattr, 6746185029Spjd .vop_deleteextattr = zfs_deleteextattr, 6747185029Spjd .vop_setextattr = zfs_setextattr, 6748185029Spjd .vop_listextattr = zfs_listextattr, 6749192800Strasz .vop_getacl = zfs_freebsd_getacl, 6750192800Strasz .vop_setacl = zfs_freebsd_setacl, 6751192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6752213937Savg .vop_getpages = zfs_freebsd_getpages, 6753168404Spjd}; 6754168404Spjd 6755169170Spjdstruct vop_vector zfs_fifoops = { 6756185029Spjd .vop_default = &fifo_specops, 6757200162Skib .vop_fsync = zfs_freebsd_fsync, 6758185029Spjd .vop_access = zfs_freebsd_access, 6759185029Spjd .vop_getattr = zfs_freebsd_getattr, 6760185029Spjd .vop_inactive = zfs_freebsd_inactive, 6761185029Spjd .vop_read = VOP_PANIC, 6762185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 6763185029Spjd .vop_setattr = zfs_freebsd_setattr, 6764185029Spjd .vop_write = VOP_PANIC, 6765196949Strasz .vop_pathconf = zfs_freebsd_fifo_pathconf, 6766185029Spjd .vop_fid = zfs_freebsd_fid, 
6767192800Strasz .vop_getacl = zfs_freebsd_getacl, 6768192800Strasz .vop_setacl = zfs_freebsd_setacl, 6769192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6770168404Spjd}; 6771209962Smm 6772209962Smm/* 6773209962Smm * special share hidden files vnode operations template 6774209962Smm */ 6775209962Smmstruct vop_vector zfs_shareops = { 6776209962Smm .vop_default = &default_vnodeops, 6777209962Smm .vop_access = zfs_freebsd_access, 6778209962Smm .vop_inactive = zfs_freebsd_inactive, 6779209962Smm .vop_reclaim = zfs_freebsd_reclaim, 6780209962Smm .vop_fid = zfs_freebsd_fid, 6781209962Smm .vop_pathconf = zfs_freebsd_pathconf, 6782209962Smm}; 6783