zfs_vnops.c revision 274337
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22212694Smm * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23271536Sdelphij * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 24262990Sdelphij * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 25168404Spjd */ 26168404Spjd 27169195Spjd/* Portions Copyright 2007 Jeremy Teo */ 28219089Spjd/* Portions Copyright 2010 Robert Milkowski */ 29169195Spjd 30168404Spjd#include <sys/types.h> 31168404Spjd#include <sys/param.h> 32168404Spjd#include <sys/time.h> 33168404Spjd#include <sys/systm.h> 34168404Spjd#include <sys/sysmacros.h> 35168404Spjd#include <sys/resource.h> 36168404Spjd#include <sys/vfs.h> 37248084Sattilio#include <sys/vm.h> 38168404Spjd#include <sys/vnode.h> 39168404Spjd#include <sys/file.h> 40168404Spjd#include <sys/stat.h> 41168404Spjd#include <sys/kmem.h> 42168404Spjd#include <sys/taskq.h> 43168404Spjd#include <sys/uio.h> 44168404Spjd#include <sys/atomic.h> 45168404Spjd#include <sys/namei.h> 46168404Spjd#include <sys/mman.h> 47168404Spjd#include <sys/cmn_err.h> 48168404Spjd#include <sys/errno.h> 49168404Spjd#include <sys/unistd.h> 50168404Spjd#include <sys/zfs_dir.h> 51168404Spjd#include <sys/zfs_ioctl.h> 52168404Spjd#include <sys/fs/zfs.h> 53168404Spjd#include <sys/dmu.h> 54219089Spjd#include <sys/dmu_objset.h> 55168404Spjd#include <sys/spa.h> 56168404Spjd#include <sys/txg.h> 57168404Spjd#include <sys/dbuf.h> 58168404Spjd#include <sys/zap.h> 59219089Spjd#include <sys/sa.h> 60168404Spjd#include <sys/dirent.h> 61168962Spjd#include <sys/policy.h> 62168962Spjd#include <sys/sunddi.h> 63168404Spjd#include <sys/filio.h> 64209962Smm#include <sys/sid.h> 65168404Spjd#include <sys/zfs_ctldir.h> 66185029Spjd#include <sys/zfs_fuid.h> 67219089Spjd#include <sys/zfs_sa.h> 68168404Spjd#include <sys/dnlc.h> 69168404Spjd#include <sys/zfs_rlock.h> 70185029Spjd#include <sys/extdirent.h> 71185029Spjd#include <sys/kidmap.h> 72168404Spjd#include <sys/bio.h> 73168404Spjd#include <sys/buf.h> 74168404Spjd#include <sys/sched.h> 75192800Strasz#include <sys/acl.h> 76239077Smarius#include <vm/vm_param.h> 77215401Savg#include <vm/vm_pageout.h> 78168404Spjd 79168404Spjd/* 80168404Spjd * Programming rules. 81168404Spjd * 82168404Spjd * Each vnode op performs some logical unit of work. To do this, the ZPL must 83168404Spjd * properly lock its in-core state, create a DMU transaction, do the work, 84168404Spjd * record this work in the intent log (ZIL), commit the DMU transaction, 85185029Spjd * and wait for the intent log to commit if it is a synchronous operation. 86185029Spjd * Moreover, the vnode ops must work in both normal and log replay context. 87168404Spjd * The ordering of events is important to avoid deadlocks and references 88168404Spjd * to freed memory. The example below illustrates the following Big Rules: 89168404Spjd * 90251631Sdelphij * (1) A check must be made in each zfs thread for a mounted file system. 91168404Spjd * This is done avoiding races using ZFS_ENTER(zfsvfs). 92251631Sdelphij * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 93251631Sdelphij * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 94251631Sdelphij * can return EIO from the calling function. 95168404Spjd * 96168404Spjd * (2) VN_RELE() should always be the last thing except for zil_commit() 97168404Spjd * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 98168404Spjd * First, if it's the last reference, the vnode/znode 99168404Spjd * can be freed, so the zp may point to freed memory. Second, the last 100168404Spjd * reference will call zfs_zinactive(), which may induce a lot of work -- 101168404Spjd * pushing cached pages (which acquires range locks) and syncing out 102168404Spjd * cached atime changes. Third, zfs_zinactive() may require a new tx, 103168404Spjd * which could deadlock the system if you were already holding one. 104191900Skmacy * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 105168404Spjd * 106168404Spjd * (3) All range locks must be grabbed before calling dmu_tx_assign(), 107168404Spjd * as they can span dmu_tx_assign() calls. 108168404Spjd * 109258720Savg * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to 110258720Savg * dmu_tx_assign(). This is critical because we don't want to block 111258720Savg * while holding locks. 112168404Spjd * 113258720Savg * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This 114258720Savg * reduces lock contention and CPU usage when we must wait (note that if 115258720Savg * throughput is constrained by the storage, nearly every transaction 116258720Savg * must wait). 117258720Savg * 118258720Savg * Note, in particular, that if a lock is sometimes acquired before 119258720Savg * the tx assigns, and sometimes after (e.g. z_lock), then failing 120258720Savg * to use a non-blocking assign can deadlock the system. The scenario: 121258720Savg * 122168404Spjd * Thread A has grabbed a lock before calling dmu_tx_assign(). 123168404Spjd * Thread B is in an already-assigned tx, and blocks for this lock. 124168404Spjd * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 125168404Spjd * forever, because the previous txg can't quiesce until B's tx commits. 126168404Spjd * 127168404Spjd * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 128258632Savg * then drop all locks, call dmu_tx_wait(), and try again. On subsequent 129258632Savg * calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT, 130258632Savg * to indicate that this operation has already called dmu_tx_wait(). 131258632Savg * This will ensure that we don't retry forever, waiting a short bit 132258632Savg * each time. 133168404Spjd * 134168404Spjd * (5) If the operation succeeded, generate the intent log entry for it 135168404Spjd * before dropping locks. This ensures that the ordering of events 136168404Spjd * in the intent log matches the order in which they actually occurred. 137251631Sdelphij * During ZIL replay the zfs_log_* functions will update the sequence 138209962Smm * number to indicate the zil transaction has replayed. 139168404Spjd * 140168404Spjd * (6) At the end of each vnode op, the DMU tx must always commit, 141168404Spjd * regardless of whether there were any errors. 142168404Spjd * 143219089Spjd * (7) After dropping all locks, invoke zil_commit(zilog, foid) 144168404Spjd * to ensure that synchronous semantics are provided when necessary. 145168404Spjd * 146168404Spjd * In general, this is how things should be ordered in each vnode op: 147168404Spjd * 148168404Spjd * ZFS_ENTER(zfsvfs); // exit if unmounted 149168404Spjd * top: 150168404Spjd * zfs_dirent_lock(&dl, ...) // lock directory entry (may VN_HOLD()) 151168404Spjd * rw_enter(...); // grab any other locks you need 152168404Spjd * tx = dmu_tx_create(...); // get DMU tx 153168404Spjd * dmu_tx_hold_*(); // hold each object you might modify 154258632Savg * error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 155168404Spjd * if (error) { 156168404Spjd * rw_exit(...); // drop locks 157168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 158168404Spjd * VN_RELE(...); // release held vnodes 159209962Smm * if (error == ERESTART) { 160258632Savg * waited = B_TRUE; 161168404Spjd * dmu_tx_wait(tx); 162168404Spjd * dmu_tx_abort(tx); 163168404Spjd * goto top; 164168404Spjd * } 165168404Spjd * dmu_tx_abort(tx); // abort DMU tx 166168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 167168404Spjd * return (error); // really out of space 168168404Spjd * } 169168404Spjd * error = do_real_work(); // do whatever this VOP does 170168404Spjd * if (error == 0) 171168404Spjd * zfs_log_*(...); // on success, make ZIL entry 172168404Spjd * dmu_tx_commit(tx); // commit DMU tx -- error or not 173168404Spjd * rw_exit(...); // drop locks 174168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 175168404Spjd * VN_RELE(...); // release held vnodes 176219089Spjd * zil_commit(zilog, foid); // synchronous when necessary 177168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 178168404Spjd * return (error); // done, report error 179168404Spjd */ 180185029Spjd 181168404Spjd/* ARGSUSED */ 182168404Spjdstatic int 183185029Spjdzfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 184168404Spjd{ 185168962Spjd znode_t *zp = VTOZ(*vpp); 186209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 187168404Spjd 188209962Smm ZFS_ENTER(zfsvfs); 189209962Smm ZFS_VERIFY_ZP(zp); 190209962Smm 191219089Spjd if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 192185029Spjd ((flag & FAPPEND) == 0)) { 193209962Smm ZFS_EXIT(zfsvfs); 194249195Smm return (SET_ERROR(EPERM)); 195185029Spjd } 196185029Spjd 197185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 198185029Spjd ZTOV(zp)->v_type == VREG && 199219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 200209962Smm if (fs_vscan(*vpp, cr, 0) != 0) { 201209962Smm ZFS_EXIT(zfsvfs); 202249195Smm return (SET_ERROR(EACCES)); 203209962Smm } 204209962Smm } 205185029Spjd 206168404Spjd /* Keep a count of the synchronous opens in the znode */ 207168962Spjd if (flag & (FSYNC | FDSYNC)) 208168404Spjd atomic_inc_32(&zp->z_sync_cnt); 209185029Spjd 210209962Smm ZFS_EXIT(zfsvfs); 211168404Spjd return (0); 212168404Spjd} 213168404Spjd 214168404Spjd/* ARGSUSED */ 215168404Spjdstatic int 216185029Spjdzfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 217185029Spjd caller_context_t *ct) 218168404Spjd{ 219168962Spjd znode_t *zp = VTOZ(vp); 220209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 221168404Spjd 222210470Smm /* 223210470Smm * Clean up any locks held by this process on the vp. 224210470Smm */ 225210470Smm cleanlocks(vp, ddi_get_pid(), 0); 226210470Smm cleanshares(vp, ddi_get_pid()); 227210470Smm 228209962Smm ZFS_ENTER(zfsvfs); 229209962Smm ZFS_VERIFY_ZP(zp); 230209962Smm 231168404Spjd /* Decrement the synchronous opens in the znode */ 232185029Spjd if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 233168404Spjd atomic_dec_32(&zp->z_sync_cnt); 234168404Spjd 235185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 236185029Spjd ZTOV(zp)->v_type == VREG && 237219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 238185029Spjd VERIFY(fs_vscan(vp, cr, 1) == 0); 239185029Spjd 240209962Smm ZFS_EXIT(zfsvfs); 241168404Spjd return (0); 242168404Spjd} 243168404Spjd 244168404Spjd/* 245168404Spjd * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 246168404Spjd * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 247168404Spjd */ 248168404Spjdstatic int 249168978Spjdzfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 250168404Spjd{ 251168404Spjd znode_t *zp = VTOZ(vp); 252168404Spjd uint64_t noff = (uint64_t)*off; /* new offset */ 253168404Spjd uint64_t file_sz; 254168404Spjd int error; 255168404Spjd boolean_t hole; 256168404Spjd 257219089Spjd file_sz = zp->z_size; 258168404Spjd if (noff >= file_sz) { 259249195Smm return (SET_ERROR(ENXIO)); 260168404Spjd } 261168404Spjd 262168962Spjd if (cmd == _FIO_SEEK_HOLE) 263168404Spjd hole = B_TRUE; 264168404Spjd else 265168404Spjd hole = B_FALSE; 266168404Spjd 267168404Spjd error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 268168404Spjd 269271536Sdelphij if (error == ESRCH) 270249195Smm return (SET_ERROR(ENXIO)); 271271536Sdelphij 272271536Sdelphij /* 273271536Sdelphij * We could find a hole that begins after the logical end-of-file, 274271536Sdelphij * because dmu_offset_next() only works on whole blocks. If the 275271536Sdelphij * EOF falls mid-block, then indicate that the "virtual hole" 276271536Sdelphij * at the end of the file begins at the logical EOF, rather than 277271536Sdelphij * at the end of the last block. 278271536Sdelphij */ 279271536Sdelphij if (noff > file_sz) { 280271536Sdelphij ASSERT(hole); 281271536Sdelphij noff = file_sz; 282168404Spjd } 283168404Spjd 284168404Spjd if (noff < *off) 285168404Spjd return (error); 286168404Spjd *off = noff; 287168404Spjd return (error); 288168404Spjd} 289168404Spjd 290168404Spjd/* ARGSUSED */ 291168404Spjdstatic int 292168978Spjdzfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 293185029Spjd int *rvalp, caller_context_t *ct) 294168404Spjd{ 295168962Spjd offset_t off; 296168962Spjd int error; 297168962Spjd zfsvfs_t *zfsvfs; 298185029Spjd znode_t *zp; 299168404Spjd 300168404Spjd switch (com) { 301185029Spjd case _FIOFFS: 302168962Spjd return (0); 303168404Spjd 304168962Spjd /* 305168962Spjd * The following two ioctls are used by bfu. Faking out, 306168962Spjd * necessary to avoid bfu errors. 307168962Spjd */ 308185029Spjd case _FIOGDIO: 309185029Spjd case _FIOSDIO: 310168962Spjd return (0); 311168962Spjd 312185029Spjd case _FIO_SEEK_DATA: 313185029Spjd case _FIO_SEEK_HOLE: 314233918Savg#ifdef sun 315168962Spjd if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 316249195Smm return (SET_ERROR(EFAULT)); 317233918Savg#else 318233918Savg off = *(offset_t *)data; 319233918Savg#endif 320185029Spjd zp = VTOZ(vp); 321185029Spjd zfsvfs = zp->z_zfsvfs; 322168404Spjd ZFS_ENTER(zfsvfs); 323185029Spjd ZFS_VERIFY_ZP(zp); 324168404Spjd 325168404Spjd /* offset parameter is in/out */ 326168404Spjd error = zfs_holey(vp, com, &off); 327168404Spjd ZFS_EXIT(zfsvfs); 328168404Spjd if (error) 329168404Spjd return (error); 330233918Savg#ifdef sun 331168962Spjd if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 332249195Smm return (SET_ERROR(EFAULT)); 333233918Savg#else 334233918Savg *(offset_t *)data = off; 335233918Savg#endif 336168404Spjd return (0); 337168404Spjd } 338249195Smm return (SET_ERROR(ENOTTY)); 339168404Spjd} 340168404Spjd 341209962Smmstatic vm_page_t 342253953Sattiliopage_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 343209962Smm{ 344209962Smm vm_object_t obj; 345209962Smm vm_page_t pp; 346258353Savg int64_t end; 347209962Smm 348258353Savg /* 349258353Savg * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE 350258353Savg * aligned boundaries, if the range is not aligned. As a result a 351258353Savg * DEV_BSIZE subrange with partially dirty data may get marked as clean. 352258353Savg * It may happen that all DEV_BSIZE subranges are marked clean and thus 353258353Savg * the whole page would be considred clean despite have some dirty data. 354258353Savg * For this reason we should shrink the range to DEV_BSIZE aligned 355258353Savg * boundaries before calling vm_page_clear_dirty. 356258353Savg */ 357258353Savg end = rounddown2(off + nbytes, DEV_BSIZE); 358258353Savg off = roundup2(off, DEV_BSIZE); 359258353Savg nbytes = end - off; 360258353Savg 361209962Smm obj = vp->v_object; 362248084Sattilio zfs_vmobject_assert_wlocked(obj); 363209962Smm 364209962Smm for (;;) { 365209962Smm if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 366246293Savg pp->valid) { 367254138Sattilio if (vm_page_xbusied(pp)) { 368212652Savg /* 369212652Savg * Reference the page before unlocking and 370212652Savg * sleeping so that the page daemon is less 371212652Savg * likely to reclaim it. 372212652Savg */ 373225418Skib vm_page_reference(pp); 374254138Sattilio vm_page_lock(pp); 375254138Sattilio zfs_vmobject_wunlock(obj); 376254138Sattilio vm_page_busy_sleep(pp, "zfsmwb"); 377254138Sattilio zfs_vmobject_wlock(obj); 378209962Smm continue; 379212652Savg } 380254138Sattilio vm_page_sbusy(pp); 381252337Sgavin } else if (pp == NULL) { 382246293Savg pp = vm_page_alloc(obj, OFF_TO_IDX(start), 383246293Savg VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED | 384254138Sattilio VM_ALLOC_SBUSY); 385252337Sgavin } else { 386252337Sgavin ASSERT(pp != NULL && !pp->valid); 387252337Sgavin pp = NULL; 388209962Smm } 389246293Savg 390246293Savg if (pp != NULL) { 391246293Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 392253953Sattilio vm_object_pip_add(obj, 1); 393246293Savg pmap_remove_write(pp); 394258353Savg if (nbytes != 0) 395258353Savg vm_page_clear_dirty(pp, off, nbytes); 396246293Savg } 397209962Smm break; 398209962Smm } 399209962Smm return (pp); 400209962Smm} 401209962Smm 402209962Smmstatic void 403253953Sattiliopage_unbusy(vm_page_t pp) 404209962Smm{ 405209962Smm 406254138Sattilio vm_page_sunbusy(pp); 407253953Sattilio vm_object_pip_subtract(pp->object, 1); 408209962Smm} 409209962Smm 410253953Sattiliostatic vm_page_t 411253953Sattiliopage_hold(vnode_t *vp, int64_t start) 412253953Sattilio{ 413253953Sattilio vm_object_t obj; 414253953Sattilio vm_page_t pp; 415253953Sattilio 416253953Sattilio obj = vp->v_object; 417253953Sattilio zfs_vmobject_assert_wlocked(obj); 418253953Sattilio 419253953Sattilio for (;;) { 420253953Sattilio if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 421253953Sattilio pp->valid) { 422254138Sattilio if (vm_page_xbusied(pp)) { 423253953Sattilio /* 424253953Sattilio * Reference the page before unlocking and 425253953Sattilio * sleeping so that the page daemon is less 426253953Sattilio * likely to reclaim it. 427253953Sattilio */ 428253953Sattilio vm_page_reference(pp); 429254138Sattilio vm_page_lock(pp); 430254138Sattilio zfs_vmobject_wunlock(obj); 431254138Sattilio vm_page_busy_sleep(pp, "zfsmwb"); 432254138Sattilio zfs_vmobject_wlock(obj); 433253953Sattilio continue; 434253953Sattilio } 435253953Sattilio 436253953Sattilio ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 437253953Sattilio vm_page_lock(pp); 438253953Sattilio vm_page_hold(pp); 439253953Sattilio vm_page_unlock(pp); 440253953Sattilio 441253953Sattilio } else 442253953Sattilio pp = NULL; 443253953Sattilio break; 444253953Sattilio } 445253953Sattilio return (pp); 446253953Sattilio} 447253953Sattilio 448253953Sattiliostatic void 449253953Sattiliopage_unhold(vm_page_t pp) 450253953Sattilio{ 451253953Sattilio 452253953Sattilio vm_page_lock(pp); 453253953Sattilio vm_page_unhold(pp); 454253953Sattilio vm_page_unlock(pp); 455253953Sattilio} 456253953Sattilio 457168404Spjd/* 458168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 459168404Spjd * between the DMU cache and the memory mapped pages. What this means: 460168404Spjd * 461168404Spjd * On Write: If we find a memory mapped page, we write to *both* 462168404Spjd * the page and the dmu buffer. 463168404Spjd */ 464209962Smmstatic void 465209962Smmupdate_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 466209962Smm int segflg, dmu_tx_t *tx) 467168404Spjd{ 468168404Spjd vm_object_t obj; 469168404Spjd struct sf_buf *sf; 470246293Savg caddr_t va; 471212655Savg int off; 472168404Spjd 473258746Savg ASSERT(segflg != UIO_NOCOPY); 474168404Spjd ASSERT(vp->v_mount != NULL); 475168404Spjd obj = vp->v_object; 476168404Spjd ASSERT(obj != NULL); 477168404Spjd 478168404Spjd off = start & PAGEOFFSET; 479248084Sattilio zfs_vmobject_wlock(obj); 480168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 481209962Smm vm_page_t pp; 482246293Savg int nbytes = imin(PAGESIZE - off, len); 483168404Spjd 484258746Savg if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { 485248084Sattilio zfs_vmobject_wunlock(obj); 486168404Spjd 487246293Savg va = zfs_map_page(pp, &sf); 488246293Savg (void) dmu_read(os, oid, start+off, nbytes, 489246293Savg va+off, DMU_READ_PREFETCH);; 490209962Smm zfs_unmap_page(sf); 491246293Savg 492248084Sattilio zfs_vmobject_wlock(obj); 493253953Sattilio page_unbusy(pp); 494168404Spjd } 495209962Smm len -= nbytes; 496168404Spjd off = 0; 497168404Spjd } 498258746Savg vm_object_pip_wakeupn(obj, 0); 499248084Sattilio zfs_vmobject_wunlock(obj); 500168404Spjd} 501168404Spjd 502168404Spjd/* 503219089Spjd * Read with UIO_NOCOPY flag means that sendfile(2) requests 504219089Spjd * ZFS to populate a range of page cache pages with data. 505219089Spjd * 506219089Spjd * NOTE: this function could be optimized to pre-allocate 507254138Sattilio * all pages in advance, drain exclusive busy on all of them, 508219089Spjd * map them into contiguous KVA region and populate them 509219089Spjd * in one single dmu_read() call. 510219089Spjd */ 511219089Spjdstatic int 512219089Spjdmappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) 513219089Spjd{ 514219089Spjd znode_t *zp = VTOZ(vp); 515219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 516219089Spjd struct sf_buf *sf; 517219089Spjd vm_object_t obj; 518219089Spjd vm_page_t pp; 519219089Spjd int64_t start; 520219089Spjd caddr_t va; 521219089Spjd int len = nbytes; 522219089Spjd int off; 523219089Spjd int error = 0; 524219089Spjd 525219089Spjd ASSERT(uio->uio_segflg == UIO_NOCOPY); 526219089Spjd ASSERT(vp->v_mount != NULL); 527219089Spjd obj = vp->v_object; 528219089Spjd ASSERT(obj != NULL); 529219089Spjd ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); 530219089Spjd 531248084Sattilio zfs_vmobject_wlock(obj); 532219089Spjd for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { 533219089Spjd int bytes = MIN(PAGESIZE, len); 534219089Spjd 535254138Sattilio pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY | 536254649Skib VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); 537219089Spjd if (pp->valid == 0) { 538248084Sattilio zfs_vmobject_wunlock(obj); 539219089Spjd va = zfs_map_page(pp, &sf); 540219089Spjd error = dmu_read(os, zp->z_id, start, bytes, va, 541219089Spjd DMU_READ_PREFETCH); 542219089Spjd if (bytes != PAGESIZE && error == 0) 543219089Spjd bzero(va + bytes, PAGESIZE - bytes); 544219089Spjd zfs_unmap_page(sf); 545248084Sattilio zfs_vmobject_wlock(obj); 546254138Sattilio vm_page_sunbusy(pp); 547219089Spjd vm_page_lock(pp); 548219089Spjd if (error) { 549253073Savg if (pp->wire_count == 0 && pp->valid == 0 && 550254138Sattilio !vm_page_busied(pp)) 551253073Savg vm_page_free(pp); 552219089Spjd } else { 553219089Spjd pp->valid = VM_PAGE_BITS_ALL; 554219089Spjd vm_page_activate(pp); 555219089Spjd } 556219089Spjd vm_page_unlock(pp); 557258739Savg } else { 558258739Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 559254138Sattilio vm_page_sunbusy(pp); 560258739Savg } 561219089Spjd if (error) 562219089Spjd break; 563219089Spjd uio->uio_resid -= bytes; 564219089Spjd uio->uio_offset += bytes; 565219089Spjd len -= bytes; 566219089Spjd } 567248084Sattilio zfs_vmobject_wunlock(obj); 568219089Spjd return (error); 569219089Spjd} 570219089Spjd 571219089Spjd/* 572168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 573168404Spjd * between the DMU cache and the memory mapped pages. What this means: 574168404Spjd * 575168404Spjd * On Read: We "read" preferentially from memory mapped pages, 576168404Spjd * else we default from the dmu buffer. 577168404Spjd * 578168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 579251631Sdelphij * the file is memory mapped. 580168404Spjd */ 581168404Spjdstatic int 582168404Spjdmappedread(vnode_t *vp, int nbytes, uio_t *uio) 583168404Spjd{ 584168404Spjd znode_t *zp = VTOZ(vp); 585168404Spjd vm_object_t obj; 586212655Savg int64_t start; 587168926Spjd caddr_t va; 588168404Spjd int len = nbytes; 589212655Savg int off; 590168404Spjd int error = 0; 591168404Spjd 592168404Spjd ASSERT(vp->v_mount != NULL); 593168404Spjd obj = vp->v_object; 594168404Spjd ASSERT(obj != NULL); 595168404Spjd 596168404Spjd start = uio->uio_loffset; 597168404Spjd off = start & PAGEOFFSET; 598248084Sattilio zfs_vmobject_wlock(obj); 599168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 600219089Spjd vm_page_t pp; 601219089Spjd uint64_t bytes = MIN(PAGESIZE - off, len); 602168404Spjd 603253953Sattilio if (pp = page_hold(vp, start)) { 604219089Spjd struct sf_buf *sf; 605219089Spjd caddr_t va; 606212652Savg 607248084Sattilio zfs_vmobject_wunlock(obj); 608219089Spjd va = zfs_map_page(pp, &sf); 609219089Spjd error = uiomove(va + off, bytes, UIO_READ, uio); 610219089Spjd zfs_unmap_page(sf); 611248084Sattilio zfs_vmobject_wlock(obj); 612253953Sattilio page_unhold(pp); 613219089Spjd } else { 614248084Sattilio zfs_vmobject_wunlock(obj); 615272809Sdelphij error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 616272809Sdelphij uio, bytes); 617248084Sattilio zfs_vmobject_wlock(obj); 618168404Spjd } 619168404Spjd len -= bytes; 620168404Spjd off = 0; 621168404Spjd if (error) 622168404Spjd break; 623168404Spjd } 624248084Sattilio zfs_vmobject_wunlock(obj); 625168404Spjd return (error); 626168404Spjd} 627168404Spjd 628168404Spjdoffset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 629168404Spjd 630168404Spjd/* 631168404Spjd * Read bytes from specified file into supplied buffer. 632168404Spjd * 633168404Spjd * IN: vp - vnode of file to be read from. 634168404Spjd * uio - structure supplying read location, range info, 635168404Spjd * and return buffer. 636168404Spjd * ioflag - SYNC flags; used to provide FRSYNC semantics. 637168404Spjd * cr - credentials of caller. 638185029Spjd * ct - caller context 639168404Spjd * 640168404Spjd * OUT: uio - updated offset and range, buffer filled. 641168404Spjd * 642251631Sdelphij * RETURN: 0 on success, error code on failure. 643168404Spjd * 644168404Spjd * Side Effects: 645168404Spjd * vp - atime updated if byte count > 0 646168404Spjd */ 647168404Spjd/* ARGSUSED */ 648168404Spjdstatic int 649168962Spjdzfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 650168404Spjd{ 651168404Spjd znode_t *zp = VTOZ(vp); 652168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 653168404Spjd ssize_t n, nbytes; 654247187Smm int error = 0; 655168404Spjd rl_t *rl; 656219089Spjd xuio_t *xuio = NULL; 657168404Spjd 658168404Spjd ZFS_ENTER(zfsvfs); 659185029Spjd ZFS_VERIFY_ZP(zp); 660168404Spjd 661219089Spjd if (zp->z_pflags & ZFS_AV_QUARANTINED) { 662185029Spjd ZFS_EXIT(zfsvfs); 663249195Smm return (SET_ERROR(EACCES)); 664185029Spjd } 665185029Spjd 666168404Spjd /* 667168404Spjd * Validate file offset 668168404Spjd */ 669168404Spjd if (uio->uio_loffset < (offset_t)0) { 670168404Spjd ZFS_EXIT(zfsvfs); 671249195Smm return (SET_ERROR(EINVAL)); 672168404Spjd } 673168404Spjd 674168404Spjd /* 675168404Spjd * Fasttrack empty reads 676168404Spjd */ 677168404Spjd if (uio->uio_resid == 0) { 678168404Spjd ZFS_EXIT(zfsvfs); 679168404Spjd return (0); 680168404Spjd } 681168404Spjd 682168404Spjd /* 683168962Spjd * Check for mandatory locks 684168962Spjd */ 685219089Spjd if (MANDMODE(zp->z_mode)) { 686168962Spjd if (error = chklock(vp, FREAD, 687168962Spjd uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 688168962Spjd ZFS_EXIT(zfsvfs); 689168962Spjd return (error); 690168962Spjd } 691168962Spjd } 692168962Spjd 693168962Spjd /* 694168404Spjd * If we're in FRSYNC mode, sync out this znode before reading it. 695168404Spjd */ 696224605Smm if (zfsvfs->z_log && 697224605Smm (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) 698219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 699168404Spjd 700168404Spjd /* 701168404Spjd * Lock the range against changes. 702168404Spjd */ 703168404Spjd rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 704168404Spjd 705168404Spjd /* 706168404Spjd * If we are reading past end-of-file we can skip 707168404Spjd * to the end; but we might still need to set atime. 708168404Spjd */ 709219089Spjd if (uio->uio_loffset >= zp->z_size) { 710168404Spjd error = 0; 711168404Spjd goto out; 712168404Spjd } 713168404Spjd 714219089Spjd ASSERT(uio->uio_loffset < zp->z_size); 715219089Spjd n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 716168404Spjd 717219089Spjd#ifdef sun 718219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 719219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 720219089Spjd int nblk; 721219089Spjd int blksz = zp->z_blksz; 722219089Spjd uint64_t offset = uio->uio_loffset; 723219089Spjd 724219089Spjd xuio = (xuio_t *)uio; 725219089Spjd if ((ISP2(blksz))) { 726219089Spjd nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 727219089Spjd blksz)) / blksz; 728219089Spjd } else { 729219089Spjd ASSERT(offset + n <= blksz); 730219089Spjd nblk = 1; 731219089Spjd } 732219089Spjd (void) dmu_xuio_init(xuio, nblk); 733219089Spjd 734219089Spjd if (vn_has_cached_data(vp)) { 735219089Spjd /* 736219089Spjd * For simplicity, we always allocate a full buffer 737219089Spjd * even if we only expect to read a portion of a block. 738219089Spjd */ 739219089Spjd while (--nblk >= 0) { 740219089Spjd (void) dmu_xuio_add(xuio, 741219089Spjd dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 742219089Spjd blksz), 0, blksz); 743219089Spjd } 744219089Spjd } 745219089Spjd } 746219089Spjd#endif /* sun */ 747219089Spjd 748168404Spjd while (n > 0) { 749168404Spjd nbytes = MIN(n, zfs_read_chunk_size - 750168404Spjd P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 751168404Spjd 752219089Spjd#ifdef __FreeBSD__ 753219089Spjd if (uio->uio_segflg == UIO_NOCOPY) 754219089Spjd error = mappedread_sf(vp, nbytes, uio); 755219089Spjd else 756219089Spjd#endif /* __FreeBSD__ */ 757272809Sdelphij if (vn_has_cached_data(vp)) { 758168404Spjd error = mappedread(vp, nbytes, uio); 759272809Sdelphij } else { 760272809Sdelphij error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 761272809Sdelphij uio, nbytes); 762272809Sdelphij } 763185029Spjd if (error) { 764185029Spjd /* convert checksum errors into IO errors */ 765185029Spjd if (error == ECKSUM) 766249195Smm error = SET_ERROR(EIO); 767168404Spjd break; 768185029Spjd } 769168962Spjd 770168404Spjd n -= nbytes; 771168404Spjd } 772168404Spjdout: 773168404Spjd zfs_range_unlock(rl); 774168404Spjd 775168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 776168404Spjd ZFS_EXIT(zfsvfs); 777168404Spjd return (error); 778168404Spjd} 779168404Spjd 780168404Spjd/* 781168404Spjd * Write the bytes to a file. 782168404Spjd * 783168404Spjd * IN: vp - vnode of file to be written to. 784168404Spjd * uio - structure supplying write location, range info, 785168404Spjd * and data buffer. 786251631Sdelphij * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is 787251631Sdelphij * set if in append mode. 788168404Spjd * cr - credentials of caller. 789185029Spjd * ct - caller context (NFS/CIFS fem monitor only) 790168404Spjd * 791168404Spjd * OUT: uio - updated offset and range. 792168404Spjd * 793251631Sdelphij * RETURN: 0 on success, error code on failure. 794168404Spjd * 795168404Spjd * Timestamps: 796168404Spjd * vp - ctime|mtime updated if byte count > 0 797168404Spjd */ 798219089Spjd 799168404Spjd/* ARGSUSED */ 800168404Spjdstatic int 801168962Spjdzfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 802168404Spjd{ 803168404Spjd znode_t *zp = VTOZ(vp); 804168962Spjd rlim64_t limit = MAXOFFSET_T; 805168404Spjd ssize_t start_resid = uio->uio_resid; 806168404Spjd ssize_t tx_bytes; 807168404Spjd uint64_t end_size; 808168404Spjd dmu_tx_t *tx; 809168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 810185029Spjd zilog_t *zilog; 811168404Spjd offset_t woff; 812168404Spjd ssize_t n, nbytes; 813168404Spjd rl_t *rl; 814168404Spjd int max_blksz = zfsvfs->z_max_blksz; 815247187Smm int error = 0; 816209962Smm arc_buf_t *abuf; 817247187Smm iovec_t *aiov = NULL; 818219089Spjd xuio_t *xuio = NULL; 819219089Spjd int i_iov = 0; 820219089Spjd int iovcnt = uio->uio_iovcnt; 821219089Spjd iovec_t *iovp = uio->uio_iov; 822219089Spjd int write_eof; 823219089Spjd int count = 0; 824219089Spjd sa_bulk_attr_t bulk[4]; 825219089Spjd uint64_t mtime[2], ctime[2]; 826168404Spjd 827168404Spjd /* 828168404Spjd * Fasttrack empty write 829168404Spjd */ 830168404Spjd n = start_resid; 831168404Spjd if (n == 0) 832168404Spjd return (0); 833168404Spjd 834168962Spjd if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 835168962Spjd limit = MAXOFFSET_T; 836168962Spjd 837168404Spjd ZFS_ENTER(zfsvfs); 838185029Spjd ZFS_VERIFY_ZP(zp); 839168404Spjd 840219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 841219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 842219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 843219089Spjd &zp->z_size, 8); 844219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 845219089Spjd &zp->z_pflags, 8); 846219089Spjd 847168404Spjd /* 848262990Sdelphij * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our 849262990Sdelphij * callers might not be able to detect properly that we are read-only, 850262990Sdelphij * so check it explicitly here. 851262990Sdelphij */ 852262990Sdelphij if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 853262990Sdelphij ZFS_EXIT(zfsvfs); 854262990Sdelphij return (SET_ERROR(EROFS)); 855262990Sdelphij } 856262990Sdelphij 857262990Sdelphij /* 858185029Spjd * If immutable or not appending then return EPERM 859185029Spjd */ 860219089Spjd if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 861219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 862219089Spjd (uio->uio_loffset < zp->z_size))) { 863185029Spjd ZFS_EXIT(zfsvfs); 864249195Smm return (SET_ERROR(EPERM)); 865185029Spjd } 866185029Spjd 867185029Spjd zilog = zfsvfs->z_log; 868185029Spjd 869185029Spjd /* 870219089Spjd * Validate file offset 871219089Spjd */ 872219089Spjd woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 873219089Spjd if (woff < 0) { 874219089Spjd ZFS_EXIT(zfsvfs); 875249195Smm return (SET_ERROR(EINVAL)); 876219089Spjd } 877219089Spjd 878219089Spjd /* 879219089Spjd * Check for mandatory locks before calling zfs_range_lock() 880219089Spjd * in order to prevent a deadlock with locks set via fcntl(). 881219089Spjd */ 882219089Spjd if (MANDMODE((mode_t)zp->z_mode) && 883219089Spjd (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 884219089Spjd ZFS_EXIT(zfsvfs); 885219089Spjd return (error); 886219089Spjd } 887219089Spjd 888219089Spjd#ifdef sun 889219089Spjd /* 890168404Spjd * Pre-fault the pages to ensure slow (eg NFS) pages 891168404Spjd * don't hold up txg. 892219089Spjd * Skip this if uio contains loaned arc_buf. 893168404Spjd */ 894219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 895219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 896219089Spjd xuio = (xuio_t *)uio; 897219089Spjd else 898219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 899219089Spjd#endif /* sun */ 900168404Spjd 901168404Spjd /* 902168404Spjd * If in append mode, set the io offset pointer to eof. 903168404Spjd */ 904213673Spjd if (ioflag & FAPPEND) { 905168404Spjd /* 906219089Spjd * Obtain an appending range lock to guarantee file append 907219089Spjd * semantics. We reset the write offset once we have the lock. 908168404Spjd */ 909168404Spjd rl = zfs_range_lock(zp, 0, n, RL_APPEND); 910219089Spjd woff = rl->r_off; 911168404Spjd if (rl->r_len == UINT64_MAX) { 912219089Spjd /* 913219089Spjd * We overlocked the file because this write will cause 914219089Spjd * the file block size to increase. 915219089Spjd * Note that zp_size cannot change with this lock held. 916219089Spjd */ 917219089Spjd woff = zp->z_size; 918168404Spjd } 919219089Spjd uio->uio_loffset = woff; 920168404Spjd } else { 921168404Spjd /* 922219089Spjd * Note that if the file block size will change as a result of 923219089Spjd * this write, then this range lock will lock the entire file 924219089Spjd * so that we can re-write the block safely. 925168404Spjd */ 926168404Spjd rl = zfs_range_lock(zp, woff, n, RL_WRITER); 927168404Spjd } 928168404Spjd 929235781Strasz if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { 930235781Strasz zfs_range_unlock(rl); 931235781Strasz ZFS_EXIT(zfsvfs); 932235781Strasz return (EFBIG); 933235781Strasz } 934235781Strasz 935168962Spjd if (woff >= limit) { 936168962Spjd zfs_range_unlock(rl); 937168962Spjd ZFS_EXIT(zfsvfs); 938249195Smm return (SET_ERROR(EFBIG)); 939168962Spjd } 940168962Spjd 941168962Spjd if ((woff + n) > limit || woff > (limit - n)) 942168962Spjd n = limit - woff; 943168962Spjd 944219089Spjd /* Will this write extend the file length? */ 945219089Spjd write_eof = (woff + n > zp->z_size); 946168404Spjd 947219089Spjd end_size = MAX(zp->z_size, woff + n); 948219089Spjd 949168404Spjd /* 950168404Spjd * Write the file in reasonable size chunks. Each chunk is written 951168404Spjd * in a separate transaction; this keeps the intent log records small 952168404Spjd * and allows us to do more fine-grained space accounting. 953168404Spjd */ 954168404Spjd while (n > 0) { 955209962Smm abuf = NULL; 956209962Smm woff = uio->uio_loffset; 957219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 958219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 959209962Smm if (abuf != NULL) 960209962Smm dmu_return_arcbuf(abuf); 961249195Smm error = SET_ERROR(EDQUOT); 962209962Smm break; 963209962Smm } 964209962Smm 965219089Spjd if (xuio && abuf == NULL) { 966219089Spjd ASSERT(i_iov < iovcnt); 967219089Spjd aiov = &iovp[i_iov]; 968219089Spjd abuf = dmu_xuio_arcbuf(xuio, i_iov); 969219089Spjd dmu_xuio_clear(xuio, i_iov); 970219089Spjd DTRACE_PROBE3(zfs_cp_write, int, i_iov, 971219089Spjd iovec_t *, aiov, arc_buf_t *, abuf); 972219089Spjd ASSERT((aiov->iov_base == abuf->b_data) || 973219089Spjd ((char *)aiov->iov_base - (char *)abuf->b_data + 974219089Spjd aiov->iov_len == arc_buf_size(abuf))); 975219089Spjd i_iov++; 976219089Spjd } else if (abuf == NULL && n >= max_blksz && 977219089Spjd woff >= zp->z_size && 978209962Smm P2PHASE(woff, max_blksz) == 0 && 979209962Smm zp->z_blksz == max_blksz) { 980219089Spjd /* 981219089Spjd * This write covers a full block. "Borrow" a buffer 982219089Spjd * from the dmu so that we can fill it before we enter 983219089Spjd * a transaction. This avoids the possibility of 984219089Spjd * holding up the transaction if the data copy hangs 985219089Spjd * up on a pagefault (e.g., from an NFS server mapping). 986219089Spjd */ 987209962Smm size_t cbytes; 988209962Smm 989219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 990219089Spjd max_blksz); 991209962Smm ASSERT(abuf != NULL); 992209962Smm ASSERT(arc_buf_size(abuf) == max_blksz); 993209962Smm if (error = uiocopy(abuf->b_data, max_blksz, 994209962Smm UIO_WRITE, uio, &cbytes)) { 995209962Smm dmu_return_arcbuf(abuf); 996209962Smm break; 997209962Smm } 998209962Smm ASSERT(cbytes == max_blksz); 999209962Smm } 1000209962Smm 1001209962Smm /* 1002168404Spjd * Start a transaction. 1003168404Spjd */ 1004168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1005219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1006168404Spjd dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 1007219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1008258720Savg error = dmu_tx_assign(tx, TXG_WAIT); 1009168404Spjd if (error) { 1010168404Spjd dmu_tx_abort(tx); 1011209962Smm if (abuf != NULL) 1012209962Smm dmu_return_arcbuf(abuf); 1013168404Spjd break; 1014168404Spjd } 1015168404Spjd 1016168404Spjd /* 1017168404Spjd * If zfs_range_lock() over-locked we grow the blocksize 1018168404Spjd * and then reduce the lock range. This will only happen 1019168404Spjd * on the first iteration since zfs_range_reduce() will 1020168404Spjd * shrink down r_len to the appropriate size. 1021168404Spjd */ 1022168404Spjd if (rl->r_len == UINT64_MAX) { 1023168404Spjd uint64_t new_blksz; 1024168404Spjd 1025168404Spjd if (zp->z_blksz > max_blksz) { 1026274337Sdelphij /* 1027274337Sdelphij * File's blocksize is already larger than the 1028274337Sdelphij * "recordsize" property. Only let it grow to 1029274337Sdelphij * the next power of 2. 1030274337Sdelphij */ 1031168404Spjd ASSERT(!ISP2(zp->z_blksz)); 1032274337Sdelphij new_blksz = MIN(end_size, 1033274337Sdelphij 1 << highbit64(zp->z_blksz)); 1034168404Spjd } else { 1035168404Spjd new_blksz = MIN(end_size, max_blksz); 1036168404Spjd } 1037168404Spjd zfs_grow_blocksize(zp, new_blksz, tx); 1038168404Spjd zfs_range_reduce(rl, woff, n); 1039168404Spjd } 1040168404Spjd 1041168404Spjd /* 1042168404Spjd * XXX - should we really limit each write to z_max_blksz? 1043168404Spjd * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 1044168404Spjd */ 1045168404Spjd nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 1046168404Spjd 1047219089Spjd if (woff + nbytes > zp->z_size) 1048168404Spjd vnode_pager_setsize(vp, woff + nbytes); 1049168404Spjd 1050209962Smm if (abuf == NULL) { 1051209962Smm tx_bytes = uio->uio_resid; 1052219089Spjd error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 1053219089Spjd uio, nbytes, tx); 1054209962Smm tx_bytes -= uio->uio_resid; 1055168404Spjd } else { 1056209962Smm tx_bytes = nbytes; 1057219089Spjd ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 1058219089Spjd /* 1059219089Spjd * If this is not a full block write, but we are 1060219089Spjd * extending the file past EOF and this data starts 1061219089Spjd * block-aligned, use assign_arcbuf(). Otherwise, 1062219089Spjd * write via dmu_write(). 1063219089Spjd */ 1064219089Spjd if (tx_bytes < max_blksz && (!write_eof || 1065219089Spjd aiov->iov_base != abuf->b_data)) { 1066219089Spjd ASSERT(xuio); 1067219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, woff, 1068219089Spjd aiov->iov_len, aiov->iov_base, tx); 1069219089Spjd dmu_return_arcbuf(abuf); 1070219089Spjd xuio_stat_wbuf_copied(); 1071219089Spjd } else { 1072219089Spjd ASSERT(xuio || tx_bytes == max_blksz); 1073219089Spjd dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 1074219089Spjd woff, abuf, tx); 1075219089Spjd } 1076209962Smm ASSERT(tx_bytes <= uio->uio_resid); 1077209962Smm uioskip(uio, tx_bytes); 1078168404Spjd } 1079212657Savg if (tx_bytes && vn_has_cached_data(vp)) { 1080209962Smm update_pages(vp, woff, tx_bytes, zfsvfs->z_os, 1081209962Smm zp->z_id, uio->uio_segflg, tx); 1082209962Smm } 1083209962Smm 1084209962Smm /* 1085168404Spjd * If we made no progress, we're done. If we made even 1086168404Spjd * partial progress, update the znode and ZIL accordingly. 1087168404Spjd */ 1088168404Spjd if (tx_bytes == 0) { 1089219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 1090219089Spjd (void *)&zp->z_size, sizeof (uint64_t), tx); 1091168404Spjd dmu_tx_commit(tx); 1092168404Spjd ASSERT(error != 0); 1093168404Spjd break; 1094168404Spjd } 1095168404Spjd 1096168404Spjd /* 1097168404Spjd * Clear Set-UID/Set-GID bits on successful write if not 1098168404Spjd * privileged and at least one of the excute bits is set. 1099168404Spjd * 1100168404Spjd * It would be nice to to this after all writes have 1101168404Spjd * been done, but that would still expose the ISUID/ISGID 1102168404Spjd * to another app after the partial write is committed. 1103185029Spjd * 1104185029Spjd * Note: we don't call zfs_fuid_map_id() here because 1105185029Spjd * user 0 is not an ephemeral uid. 1106168404Spjd */ 1107168404Spjd mutex_enter(&zp->z_acl_lock); 1108219089Spjd if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 1109168404Spjd (S_IXUSR >> 6))) != 0 && 1110219089Spjd (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 1111185029Spjd secpolicy_vnode_setid_retain(vp, cr, 1112219089Spjd (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 1113219089Spjd uint64_t newmode; 1114219089Spjd zp->z_mode &= ~(S_ISUID | S_ISGID); 1115219089Spjd newmode = zp->z_mode; 1116219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 1117219089Spjd (void *)&newmode, sizeof (uint64_t), tx); 1118168404Spjd } 1119168404Spjd mutex_exit(&zp->z_acl_lock); 1120168404Spjd 1121219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 1122219089Spjd B_TRUE); 1123168404Spjd 1124168404Spjd /* 1125168404Spjd * Update the file size (zp_size) if it has changed; 1126168404Spjd * account for possible concurrent updates. 1127168404Spjd */ 1128219089Spjd while ((end_size = zp->z_size) < uio->uio_loffset) { 1129219089Spjd (void) atomic_cas_64(&zp->z_size, end_size, 1130168404Spjd uio->uio_loffset); 1131219089Spjd ASSERT(error == 0); 1132219089Spjd } 1133219089Spjd /* 1134219089Spjd * If we are replaying and eof is non zero then force 1135219089Spjd * the file size to the specified eof. Note, there's no 1136219089Spjd * concurrency during replay. 1137219089Spjd */ 1138219089Spjd if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 1139219089Spjd zp->z_size = zfsvfs->z_replay_eof; 1140219089Spjd 1141219089Spjd error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1142219089Spjd 1143168404Spjd zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 1144168404Spjd dmu_tx_commit(tx); 1145168404Spjd 1146168404Spjd if (error != 0) 1147168404Spjd break; 1148168404Spjd ASSERT(tx_bytes == nbytes); 1149168404Spjd n -= nbytes; 1150219089Spjd 1151219089Spjd#ifdef sun 1152219089Spjd if (!xuio && n > 0) 1153219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 1154219089Spjd#endif /* sun */ 1155168404Spjd } 1156168404Spjd 1157168404Spjd zfs_range_unlock(rl); 1158168404Spjd 1159168404Spjd /* 1160168404Spjd * If we're in replay mode, or we made no progress, return error. 1161168404Spjd * Otherwise, it's at least a partial write, so it's successful. 1162168404Spjd */ 1163209962Smm if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1164168404Spjd ZFS_EXIT(zfsvfs); 1165168404Spjd return (error); 1166168404Spjd } 1167168404Spjd 1168219089Spjd if (ioflag & (FSYNC | FDSYNC) || 1169219089Spjd zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1170219089Spjd zil_commit(zilog, zp->z_id); 1171168404Spjd 1172168404Spjd ZFS_EXIT(zfsvfs); 1173168404Spjd return (0); 1174168404Spjd} 1175168404Spjd 1176168404Spjdvoid 1177219089Spjdzfs_get_done(zgd_t *zgd, int error) 1178168404Spjd{ 1179219089Spjd znode_t *zp = zgd->zgd_private; 1180219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 1181168404Spjd 1182219089Spjd if (zgd->zgd_db) 1183219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1184219089Spjd 1185219089Spjd zfs_range_unlock(zgd->zgd_rl); 1186219089Spjd 1187191900Skmacy /* 1188191900Skmacy * Release the vnode asynchronously as we currently have the 1189191900Skmacy * txg stopped from syncing. 1190191900Skmacy */ 1191219089Spjd VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1192219089Spjd 1193219089Spjd if (error == 0 && zgd->zgd_bp) 1194219089Spjd zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1195219089Spjd 1196168404Spjd kmem_free(zgd, sizeof (zgd_t)); 1197168404Spjd} 1198168404Spjd 1199214378Smm#ifdef DEBUG 1200214378Smmstatic int zil_fault_io = 0; 1201214378Smm#endif 1202214378Smm 1203168404Spjd/* 1204168404Spjd * Get data to generate a TX_WRITE intent log record. 1205168404Spjd */ 1206168404Spjdint 1207168404Spjdzfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1208168404Spjd{ 1209168404Spjd zfsvfs_t *zfsvfs = arg; 1210168404Spjd objset_t *os = zfsvfs->z_os; 1211168404Spjd znode_t *zp; 1212219089Spjd uint64_t object = lr->lr_foid; 1213219089Spjd uint64_t offset = lr->lr_offset; 1214219089Spjd uint64_t size = lr->lr_length; 1215219089Spjd blkptr_t *bp = &lr->lr_blkptr; 1216168404Spjd dmu_buf_t *db; 1217168404Spjd zgd_t *zgd; 1218168404Spjd int error = 0; 1219168404Spjd 1220219089Spjd ASSERT(zio != NULL); 1221219089Spjd ASSERT(size != 0); 1222168404Spjd 1223168404Spjd /* 1224168404Spjd * Nothing to do if the file has been removed 1225168404Spjd */ 1226219089Spjd if (zfs_zget(zfsvfs, object, &zp) != 0) 1227249195Smm return (SET_ERROR(ENOENT)); 1228168404Spjd if (zp->z_unlinked) { 1229191900Skmacy /* 1230191900Skmacy * Release the vnode asynchronously as we currently have the 1231191900Skmacy * txg stopped from syncing. 1232191900Skmacy */ 1233196307Spjd VN_RELE_ASYNC(ZTOV(zp), 1234196307Spjd dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1235249195Smm return (SET_ERROR(ENOENT)); 1236168404Spjd } 1237168404Spjd 1238219089Spjd zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1239219089Spjd zgd->zgd_zilog = zfsvfs->z_log; 1240219089Spjd zgd->zgd_private = zp; 1241219089Spjd 1242168404Spjd /* 1243168404Spjd * Write records come in two flavors: immediate and indirect. 1244168404Spjd * For small writes it's cheaper to store the data with the 1245168404Spjd * log record (immediate); for large writes it's cheaper to 1246168404Spjd * sync the data and get a pointer to it (indirect) so that 1247168404Spjd * we don't have to write the data twice. 1248168404Spjd */ 1249168404Spjd if (buf != NULL) { /* immediate write */ 1250219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1251168404Spjd /* test for truncation needs to be done while range locked */ 1252219089Spjd if (offset >= zp->z_size) { 1253249195Smm error = SET_ERROR(ENOENT); 1254219089Spjd } else { 1255219089Spjd error = dmu_read(os, object, offset, size, buf, 1256219089Spjd DMU_READ_NO_PREFETCH); 1257168404Spjd } 1258219089Spjd ASSERT(error == 0 || error == ENOENT); 1259168404Spjd } else { /* indirect write */ 1260168404Spjd /* 1261168404Spjd * Have to lock the whole block to ensure when it's 1262168404Spjd * written out and it's checksum is being calculated 1263168404Spjd * that no one can change the data. We need to re-check 1264168404Spjd * blocksize after we get the lock in case it's changed! 1265168404Spjd */ 1266168404Spjd for (;;) { 1267219089Spjd uint64_t blkoff; 1268219089Spjd size = zp->z_blksz; 1269219089Spjd blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; 1270219089Spjd offset -= blkoff; 1271219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1272219089Spjd RL_READER); 1273219089Spjd if (zp->z_blksz == size) 1274168404Spjd break; 1275219089Spjd offset += blkoff; 1276219089Spjd zfs_range_unlock(zgd->zgd_rl); 1277168404Spjd } 1278168404Spjd /* test for truncation needs to be done while range locked */ 1279219089Spjd if (lr->lr_offset >= zp->z_size) 1280249195Smm error = SET_ERROR(ENOENT); 1281214378Smm#ifdef DEBUG 1282214378Smm if (zil_fault_io) { 1283249195Smm error = SET_ERROR(EIO); 1284214378Smm zil_fault_io = 0; 1285214378Smm } 1286214378Smm#endif 1287219089Spjd if (error == 0) 1288219089Spjd error = dmu_buf_hold(os, object, offset, zgd, &db, 1289219089Spjd DMU_READ_NO_PREFETCH); 1290214378Smm 1291209962Smm if (error == 0) { 1292243524Smm blkptr_t *obp = dmu_buf_get_blkptr(db); 1293243524Smm if (obp) { 1294243524Smm ASSERT(BP_IS_HOLE(bp)); 1295243524Smm *bp = *obp; 1296243524Smm } 1297243524Smm 1298219089Spjd zgd->zgd_db = db; 1299219089Spjd zgd->zgd_bp = bp; 1300219089Spjd 1301219089Spjd ASSERT(db->db_offset == offset); 1302219089Spjd ASSERT(db->db_size == size); 1303219089Spjd 1304219089Spjd error = dmu_sync(zio, lr->lr_common.lrc_txg, 1305219089Spjd zfs_get_done, zgd); 1306219089Spjd ASSERT(error || lr->lr_length <= zp->z_blksz); 1307219089Spjd 1308209962Smm /* 1309219089Spjd * On success, we need to wait for the write I/O 1310219089Spjd * initiated by dmu_sync() to complete before we can 1311219089Spjd * release this dbuf. We will finish everything up 1312219089Spjd * in the zfs_get_done() callback. 1313209962Smm */ 1314219089Spjd if (error == 0) 1315219089Spjd return (0); 1316209962Smm 1317219089Spjd if (error == EALREADY) { 1318219089Spjd lr->lr_common.lrc_txtype = TX_WRITE2; 1319219089Spjd error = 0; 1320219089Spjd } 1321209962Smm } 1322168404Spjd } 1323219089Spjd 1324219089Spjd zfs_get_done(zgd, error); 1325219089Spjd 1326168404Spjd return (error); 1327168404Spjd} 1328168404Spjd 1329168404Spjd/*ARGSUSED*/ 1330168404Spjdstatic int 1331185029Spjdzfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1332185029Spjd caller_context_t *ct) 1333168404Spjd{ 1334168404Spjd znode_t *zp = VTOZ(vp); 1335168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1336168404Spjd int error; 1337168404Spjd 1338168404Spjd ZFS_ENTER(zfsvfs); 1339185029Spjd ZFS_VERIFY_ZP(zp); 1340185029Spjd 1341185029Spjd if (flag & V_ACE_MASK) 1342185029Spjd error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1343185029Spjd else 1344185029Spjd error = zfs_zaccess_rwx(zp, mode, flag, cr); 1345185029Spjd 1346168404Spjd ZFS_EXIT(zfsvfs); 1347168404Spjd return (error); 1348168404Spjd} 1349168404Spjd 1350168404Spjd/* 1351211932Smm * If vnode is for a device return a specfs vnode instead. 1352211932Smm */ 1353211932Smmstatic int 1354211932Smmspecvp_check(vnode_t **vpp, cred_t *cr) 1355211932Smm{ 1356211932Smm int error = 0; 1357211932Smm 1358211932Smm if (IS_DEVVP(*vpp)) { 1359211932Smm struct vnode *svp; 1360211932Smm 1361211932Smm svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1362211932Smm VN_RELE(*vpp); 1363211932Smm if (svp == NULL) 1364249195Smm error = SET_ERROR(ENOSYS); 1365211932Smm *vpp = svp; 1366211932Smm } 1367211932Smm return (error); 1368211932Smm} 1369211932Smm 1370211932Smm 1371211932Smm/* 1372168404Spjd * Lookup an entry in a directory, or an extended attribute directory. 1373168404Spjd * If it exists, return a held vnode reference for it. 1374168404Spjd * 1375168404Spjd * IN: dvp - vnode of directory to search. 1376168404Spjd * nm - name of entry to lookup. 1377168404Spjd * pnp - full pathname to lookup [UNUSED]. 1378168404Spjd * flags - LOOKUP_XATTR set if looking for an attribute. 1379168404Spjd * rdir - root directory vnode [UNUSED]. 1380168404Spjd * cr - credentials of caller. 1381185029Spjd * ct - caller context 1382185029Spjd * direntflags - directory lookup flags 1383185029Spjd * realpnp - returned pathname. 1384168404Spjd * 1385168404Spjd * OUT: vpp - vnode of located entry, NULL if not found. 1386168404Spjd * 1387251631Sdelphij * RETURN: 0 on success, error code on failure. 1388168404Spjd * 1389168404Spjd * Timestamps: 1390168404Spjd * NA 1391168404Spjd */ 1392168404Spjd/* ARGSUSED */ 1393168962Spjdstatic int 1394168962Spjdzfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1395185029Spjd int nameiop, cred_t *cr, kthread_t *td, int flags) 1396168404Spjd{ 1397168962Spjd znode_t *zdp = VTOZ(dvp); 1398168962Spjd zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1399211932Smm int error = 0; 1400185029Spjd int *direntflags = NULL; 1401185029Spjd void *realpnp = NULL; 1402168404Spjd 1403211932Smm /* fast path */ 1404211932Smm if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { 1405211932Smm 1406211932Smm if (dvp->v_type != VDIR) { 1407249195Smm return (SET_ERROR(ENOTDIR)); 1408219089Spjd } else if (zdp->z_sa_hdl == NULL) { 1409249195Smm return (SET_ERROR(EIO)); 1410211932Smm } 1411211932Smm 1412211932Smm if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { 1413211932Smm error = zfs_fastaccesschk_execute(zdp, cr); 1414211932Smm if (!error) { 1415211932Smm *vpp = dvp; 1416211932Smm VN_HOLD(*vpp); 1417211932Smm return (0); 1418211932Smm } 1419211932Smm return (error); 1420211932Smm } else { 1421211932Smm vnode_t *tvp = dnlc_lookup(dvp, nm); 1422211932Smm 1423211932Smm if (tvp) { 1424211932Smm error = zfs_fastaccesschk_execute(zdp, cr); 1425211932Smm if (error) { 1426211932Smm VN_RELE(tvp); 1427211932Smm return (error); 1428211932Smm } 1429211932Smm if (tvp == DNLC_NO_VNODE) { 1430211932Smm VN_RELE(tvp); 1431249195Smm return (SET_ERROR(ENOENT)); 1432211932Smm } else { 1433211932Smm *vpp = tvp; 1434211932Smm return (specvp_check(vpp, cr)); 1435211932Smm } 1436211932Smm } 1437211932Smm } 1438211932Smm } 1439211932Smm 1440211932Smm DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1441211932Smm 1442168404Spjd ZFS_ENTER(zfsvfs); 1443185029Spjd ZFS_VERIFY_ZP(zdp); 1444168404Spjd 1445168404Spjd *vpp = NULL; 1446168404Spjd 1447185029Spjd if (flags & LOOKUP_XATTR) { 1448168404Spjd#ifdef TODO 1449168404Spjd /* 1450168404Spjd * If the xattr property is off, refuse the lookup request. 1451168404Spjd */ 1452168404Spjd if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1453168404Spjd ZFS_EXIT(zfsvfs); 1454249195Smm return (SET_ERROR(EINVAL)); 1455168404Spjd } 1456185029Spjd#endif 1457168404Spjd 1458168404Spjd /* 1459168404Spjd * We don't allow recursive attributes.. 1460168404Spjd * Maybe someday we will. 1461168404Spjd */ 1462219089Spjd if (zdp->z_pflags & ZFS_XATTR) { 1463168404Spjd ZFS_EXIT(zfsvfs); 1464249195Smm return (SET_ERROR(EINVAL)); 1465168404Spjd } 1466168404Spjd 1467168404Spjd if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1468168404Spjd ZFS_EXIT(zfsvfs); 1469168404Spjd return (error); 1470168404Spjd } 1471168404Spjd 1472168404Spjd /* 1473168404Spjd * Do we have permission to get into attribute directory? 1474168404Spjd */ 1475168404Spjd 1476185029Spjd if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1477185029Spjd B_FALSE, cr)) { 1478168404Spjd VN_RELE(*vpp); 1479185029Spjd *vpp = NULL; 1480168404Spjd } 1481168404Spjd 1482168404Spjd ZFS_EXIT(zfsvfs); 1483168404Spjd return (error); 1484168404Spjd } 1485168404Spjd 1486168404Spjd if (dvp->v_type != VDIR) { 1487168404Spjd ZFS_EXIT(zfsvfs); 1488249195Smm return (SET_ERROR(ENOTDIR)); 1489168404Spjd } 1490168404Spjd 1491168404Spjd /* 1492168404Spjd * Check accessibility of directory. 1493168404Spjd */ 1494168404Spjd 1495185029Spjd if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1496168404Spjd ZFS_EXIT(zfsvfs); 1497168404Spjd return (error); 1498168404Spjd } 1499168404Spjd 1500185029Spjd if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1501185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1502185029Spjd ZFS_EXIT(zfsvfs); 1503249195Smm return (SET_ERROR(EILSEQ)); 1504185029Spjd } 1505168404Spjd 1506185029Spjd error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1507211932Smm if (error == 0) 1508211932Smm error = specvp_check(vpp, cr); 1509168962Spjd 1510168404Spjd /* Translate errors and add SAVENAME when needed. */ 1511168404Spjd if (cnp->cn_flags & ISLASTCN) { 1512168404Spjd switch (nameiop) { 1513168404Spjd case CREATE: 1514168404Spjd case RENAME: 1515168404Spjd if (error == ENOENT) { 1516168404Spjd error = EJUSTRETURN; 1517168404Spjd cnp->cn_flags |= SAVENAME; 1518168404Spjd break; 1519168404Spjd } 1520168404Spjd /* FALLTHROUGH */ 1521168404Spjd case DELETE: 1522168404Spjd if (error == 0) 1523168404Spjd cnp->cn_flags |= SAVENAME; 1524168404Spjd break; 1525168404Spjd } 1526168404Spjd } 1527168404Spjd if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) { 1528169198Spjd int ltype = 0; 1529169198Spjd 1530169198Spjd if (cnp->cn_flags & ISDOTDOT) { 1531176559Sattilio ltype = VOP_ISLOCKED(dvp); 1532175294Sattilio VOP_UNLOCK(dvp, 0); 1533169198Spjd } 1534206667Spjd ZFS_EXIT(zfsvfs); 1535254711Savg error = vn_lock(*vpp, cnp->cn_lkflags); 1536168962Spjd if (cnp->cn_flags & ISDOTDOT) 1537175202Sattilio vn_lock(dvp, ltype | LK_RETRY); 1538169172Spjd if (error != 0) { 1539169172Spjd VN_RELE(*vpp); 1540169172Spjd *vpp = NULL; 1541169172Spjd return (error); 1542169172Spjd } 1543206667Spjd } else { 1544206667Spjd ZFS_EXIT(zfsvfs); 1545168404Spjd } 1546168404Spjd 1547168404Spjd#ifdef FREEBSD_NAMECACHE 1548168404Spjd /* 1549168404Spjd * Insert name into cache (as non-existent) if appropriate. 1550168404Spjd */ 1551168404Spjd if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) 1552168404Spjd cache_enter(dvp, *vpp, cnp); 1553169170Spjd /* 1554169170Spjd * Insert name into cache if appropriate. 1555169170Spjd */ 1556168404Spjd if (error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1557168404Spjd if (!(cnp->cn_flags & ISLASTCN) || 1558168404Spjd (nameiop != DELETE && nameiop != RENAME)) { 1559168404Spjd cache_enter(dvp, *vpp, cnp); 1560168404Spjd } 1561168404Spjd } 1562168404Spjd#endif 1563168404Spjd 1564168404Spjd return (error); 1565168404Spjd} 1566168404Spjd 1567168404Spjd/* 1568168404Spjd * Attempt to create a new entry in a directory. If the entry 1569168404Spjd * already exists, truncate the file if permissible, else return 1570168404Spjd * an error. Return the vp of the created or trunc'd file. 1571168404Spjd * 1572168404Spjd * IN: dvp - vnode of directory to put new file entry in. 1573168404Spjd * name - name of new file entry. 1574168404Spjd * vap - attributes of new file. 1575168404Spjd * excl - flag indicating exclusive or non-exclusive mode. 1576168404Spjd * mode - mode to open file with. 1577168404Spjd * cr - credentials of caller. 1578168404Spjd * flag - large file flag [UNUSED]. 1579185029Spjd * ct - caller context 1580268464Sdelphij * vsecp - ACL to be set 1581168404Spjd * 1582168404Spjd * OUT: vpp - vnode of created or trunc'd entry. 1583168404Spjd * 1584251631Sdelphij * RETURN: 0 on success, error code on failure. 1585168404Spjd * 1586168404Spjd * Timestamps: 1587168404Spjd * dvp - ctime|mtime updated if new entry created 1588168404Spjd * vp - ctime|mtime always, atime if new 1589168404Spjd */ 1590185029Spjd 1591168404Spjd/* ARGSUSED */ 1592168404Spjdstatic int 1593168962Spjdzfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 1594185029Spjd vnode_t **vpp, cred_t *cr, kthread_t *td) 1595168404Spjd{ 1596168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1597168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1598185029Spjd zilog_t *zilog; 1599185029Spjd objset_t *os; 1600168404Spjd zfs_dirlock_t *dl; 1601168404Spjd dmu_tx_t *tx; 1602168404Spjd int error; 1603209962Smm ksid_t *ksid; 1604209962Smm uid_t uid; 1605209962Smm gid_t gid = crgetgid(cr); 1606219089Spjd zfs_acl_ids_t acl_ids; 1607209962Smm boolean_t fuid_dirtied; 1608219089Spjd boolean_t have_acl = B_FALSE; 1609258632Savg boolean_t waited = B_FALSE; 1610185029Spjd void *vsecp = NULL; 1611185029Spjd int flag = 0; 1612168404Spjd 1613185029Spjd /* 1614185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 1615185029Spjd * make sure file system is at proper version 1616185029Spjd */ 1617185029Spjd 1618209962Smm ksid = crgetsid(cr, KSID_OWNER); 1619209962Smm if (ksid) 1620209962Smm uid = ksid_getid(ksid); 1621209962Smm else 1622209962Smm uid = crgetuid(cr); 1623219089Spjd 1624185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 1625185029Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 1626219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1627249195Smm return (SET_ERROR(EINVAL)); 1628185029Spjd 1629168404Spjd ZFS_ENTER(zfsvfs); 1630185029Spjd ZFS_VERIFY_ZP(dzp); 1631185029Spjd os = zfsvfs->z_os; 1632185029Spjd zilog = zfsvfs->z_log; 1633168404Spjd 1634185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1635185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1636185029Spjd ZFS_EXIT(zfsvfs); 1637249195Smm return (SET_ERROR(EILSEQ)); 1638185029Spjd } 1639185029Spjd 1640185029Spjd if (vap->va_mask & AT_XVATTR) { 1641197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 1642185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 1643185029Spjd ZFS_EXIT(zfsvfs); 1644185029Spjd return (error); 1645185029Spjd } 1646185029Spjd } 1647260704Savg 1648260704Savg getnewvnode_reserve(1); 1649260704Savg 1650168404Spjdtop: 1651168404Spjd *vpp = NULL; 1652168404Spjd 1653182905Strasz if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 1654182905Strasz vap->va_mode &= ~S_ISVTX; 1655168404Spjd 1656168404Spjd if (*name == '\0') { 1657168404Spjd /* 1658168404Spjd * Null component name refers to the directory itself. 1659168404Spjd */ 1660168404Spjd VN_HOLD(dvp); 1661168404Spjd zp = dzp; 1662168404Spjd dl = NULL; 1663168404Spjd error = 0; 1664168404Spjd } else { 1665168404Spjd /* possible VN_HOLD(zp) */ 1666185029Spjd int zflg = 0; 1667185029Spjd 1668185029Spjd if (flag & FIGNORECASE) 1669185029Spjd zflg |= ZCILOOK; 1670185029Spjd 1671185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1672185029Spjd NULL, NULL); 1673185029Spjd if (error) { 1674219089Spjd if (have_acl) 1675219089Spjd zfs_acl_ids_free(&acl_ids); 1676168404Spjd if (strcmp(name, "..") == 0) 1677249195Smm error = SET_ERROR(EISDIR); 1678260704Savg getnewvnode_drop_reserve(); 1679168404Spjd ZFS_EXIT(zfsvfs); 1680168404Spjd return (error); 1681168404Spjd } 1682168404Spjd } 1683219089Spjd 1684185029Spjd if (zp == NULL) { 1685185029Spjd uint64_t txtype; 1686168404Spjd 1687168404Spjd /* 1688168404Spjd * Create a new file object and update the directory 1689168404Spjd * to reference it. 1690168404Spjd */ 1691185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1692219089Spjd if (have_acl) 1693219089Spjd zfs_acl_ids_free(&acl_ids); 1694168404Spjd goto out; 1695168404Spjd } 1696168404Spjd 1697168404Spjd /* 1698168404Spjd * We only support the creation of regular files in 1699168404Spjd * extended attribute directories. 1700168404Spjd */ 1701219089Spjd 1702219089Spjd if ((dzp->z_pflags & ZFS_XATTR) && 1703168404Spjd (vap->va_type != VREG)) { 1704219089Spjd if (have_acl) 1705219089Spjd zfs_acl_ids_free(&acl_ids); 1706249195Smm error = SET_ERROR(EINVAL); 1707168404Spjd goto out; 1708168404Spjd } 1709168404Spjd 1710219089Spjd if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, 1711219089Spjd cr, vsecp, &acl_ids)) != 0) 1712219089Spjd goto out; 1713219089Spjd have_acl = B_TRUE; 1714209962Smm 1715209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 1716211932Smm zfs_acl_ids_free(&acl_ids); 1717249195Smm error = SET_ERROR(EDQUOT); 1718209962Smm goto out; 1719209962Smm } 1720209962Smm 1721168404Spjd tx = dmu_tx_create(os); 1722219089Spjd 1723219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 1724219089Spjd ZFS_SA_BASE_ATTR_SIZE); 1725219089Spjd 1726209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 1727209962Smm if (fuid_dirtied) 1728209962Smm zfs_fuid_txhold(zfsvfs, tx); 1729168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 1730219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 1731219089Spjd if (!zfsvfs->z_use_sa && 1732219089Spjd acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1733168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1734219089Spjd 0, acl_ids.z_aclp->z_acl_bytes); 1735185029Spjd } 1736258632Savg error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 1737168404Spjd if (error) { 1738168404Spjd zfs_dirent_unlock(dl); 1739209962Smm if (error == ERESTART) { 1740258632Savg waited = B_TRUE; 1741168404Spjd dmu_tx_wait(tx); 1742168404Spjd dmu_tx_abort(tx); 1743168404Spjd goto top; 1744168404Spjd } 1745219089Spjd zfs_acl_ids_free(&acl_ids); 1746168404Spjd dmu_tx_abort(tx); 1747260704Savg getnewvnode_drop_reserve(); 1748168404Spjd ZFS_EXIT(zfsvfs); 1749168404Spjd return (error); 1750168404Spjd } 1751219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1752209962Smm 1753209962Smm if (fuid_dirtied) 1754209962Smm zfs_fuid_sync(zfsvfs, tx); 1755209962Smm 1756168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 1757185029Spjd txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1758185029Spjd if (flag & FIGNORECASE) 1759185029Spjd txtype |= TX_CI; 1760185029Spjd zfs_log_create(zilog, tx, txtype, dzp, zp, name, 1761209962Smm vsecp, acl_ids.z_fuidp, vap); 1762209962Smm zfs_acl_ids_free(&acl_ids); 1763168404Spjd dmu_tx_commit(tx); 1764168404Spjd } else { 1765185029Spjd int aflags = (flag & FAPPEND) ? V_APPEND : 0; 1766185029Spjd 1767219089Spjd if (have_acl) 1768219089Spjd zfs_acl_ids_free(&acl_ids); 1769219089Spjd have_acl = B_FALSE; 1770219089Spjd 1771168404Spjd /* 1772168404Spjd * A directory entry already exists for this name. 1773168404Spjd */ 1774168404Spjd /* 1775168962Spjd * Can't truncate an existing file if in exclusive mode. 1776168962Spjd */ 1777168962Spjd if (excl == EXCL) { 1778249195Smm error = SET_ERROR(EEXIST); 1779168962Spjd goto out; 1780168962Spjd } 1781168962Spjd /* 1782168404Spjd * Can't open a directory for writing. 1783168404Spjd */ 1784168404Spjd if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1785249195Smm error = SET_ERROR(EISDIR); 1786168404Spjd goto out; 1787168404Spjd } 1788168404Spjd /* 1789168404Spjd * Verify requested access to file. 1790168404Spjd */ 1791185029Spjd if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 1792168404Spjd goto out; 1793168404Spjd } 1794168404Spjd 1795168404Spjd mutex_enter(&dzp->z_lock); 1796168404Spjd dzp->z_seq++; 1797168404Spjd mutex_exit(&dzp->z_lock); 1798168404Spjd 1799168404Spjd /* 1800168404Spjd * Truncate regular files if requested. 1801168404Spjd */ 1802168404Spjd if ((ZTOV(zp)->v_type == VREG) && 1803168404Spjd (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 1804185029Spjd /* we can't hold any locks when calling zfs_freesp() */ 1805185029Spjd zfs_dirent_unlock(dl); 1806185029Spjd dl = NULL; 1807168404Spjd error = zfs_freesp(zp, 0, 0, mode, TRUE); 1808185029Spjd if (error == 0) { 1809185029Spjd vnevent_create(ZTOV(zp), ct); 1810168404Spjd } 1811168404Spjd } 1812168404Spjd } 1813168404Spjdout: 1814260704Savg getnewvnode_drop_reserve(); 1815168404Spjd if (dl) 1816168404Spjd zfs_dirent_unlock(dl); 1817168404Spjd 1818168404Spjd if (error) { 1819168404Spjd if (zp) 1820168404Spjd VN_RELE(ZTOV(zp)); 1821168962Spjd } else { 1822168962Spjd *vpp = ZTOV(zp); 1823211932Smm error = specvp_check(vpp, cr); 1824168404Spjd } 1825168404Spjd 1826219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1827219089Spjd zil_commit(zilog, 0); 1828219089Spjd 1829168404Spjd ZFS_EXIT(zfsvfs); 1830168404Spjd return (error); 1831168404Spjd} 1832168404Spjd 1833168404Spjd/* 1834168404Spjd * Remove an entry from a directory. 1835168404Spjd * 1836168404Spjd * IN: dvp - vnode of directory to remove entry from. 1837168404Spjd * name - name of entry to remove. 1838168404Spjd * cr - credentials of caller. 1839185029Spjd * ct - caller context 1840185029Spjd * flags - case flags 1841168404Spjd * 1842251631Sdelphij * RETURN: 0 on success, error code on failure. 1843168404Spjd * 1844168404Spjd * Timestamps: 1845168404Spjd * dvp - ctime|mtime 1846168404Spjd * vp - ctime (if nlink > 0) 1847168404Spjd */ 1848219089Spjd 1849219089Spjduint64_t null_xattr = 0; 1850219089Spjd 1851185029Spjd/*ARGSUSED*/ 1852168404Spjdstatic int 1853185029Spjdzfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 1854185029Spjd int flags) 1855168404Spjd{ 1856168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1857219089Spjd znode_t *xzp; 1858168404Spjd vnode_t *vp; 1859168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1860185029Spjd zilog_t *zilog; 1861168962Spjd uint64_t acl_obj, xattr_obj; 1862268464Sdelphij uint64_t xattr_obj_unlinked = 0; 1863219089Spjd uint64_t obj = 0; 1864168404Spjd zfs_dirlock_t *dl; 1865168404Spjd dmu_tx_t *tx; 1866168962Spjd boolean_t may_delete_now, delete_now = FALSE; 1867185029Spjd boolean_t unlinked, toobig = FALSE; 1868185029Spjd uint64_t txtype; 1869185029Spjd pathname_t *realnmp = NULL; 1870185029Spjd pathname_t realnm; 1871168404Spjd int error; 1872185029Spjd int zflg = ZEXISTS; 1873258632Savg boolean_t waited = B_FALSE; 1874168404Spjd 1875168404Spjd ZFS_ENTER(zfsvfs); 1876185029Spjd ZFS_VERIFY_ZP(dzp); 1877185029Spjd zilog = zfsvfs->z_log; 1878168404Spjd 1879185029Spjd if (flags & FIGNORECASE) { 1880185029Spjd zflg |= ZCILOOK; 1881185029Spjd pn_alloc(&realnm); 1882185029Spjd realnmp = &realnm; 1883185029Spjd } 1884185029Spjd 1885168404Spjdtop: 1886219089Spjd xattr_obj = 0; 1887219089Spjd xzp = NULL; 1888168404Spjd /* 1889168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 1890168404Spjd */ 1891185029Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1892185029Spjd NULL, realnmp)) { 1893185029Spjd if (realnmp) 1894185029Spjd pn_free(realnmp); 1895168404Spjd ZFS_EXIT(zfsvfs); 1896168404Spjd return (error); 1897168404Spjd } 1898168404Spjd 1899168404Spjd vp = ZTOV(zp); 1900168404Spjd 1901168962Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1902168404Spjd goto out; 1903168962Spjd } 1904168404Spjd 1905168962Spjd /* 1906168962Spjd * Need to use rmdir for removing directories. 1907168962Spjd */ 1908168962Spjd if (vp->v_type == VDIR) { 1909249195Smm error = SET_ERROR(EPERM); 1910168962Spjd goto out; 1911168962Spjd } 1912168962Spjd 1913185029Spjd vnevent_remove(vp, dvp, name, ct); 1914168962Spjd 1915185029Spjd if (realnmp) 1916185029Spjd dnlc_remove(dvp, realnmp->pn_buf); 1917185029Spjd else 1918185029Spjd dnlc_remove(dvp, name); 1919168404Spjd 1920219089Spjd VI_LOCK(vp); 1921219089Spjd may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); 1922219089Spjd VI_UNLOCK(vp); 1923168962Spjd 1924168404Spjd /* 1925168404Spjd * We may delete the znode now, or we may put it in the unlinked set; 1926168404Spjd * it depends on whether we're the last link, and on whether there are 1927168404Spjd * other holds on the vnode. So we dmu_tx_hold() the right things to 1928168404Spjd * allow for either case. 1929168404Spjd */ 1930219089Spjd obj = zp->z_id; 1931168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1932168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1933219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1934219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1935219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 1936185029Spjd if (may_delete_now) { 1937185029Spjd toobig = 1938219089Spjd zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 1939185029Spjd /* if the file is too big, only hold_free a token amount */ 1940185029Spjd dmu_tx_hold_free(tx, zp->z_id, 0, 1941185029Spjd (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); 1942185029Spjd } 1943168404Spjd 1944168404Spjd /* are there any extended attributes? */ 1945219089Spjd error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1946219089Spjd &xattr_obj, sizeof (xattr_obj)); 1947219089Spjd if (error == 0 && xattr_obj) { 1948219089Spjd error = zfs_zget(zfsvfs, xattr_obj, &xzp); 1949240415Smm ASSERT0(error); 1950219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 1951219089Spjd dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1952168404Spjd } 1953168404Spjd 1954219089Spjd mutex_enter(&zp->z_lock); 1955219089Spjd if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) 1956168962Spjd dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 1957219089Spjd mutex_exit(&zp->z_lock); 1958168962Spjd 1959168404Spjd /* charge as an update -- would be nice not to charge at all */ 1960168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1961168404Spjd 1962268464Sdelphij /* 1963268464Sdelphij * Mark this transaction as typically resulting in a net free of 1964268464Sdelphij * space, unless object removal will be delayed indefinitely 1965268464Sdelphij * (due to active holds on the vnode due to the file being open). 1966268464Sdelphij */ 1967268464Sdelphij if (may_delete_now) 1968268464Sdelphij dmu_tx_mark_netfree(tx); 1969268464Sdelphij 1970258632Savg error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 1971168404Spjd if (error) { 1972168404Spjd zfs_dirent_unlock(dl); 1973168962Spjd VN_RELE(vp); 1974219089Spjd if (xzp) 1975219089Spjd VN_RELE(ZTOV(xzp)); 1976209962Smm if (error == ERESTART) { 1977258632Savg waited = B_TRUE; 1978168404Spjd dmu_tx_wait(tx); 1979168404Spjd dmu_tx_abort(tx); 1980168404Spjd goto top; 1981168404Spjd } 1982185029Spjd if (realnmp) 1983185029Spjd pn_free(realnmp); 1984168404Spjd dmu_tx_abort(tx); 1985168404Spjd ZFS_EXIT(zfsvfs); 1986168404Spjd return (error); 1987168404Spjd } 1988168404Spjd 1989168404Spjd /* 1990168404Spjd * Remove the directory entry. 1991168404Spjd */ 1992185029Spjd error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 1993168404Spjd 1994168404Spjd if (error) { 1995168404Spjd dmu_tx_commit(tx); 1996168404Spjd goto out; 1997168404Spjd } 1998168404Spjd 1999219089Spjd if (unlinked) { 2000219089Spjd /* 2001219089Spjd * Hold z_lock so that we can make sure that the ACL obj 2002219089Spjd * hasn't changed. Could have been deleted due to 2003219089Spjd * zfs_sa_upgrade(). 2004219089Spjd */ 2005219089Spjd mutex_enter(&zp->z_lock); 2006168962Spjd VI_LOCK(vp); 2007219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 2008219089Spjd &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); 2009185029Spjd delete_now = may_delete_now && !toobig && 2010168962Spjd vp->v_count == 1 && !vn_has_cached_data(vp) && 2011219089Spjd xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == 2012219089Spjd acl_obj; 2013168962Spjd VI_UNLOCK(vp); 2014168962Spjd } 2015168962Spjd 2016168962Spjd if (delete_now) { 2017243270Savg#ifdef __FreeBSD__ 2018243270Savg panic("zfs_remove: delete_now branch taken"); 2019243270Savg#endif 2020219089Spjd if (xattr_obj_unlinked) { 2021219089Spjd ASSERT3U(xzp->z_links, ==, 2); 2022168962Spjd mutex_enter(&xzp->z_lock); 2023168962Spjd xzp->z_unlinked = 1; 2024219089Spjd xzp->z_links = 0; 2025219089Spjd error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 2026219089Spjd &xzp->z_links, sizeof (xzp->z_links), tx); 2027219089Spjd ASSERT3U(error, ==, 0); 2028168962Spjd mutex_exit(&xzp->z_lock); 2029168962Spjd zfs_unlinked_add(xzp, tx); 2030219089Spjd 2031219089Spjd if (zp->z_is_sa) 2032219089Spjd error = sa_remove(zp->z_sa_hdl, 2033219089Spjd SA_ZPL_XATTR(zfsvfs), tx); 2034219089Spjd else 2035219089Spjd error = sa_update(zp->z_sa_hdl, 2036219089Spjd SA_ZPL_XATTR(zfsvfs), &null_xattr, 2037219089Spjd sizeof (uint64_t), tx); 2038240415Smm ASSERT0(error); 2039168962Spjd } 2040168962Spjd VI_LOCK(vp); 2041168962Spjd vp->v_count--; 2042240415Smm ASSERT0(vp->v_count); 2043168962Spjd VI_UNLOCK(vp); 2044168962Spjd mutex_exit(&zp->z_lock); 2045168962Spjd zfs_znode_delete(zp, tx); 2046168962Spjd } else if (unlinked) { 2047219089Spjd mutex_exit(&zp->z_lock); 2048168404Spjd zfs_unlinked_add(zp, tx); 2049243268Savg#ifdef __FreeBSD__ 2050243268Savg vp->v_vflag |= VV_NOSYNC; 2051243268Savg#endif 2052168962Spjd } 2053168404Spjd 2054185029Spjd txtype = TX_REMOVE; 2055185029Spjd if (flags & FIGNORECASE) 2056185029Spjd txtype |= TX_CI; 2057219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 2058168404Spjd 2059168404Spjd dmu_tx_commit(tx); 2060168404Spjdout: 2061185029Spjd if (realnmp) 2062185029Spjd pn_free(realnmp); 2063185029Spjd 2064168404Spjd zfs_dirent_unlock(dl); 2065168404Spjd 2066219089Spjd if (!delete_now) 2067168962Spjd VN_RELE(vp); 2068219089Spjd if (xzp) 2069168962Spjd VN_RELE(ZTOV(xzp)); 2070168962Spjd 2071219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2072219089Spjd zil_commit(zilog, 0); 2073219089Spjd 2074168404Spjd ZFS_EXIT(zfsvfs); 2075168404Spjd return (error); 2076168404Spjd} 2077168404Spjd 2078168404Spjd/* 2079168404Spjd * Create a new directory and insert it into dvp using the name 2080168404Spjd * provided. Return a pointer to the inserted directory. 2081168404Spjd * 2082168404Spjd * IN: dvp - vnode of directory to add subdir to. 2083168404Spjd * dirname - name of new directory. 2084168404Spjd * vap - attributes of new directory. 2085168404Spjd * cr - credentials of caller. 2086185029Spjd * ct - caller context 2087251631Sdelphij * flags - case flags 2088185029Spjd * vsecp - ACL to be set 2089168404Spjd * 2090168404Spjd * OUT: vpp - vnode of created directory. 2091168404Spjd * 2092251631Sdelphij * RETURN: 0 on success, error code on failure. 2093168404Spjd * 2094168404Spjd * Timestamps: 2095168404Spjd * dvp - ctime|mtime updated 2096168404Spjd * vp - ctime|mtime|atime updated 2097168404Spjd */ 2098185029Spjd/*ARGSUSED*/ 2099168404Spjdstatic int 2100185029Spjdzfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 2101185029Spjd caller_context_t *ct, int flags, vsecattr_t *vsecp) 2102168404Spjd{ 2103168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 2104168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2105185029Spjd zilog_t *zilog; 2106168404Spjd zfs_dirlock_t *dl; 2107185029Spjd uint64_t txtype; 2108168404Spjd dmu_tx_t *tx; 2109168404Spjd int error; 2110185029Spjd int zf = ZNEW; 2111209962Smm ksid_t *ksid; 2112209962Smm uid_t uid; 2113209962Smm gid_t gid = crgetgid(cr); 2114219089Spjd zfs_acl_ids_t acl_ids; 2115209962Smm boolean_t fuid_dirtied; 2116258632Savg boolean_t waited = B_FALSE; 2117168404Spjd 2118168404Spjd ASSERT(vap->va_type == VDIR); 2119168404Spjd 2120185029Spjd /* 2121185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 2122185029Spjd * make sure file system is at proper version 2123185029Spjd */ 2124185029Spjd 2125209962Smm ksid = crgetsid(cr, KSID_OWNER); 2126209962Smm if (ksid) 2127209962Smm uid = ksid_getid(ksid); 2128209962Smm else 2129209962Smm uid = crgetuid(cr); 2130185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2131219089Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 2132219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2133249195Smm return (SET_ERROR(EINVAL)); 2134185029Spjd 2135168404Spjd ZFS_ENTER(zfsvfs); 2136185029Spjd ZFS_VERIFY_ZP(dzp); 2137185029Spjd zilog = zfsvfs->z_log; 2138168404Spjd 2139219089Spjd if (dzp->z_pflags & ZFS_XATTR) { 2140168404Spjd ZFS_EXIT(zfsvfs); 2141249195Smm return (SET_ERROR(EINVAL)); 2142168404Spjd } 2143168404Spjd 2144185029Spjd if (zfsvfs->z_utf8 && u8_validate(dirname, 2145185029Spjd strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2146185029Spjd ZFS_EXIT(zfsvfs); 2147249195Smm return (SET_ERROR(EILSEQ)); 2148185029Spjd } 2149185029Spjd if (flags & FIGNORECASE) 2150185029Spjd zf |= ZCILOOK; 2151185029Spjd 2152219089Spjd if (vap->va_mask & AT_XVATTR) { 2153197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2154185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 2155185029Spjd ZFS_EXIT(zfsvfs); 2156185029Spjd return (error); 2157185029Spjd } 2158219089Spjd } 2159185029Spjd 2160219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 2161219089Spjd vsecp, &acl_ids)) != 0) { 2162219089Spjd ZFS_EXIT(zfsvfs); 2163219089Spjd return (error); 2164219089Spjd } 2165260704Savg 2166260704Savg getnewvnode_reserve(1); 2167260704Savg 2168168404Spjd /* 2169168404Spjd * First make sure the new directory doesn't exist. 2170219089Spjd * 2171219089Spjd * Existence is checked first to make sure we don't return 2172219089Spjd * EACCES instead of EEXIST which can cause some applications 2173219089Spjd * to fail. 2174168404Spjd */ 2175185029Spjdtop: 2176185029Spjd *vpp = NULL; 2177185029Spjd 2178185029Spjd if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 2179185029Spjd NULL, NULL)) { 2180219089Spjd zfs_acl_ids_free(&acl_ids); 2181260704Savg getnewvnode_drop_reserve(); 2182168404Spjd ZFS_EXIT(zfsvfs); 2183168404Spjd return (error); 2184168404Spjd } 2185168404Spjd 2186185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 2187219089Spjd zfs_acl_ids_free(&acl_ids); 2188168404Spjd zfs_dirent_unlock(dl); 2189260704Savg getnewvnode_drop_reserve(); 2190168404Spjd ZFS_EXIT(zfsvfs); 2191168404Spjd return (error); 2192168404Spjd } 2193168404Spjd 2194209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2195211932Smm zfs_acl_ids_free(&acl_ids); 2196209962Smm zfs_dirent_unlock(dl); 2197260704Savg getnewvnode_drop_reserve(); 2198209962Smm ZFS_EXIT(zfsvfs); 2199249195Smm return (SET_ERROR(EDQUOT)); 2200209962Smm } 2201209962Smm 2202168404Spjd /* 2203168404Spjd * Add a new entry to the directory. 2204168404Spjd */ 2205168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2206168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 2207168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2208209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 2209209962Smm if (fuid_dirtied) 2210209962Smm zfs_fuid_txhold(zfsvfs, tx); 2211219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2212219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2213219089Spjd acl_ids.z_aclp->z_acl_bytes); 2214219089Spjd } 2215219089Spjd 2216219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2217219089Spjd ZFS_SA_BASE_ATTR_SIZE); 2218219089Spjd 2219258632Savg error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 2220168404Spjd if (error) { 2221168404Spjd zfs_dirent_unlock(dl); 2222209962Smm if (error == ERESTART) { 2223258632Savg waited = B_TRUE; 2224168404Spjd dmu_tx_wait(tx); 2225168404Spjd dmu_tx_abort(tx); 2226168404Spjd goto top; 2227168404Spjd } 2228219089Spjd zfs_acl_ids_free(&acl_ids); 2229168404Spjd dmu_tx_abort(tx); 2230260704Savg getnewvnode_drop_reserve(); 2231168404Spjd ZFS_EXIT(zfsvfs); 2232168404Spjd return (error); 2233168404Spjd } 2234168404Spjd 2235168404Spjd /* 2236168404Spjd * Create new node. 2237168404Spjd */ 2238219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2239168404Spjd 2240209962Smm if (fuid_dirtied) 2241209962Smm zfs_fuid_sync(zfsvfs, tx); 2242219089Spjd 2243168404Spjd /* 2244168404Spjd * Now put new name in parent dir. 2245168404Spjd */ 2246168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 2247168404Spjd 2248168404Spjd *vpp = ZTOV(zp); 2249168404Spjd 2250185029Spjd txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 2251185029Spjd if (flags & FIGNORECASE) 2252185029Spjd txtype |= TX_CI; 2253209962Smm zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 2254209962Smm acl_ids.z_fuidp, vap); 2255185029Spjd 2256209962Smm zfs_acl_ids_free(&acl_ids); 2257219089Spjd 2258168404Spjd dmu_tx_commit(tx); 2259168404Spjd 2260260704Savg getnewvnode_drop_reserve(); 2261260704Savg 2262168404Spjd zfs_dirent_unlock(dl); 2263168404Spjd 2264219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2265219089Spjd zil_commit(zilog, 0); 2266219089Spjd 2267168404Spjd ZFS_EXIT(zfsvfs); 2268168404Spjd return (0); 2269168404Spjd} 2270168404Spjd 2271168404Spjd/* 2272168404Spjd * Remove a directory subdir entry. If the current working 2273168404Spjd * directory is the same as the subdir to be removed, the 2274168404Spjd * remove will fail. 2275168404Spjd * 2276168404Spjd * IN: dvp - vnode of directory to remove from. 2277168404Spjd * name - name of directory to be removed. 2278168404Spjd * cwd - vnode of current working directory. 2279168404Spjd * cr - credentials of caller. 2280185029Spjd * ct - caller context 2281185029Spjd * flags - case flags 2282168404Spjd * 2283251631Sdelphij * RETURN: 0 on success, error code on failure. 2284168404Spjd * 2285168404Spjd * Timestamps: 2286168404Spjd * dvp - ctime|mtime updated 2287168404Spjd */ 2288185029Spjd/*ARGSUSED*/ 2289168404Spjdstatic int 2290185029Spjdzfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 2291185029Spjd caller_context_t *ct, int flags) 2292168404Spjd{ 2293168404Spjd znode_t *dzp = VTOZ(dvp); 2294168404Spjd znode_t *zp; 2295168404Spjd vnode_t *vp; 2296168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2297185029Spjd zilog_t *zilog; 2298168404Spjd zfs_dirlock_t *dl; 2299168404Spjd dmu_tx_t *tx; 2300168404Spjd int error; 2301185029Spjd int zflg = ZEXISTS; 2302258632Savg boolean_t waited = B_FALSE; 2303168404Spjd 2304168962Spjd ZFS_ENTER(zfsvfs); 2305185029Spjd ZFS_VERIFY_ZP(dzp); 2306185029Spjd zilog = zfsvfs->z_log; 2307168404Spjd 2308185029Spjd if (flags & FIGNORECASE) 2309185029Spjd zflg |= ZCILOOK; 2310168404Spjdtop: 2311168404Spjd zp = NULL; 2312168404Spjd 2313168404Spjd /* 2314168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 2315168404Spjd */ 2316185029Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 2317185029Spjd NULL, NULL)) { 2318168404Spjd ZFS_EXIT(zfsvfs); 2319168404Spjd return (error); 2320168404Spjd } 2321168404Spjd 2322168404Spjd vp = ZTOV(zp); 2323168404Spjd 2324168404Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2325168404Spjd goto out; 2326168404Spjd } 2327168404Spjd 2328168962Spjd if (vp->v_type != VDIR) { 2329249195Smm error = SET_ERROR(ENOTDIR); 2330168962Spjd goto out; 2331168962Spjd } 2332168962Spjd 2333168962Spjd if (vp == cwd) { 2334249195Smm error = SET_ERROR(EINVAL); 2335168962Spjd goto out; 2336168962Spjd } 2337168962Spjd 2338185029Spjd vnevent_rmdir(vp, dvp, name, ct); 2339168962Spjd 2340168404Spjd /* 2341168404Spjd * Grab a lock on the directory to make sure that noone is 2342168404Spjd * trying to add (or lookup) entries while we are removing it. 2343168404Spjd */ 2344168404Spjd rw_enter(&zp->z_name_lock, RW_WRITER); 2345168404Spjd 2346168404Spjd /* 2347168404Spjd * Grab a lock on the parent pointer to make sure we play well 2348168404Spjd * with the treewalk and directory rename code. 2349168404Spjd */ 2350168404Spjd rw_enter(&zp->z_parent_lock, RW_WRITER); 2351168404Spjd 2352168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2353168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2354219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2355168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2356219089Spjd zfs_sa_upgrade_txholds(tx, zp); 2357219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 2358258632Savg error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 2359168404Spjd if (error) { 2360168404Spjd rw_exit(&zp->z_parent_lock); 2361168404Spjd rw_exit(&zp->z_name_lock); 2362168404Spjd zfs_dirent_unlock(dl); 2363168962Spjd VN_RELE(vp); 2364209962Smm if (error == ERESTART) { 2365258632Savg waited = B_TRUE; 2366168404Spjd dmu_tx_wait(tx); 2367168404Spjd dmu_tx_abort(tx); 2368168404Spjd goto top; 2369168404Spjd } 2370168404Spjd dmu_tx_abort(tx); 2371168404Spjd ZFS_EXIT(zfsvfs); 2372168404Spjd return (error); 2373168404Spjd } 2374168404Spjd 2375168404Spjd#ifdef FREEBSD_NAMECACHE 2376168404Spjd cache_purge(dvp); 2377168404Spjd#endif 2378168404Spjd 2379185029Spjd error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 2380168404Spjd 2381185029Spjd if (error == 0) { 2382185029Spjd uint64_t txtype = TX_RMDIR; 2383185029Spjd if (flags & FIGNORECASE) 2384185029Spjd txtype |= TX_CI; 2385219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2386185029Spjd } 2387168404Spjd 2388168404Spjd dmu_tx_commit(tx); 2389168404Spjd 2390168404Spjd rw_exit(&zp->z_parent_lock); 2391168404Spjd rw_exit(&zp->z_name_lock); 2392168404Spjd#ifdef FREEBSD_NAMECACHE 2393168404Spjd cache_purge(vp); 2394168404Spjd#endif 2395168404Spjdout: 2396168404Spjd zfs_dirent_unlock(dl); 2397168404Spjd 2398168962Spjd VN_RELE(vp); 2399168962Spjd 2400219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2401219089Spjd zil_commit(zilog, 0); 2402219089Spjd 2403168404Spjd ZFS_EXIT(zfsvfs); 2404168404Spjd return (error); 2405168404Spjd} 2406168404Spjd 2407168404Spjd/* 2408168404Spjd * Read as many directory entries as will fit into the provided 2409168404Spjd * buffer from the given directory cursor position (specified in 2410251631Sdelphij * the uio structure). 2411168404Spjd * 2412168404Spjd * IN: vp - vnode of directory to read. 2413168404Spjd * uio - structure supplying read location, range info, 2414168404Spjd * and return buffer. 2415168404Spjd * cr - credentials of caller. 2416185029Spjd * ct - caller context 2417185029Spjd * flags - case flags 2418168404Spjd * 2419168404Spjd * OUT: uio - updated offset and range, buffer filled. 2420168404Spjd * eofp - set to true if end-of-file detected. 2421168404Spjd * 2422251631Sdelphij * RETURN: 0 on success, error code on failure. 2423168404Spjd * 2424168404Spjd * Timestamps: 2425168404Spjd * vp - atime updated 2426168404Spjd * 2427168404Spjd * Note that the low 4 bits of the cookie returned by zap is always zero. 2428168404Spjd * This allows us to use the low range for "special" directory entries: 2429168404Spjd * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2430168404Spjd * we use the offset 2 for the '.zfs' directory. 2431168404Spjd */ 2432168404Spjd/* ARGSUSED */ 2433168404Spjdstatic int 2434168962Spjdzfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) 2435168404Spjd{ 2436168404Spjd znode_t *zp = VTOZ(vp); 2437168404Spjd iovec_t *iovp; 2438185029Spjd edirent_t *eodp; 2439168404Spjd dirent64_t *odp; 2440168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2441168404Spjd objset_t *os; 2442168404Spjd caddr_t outbuf; 2443168404Spjd size_t bufsize; 2444168404Spjd zap_cursor_t zc; 2445168404Spjd zap_attribute_t zap; 2446168404Spjd uint_t bytes_wanted; 2447168404Spjd uint64_t offset; /* must be unsigned; checks for < 1 */ 2448219089Spjd uint64_t parent; 2449168404Spjd int local_eof; 2450168404Spjd int outcount; 2451168404Spjd int error; 2452168404Spjd uint8_t prefetch; 2453185029Spjd boolean_t check_sysattrs; 2454168404Spjd uint8_t type; 2455168962Spjd int ncooks; 2456168962Spjd u_long *cooks = NULL; 2457185029Spjd int flags = 0; 2458168404Spjd 2459168404Spjd ZFS_ENTER(zfsvfs); 2460185029Spjd ZFS_VERIFY_ZP(zp); 2461168404Spjd 2462219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 2463219089Spjd &parent, sizeof (parent))) != 0) { 2464219089Spjd ZFS_EXIT(zfsvfs); 2465219089Spjd return (error); 2466219089Spjd } 2467219089Spjd 2468168404Spjd /* 2469168404Spjd * If we are not given an eof variable, 2470168404Spjd * use a local one. 2471168404Spjd */ 2472168404Spjd if (eofp == NULL) 2473168404Spjd eofp = &local_eof; 2474168404Spjd 2475168404Spjd /* 2476168404Spjd * Check for valid iov_len. 2477168404Spjd */ 2478168404Spjd if (uio->uio_iov->iov_len <= 0) { 2479168404Spjd ZFS_EXIT(zfsvfs); 2480249195Smm return (SET_ERROR(EINVAL)); 2481168404Spjd } 2482168404Spjd 2483168404Spjd /* 2484168404Spjd * Quit if directory has been removed (posix) 2485168404Spjd */ 2486168404Spjd if ((*eofp = zp->z_unlinked) != 0) { 2487168404Spjd ZFS_EXIT(zfsvfs); 2488168404Spjd return (0); 2489168404Spjd } 2490168404Spjd 2491168404Spjd error = 0; 2492168404Spjd os = zfsvfs->z_os; 2493168404Spjd offset = uio->uio_loffset; 2494168404Spjd prefetch = zp->z_zn_prefetch; 2495168404Spjd 2496168404Spjd /* 2497168404Spjd * Initialize the iterator cursor. 2498168404Spjd */ 2499168404Spjd if (offset <= 3) { 2500168404Spjd /* 2501168404Spjd * Start iteration from the beginning of the directory. 2502168404Spjd */ 2503168404Spjd zap_cursor_init(&zc, os, zp->z_id); 2504168404Spjd } else { 2505168404Spjd /* 2506168404Spjd * The offset is a serialized cursor. 2507168404Spjd */ 2508168404Spjd zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2509168404Spjd } 2510168404Spjd 2511168404Spjd /* 2512168404Spjd * Get space to change directory entries into fs independent format. 2513168404Spjd */ 2514168404Spjd iovp = uio->uio_iov; 2515168404Spjd bytes_wanted = iovp->iov_len; 2516168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2517168404Spjd bufsize = bytes_wanted; 2518168404Spjd outbuf = kmem_alloc(bufsize, KM_SLEEP); 2519168404Spjd odp = (struct dirent64 *)outbuf; 2520168404Spjd } else { 2521168404Spjd bufsize = bytes_wanted; 2522247187Smm outbuf = NULL; 2523168404Spjd odp = (struct dirent64 *)iovp->iov_base; 2524168404Spjd } 2525185029Spjd eodp = (struct edirent *)odp; 2526168404Spjd 2527169170Spjd if (ncookies != NULL) { 2528168404Spjd /* 2529168404Spjd * Minimum entry size is dirent size and 1 byte for a file name. 2530168404Spjd */ 2531168962Spjd ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2532219404Spjd cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2533219404Spjd *cookies = cooks; 2534168962Spjd *ncookies = ncooks; 2535168404Spjd } 2536185029Spjd /* 2537185029Spjd * If this VFS supports the system attribute view interface; and 2538185029Spjd * we're looking at an extended attribute directory; and we care 2539185029Spjd * about normalization conflicts on this vfs; then we must check 2540185029Spjd * for normalization conflicts with the sysattr name space. 2541185029Spjd */ 2542185029Spjd#ifdef TODO 2543185029Spjd check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2544185029Spjd (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2545185029Spjd (flags & V_RDDIR_ENTFLAGS); 2546185029Spjd#else 2547185029Spjd check_sysattrs = 0; 2548185029Spjd#endif 2549168404Spjd 2550168404Spjd /* 2551168404Spjd * Transform to file-system independent format 2552168404Spjd */ 2553168404Spjd outcount = 0; 2554168404Spjd while (outcount < bytes_wanted) { 2555168404Spjd ino64_t objnum; 2556168404Spjd ushort_t reclen; 2557219089Spjd off64_t *next = NULL; 2558168404Spjd 2559168404Spjd /* 2560168404Spjd * Special case `.', `..', and `.zfs'. 2561168404Spjd */ 2562168404Spjd if (offset == 0) { 2563168404Spjd (void) strcpy(zap.za_name, "."); 2564185029Spjd zap.za_normalization_conflict = 0; 2565168404Spjd objnum = zp->z_id; 2566169108Spjd type = DT_DIR; 2567168404Spjd } else if (offset == 1) { 2568168404Spjd (void) strcpy(zap.za_name, ".."); 2569185029Spjd zap.za_normalization_conflict = 0; 2570219089Spjd objnum = parent; 2571169108Spjd type = DT_DIR; 2572168404Spjd } else if (offset == 2 && zfs_show_ctldir(zp)) { 2573168404Spjd (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2574185029Spjd zap.za_normalization_conflict = 0; 2575168404Spjd objnum = ZFSCTL_INO_ROOT; 2576169108Spjd type = DT_DIR; 2577168404Spjd } else { 2578168404Spjd /* 2579168404Spjd * Grab next entry. 2580168404Spjd */ 2581168404Spjd if (error = zap_cursor_retrieve(&zc, &zap)) { 2582168404Spjd if ((*eofp = (error == ENOENT)) != 0) 2583168404Spjd break; 2584168404Spjd else 2585168404Spjd goto update; 2586168404Spjd } 2587168404Spjd 2588168404Spjd if (zap.za_integer_length != 8 || 2589168404Spjd zap.za_num_integers != 1) { 2590168404Spjd cmn_err(CE_WARN, "zap_readdir: bad directory " 2591168404Spjd "entry, obj = %lld, offset = %lld\n", 2592168404Spjd (u_longlong_t)zp->z_id, 2593168404Spjd (u_longlong_t)offset); 2594249195Smm error = SET_ERROR(ENXIO); 2595168404Spjd goto update; 2596168404Spjd } 2597168404Spjd 2598168404Spjd objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2599168404Spjd /* 2600168404Spjd * MacOS X can extract the object type here such as: 2601168404Spjd * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2602168404Spjd */ 2603168404Spjd type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2604185029Spjd 2605185029Spjd if (check_sysattrs && !zap.za_normalization_conflict) { 2606185029Spjd#ifdef TODO 2607185029Spjd zap.za_normalization_conflict = 2608185029Spjd xattr_sysattr_casechk(zap.za_name); 2609185029Spjd#else 2610185029Spjd panic("%s:%u: TODO", __func__, __LINE__); 2611185029Spjd#endif 2612185029Spjd } 2613168404Spjd } 2614168404Spjd 2615211932Smm if (flags & V_RDDIR_ACCFILTER) { 2616211932Smm /* 2617211932Smm * If we have no access at all, don't include 2618211932Smm * this entry in the returned information 2619211932Smm */ 2620211932Smm znode_t *ezp; 2621211932Smm if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2622211932Smm goto skip_entry; 2623211932Smm if (!zfs_has_access(ezp, cr)) { 2624211932Smm VN_RELE(ZTOV(ezp)); 2625211932Smm goto skip_entry; 2626211932Smm } 2627211932Smm VN_RELE(ZTOV(ezp)); 2628211932Smm } 2629211932Smm 2630185029Spjd if (flags & V_RDDIR_ENTFLAGS) 2631185029Spjd reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2632185029Spjd else 2633185029Spjd reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2634185029Spjd 2635168404Spjd /* 2636168404Spjd * Will this entry fit in the buffer? 2637168404Spjd */ 2638168404Spjd if (outcount + reclen > bufsize) { 2639168404Spjd /* 2640168404Spjd * Did we manage to fit anything in the buffer? 2641168404Spjd */ 2642168404Spjd if (!outcount) { 2643249195Smm error = SET_ERROR(EINVAL); 2644168404Spjd goto update; 2645168404Spjd } 2646168404Spjd break; 2647168404Spjd } 2648185029Spjd if (flags & V_RDDIR_ENTFLAGS) { 2649185029Spjd /* 2650185029Spjd * Add extended flag entry: 2651185029Spjd */ 2652185029Spjd eodp->ed_ino = objnum; 2653185029Spjd eodp->ed_reclen = reclen; 2654185029Spjd /* NOTE: ed_off is the offset for the *next* entry */ 2655185029Spjd next = &(eodp->ed_off); 2656185029Spjd eodp->ed_eflags = zap.za_normalization_conflict ? 2657185029Spjd ED_CASE_CONFLICT : 0; 2658185029Spjd (void) strncpy(eodp->ed_name, zap.za_name, 2659185029Spjd EDIRENT_NAMELEN(reclen)); 2660185029Spjd eodp = (edirent_t *)((intptr_t)eodp + reclen); 2661185029Spjd } else { 2662185029Spjd /* 2663185029Spjd * Add normal entry: 2664185029Spjd */ 2665185029Spjd odp->d_ino = objnum; 2666185029Spjd odp->d_reclen = reclen; 2667185029Spjd odp->d_namlen = strlen(zap.za_name); 2668185029Spjd (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2669185029Spjd odp->d_type = type; 2670185029Spjd odp = (dirent64_t *)((intptr_t)odp + reclen); 2671185029Spjd } 2672168404Spjd outcount += reclen; 2673168404Spjd 2674168404Spjd ASSERT(outcount <= bufsize); 2675168404Spjd 2676168404Spjd /* Prefetch znode */ 2677168404Spjd if (prefetch) 2678168404Spjd dmu_prefetch(os, objnum, 0, 0); 2679168404Spjd 2680211932Smm skip_entry: 2681168404Spjd /* 2682168404Spjd * Move to the next entry, fill in the previous offset. 2683168404Spjd */ 2684168404Spjd if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2685168404Spjd zap_cursor_advance(&zc); 2686168404Spjd offset = zap_cursor_serialize(&zc); 2687168404Spjd } else { 2688168404Spjd offset += 1; 2689168404Spjd } 2690219404Spjd 2691219404Spjd if (cooks != NULL) { 2692219404Spjd *cooks++ = offset; 2693219404Spjd ncooks--; 2694219404Spjd KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2695219404Spjd } 2696168404Spjd } 2697168404Spjd zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2698168404Spjd 2699168404Spjd /* Subtract unused cookies */ 2700168962Spjd if (ncookies != NULL) 2701168962Spjd *ncookies -= ncooks; 2702168404Spjd 2703168404Spjd if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2704168404Spjd iovp->iov_base += outcount; 2705168404Spjd iovp->iov_len -= outcount; 2706168404Spjd uio->uio_resid -= outcount; 2707168404Spjd } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2708168404Spjd /* 2709168404Spjd * Reset the pointer. 2710168404Spjd */ 2711168404Spjd offset = uio->uio_loffset; 2712168404Spjd } 2713168404Spjd 2714168404Spjdupdate: 2715168404Spjd zap_cursor_fini(&zc); 2716168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2717168404Spjd kmem_free(outbuf, bufsize); 2718168404Spjd 2719168404Spjd if (error == ENOENT) 2720168404Spjd error = 0; 2721168404Spjd 2722168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2723168404Spjd 2724168404Spjd uio->uio_loffset = offset; 2725168404Spjd ZFS_EXIT(zfsvfs); 2726169107Spjd if (error != 0 && cookies != NULL) { 2727168962Spjd free(*cookies, M_TEMP); 2728168962Spjd *cookies = NULL; 2729168962Spjd *ncookies = 0; 2730168404Spjd } 2731168404Spjd return (error); 2732168404Spjd} 2733168404Spjd 2734185029Spjdulong_t zfs_fsync_sync_cnt = 4; 2735185029Spjd 2736168404Spjdstatic int 2737185029Spjdzfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2738168404Spjd{ 2739168962Spjd znode_t *zp = VTOZ(vp); 2740168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2741168404Spjd 2742185029Spjd (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2743185029Spjd 2744219089Spjd if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 2745219089Spjd ZFS_ENTER(zfsvfs); 2746219089Spjd ZFS_VERIFY_ZP(zp); 2747219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 2748219089Spjd ZFS_EXIT(zfsvfs); 2749219089Spjd } 2750168404Spjd return (0); 2751168404Spjd} 2752168404Spjd 2753185029Spjd 2754168404Spjd/* 2755168404Spjd * Get the requested file attributes and place them in the provided 2756168404Spjd * vattr structure. 2757168404Spjd * 2758168404Spjd * IN: vp - vnode of file. 2759168404Spjd * vap - va_mask identifies requested attributes. 2760185029Spjd * If AT_XVATTR set, then optional attrs are requested 2761185029Spjd * flags - ATTR_NOACLCHECK (CIFS server context) 2762168404Spjd * cr - credentials of caller. 2763185029Spjd * ct - caller context 2764168404Spjd * 2765168404Spjd * OUT: vap - attribute values. 2766168404Spjd * 2767251631Sdelphij * RETURN: 0 (always succeeds). 2768168404Spjd */ 2769168404Spjd/* ARGSUSED */ 2770168404Spjdstatic int 2771185029Spjdzfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2772185029Spjd caller_context_t *ct) 2773168404Spjd{ 2774168962Spjd znode_t *zp = VTOZ(vp); 2775168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2776185029Spjd int error = 0; 2777168962Spjd uint32_t blksize; 2778168962Spjd u_longlong_t nblocks; 2779185029Spjd uint64_t links; 2780224251Sdelphij uint64_t mtime[2], ctime[2], crtime[2], rdev; 2781185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2782185029Spjd xoptattr_t *xoap = NULL; 2783185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2784224251Sdelphij sa_bulk_attr_t bulk[4]; 2785219089Spjd int count = 0; 2786168404Spjd 2787168404Spjd ZFS_ENTER(zfsvfs); 2788185029Spjd ZFS_VERIFY_ZP(zp); 2789168404Spjd 2790219089Spjd zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2791219089Spjd 2792219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 2793219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 2794243807Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 2795224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2796224251Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 2797224251Sdelphij &rdev, 8); 2798219089Spjd 2799219089Spjd if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 2800219089Spjd ZFS_EXIT(zfsvfs); 2801219089Spjd return (error); 2802219089Spjd } 2803219089Spjd 2804168404Spjd /* 2805185029Spjd * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2806185029Spjd * Also, if we are the owner don't bother, since owner should 2807185029Spjd * always be allowed to read basic attributes of file. 2808185029Spjd */ 2809219089Spjd if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2810219089Spjd (vap->va_uid != crgetuid(cr))) { 2811185029Spjd if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2812185029Spjd skipaclchk, cr)) { 2813185029Spjd ZFS_EXIT(zfsvfs); 2814185029Spjd return (error); 2815185029Spjd } 2816185029Spjd } 2817185029Spjd 2818185029Spjd /* 2819168404Spjd * Return all attributes. It's cheaper to provide the answer 2820168404Spjd * than to determine whether we were asked the question. 2821168404Spjd */ 2822168404Spjd 2823209097Smm mutex_enter(&zp->z_lock); 2824219089Spjd vap->va_type = IFTOVT(zp->z_mode); 2825219089Spjd vap->va_mode = zp->z_mode & ~S_IFMT; 2826224252Sdelphij#ifdef sun 2827224252Sdelphij vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2828224252Sdelphij#else 2829224252Sdelphij vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 2830224252Sdelphij#endif 2831168404Spjd vap->va_nodeid = zp->z_id; 2832185029Spjd if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2833219089Spjd links = zp->z_links + 1; 2834185029Spjd else 2835219089Spjd links = zp->z_links; 2836229425Sdim vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! */ 2837219089Spjd vap->va_size = zp->z_size; 2838224252Sdelphij#ifdef sun 2839224252Sdelphij vap->va_rdev = vp->v_rdev; 2840224252Sdelphij#else 2841224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2842224251Sdelphij vap->va_rdev = zfs_cmpldev(rdev); 2843224252Sdelphij#endif 2844168404Spjd vap->va_seq = zp->z_seq; 2845168404Spjd vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2846272467Saraujo vap->va_filerev = zp->z_seq; 2847168404Spjd 2848185029Spjd /* 2849185029Spjd * Add in any requested optional attributes and the create time. 2850185029Spjd * Also set the corresponding bits in the returned attribute bitmap. 2851185029Spjd */ 2852185029Spjd if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2853185029Spjd if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2854185029Spjd xoap->xoa_archive = 2855219089Spjd ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2856185029Spjd XVA_SET_RTN(xvap, XAT_ARCHIVE); 2857185029Spjd } 2858185029Spjd 2859185029Spjd if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2860185029Spjd xoap->xoa_readonly = 2861219089Spjd ((zp->z_pflags & ZFS_READONLY) != 0); 2862185029Spjd XVA_SET_RTN(xvap, XAT_READONLY); 2863185029Spjd } 2864185029Spjd 2865185029Spjd if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2866185029Spjd xoap->xoa_system = 2867219089Spjd ((zp->z_pflags & ZFS_SYSTEM) != 0); 2868185029Spjd XVA_SET_RTN(xvap, XAT_SYSTEM); 2869185029Spjd } 2870185029Spjd 2871185029Spjd if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2872185029Spjd xoap->xoa_hidden = 2873219089Spjd ((zp->z_pflags & ZFS_HIDDEN) != 0); 2874185029Spjd XVA_SET_RTN(xvap, XAT_HIDDEN); 2875185029Spjd } 2876185029Spjd 2877185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2878185029Spjd xoap->xoa_nounlink = 2879219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2880185029Spjd XVA_SET_RTN(xvap, XAT_NOUNLINK); 2881185029Spjd } 2882185029Spjd 2883185029Spjd if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2884185029Spjd xoap->xoa_immutable = 2885219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2886185029Spjd XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2887185029Spjd } 2888185029Spjd 2889185029Spjd if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2890185029Spjd xoap->xoa_appendonly = 2891219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2892185029Spjd XVA_SET_RTN(xvap, XAT_APPENDONLY); 2893185029Spjd } 2894185029Spjd 2895185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2896185029Spjd xoap->xoa_nodump = 2897219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0); 2898185029Spjd XVA_SET_RTN(xvap, XAT_NODUMP); 2899185029Spjd } 2900185029Spjd 2901185029Spjd if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2902185029Spjd xoap->xoa_opaque = 2903219089Spjd ((zp->z_pflags & ZFS_OPAQUE) != 0); 2904185029Spjd XVA_SET_RTN(xvap, XAT_OPAQUE); 2905185029Spjd } 2906185029Spjd 2907185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2908185029Spjd xoap->xoa_av_quarantined = 2909219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2910185029Spjd XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2911185029Spjd } 2912185029Spjd 2913185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2914185029Spjd xoap->xoa_av_modified = 2915219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2916185029Spjd XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2917185029Spjd } 2918185029Spjd 2919185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2920219089Spjd vp->v_type == VREG) { 2921219089Spjd zfs_sa_get_scanstamp(zp, xvap); 2922185029Spjd } 2923185029Spjd 2924185029Spjd if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 2925219089Spjd uint64_t times[2]; 2926219089Spjd 2927219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 2928219089Spjd times, sizeof (times)); 2929219089Spjd ZFS_TIME_DECODE(&xoap->xoa_createtime, times); 2930185029Spjd XVA_SET_RTN(xvap, XAT_CREATETIME); 2931185029Spjd } 2932219089Spjd 2933219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2934219089Spjd xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 2935219089Spjd XVA_SET_RTN(xvap, XAT_REPARSE); 2936219089Spjd } 2937219089Spjd if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 2938219089Spjd xoap->xoa_generation = zp->z_gen; 2939219089Spjd XVA_SET_RTN(xvap, XAT_GEN); 2940219089Spjd } 2941219089Spjd 2942219089Spjd if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2943219089Spjd xoap->xoa_offline = 2944219089Spjd ((zp->z_pflags & ZFS_OFFLINE) != 0); 2945219089Spjd XVA_SET_RTN(xvap, XAT_OFFLINE); 2946219089Spjd } 2947219089Spjd 2948219089Spjd if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2949219089Spjd xoap->xoa_sparse = 2950219089Spjd ((zp->z_pflags & ZFS_SPARSE) != 0); 2951219089Spjd XVA_SET_RTN(xvap, XAT_SPARSE); 2952219089Spjd } 2953185029Spjd } 2954185029Spjd 2955219089Spjd ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 2956219089Spjd ZFS_TIME_DECODE(&vap->va_mtime, mtime); 2957219089Spjd ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2958219089Spjd ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 2959168404Spjd 2960168404Spjd mutex_exit(&zp->z_lock); 2961168404Spjd 2962219089Spjd sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 2963168404Spjd vap->va_blksize = blksize; 2964168404Spjd vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 2965168404Spjd 2966168404Spjd if (zp->z_blksz == 0) { 2967168404Spjd /* 2968168404Spjd * Block size hasn't been set; suggest maximal I/O transfers. 2969168404Spjd */ 2970168404Spjd vap->va_blksize = zfsvfs->z_max_blksz; 2971168404Spjd } 2972168404Spjd 2973168404Spjd ZFS_EXIT(zfsvfs); 2974168404Spjd return (0); 2975168404Spjd} 2976168404Spjd 2977168404Spjd/* 2978168404Spjd * Set the file attributes to the values contained in the 2979168404Spjd * vattr structure. 2980168404Spjd * 2981168404Spjd * IN: vp - vnode of file to be modified. 2982168404Spjd * vap - new attribute values. 2983185029Spjd * If AT_XVATTR set, then optional attrs are being set 2984168404Spjd * flags - ATTR_UTIME set if non-default time values provided. 2985185029Spjd * - ATTR_NOACLCHECK (CIFS context only). 2986168404Spjd * cr - credentials of caller. 2987185029Spjd * ct - caller context 2988168404Spjd * 2989251631Sdelphij * RETURN: 0 on success, error code on failure. 2990168404Spjd * 2991168404Spjd * Timestamps: 2992168404Spjd * vp - ctime updated, mtime updated if size changed. 2993168404Spjd */ 2994168404Spjd/* ARGSUSED */ 2995168404Spjdstatic int 2996168962Spjdzfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2997251631Sdelphij caller_context_t *ct) 2998168404Spjd{ 2999185029Spjd znode_t *zp = VTOZ(vp); 3000168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3001185029Spjd zilog_t *zilog; 3002168404Spjd dmu_tx_t *tx; 3003168404Spjd vattr_t oldva; 3004209962Smm xvattr_t tmpxvattr; 3005168962Spjd uint_t mask = vap->va_mask; 3006247187Smm uint_t saved_mask = 0; 3007197831Spjd uint64_t saved_mode; 3008168404Spjd int trim_mask = 0; 3009168404Spjd uint64_t new_mode; 3010209962Smm uint64_t new_uid, new_gid; 3011219089Spjd uint64_t xattr_obj; 3012219089Spjd uint64_t mtime[2], ctime[2]; 3013168404Spjd znode_t *attrzp; 3014168404Spjd int need_policy = FALSE; 3015219089Spjd int err, err2; 3016185029Spjd zfs_fuid_info_t *fuidp = NULL; 3017185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 3018185029Spjd xoptattr_t *xoap; 3019219089Spjd zfs_acl_t *aclp; 3020185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 3021219089Spjd boolean_t fuid_dirtied = B_FALSE; 3022219089Spjd sa_bulk_attr_t bulk[7], xattr_bulk[7]; 3023219089Spjd int count = 0, xattr_count = 0; 3024168404Spjd 3025168404Spjd if (mask == 0) 3026168404Spjd return (0); 3027168404Spjd 3028168962Spjd if (mask & AT_NOSET) 3029249195Smm return (SET_ERROR(EINVAL)); 3030168962Spjd 3031185029Spjd ZFS_ENTER(zfsvfs); 3032185029Spjd ZFS_VERIFY_ZP(zp); 3033185029Spjd 3034185029Spjd zilog = zfsvfs->z_log; 3035185029Spjd 3036185029Spjd /* 3037185029Spjd * Make sure that if we have ephemeral uid/gid or xvattr specified 3038185029Spjd * that file system is at proper version level 3039185029Spjd */ 3040185029Spjd 3041185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 3042185029Spjd (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 3043185029Spjd ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 3044185029Spjd (mask & AT_XVATTR))) { 3045185029Spjd ZFS_EXIT(zfsvfs); 3046249195Smm return (SET_ERROR(EINVAL)); 3047185029Spjd } 3048185029Spjd 3049185029Spjd if (mask & AT_SIZE && vp->v_type == VDIR) { 3050185029Spjd ZFS_EXIT(zfsvfs); 3051249195Smm return (SET_ERROR(EISDIR)); 3052185029Spjd } 3053168404Spjd 3054185029Spjd if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 3055185029Spjd ZFS_EXIT(zfsvfs); 3056249195Smm return (SET_ERROR(EINVAL)); 3057185029Spjd } 3058168404Spjd 3059185029Spjd /* 3060185029Spjd * If this is an xvattr_t, then get a pointer to the structure of 3061185029Spjd * optional attributes. If this is NULL, then we have a vattr_t. 3062185029Spjd */ 3063185029Spjd xoap = xva_getxoptattr(xvap); 3064168404Spjd 3065209962Smm xva_init(&tmpxvattr); 3066209962Smm 3067185029Spjd /* 3068185029Spjd * Immutable files can only alter immutable bit and atime 3069185029Spjd */ 3070219089Spjd if ((zp->z_pflags & ZFS_IMMUTABLE) && 3071185029Spjd ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 3072185029Spjd ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 3073185029Spjd ZFS_EXIT(zfsvfs); 3074249195Smm return (SET_ERROR(EPERM)); 3075185029Spjd } 3076185029Spjd 3077219089Spjd if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { 3078185029Spjd ZFS_EXIT(zfsvfs); 3079249195Smm return (SET_ERROR(EPERM)); 3080185029Spjd } 3081185029Spjd 3082185029Spjd /* 3083185029Spjd * Verify timestamps doesn't overflow 32 bits. 3084185029Spjd * ZFS can handle large timestamps, but 32bit syscalls can't 3085185029Spjd * handle times greater than 2039. This check should be removed 3086185029Spjd * once large timestamps are fully supported. 3087185029Spjd */ 3088185029Spjd if (mask & (AT_ATIME | AT_MTIME)) { 3089185029Spjd if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 3090185029Spjd ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 3091185029Spjd ZFS_EXIT(zfsvfs); 3092249195Smm return (SET_ERROR(EOVERFLOW)); 3093185029Spjd } 3094185029Spjd } 3095185029Spjd 3096168404Spjdtop: 3097168404Spjd attrzp = NULL; 3098219089Spjd aclp = NULL; 3099168404Spjd 3100211932Smm /* Can this be moved to before the top label? */ 3101168404Spjd if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 3102168404Spjd ZFS_EXIT(zfsvfs); 3103249195Smm return (SET_ERROR(EROFS)); 3104168404Spjd } 3105168404Spjd 3106168404Spjd /* 3107168404Spjd * First validate permissions 3108168404Spjd */ 3109168404Spjd 3110168404Spjd if (mask & AT_SIZE) { 3111168404Spjd /* 3112168404Spjd * XXX - Note, we are not providing any open 3113168404Spjd * mode flags here (like FNDELAY), so we may 3114168404Spjd * block if there are locks present... this 3115168404Spjd * should be addressed in openat(). 3116168404Spjd */ 3117185029Spjd /* XXX - would it be OK to generate a log record here? */ 3118185029Spjd err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 3119168404Spjd if (err) { 3120168404Spjd ZFS_EXIT(zfsvfs); 3121168404Spjd return (err); 3122168404Spjd } 3123168404Spjd } 3124168404Spjd 3125185029Spjd if (mask & (AT_ATIME|AT_MTIME) || 3126185029Spjd ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 3127185029Spjd XVA_ISSET_REQ(xvap, XAT_READONLY) || 3128185029Spjd XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 3129219089Spjd XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 3130219089Spjd XVA_ISSET_REQ(xvap, XAT_SPARSE) || 3131185029Spjd XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 3132219089Spjd XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 3133185029Spjd need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 3134185029Spjd skipaclchk, cr); 3135219089Spjd } 3136168404Spjd 3137168404Spjd if (mask & (AT_UID|AT_GID)) { 3138168404Spjd int idmask = (mask & (AT_UID|AT_GID)); 3139168404Spjd int take_owner; 3140168404Spjd int take_group; 3141168404Spjd 3142168404Spjd /* 3143168404Spjd * NOTE: even if a new mode is being set, 3144168404Spjd * we may clear S_ISUID/S_ISGID bits. 3145168404Spjd */ 3146168404Spjd 3147168404Spjd if (!(mask & AT_MODE)) 3148219089Spjd vap->va_mode = zp->z_mode; 3149168404Spjd 3150168404Spjd /* 3151168404Spjd * Take ownership or chgrp to group we are a member of 3152168404Spjd */ 3153168404Spjd 3154168404Spjd take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 3155185029Spjd take_group = (mask & AT_GID) && 3156185029Spjd zfs_groupmember(zfsvfs, vap->va_gid, cr); 3157168404Spjd 3158168404Spjd /* 3159168404Spjd * If both AT_UID and AT_GID are set then take_owner and 3160168404Spjd * take_group must both be set in order to allow taking 3161168404Spjd * ownership. 3162168404Spjd * 3163168404Spjd * Otherwise, send the check through secpolicy_vnode_setattr() 3164168404Spjd * 3165168404Spjd */ 3166168404Spjd 3167168404Spjd if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 3168168404Spjd ((idmask == AT_UID) && take_owner) || 3169168404Spjd ((idmask == AT_GID) && take_group)) { 3170185029Spjd if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 3171185029Spjd skipaclchk, cr) == 0) { 3172168404Spjd /* 3173168404Spjd * Remove setuid/setgid for non-privileged users 3174168404Spjd */ 3175185029Spjd secpolicy_setid_clear(vap, vp, cr); 3176168404Spjd trim_mask = (mask & (AT_UID|AT_GID)); 3177168404Spjd } else { 3178168404Spjd need_policy = TRUE; 3179168404Spjd } 3180168404Spjd } else { 3181168404Spjd need_policy = TRUE; 3182168404Spjd } 3183168404Spjd } 3184168404Spjd 3185168404Spjd mutex_enter(&zp->z_lock); 3186219089Spjd oldva.va_mode = zp->z_mode; 3187185029Spjd zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 3188185029Spjd if (mask & AT_XVATTR) { 3189209962Smm /* 3190209962Smm * Update xvattr mask to include only those attributes 3191209962Smm * that are actually changing. 3192209962Smm * 3193209962Smm * the bits will be restored prior to actually setting 3194209962Smm * the attributes so the caller thinks they were set. 3195209962Smm */ 3196209962Smm if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3197209962Smm if (xoap->xoa_appendonly != 3198219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 3199209962Smm need_policy = TRUE; 3200209962Smm } else { 3201209962Smm XVA_CLR_REQ(xvap, XAT_APPENDONLY); 3202209962Smm XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 3203209962Smm } 3204209962Smm } 3205209962Smm 3206209962Smm if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3207209962Smm if (xoap->xoa_nounlink != 3208219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 3209209962Smm need_policy = TRUE; 3210209962Smm } else { 3211209962Smm XVA_CLR_REQ(xvap, XAT_NOUNLINK); 3212209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 3213209962Smm } 3214209962Smm } 3215209962Smm 3216209962Smm if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3217209962Smm if (xoap->xoa_immutable != 3218219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 3219209962Smm need_policy = TRUE; 3220209962Smm } else { 3221209962Smm XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 3222209962Smm XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 3223209962Smm } 3224209962Smm } 3225209962Smm 3226209962Smm if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3227209962Smm if (xoap->xoa_nodump != 3228219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0)) { 3229209962Smm need_policy = TRUE; 3230209962Smm } else { 3231209962Smm XVA_CLR_REQ(xvap, XAT_NODUMP); 3232209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 3233209962Smm } 3234209962Smm } 3235209962Smm 3236209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3237209962Smm if (xoap->xoa_av_modified != 3238219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 3239209962Smm need_policy = TRUE; 3240209962Smm } else { 3241209962Smm XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 3242209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 3243209962Smm } 3244209962Smm } 3245209962Smm 3246209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3247209962Smm if ((vp->v_type != VREG && 3248209962Smm xoap->xoa_av_quarantined) || 3249209962Smm xoap->xoa_av_quarantined != 3250219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 3251209962Smm need_policy = TRUE; 3252209962Smm } else { 3253209962Smm XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 3254209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 3255209962Smm } 3256209962Smm } 3257209962Smm 3258219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3259219089Spjd mutex_exit(&zp->z_lock); 3260219089Spjd ZFS_EXIT(zfsvfs); 3261249195Smm return (SET_ERROR(EPERM)); 3262219089Spjd } 3263219089Spjd 3264209962Smm if (need_policy == FALSE && 3265209962Smm (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3266209962Smm XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3267185029Spjd need_policy = TRUE; 3268185029Spjd } 3269185029Spjd } 3270185029Spjd 3271168404Spjd mutex_exit(&zp->z_lock); 3272168404Spjd 3273168404Spjd if (mask & AT_MODE) { 3274185029Spjd if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 3275168962Spjd err = secpolicy_setid_setsticky_clear(vp, vap, 3276168962Spjd &oldva, cr); 3277168962Spjd if (err) { 3278168962Spjd ZFS_EXIT(zfsvfs); 3279168962Spjd return (err); 3280168962Spjd } 3281168404Spjd trim_mask |= AT_MODE; 3282168404Spjd } else { 3283168404Spjd need_policy = TRUE; 3284168404Spjd } 3285168404Spjd } 3286168404Spjd 3287168404Spjd if (need_policy) { 3288168404Spjd /* 3289168404Spjd * If trim_mask is set then take ownership 3290168404Spjd * has been granted or write_acl is present and user 3291168404Spjd * has the ability to modify mode. In that case remove 3292168404Spjd * UID|GID and or MODE from mask so that 3293168404Spjd * secpolicy_vnode_setattr() doesn't revoke it. 3294168404Spjd */ 3295168404Spjd 3296168404Spjd if (trim_mask) { 3297168404Spjd saved_mask = vap->va_mask; 3298168404Spjd vap->va_mask &= ~trim_mask; 3299197831Spjd if (trim_mask & AT_MODE) { 3300197831Spjd /* 3301197831Spjd * Save the mode, as secpolicy_vnode_setattr() 3302197831Spjd * will overwrite it with ova.va_mode. 3303197831Spjd */ 3304197831Spjd saved_mode = vap->va_mode; 3305197831Spjd } 3306168404Spjd } 3307168404Spjd err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3308185029Spjd (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3309168404Spjd if (err) { 3310168404Spjd ZFS_EXIT(zfsvfs); 3311168404Spjd return (err); 3312168404Spjd } 3313168404Spjd 3314197831Spjd if (trim_mask) { 3315168404Spjd vap->va_mask |= saved_mask; 3316197831Spjd if (trim_mask & AT_MODE) { 3317197831Spjd /* 3318197831Spjd * Recover the mode after 3319197831Spjd * secpolicy_vnode_setattr(). 3320197831Spjd */ 3321197831Spjd vap->va_mode = saved_mode; 3322197831Spjd } 3323197831Spjd } 3324168404Spjd } 3325168404Spjd 3326168404Spjd /* 3327168404Spjd * secpolicy_vnode_setattr, or take ownership may have 3328168404Spjd * changed va_mask 3329168404Spjd */ 3330168404Spjd mask = vap->va_mask; 3331168404Spjd 3332219089Spjd if ((mask & (AT_UID | AT_GID))) { 3333219089Spjd err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 3334219089Spjd &xattr_obj, sizeof (xattr_obj)); 3335168404Spjd 3336219089Spjd if (err == 0 && xattr_obj) { 3337219089Spjd err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 3338209962Smm if (err) 3339219089Spjd goto out2; 3340168404Spjd } 3341209962Smm if (mask & AT_UID) { 3342209962Smm new_uid = zfs_fuid_create(zfsvfs, 3343209962Smm (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3344219089Spjd if (new_uid != zp->z_uid && 3345219089Spjd zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 3346219089Spjd if (attrzp) 3347219089Spjd VN_RELE(ZTOV(attrzp)); 3348249195Smm err = SET_ERROR(EDQUOT); 3349219089Spjd goto out2; 3350209962Smm } 3351209962Smm } 3352209962Smm 3353209962Smm if (mask & AT_GID) { 3354209962Smm new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 3355209962Smm cr, ZFS_GROUP, &fuidp); 3356219089Spjd if (new_gid != zp->z_gid && 3357219089Spjd zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 3358219089Spjd if (attrzp) 3359219089Spjd VN_RELE(ZTOV(attrzp)); 3360249195Smm err = SET_ERROR(EDQUOT); 3361219089Spjd goto out2; 3362209962Smm } 3363209962Smm } 3364219089Spjd } 3365219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 3366219089Spjd 3367219089Spjd if (mask & AT_MODE) { 3368219089Spjd uint64_t pmode = zp->z_mode; 3369219089Spjd uint64_t acl_obj; 3370219089Spjd new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3371219089Spjd 3372243560Smm if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 3373243560Smm !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3374249195Smm err = SET_ERROR(EPERM); 3375243560Smm goto out; 3376243560Smm } 3377243560Smm 3378224174Smm if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3379224174Smm goto out; 3380219089Spjd 3381219089Spjd mutex_enter(&zp->z_lock); 3382219089Spjd if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 3383219089Spjd /* 3384219089Spjd * Are we upgrading ACL from old V0 format 3385219089Spjd * to V1 format? 3386219089Spjd */ 3387219089Spjd if (zfsvfs->z_version >= ZPL_VERSION_FUID && 3388219089Spjd zfs_znode_acl_version(zp) == 3389219089Spjd ZFS_ACL_VERSION_INITIAL) { 3390219089Spjd dmu_tx_hold_free(tx, acl_obj, 0, 3391219089Spjd DMU_OBJECT_END); 3392219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3393219089Spjd 0, aclp->z_acl_bytes); 3394209962Smm } else { 3395219089Spjd dmu_tx_hold_write(tx, acl_obj, 0, 3396219089Spjd aclp->z_acl_bytes); 3397209962Smm } 3398219089Spjd } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 3399219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3400219089Spjd 0, aclp->z_acl_bytes); 3401209962Smm } 3402219089Spjd mutex_exit(&zp->z_lock); 3403219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3404219089Spjd } else { 3405219089Spjd if ((mask & AT_XVATTR) && 3406219089Spjd XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3407219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3408219089Spjd else 3409219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3410168404Spjd } 3411168404Spjd 3412219089Spjd if (attrzp) { 3413219089Spjd dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 3414219089Spjd } 3415219089Spjd 3416219089Spjd fuid_dirtied = zfsvfs->z_fuid_dirty; 3417219089Spjd if (fuid_dirtied) 3418219089Spjd zfs_fuid_txhold(zfsvfs, tx); 3419219089Spjd 3420219089Spjd zfs_sa_upgrade_txholds(tx, zp); 3421219089Spjd 3422258720Savg err = dmu_tx_assign(tx, TXG_WAIT); 3423258720Savg if (err) 3424209962Smm goto out; 3425168404Spjd 3426219089Spjd count = 0; 3427168404Spjd /* 3428168404Spjd * Set each attribute requested. 3429168404Spjd * We group settings according to the locks they need to acquire. 3430168404Spjd * 3431168404Spjd * Note: you cannot set ctime directly, although it will be 3432168404Spjd * updated as a side-effect of calling this function. 3433168404Spjd */ 3434168404Spjd 3435219089Spjd 3436219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3437219089Spjd mutex_enter(&zp->z_acl_lock); 3438168404Spjd mutex_enter(&zp->z_lock); 3439168404Spjd 3440219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3441219089Spjd &zp->z_pflags, sizeof (zp->z_pflags)); 3442219089Spjd 3443219089Spjd if (attrzp) { 3444219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3445219089Spjd mutex_enter(&attrzp->z_acl_lock); 3446219089Spjd mutex_enter(&attrzp->z_lock); 3447219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3448219089Spjd SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3449219089Spjd sizeof (attrzp->z_pflags)); 3450219089Spjd } 3451219089Spjd 3452219089Spjd if (mask & (AT_UID|AT_GID)) { 3453219089Spjd 3454219089Spjd if (mask & AT_UID) { 3455219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 3456219089Spjd &new_uid, sizeof (new_uid)); 3457219089Spjd zp->z_uid = new_uid; 3458219089Spjd if (attrzp) { 3459219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3460219089Spjd SA_ZPL_UID(zfsvfs), NULL, &new_uid, 3461219089Spjd sizeof (new_uid)); 3462219089Spjd attrzp->z_uid = new_uid; 3463219089Spjd } 3464219089Spjd } 3465219089Spjd 3466219089Spjd if (mask & AT_GID) { 3467219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 3468219089Spjd NULL, &new_gid, sizeof (new_gid)); 3469219089Spjd zp->z_gid = new_gid; 3470219089Spjd if (attrzp) { 3471219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3472219089Spjd SA_ZPL_GID(zfsvfs), NULL, &new_gid, 3473219089Spjd sizeof (new_gid)); 3474219089Spjd attrzp->z_gid = new_gid; 3475219089Spjd } 3476219089Spjd } 3477219089Spjd if (!(mask & AT_MODE)) { 3478219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 3479219089Spjd NULL, &new_mode, sizeof (new_mode)); 3480219089Spjd new_mode = zp->z_mode; 3481219089Spjd } 3482219089Spjd err = zfs_acl_chown_setattr(zp); 3483219089Spjd ASSERT(err == 0); 3484219089Spjd if (attrzp) { 3485219089Spjd err = zfs_acl_chown_setattr(attrzp); 3486219089Spjd ASSERT(err == 0); 3487219089Spjd } 3488219089Spjd } 3489219089Spjd 3490168404Spjd if (mask & AT_MODE) { 3491219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 3492219089Spjd &new_mode, sizeof (new_mode)); 3493219089Spjd zp->z_mode = new_mode; 3494219089Spjd ASSERT3U((uintptr_t)aclp, !=, 0); 3495209962Smm err = zfs_aclset_common(zp, aclp, cr, tx); 3496240415Smm ASSERT0(err); 3497219089Spjd if (zp->z_acl_cached) 3498219089Spjd zfs_acl_free(zp->z_acl_cached); 3499211932Smm zp->z_acl_cached = aclp; 3500211932Smm aclp = NULL; 3501168404Spjd } 3502168404Spjd 3503168404Spjd 3504219089Spjd if (mask & AT_ATIME) { 3505219089Spjd ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 3506219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 3507219089Spjd &zp->z_atime, sizeof (zp->z_atime)); 3508168404Spjd } 3509168404Spjd 3510219089Spjd if (mask & AT_MTIME) { 3511219089Spjd ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 3512219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 3513219089Spjd mtime, sizeof (mtime)); 3514168404Spjd } 3515168404Spjd 3516185029Spjd /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */ 3517219089Spjd if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3518219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3519219089Spjd NULL, mtime, sizeof (mtime)); 3520219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3521219089Spjd &ctime, sizeof (ctime)); 3522219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 3523219089Spjd B_TRUE); 3524219089Spjd } else if (mask != 0) { 3525219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3526219089Spjd &ctime, sizeof (ctime)); 3527219089Spjd zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 3528219089Spjd B_TRUE); 3529219089Spjd if (attrzp) { 3530219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3531219089Spjd SA_ZPL_CTIME(zfsvfs), NULL, 3532219089Spjd &ctime, sizeof (ctime)); 3533219089Spjd zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 3534219089Spjd mtime, ctime, B_TRUE); 3535219089Spjd } 3536219089Spjd } 3537185029Spjd /* 3538185029Spjd * Do this after setting timestamps to prevent timestamp 3539185029Spjd * update from toggling bit 3540185029Spjd */ 3541168404Spjd 3542185029Spjd if (xoap && (mask & AT_XVATTR)) { 3543209962Smm 3544209962Smm /* 3545209962Smm * restore trimmed off masks 3546209962Smm * so that return masks can be set for caller. 3547209962Smm */ 3548209962Smm 3549209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3550209962Smm XVA_SET_REQ(xvap, XAT_APPENDONLY); 3551209962Smm } 3552209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3553209962Smm XVA_SET_REQ(xvap, XAT_NOUNLINK); 3554209962Smm } 3555209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3556209962Smm XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3557209962Smm } 3558209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3559209962Smm XVA_SET_REQ(xvap, XAT_NODUMP); 3560209962Smm } 3561209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3562209962Smm XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3563209962Smm } 3564209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3565209962Smm XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3566209962Smm } 3567209962Smm 3568219089Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3569185029Spjd ASSERT(vp->v_type == VREG); 3570185029Spjd 3571219089Spjd zfs_xvattr_set(zp, xvap, tx); 3572185029Spjd } 3573185029Spjd 3574209962Smm if (fuid_dirtied) 3575209962Smm zfs_fuid_sync(zfsvfs, tx); 3576209962Smm 3577168404Spjd if (mask != 0) 3578185029Spjd zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3579168404Spjd 3580168404Spjd mutex_exit(&zp->z_lock); 3581219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3582219089Spjd mutex_exit(&zp->z_acl_lock); 3583168404Spjd 3584219089Spjd if (attrzp) { 3585219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3586219089Spjd mutex_exit(&attrzp->z_acl_lock); 3587219089Spjd mutex_exit(&attrzp->z_lock); 3588219089Spjd } 3589209962Smmout: 3590219089Spjd if (err == 0 && attrzp) { 3591219089Spjd err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 3592219089Spjd xattr_count, tx); 3593219089Spjd ASSERT(err2 == 0); 3594219089Spjd } 3595219089Spjd 3596168404Spjd if (attrzp) 3597168404Spjd VN_RELE(ZTOV(attrzp)); 3598251631Sdelphij 3599211932Smm if (aclp) 3600209962Smm zfs_acl_free(aclp); 3601168404Spjd 3602209962Smm if (fuidp) { 3603209962Smm zfs_fuid_info_free(fuidp); 3604209962Smm fuidp = NULL; 3605209962Smm } 3606209962Smm 3607219089Spjd if (err) { 3608209962Smm dmu_tx_abort(tx); 3609219089Spjd if (err == ERESTART) 3610219089Spjd goto top; 3611219089Spjd } else { 3612219089Spjd err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 3613209962Smm dmu_tx_commit(tx); 3614219089Spjd } 3615209962Smm 3616219089Spjdout2: 3617219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3618219089Spjd zil_commit(zilog, 0); 3619209962Smm 3620168404Spjd ZFS_EXIT(zfsvfs); 3621168404Spjd return (err); 3622168404Spjd} 3623168404Spjd 3624168404Spjdtypedef struct zfs_zlock { 3625168404Spjd krwlock_t *zl_rwlock; /* lock we acquired */ 3626168404Spjd znode_t *zl_znode; /* znode we held */ 3627168404Spjd struct zfs_zlock *zl_next; /* next in list */ 3628168404Spjd} zfs_zlock_t; 3629168404Spjd 3630168404Spjd/* 3631168404Spjd * Drop locks and release vnodes that were held by zfs_rename_lock(). 3632168404Spjd */ 3633168404Spjdstatic void 3634168404Spjdzfs_rename_unlock(zfs_zlock_t **zlpp) 3635168404Spjd{ 3636168404Spjd zfs_zlock_t *zl; 3637168404Spjd 3638168404Spjd while ((zl = *zlpp) != NULL) { 3639168404Spjd if (zl->zl_znode != NULL) 3640168404Spjd VN_RELE(ZTOV(zl->zl_znode)); 3641168404Spjd rw_exit(zl->zl_rwlock); 3642168404Spjd *zlpp = zl->zl_next; 3643168404Spjd kmem_free(zl, sizeof (*zl)); 3644168404Spjd } 3645168404Spjd} 3646168404Spjd 3647168404Spjd/* 3648168404Spjd * Search back through the directory tree, using the ".." entries. 3649168404Spjd * Lock each directory in the chain to prevent concurrent renames. 3650168404Spjd * Fail any attempt to move a directory into one of its own descendants. 3651168404Spjd * XXX - z_parent_lock can overlap with map or grow locks 3652168404Spjd */ 3653168404Spjdstatic int 3654168404Spjdzfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 3655168404Spjd{ 3656168404Spjd zfs_zlock_t *zl; 3657168404Spjd znode_t *zp = tdzp; 3658168404Spjd uint64_t rootid = zp->z_zfsvfs->z_root; 3659219089Spjd uint64_t oidp = zp->z_id; 3660168404Spjd krwlock_t *rwlp = &szp->z_parent_lock; 3661168404Spjd krw_t rw = RW_WRITER; 3662168404Spjd 3663168404Spjd /* 3664168404Spjd * First pass write-locks szp and compares to zp->z_id. 3665168404Spjd * Later passes read-lock zp and compare to zp->z_parent. 3666168404Spjd */ 3667168404Spjd do { 3668168404Spjd if (!rw_tryenter(rwlp, rw)) { 3669168404Spjd /* 3670168404Spjd * Another thread is renaming in this path. 3671168404Spjd * Note that if we are a WRITER, we don't have any 3672168404Spjd * parent_locks held yet. 3673168404Spjd */ 3674168404Spjd if (rw == RW_READER && zp->z_id > szp->z_id) { 3675168404Spjd /* 3676168404Spjd * Drop our locks and restart 3677168404Spjd */ 3678168404Spjd zfs_rename_unlock(&zl); 3679168404Spjd *zlpp = NULL; 3680168404Spjd zp = tdzp; 3681219089Spjd oidp = zp->z_id; 3682168404Spjd rwlp = &szp->z_parent_lock; 3683168404Spjd rw = RW_WRITER; 3684168404Spjd continue; 3685168404Spjd } else { 3686168404Spjd /* 3687168404Spjd * Wait for other thread to drop its locks 3688168404Spjd */ 3689168404Spjd rw_enter(rwlp, rw); 3690168404Spjd } 3691168404Spjd } 3692168404Spjd 3693168404Spjd zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3694168404Spjd zl->zl_rwlock = rwlp; 3695168404Spjd zl->zl_znode = NULL; 3696168404Spjd zl->zl_next = *zlpp; 3697168404Spjd *zlpp = zl; 3698168404Spjd 3699219089Spjd if (oidp == szp->z_id) /* We're a descendant of szp */ 3700249195Smm return (SET_ERROR(EINVAL)); 3701168404Spjd 3702219089Spjd if (oidp == rootid) /* We've hit the top */ 3703168404Spjd return (0); 3704168404Spjd 3705168404Spjd if (rw == RW_READER) { /* i.e. not the first pass */ 3706219089Spjd int error = zfs_zget(zp->z_zfsvfs, oidp, &zp); 3707168404Spjd if (error) 3708168404Spjd return (error); 3709168404Spjd zl->zl_znode = zp; 3710168404Spjd } 3711219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs), 3712219089Spjd &oidp, sizeof (oidp)); 3713168404Spjd rwlp = &zp->z_parent_lock; 3714168404Spjd rw = RW_READER; 3715168404Spjd 3716168404Spjd } while (zp->z_id != sdzp->z_id); 3717168404Spjd 3718168404Spjd return (0); 3719168404Spjd} 3720168404Spjd 3721168404Spjd/* 3722168404Spjd * Move an entry from the provided source directory to the target 3723168404Spjd * directory. Change the entry name as indicated. 3724168404Spjd * 3725168404Spjd * IN: sdvp - Source directory containing the "old entry". 3726168404Spjd * snm - Old entry name. 3727168404Spjd * tdvp - Target directory to contain the "new entry". 3728168404Spjd * tnm - New entry name. 3729168404Spjd * cr - credentials of caller. 3730185029Spjd * ct - caller context 3731185029Spjd * flags - case flags 3732168404Spjd * 3733251631Sdelphij * RETURN: 0 on success, error code on failure. 3734168404Spjd * 3735168404Spjd * Timestamps: 3736168404Spjd * sdvp,tdvp - ctime|mtime updated 3737168404Spjd */ 3738185029Spjd/*ARGSUSED*/ 3739168404Spjdstatic int 3740185029Spjdzfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, 3741185029Spjd caller_context_t *ct, int flags) 3742168404Spjd{ 3743264392Sdavide znode_t *tdzp, *sdzp, *szp, *tzp; 3744264392Sdavide zfsvfs_t *zfsvfs; 3745185029Spjd zilog_t *zilog; 3746168962Spjd vnode_t *realvp; 3747168404Spjd zfs_dirlock_t *sdl, *tdl; 3748168404Spjd dmu_tx_t *tx; 3749168404Spjd zfs_zlock_t *zl; 3750185029Spjd int cmp, serr, terr; 3751185029Spjd int error = 0; 3752185029Spjd int zflg = 0; 3753258632Savg boolean_t waited = B_FALSE; 3754168404Spjd 3755264392Sdavide tdzp = VTOZ(tdvp); 3756264392Sdavide ZFS_VERIFY_ZP(tdzp); 3757264392Sdavide zfsvfs = tdzp->z_zfsvfs; 3758168404Spjd ZFS_ENTER(zfsvfs); 3759185029Spjd zilog = zfsvfs->z_log; 3760264392Sdavide sdzp = VTOZ(sdvp); 3761168404Spjd 3762168962Spjd /* 3763264392Sdavide * In case sdzp is not valid, let's be sure to exit from the right 3764264392Sdavide * zfsvfs_t. 3765168962Spjd */ 3766264392Sdavide if (sdzp->z_sa_hdl == NULL) { 3767264392Sdavide ZFS_EXIT(zfsvfs); 3768264392Sdavide return (SET_ERROR(EIO)); 3769264392Sdavide } 3770168962Spjd 3771254585Sdelphij /* 3772254585Sdelphij * We check z_zfsvfs rather than v_vfsp here, because snapshots and the 3773254585Sdelphij * ctldir appear to have the same v_vfsp. 3774254585Sdelphij */ 3775264392Sdavide if (sdzp->z_zfsvfs != zfsvfs || zfsctl_is_node(tdvp)) { 3776168404Spjd ZFS_EXIT(zfsvfs); 3777249195Smm return (SET_ERROR(EXDEV)); 3778168404Spjd } 3779168404Spjd 3780185029Spjd if (zfsvfs->z_utf8 && u8_validate(tnm, 3781185029Spjd strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3782185029Spjd ZFS_EXIT(zfsvfs); 3783249195Smm return (SET_ERROR(EILSEQ)); 3784185029Spjd } 3785185029Spjd 3786185029Spjd if (flags & FIGNORECASE) 3787185029Spjd zflg |= ZCILOOK; 3788185029Spjd 3789168404Spjdtop: 3790168404Spjd szp = NULL; 3791168404Spjd tzp = NULL; 3792168404Spjd zl = NULL; 3793168404Spjd 3794168404Spjd /* 3795168404Spjd * This is to prevent the creation of links into attribute space 3796168404Spjd * by renaming a linked file into/outof an attribute directory. 3797168404Spjd * See the comment in zfs_link() for why this is considered bad. 3798168404Spjd */ 3799219089Spjd if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3800168962Spjd ZFS_EXIT(zfsvfs); 3801249195Smm return (SET_ERROR(EINVAL)); 3802168404Spjd } 3803168404Spjd 3804168404Spjd /* 3805168404Spjd * Lock source and target directory entries. To prevent deadlock, 3806168404Spjd * a lock ordering must be defined. We lock the directory with 3807168404Spjd * the smallest object id first, or if it's a tie, the one with 3808168404Spjd * the lexically first name. 3809168404Spjd */ 3810168404Spjd if (sdzp->z_id < tdzp->z_id) { 3811168962Spjd cmp = -1; 3812168962Spjd } else if (sdzp->z_id > tdzp->z_id) { 3813168962Spjd cmp = 1; 3814168962Spjd } else { 3815185029Spjd /* 3816185029Spjd * First compare the two name arguments without 3817185029Spjd * considering any case folding. 3818185029Spjd */ 3819185029Spjd int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); 3820185029Spjd 3821185029Spjd cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); 3822185029Spjd ASSERT(error == 0 || !zfsvfs->z_utf8); 3823168962Spjd if (cmp == 0) { 3824168962Spjd /* 3825168962Spjd * POSIX: "If the old argument and the new argument 3826168962Spjd * both refer to links to the same existing file, 3827168962Spjd * the rename() function shall return successfully 3828168962Spjd * and perform no other action." 3829168962Spjd */ 3830168962Spjd ZFS_EXIT(zfsvfs); 3831168962Spjd return (0); 3832168962Spjd } 3833185029Spjd /* 3834185029Spjd * If the file system is case-folding, then we may 3835185029Spjd * have some more checking to do. A case-folding file 3836185029Spjd * system is either supporting mixed case sensitivity 3837185029Spjd * access or is completely case-insensitive. Note 3838185029Spjd * that the file system is always case preserving. 3839185029Spjd * 3840185029Spjd * In mixed sensitivity mode case sensitive behavior 3841185029Spjd * is the default. FIGNORECASE must be used to 3842185029Spjd * explicitly request case insensitive behavior. 3843185029Spjd * 3844185029Spjd * If the source and target names provided differ only 3845185029Spjd * by case (e.g., a request to rename 'tim' to 'Tim'), 3846185029Spjd * we will treat this as a special case in the 3847185029Spjd * case-insensitive mode: as long as the source name 3848185029Spjd * is an exact match, we will allow this to proceed as 3849185029Spjd * a name-change request. 3850185029Spjd */ 3851185029Spjd if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 3852185029Spjd (zfsvfs->z_case == ZFS_CASE_MIXED && 3853185029Spjd flags & FIGNORECASE)) && 3854185029Spjd u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, 3855185029Spjd &error) == 0) { 3856185029Spjd /* 3857185029Spjd * case preserving rename request, require exact 3858185029Spjd * name matches 3859185029Spjd */ 3860185029Spjd zflg |= ZCIEXACT; 3861185029Spjd zflg &= ~ZCILOOK; 3862185029Spjd } 3863168962Spjd } 3864185029Spjd 3865208131Smm /* 3866208131Smm * If the source and destination directories are the same, we should 3867208131Smm * grab the z_name_lock of that directory only once. 3868208131Smm */ 3869208131Smm if (sdzp == tdzp) { 3870208131Smm zflg |= ZHAVELOCK; 3871208131Smm rw_enter(&sdzp->z_name_lock, RW_READER); 3872208131Smm } 3873208131Smm 3874168962Spjd if (cmp < 0) { 3875185029Spjd serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, 3876185029Spjd ZEXISTS | zflg, NULL, NULL); 3877185029Spjd terr = zfs_dirent_lock(&tdl, 3878185029Spjd tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); 3879168962Spjd } else { 3880185029Spjd terr = zfs_dirent_lock(&tdl, 3881185029Spjd tdzp, tnm, &tzp, zflg, NULL, NULL); 3882185029Spjd serr = zfs_dirent_lock(&sdl, 3883185029Spjd sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, 3884185029Spjd NULL, NULL); 3885168404Spjd } 3886168404Spjd 3887168962Spjd if (serr) { 3888168404Spjd /* 3889168404Spjd * Source entry invalid or not there. 3890168404Spjd */ 3891168962Spjd if (!terr) { 3892168404Spjd zfs_dirent_unlock(tdl); 3893168962Spjd if (tzp) 3894168962Spjd VN_RELE(ZTOV(tzp)); 3895168962Spjd } 3896208131Smm 3897208131Smm if (sdzp == tdzp) 3898208131Smm rw_exit(&sdzp->z_name_lock); 3899208131Smm 3900219089Spjd /* 3901219089Spjd * FreeBSD: In OpenSolaris they only check if rename source is 3902219089Spjd * ".." here, because "." is handled in their lookup. This is 3903219089Spjd * not the case for FreeBSD, so we check for "." explicitly. 3904219089Spjd */ 3905168404Spjd if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0) 3906249195Smm serr = SET_ERROR(EINVAL); 3907168962Spjd ZFS_EXIT(zfsvfs); 3908168962Spjd return (serr); 3909168404Spjd } 3910168404Spjd if (terr) { 3911168404Spjd zfs_dirent_unlock(sdl); 3912168962Spjd VN_RELE(ZTOV(szp)); 3913208131Smm 3914208131Smm if (sdzp == tdzp) 3915208131Smm rw_exit(&sdzp->z_name_lock); 3916208131Smm 3917168404Spjd if (strcmp(tnm, "..") == 0) 3918249195Smm terr = SET_ERROR(EINVAL); 3919168962Spjd ZFS_EXIT(zfsvfs); 3920168962Spjd return (terr); 3921168404Spjd } 3922168404Spjd 3923168404Spjd /* 3924168404Spjd * Must have write access at the source to remove the old entry 3925168404Spjd * and write access at the target to create the new entry. 3926168404Spjd * Note that if target and source are the same, this can be 3927168404Spjd * done in a single check. 3928168404Spjd */ 3929168404Spjd 3930168404Spjd if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3931168404Spjd goto out; 3932168404Spjd 3933168962Spjd if (ZTOV(szp)->v_type == VDIR) { 3934168404Spjd /* 3935168404Spjd * Check to make sure rename is valid. 3936168404Spjd * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3937168404Spjd */ 3938168404Spjd if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) 3939168404Spjd goto out; 3940168404Spjd } 3941168404Spjd 3942168404Spjd /* 3943168404Spjd * Does target exist? 3944168404Spjd */ 3945168404Spjd if (tzp) { 3946168404Spjd /* 3947168404Spjd * Source and target must be the same type. 3948168404Spjd */ 3949168962Spjd if (ZTOV(szp)->v_type == VDIR) { 3950168962Spjd if (ZTOV(tzp)->v_type != VDIR) { 3951249195Smm error = SET_ERROR(ENOTDIR); 3952168404Spjd goto out; 3953168404Spjd } 3954168404Spjd } else { 3955168962Spjd if (ZTOV(tzp)->v_type == VDIR) { 3956249195Smm error = SET_ERROR(EISDIR); 3957168404Spjd goto out; 3958168404Spjd } 3959168404Spjd } 3960168404Spjd /* 3961168404Spjd * POSIX dictates that when the source and target 3962168404Spjd * entries refer to the same file object, rename 3963168404Spjd * must do nothing and exit without error. 3964168404Spjd */ 3965168404Spjd if (szp->z_id == tzp->z_id) { 3966168404Spjd error = 0; 3967168404Spjd goto out; 3968168404Spjd } 3969168404Spjd } 3970168404Spjd 3971185029Spjd vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); 3972168962Spjd if (tzp) 3973185029Spjd vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 3974168962Spjd 3975185029Spjd /* 3976185029Spjd * notify the target directory if it is not the same 3977185029Spjd * as source directory. 3978185029Spjd */ 3979185029Spjd if (tdvp != sdvp) { 3980185029Spjd vnevent_rename_dest_dir(tdvp, ct); 3981185029Spjd } 3982185029Spjd 3983168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 3984219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 3985219089Spjd dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3986168404Spjd dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3987168404Spjd dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 3988219089Spjd if (sdzp != tdzp) { 3989219089Spjd dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 3990219089Spjd zfs_sa_upgrade_txholds(tx, tdzp); 3991219089Spjd } 3992219089Spjd if (tzp) { 3993219089Spjd dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 3994219089Spjd zfs_sa_upgrade_txholds(tx, tzp); 3995219089Spjd } 3996219089Spjd 3997219089Spjd zfs_sa_upgrade_txholds(tx, szp); 3998168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3999258632Savg error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 4000168404Spjd if (error) { 4001168404Spjd if (zl != NULL) 4002168404Spjd zfs_rename_unlock(&zl); 4003168404Spjd zfs_dirent_unlock(sdl); 4004168404Spjd zfs_dirent_unlock(tdl); 4005208131Smm 4006208131Smm if (sdzp == tdzp) 4007208131Smm rw_exit(&sdzp->z_name_lock); 4008208131Smm 4009168962Spjd VN_RELE(ZTOV(szp)); 4010168962Spjd if (tzp) 4011168962Spjd VN_RELE(ZTOV(tzp)); 4012209962Smm if (error == ERESTART) { 4013258632Savg waited = B_TRUE; 4014168404Spjd dmu_tx_wait(tx); 4015168404Spjd dmu_tx_abort(tx); 4016168404Spjd goto top; 4017168404Spjd } 4018168404Spjd dmu_tx_abort(tx); 4019168962Spjd ZFS_EXIT(zfsvfs); 4020168962Spjd return (error); 4021168404Spjd } 4022168404Spjd 4023168404Spjd if (tzp) /* Attempt to remove the existing target */ 4024185029Spjd error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); 4025168404Spjd 4026168404Spjd if (error == 0) { 4027168404Spjd error = zfs_link_create(tdl, szp, tx, ZRENAMING); 4028168404Spjd if (error == 0) { 4029219089Spjd szp->z_pflags |= ZFS_AV_MODIFIED; 4030185029Spjd 4031219089Spjd error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 4032219089Spjd (void *)&szp->z_pflags, sizeof (uint64_t), tx); 4033240415Smm ASSERT0(error); 4034219089Spjd 4035168404Spjd error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); 4036219089Spjd if (error == 0) { 4037219089Spjd zfs_log_rename(zilog, tx, TX_RENAME | 4038219089Spjd (flags & FIGNORECASE ? TX_CI : 0), sdzp, 4039219089Spjd sdl->dl_name, tdzp, tdl->dl_name, szp); 4040185029Spjd 4041219089Spjd /* 4042219089Spjd * Update path information for the target vnode 4043219089Spjd */ 4044219089Spjd vn_renamepath(tdvp, ZTOV(szp), tnm, 4045219089Spjd strlen(tnm)); 4046219089Spjd } else { 4047219089Spjd /* 4048219089Spjd * At this point, we have successfully created 4049219089Spjd * the target name, but have failed to remove 4050219089Spjd * the source name. Since the create was done 4051219089Spjd * with the ZRENAMING flag, there are 4052219089Spjd * complications; for one, the link count is 4053219089Spjd * wrong. The easiest way to deal with this 4054219089Spjd * is to remove the newly created target, and 4055219089Spjd * return the original error. This must 4056219089Spjd * succeed; fortunately, it is very unlikely to 4057219089Spjd * fail, since we just created it. 4058219089Spjd */ 4059219089Spjd VERIFY3U(zfs_link_destroy(tdl, szp, tx, 4060219089Spjd ZRENAMING, NULL), ==, 0); 4061219089Spjd } 4062168404Spjd } 4063168404Spjd#ifdef FREEBSD_NAMECACHE 4064168404Spjd if (error == 0) { 4065168404Spjd cache_purge(sdvp); 4066168404Spjd cache_purge(tdvp); 4067240829Spjd cache_purge(ZTOV(szp)); 4068240829Spjd if (tzp) 4069240829Spjd cache_purge(ZTOV(tzp)); 4070168404Spjd } 4071168404Spjd#endif 4072168404Spjd } 4073168404Spjd 4074168404Spjd dmu_tx_commit(tx); 4075168404Spjdout: 4076168404Spjd if (zl != NULL) 4077168404Spjd zfs_rename_unlock(&zl); 4078168404Spjd 4079168404Spjd zfs_dirent_unlock(sdl); 4080168404Spjd zfs_dirent_unlock(tdl); 4081168404Spjd 4082208131Smm if (sdzp == tdzp) 4083208131Smm rw_exit(&sdzp->z_name_lock); 4084208131Smm 4085219089Spjd 4086168962Spjd VN_RELE(ZTOV(szp)); 4087168404Spjd if (tzp) 4088168962Spjd VN_RELE(ZTOV(tzp)); 4089168404Spjd 4090219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4091219089Spjd zil_commit(zilog, 0); 4092219089Spjd 4093168404Spjd ZFS_EXIT(zfsvfs); 4094168404Spjd 4095168404Spjd return (error); 4096168404Spjd} 4097168404Spjd 4098168404Spjd/* 4099168404Spjd * Insert the indicated symbolic reference entry into the directory. 4100168404Spjd * 4101168404Spjd * IN: dvp - Directory to contain new symbolic link. 4102168404Spjd * link - Name for new symlink entry. 4103168404Spjd * vap - Attributes of new entry. 4104168404Spjd * cr - credentials of caller. 4105185029Spjd * ct - caller context 4106185029Spjd * flags - case flags 4107168404Spjd * 4108251631Sdelphij * RETURN: 0 on success, error code on failure. 4109168404Spjd * 4110168404Spjd * Timestamps: 4111168404Spjd * dvp - ctime|mtime updated 4112168404Spjd */ 4113185029Spjd/*ARGSUSED*/ 4114168404Spjdstatic int 4115185029Spjdzfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 4116185029Spjd cred_t *cr, kthread_t *td) 4117168404Spjd{ 4118168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 4119168404Spjd zfs_dirlock_t *dl; 4120168404Spjd dmu_tx_t *tx; 4121168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4122185029Spjd zilog_t *zilog; 4123219089Spjd uint64_t len = strlen(link); 4124168404Spjd int error; 4125185029Spjd int zflg = ZNEW; 4126209962Smm zfs_acl_ids_t acl_ids; 4127209962Smm boolean_t fuid_dirtied; 4128219089Spjd uint64_t txtype = TX_SYMLINK; 4129258632Savg boolean_t waited = B_FALSE; 4130185029Spjd int flags = 0; 4131168404Spjd 4132168962Spjd ASSERT(vap->va_type == VLNK); 4133168404Spjd 4134168404Spjd ZFS_ENTER(zfsvfs); 4135185029Spjd ZFS_VERIFY_ZP(dzp); 4136185029Spjd zilog = zfsvfs->z_log; 4137185029Spjd 4138185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 4139185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4140185029Spjd ZFS_EXIT(zfsvfs); 4141249195Smm return (SET_ERROR(EILSEQ)); 4142185029Spjd } 4143185029Spjd if (flags & FIGNORECASE) 4144185029Spjd zflg |= ZCILOOK; 4145168404Spjd 4146168404Spjd if (len > MAXPATHLEN) { 4147168404Spjd ZFS_EXIT(zfsvfs); 4148249195Smm return (SET_ERROR(ENAMETOOLONG)); 4149168404Spjd } 4150168404Spjd 4151219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, 4152219089Spjd vap, cr, NULL, &acl_ids)) != 0) { 4153219089Spjd ZFS_EXIT(zfsvfs); 4154219089Spjd return (error); 4155219089Spjd } 4156260704Savg 4157260704Savg getnewvnode_reserve(1); 4158260704Savg 4159219089Spjdtop: 4160168404Spjd /* 4161168404Spjd * Attempt to lock directory; fail if entry already exists. 4162168404Spjd */ 4163185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 4164185029Spjd if (error) { 4165219089Spjd zfs_acl_ids_free(&acl_ids); 4166260704Savg getnewvnode_drop_reserve(); 4167168404Spjd ZFS_EXIT(zfsvfs); 4168168404Spjd return (error); 4169168404Spjd } 4170168404Spjd 4171219089Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4172219089Spjd zfs_acl_ids_free(&acl_ids); 4173219089Spjd zfs_dirent_unlock(dl); 4174260704Savg getnewvnode_drop_reserve(); 4175219089Spjd ZFS_EXIT(zfsvfs); 4176219089Spjd return (error); 4177219089Spjd } 4178219089Spjd 4179209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 4180209962Smm zfs_acl_ids_free(&acl_ids); 4181209962Smm zfs_dirent_unlock(dl); 4182260704Savg getnewvnode_drop_reserve(); 4183209962Smm ZFS_EXIT(zfsvfs); 4184249195Smm return (SET_ERROR(EDQUOT)); 4185209962Smm } 4186168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4187209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 4188168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 4189168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4190219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 4191219089Spjd ZFS_SA_BASE_ATTR_SIZE + len); 4192219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 4193219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 4194219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 4195219089Spjd acl_ids.z_aclp->z_acl_bytes); 4196219089Spjd } 4197209962Smm if (fuid_dirtied) 4198209962Smm zfs_fuid_txhold(zfsvfs, tx); 4199258632Savg error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 4200168404Spjd if (error) { 4201168404Spjd zfs_dirent_unlock(dl); 4202209962Smm if (error == ERESTART) { 4203258632Savg waited = B_TRUE; 4204168404Spjd dmu_tx_wait(tx); 4205168404Spjd dmu_tx_abort(tx); 4206168404Spjd goto top; 4207168404Spjd } 4208219089Spjd zfs_acl_ids_free(&acl_ids); 4209168404Spjd dmu_tx_abort(tx); 4210260704Savg getnewvnode_drop_reserve(); 4211168404Spjd ZFS_EXIT(zfsvfs); 4212168404Spjd return (error); 4213168404Spjd } 4214168404Spjd 4215168404Spjd /* 4216168404Spjd * Create a new object for the symlink. 4217219089Spjd * for version 4 ZPL datsets the symlink will be an SA attribute 4218168404Spjd */ 4219219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 4220168404Spjd 4221219089Spjd if (fuid_dirtied) 4222219089Spjd zfs_fuid_sync(zfsvfs, tx); 4223209962Smm 4224219089Spjd mutex_enter(&zp->z_lock); 4225219089Spjd if (zp->z_is_sa) 4226219089Spjd error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 4227219089Spjd link, len, tx); 4228219089Spjd else 4229219089Spjd zfs_sa_symlink(zp, link, len, tx); 4230219089Spjd mutex_exit(&zp->z_lock); 4231168404Spjd 4232219089Spjd zp->z_size = len; 4233219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 4234219089Spjd &zp->z_size, sizeof (zp->z_size), tx); 4235168404Spjd /* 4236168404Spjd * Insert the new object into the directory. 4237168404Spjd */ 4238168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 4239168404Spjd 4240219089Spjd if (flags & FIGNORECASE) 4241219089Spjd txtype |= TX_CI; 4242219089Spjd zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 4243219089Spjd *vpp = ZTOV(zp); 4244219089Spjd 4245209962Smm zfs_acl_ids_free(&acl_ids); 4246209962Smm 4247168404Spjd dmu_tx_commit(tx); 4248168404Spjd 4249260704Savg getnewvnode_drop_reserve(); 4250260704Savg 4251168404Spjd zfs_dirent_unlock(dl); 4252168404Spjd 4253219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4254219089Spjd zil_commit(zilog, 0); 4255219089Spjd 4256168404Spjd ZFS_EXIT(zfsvfs); 4257168404Spjd return (error); 4258168404Spjd} 4259168404Spjd 4260168404Spjd/* 4261168404Spjd * Return, in the buffer contained in the provided uio structure, 4262168404Spjd * the symbolic path referred to by vp. 4263168404Spjd * 4264168404Spjd * IN: vp - vnode of symbolic link. 4265251631Sdelphij * uio - structure to contain the link path. 4266168404Spjd * cr - credentials of caller. 4267185029Spjd * ct - caller context 4268168404Spjd * 4269251631Sdelphij * OUT: uio - structure containing the link path. 4270168404Spjd * 4271251631Sdelphij * RETURN: 0 on success, error code on failure. 4272168404Spjd * 4273168404Spjd * Timestamps: 4274168404Spjd * vp - atime updated 4275168404Spjd */ 4276168404Spjd/* ARGSUSED */ 4277168404Spjdstatic int 4278185029Spjdzfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 4279168404Spjd{ 4280168404Spjd znode_t *zp = VTOZ(vp); 4281168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4282168404Spjd int error; 4283168404Spjd 4284168404Spjd ZFS_ENTER(zfsvfs); 4285185029Spjd ZFS_VERIFY_ZP(zp); 4286168404Spjd 4287219089Spjd mutex_enter(&zp->z_lock); 4288219089Spjd if (zp->z_is_sa) 4289219089Spjd error = sa_lookup_uio(zp->z_sa_hdl, 4290219089Spjd SA_ZPL_SYMLINK(zfsvfs), uio); 4291219089Spjd else 4292219089Spjd error = zfs_sa_readlink(zp, uio); 4293219089Spjd mutex_exit(&zp->z_lock); 4294168404Spjd 4295168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4296219089Spjd 4297168404Spjd ZFS_EXIT(zfsvfs); 4298168404Spjd return (error); 4299168404Spjd} 4300168404Spjd 4301168404Spjd/* 4302168404Spjd * Insert a new entry into directory tdvp referencing svp. 4303168404Spjd * 4304168404Spjd * IN: tdvp - Directory to contain new entry. 4305168404Spjd * svp - vnode of new entry. 4306168404Spjd * name - name of new entry. 4307168404Spjd * cr - credentials of caller. 4308185029Spjd * ct - caller context 4309168404Spjd * 4310251631Sdelphij * RETURN: 0 on success, error code on failure. 4311168404Spjd * 4312168404Spjd * Timestamps: 4313168404Spjd * tdvp - ctime|mtime updated 4314168404Spjd * svp - ctime updated 4315168404Spjd */ 4316168404Spjd/* ARGSUSED */ 4317168404Spjdstatic int 4318185029Spjdzfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 4319185029Spjd caller_context_t *ct, int flags) 4320168404Spjd{ 4321168404Spjd znode_t *dzp = VTOZ(tdvp); 4322168404Spjd znode_t *tzp, *szp; 4323168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4324185029Spjd zilog_t *zilog; 4325168404Spjd zfs_dirlock_t *dl; 4326168404Spjd dmu_tx_t *tx; 4327168962Spjd vnode_t *realvp; 4328168404Spjd int error; 4329185029Spjd int zf = ZNEW; 4330212694Smm uint64_t parent; 4331185029Spjd uid_t owner; 4332258632Savg boolean_t waited = B_FALSE; 4333168404Spjd 4334168404Spjd ASSERT(tdvp->v_type == VDIR); 4335168404Spjd 4336168404Spjd ZFS_ENTER(zfsvfs); 4337185029Spjd ZFS_VERIFY_ZP(dzp); 4338185029Spjd zilog = zfsvfs->z_log; 4339168404Spjd 4340185029Spjd if (VOP_REALVP(svp, &realvp, ct) == 0) 4341168962Spjd svp = realvp; 4342168962Spjd 4343212694Smm /* 4344212694Smm * POSIX dictates that we return EPERM here. 4345212694Smm * Better choices include ENOTSUP or EISDIR. 4346212694Smm */ 4347212694Smm if (svp->v_type == VDIR) { 4348168404Spjd ZFS_EXIT(zfsvfs); 4349249195Smm return (SET_ERROR(EPERM)); 4350212694Smm } 4351212694Smm 4352254585Sdelphij szp = VTOZ(svp); 4353254585Sdelphij ZFS_VERIFY_ZP(szp); 4354254585Sdelphij 4355258597Spjd if (szp->z_pflags & (ZFS_APPENDONLY | ZFS_IMMUTABLE | ZFS_READONLY)) { 4356258597Spjd ZFS_EXIT(zfsvfs); 4357258597Spjd return (SET_ERROR(EPERM)); 4358258597Spjd } 4359258597Spjd 4360254585Sdelphij /* 4361254585Sdelphij * We check z_zfsvfs rather than v_vfsp here, because snapshots and the 4362254585Sdelphij * ctldir appear to have the same v_vfsp. 4363254585Sdelphij */ 4364254585Sdelphij if (szp->z_zfsvfs != zfsvfs || zfsctl_is_node(svp)) { 4365212694Smm ZFS_EXIT(zfsvfs); 4366249195Smm return (SET_ERROR(EXDEV)); 4367168404Spjd } 4368212694Smm 4369212694Smm /* Prevent links to .zfs/shares files */ 4370212694Smm 4371219089Spjd if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 4372219089Spjd &parent, sizeof (uint64_t))) != 0) { 4373212694Smm ZFS_EXIT(zfsvfs); 4374219089Spjd return (error); 4375219089Spjd } 4376219089Spjd if (parent == zfsvfs->z_shares_dir) { 4377219089Spjd ZFS_EXIT(zfsvfs); 4378249195Smm return (SET_ERROR(EPERM)); 4379212694Smm } 4380212694Smm 4381185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, 4382185029Spjd strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4383185029Spjd ZFS_EXIT(zfsvfs); 4384249195Smm return (SET_ERROR(EILSEQ)); 4385185029Spjd } 4386185029Spjd if (flags & FIGNORECASE) 4387185029Spjd zf |= ZCILOOK; 4388185029Spjd 4389168404Spjd /* 4390168404Spjd * We do not support links between attributes and non-attributes 4391168404Spjd * because of the potential security risk of creating links 4392168404Spjd * into "normal" file space in order to circumvent restrictions 4393168404Spjd * imposed in attribute space. 4394168404Spjd */ 4395219089Spjd if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4396168404Spjd ZFS_EXIT(zfsvfs); 4397249195Smm return (SET_ERROR(EINVAL)); 4398168404Spjd } 4399168404Spjd 4400168404Spjd 4401219089Spjd owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4402219089Spjd if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { 4403168404Spjd ZFS_EXIT(zfsvfs); 4404249195Smm return (SET_ERROR(EPERM)); 4405168404Spjd } 4406168404Spjd 4407185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4408168404Spjd ZFS_EXIT(zfsvfs); 4409168404Spjd return (error); 4410168404Spjd } 4411168404Spjd 4412212694Smmtop: 4413168404Spjd /* 4414168404Spjd * Attempt to lock directory; fail if entry already exists. 4415168404Spjd */ 4416185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 4417185029Spjd if (error) { 4418168404Spjd ZFS_EXIT(zfsvfs); 4419168404Spjd return (error); 4420168404Spjd } 4421168404Spjd 4422168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4423219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4424168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4425219089Spjd zfs_sa_upgrade_txholds(tx, szp); 4426219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 4427258632Savg error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 4428168404Spjd if (error) { 4429168404Spjd zfs_dirent_unlock(dl); 4430209962Smm if (error == ERESTART) { 4431258632Savg waited = B_TRUE; 4432168404Spjd dmu_tx_wait(tx); 4433168404Spjd dmu_tx_abort(tx); 4434168404Spjd goto top; 4435168404Spjd } 4436168404Spjd dmu_tx_abort(tx); 4437168404Spjd ZFS_EXIT(zfsvfs); 4438168404Spjd return (error); 4439168404Spjd } 4440168404Spjd 4441168404Spjd error = zfs_link_create(dl, szp, tx, 0); 4442168404Spjd 4443185029Spjd if (error == 0) { 4444185029Spjd uint64_t txtype = TX_LINK; 4445185029Spjd if (flags & FIGNORECASE) 4446185029Spjd txtype |= TX_CI; 4447185029Spjd zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4448185029Spjd } 4449168404Spjd 4450168404Spjd dmu_tx_commit(tx); 4451168404Spjd 4452168404Spjd zfs_dirent_unlock(dl); 4453168404Spjd 4454185029Spjd if (error == 0) { 4455185029Spjd vnevent_link(svp, ct); 4456185029Spjd } 4457185029Spjd 4458219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4459219089Spjd zil_commit(zilog, 0); 4460219089Spjd 4461168404Spjd ZFS_EXIT(zfsvfs); 4462168404Spjd return (error); 4463168404Spjd} 4464168404Spjd 4465219089Spjd#ifdef sun 4466219089Spjd/* 4467219089Spjd * zfs_null_putapage() is used when the file system has been force 4468219089Spjd * unmounted. It just drops the pages. 4469219089Spjd */ 4470219089Spjd/* ARGSUSED */ 4471219089Spjdstatic int 4472219089Spjdzfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4473219089Spjd size_t *lenp, int flags, cred_t *cr) 4474219089Spjd{ 4475219089Spjd pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); 4476219089Spjd return (0); 4477219089Spjd} 4478219089Spjd 4479219089Spjd/* 4480219089Spjd * Push a page out to disk, klustering if possible. 4481219089Spjd * 4482219089Spjd * IN: vp - file to push page to. 4483219089Spjd * pp - page to push. 4484219089Spjd * flags - additional flags. 4485219089Spjd * cr - credentials of caller. 4486219089Spjd * 4487219089Spjd * OUT: offp - start of range pushed. 4488219089Spjd * lenp - len of range pushed. 4489219089Spjd * 4490251631Sdelphij * RETURN: 0 on success, error code on failure. 4491219089Spjd * 4492219089Spjd * NOTE: callers must have locked the page to be pushed. On 4493219089Spjd * exit, the page (and all other pages in the kluster) must be 4494219089Spjd * unlocked. 4495219089Spjd */ 4496219089Spjd/* ARGSUSED */ 4497219089Spjdstatic int 4498219089Spjdzfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4499219089Spjd size_t *lenp, int flags, cred_t *cr) 4500219089Spjd{ 4501219089Spjd znode_t *zp = VTOZ(vp); 4502219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4503219089Spjd dmu_tx_t *tx; 4504219089Spjd u_offset_t off, koff; 4505219089Spjd size_t len, klen; 4506219089Spjd int err; 4507219089Spjd 4508219089Spjd off = pp->p_offset; 4509219089Spjd len = PAGESIZE; 4510219089Spjd /* 4511219089Spjd * If our blocksize is bigger than the page size, try to kluster 4512219089Spjd * multiple pages so that we write a full block (thus avoiding 4513219089Spjd * a read-modify-write). 4514219089Spjd */ 4515219089Spjd if (off < zp->z_size && zp->z_blksz > PAGESIZE) { 4516219089Spjd klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); 4517219089Spjd koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0; 4518219089Spjd ASSERT(koff <= zp->z_size); 4519219089Spjd if (koff + klen > zp->z_size) 4520219089Spjd klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); 4521219089Spjd pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); 4522219089Spjd } 4523219089Spjd ASSERT3U(btop(len), ==, btopr(len)); 4524219089Spjd 4525219089Spjd /* 4526219089Spjd * Can't push pages past end-of-file. 4527219089Spjd */ 4528219089Spjd if (off >= zp->z_size) { 4529219089Spjd /* ignore all pages */ 4530219089Spjd err = 0; 4531219089Spjd goto out; 4532219089Spjd } else if (off + len > zp->z_size) { 4533219089Spjd int npages = btopr(zp->z_size - off); 4534219089Spjd page_t *trunc; 4535219089Spjd 4536219089Spjd page_list_break(&pp, &trunc, npages); 4537219089Spjd /* ignore pages past end of file */ 4538219089Spjd if (trunc) 4539219089Spjd pvn_write_done(trunc, flags); 4540219089Spjd len = zp->z_size - off; 4541219089Spjd } 4542219089Spjd 4543219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 4544219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4545249195Smm err = SET_ERROR(EDQUOT); 4546219089Spjd goto out; 4547219089Spjd } 4548219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 4549219089Spjd dmu_tx_hold_write(tx, zp->z_id, off, len); 4550219089Spjd 4551219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4552219089Spjd zfs_sa_upgrade_txholds(tx, zp); 4553258720Savg err = dmu_tx_assign(tx, TXG_WAIT); 4554219089Spjd if (err != 0) { 4555219089Spjd dmu_tx_abort(tx); 4556219089Spjd goto out; 4557219089Spjd } 4558219089Spjd 4559219089Spjd if (zp->z_blksz <= PAGESIZE) { 4560219089Spjd caddr_t va = zfs_map_page(pp, S_READ); 4561219089Spjd ASSERT3U(len, <=, PAGESIZE); 4562219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 4563219089Spjd zfs_unmap_page(pp, va); 4564219089Spjd } else { 4565219089Spjd err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 4566219089Spjd } 4567219089Spjd 4568219089Spjd if (err == 0) { 4569219089Spjd uint64_t mtime[2], ctime[2]; 4570219089Spjd sa_bulk_attr_t bulk[3]; 4571219089Spjd int count = 0; 4572219089Spjd 4573219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 4574219089Spjd &mtime, 16); 4575219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 4576219089Spjd &ctime, 16); 4577219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4578219089Spjd &zp->z_pflags, 8); 4579219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 4580219089Spjd B_TRUE); 4581219089Spjd zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 4582219089Spjd } 4583219089Spjd dmu_tx_commit(tx); 4584219089Spjd 4585219089Spjdout: 4586219089Spjd pvn_write_done(pp, (err ? B_ERROR : 0) | flags); 4587219089Spjd if (offp) 4588219089Spjd *offp = off; 4589219089Spjd if (lenp) 4590219089Spjd *lenp = len; 4591219089Spjd 4592219089Spjd return (err); 4593219089Spjd} 4594219089Spjd 4595219089Spjd/* 4596219089Spjd * Copy the portion of the file indicated from pages into the file. 4597219089Spjd * The pages are stored in a page list attached to the files vnode. 4598219089Spjd * 4599219089Spjd * IN: vp - vnode of file to push page data to. 4600219089Spjd * off - position in file to put data. 4601219089Spjd * len - amount of data to write. 4602219089Spjd * flags - flags to control the operation. 4603219089Spjd * cr - credentials of caller. 4604219089Spjd * ct - caller context. 4605219089Spjd * 4606251631Sdelphij * RETURN: 0 on success, error code on failure. 4607219089Spjd * 4608219089Spjd * Timestamps: 4609219089Spjd * vp - ctime|mtime updated 4610219089Spjd */ 4611185029Spjd/*ARGSUSED*/ 4612219089Spjdstatic int 4613219089Spjdzfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, 4614219089Spjd caller_context_t *ct) 4615219089Spjd{ 4616219089Spjd znode_t *zp = VTOZ(vp); 4617219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4618219089Spjd page_t *pp; 4619219089Spjd size_t io_len; 4620219089Spjd u_offset_t io_off; 4621219089Spjd uint_t blksz; 4622219089Spjd rl_t *rl; 4623219089Spjd int error = 0; 4624219089Spjd 4625219089Spjd ZFS_ENTER(zfsvfs); 4626219089Spjd ZFS_VERIFY_ZP(zp); 4627219089Spjd 4628219089Spjd /* 4629219089Spjd * Align this request to the file block size in case we kluster. 4630219089Spjd * XXX - this can result in pretty aggresive locking, which can 4631219089Spjd * impact simultanious read/write access. One option might be 4632219089Spjd * to break up long requests (len == 0) into block-by-block 4633219089Spjd * operations to get narrower locking. 4634219089Spjd */ 4635219089Spjd blksz = zp->z_blksz; 4636219089Spjd if (ISP2(blksz)) 4637219089Spjd io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); 4638219089Spjd else 4639219089Spjd io_off = 0; 4640219089Spjd if (len > 0 && ISP2(blksz)) 4641219089Spjd io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); 4642219089Spjd else 4643219089Spjd io_len = 0; 4644219089Spjd 4645219089Spjd if (io_len == 0) { 4646219089Spjd /* 4647219089Spjd * Search the entire vp list for pages >= io_off. 4648219089Spjd */ 4649219089Spjd rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); 4650219089Spjd error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); 4651219089Spjd goto out; 4652219089Spjd } 4653219089Spjd rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); 4654219089Spjd 4655219089Spjd if (off > zp->z_size) { 4656219089Spjd /* past end of file */ 4657219089Spjd zfs_range_unlock(rl); 4658219089Spjd ZFS_EXIT(zfsvfs); 4659219089Spjd return (0); 4660219089Spjd } 4661219089Spjd 4662219089Spjd len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); 4663219089Spjd 4664219089Spjd for (off = io_off; io_off < off + len; io_off += io_len) { 4665219089Spjd if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 4666219089Spjd pp = page_lookup(vp, io_off, 4667219089Spjd (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); 4668219089Spjd } else { 4669219089Spjd pp = page_lookup_nowait(vp, io_off, 4670219089Spjd (flags & B_FREE) ? SE_EXCL : SE_SHARED); 4671219089Spjd } 4672219089Spjd 4673219089Spjd if (pp != NULL && pvn_getdirty(pp, flags)) { 4674219089Spjd int err; 4675219089Spjd 4676219089Spjd /* 4677219089Spjd * Found a dirty page to push 4678219089Spjd */ 4679219089Spjd err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); 4680219089Spjd if (err) 4681219089Spjd error = err; 4682219089Spjd } else { 4683219089Spjd io_len = PAGESIZE; 4684219089Spjd } 4685219089Spjd } 4686219089Spjdout: 4687219089Spjd zfs_range_unlock(rl); 4688219089Spjd if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4689219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 4690219089Spjd ZFS_EXIT(zfsvfs); 4691219089Spjd return (error); 4692219089Spjd} 4693219089Spjd#endif /* sun */ 4694219089Spjd 4695219089Spjd/*ARGSUSED*/ 4696168962Spjdvoid 4697185029Spjdzfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4698168404Spjd{ 4699168962Spjd znode_t *zp = VTOZ(vp); 4700168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4701168962Spjd int error; 4702168404Spjd 4703185029Spjd rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4704219089Spjd if (zp->z_sa_hdl == NULL) { 4705185029Spjd /* 4706185029Spjd * The fs has been unmounted, or we did a 4707185029Spjd * suspend/resume and this file no longer exists. 4708185029Spjd */ 4709243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 4710234607Strasz vrecycle(vp); 4711243520Savg return; 4712243520Savg } 4713243520Savg 4714243520Savg mutex_enter(&zp->z_lock); 4715243520Savg if (zp->z_unlinked) { 4716243520Savg /* 4717243520Savg * Fast path to recycle a vnode of a removed file. 4718243520Savg */ 4719243520Savg mutex_exit(&zp->z_lock); 4720185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4721243520Savg vrecycle(vp); 4722168962Spjd return; 4723168404Spjd } 4724243520Savg mutex_exit(&zp->z_lock); 4725168404Spjd 4726168404Spjd if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4727168404Spjd dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4728168404Spjd 4729219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4730219089Spjd zfs_sa_upgrade_txholds(tx, zp); 4731168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 4732168404Spjd if (error) { 4733168404Spjd dmu_tx_abort(tx); 4734168404Spjd } else { 4735168404Spjd mutex_enter(&zp->z_lock); 4736219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 4737219089Spjd (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4738168404Spjd zp->z_atime_dirty = 0; 4739168404Spjd mutex_exit(&zp->z_lock); 4740168404Spjd dmu_tx_commit(tx); 4741168404Spjd } 4742168404Spjd } 4743185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4744168404Spjd} 4745168404Spjd 4746219089Spjd#ifdef sun 4747219089Spjd/* 4748219089Spjd * Bounds-check the seek operation. 4749219089Spjd * 4750219089Spjd * IN: vp - vnode seeking within 4751219089Spjd * ooff - old file offset 4752219089Spjd * noffp - pointer to new file offset 4753219089Spjd * ct - caller context 4754219089Spjd * 4755251631Sdelphij * RETURN: 0 on success, EINVAL if new offset invalid. 4756219089Spjd */ 4757219089Spjd/* ARGSUSED */ 4758219089Spjdstatic int 4759219089Spjdzfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 4760219089Spjd caller_context_t *ct) 4761219089Spjd{ 4762219089Spjd if (vp->v_type == VDIR) 4763219089Spjd return (0); 4764219089Spjd return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 4765219089Spjd} 4766219089Spjd 4767219089Spjd/* 4768219089Spjd * Pre-filter the generic locking function to trap attempts to place 4769219089Spjd * a mandatory lock on a memory mapped file. 4770219089Spjd */ 4771219089Spjdstatic int 4772219089Spjdzfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 4773219089Spjd flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 4774219089Spjd{ 4775219089Spjd znode_t *zp = VTOZ(vp); 4776219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4777219089Spjd 4778219089Spjd ZFS_ENTER(zfsvfs); 4779219089Spjd ZFS_VERIFY_ZP(zp); 4780219089Spjd 4781219089Spjd /* 4782219089Spjd * We are following the UFS semantics with respect to mapcnt 4783219089Spjd * here: If we see that the file is mapped already, then we will 4784219089Spjd * return an error, but we don't worry about races between this 4785219089Spjd * function and zfs_map(). 4786219089Spjd */ 4787219089Spjd if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { 4788219089Spjd ZFS_EXIT(zfsvfs); 4789249195Smm return (SET_ERROR(EAGAIN)); 4790219089Spjd } 4791219089Spjd ZFS_EXIT(zfsvfs); 4792219089Spjd return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 4793219089Spjd} 4794219089Spjd 4795219089Spjd/* 4796219089Spjd * If we can't find a page in the cache, we will create a new page 4797219089Spjd * and fill it with file data. For efficiency, we may try to fill 4798219089Spjd * multiple pages at once (klustering) to fill up the supplied page 4799219089Spjd * list. Note that the pages to be filled are held with an exclusive 4800219089Spjd * lock to prevent access by other threads while they are being filled. 4801219089Spjd */ 4802219089Spjdstatic int 4803219089Spjdzfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, 4804219089Spjd caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) 4805219089Spjd{ 4806219089Spjd znode_t *zp = VTOZ(vp); 4807219089Spjd page_t *pp, *cur_pp; 4808219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 4809219089Spjd u_offset_t io_off, total; 4810219089Spjd size_t io_len; 4811219089Spjd int err; 4812219089Spjd 4813219089Spjd if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { 4814219089Spjd /* 4815219089Spjd * We only have a single page, don't bother klustering 4816219089Spjd */ 4817219089Spjd io_off = off; 4818219089Spjd io_len = PAGESIZE; 4819219089Spjd pp = page_create_va(vp, io_off, io_len, 4820219089Spjd PG_EXCL | PG_WAIT, seg, addr); 4821219089Spjd } else { 4822219089Spjd /* 4823219089Spjd * Try to find enough pages to fill the page list 4824219089Spjd */ 4825219089Spjd pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 4826219089Spjd &io_len, off, plsz, 0); 4827219089Spjd } 4828219089Spjd if (pp == NULL) { 4829219089Spjd /* 4830219089Spjd * The page already exists, nothing to do here. 4831219089Spjd */ 4832219089Spjd *pl = NULL; 4833219089Spjd return (0); 4834219089Spjd } 4835219089Spjd 4836219089Spjd /* 4837219089Spjd * Fill the pages in the kluster. 4838219089Spjd */ 4839219089Spjd cur_pp = pp; 4840219089Spjd for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { 4841219089Spjd caddr_t va; 4842219089Spjd 4843219089Spjd ASSERT3U(io_off, ==, cur_pp->p_offset); 4844219089Spjd va = zfs_map_page(cur_pp, S_WRITE); 4845219089Spjd err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, 4846219089Spjd DMU_READ_PREFETCH); 4847219089Spjd zfs_unmap_page(cur_pp, va); 4848219089Spjd if (err) { 4849219089Spjd /* On error, toss the entire kluster */ 4850219089Spjd pvn_read_done(pp, B_ERROR); 4851219089Spjd /* convert checksum errors into IO errors */ 4852219089Spjd if (err == ECKSUM) 4853249195Smm err = SET_ERROR(EIO); 4854219089Spjd return (err); 4855219089Spjd } 4856219089Spjd cur_pp = cur_pp->p_next; 4857219089Spjd } 4858219089Spjd 4859219089Spjd /* 4860219089Spjd * Fill in the page list array from the kluster starting 4861219089Spjd * from the desired offset `off'. 4862219089Spjd * NOTE: the page list will always be null terminated. 4863219089Spjd */ 4864219089Spjd pvn_plist_init(pp, pl, plsz, off, io_len, rw); 4865219089Spjd ASSERT(pl == NULL || (*pl)->p_offset == off); 4866219089Spjd 4867219089Spjd return (0); 4868219089Spjd} 4869219089Spjd 4870219089Spjd/* 4871219089Spjd * Return pointers to the pages for the file region [off, off + len] 4872219089Spjd * in the pl array. If plsz is greater than len, this function may 4873219089Spjd * also return page pointers from after the specified region 4874219089Spjd * (i.e. the region [off, off + plsz]). These additional pages are 4875219089Spjd * only returned if they are already in the cache, or were created as 4876219089Spjd * part of a klustered read. 4877219089Spjd * 4878219089Spjd * IN: vp - vnode of file to get data from. 4879219089Spjd * off - position in file to get data from. 4880219089Spjd * len - amount of data to retrieve. 4881219089Spjd * plsz - length of provided page list. 4882219089Spjd * seg - segment to obtain pages for. 4883219089Spjd * addr - virtual address of fault. 4884219089Spjd * rw - mode of created pages. 4885219089Spjd * cr - credentials of caller. 4886219089Spjd * ct - caller context. 4887219089Spjd * 4888219089Spjd * OUT: protp - protection mode of created pages. 4889219089Spjd * pl - list of pages created. 4890219089Spjd * 4891251631Sdelphij * RETURN: 0 on success, error code on failure. 4892219089Spjd * 4893219089Spjd * Timestamps: 4894219089Spjd * vp - atime updated 4895219089Spjd */ 4896219089Spjd/* ARGSUSED */ 4897219089Spjdstatic int 4898219089Spjdzfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 4899251631Sdelphij page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, 4900251631Sdelphij enum seg_rw rw, cred_t *cr, caller_context_t *ct) 4901219089Spjd{ 4902219089Spjd znode_t *zp = VTOZ(vp); 4903219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4904219089Spjd page_t **pl0 = pl; 4905219089Spjd int err = 0; 4906219089Spjd 4907219089Spjd /* we do our own caching, faultahead is unnecessary */ 4908219089Spjd if (pl == NULL) 4909219089Spjd return (0); 4910219089Spjd else if (len > plsz) 4911219089Spjd len = plsz; 4912219089Spjd else 4913219089Spjd len = P2ROUNDUP(len, PAGESIZE); 4914219089Spjd ASSERT(plsz >= len); 4915219089Spjd 4916219089Spjd ZFS_ENTER(zfsvfs); 4917219089Spjd ZFS_VERIFY_ZP(zp); 4918219089Spjd 4919219089Spjd if (protp) 4920219089Spjd *protp = PROT_ALL; 4921219089Spjd 4922219089Spjd /* 4923219089Spjd * Loop through the requested range [off, off + len) looking 4924219089Spjd * for pages. If we don't find a page, we will need to create 4925219089Spjd * a new page and fill it with data from the file. 4926219089Spjd */ 4927219089Spjd while (len > 0) { 4928219089Spjd if (*pl = page_lookup(vp, off, SE_SHARED)) 4929219089Spjd *(pl+1) = NULL; 4930219089Spjd else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) 4931219089Spjd goto out; 4932219089Spjd while (*pl) { 4933219089Spjd ASSERT3U((*pl)->p_offset, ==, off); 4934219089Spjd off += PAGESIZE; 4935219089Spjd addr += PAGESIZE; 4936219089Spjd if (len > 0) { 4937219089Spjd ASSERT3U(len, >=, PAGESIZE); 4938219089Spjd len -= PAGESIZE; 4939219089Spjd } 4940219089Spjd ASSERT3U(plsz, >=, PAGESIZE); 4941219089Spjd plsz -= PAGESIZE; 4942219089Spjd pl++; 4943219089Spjd } 4944219089Spjd } 4945219089Spjd 4946219089Spjd /* 4947219089Spjd * Fill out the page array with any pages already in the cache. 4948219089Spjd */ 4949219089Spjd while (plsz > 0 && 4950219089Spjd (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { 4951219089Spjd off += PAGESIZE; 4952219089Spjd plsz -= PAGESIZE; 4953219089Spjd } 4954219089Spjdout: 4955219089Spjd if (err) { 4956219089Spjd /* 4957219089Spjd * Release any pages we have previously locked. 4958219089Spjd */ 4959219089Spjd while (pl > pl0) 4960219089Spjd page_unlock(*--pl); 4961219089Spjd } else { 4962219089Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4963219089Spjd } 4964219089Spjd 4965219089Spjd *pl = NULL; 4966219089Spjd 4967219089Spjd ZFS_EXIT(zfsvfs); 4968219089Spjd return (err); 4969219089Spjd} 4970219089Spjd 4971219089Spjd/* 4972219089Spjd * Request a memory map for a section of a file. This code interacts 4973219089Spjd * with common code and the VM system as follows: 4974219089Spjd * 4975251631Sdelphij * - common code calls mmap(), which ends up in smmap_common() 4976251631Sdelphij * - this calls VOP_MAP(), which takes you into (say) zfs 4977251631Sdelphij * - zfs_map() calls as_map(), passing segvn_create() as the callback 4978251631Sdelphij * - segvn_create() creates the new segment and calls VOP_ADDMAP() 4979251631Sdelphij * - zfs_addmap() updates z_mapcnt 4980219089Spjd */ 4981219089Spjd/*ARGSUSED*/ 4982219089Spjdstatic int 4983219089Spjdzfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 4984219089Spjd size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4985219089Spjd caller_context_t *ct) 4986219089Spjd{ 4987219089Spjd znode_t *zp = VTOZ(vp); 4988219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4989219089Spjd segvn_crargs_t vn_a; 4990219089Spjd int error; 4991219089Spjd 4992219089Spjd ZFS_ENTER(zfsvfs); 4993219089Spjd ZFS_VERIFY_ZP(zp); 4994219089Spjd 4995219089Spjd if ((prot & PROT_WRITE) && (zp->z_pflags & 4996219089Spjd (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { 4997219089Spjd ZFS_EXIT(zfsvfs); 4998249195Smm return (SET_ERROR(EPERM)); 4999219089Spjd } 5000219089Spjd 5001219089Spjd if ((prot & (PROT_READ | PROT_EXEC)) && 5002219089Spjd (zp->z_pflags & ZFS_AV_QUARANTINED)) { 5003219089Spjd ZFS_EXIT(zfsvfs); 5004249195Smm return (SET_ERROR(EACCES)); 5005219089Spjd } 5006219089Spjd 5007219089Spjd if (vp->v_flag & VNOMAP) { 5008219089Spjd ZFS_EXIT(zfsvfs); 5009249195Smm return (SET_ERROR(ENOSYS)); 5010219089Spjd } 5011219089Spjd 5012219089Spjd if (off < 0 || len > MAXOFFSET_T - off) { 5013219089Spjd ZFS_EXIT(zfsvfs); 5014249195Smm return (SET_ERROR(ENXIO)); 5015219089Spjd } 5016219089Spjd 5017219089Spjd if (vp->v_type != VREG) { 5018219089Spjd ZFS_EXIT(zfsvfs); 5019249195Smm return (SET_ERROR(ENODEV)); 5020219089Spjd } 5021219089Spjd 5022219089Spjd /* 5023219089Spjd * If file is locked, disallow mapping. 5024219089Spjd */ 5025219089Spjd if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) { 5026219089Spjd ZFS_EXIT(zfsvfs); 5027249195Smm return (SET_ERROR(EAGAIN)); 5028219089Spjd } 5029219089Spjd 5030219089Spjd as_rangelock(as); 5031219089Spjd error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 5032219089Spjd if (error != 0) { 5033219089Spjd as_rangeunlock(as); 5034219089Spjd ZFS_EXIT(zfsvfs); 5035219089Spjd return (error); 5036219089Spjd } 5037219089Spjd 5038219089Spjd vn_a.vp = vp; 5039219089Spjd vn_a.offset = (u_offset_t)off; 5040219089Spjd vn_a.type = flags & MAP_TYPE; 5041219089Spjd vn_a.prot = prot; 5042219089Spjd vn_a.maxprot = maxprot; 5043219089Spjd vn_a.cred = cr; 5044219089Spjd vn_a.amp = NULL; 5045219089Spjd vn_a.flags = flags & ~MAP_TYPE; 5046219089Spjd vn_a.szc = 0; 5047219089Spjd vn_a.lgrp_mem_policy_flags = 0; 5048219089Spjd 5049219089Spjd error = as_map(as, *addrp, len, segvn_create, &vn_a); 5050219089Spjd 5051219089Spjd as_rangeunlock(as); 5052219089Spjd ZFS_EXIT(zfsvfs); 5053219089Spjd return (error); 5054219089Spjd} 5055219089Spjd 5056219089Spjd/* ARGSUSED */ 5057219089Spjdstatic int 5058219089Spjdzfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 5059219089Spjd size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 5060219089Spjd caller_context_t *ct) 5061219089Spjd{ 5062219089Spjd uint64_t pages = btopr(len); 5063219089Spjd 5064219089Spjd atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); 5065219089Spjd return (0); 5066219089Spjd} 5067219089Spjd 5068219089Spjd/* 5069219089Spjd * The reason we push dirty pages as part of zfs_delmap() is so that we get a 5070219089Spjd * more accurate mtime for the associated file. Since we don't have a way of 5071219089Spjd * detecting when the data was actually modified, we have to resort to 5072219089Spjd * heuristics. If an explicit msync() is done, then we mark the mtime when the 5073219089Spjd * last page is pushed. The problem occurs when the msync() call is omitted, 5074219089Spjd * which by far the most common case: 5075219089Spjd * 5076268464Sdelphij * open() 5077268464Sdelphij * mmap() 5078268464Sdelphij * <modify memory> 5079268464Sdelphij * munmap() 5080268464Sdelphij * close() 5081268464Sdelphij * <time lapse> 5082268464Sdelphij * putpage() via fsflush 5083219089Spjd * 5084219089Spjd * If we wait until fsflush to come along, we can have a modification time that 5085219089Spjd * is some arbitrary point in the future. In order to prevent this in the 5086219089Spjd * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is 5087219089Spjd * torn down. 5088219089Spjd */ 5089219089Spjd/* ARGSUSED */ 5090219089Spjdstatic int 5091219089Spjdzfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 5092219089Spjd size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 5093219089Spjd caller_context_t *ct) 5094219089Spjd{ 5095219089Spjd uint64_t pages = btopr(len); 5096219089Spjd 5097219089Spjd ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 5098219089Spjd atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 5099219089Spjd 5100219089Spjd if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 5101219089Spjd vn_has_cached_data(vp)) 5102219089Spjd (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 5103219089Spjd 5104219089Spjd return (0); 5105219089Spjd} 5106219089Spjd 5107219089Spjd/* 5108219089Spjd * Free or allocate space in a file. Currently, this function only 5109219089Spjd * supports the `F_FREESP' command. However, this command is somewhat 5110219089Spjd * misnamed, as its functionality includes the ability to allocate as 5111219089Spjd * well as free space. 5112219089Spjd * 5113219089Spjd * IN: vp - vnode of file to free data in. 5114219089Spjd * cmd - action to take (only F_FREESP supported). 5115219089Spjd * bfp - section of file to free/alloc. 5116219089Spjd * flag - current file open mode flags. 5117219089Spjd * offset - current file offset. 5118219089Spjd * cr - credentials of caller [UNUSED]. 5119219089Spjd * ct - caller context. 5120219089Spjd * 5121251631Sdelphij * RETURN: 0 on success, error code on failure. 5122219089Spjd * 5123219089Spjd * Timestamps: 5124219089Spjd * vp - ctime|mtime updated 5125219089Spjd */ 5126219089Spjd/* ARGSUSED */ 5127219089Spjdstatic int 5128219089Spjdzfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, 5129219089Spjd offset_t offset, cred_t *cr, caller_context_t *ct) 5130219089Spjd{ 5131219089Spjd znode_t *zp = VTOZ(vp); 5132219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5133219089Spjd uint64_t off, len; 5134219089Spjd int error; 5135219089Spjd 5136219089Spjd ZFS_ENTER(zfsvfs); 5137219089Spjd ZFS_VERIFY_ZP(zp); 5138219089Spjd 5139219089Spjd if (cmd != F_FREESP) { 5140219089Spjd ZFS_EXIT(zfsvfs); 5141249195Smm return (SET_ERROR(EINVAL)); 5142219089Spjd } 5143219089Spjd 5144262990Sdelphij /* 5145262990Sdelphij * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our 5146262990Sdelphij * callers might not be able to detect properly that we are read-only, 5147262990Sdelphij * so check it explicitly here. 5148262990Sdelphij */ 5149262990Sdelphij if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 5150262990Sdelphij ZFS_EXIT(zfsvfs); 5151262990Sdelphij return (SET_ERROR(EROFS)); 5152262990Sdelphij } 5153262990Sdelphij 5154219089Spjd if (error = convoff(vp, bfp, 0, offset)) { 5155219089Spjd ZFS_EXIT(zfsvfs); 5156219089Spjd return (error); 5157219089Spjd } 5158219089Spjd 5159219089Spjd if (bfp->l_len < 0) { 5160219089Spjd ZFS_EXIT(zfsvfs); 5161249195Smm return (SET_ERROR(EINVAL)); 5162219089Spjd } 5163219089Spjd 5164219089Spjd off = bfp->l_start; 5165219089Spjd len = bfp->l_len; /* 0 means from off to end of file */ 5166219089Spjd 5167219089Spjd error = zfs_freesp(zp, off, len, flag, TRUE); 5168219089Spjd 5169219089Spjd ZFS_EXIT(zfsvfs); 5170219089Spjd return (error); 5171219089Spjd} 5172219089Spjd#endif /* sun */ 5173219089Spjd 5174168404SpjdCTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 5175168404SpjdCTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 5176168404Spjd 5177185029Spjd/*ARGSUSED*/ 5178168404Spjdstatic int 5179185029Spjdzfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 5180168404Spjd{ 5181168404Spjd znode_t *zp = VTOZ(vp); 5182168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5183185029Spjd uint32_t gen; 5184219089Spjd uint64_t gen64; 5185168404Spjd uint64_t object = zp->z_id; 5186168404Spjd zfid_short_t *zfid; 5187219089Spjd int size, i, error; 5188168404Spjd 5189168404Spjd ZFS_ENTER(zfsvfs); 5190185029Spjd ZFS_VERIFY_ZP(zp); 5191168404Spjd 5192219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 5193219089Spjd &gen64, sizeof (uint64_t))) != 0) { 5194219089Spjd ZFS_EXIT(zfsvfs); 5195219089Spjd return (error); 5196219089Spjd } 5197219089Spjd 5198219089Spjd gen = (uint32_t)gen64; 5199219089Spjd 5200168404Spjd size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 5201249195Smm 5202249195Smm#ifdef illumos 5203249195Smm if (fidp->fid_len < size) { 5204249195Smm fidp->fid_len = size; 5205249195Smm ZFS_EXIT(zfsvfs); 5206249195Smm return (SET_ERROR(ENOSPC)); 5207249195Smm } 5208249195Smm#else 5209168404Spjd fidp->fid_len = size; 5210249195Smm#endif 5211168404Spjd 5212168404Spjd zfid = (zfid_short_t *)fidp; 5213168404Spjd 5214168404Spjd zfid->zf_len = size; 5215168404Spjd 5216168404Spjd for (i = 0; i < sizeof (zfid->zf_object); i++) 5217168404Spjd zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 5218168404Spjd 5219168404Spjd /* Must have a non-zero generation number to distinguish from .zfs */ 5220168404Spjd if (gen == 0) 5221168404Spjd gen = 1; 5222168404Spjd for (i = 0; i < sizeof (zfid->zf_gen); i++) 5223168404Spjd zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 5224168404Spjd 5225168404Spjd if (size == LONG_FID_LEN) { 5226168404Spjd uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 5227169023Spjd zfid_long_t *zlfid; 5228168404Spjd 5229168404Spjd zlfid = (zfid_long_t *)fidp; 5230168404Spjd 5231168404Spjd for (i = 0; i < sizeof (zlfid->zf_setid); i++) 5232168404Spjd zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 5233168404Spjd 5234168404Spjd /* XXX - this should be the generation number for the objset */ 5235168404Spjd for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 5236168404Spjd zlfid->zf_setgen[i] = 0; 5237168404Spjd } 5238168404Spjd 5239168404Spjd ZFS_EXIT(zfsvfs); 5240168404Spjd return (0); 5241168404Spjd} 5242168404Spjd 5243168404Spjdstatic int 5244185029Spjdzfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 5245185029Spjd caller_context_t *ct) 5246168404Spjd{ 5247168404Spjd znode_t *zp, *xzp; 5248168404Spjd zfsvfs_t *zfsvfs; 5249168404Spjd zfs_dirlock_t *dl; 5250168404Spjd int error; 5251168404Spjd 5252168404Spjd switch (cmd) { 5253168404Spjd case _PC_LINK_MAX: 5254168404Spjd *valp = INT_MAX; 5255168404Spjd return (0); 5256168404Spjd 5257168404Spjd case _PC_FILESIZEBITS: 5258168404Spjd *valp = 64; 5259168404Spjd return (0); 5260219089Spjd#ifdef sun 5261168404Spjd case _PC_XATTR_EXISTS: 5262168404Spjd zp = VTOZ(vp); 5263168404Spjd zfsvfs = zp->z_zfsvfs; 5264168404Spjd ZFS_ENTER(zfsvfs); 5265185029Spjd ZFS_VERIFY_ZP(zp); 5266168404Spjd *valp = 0; 5267168404Spjd error = zfs_dirent_lock(&dl, zp, "", &xzp, 5268185029Spjd ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 5269168404Spjd if (error == 0) { 5270168404Spjd zfs_dirent_unlock(dl); 5271168404Spjd if (!zfs_dirempty(xzp)) 5272168404Spjd *valp = 1; 5273168404Spjd VN_RELE(ZTOV(xzp)); 5274168404Spjd } else if (error == ENOENT) { 5275168404Spjd /* 5276168404Spjd * If there aren't extended attributes, it's the 5277168404Spjd * same as having zero of them. 5278168404Spjd */ 5279168404Spjd error = 0; 5280168404Spjd } 5281168404Spjd ZFS_EXIT(zfsvfs); 5282168404Spjd return (error); 5283168404Spjd 5284219089Spjd case _PC_SATTR_ENABLED: 5285219089Spjd case _PC_SATTR_EXISTS: 5286219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 5287219089Spjd (vp->v_type == VREG || vp->v_type == VDIR); 5288219089Spjd return (0); 5289219089Spjd 5290219089Spjd case _PC_ACCESS_FILTERING: 5291219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 5292219089Spjd vp->v_type == VDIR; 5293219089Spjd return (0); 5294219089Spjd 5295219089Spjd case _PC_ACL_ENABLED: 5296219089Spjd *valp = _ACL_ACE_ENABLED; 5297219089Spjd return (0); 5298219089Spjd#endif /* sun */ 5299219089Spjd case _PC_MIN_HOLE_SIZE: 5300219089Spjd *valp = (int)SPA_MINBLOCKSIZE; 5301219089Spjd return (0); 5302219089Spjd#ifdef sun 5303219089Spjd case _PC_TIMESTAMP_RESOLUTION: 5304219089Spjd /* nanosecond timestamp resolution */ 5305219089Spjd *valp = 1L; 5306219089Spjd return (0); 5307219089Spjd#endif /* sun */ 5308168404Spjd case _PC_ACL_EXTENDED: 5309196949Strasz *valp = 0; 5310168404Spjd return (0); 5311168404Spjd 5312196949Strasz case _PC_ACL_NFS4: 5313196949Strasz *valp = 1; 5314196949Strasz return (0); 5315196949Strasz 5316196949Strasz case _PC_ACL_PATH_MAX: 5317196949Strasz *valp = ACL_MAX_ENTRIES; 5318196949Strasz return (0); 5319196949Strasz 5320168404Spjd default: 5321168962Spjd return (EOPNOTSUPP); 5322168404Spjd } 5323168404Spjd} 5324168404Spjd 5325168404Spjd/*ARGSUSED*/ 5326168404Spjdstatic int 5327185029Spjdzfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5328185029Spjd caller_context_t *ct) 5329168404Spjd{ 5330168404Spjd znode_t *zp = VTOZ(vp); 5331168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5332168404Spjd int error; 5333185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 5334168404Spjd 5335168404Spjd ZFS_ENTER(zfsvfs); 5336185029Spjd ZFS_VERIFY_ZP(zp); 5337185029Spjd error = zfs_getacl(zp, vsecp, skipaclchk, cr); 5338168404Spjd ZFS_EXIT(zfsvfs); 5339168404Spjd 5340168404Spjd return (error); 5341168404Spjd} 5342168404Spjd 5343168404Spjd/*ARGSUSED*/ 5344228685Spjdint 5345185029Spjdzfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5346185029Spjd caller_context_t *ct) 5347168404Spjd{ 5348168404Spjd znode_t *zp = VTOZ(vp); 5349168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5350168404Spjd int error; 5351185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 5352219089Spjd zilog_t *zilog = zfsvfs->z_log; 5353168404Spjd 5354168404Spjd ZFS_ENTER(zfsvfs); 5355185029Spjd ZFS_VERIFY_ZP(zp); 5356219089Spjd 5357185029Spjd error = zfs_setacl(zp, vsecp, skipaclchk, cr); 5358219089Spjd 5359219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5360219089Spjd zil_commit(zilog, 0); 5361219089Spjd 5362168404Spjd ZFS_EXIT(zfsvfs); 5363168404Spjd return (error); 5364168404Spjd} 5365168404Spjd 5366219089Spjd#ifdef sun 5367219089Spjd/* 5368251631Sdelphij * The smallest read we may consider to loan out an arcbuf. 5369251631Sdelphij * This must be a power of 2. 5370219089Spjd */ 5371219089Spjdint zcr_blksz_min = (1 << 10); /* 1K */ 5372251631Sdelphij/* 5373251631Sdelphij * If set to less than the file block size, allow loaning out of an 5374251631Sdelphij * arcbuf for a partial block read. This must be a power of 2. 5375251631Sdelphij */ 5376219089Spjdint zcr_blksz_max = (1 << 17); /* 128K */ 5377219089Spjd 5378219089Spjd/*ARGSUSED*/ 5379168962Spjdstatic int 5380219089Spjdzfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 5381219089Spjd caller_context_t *ct) 5382219089Spjd{ 5383219089Spjd znode_t *zp = VTOZ(vp); 5384219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5385219089Spjd int max_blksz = zfsvfs->z_max_blksz; 5386219089Spjd uio_t *uio = &xuio->xu_uio; 5387219089Spjd ssize_t size = uio->uio_resid; 5388219089Spjd offset_t offset = uio->uio_loffset; 5389219089Spjd int blksz; 5390219089Spjd int fullblk, i; 5391219089Spjd arc_buf_t *abuf; 5392219089Spjd ssize_t maxsize; 5393219089Spjd int preamble, postamble; 5394219089Spjd 5395219089Spjd if (xuio->xu_type != UIOTYPE_ZEROCOPY) 5396249195Smm return (SET_ERROR(EINVAL)); 5397219089Spjd 5398219089Spjd ZFS_ENTER(zfsvfs); 5399219089Spjd ZFS_VERIFY_ZP(zp); 5400219089Spjd switch (ioflag) { 5401219089Spjd case UIO_WRITE: 5402219089Spjd /* 5403219089Spjd * Loan out an arc_buf for write if write size is bigger than 5404219089Spjd * max_blksz, and the file's block size is also max_blksz. 5405219089Spjd */ 5406219089Spjd blksz = max_blksz; 5407219089Spjd if (size < blksz || zp->z_blksz != blksz) { 5408219089Spjd ZFS_EXIT(zfsvfs); 5409249195Smm return (SET_ERROR(EINVAL)); 5410219089Spjd } 5411219089Spjd /* 5412219089Spjd * Caller requests buffers for write before knowing where the 5413219089Spjd * write offset might be (e.g. NFS TCP write). 5414219089Spjd */ 5415219089Spjd if (offset == -1) { 5416219089Spjd preamble = 0; 5417219089Spjd } else { 5418219089Spjd preamble = P2PHASE(offset, blksz); 5419219089Spjd if (preamble) { 5420219089Spjd preamble = blksz - preamble; 5421219089Spjd size -= preamble; 5422219089Spjd } 5423219089Spjd } 5424219089Spjd 5425219089Spjd postamble = P2PHASE(size, blksz); 5426219089Spjd size -= postamble; 5427219089Spjd 5428219089Spjd fullblk = size / blksz; 5429219089Spjd (void) dmu_xuio_init(xuio, 5430219089Spjd (preamble != 0) + fullblk + (postamble != 0)); 5431219089Spjd DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble, 5432219089Spjd int, postamble, int, 5433219089Spjd (preamble != 0) + fullblk + (postamble != 0)); 5434219089Spjd 5435219089Spjd /* 5436219089Spjd * Have to fix iov base/len for partial buffers. They 5437219089Spjd * currently represent full arc_buf's. 5438219089Spjd */ 5439219089Spjd if (preamble) { 5440219089Spjd /* data begins in the middle of the arc_buf */ 5441219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5442219089Spjd blksz); 5443219089Spjd ASSERT(abuf); 5444219089Spjd (void) dmu_xuio_add(xuio, abuf, 5445219089Spjd blksz - preamble, preamble); 5446219089Spjd } 5447219089Spjd 5448219089Spjd for (i = 0; i < fullblk; i++) { 5449219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5450219089Spjd blksz); 5451219089Spjd ASSERT(abuf); 5452219089Spjd (void) dmu_xuio_add(xuio, abuf, 0, blksz); 5453219089Spjd } 5454219089Spjd 5455219089Spjd if (postamble) { 5456219089Spjd /* data ends in the middle of the arc_buf */ 5457219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5458219089Spjd blksz); 5459219089Spjd ASSERT(abuf); 5460219089Spjd (void) dmu_xuio_add(xuio, abuf, 0, postamble); 5461219089Spjd } 5462219089Spjd break; 5463219089Spjd case UIO_READ: 5464219089Spjd /* 5465219089Spjd * Loan out an arc_buf for read if the read size is larger than 5466219089Spjd * the current file block size. Block alignment is not 5467219089Spjd * considered. Partial arc_buf will be loaned out for read. 5468219089Spjd */ 5469219089Spjd blksz = zp->z_blksz; 5470219089Spjd if (blksz < zcr_blksz_min) 5471219089Spjd blksz = zcr_blksz_min; 5472219089Spjd if (blksz > zcr_blksz_max) 5473219089Spjd blksz = zcr_blksz_max; 5474219089Spjd /* avoid potential complexity of dealing with it */ 5475219089Spjd if (blksz > max_blksz) { 5476219089Spjd ZFS_EXIT(zfsvfs); 5477249195Smm return (SET_ERROR(EINVAL)); 5478219089Spjd } 5479219089Spjd 5480219089Spjd maxsize = zp->z_size - uio->uio_loffset; 5481219089Spjd if (size > maxsize) 5482219089Spjd size = maxsize; 5483219089Spjd 5484219089Spjd if (size < blksz || vn_has_cached_data(vp)) { 5485219089Spjd ZFS_EXIT(zfsvfs); 5486249195Smm return (SET_ERROR(EINVAL)); 5487219089Spjd } 5488219089Spjd break; 5489219089Spjd default: 5490219089Spjd ZFS_EXIT(zfsvfs); 5491249195Smm return (SET_ERROR(EINVAL)); 5492219089Spjd } 5493219089Spjd 5494219089Spjd uio->uio_extflg = UIO_XUIO; 5495219089Spjd XUIO_XUZC_RW(xuio) = ioflag; 5496219089Spjd ZFS_EXIT(zfsvfs); 5497219089Spjd return (0); 5498219089Spjd} 5499219089Spjd 5500219089Spjd/*ARGSUSED*/ 5501219089Spjdstatic int 5502219089Spjdzfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 5503219089Spjd{ 5504219089Spjd int i; 5505219089Spjd arc_buf_t *abuf; 5506219089Spjd int ioflag = XUIO_XUZC_RW(xuio); 5507219089Spjd 5508219089Spjd ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 5509219089Spjd 5510219089Spjd i = dmu_xuio_cnt(xuio); 5511219089Spjd while (i-- > 0) { 5512219089Spjd abuf = dmu_xuio_arcbuf(xuio, i); 5513219089Spjd /* 5514219089Spjd * if abuf == NULL, it must be a write buffer 5515219089Spjd * that has been returned in zfs_write(). 5516219089Spjd */ 5517219089Spjd if (abuf) 5518219089Spjd dmu_return_arcbuf(abuf); 5519219089Spjd ASSERT(abuf || ioflag == UIO_WRITE); 5520219089Spjd } 5521219089Spjd 5522219089Spjd dmu_xuio_fini(xuio); 5523219089Spjd return (0); 5524219089Spjd} 5525219089Spjd 5526219089Spjd/* 5527219089Spjd * Predeclare these here so that the compiler assumes that 5528219089Spjd * this is an "old style" function declaration that does 5529219089Spjd * not include arguments => we won't get type mismatch errors 5530219089Spjd * in the initializations that follow. 5531219089Spjd */ 5532219089Spjdstatic int zfs_inval(); 5533219089Spjdstatic int zfs_isdir(); 5534219089Spjd 5535219089Spjdstatic int 5536219089Spjdzfs_inval() 5537219089Spjd{ 5538249195Smm return (SET_ERROR(EINVAL)); 5539219089Spjd} 5540219089Spjd 5541219089Spjdstatic int 5542219089Spjdzfs_isdir() 5543219089Spjd{ 5544249195Smm return (SET_ERROR(EISDIR)); 5545219089Spjd} 5546219089Spjd/* 5547219089Spjd * Directory vnode operations template 5548219089Spjd */ 5549219089Spjdvnodeops_t *zfs_dvnodeops; 5550219089Spjdconst fs_operation_def_t zfs_dvnodeops_template[] = { 5551219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5552219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5553219089Spjd VOPNAME_READ, { .error = zfs_isdir }, 5554219089Spjd VOPNAME_WRITE, { .error = zfs_isdir }, 5555219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5556219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5557219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5558219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5559219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5560219089Spjd VOPNAME_CREATE, { .vop_create = zfs_create }, 5561219089Spjd VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5562219089Spjd VOPNAME_LINK, { .vop_link = zfs_link }, 5563219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5564219089Spjd VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, 5565219089Spjd VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5566219089Spjd VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5567219089Spjd VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, 5568219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5569219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5570219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5571219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5572219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5573219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5574219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5575268464Sdelphij VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5576219089Spjd NULL, NULL 5577219089Spjd}; 5578219089Spjd 5579219089Spjd/* 5580219089Spjd * Regular file vnode operations template 5581219089Spjd */ 5582219089Spjdvnodeops_t *zfs_fvnodeops; 5583219089Spjdconst fs_operation_def_t zfs_fvnodeops_template[] = { 5584219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5585219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5586219089Spjd VOPNAME_READ, { .vop_read = zfs_read }, 5587219089Spjd VOPNAME_WRITE, { .vop_write = zfs_write }, 5588219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5589219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5590219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5591219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5592219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5593219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5594219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5595219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5596219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5597219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5598219089Spjd VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, 5599219089Spjd VOPNAME_SPACE, { .vop_space = zfs_space }, 5600219089Spjd VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, 5601219089Spjd VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, 5602219089Spjd VOPNAME_MAP, { .vop_map = zfs_map }, 5603219089Spjd VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, 5604219089Spjd VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, 5605219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5606219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5607219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5608219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5609268464Sdelphij VOPNAME_REQZCBUF, { .vop_reqzcbuf = zfs_reqzcbuf }, 5610268464Sdelphij VOPNAME_RETZCBUF, { .vop_retzcbuf = zfs_retzcbuf }, 5611219089Spjd NULL, NULL 5612219089Spjd}; 5613219089Spjd 5614219089Spjd/* 5615219089Spjd * Symbolic link vnode operations template 5616219089Spjd */ 5617219089Spjdvnodeops_t *zfs_symvnodeops; 5618219089Spjdconst fs_operation_def_t zfs_symvnodeops_template[] = { 5619219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5620219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5621219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5622219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5623219089Spjd VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, 5624219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5625219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5626219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5627219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5628219089Spjd NULL, NULL 5629219089Spjd}; 5630219089Spjd 5631219089Spjd/* 5632219089Spjd * special share hidden files vnode operations template 5633219089Spjd */ 5634219089Spjdvnodeops_t *zfs_sharevnodeops; 5635219089Spjdconst fs_operation_def_t zfs_sharevnodeops_template[] = { 5636219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5637219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5638219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5639219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5640219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5641219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5642219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5643219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5644219089Spjd NULL, NULL 5645219089Spjd}; 5646219089Spjd 5647219089Spjd/* 5648219089Spjd * Extended attribute directory vnode operations template 5649251631Sdelphij * 5650251631Sdelphij * This template is identical to the directory vnodes 5651251631Sdelphij * operation template except for restricted operations: 5652251631Sdelphij * VOP_MKDIR() 5653251631Sdelphij * VOP_SYMLINK() 5654251631Sdelphij * 5655219089Spjd * Note that there are other restrictions embedded in: 5656219089Spjd * zfs_create() - restrict type to VREG 5657219089Spjd * zfs_link() - no links into/out of attribute space 5658219089Spjd * zfs_rename() - no moves into/out of attribute space 5659219089Spjd */ 5660219089Spjdvnodeops_t *zfs_xdvnodeops; 5661219089Spjdconst fs_operation_def_t zfs_xdvnodeops_template[] = { 5662219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5663219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5664219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5665219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5666219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5667219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5668219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5669219089Spjd VOPNAME_CREATE, { .vop_create = zfs_create }, 5670219089Spjd VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5671219089Spjd VOPNAME_LINK, { .vop_link = zfs_link }, 5672219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5673219089Spjd VOPNAME_MKDIR, { .error = zfs_inval }, 5674219089Spjd VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5675219089Spjd VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5676219089Spjd VOPNAME_SYMLINK, { .error = zfs_inval }, 5677219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5678219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5679219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5680219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5681219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5682219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5683219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5684219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5685219089Spjd NULL, NULL 5686219089Spjd}; 5687219089Spjd 5688219089Spjd/* 5689219089Spjd * Error vnode operations template 5690219089Spjd */ 5691219089Spjdvnodeops_t *zfs_evnodeops; 5692219089Spjdconst fs_operation_def_t zfs_evnodeops_template[] = { 5693219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5694219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5695219089Spjd NULL, NULL 5696219089Spjd}; 5697219089Spjd#endif /* sun */ 5698219089Spjd 5699219089Spjdstatic int 5700213673Spjdioflags(int ioflags) 5701213673Spjd{ 5702213673Spjd int flags = 0; 5703213673Spjd 5704213673Spjd if (ioflags & IO_APPEND) 5705213673Spjd flags |= FAPPEND; 5706213673Spjd if (ioflags & IO_NDELAY) 5707213673Spjd flags |= FNONBLOCK; 5708213673Spjd if (ioflags & IO_SYNC) 5709213673Spjd flags |= (FSYNC | FDSYNC | FRSYNC); 5710213673Spjd 5711213673Spjd return (flags); 5712213673Spjd} 5713213673Spjd 5714213673Spjdstatic int 5715213937Savgzfs_getpages(struct vnode *vp, vm_page_t *m, int count, int reqpage) 5716213937Savg{ 5717213937Savg znode_t *zp = VTOZ(vp); 5718213937Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5719213937Savg objset_t *os = zp->z_zfsvfs->z_os; 5720243517Savg vm_page_t mfirst, mlast, mreq; 5721213937Savg vm_object_t object; 5722213937Savg caddr_t va; 5723213937Savg struct sf_buf *sf; 5724243517Savg off_t startoff, endoff; 5725213937Savg int i, error; 5726243517Savg vm_pindex_t reqstart, reqend; 5727243517Savg int pcount, lsize, reqsize, size; 5728213937Savg 5729213937Savg ZFS_ENTER(zfsvfs); 5730213937Savg ZFS_VERIFY_ZP(zp); 5731213937Savg 5732243517Savg pcount = OFF_TO_IDX(round_page(count)); 5733213937Savg mreq = m[reqpage]; 5734213937Savg object = mreq->object; 5735213937Savg error = 0; 5736213937Savg 5737213937Savg KASSERT(vp->v_object == object, ("mismatching object")); 5738213937Savg 5739243517Savg if (pcount > 1 && zp->z_blksz > PAGESIZE) { 5740243517Savg startoff = rounddown(IDX_TO_OFF(mreq->pindex), zp->z_blksz); 5741243517Savg reqstart = OFF_TO_IDX(round_page(startoff)); 5742243517Savg if (reqstart < m[0]->pindex) 5743243517Savg reqstart = 0; 5744243517Savg else 5745243517Savg reqstart = reqstart - m[0]->pindex; 5746243517Savg endoff = roundup(IDX_TO_OFF(mreq->pindex) + PAGE_SIZE, 5747243517Savg zp->z_blksz); 5748243517Savg reqend = OFF_TO_IDX(trunc_page(endoff)) - 1; 5749243517Savg if (reqend > m[pcount - 1]->pindex) 5750243517Savg reqend = m[pcount - 1]->pindex; 5751243517Savg reqsize = reqend - m[reqstart]->pindex + 1; 5752243517Savg KASSERT(reqstart <= reqpage && reqpage < reqstart + reqsize, 5753243517Savg ("reqpage beyond [reqstart, reqstart + reqsize[ bounds")); 5754243517Savg } else { 5755243517Savg reqstart = reqpage; 5756243517Savg reqsize = 1; 5757243517Savg } 5758243517Savg mfirst = m[reqstart]; 5759243517Savg mlast = m[reqstart + reqsize - 1]; 5760243517Savg 5761248084Sattilio zfs_vmobject_wlock(object); 5762213937Savg 5763243517Savg for (i = 0; i < reqstart; i++) { 5764243517Savg vm_page_lock(m[i]); 5765243517Savg vm_page_free(m[i]); 5766243517Savg vm_page_unlock(m[i]); 5767213937Savg } 5768243517Savg for (i = reqstart + reqsize; i < pcount; i++) { 5769243517Savg vm_page_lock(m[i]); 5770243517Savg vm_page_free(m[i]); 5771243517Savg vm_page_unlock(m[i]); 5772243517Savg } 5773213937Savg 5774243517Savg if (mreq->valid && reqsize == 1) { 5775213937Savg if (mreq->valid != VM_PAGE_BITS_ALL) 5776213937Savg vm_page_zero_invalid(mreq, TRUE); 5777248084Sattilio zfs_vmobject_wunlock(object); 5778213937Savg ZFS_EXIT(zfsvfs); 5779248084Sattilio return (zfs_vm_pagerret_ok); 5780213937Savg } 5781213937Savg 5782213937Savg PCPU_INC(cnt.v_vnodein); 5783243517Savg PCPU_ADD(cnt.v_vnodepgsin, reqsize); 5784213937Savg 5785213937Savg if (IDX_TO_OFF(mreq->pindex) >= object->un_pager.vnp.vnp_size) { 5786243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5787243517Savg if (i != reqpage) { 5788243517Savg vm_page_lock(m[i]); 5789243517Savg vm_page_free(m[i]); 5790243517Savg vm_page_unlock(m[i]); 5791243517Savg } 5792243517Savg } 5793248084Sattilio zfs_vmobject_wunlock(object); 5794213937Savg ZFS_EXIT(zfsvfs); 5795248084Sattilio return (zfs_vm_pagerret_bad); 5796213937Savg } 5797213937Savg 5798243517Savg lsize = PAGE_SIZE; 5799243517Savg if (IDX_TO_OFF(mlast->pindex) + lsize > object->un_pager.vnp.vnp_size) 5800243517Savg lsize = object->un_pager.vnp.vnp_size - IDX_TO_OFF(mlast->pindex); 5801213937Savg 5802248084Sattilio zfs_vmobject_wunlock(object); 5803243517Savg 5804243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5805243517Savg size = PAGE_SIZE; 5806243517Savg if (i == (reqstart + reqsize - 1)) 5807243517Savg size = lsize; 5808243517Savg va = zfs_map_page(m[i], &sf); 5809243517Savg error = dmu_read(os, zp->z_id, IDX_TO_OFF(m[i]->pindex), 5810243517Savg size, va, DMU_READ_PREFETCH); 5811243517Savg if (size != PAGE_SIZE) 5812243517Savg bzero(va + size, PAGE_SIZE - size); 5813243517Savg zfs_unmap_page(sf); 5814243517Savg if (error != 0) 5815243517Savg break; 5816243517Savg } 5817243517Savg 5818248084Sattilio zfs_vmobject_wlock(object); 5819213937Savg 5820243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5821243763Savg if (!error) 5822243763Savg m[i]->valid = VM_PAGE_BITS_ALL; 5823243517Savg KASSERT(m[i]->dirty == 0, ("zfs_getpages: page %p is dirty", m[i])); 5824243763Savg if (i != reqpage) 5825243763Savg vm_page_readahead_finish(m[i]); 5826243517Savg } 5827243517Savg 5828248084Sattilio zfs_vmobject_wunlock(object); 5829213937Savg 5830213937Savg ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 5831213937Savg ZFS_EXIT(zfsvfs); 5832248084Sattilio return (error ? zfs_vm_pagerret_error : zfs_vm_pagerret_ok); 5833213937Savg} 5834213937Savg 5835213937Savgstatic int 5836213937Savgzfs_freebsd_getpages(ap) 5837213937Savg struct vop_getpages_args /* { 5838213937Savg struct vnode *a_vp; 5839213937Savg vm_page_t *a_m; 5840213937Savg int a_count; 5841213937Savg int a_reqpage; 5842213937Savg } */ *ap; 5843213937Savg{ 5844213937Savg 5845213937Savg return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage)); 5846213937Savg} 5847213937Savg 5848213937Savgstatic int 5849258746Savgzfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, 5850258746Savg int *rtvals) 5851258746Savg{ 5852258746Savg znode_t *zp = VTOZ(vp); 5853258746Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5854258746Savg rl_t *rl; 5855258746Savg dmu_tx_t *tx; 5856258746Savg struct sf_buf *sf; 5857258746Savg vm_object_t object; 5858258746Savg vm_page_t m; 5859258746Savg caddr_t va; 5860258746Savg size_t tocopy; 5861258746Savg size_t lo_len; 5862258746Savg vm_ooffset_t lo_off; 5863258746Savg vm_ooffset_t off; 5864258746Savg uint_t blksz; 5865258746Savg int ncount; 5866258746Savg int pcount; 5867258746Savg int err; 5868258746Savg int i; 5869258746Savg 5870258746Savg ZFS_ENTER(zfsvfs); 5871258746Savg ZFS_VERIFY_ZP(zp); 5872258746Savg 5873258746Savg object = vp->v_object; 5874258746Savg pcount = btoc(len); 5875258746Savg ncount = pcount; 5876258746Savg 5877258746Savg KASSERT(ma[0]->object == object, ("mismatching object")); 5878258746Savg KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length")); 5879258746Savg 5880258746Savg for (i = 0; i < pcount; i++) 5881258746Savg rtvals[i] = zfs_vm_pagerret_error; 5882258746Savg 5883258746Savg off = IDX_TO_OFF(ma[0]->pindex); 5884258746Savg blksz = zp->z_blksz; 5885258746Savg lo_off = rounddown(off, blksz); 5886258746Savg lo_len = roundup(len + (off - lo_off), blksz); 5887258746Savg rl = zfs_range_lock(zp, lo_off, lo_len, RL_WRITER); 5888258746Savg 5889258746Savg zfs_vmobject_wlock(object); 5890258746Savg if (len + off > object->un_pager.vnp.vnp_size) { 5891258746Savg if (object->un_pager.vnp.vnp_size > off) { 5892258746Savg int pgoff; 5893258746Savg 5894258746Savg len = object->un_pager.vnp.vnp_size - off; 5895258746Savg ncount = btoc(len); 5896258746Savg if ((pgoff = (int)len & PAGE_MASK) != 0) { 5897258746Savg /* 5898258746Savg * If the object is locked and the following 5899258746Savg * conditions hold, then the page's dirty 5900258746Savg * field cannot be concurrently changed by a 5901258746Savg * pmap operation. 5902258746Savg */ 5903258746Savg m = ma[ncount - 1]; 5904258746Savg vm_page_assert_sbusied(m); 5905258746Savg KASSERT(!pmap_page_is_write_mapped(m), 5906258746Savg ("zfs_putpages: page %p is not read-only", m)); 5907258746Savg vm_page_clear_dirty(m, pgoff, PAGE_SIZE - 5908258746Savg pgoff); 5909258746Savg } 5910258746Savg } else { 5911258746Savg len = 0; 5912258746Savg ncount = 0; 5913258746Savg } 5914258746Savg if (ncount < pcount) { 5915258746Savg for (i = ncount; i < pcount; i++) { 5916258746Savg rtvals[i] = zfs_vm_pagerret_bad; 5917258746Savg } 5918258746Savg } 5919258746Savg } 5920258746Savg zfs_vmobject_wunlock(object); 5921258746Savg 5922258746Savg if (ncount == 0) 5923258746Savg goto out; 5924258746Savg 5925258746Savg if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 5926258746Savg zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 5927258746Savg goto out; 5928258746Savg } 5929258746Savg 5930258746Savgtop: 5931258746Savg tx = dmu_tx_create(zfsvfs->z_os); 5932258746Savg dmu_tx_hold_write(tx, zp->z_id, off, len); 5933258746Savg 5934258746Savg dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 5935258746Savg zfs_sa_upgrade_txholds(tx, zp); 5936258746Savg err = dmu_tx_assign(tx, TXG_NOWAIT); 5937258746Savg if (err != 0) { 5938258746Savg if (err == ERESTART) { 5939258746Savg dmu_tx_wait(tx); 5940258746Savg dmu_tx_abort(tx); 5941258746Savg goto top; 5942258746Savg } 5943258746Savg dmu_tx_abort(tx); 5944258746Savg goto out; 5945258746Savg } 5946258746Savg 5947258746Savg if (zp->z_blksz < PAGE_SIZE) { 5948258746Savg i = 0; 5949258746Savg for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) { 5950258746Savg tocopy = len > PAGE_SIZE ? PAGE_SIZE : len; 5951258746Savg va = zfs_map_page(ma[i], &sf); 5952258746Savg dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx); 5953258746Savg zfs_unmap_page(sf); 5954258746Savg } 5955258746Savg } else { 5956258746Savg err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx); 5957258746Savg } 5958258746Savg 5959258746Savg if (err == 0) { 5960258746Savg uint64_t mtime[2], ctime[2]; 5961258746Savg sa_bulk_attr_t bulk[3]; 5962258746Savg int count = 0; 5963258746Savg 5964258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 5965258746Savg &mtime, 16); 5966258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 5967258746Savg &ctime, 16); 5968258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 5969258746Savg &zp->z_pflags, 8); 5970258746Savg zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 5971258746Savg B_TRUE); 5972258746Savg zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 5973258746Savg 5974258746Savg zfs_vmobject_wlock(object); 5975258746Savg for (i = 0; i < ncount; i++) { 5976258746Savg rtvals[i] = zfs_vm_pagerret_ok; 5977258746Savg vm_page_undirty(ma[i]); 5978258746Savg } 5979258746Savg zfs_vmobject_wunlock(object); 5980258746Savg PCPU_INC(cnt.v_vnodeout); 5981258746Savg PCPU_ADD(cnt.v_vnodepgsout, ncount); 5982258746Savg } 5983258746Savg dmu_tx_commit(tx); 5984258746Savg 5985258746Savgout: 5986258746Savg zfs_range_unlock(rl); 5987258746Savg if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 || 5988258746Savg zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5989258746Savg zil_commit(zfsvfs->z_log, zp->z_id); 5990258746Savg ZFS_EXIT(zfsvfs); 5991258746Savg return (rtvals[0]); 5992258746Savg} 5993258746Savg 5994258746Savgint 5995258746Savgzfs_freebsd_putpages(ap) 5996258746Savg struct vop_putpages_args /* { 5997258746Savg struct vnode *a_vp; 5998258746Savg vm_page_t *a_m; 5999258746Savg int a_count; 6000258746Savg int a_sync; 6001258746Savg int *a_rtvals; 6002258746Savg } */ *ap; 6003258746Savg{ 6004258746Savg 6005258746Savg return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, 6006258746Savg ap->a_rtvals)); 6007258746Savg} 6008258746Savg 6009258746Savgstatic int 6010243518Savgzfs_freebsd_bmap(ap) 6011243518Savg struct vop_bmap_args /* { 6012243518Savg struct vnode *a_vp; 6013243518Savg daddr_t a_bn; 6014243518Savg struct bufobj **a_bop; 6015243518Savg daddr_t *a_bnp; 6016243518Savg int *a_runp; 6017243518Savg int *a_runb; 6018243518Savg } */ *ap; 6019243518Savg{ 6020243518Savg 6021243518Savg if (ap->a_bop != NULL) 6022243518Savg *ap->a_bop = &ap->a_vp->v_bufobj; 6023243518Savg if (ap->a_bnp != NULL) 6024243518Savg *ap->a_bnp = ap->a_bn; 6025243518Savg if (ap->a_runp != NULL) 6026243518Savg *ap->a_runp = 0; 6027243518Savg if (ap->a_runb != NULL) 6028243518Savg *ap->a_runb = 0; 6029243518Savg 6030243518Savg return (0); 6031243518Savg} 6032243518Savg 6033243518Savgstatic int 6034168962Spjdzfs_freebsd_open(ap) 6035168962Spjd struct vop_open_args /* { 6036168962Spjd struct vnode *a_vp; 6037168962Spjd int a_mode; 6038168962Spjd struct ucred *a_cred; 6039168962Spjd struct thread *a_td; 6040168962Spjd } */ *ap; 6041168962Spjd{ 6042168962Spjd vnode_t *vp = ap->a_vp; 6043168962Spjd znode_t *zp = VTOZ(vp); 6044168962Spjd int error; 6045168962Spjd 6046185029Spjd error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); 6047168962Spjd if (error == 0) 6048219089Spjd vnode_create_vobject(vp, zp->z_size, ap->a_td); 6049168962Spjd return (error); 6050168962Spjd} 6051168962Spjd 6052168962Spjdstatic int 6053168962Spjdzfs_freebsd_close(ap) 6054168962Spjd struct vop_close_args /* { 6055168962Spjd struct vnode *a_vp; 6056168962Spjd int a_fflag; 6057168962Spjd struct ucred *a_cred; 6058168962Spjd struct thread *a_td; 6059168962Spjd } */ *ap; 6060168962Spjd{ 6061168962Spjd 6062242566Savg return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred, NULL)); 6063168962Spjd} 6064168962Spjd 6065168962Spjdstatic int 6066168962Spjdzfs_freebsd_ioctl(ap) 6067168962Spjd struct vop_ioctl_args /* { 6068168962Spjd struct vnode *a_vp; 6069168962Spjd u_long a_command; 6070168962Spjd caddr_t a_data; 6071168962Spjd int a_fflag; 6072168962Spjd struct ucred *cred; 6073168962Spjd struct thread *td; 6074168962Spjd } */ *ap; 6075168962Spjd{ 6076168962Spjd 6077168978Spjd return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 6078185029Spjd ap->a_fflag, ap->a_cred, NULL, NULL)); 6079168962Spjd} 6080168962Spjd 6081168962Spjdstatic int 6082168962Spjdzfs_freebsd_read(ap) 6083168962Spjd struct vop_read_args /* { 6084168962Spjd struct vnode *a_vp; 6085168962Spjd struct uio *a_uio; 6086168962Spjd int a_ioflag; 6087168962Spjd struct ucred *a_cred; 6088168962Spjd } */ *ap; 6089168962Spjd{ 6090168962Spjd 6091213673Spjd return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 6092213673Spjd ap->a_cred, NULL)); 6093168962Spjd} 6094168962Spjd 6095168962Spjdstatic int 6096168962Spjdzfs_freebsd_write(ap) 6097168962Spjd struct vop_write_args /* { 6098168962Spjd struct vnode *a_vp; 6099168962Spjd struct uio *a_uio; 6100168962Spjd int a_ioflag; 6101168962Spjd struct ucred *a_cred; 6102168962Spjd } */ *ap; 6103168962Spjd{ 6104168962Spjd 6105213673Spjd return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 6106213673Spjd ap->a_cred, NULL)); 6107168962Spjd} 6108168962Spjd 6109168962Spjdstatic int 6110168962Spjdzfs_freebsd_access(ap) 6111168962Spjd struct vop_access_args /* { 6112168962Spjd struct vnode *a_vp; 6113192689Strasz accmode_t a_accmode; 6114168962Spjd struct ucred *a_cred; 6115168962Spjd struct thread *a_td; 6116168962Spjd } */ *ap; 6117168962Spjd{ 6118212002Sjh vnode_t *vp = ap->a_vp; 6119212002Sjh znode_t *zp = VTOZ(vp); 6120198703Spjd accmode_t accmode; 6121198703Spjd int error = 0; 6122168962Spjd 6123185172Spjd /* 6124198703Spjd * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND, 6125185172Spjd */ 6126198703Spjd accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 6127198703Spjd if (accmode != 0) 6128198703Spjd error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); 6129185172Spjd 6130198703Spjd /* 6131198703Spjd * VADMIN has to be handled by vaccess(). 6132198703Spjd */ 6133198703Spjd if (error == 0) { 6134198703Spjd accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 6135198703Spjd if (accmode != 0) { 6136219089Spjd error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 6137219089Spjd zp->z_gid, accmode, ap->a_cred, NULL); 6138198703Spjd } 6139185172Spjd } 6140185172Spjd 6141212002Sjh /* 6142212002Sjh * For VEXEC, ensure that at least one execute bit is set for 6143212002Sjh * non-directories. 6144212002Sjh */ 6145212002Sjh if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 6146219089Spjd (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 6147212002Sjh error = EACCES; 6148219089Spjd } 6149212002Sjh 6150198703Spjd return (error); 6151168962Spjd} 6152168962Spjd 6153168962Spjdstatic int 6154168962Spjdzfs_freebsd_lookup(ap) 6155168962Spjd struct vop_lookup_args /* { 6156168962Spjd struct vnode *a_dvp; 6157168962Spjd struct vnode **a_vpp; 6158168962Spjd struct componentname *a_cnp; 6159168962Spjd } */ *ap; 6160168962Spjd{ 6161168962Spjd struct componentname *cnp = ap->a_cnp; 6162168962Spjd char nm[NAME_MAX + 1]; 6163168962Spjd 6164168962Spjd ASSERT(cnp->cn_namelen < sizeof(nm)); 6165168962Spjd strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 6166168962Spjd 6167168962Spjd return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 6168185029Spjd cnp->cn_cred, cnp->cn_thread, 0)); 6169168962Spjd} 6170168962Spjd 6171168962Spjdstatic int 6172168962Spjdzfs_freebsd_create(ap) 6173168962Spjd struct vop_create_args /* { 6174168962Spjd struct vnode *a_dvp; 6175168962Spjd struct vnode **a_vpp; 6176168962Spjd struct componentname *a_cnp; 6177168962Spjd struct vattr *a_vap; 6178168962Spjd } */ *ap; 6179168962Spjd{ 6180168962Spjd struct componentname *cnp = ap->a_cnp; 6181168962Spjd vattr_t *vap = ap->a_vap; 6182168962Spjd int mode; 6183168962Spjd 6184168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6185168962Spjd 6186168962Spjd vattr_init_mask(vap); 6187168962Spjd mode = vap->va_mode & ALLPERMS; 6188168962Spjd 6189168962Spjd return (zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 6190185029Spjd ap->a_vpp, cnp->cn_cred, cnp->cn_thread)); 6191168962Spjd} 6192168962Spjd 6193168962Spjdstatic int 6194168962Spjdzfs_freebsd_remove(ap) 6195168962Spjd struct vop_remove_args /* { 6196168962Spjd struct vnode *a_dvp; 6197168962Spjd struct vnode *a_vp; 6198168962Spjd struct componentname *a_cnp; 6199168962Spjd } */ *ap; 6200168962Spjd{ 6201168962Spjd 6202168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 6203168962Spjd 6204168962Spjd return (zfs_remove(ap->a_dvp, ap->a_cnp->cn_nameptr, 6205185029Spjd ap->a_cnp->cn_cred, NULL, 0)); 6206168962Spjd} 6207168962Spjd 6208168962Spjdstatic int 6209168962Spjdzfs_freebsd_mkdir(ap) 6210168962Spjd struct vop_mkdir_args /* { 6211168962Spjd struct vnode *a_dvp; 6212168962Spjd struct vnode **a_vpp; 6213168962Spjd struct componentname *a_cnp; 6214168962Spjd struct vattr *a_vap; 6215168962Spjd } */ *ap; 6216168962Spjd{ 6217168962Spjd vattr_t *vap = ap->a_vap; 6218168962Spjd 6219168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 6220168962Spjd 6221168962Spjd vattr_init_mask(vap); 6222168962Spjd 6223168962Spjd return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 6224185029Spjd ap->a_cnp->cn_cred, NULL, 0, NULL)); 6225168962Spjd} 6226168962Spjd 6227168962Spjdstatic int 6228168962Spjdzfs_freebsd_rmdir(ap) 6229168962Spjd struct vop_rmdir_args /* { 6230168962Spjd struct vnode *a_dvp; 6231168962Spjd struct vnode *a_vp; 6232168962Spjd struct componentname *a_cnp; 6233168962Spjd } */ *ap; 6234168962Spjd{ 6235168962Spjd struct componentname *cnp = ap->a_cnp; 6236168962Spjd 6237168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6238168962Spjd 6239185029Spjd return (zfs_rmdir(ap->a_dvp, cnp->cn_nameptr, NULL, cnp->cn_cred, NULL, 0)); 6240168962Spjd} 6241168962Spjd 6242168962Spjdstatic int 6243168962Spjdzfs_freebsd_readdir(ap) 6244168962Spjd struct vop_readdir_args /* { 6245168962Spjd struct vnode *a_vp; 6246168962Spjd struct uio *a_uio; 6247168962Spjd struct ucred *a_cred; 6248168962Spjd int *a_eofflag; 6249168962Spjd int *a_ncookies; 6250168962Spjd u_long **a_cookies; 6251168962Spjd } */ *ap; 6252168962Spjd{ 6253168962Spjd 6254168962Spjd return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 6255168962Spjd ap->a_ncookies, ap->a_cookies)); 6256168962Spjd} 6257168962Spjd 6258168962Spjdstatic int 6259168962Spjdzfs_freebsd_fsync(ap) 6260168962Spjd struct vop_fsync_args /* { 6261168962Spjd struct vnode *a_vp; 6262168962Spjd int a_waitfor; 6263168962Spjd struct thread *a_td; 6264168962Spjd } */ *ap; 6265168962Spjd{ 6266168962Spjd 6267168962Spjd vop_stdfsync(ap); 6268185029Spjd return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 6269168962Spjd} 6270168962Spjd 6271168962Spjdstatic int 6272168962Spjdzfs_freebsd_getattr(ap) 6273168962Spjd struct vop_getattr_args /* { 6274168962Spjd struct vnode *a_vp; 6275168962Spjd struct vattr *a_vap; 6276168962Spjd struct ucred *a_cred; 6277168962Spjd } */ *ap; 6278168962Spjd{ 6279185029Spjd vattr_t *vap = ap->a_vap; 6280185029Spjd xvattr_t xvap; 6281185029Spjd u_long fflags = 0; 6282185029Spjd int error; 6283168962Spjd 6284185029Spjd xva_init(&xvap); 6285185029Spjd xvap.xva_vattr = *vap; 6286185029Spjd xvap.xva_vattr.va_mask |= AT_XVATTR; 6287185029Spjd 6288185029Spjd /* Convert chflags into ZFS-type flags. */ 6289185029Spjd /* XXX: what about SF_SETTABLE?. */ 6290185029Spjd XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 6291185029Spjd XVA_SET_REQ(&xvap, XAT_APPENDONLY); 6292185029Spjd XVA_SET_REQ(&xvap, XAT_NOUNLINK); 6293185029Spjd XVA_SET_REQ(&xvap, XAT_NODUMP); 6294254627Sken XVA_SET_REQ(&xvap, XAT_READONLY); 6295254627Sken XVA_SET_REQ(&xvap, XAT_ARCHIVE); 6296254627Sken XVA_SET_REQ(&xvap, XAT_SYSTEM); 6297254627Sken XVA_SET_REQ(&xvap, XAT_HIDDEN); 6298254627Sken XVA_SET_REQ(&xvap, XAT_REPARSE); 6299254627Sken XVA_SET_REQ(&xvap, XAT_OFFLINE); 6300254627Sken XVA_SET_REQ(&xvap, XAT_SPARSE); 6301254627Sken 6302185029Spjd error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 6303185029Spjd if (error != 0) 6304185029Spjd return (error); 6305185029Spjd 6306185029Spjd /* Convert ZFS xattr into chflags. */ 6307185029Spjd#define FLAG_CHECK(fflag, xflag, xfield) do { \ 6308185029Spjd if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 6309185029Spjd fflags |= (fflag); \ 6310185029Spjd} while (0) 6311185029Spjd FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 6312185029Spjd xvap.xva_xoptattrs.xoa_immutable); 6313185029Spjd FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 6314185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 6315185029Spjd FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 6316185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 6317254627Sken FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE, 6318254627Sken xvap.xva_xoptattrs.xoa_archive); 6319185029Spjd FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 6320185029Spjd xvap.xva_xoptattrs.xoa_nodump); 6321254627Sken FLAG_CHECK(UF_READONLY, XAT_READONLY, 6322254627Sken xvap.xva_xoptattrs.xoa_readonly); 6323254627Sken FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM, 6324254627Sken xvap.xva_xoptattrs.xoa_system); 6325254627Sken FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN, 6326254627Sken xvap.xva_xoptattrs.xoa_hidden); 6327254627Sken FLAG_CHECK(UF_REPARSE, XAT_REPARSE, 6328254627Sken xvap.xva_xoptattrs.xoa_reparse); 6329254627Sken FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE, 6330254627Sken xvap.xva_xoptattrs.xoa_offline); 6331254627Sken FLAG_CHECK(UF_SPARSE, XAT_SPARSE, 6332254627Sken xvap.xva_xoptattrs.xoa_sparse); 6333254627Sken 6334185029Spjd#undef FLAG_CHECK 6335185029Spjd *vap = xvap.xva_vattr; 6336185029Spjd vap->va_flags = fflags; 6337185029Spjd return (0); 6338168962Spjd} 6339168962Spjd 6340168962Spjdstatic int 6341168962Spjdzfs_freebsd_setattr(ap) 6342168962Spjd struct vop_setattr_args /* { 6343168962Spjd struct vnode *a_vp; 6344168962Spjd struct vattr *a_vap; 6345168962Spjd struct ucred *a_cred; 6346168962Spjd } */ *ap; 6347168962Spjd{ 6348185172Spjd vnode_t *vp = ap->a_vp; 6349168962Spjd vattr_t *vap = ap->a_vap; 6350185172Spjd cred_t *cred = ap->a_cred; 6351185029Spjd xvattr_t xvap; 6352185029Spjd u_long fflags; 6353185029Spjd uint64_t zflags; 6354168962Spjd 6355168962Spjd vattr_init_mask(vap); 6356170044Spjd vap->va_mask &= ~AT_NOSET; 6357168962Spjd 6358185029Spjd xva_init(&xvap); 6359185029Spjd xvap.xva_vattr = *vap; 6360185029Spjd 6361219089Spjd zflags = VTOZ(vp)->z_pflags; 6362185172Spjd 6363185029Spjd if (vap->va_flags != VNOVAL) { 6364197683Sdelphij zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 6365185172Spjd int error; 6366185172Spjd 6367197683Sdelphij if (zfsvfs->z_use_fuids == B_FALSE) 6368197683Sdelphij return (EOPNOTSUPP); 6369197683Sdelphij 6370185029Spjd fflags = vap->va_flags; 6371254627Sken /* 6372254627Sken * XXX KDM 6373254627Sken * We need to figure out whether it makes sense to allow 6374254627Sken * UF_REPARSE through, since we don't really have other 6375254627Sken * facilities to handle reparse points and zfs_setattr() 6376254627Sken * doesn't currently allow setting that attribute anyway. 6377254627Sken */ 6378254627Sken if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE| 6379254627Sken UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE| 6380254627Sken UF_OFFLINE|UF_SPARSE)) != 0) 6381185029Spjd return (EOPNOTSUPP); 6382185172Spjd /* 6383185172Spjd * Unprivileged processes are not permitted to unset system 6384185172Spjd * flags, or modify flags if any system flags are set. 6385185172Spjd * Privileged non-jail processes may not modify system flags 6386185172Spjd * if securelevel > 0 and any existing system flags are set. 6387185172Spjd * Privileged jail processes behave like privileged non-jail 6388185172Spjd * processes if the security.jail.chflags_allowed sysctl is 6389185172Spjd * is non-zero; otherwise, they behave like unprivileged 6390185172Spjd * processes. 6391185172Spjd */ 6392197861Spjd if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 6393197861Spjd priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) { 6394185172Spjd if (zflags & 6395185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 6396185172Spjd error = securelevel_gt(cred, 0); 6397197861Spjd if (error != 0) 6398185172Spjd return (error); 6399185172Spjd } 6400185172Spjd } else { 6401197861Spjd /* 6402197861Spjd * Callers may only modify the file flags on objects they 6403197861Spjd * have VADMIN rights for. 6404197861Spjd */ 6405197861Spjd if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0) 6406197861Spjd return (error); 6407185172Spjd if (zflags & 6408185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 6409185172Spjd return (EPERM); 6410185172Spjd } 6411185172Spjd if (fflags & 6412185172Spjd (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 6413185172Spjd return (EPERM); 6414185172Spjd } 6415185172Spjd } 6416185029Spjd 6417185029Spjd#define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 6418185029Spjd if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 6419185029Spjd ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 6420185029Spjd XVA_SET_REQ(&xvap, (xflag)); \ 6421185029Spjd (xfield) = ((fflags & (fflag)) != 0); \ 6422185029Spjd } \ 6423185029Spjd} while (0) 6424185029Spjd /* Convert chflags into ZFS-type flags. */ 6425185029Spjd /* XXX: what about SF_SETTABLE?. */ 6426185029Spjd FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 6427185029Spjd xvap.xva_xoptattrs.xoa_immutable); 6428185029Spjd FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 6429185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 6430185029Spjd FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 6431185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 6432254627Sken FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE, 6433254627Sken xvap.xva_xoptattrs.xoa_archive); 6434185029Spjd FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 6435185172Spjd xvap.xva_xoptattrs.xoa_nodump); 6436254627Sken FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY, 6437254627Sken xvap.xva_xoptattrs.xoa_readonly); 6438254627Sken FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM, 6439254627Sken xvap.xva_xoptattrs.xoa_system); 6440254627Sken FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN, 6441254627Sken xvap.xva_xoptattrs.xoa_hidden); 6442254627Sken FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE, 6443254627Sken xvap.xva_xoptattrs.xoa_hidden); 6444254627Sken FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE, 6445254627Sken xvap.xva_xoptattrs.xoa_offline); 6446254627Sken FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE, 6447254627Sken xvap.xva_xoptattrs.xoa_sparse); 6448185029Spjd#undef FLAG_CHANGE 6449185029Spjd } 6450185172Spjd return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL)); 6451168962Spjd} 6452168962Spjd 6453168962Spjdstatic int 6454168962Spjdzfs_freebsd_rename(ap) 6455168962Spjd struct vop_rename_args /* { 6456168962Spjd struct vnode *a_fdvp; 6457168962Spjd struct vnode *a_fvp; 6458168962Spjd struct componentname *a_fcnp; 6459168962Spjd struct vnode *a_tdvp; 6460168962Spjd struct vnode *a_tvp; 6461168962Spjd struct componentname *a_tcnp; 6462168962Spjd } */ *ap; 6463168962Spjd{ 6464168962Spjd vnode_t *fdvp = ap->a_fdvp; 6465168962Spjd vnode_t *fvp = ap->a_fvp; 6466168962Spjd vnode_t *tdvp = ap->a_tdvp; 6467168962Spjd vnode_t *tvp = ap->a_tvp; 6468168962Spjd int error; 6469168962Spjd 6470192237Skmacy ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); 6471192237Skmacy ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); 6472168962Spjd 6473255748Sdavide /* 6474255748Sdavide * Check for cross-device rename. 6475255748Sdavide */ 6476255748Sdavide if ((fdvp->v_mount != tdvp->v_mount) || 6477255748Sdavide (tvp && (fdvp->v_mount != tvp->v_mount))) 6478255748Sdavide error = EXDEV; 6479255748Sdavide else 6480254982Sdelphij error = zfs_rename(fdvp, ap->a_fcnp->cn_nameptr, tdvp, 6481254982Sdelphij ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred, NULL, 0); 6482168962Spjd if (tdvp == tvp) 6483168962Spjd VN_RELE(tdvp); 6484168962Spjd else 6485168962Spjd VN_URELE(tdvp); 6486168962Spjd if (tvp) 6487168962Spjd VN_URELE(tvp); 6488168962Spjd VN_RELE(fdvp); 6489168962Spjd VN_RELE(fvp); 6490168962Spjd 6491168962Spjd return (error); 6492168962Spjd} 6493168962Spjd 6494168962Spjdstatic int 6495168962Spjdzfs_freebsd_symlink(ap) 6496168962Spjd struct vop_symlink_args /* { 6497168962Spjd struct vnode *a_dvp; 6498168962Spjd struct vnode **a_vpp; 6499168962Spjd struct componentname *a_cnp; 6500168962Spjd struct vattr *a_vap; 6501168962Spjd char *a_target; 6502168962Spjd } */ *ap; 6503168962Spjd{ 6504168962Spjd struct componentname *cnp = ap->a_cnp; 6505168962Spjd vattr_t *vap = ap->a_vap; 6506168962Spjd 6507168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6508168962Spjd 6509168962Spjd vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. */ 6510168962Spjd vattr_init_mask(vap); 6511168962Spjd 6512168962Spjd return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 6513168962Spjd ap->a_target, cnp->cn_cred, cnp->cn_thread)); 6514168962Spjd} 6515168962Spjd 6516168962Spjdstatic int 6517168962Spjdzfs_freebsd_readlink(ap) 6518168962Spjd struct vop_readlink_args /* { 6519168962Spjd struct vnode *a_vp; 6520168962Spjd struct uio *a_uio; 6521168962Spjd struct ucred *a_cred; 6522168962Spjd } */ *ap; 6523168962Spjd{ 6524168962Spjd 6525185029Spjd return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 6526168962Spjd} 6527168962Spjd 6528168962Spjdstatic int 6529168962Spjdzfs_freebsd_link(ap) 6530168962Spjd struct vop_link_args /* { 6531168962Spjd struct vnode *a_tdvp; 6532168962Spjd struct vnode *a_vp; 6533168962Spjd struct componentname *a_cnp; 6534168962Spjd } */ *ap; 6535168962Spjd{ 6536168962Spjd struct componentname *cnp = ap->a_cnp; 6537254982Sdelphij vnode_t *vp = ap->a_vp; 6538254982Sdelphij vnode_t *tdvp = ap->a_tdvp; 6539168962Spjd 6540254982Sdelphij if (tdvp->v_mount != vp->v_mount) 6541254982Sdelphij return (EXDEV); 6542254982Sdelphij 6543168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6544168962Spjd 6545254982Sdelphij return (zfs_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 6546168962Spjd} 6547168962Spjd 6548168962Spjdstatic int 6549168962Spjdzfs_freebsd_inactive(ap) 6550169170Spjd struct vop_inactive_args /* { 6551169170Spjd struct vnode *a_vp; 6552169170Spjd struct thread *a_td; 6553169170Spjd } */ *ap; 6554168962Spjd{ 6555168962Spjd vnode_t *vp = ap->a_vp; 6556168962Spjd 6557185029Spjd zfs_inactive(vp, ap->a_td->td_ucred, NULL); 6558168962Spjd return (0); 6559168962Spjd} 6560168962Spjd 6561168962Spjdstatic int 6562168962Spjdzfs_freebsd_reclaim(ap) 6563168962Spjd struct vop_reclaim_args /* { 6564168962Spjd struct vnode *a_vp; 6565168962Spjd struct thread *a_td; 6566168962Spjd } */ *ap; 6567168962Spjd{ 6568169170Spjd vnode_t *vp = ap->a_vp; 6569168962Spjd znode_t *zp = VTOZ(vp); 6570197133Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6571168962Spjd 6572169025Spjd ASSERT(zp != NULL); 6573169025Spjd 6574243520Savg /* Destroy the vm object and flush associated pages. */ 6575243520Savg vnode_destroy_vobject(vp); 6576243520Savg 6577168962Spjd /* 6578243520Savg * z_teardown_inactive_lock protects from a race with 6579243520Savg * zfs_znode_dmu_fini in zfsvfs_teardown during 6580243520Savg * force unmount. 6581168962Spjd */ 6582243520Savg rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 6583243520Savg if (zp->z_sa_hdl == NULL) 6584196301Spjd zfs_znode_free(zp); 6585243520Savg else 6586243520Savg zfs_zinactive(zp); 6587243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 6588185029Spjd 6589168962Spjd vp->v_data = NULL; 6590168962Spjd return (0); 6591168962Spjd} 6592168962Spjd 6593168962Spjdstatic int 6594168962Spjdzfs_freebsd_fid(ap) 6595168962Spjd struct vop_fid_args /* { 6596168962Spjd struct vnode *a_vp; 6597168962Spjd struct fid *a_fid; 6598168962Spjd } */ *ap; 6599168962Spjd{ 6600168962Spjd 6601185029Spjd return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 6602168962Spjd} 6603168962Spjd 6604168962Spjdstatic int 6605168962Spjdzfs_freebsd_pathconf(ap) 6606168962Spjd struct vop_pathconf_args /* { 6607168962Spjd struct vnode *a_vp; 6608168962Spjd int a_name; 6609168962Spjd register_t *a_retval; 6610168962Spjd } */ *ap; 6611168962Spjd{ 6612168962Spjd ulong_t val; 6613168962Spjd int error; 6614168962Spjd 6615185029Spjd error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 6616168962Spjd if (error == 0) 6617168962Spjd *ap->a_retval = val; 6618168962Spjd else if (error == EOPNOTSUPP) 6619168962Spjd error = vop_stdpathconf(ap); 6620168962Spjd return (error); 6621168962Spjd} 6622168962Spjd 6623196949Straszstatic int 6624196949Straszzfs_freebsd_fifo_pathconf(ap) 6625196949Strasz struct vop_pathconf_args /* { 6626196949Strasz struct vnode *a_vp; 6627196949Strasz int a_name; 6628196949Strasz register_t *a_retval; 6629196949Strasz } */ *ap; 6630196949Strasz{ 6631196949Strasz 6632196949Strasz switch (ap->a_name) { 6633196949Strasz case _PC_ACL_EXTENDED: 6634196949Strasz case _PC_ACL_NFS4: 6635196949Strasz case _PC_ACL_PATH_MAX: 6636196949Strasz case _PC_MAC_PRESENT: 6637196949Strasz return (zfs_freebsd_pathconf(ap)); 6638196949Strasz default: 6639196949Strasz return (fifo_specops.vop_pathconf(ap)); 6640196949Strasz } 6641196949Strasz} 6642196949Strasz 6643185029Spjd/* 6644185029Spjd * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 6645185029Spjd * extended attribute name: 6646185029Spjd * 6647185029Spjd * NAMESPACE PREFIX 6648185029Spjd * system freebsd:system: 6649185029Spjd * user (none, can be used to access ZFS fsattr(5) attributes 6650185029Spjd * created on Solaris) 6651185029Spjd */ 6652185029Spjdstatic int 6653185029Spjdzfs_create_attrname(int attrnamespace, const char *name, char *attrname, 6654185029Spjd size_t size) 6655185029Spjd{ 6656185029Spjd const char *namespace, *prefix, *suffix; 6657185029Spjd 6658185029Spjd /* We don't allow '/' character in attribute name. */ 6659185029Spjd if (strchr(name, '/') != NULL) 6660185029Spjd return (EINVAL); 6661185029Spjd /* We don't allow attribute names that start with "freebsd:" string. */ 6662185029Spjd if (strncmp(name, "freebsd:", 8) == 0) 6663185029Spjd return (EINVAL); 6664185029Spjd 6665185029Spjd bzero(attrname, size); 6666185029Spjd 6667185029Spjd switch (attrnamespace) { 6668185029Spjd case EXTATTR_NAMESPACE_USER: 6669185029Spjd#if 0 6670185029Spjd prefix = "freebsd:"; 6671185029Spjd namespace = EXTATTR_NAMESPACE_USER_STRING; 6672185029Spjd suffix = ":"; 6673185029Spjd#else 6674185029Spjd /* 6675185029Spjd * This is the default namespace by which we can access all 6676185029Spjd * attributes created on Solaris. 6677185029Spjd */ 6678185029Spjd prefix = namespace = suffix = ""; 6679185029Spjd#endif 6680185029Spjd break; 6681185029Spjd case EXTATTR_NAMESPACE_SYSTEM: 6682185029Spjd prefix = "freebsd:"; 6683185029Spjd namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 6684185029Spjd suffix = ":"; 6685185029Spjd break; 6686185029Spjd case EXTATTR_NAMESPACE_EMPTY: 6687185029Spjd default: 6688185029Spjd return (EINVAL); 6689185029Spjd } 6690185029Spjd if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 6691185029Spjd name) >= size) { 6692185029Spjd return (ENAMETOOLONG); 6693185029Spjd } 6694185029Spjd return (0); 6695185029Spjd} 6696185029Spjd 6697185029Spjd/* 6698185029Spjd * Vnode operating to retrieve a named extended attribute. 6699185029Spjd */ 6700185029Spjdstatic int 6701185029Spjdzfs_getextattr(struct vop_getextattr_args *ap) 6702185029Spjd/* 6703185029Spjdvop_getextattr { 6704185029Spjd IN struct vnode *a_vp; 6705185029Spjd IN int a_attrnamespace; 6706185029Spjd IN const char *a_name; 6707185029Spjd INOUT struct uio *a_uio; 6708185029Spjd OUT size_t *a_size; 6709185029Spjd IN struct ucred *a_cred; 6710185029Spjd IN struct thread *a_td; 6711185029Spjd}; 6712185029Spjd*/ 6713185029Spjd{ 6714185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6715185029Spjd struct thread *td = ap->a_td; 6716185029Spjd struct nameidata nd; 6717185029Spjd char attrname[255]; 6718185029Spjd struct vattr va; 6719185029Spjd vnode_t *xvp = NULL, *vp; 6720185029Spjd int error, flags; 6721185029Spjd 6722195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6723195785Strasz ap->a_cred, ap->a_td, VREAD); 6724195785Strasz if (error != 0) 6725195785Strasz return (error); 6726195785Strasz 6727185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6728185029Spjd sizeof(attrname)); 6729185029Spjd if (error != 0) 6730185029Spjd return (error); 6731185029Spjd 6732185029Spjd ZFS_ENTER(zfsvfs); 6733185029Spjd 6734185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6735185029Spjd LOOKUP_XATTR); 6736185029Spjd if (error != 0) { 6737185029Spjd ZFS_EXIT(zfsvfs); 6738185029Spjd return (error); 6739185029Spjd } 6740185029Spjd 6741185029Spjd flags = FREAD; 6742241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 6743185029Spjd xvp, td); 6744194586Skib error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL); 6745185029Spjd vp = nd.ni_vp; 6746185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6747185029Spjd if (error != 0) { 6748196303Spjd ZFS_EXIT(zfsvfs); 6749195785Strasz if (error == ENOENT) 6750195785Strasz error = ENOATTR; 6751185029Spjd return (error); 6752185029Spjd } 6753185029Spjd 6754185029Spjd if (ap->a_size != NULL) { 6755185029Spjd error = VOP_GETATTR(vp, &va, ap->a_cred); 6756185029Spjd if (error == 0) 6757185029Spjd *ap->a_size = (size_t)va.va_size; 6758185029Spjd } else if (ap->a_uio != NULL) 6759224605Smm error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); 6760185029Spjd 6761185029Spjd VOP_UNLOCK(vp, 0); 6762185029Spjd vn_close(vp, flags, ap->a_cred, td); 6763185029Spjd ZFS_EXIT(zfsvfs); 6764185029Spjd 6765185029Spjd return (error); 6766185029Spjd} 6767185029Spjd 6768185029Spjd/* 6769185029Spjd * Vnode operation to remove a named attribute. 6770185029Spjd */ 6771185029Spjdint 6772185029Spjdzfs_deleteextattr(struct vop_deleteextattr_args *ap) 6773185029Spjd/* 6774185029Spjdvop_deleteextattr { 6775185029Spjd IN struct vnode *a_vp; 6776185029Spjd IN int a_attrnamespace; 6777185029Spjd IN const char *a_name; 6778185029Spjd IN struct ucred *a_cred; 6779185029Spjd IN struct thread *a_td; 6780185029Spjd}; 6781185029Spjd*/ 6782185029Spjd{ 6783185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6784185029Spjd struct thread *td = ap->a_td; 6785185029Spjd struct nameidata nd; 6786185029Spjd char attrname[255]; 6787185029Spjd struct vattr va; 6788185029Spjd vnode_t *xvp = NULL, *vp; 6789185029Spjd int error, flags; 6790185029Spjd 6791195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6792195785Strasz ap->a_cred, ap->a_td, VWRITE); 6793195785Strasz if (error != 0) 6794195785Strasz return (error); 6795195785Strasz 6796185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6797185029Spjd sizeof(attrname)); 6798185029Spjd if (error != 0) 6799185029Spjd return (error); 6800185029Spjd 6801185029Spjd ZFS_ENTER(zfsvfs); 6802185029Spjd 6803185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6804185029Spjd LOOKUP_XATTR); 6805185029Spjd if (error != 0) { 6806185029Spjd ZFS_EXIT(zfsvfs); 6807185029Spjd return (error); 6808185029Spjd } 6809185029Spjd 6810241896Skib NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 6811185029Spjd UIO_SYSSPACE, attrname, xvp, td); 6812185029Spjd error = namei(&nd); 6813185029Spjd vp = nd.ni_vp; 6814185029Spjd if (error != 0) { 6815196303Spjd ZFS_EXIT(zfsvfs); 6816260706Savg NDFREE(&nd, NDF_ONLY_PNBUF); 6817195785Strasz if (error == ENOENT) 6818195785Strasz error = ENOATTR; 6819185029Spjd return (error); 6820185029Spjd } 6821260706Savg 6822185029Spjd error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 6823260706Savg NDFREE(&nd, NDF_ONLY_PNBUF); 6824185029Spjd 6825185029Spjd vput(nd.ni_dvp); 6826185029Spjd if (vp == nd.ni_dvp) 6827185029Spjd vrele(vp); 6828185029Spjd else 6829185029Spjd vput(vp); 6830185029Spjd ZFS_EXIT(zfsvfs); 6831185029Spjd 6832185029Spjd return (error); 6833185029Spjd} 6834185029Spjd 6835185029Spjd/* 6836185029Spjd * Vnode operation to set a named attribute. 6837185029Spjd */ 6838185029Spjdstatic int 6839185029Spjdzfs_setextattr(struct vop_setextattr_args *ap) 6840185029Spjd/* 6841185029Spjdvop_setextattr { 6842185029Spjd IN struct vnode *a_vp; 6843185029Spjd IN int a_attrnamespace; 6844185029Spjd IN const char *a_name; 6845185029Spjd INOUT struct uio *a_uio; 6846185029Spjd IN struct ucred *a_cred; 6847185029Spjd IN struct thread *a_td; 6848185029Spjd}; 6849185029Spjd*/ 6850185029Spjd{ 6851185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6852185029Spjd struct thread *td = ap->a_td; 6853185029Spjd struct nameidata nd; 6854185029Spjd char attrname[255]; 6855185029Spjd struct vattr va; 6856185029Spjd vnode_t *xvp = NULL, *vp; 6857185029Spjd int error, flags; 6858185029Spjd 6859195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6860195785Strasz ap->a_cred, ap->a_td, VWRITE); 6861195785Strasz if (error != 0) 6862195785Strasz return (error); 6863195785Strasz 6864185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6865185029Spjd sizeof(attrname)); 6866185029Spjd if (error != 0) 6867185029Spjd return (error); 6868185029Spjd 6869185029Spjd ZFS_ENTER(zfsvfs); 6870185029Spjd 6871185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6872195785Strasz LOOKUP_XATTR | CREATE_XATTR_DIR); 6873185029Spjd if (error != 0) { 6874185029Spjd ZFS_EXIT(zfsvfs); 6875185029Spjd return (error); 6876185029Spjd } 6877185029Spjd 6878185029Spjd flags = FFLAGS(O_WRONLY | O_CREAT); 6879241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 6880185029Spjd xvp, td); 6881194586Skib error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); 6882185029Spjd vp = nd.ni_vp; 6883185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6884185029Spjd if (error != 0) { 6885185029Spjd ZFS_EXIT(zfsvfs); 6886185029Spjd return (error); 6887185029Spjd } 6888185029Spjd 6889185029Spjd VATTR_NULL(&va); 6890185029Spjd va.va_size = 0; 6891185029Spjd error = VOP_SETATTR(vp, &va, ap->a_cred); 6892185029Spjd if (error == 0) 6893268420Smav VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred); 6894185029Spjd 6895185029Spjd VOP_UNLOCK(vp, 0); 6896185029Spjd vn_close(vp, flags, ap->a_cred, td); 6897185029Spjd ZFS_EXIT(zfsvfs); 6898185029Spjd 6899185029Spjd return (error); 6900185029Spjd} 6901185029Spjd 6902185029Spjd/* 6903185029Spjd * Vnode operation to retrieve extended attributes on a vnode. 6904185029Spjd */ 6905185029Spjdstatic int 6906185029Spjdzfs_listextattr(struct vop_listextattr_args *ap) 6907185029Spjd/* 6908185029Spjdvop_listextattr { 6909185029Spjd IN struct vnode *a_vp; 6910185029Spjd IN int a_attrnamespace; 6911185029Spjd INOUT struct uio *a_uio; 6912185029Spjd OUT size_t *a_size; 6913185029Spjd IN struct ucred *a_cred; 6914185029Spjd IN struct thread *a_td; 6915185029Spjd}; 6916185029Spjd*/ 6917185029Spjd{ 6918185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6919185029Spjd struct thread *td = ap->a_td; 6920185029Spjd struct nameidata nd; 6921185029Spjd char attrprefix[16]; 6922185029Spjd u_char dirbuf[sizeof(struct dirent)]; 6923185029Spjd struct dirent *dp; 6924185029Spjd struct iovec aiov; 6925185029Spjd struct uio auio, *uio = ap->a_uio; 6926185029Spjd size_t *sizep = ap->a_size; 6927185029Spjd size_t plen; 6928185029Spjd vnode_t *xvp = NULL, *vp; 6929185029Spjd int done, error, eof, pos; 6930185029Spjd 6931195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6932195785Strasz ap->a_cred, ap->a_td, VREAD); 6933196303Spjd if (error != 0) 6934195785Strasz return (error); 6935195785Strasz 6936185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 6937185029Spjd sizeof(attrprefix)); 6938185029Spjd if (error != 0) 6939185029Spjd return (error); 6940185029Spjd plen = strlen(attrprefix); 6941185029Spjd 6942185029Spjd ZFS_ENTER(zfsvfs); 6943185029Spjd 6944195822Strasz if (sizep != NULL) 6945195822Strasz *sizep = 0; 6946195822Strasz 6947185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6948185029Spjd LOOKUP_XATTR); 6949185029Spjd if (error != 0) { 6950196303Spjd ZFS_EXIT(zfsvfs); 6951195785Strasz /* 6952195785Strasz * ENOATTR means that the EA directory does not yet exist, 6953195785Strasz * i.e. there are no extended attributes there. 6954195785Strasz */ 6955195785Strasz if (error == ENOATTR) 6956195785Strasz error = 0; 6957185029Spjd return (error); 6958185029Spjd } 6959185029Spjd 6960241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 6961188588Sjhb UIO_SYSSPACE, ".", xvp, td); 6962185029Spjd error = namei(&nd); 6963185029Spjd vp = nd.ni_vp; 6964185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6965185029Spjd if (error != 0) { 6966185029Spjd ZFS_EXIT(zfsvfs); 6967185029Spjd return (error); 6968185029Spjd } 6969185029Spjd 6970185029Spjd auio.uio_iov = &aiov; 6971185029Spjd auio.uio_iovcnt = 1; 6972185029Spjd auio.uio_segflg = UIO_SYSSPACE; 6973185029Spjd auio.uio_td = td; 6974185029Spjd auio.uio_rw = UIO_READ; 6975185029Spjd auio.uio_offset = 0; 6976185029Spjd 6977185029Spjd do { 6978185029Spjd u_char nlen; 6979185029Spjd 6980185029Spjd aiov.iov_base = (void *)dirbuf; 6981185029Spjd aiov.iov_len = sizeof(dirbuf); 6982185029Spjd auio.uio_resid = sizeof(dirbuf); 6983185029Spjd error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 6984185029Spjd done = sizeof(dirbuf) - auio.uio_resid; 6985185029Spjd if (error != 0) 6986185029Spjd break; 6987185029Spjd for (pos = 0; pos < done;) { 6988185029Spjd dp = (struct dirent *)(dirbuf + pos); 6989185029Spjd pos += dp->d_reclen; 6990185029Spjd /* 6991185029Spjd * XXX: Temporarily we also accept DT_UNKNOWN, as this 6992185029Spjd * is what we get when attribute was created on Solaris. 6993185029Spjd */ 6994185029Spjd if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 6995185029Spjd continue; 6996185029Spjd if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) 6997185029Spjd continue; 6998185029Spjd else if (strncmp(dp->d_name, attrprefix, plen) != 0) 6999185029Spjd continue; 7000185029Spjd nlen = dp->d_namlen - plen; 7001185029Spjd if (sizep != NULL) 7002185029Spjd *sizep += 1 + nlen; 7003185029Spjd else if (uio != NULL) { 7004185029Spjd /* 7005185029Spjd * Format of extattr name entry is one byte for 7006185029Spjd * length and the rest for name. 7007185029Spjd */ 7008185029Spjd error = uiomove(&nlen, 1, uio->uio_rw, uio); 7009185029Spjd if (error == 0) { 7010185029Spjd error = uiomove(dp->d_name + plen, nlen, 7011185029Spjd uio->uio_rw, uio); 7012185029Spjd } 7013185029Spjd if (error != 0) 7014185029Spjd break; 7015185029Spjd } 7016185029Spjd } 7017185029Spjd } while (!eof && error == 0); 7018185029Spjd 7019185029Spjd vput(vp); 7020185029Spjd ZFS_EXIT(zfsvfs); 7021185029Spjd 7022185029Spjd return (error); 7023185029Spjd} 7024185029Spjd 7025192800Straszint 7026192800Straszzfs_freebsd_getacl(ap) 7027192800Strasz struct vop_getacl_args /* { 7028192800Strasz struct vnode *vp; 7029192800Strasz acl_type_t type; 7030192800Strasz struct acl *aclp; 7031192800Strasz struct ucred *cred; 7032192800Strasz struct thread *td; 7033192800Strasz } */ *ap; 7034192800Strasz{ 7035192800Strasz int error; 7036192800Strasz vsecattr_t vsecattr; 7037192800Strasz 7038192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 7039197435Strasz return (EINVAL); 7040192800Strasz 7041192800Strasz vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 7042192800Strasz if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)) 7043192800Strasz return (error); 7044192800Strasz 7045192800Strasz error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt); 7046196303Spjd if (vsecattr.vsa_aclentp != NULL) 7047196303Spjd kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz); 7048192800Strasz 7049196303Spjd return (error); 7050192800Strasz} 7051192800Strasz 7052192800Straszint 7053192800Straszzfs_freebsd_setacl(ap) 7054192800Strasz struct vop_setacl_args /* { 7055192800Strasz struct vnode *vp; 7056192800Strasz acl_type_t type; 7057192800Strasz struct acl *aclp; 7058192800Strasz struct ucred *cred; 7059192800Strasz struct thread *td; 7060192800Strasz } */ *ap; 7061192800Strasz{ 7062192800Strasz int error; 7063192800Strasz vsecattr_t vsecattr; 7064192800Strasz int aclbsize; /* size of acl list in bytes */ 7065192800Strasz aclent_t *aaclp; 7066192800Strasz 7067192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 7068197435Strasz return (EINVAL); 7069192800Strasz 7070192800Strasz if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 7071192800Strasz return (EINVAL); 7072192800Strasz 7073192800Strasz /* 7074196949Strasz * With NFSv4 ACLs, chmod(2) may need to add additional entries, 7075192800Strasz * splitting every entry into two and appending "canonical six" 7076192800Strasz * entries at the end. Don't allow for setting an ACL that would 7077192800Strasz * cause chmod(2) to run out of ACL entries. 7078192800Strasz */ 7079192800Strasz if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 7080192800Strasz return (ENOSPC); 7081192800Strasz 7082208030Strasz error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 7083208030Strasz if (error != 0) 7084208030Strasz return (error); 7085208030Strasz 7086192800Strasz vsecattr.vsa_mask = VSA_ACE; 7087192800Strasz aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t); 7088192800Strasz vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 7089192800Strasz aaclp = vsecattr.vsa_aclentp; 7090192800Strasz vsecattr.vsa_aclentsz = aclbsize; 7091192800Strasz 7092192800Strasz aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 7093192800Strasz error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL); 7094192800Strasz kmem_free(aaclp, aclbsize); 7095192800Strasz 7096192800Strasz return (error); 7097192800Strasz} 7098192800Strasz 7099192800Straszint 7100192800Straszzfs_freebsd_aclcheck(ap) 7101192800Strasz struct vop_aclcheck_args /* { 7102192800Strasz struct vnode *vp; 7103192800Strasz acl_type_t type; 7104192800Strasz struct acl *aclp; 7105192800Strasz struct ucred *cred; 7106192800Strasz struct thread *td; 7107192800Strasz } */ *ap; 7108192800Strasz{ 7109192800Strasz 7110192800Strasz return (EOPNOTSUPP); 7111192800Strasz} 7112192800Strasz 7113168404Spjdstruct vop_vector zfs_vnodeops; 7114168404Spjdstruct vop_vector zfs_fifoops; 7115209962Smmstruct vop_vector zfs_shareops; 7116168404Spjd 7117168404Spjdstruct vop_vector zfs_vnodeops = { 7118185029Spjd .vop_default = &default_vnodeops, 7119185029Spjd .vop_inactive = zfs_freebsd_inactive, 7120185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 7121185029Spjd .vop_access = zfs_freebsd_access, 7122168404Spjd#ifdef FREEBSD_NAMECACHE 7123185029Spjd .vop_lookup = vfs_cache_lookup, 7124185029Spjd .vop_cachedlookup = zfs_freebsd_lookup, 7125168404Spjd#else 7126185029Spjd .vop_lookup = zfs_freebsd_lookup, 7127168404Spjd#endif 7128185029Spjd .vop_getattr = zfs_freebsd_getattr, 7129185029Spjd .vop_setattr = zfs_freebsd_setattr, 7130185029Spjd .vop_create = zfs_freebsd_create, 7131185029Spjd .vop_mknod = zfs_freebsd_create, 7132185029Spjd .vop_mkdir = zfs_freebsd_mkdir, 7133185029Spjd .vop_readdir = zfs_freebsd_readdir, 7134185029Spjd .vop_fsync = zfs_freebsd_fsync, 7135185029Spjd .vop_open = zfs_freebsd_open, 7136185029Spjd .vop_close = zfs_freebsd_close, 7137185029Spjd .vop_rmdir = zfs_freebsd_rmdir, 7138185029Spjd .vop_ioctl = zfs_freebsd_ioctl, 7139185029Spjd .vop_link = zfs_freebsd_link, 7140185029Spjd .vop_symlink = zfs_freebsd_symlink, 7141185029Spjd .vop_readlink = zfs_freebsd_readlink, 7142185029Spjd .vop_read = zfs_freebsd_read, 7143185029Spjd .vop_write = zfs_freebsd_write, 7144185029Spjd .vop_remove = zfs_freebsd_remove, 7145185029Spjd .vop_rename = zfs_freebsd_rename, 7146185029Spjd .vop_pathconf = zfs_freebsd_pathconf, 7147243518Savg .vop_bmap = zfs_freebsd_bmap, 7148185029Spjd .vop_fid = zfs_freebsd_fid, 7149185029Spjd .vop_getextattr = zfs_getextattr, 7150185029Spjd .vop_deleteextattr = zfs_deleteextattr, 7151185029Spjd .vop_setextattr = zfs_setextattr, 7152185029Spjd .vop_listextattr = zfs_listextattr, 7153192800Strasz .vop_getacl = zfs_freebsd_getacl, 7154192800Strasz .vop_setacl = zfs_freebsd_setacl, 7155192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 7156213937Savg .vop_getpages = zfs_freebsd_getpages, 7157258746Savg .vop_putpages = zfs_freebsd_putpages, 7158168404Spjd}; 7159168404Spjd 7160169170Spjdstruct vop_vector zfs_fifoops = { 7161185029Spjd .vop_default = &fifo_specops, 7162200162Skib .vop_fsync = zfs_freebsd_fsync, 7163185029Spjd .vop_access = zfs_freebsd_access, 7164185029Spjd .vop_getattr = zfs_freebsd_getattr, 7165185029Spjd .vop_inactive = zfs_freebsd_inactive, 7166185029Spjd .vop_read = VOP_PANIC, 7167185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 7168185029Spjd .vop_setattr = zfs_freebsd_setattr, 7169185029Spjd .vop_write = VOP_PANIC, 7170196949Strasz .vop_pathconf = zfs_freebsd_fifo_pathconf, 7171185029Spjd .vop_fid = zfs_freebsd_fid, 7172192800Strasz .vop_getacl = zfs_freebsd_getacl, 7173192800Strasz .vop_setacl = zfs_freebsd_setacl, 7174192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 7175168404Spjd}; 7176209962Smm 7177209962Smm/* 7178209962Smm * special share hidden files vnode operations template 7179209962Smm */ 7180209962Smmstruct vop_vector zfs_shareops = { 7181209962Smm .vop_default = &default_vnodeops, 7182209962Smm .vop_access = zfs_freebsd_access, 7183209962Smm .vop_inactive = zfs_freebsd_inactive, 7184209962Smm .vop_reclaim = zfs_freebsd_reclaim, 7185209962Smm .vop_fid = zfs_freebsd_fid, 7186209962Smm .vop_pathconf = zfs_freebsd_pathconf, 7187209962Smm}; 7188