zfs_vnops.c revision 272467
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22212694Smm * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23271536Sdelphij * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 24262990Sdelphij * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 
25168404Spjd */ 26168404Spjd 27169195Spjd/* Portions Copyright 2007 Jeremy Teo */ 28219089Spjd/* Portions Copyright 2010 Robert Milkowski */ 29169195Spjd 30168404Spjd#include <sys/types.h> 31168404Spjd#include <sys/param.h> 32168404Spjd#include <sys/time.h> 33168404Spjd#include <sys/systm.h> 34168404Spjd#include <sys/sysmacros.h> 35168404Spjd#include <sys/resource.h> 36168404Spjd#include <sys/vfs.h> 37248084Sattilio#include <sys/vm.h> 38168404Spjd#include <sys/vnode.h> 39168404Spjd#include <sys/file.h> 40168404Spjd#include <sys/stat.h> 41168404Spjd#include <sys/kmem.h> 42168404Spjd#include <sys/taskq.h> 43168404Spjd#include <sys/uio.h> 44168404Spjd#include <sys/atomic.h> 45168404Spjd#include <sys/namei.h> 46168404Spjd#include <sys/mman.h> 47168404Spjd#include <sys/cmn_err.h> 48168404Spjd#include <sys/errno.h> 49168404Spjd#include <sys/unistd.h> 50168404Spjd#include <sys/zfs_dir.h> 51168404Spjd#include <sys/zfs_ioctl.h> 52168404Spjd#include <sys/fs/zfs.h> 53168404Spjd#include <sys/dmu.h> 54219089Spjd#include <sys/dmu_objset.h> 55168404Spjd#include <sys/spa.h> 56168404Spjd#include <sys/txg.h> 57168404Spjd#include <sys/dbuf.h> 58168404Spjd#include <sys/zap.h> 59219089Spjd#include <sys/sa.h> 60168404Spjd#include <sys/dirent.h> 61168962Spjd#include <sys/policy.h> 62168962Spjd#include <sys/sunddi.h> 63168404Spjd#include <sys/filio.h> 64209962Smm#include <sys/sid.h> 65168404Spjd#include <sys/zfs_ctldir.h> 66185029Spjd#include <sys/zfs_fuid.h> 67219089Spjd#include <sys/zfs_sa.h> 68168404Spjd#include <sys/dnlc.h> 69168404Spjd#include <sys/zfs_rlock.h> 70185029Spjd#include <sys/extdirent.h> 71185029Spjd#include <sys/kidmap.h> 72168404Spjd#include <sys/bio.h> 73168404Spjd#include <sys/buf.h> 74168404Spjd#include <sys/sched.h> 75192800Strasz#include <sys/acl.h> 76239077Smarius#include <vm/vm_param.h> 77215401Savg#include <vm/vm_pageout.h> 78168404Spjd 79168404Spjd/* 80168404Spjd * Programming rules. 
81168404Spjd * 82168404Spjd * Each vnode op performs some logical unit of work. To do this, the ZPL must 83168404Spjd * properly lock its in-core state, create a DMU transaction, do the work, 84168404Spjd * record this work in the intent log (ZIL), commit the DMU transaction, 85185029Spjd * and wait for the intent log to commit if it is a synchronous operation. 86185029Spjd * Moreover, the vnode ops must work in both normal and log replay context. 87168404Spjd * The ordering of events is important to avoid deadlocks and references 88168404Spjd * to freed memory. The example below illustrates the following Big Rules: 89168404Spjd * 90251631Sdelphij * (1) A check must be made in each zfs thread for a mounted file system. 91168404Spjd * This is done avoiding races using ZFS_ENTER(zfsvfs). 92251631Sdelphij * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 93251631Sdelphij * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 94251631Sdelphij * can return EIO from the calling function. 95168404Spjd * 96168404Spjd * (2) VN_RELE() should always be the last thing except for zil_commit() 97168404Spjd * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 98168404Spjd * First, if it's the last reference, the vnode/znode 99168404Spjd * can be freed, so the zp may point to freed memory. Second, the last 100168404Spjd * reference will call zfs_zinactive(), which may induce a lot of work -- 101168404Spjd * pushing cached pages (which acquires range locks) and syncing out 102168404Spjd * cached atime changes. Third, zfs_zinactive() may require a new tx, 103168404Spjd * which could deadlock the system if you were already holding one. 104191900Skmacy * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 105168404Spjd * 106168404Spjd * (3) All range locks must be grabbed before calling dmu_tx_assign(), 107168404Spjd * as they can span dmu_tx_assign() calls. 
108168404Spjd * 109258720Savg * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to 110258720Savg * dmu_tx_assign(). This is critical because we don't want to block 111258720Savg * while holding locks. 112168404Spjd * 113258720Savg * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This 114258720Savg * reduces lock contention and CPU usage when we must wait (note that if 115258720Savg * throughput is constrained by the storage, nearly every transaction 116258720Savg * must wait). 117258720Savg * 118258720Savg * Note, in particular, that if a lock is sometimes acquired before 119258720Savg * the tx assigns, and sometimes after (e.g. z_lock), then failing 120258720Savg * to use a non-blocking assign can deadlock the system. The scenario: 121258720Savg * 122168404Spjd * Thread A has grabbed a lock before calling dmu_tx_assign(). 123168404Spjd * Thread B is in an already-assigned tx, and blocks for this lock. 124168404Spjd * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 125168404Spjd * forever, because the previous txg can't quiesce until B's tx commits. 126168404Spjd * 127168404Spjd * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 128258632Savg * then drop all locks, call dmu_tx_wait(), and try again. On subsequent 129258632Savg * calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT, 130258632Savg * to indicate that this operation has already called dmu_tx_wait(). 131258632Savg * This will ensure that we don't retry forever, waiting a short bit 132258632Savg * each time. 133168404Spjd * 134168404Spjd * (5) If the operation succeeded, generate the intent log entry for it 135168404Spjd * before dropping locks. This ensures that the ordering of events 136168404Spjd * in the intent log matches the order in which they actually occurred. 
137251631Sdelphij * During ZIL replay the zfs_log_* functions will update the sequence 138209962Smm * number to indicate the zil transaction has replayed. 139168404Spjd * 140168404Spjd * (6) At the end of each vnode op, the DMU tx must always commit, 141168404Spjd * regardless of whether there were any errors. 142168404Spjd * 143219089Spjd * (7) After dropping all locks, invoke zil_commit(zilog, foid) 144168404Spjd * to ensure that synchronous semantics are provided when necessary. 145168404Spjd * 146168404Spjd * In general, this is how things should be ordered in each vnode op: 147168404Spjd * 148168404Spjd * ZFS_ENTER(zfsvfs); // exit if unmounted 149168404Spjd * top: 150168404Spjd * zfs_dirent_lock(&dl, ...) // lock directory entry (may VN_HOLD()) 151168404Spjd * rw_enter(...); // grab any other locks you need 152168404Spjd * tx = dmu_tx_create(...); // get DMU tx 153168404Spjd * dmu_tx_hold_*(); // hold each object you might modify 154258632Savg * error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 155168404Spjd * if (error) { 156168404Spjd * rw_exit(...); // drop locks 157168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 158168404Spjd * VN_RELE(...); // release held vnodes 159209962Smm * if (error == ERESTART) { 160258632Savg * waited = B_TRUE; 161168404Spjd * dmu_tx_wait(tx); 162168404Spjd * dmu_tx_abort(tx); 163168404Spjd * goto top; 164168404Spjd * } 165168404Spjd * dmu_tx_abort(tx); // abort DMU tx 166168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 167168404Spjd * return (error); // really out of space 168168404Spjd * } 169168404Spjd * error = do_real_work(); // do whatever this VOP does 170168404Spjd * if (error == 0) 171168404Spjd * zfs_log_*(...); // on success, make ZIL entry 172168404Spjd * dmu_tx_commit(tx); // commit DMU tx -- error or not 173168404Spjd * rw_exit(...); // drop locks 174168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 175168404Spjd * VN_RELE(...); // release held vnodes 176219089Spjd * 
zil_commit(zilog, foid); // synchronous when necessary 177168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 178168404Spjd * return (error); // done, report error 179168404Spjd */ 180185029Spjd 181168404Spjd/* ARGSUSED */ 182168404Spjdstatic int 183185029Spjdzfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 184168404Spjd{ 185168962Spjd znode_t *zp = VTOZ(*vpp); 186209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 187168404Spjd 188209962Smm ZFS_ENTER(zfsvfs); 189209962Smm ZFS_VERIFY_ZP(zp); 190209962Smm 191219089Spjd if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 192185029Spjd ((flag & FAPPEND) == 0)) { 193209962Smm ZFS_EXIT(zfsvfs); 194249195Smm return (SET_ERROR(EPERM)); 195185029Spjd } 196185029Spjd 197185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 198185029Spjd ZTOV(zp)->v_type == VREG && 199219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 200209962Smm if (fs_vscan(*vpp, cr, 0) != 0) { 201209962Smm ZFS_EXIT(zfsvfs); 202249195Smm return (SET_ERROR(EACCES)); 203209962Smm } 204209962Smm } 205185029Spjd 206168404Spjd /* Keep a count of the synchronous opens in the znode */ 207168962Spjd if (flag & (FSYNC | FDSYNC)) 208168404Spjd atomic_inc_32(&zp->z_sync_cnt); 209185029Spjd 210209962Smm ZFS_EXIT(zfsvfs); 211168404Spjd return (0); 212168404Spjd} 213168404Spjd 214168404Spjd/* ARGSUSED */ 215168404Spjdstatic int 216185029Spjdzfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 217185029Spjd caller_context_t *ct) 218168404Spjd{ 219168962Spjd znode_t *zp = VTOZ(vp); 220209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 221168404Spjd 222210470Smm /* 223210470Smm * Clean up any locks held by this process on the vp. 
224210470Smm */ 225210470Smm cleanlocks(vp, ddi_get_pid(), 0); 226210470Smm cleanshares(vp, ddi_get_pid()); 227210470Smm 228209962Smm ZFS_ENTER(zfsvfs); 229209962Smm ZFS_VERIFY_ZP(zp); 230209962Smm 231168404Spjd /* Decrement the synchronous opens in the znode */ 232185029Spjd if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 233168404Spjd atomic_dec_32(&zp->z_sync_cnt); 234168404Spjd 235185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 236185029Spjd ZTOV(zp)->v_type == VREG && 237219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 238185029Spjd VERIFY(fs_vscan(vp, cr, 1) == 0); 239185029Spjd 240209962Smm ZFS_EXIT(zfsvfs); 241168404Spjd return (0); 242168404Spjd} 243168404Spjd 244168404Spjd/* 245168404Spjd * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 246168404Spjd * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 247168404Spjd */ 248168404Spjdstatic int 249168978Spjdzfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 250168404Spjd{ 251168404Spjd znode_t *zp = VTOZ(vp); 252168404Spjd uint64_t noff = (uint64_t)*off; /* new offset */ 253168404Spjd uint64_t file_sz; 254168404Spjd int error; 255168404Spjd boolean_t hole; 256168404Spjd 257219089Spjd file_sz = zp->z_size; 258168404Spjd if (noff >= file_sz) { 259249195Smm return (SET_ERROR(ENXIO)); 260168404Spjd } 261168404Spjd 262168962Spjd if (cmd == _FIO_SEEK_HOLE) 263168404Spjd hole = B_TRUE; 264168404Spjd else 265168404Spjd hole = B_FALSE; 266168404Spjd 267168404Spjd error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 268168404Spjd 269271536Sdelphij if (error == ESRCH) 270249195Smm return (SET_ERROR(ENXIO)); 271271536Sdelphij 272271536Sdelphij /* 273271536Sdelphij * We could find a hole that begins after the logical end-of-file, 274271536Sdelphij * because dmu_offset_next() only works on whole blocks. 
If the 275271536Sdelphij * EOF falls mid-block, then indicate that the "virtual hole" 276271536Sdelphij * at the end of the file begins at the logical EOF, rather than 277271536Sdelphij * at the end of the last block. 278271536Sdelphij */ 279271536Sdelphij if (noff > file_sz) { 280271536Sdelphij ASSERT(hole); 281271536Sdelphij noff = file_sz; 282168404Spjd } 283168404Spjd 284168404Spjd if (noff < *off) 285168404Spjd return (error); 286168404Spjd *off = noff; 287168404Spjd return (error); 288168404Spjd} 289168404Spjd 290168404Spjd/* ARGSUSED */ 291168404Spjdstatic int 292168978Spjdzfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 293185029Spjd int *rvalp, caller_context_t *ct) 294168404Spjd{ 295168962Spjd offset_t off; 296168962Spjd int error; 297168962Spjd zfsvfs_t *zfsvfs; 298185029Spjd znode_t *zp; 299168404Spjd 300168404Spjd switch (com) { 301185029Spjd case _FIOFFS: 302168962Spjd return (0); 303168404Spjd 304168962Spjd /* 305168962Spjd * The following two ioctls are used by bfu. Faking out, 306168962Spjd * necessary to avoid bfu errors. 
307168962Spjd */ 308185029Spjd case _FIOGDIO: 309185029Spjd case _FIOSDIO: 310168962Spjd return (0); 311168962Spjd 312185029Spjd case _FIO_SEEK_DATA: 313185029Spjd case _FIO_SEEK_HOLE: 314233918Savg#ifdef sun 315168962Spjd if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 316249195Smm return (SET_ERROR(EFAULT)); 317233918Savg#else 318233918Savg off = *(offset_t *)data; 319233918Savg#endif 320185029Spjd zp = VTOZ(vp); 321185029Spjd zfsvfs = zp->z_zfsvfs; 322168404Spjd ZFS_ENTER(zfsvfs); 323185029Spjd ZFS_VERIFY_ZP(zp); 324168404Spjd 325168404Spjd /* offset parameter is in/out */ 326168404Spjd error = zfs_holey(vp, com, &off); 327168404Spjd ZFS_EXIT(zfsvfs); 328168404Spjd if (error) 329168404Spjd return (error); 330233918Savg#ifdef sun 331168962Spjd if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 332249195Smm return (SET_ERROR(EFAULT)); 333233918Savg#else 334233918Savg *(offset_t *)data = off; 335233918Savg#endif 336168404Spjd return (0); 337168404Spjd } 338249195Smm return (SET_ERROR(ENOTTY)); 339168404Spjd} 340168404Spjd 341209962Smmstatic vm_page_t 342253953Sattiliopage_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 343209962Smm{ 344209962Smm vm_object_t obj; 345209962Smm vm_page_t pp; 346258353Savg int64_t end; 347209962Smm 348258353Savg /* 349258353Savg * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE 350258353Savg * aligned boundaries, if the range is not aligned. As a result a 351258353Savg * DEV_BSIZE subrange with partially dirty data may get marked as clean. 352258353Savg * It may happen that all DEV_BSIZE subranges are marked clean and thus 353258353Savg * the whole page would be considred clean despite have some dirty data. 354258353Savg * For this reason we should shrink the range to DEV_BSIZE aligned 355258353Savg * boundaries before calling vm_page_clear_dirty. 
356258353Savg */ 357258353Savg end = rounddown2(off + nbytes, DEV_BSIZE); 358258353Savg off = roundup2(off, DEV_BSIZE); 359258353Savg nbytes = end - off; 360258353Savg 361209962Smm obj = vp->v_object; 362248084Sattilio zfs_vmobject_assert_wlocked(obj); 363209962Smm 364209962Smm for (;;) { 365209962Smm if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 366246293Savg pp->valid) { 367254138Sattilio if (vm_page_xbusied(pp)) { 368212652Savg /* 369212652Savg * Reference the page before unlocking and 370212652Savg * sleeping so that the page daemon is less 371212652Savg * likely to reclaim it. 372212652Savg */ 373225418Skib vm_page_reference(pp); 374254138Sattilio vm_page_lock(pp); 375254138Sattilio zfs_vmobject_wunlock(obj); 376254138Sattilio vm_page_busy_sleep(pp, "zfsmwb"); 377254138Sattilio zfs_vmobject_wlock(obj); 378209962Smm continue; 379212652Savg } 380254138Sattilio vm_page_sbusy(pp); 381252337Sgavin } else if (pp == NULL) { 382246293Savg pp = vm_page_alloc(obj, OFF_TO_IDX(start), 383246293Savg VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED | 384254138Sattilio VM_ALLOC_SBUSY); 385252337Sgavin } else { 386252337Sgavin ASSERT(pp != NULL && !pp->valid); 387252337Sgavin pp = NULL; 388209962Smm } 389246293Savg 390246293Savg if (pp != NULL) { 391246293Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 392253953Sattilio vm_object_pip_add(obj, 1); 393246293Savg pmap_remove_write(pp); 394258353Savg if (nbytes != 0) 395258353Savg vm_page_clear_dirty(pp, off, nbytes); 396246293Savg } 397209962Smm break; 398209962Smm } 399209962Smm return (pp); 400209962Smm} 401209962Smm 402209962Smmstatic void 403253953Sattiliopage_unbusy(vm_page_t pp) 404209962Smm{ 405209962Smm 406254138Sattilio vm_page_sunbusy(pp); 407253953Sattilio vm_object_pip_subtract(pp->object, 1); 408209962Smm} 409209962Smm 410253953Sattiliostatic vm_page_t 411253953Sattiliopage_hold(vnode_t *vp, int64_t start) 412253953Sattilio{ 413253953Sattilio vm_object_t obj; 414253953Sattilio vm_page_t pp; 415253953Sattilio 
416253953Sattilio obj = vp->v_object; 417253953Sattilio zfs_vmobject_assert_wlocked(obj); 418253953Sattilio 419253953Sattilio for (;;) { 420253953Sattilio if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 421253953Sattilio pp->valid) { 422254138Sattilio if (vm_page_xbusied(pp)) { 423253953Sattilio /* 424253953Sattilio * Reference the page before unlocking and 425253953Sattilio * sleeping so that the page daemon is less 426253953Sattilio * likely to reclaim it. 427253953Sattilio */ 428253953Sattilio vm_page_reference(pp); 429254138Sattilio vm_page_lock(pp); 430254138Sattilio zfs_vmobject_wunlock(obj); 431254138Sattilio vm_page_busy_sleep(pp, "zfsmwb"); 432254138Sattilio zfs_vmobject_wlock(obj); 433253953Sattilio continue; 434253953Sattilio } 435253953Sattilio 436253953Sattilio ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 437253953Sattilio vm_page_lock(pp); 438253953Sattilio vm_page_hold(pp); 439253953Sattilio vm_page_unlock(pp); 440253953Sattilio 441253953Sattilio } else 442253953Sattilio pp = NULL; 443253953Sattilio break; 444253953Sattilio } 445253953Sattilio return (pp); 446253953Sattilio} 447253953Sattilio 448253953Sattiliostatic void 449253953Sattiliopage_unhold(vm_page_t pp) 450253953Sattilio{ 451253953Sattilio 452253953Sattilio vm_page_lock(pp); 453253953Sattilio vm_page_unhold(pp); 454253953Sattilio vm_page_unlock(pp); 455253953Sattilio} 456253953Sattilio 457168404Spjd/* 458168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 459168404Spjd * between the DMU cache and the memory mapped pages. What this means: 460168404Spjd * 461168404Spjd * On Write: If we find a memory mapped page, we write to *both* 462168404Spjd * the page and the dmu buffer. 
463168404Spjd */ 464209962Smmstatic void 465209962Smmupdate_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 466209962Smm int segflg, dmu_tx_t *tx) 467168404Spjd{ 468168404Spjd vm_object_t obj; 469168404Spjd struct sf_buf *sf; 470246293Savg caddr_t va; 471212655Savg int off; 472168404Spjd 473258746Savg ASSERT(segflg != UIO_NOCOPY); 474168404Spjd ASSERT(vp->v_mount != NULL); 475168404Spjd obj = vp->v_object; 476168404Spjd ASSERT(obj != NULL); 477168404Spjd 478168404Spjd off = start & PAGEOFFSET; 479248084Sattilio zfs_vmobject_wlock(obj); 480168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 481209962Smm vm_page_t pp; 482246293Savg int nbytes = imin(PAGESIZE - off, len); 483168404Spjd 484258746Savg if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { 485248084Sattilio zfs_vmobject_wunlock(obj); 486168404Spjd 487246293Savg va = zfs_map_page(pp, &sf); 488246293Savg (void) dmu_read(os, oid, start+off, nbytes, 489246293Savg va+off, DMU_READ_PREFETCH);; 490209962Smm zfs_unmap_page(sf); 491246293Savg 492248084Sattilio zfs_vmobject_wlock(obj); 493253953Sattilio page_unbusy(pp); 494168404Spjd } 495209962Smm len -= nbytes; 496168404Spjd off = 0; 497168404Spjd } 498258746Savg vm_object_pip_wakeupn(obj, 0); 499248084Sattilio zfs_vmobject_wunlock(obj); 500168404Spjd} 501168404Spjd 502168404Spjd/* 503219089Spjd * Read with UIO_NOCOPY flag means that sendfile(2) requests 504219089Spjd * ZFS to populate a range of page cache pages with data. 505219089Spjd * 506219089Spjd * NOTE: this function could be optimized to pre-allocate 507254138Sattilio * all pages in advance, drain exclusive busy on all of them, 508219089Spjd * map them into contiguous KVA region and populate them 509219089Spjd * in one single dmu_read() call. 
510219089Spjd */ 511219089Spjdstatic int 512219089Spjdmappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) 513219089Spjd{ 514219089Spjd znode_t *zp = VTOZ(vp); 515219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 516219089Spjd struct sf_buf *sf; 517219089Spjd vm_object_t obj; 518219089Spjd vm_page_t pp; 519219089Spjd int64_t start; 520219089Spjd caddr_t va; 521219089Spjd int len = nbytes; 522219089Spjd int off; 523219089Spjd int error = 0; 524219089Spjd 525219089Spjd ASSERT(uio->uio_segflg == UIO_NOCOPY); 526219089Spjd ASSERT(vp->v_mount != NULL); 527219089Spjd obj = vp->v_object; 528219089Spjd ASSERT(obj != NULL); 529219089Spjd ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); 530219089Spjd 531248084Sattilio zfs_vmobject_wlock(obj); 532219089Spjd for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { 533219089Spjd int bytes = MIN(PAGESIZE, len); 534219089Spjd 535254138Sattilio pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY | 536254649Skib VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); 537219089Spjd if (pp->valid == 0) { 538248084Sattilio zfs_vmobject_wunlock(obj); 539219089Spjd va = zfs_map_page(pp, &sf); 540219089Spjd error = dmu_read(os, zp->z_id, start, bytes, va, 541219089Spjd DMU_READ_PREFETCH); 542219089Spjd if (bytes != PAGESIZE && error == 0) 543219089Spjd bzero(va + bytes, PAGESIZE - bytes); 544219089Spjd zfs_unmap_page(sf); 545248084Sattilio zfs_vmobject_wlock(obj); 546254138Sattilio vm_page_sunbusy(pp); 547219089Spjd vm_page_lock(pp); 548219089Spjd if (error) { 549253073Savg if (pp->wire_count == 0 && pp->valid == 0 && 550254138Sattilio !vm_page_busied(pp)) 551253073Savg vm_page_free(pp); 552219089Spjd } else { 553219089Spjd pp->valid = VM_PAGE_BITS_ALL; 554219089Spjd vm_page_activate(pp); 555219089Spjd } 556219089Spjd vm_page_unlock(pp); 557258739Savg } else { 558258739Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 559254138Sattilio vm_page_sunbusy(pp); 560258739Savg } 561219089Spjd if (error) 562219089Spjd break; 563219089Spjd uio->uio_resid -= bytes; 
564219089Spjd uio->uio_offset += bytes; 565219089Spjd len -= bytes; 566219089Spjd } 567248084Sattilio zfs_vmobject_wunlock(obj); 568219089Spjd return (error); 569219089Spjd} 570219089Spjd 571219089Spjd/* 572168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 573168404Spjd * between the DMU cache and the memory mapped pages. What this means: 574168404Spjd * 575168404Spjd * On Read: We "read" preferentially from memory mapped pages, 576168404Spjd * else we default from the dmu buffer. 577168404Spjd * 578168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 579251631Sdelphij * the file is memory mapped. 580168404Spjd */ 581168404Spjdstatic int 582168404Spjdmappedread(vnode_t *vp, int nbytes, uio_t *uio) 583168404Spjd{ 584168404Spjd znode_t *zp = VTOZ(vp); 585168404Spjd objset_t *os = zp->z_zfsvfs->z_os; 586168404Spjd vm_object_t obj; 587212655Savg int64_t start; 588168926Spjd caddr_t va; 589168404Spjd int len = nbytes; 590212655Savg int off; 591168404Spjd int error = 0; 592168404Spjd 593168404Spjd ASSERT(vp->v_mount != NULL); 594168404Spjd obj = vp->v_object; 595168404Spjd ASSERT(obj != NULL); 596168404Spjd 597168404Spjd start = uio->uio_loffset; 598168404Spjd off = start & PAGEOFFSET; 599248084Sattilio zfs_vmobject_wlock(obj); 600168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 601219089Spjd vm_page_t pp; 602219089Spjd uint64_t bytes = MIN(PAGESIZE - off, len); 603168404Spjd 604253953Sattilio if (pp = page_hold(vp, start)) { 605219089Spjd struct sf_buf *sf; 606219089Spjd caddr_t va; 607212652Savg 608248084Sattilio zfs_vmobject_wunlock(obj); 609219089Spjd va = zfs_map_page(pp, &sf); 610219089Spjd error = uiomove(va + off, bytes, UIO_READ, uio); 611219089Spjd zfs_unmap_page(sf); 612248084Sattilio zfs_vmobject_wlock(obj); 613253953Sattilio page_unhold(pp); 614219089Spjd } else { 615248084Sattilio zfs_vmobject_wunlock(obj); 616219089Spjd error = dmu_read_uio(os, zp->z_id, uio, bytes); 
617248084Sattilio zfs_vmobject_wlock(obj); 618168404Spjd } 619168404Spjd len -= bytes; 620168404Spjd off = 0; 621168404Spjd if (error) 622168404Spjd break; 623168404Spjd } 624248084Sattilio zfs_vmobject_wunlock(obj); 625168404Spjd return (error); 626168404Spjd} 627168404Spjd 628168404Spjdoffset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 629168404Spjd 630168404Spjd/* 631168404Spjd * Read bytes from specified file into supplied buffer. 632168404Spjd * 633168404Spjd * IN: vp - vnode of file to be read from. 634168404Spjd * uio - structure supplying read location, range info, 635168404Spjd * and return buffer. 636168404Spjd * ioflag - SYNC flags; used to provide FRSYNC semantics. 637168404Spjd * cr - credentials of caller. 638185029Spjd * ct - caller context 639168404Spjd * 640168404Spjd * OUT: uio - updated offset and range, buffer filled. 641168404Spjd * 642251631Sdelphij * RETURN: 0 on success, error code on failure. 643168404Spjd * 644168404Spjd * Side Effects: 645168404Spjd * vp - atime updated if byte count > 0 646168404Spjd */ 647168404Spjd/* ARGSUSED */ 648168404Spjdstatic int 649168962Spjdzfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 650168404Spjd{ 651168404Spjd znode_t *zp = VTOZ(vp); 652168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 653185029Spjd objset_t *os; 654168404Spjd ssize_t n, nbytes; 655247187Smm int error = 0; 656168404Spjd rl_t *rl; 657219089Spjd xuio_t *xuio = NULL; 658168404Spjd 659168404Spjd ZFS_ENTER(zfsvfs); 660185029Spjd ZFS_VERIFY_ZP(zp); 661185029Spjd os = zfsvfs->z_os; 662168404Spjd 663219089Spjd if (zp->z_pflags & ZFS_AV_QUARANTINED) { 664185029Spjd ZFS_EXIT(zfsvfs); 665249195Smm return (SET_ERROR(EACCES)); 666185029Spjd } 667185029Spjd 668168404Spjd /* 669168404Spjd * Validate file offset 670168404Spjd */ 671168404Spjd if (uio->uio_loffset < (offset_t)0) { 672168404Spjd ZFS_EXIT(zfsvfs); 673249195Smm return (SET_ERROR(EINVAL)); 674168404Spjd } 675168404Spjd 676168404Spjd /* 677168404Spjd * 
Fasttrack empty reads 678168404Spjd */ 679168404Spjd if (uio->uio_resid == 0) { 680168404Spjd ZFS_EXIT(zfsvfs); 681168404Spjd return (0); 682168404Spjd } 683168404Spjd 684168404Spjd /* 685168962Spjd * Check for mandatory locks 686168962Spjd */ 687219089Spjd if (MANDMODE(zp->z_mode)) { 688168962Spjd if (error = chklock(vp, FREAD, 689168962Spjd uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 690168962Spjd ZFS_EXIT(zfsvfs); 691168962Spjd return (error); 692168962Spjd } 693168962Spjd } 694168962Spjd 695168962Spjd /* 696168404Spjd * If we're in FRSYNC mode, sync out this znode before reading it. 697168404Spjd */ 698224605Smm if (zfsvfs->z_log && 699224605Smm (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) 700219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 701168404Spjd 702168404Spjd /* 703168404Spjd * Lock the range against changes. 704168404Spjd */ 705168404Spjd rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 706168404Spjd 707168404Spjd /* 708168404Spjd * If we are reading past end-of-file we can skip 709168404Spjd * to the end; but we might still need to set atime. 
710168404Spjd */ 711219089Spjd if (uio->uio_loffset >= zp->z_size) { 712168404Spjd error = 0; 713168404Spjd goto out; 714168404Spjd } 715168404Spjd 716219089Spjd ASSERT(uio->uio_loffset < zp->z_size); 717219089Spjd n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 718168404Spjd 719219089Spjd#ifdef sun 720219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 721219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 722219089Spjd int nblk; 723219089Spjd int blksz = zp->z_blksz; 724219089Spjd uint64_t offset = uio->uio_loffset; 725219089Spjd 726219089Spjd xuio = (xuio_t *)uio; 727219089Spjd if ((ISP2(blksz))) { 728219089Spjd nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 729219089Spjd blksz)) / blksz; 730219089Spjd } else { 731219089Spjd ASSERT(offset + n <= blksz); 732219089Spjd nblk = 1; 733219089Spjd } 734219089Spjd (void) dmu_xuio_init(xuio, nblk); 735219089Spjd 736219089Spjd if (vn_has_cached_data(vp)) { 737219089Spjd /* 738219089Spjd * For simplicity, we always allocate a full buffer 739219089Spjd * even if we only expect to read a portion of a block. 
740219089Spjd */ 741219089Spjd while (--nblk >= 0) { 742219089Spjd (void) dmu_xuio_add(xuio, 743219089Spjd dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 744219089Spjd blksz), 0, blksz); 745219089Spjd } 746219089Spjd } 747219089Spjd } 748219089Spjd#endif /* sun */ 749219089Spjd 750168404Spjd while (n > 0) { 751168404Spjd nbytes = MIN(n, zfs_read_chunk_size - 752168404Spjd P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 753168404Spjd 754219089Spjd#ifdef __FreeBSD__ 755219089Spjd if (uio->uio_segflg == UIO_NOCOPY) 756219089Spjd error = mappedread_sf(vp, nbytes, uio); 757219089Spjd else 758219089Spjd#endif /* __FreeBSD__ */ 759168404Spjd if (vn_has_cached_data(vp)) 760168404Spjd error = mappedread(vp, nbytes, uio); 761168404Spjd else 762168404Spjd error = dmu_read_uio(os, zp->z_id, uio, nbytes); 763185029Spjd if (error) { 764185029Spjd /* convert checksum errors into IO errors */ 765185029Spjd if (error == ECKSUM) 766249195Smm error = SET_ERROR(EIO); 767168404Spjd break; 768185029Spjd } 769168962Spjd 770168404Spjd n -= nbytes; 771168404Spjd } 772168404Spjdout: 773168404Spjd zfs_range_unlock(rl); 774168404Spjd 775168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 776168404Spjd ZFS_EXIT(zfsvfs); 777168404Spjd return (error); 778168404Spjd} 779168404Spjd 780168404Spjd/* 781168404Spjd * Write the bytes to a file. 782168404Spjd * 783168404Spjd * IN: vp - vnode of file to be written to. 784168404Spjd * uio - structure supplying write location, range info, 785168404Spjd * and data buffer. 786251631Sdelphij * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is 787251631Sdelphij * set if in append mode. 788168404Spjd * cr - credentials of caller. 789185029Spjd * ct - caller context (NFS/CIFS fem monitor only) 790168404Spjd * 791168404Spjd * OUT: uio - updated offset and range. 792168404Spjd * 793251631Sdelphij * RETURN: 0 on success, error code on failure. 
794168404Spjd * 795168404Spjd * Timestamps: 796168404Spjd * vp - ctime|mtime updated if byte count > 0 797168404Spjd */ 798219089Spjd 799168404Spjd/* ARGSUSED */ 800168404Spjdstatic int 801168962Spjdzfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 802168404Spjd{ 803168404Spjd znode_t *zp = VTOZ(vp); 804168962Spjd rlim64_t limit = MAXOFFSET_T; 805168404Spjd ssize_t start_resid = uio->uio_resid; 806168404Spjd ssize_t tx_bytes; 807168404Spjd uint64_t end_size; 808168404Spjd dmu_tx_t *tx; 809168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 810185029Spjd zilog_t *zilog; 811168404Spjd offset_t woff; 812168404Spjd ssize_t n, nbytes; 813168404Spjd rl_t *rl; 814168404Spjd int max_blksz = zfsvfs->z_max_blksz; 815247187Smm int error = 0; 816209962Smm arc_buf_t *abuf; 817247187Smm iovec_t *aiov = NULL; 818219089Spjd xuio_t *xuio = NULL; 819219089Spjd int i_iov = 0; 820219089Spjd int iovcnt = uio->uio_iovcnt; 821219089Spjd iovec_t *iovp = uio->uio_iov; 822219089Spjd int write_eof; 823219089Spjd int count = 0; 824219089Spjd sa_bulk_attr_t bulk[4]; 825219089Spjd uint64_t mtime[2], ctime[2]; 826168404Spjd 827168404Spjd /* 828168404Spjd * Fasttrack empty write 829168404Spjd */ 830168404Spjd n = start_resid; 831168404Spjd if (n == 0) 832168404Spjd return (0); 833168404Spjd 834168962Spjd if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 835168962Spjd limit = MAXOFFSET_T; 836168962Spjd 837168404Spjd ZFS_ENTER(zfsvfs); 838185029Spjd ZFS_VERIFY_ZP(zp); 839168404Spjd 840219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 841219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 842219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 843219089Spjd &zp->z_size, 8); 844219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 845219089Spjd &zp->z_pflags, 8); 846219089Spjd 847168404Spjd /* 848262990Sdelphij * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. 
snapshots) our 849262990Sdelphij * callers might not be able to detect properly that we are read-only, 850262990Sdelphij * so check it explicitly here. 851262990Sdelphij */ 852262990Sdelphij if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 853262990Sdelphij ZFS_EXIT(zfsvfs); 854262990Sdelphij return (SET_ERROR(EROFS)); 855262990Sdelphij } 856262990Sdelphij 857262990Sdelphij /* 858185029Spjd * If immutable or not appending then return EPERM 859185029Spjd */ 860219089Spjd if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 861219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 862219089Spjd (uio->uio_loffset < zp->z_size))) { 863185029Spjd ZFS_EXIT(zfsvfs); 864249195Smm return (SET_ERROR(EPERM)); 865185029Spjd } 866185029Spjd 867185029Spjd zilog = zfsvfs->z_log; 868185029Spjd 869185029Spjd /* 870219089Spjd * Validate file offset 871219089Spjd */ 872219089Spjd woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 873219089Spjd if (woff < 0) { 874219089Spjd ZFS_EXIT(zfsvfs); 875249195Smm return (SET_ERROR(EINVAL)); 876219089Spjd } 877219089Spjd 878219089Spjd /* 879219089Spjd * Check for mandatory locks before calling zfs_range_lock() 880219089Spjd * in order to prevent a deadlock with locks set via fcntl(). 881219089Spjd */ 882219089Spjd if (MANDMODE((mode_t)zp->z_mode) && 883219089Spjd (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 884219089Spjd ZFS_EXIT(zfsvfs); 885219089Spjd return (error); 886219089Spjd } 887219089Spjd 888219089Spjd#ifdef sun 889219089Spjd /* 890168404Spjd * Pre-fault the pages to ensure slow (eg NFS) pages 891168404Spjd * don't hold up txg. 892219089Spjd * Skip this if uio contains loaned arc_buf. 
893168404Spjd */ 894219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 895219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 896219089Spjd xuio = (xuio_t *)uio; 897219089Spjd else 898219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 899219089Spjd#endif /* sun */ 900168404Spjd 901168404Spjd /* 902168404Spjd * If in append mode, set the io offset pointer to eof. 903168404Spjd */ 904213673Spjd if (ioflag & FAPPEND) { 905168404Spjd /* 906219089Spjd * Obtain an appending range lock to guarantee file append 907219089Spjd * semantics. We reset the write offset once we have the lock. 908168404Spjd */ 909168404Spjd rl = zfs_range_lock(zp, 0, n, RL_APPEND); 910219089Spjd woff = rl->r_off; 911168404Spjd if (rl->r_len == UINT64_MAX) { 912219089Spjd /* 913219089Spjd * We overlocked the file because this write will cause 914219089Spjd * the file block size to increase. 915219089Spjd * Note that zp_size cannot change with this lock held. 916219089Spjd */ 917219089Spjd woff = zp->z_size; 918168404Spjd } 919219089Spjd uio->uio_loffset = woff; 920168404Spjd } else { 921168404Spjd /* 922219089Spjd * Note that if the file block size will change as a result of 923219089Spjd * this write, then this range lock will lock the entire file 924219089Spjd * so that we can re-write the block safely. 925168404Spjd */ 926168404Spjd rl = zfs_range_lock(zp, woff, n, RL_WRITER); 927168404Spjd } 928168404Spjd 929235781Strasz if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { 930235781Strasz zfs_range_unlock(rl); 931235781Strasz ZFS_EXIT(zfsvfs); 932235781Strasz return (EFBIG); 933235781Strasz } 934235781Strasz 935168962Spjd if (woff >= limit) { 936168962Spjd zfs_range_unlock(rl); 937168962Spjd ZFS_EXIT(zfsvfs); 938249195Smm return (SET_ERROR(EFBIG)); 939168962Spjd } 940168962Spjd 941168962Spjd if ((woff + n) > limit || woff > (limit - n)) 942168962Spjd n = limit - woff; 943168962Spjd 944219089Spjd /* Will this write extend the file length? 
*/ 945219089Spjd write_eof = (woff + n > zp->z_size); 946168404Spjd 947219089Spjd end_size = MAX(zp->z_size, woff + n); 948219089Spjd 949168404Spjd /* 950168404Spjd * Write the file in reasonable size chunks. Each chunk is written 951168404Spjd * in a separate transaction; this keeps the intent log records small 952168404Spjd * and allows us to do more fine-grained space accounting. 953168404Spjd */ 954168404Spjd while (n > 0) { 955209962Smm abuf = NULL; 956209962Smm woff = uio->uio_loffset; 957219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 958219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 959209962Smm if (abuf != NULL) 960209962Smm dmu_return_arcbuf(abuf); 961249195Smm error = SET_ERROR(EDQUOT); 962209962Smm break; 963209962Smm } 964209962Smm 965219089Spjd if (xuio && abuf == NULL) { 966219089Spjd ASSERT(i_iov < iovcnt); 967219089Spjd aiov = &iovp[i_iov]; 968219089Spjd abuf = dmu_xuio_arcbuf(xuio, i_iov); 969219089Spjd dmu_xuio_clear(xuio, i_iov); 970219089Spjd DTRACE_PROBE3(zfs_cp_write, int, i_iov, 971219089Spjd iovec_t *, aiov, arc_buf_t *, abuf); 972219089Spjd ASSERT((aiov->iov_base == abuf->b_data) || 973219089Spjd ((char *)aiov->iov_base - (char *)abuf->b_data + 974219089Spjd aiov->iov_len == arc_buf_size(abuf))); 975219089Spjd i_iov++; 976219089Spjd } else if (abuf == NULL && n >= max_blksz && 977219089Spjd woff >= zp->z_size && 978209962Smm P2PHASE(woff, max_blksz) == 0 && 979209962Smm zp->z_blksz == max_blksz) { 980219089Spjd /* 981219089Spjd * This write covers a full block. "Borrow" a buffer 982219089Spjd * from the dmu so that we can fill it before we enter 983219089Spjd * a transaction. This avoids the possibility of 984219089Spjd * holding up the transaction if the data copy hangs 985219089Spjd * up on a pagefault (e.g., from an NFS server mapping). 
986219089Spjd */ 987209962Smm size_t cbytes; 988209962Smm 989219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 990219089Spjd max_blksz); 991209962Smm ASSERT(abuf != NULL); 992209962Smm ASSERT(arc_buf_size(abuf) == max_blksz); 993209962Smm if (error = uiocopy(abuf->b_data, max_blksz, 994209962Smm UIO_WRITE, uio, &cbytes)) { 995209962Smm dmu_return_arcbuf(abuf); 996209962Smm break; 997209962Smm } 998209962Smm ASSERT(cbytes == max_blksz); 999209962Smm } 1000209962Smm 1001209962Smm /* 1002168404Spjd * Start a transaction. 1003168404Spjd */ 1004168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1005219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1006168404Spjd dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 1007219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1008258720Savg error = dmu_tx_assign(tx, TXG_WAIT); 1009168404Spjd if (error) { 1010168404Spjd dmu_tx_abort(tx); 1011209962Smm if (abuf != NULL) 1012209962Smm dmu_return_arcbuf(abuf); 1013168404Spjd break; 1014168404Spjd } 1015168404Spjd 1016168404Spjd /* 1017168404Spjd * If zfs_range_lock() over-locked we grow the blocksize 1018168404Spjd * and then reduce the lock range. This will only happen 1019168404Spjd * on the first iteration since zfs_range_reduce() will 1020168404Spjd * shrink down r_len to the appropriate size. 1021168404Spjd */ 1022168404Spjd if (rl->r_len == UINT64_MAX) { 1023168404Spjd uint64_t new_blksz; 1024168404Spjd 1025168404Spjd if (zp->z_blksz > max_blksz) { 1026168404Spjd ASSERT(!ISP2(zp->z_blksz)); 1027168404Spjd new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); 1028168404Spjd } else { 1029168404Spjd new_blksz = MIN(end_size, max_blksz); 1030168404Spjd } 1031168404Spjd zfs_grow_blocksize(zp, new_blksz, tx); 1032168404Spjd zfs_range_reduce(rl, woff, n); 1033168404Spjd } 1034168404Spjd 1035168404Spjd /* 1036168404Spjd * XXX - should we really limit each write to z_max_blksz? 1037168404Spjd * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 
1038168404Spjd */ 1039168404Spjd nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 1040168404Spjd 1041219089Spjd if (woff + nbytes > zp->z_size) 1042168404Spjd vnode_pager_setsize(vp, woff + nbytes); 1043168404Spjd 1044209962Smm if (abuf == NULL) { 1045209962Smm tx_bytes = uio->uio_resid; 1046219089Spjd error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 1047219089Spjd uio, nbytes, tx); 1048209962Smm tx_bytes -= uio->uio_resid; 1049168404Spjd } else { 1050209962Smm tx_bytes = nbytes; 1051219089Spjd ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 1052219089Spjd /* 1053219089Spjd * If this is not a full block write, but we are 1054219089Spjd * extending the file past EOF and this data starts 1055219089Spjd * block-aligned, use assign_arcbuf(). Otherwise, 1056219089Spjd * write via dmu_write(). 1057219089Spjd */ 1058219089Spjd if (tx_bytes < max_blksz && (!write_eof || 1059219089Spjd aiov->iov_base != abuf->b_data)) { 1060219089Spjd ASSERT(xuio); 1061219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, woff, 1062219089Spjd aiov->iov_len, aiov->iov_base, tx); 1063219089Spjd dmu_return_arcbuf(abuf); 1064219089Spjd xuio_stat_wbuf_copied(); 1065219089Spjd } else { 1066219089Spjd ASSERT(xuio || tx_bytes == max_blksz); 1067219089Spjd dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 1068219089Spjd woff, abuf, tx); 1069219089Spjd } 1070209962Smm ASSERT(tx_bytes <= uio->uio_resid); 1071209962Smm uioskip(uio, tx_bytes); 1072168404Spjd } 1073212657Savg if (tx_bytes && vn_has_cached_data(vp)) { 1074209962Smm update_pages(vp, woff, tx_bytes, zfsvfs->z_os, 1075209962Smm zp->z_id, uio->uio_segflg, tx); 1076209962Smm } 1077209962Smm 1078209962Smm /* 1079168404Spjd * If we made no progress, we're done. If we made even 1080168404Spjd * partial progress, update the znode and ZIL accordingly. 
1081168404Spjd */ 1082168404Spjd if (tx_bytes == 0) { 1083219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 1084219089Spjd (void *)&zp->z_size, sizeof (uint64_t), tx); 1085168404Spjd dmu_tx_commit(tx); 1086168404Spjd ASSERT(error != 0); 1087168404Spjd break; 1088168404Spjd } 1089168404Spjd 1090168404Spjd /* 1091168404Spjd * Clear Set-UID/Set-GID bits on successful write if not 1092168404Spjd * privileged and at least one of the excute bits is set. 1093168404Spjd * 1094168404Spjd * It would be nice to to this after all writes have 1095168404Spjd * been done, but that would still expose the ISUID/ISGID 1096168404Spjd * to another app after the partial write is committed. 1097185029Spjd * 1098185029Spjd * Note: we don't call zfs_fuid_map_id() here because 1099185029Spjd * user 0 is not an ephemeral uid. 1100168404Spjd */ 1101168404Spjd mutex_enter(&zp->z_acl_lock); 1102219089Spjd if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 1103168404Spjd (S_IXUSR >> 6))) != 0 && 1104219089Spjd (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 1105185029Spjd secpolicy_vnode_setid_retain(vp, cr, 1106219089Spjd (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 1107219089Spjd uint64_t newmode; 1108219089Spjd zp->z_mode &= ~(S_ISUID | S_ISGID); 1109219089Spjd newmode = zp->z_mode; 1110219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 1111219089Spjd (void *)&newmode, sizeof (uint64_t), tx); 1112168404Spjd } 1113168404Spjd mutex_exit(&zp->z_acl_lock); 1114168404Spjd 1115219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 1116219089Spjd B_TRUE); 1117168404Spjd 1118168404Spjd /* 1119168404Spjd * Update the file size (zp_size) if it has changed; 1120168404Spjd * account for possible concurrent updates. 
1121168404Spjd */ 1122219089Spjd while ((end_size = zp->z_size) < uio->uio_loffset) { 1123219089Spjd (void) atomic_cas_64(&zp->z_size, end_size, 1124168404Spjd uio->uio_loffset); 1125219089Spjd ASSERT(error == 0); 1126219089Spjd } 1127219089Spjd /* 1128219089Spjd * If we are replaying and eof is non zero then force 1129219089Spjd * the file size to the specified eof. Note, there's no 1130219089Spjd * concurrency during replay. 1131219089Spjd */ 1132219089Spjd if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 1133219089Spjd zp->z_size = zfsvfs->z_replay_eof; 1134219089Spjd 1135219089Spjd error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1136219089Spjd 1137168404Spjd zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 1138168404Spjd dmu_tx_commit(tx); 1139168404Spjd 1140168404Spjd if (error != 0) 1141168404Spjd break; 1142168404Spjd ASSERT(tx_bytes == nbytes); 1143168404Spjd n -= nbytes; 1144219089Spjd 1145219089Spjd#ifdef sun 1146219089Spjd if (!xuio && n > 0) 1147219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 1148219089Spjd#endif /* sun */ 1149168404Spjd } 1150168404Spjd 1151168404Spjd zfs_range_unlock(rl); 1152168404Spjd 1153168404Spjd /* 1154168404Spjd * If we're in replay mode, or we made no progress, return error. 1155168404Spjd * Otherwise, it's at least a partial write, so it's successful. 
1156168404Spjd */ 1157209962Smm if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1158168404Spjd ZFS_EXIT(zfsvfs); 1159168404Spjd return (error); 1160168404Spjd } 1161168404Spjd 1162219089Spjd if (ioflag & (FSYNC | FDSYNC) || 1163219089Spjd zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1164219089Spjd zil_commit(zilog, zp->z_id); 1165168404Spjd 1166168404Spjd ZFS_EXIT(zfsvfs); 1167168404Spjd return (0); 1168168404Spjd} 1169168404Spjd 1170168404Spjdvoid 1171219089Spjdzfs_get_done(zgd_t *zgd, int error) 1172168404Spjd{ 1173219089Spjd znode_t *zp = zgd->zgd_private; 1174219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 1175168404Spjd 1176219089Spjd if (zgd->zgd_db) 1177219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1178219089Spjd 1179219089Spjd zfs_range_unlock(zgd->zgd_rl); 1180219089Spjd 1181191900Skmacy /* 1182191900Skmacy * Release the vnode asynchronously as we currently have the 1183191900Skmacy * txg stopped from syncing. 1184191900Skmacy */ 1185219089Spjd VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1186219089Spjd 1187219089Spjd if (error == 0 && zgd->zgd_bp) 1188219089Spjd zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1189219089Spjd 1190168404Spjd kmem_free(zgd, sizeof (zgd_t)); 1191168404Spjd} 1192168404Spjd 1193214378Smm#ifdef DEBUG 1194214378Smmstatic int zil_fault_io = 0; 1195214378Smm#endif 1196214378Smm 1197168404Spjd/* 1198168404Spjd * Get data to generate a TX_WRITE intent log record. 
 */
/*
 *	IN:	arg	- the zfsvfs_t the record belongs to.
 *		lr	- write log record; supplies object, offset, length
 *			  and receives the block pointer for indirect writes.
 *		buf	- destination for the data of an immediate write,
 *			  NULL to request an indirect write.
 *		zio	- parent zio passed on to dmu_sync(); must not be NULL.
 *
 *	RETURN:	0 on success, error code on failure.  ENOENT is returned
 *		when the object cannot be obtained, has been unlinked, or the
 *		record's offset lies at or beyond the current file size.
 *
 * On the indirect path a successful dmu_sync() leaves the cleanup to the
 * zfs_get_done() callback; on every other path past the zgd allocation
 * zfs_get_done() is called directly before returning.
 */
int
zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
{
	zfsvfs_t *zfsvfs = arg;
	objset_t *os = zfsvfs->z_os;
	znode_t *zp;
	uint64_t object = lr->lr_foid;
	uint64_t offset = lr->lr_offset;
	uint64_t size = lr->lr_length;
	blkptr_t *bp = &lr->lr_blkptr;
	dmu_buf_t *db;
	zgd_t *zgd;
	int error = 0;

	ASSERT(zio != NULL);
	ASSERT(size != 0);

	/*
	 * Nothing to do if the file has been removed
	 */
	if (zfs_zget(zfsvfs, object, &zp) != 0)
		return (SET_ERROR(ENOENT));
	if (zp->z_unlinked) {
		/*
		 * Release the vnode asynchronously as we currently have the
		 * txg stopped from syncing.
		 */
		VN_RELE_ASYNC(ZTOV(zp),
		    dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
		return (SET_ERROR(ENOENT));
	}

	/* The zgd carries everything zfs_get_done() needs to clean up. */
	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
	zgd->zgd_zilog = zfsvfs->z_log;
	zgd->zgd_private = zp;

	/*
	 * Write records come in two flavors: immediate and indirect.
	 * For small writes it's cheaper to store the data with the
	 * log record (immediate); for large writes it's cheaper to
	 * sync the data and get a pointer to it (indirect) so that
	 * we don't have to write the data twice.
	 */
	if (buf != NULL) { /* immediate write */
		zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER);
		/* test for truncation needs to be done while range locked */
		if (offset >= zp->z_size) {
			error = SET_ERROR(ENOENT);
		} else {
			error = dmu_read(os, object, offset, size, buf,
			    DMU_READ_NO_PREFETCH);
		}
		ASSERT(error == 0 || error == ENOENT);
	} else { /* indirect write */
		/*
		 * Have to lock the whole block to ensure when it's
		 * written out and its checksum is being calculated
		 * that no one can change the data. We need to re-check
		 * blocksize after we get the lock in case it's changed!
		 */
		for (;;) {
			uint64_t blkoff;
			size = zp->z_blksz;
			/*
			 * Round offset down to the start of its block; with a
			 * non-power-of-2 block size the lock starts at 0.
			 */
			blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
			offset -= blkoff;
			zgd->zgd_rl = zfs_range_lock(zp, offset, size,
			    RL_READER);
			if (zp->z_blksz == size)
				break;
			/* Block size changed while we waited: undo and retry. */
			offset += blkoff;
			zfs_range_unlock(zgd->zgd_rl);
		}
		/* test for truncation needs to be done while range locked */
		if (lr->lr_offset >= zp->z_size)
			error = SET_ERROR(ENOENT);
#ifdef DEBUG
		if (zil_fault_io) {
			error = SET_ERROR(EIO);
			zil_fault_io = 0;
		}
#endif
		if (error == 0)
			error = dmu_buf_hold(os, object, offset, zgd, &db,
			    DMU_READ_NO_PREFETCH);

		if (error == 0) {
			/* Seed the record with the dbuf's block pointer, if any. */
			blkptr_t *obp = dmu_buf_get_blkptr(db);
			if (obp) {
				ASSERT(BP_IS_HOLE(bp));
				*bp = *obp;
			}

			zgd->zgd_db = db;
			zgd->zgd_bp = bp;

			ASSERT(db->db_offset == offset);
			ASSERT(db->db_size == size);

			error = dmu_sync(zio, lr->lr_common.lrc_txg,
			    zfs_get_done, zgd);
			ASSERT(error || lr->lr_length <= zp->z_blksz);

			/*
			 * On success, we need to wait for the write I/O
			 * initiated by dmu_sync() to complete before we can
			 * release this dbuf.  We will finish everything up
			 * in the zfs_get_done() callback.
			 */
			if (error == 0)
				return (0);

			/*
			 * EALREADY is not reported as a failure: the record is
			 * retagged as TX_WRITE2 and the call succeeds.
			 */
			if (error == EALREADY) {
				lr->lr_common.lrc_txtype = TX_WRITE2;
				error = 0;
			}
		}
	}

	zfs_get_done(zgd, error);

	return (error);
}

/*
 * Check whether the caller may access vp in the requested way.
 *
 *	IN:	vp	- vnode to check.
 *		mode	- access being requested.
 *		flag	- if V_ACE_MASK is set, mode is checked with
 *			  zfs_zaccess(); otherwise with zfs_zaccess_rwx().
 *		cr	- credentials of caller.
 *		ct	- caller context [UNUSED].
 *
 *	RETURN:	0 on success, error code on failure.
 */
/*ARGSUSED*/
static int
zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if (flag & V_ACE_MASK)
		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
	else
		error = zfs_zaccess_rwx(zp, mode, flag, cr);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * If vnode is for a device return a specfs vnode instead.
1346211932Smm */ 1347211932Smmstatic int 1348211932Smmspecvp_check(vnode_t **vpp, cred_t *cr) 1349211932Smm{ 1350211932Smm int error = 0; 1351211932Smm 1352211932Smm if (IS_DEVVP(*vpp)) { 1353211932Smm struct vnode *svp; 1354211932Smm 1355211932Smm svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1356211932Smm VN_RELE(*vpp); 1357211932Smm if (svp == NULL) 1358249195Smm error = SET_ERROR(ENOSYS); 1359211932Smm *vpp = svp; 1360211932Smm } 1361211932Smm return (error); 1362211932Smm} 1363211932Smm 1364211932Smm 1365211932Smm/* 1366168404Spjd * Lookup an entry in a directory, or an extended attribute directory. 1367168404Spjd * If it exists, return a held vnode reference for it. 1368168404Spjd * 1369168404Spjd * IN: dvp - vnode of directory to search. 1370168404Spjd * nm - name of entry to lookup. 1371168404Spjd * pnp - full pathname to lookup [UNUSED]. 1372168404Spjd * flags - LOOKUP_XATTR set if looking for an attribute. 1373168404Spjd * rdir - root directory vnode [UNUSED]. 1374168404Spjd * cr - credentials of caller. 1375185029Spjd * ct - caller context 1376185029Spjd * direntflags - directory lookup flags 1377185029Spjd * realpnp - returned pathname. 1378168404Spjd * 1379168404Spjd * OUT: vpp - vnode of located entry, NULL if not found. 1380168404Spjd * 1381251631Sdelphij * RETURN: 0 on success, error code on failure. 
1382168404Spjd * 1383168404Spjd * Timestamps: 1384168404Spjd * NA 1385168404Spjd */ 1386168404Spjd/* ARGSUSED */ 1387168962Spjdstatic int 1388168962Spjdzfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1389185029Spjd int nameiop, cred_t *cr, kthread_t *td, int flags) 1390168404Spjd{ 1391168962Spjd znode_t *zdp = VTOZ(dvp); 1392168962Spjd zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1393211932Smm int error = 0; 1394185029Spjd int *direntflags = NULL; 1395185029Spjd void *realpnp = NULL; 1396168404Spjd 1397211932Smm /* fast path */ 1398211932Smm if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { 1399211932Smm 1400211932Smm if (dvp->v_type != VDIR) { 1401249195Smm return (SET_ERROR(ENOTDIR)); 1402219089Spjd } else if (zdp->z_sa_hdl == NULL) { 1403249195Smm return (SET_ERROR(EIO)); 1404211932Smm } 1405211932Smm 1406211932Smm if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { 1407211932Smm error = zfs_fastaccesschk_execute(zdp, cr); 1408211932Smm if (!error) { 1409211932Smm *vpp = dvp; 1410211932Smm VN_HOLD(*vpp); 1411211932Smm return (0); 1412211932Smm } 1413211932Smm return (error); 1414211932Smm } else { 1415211932Smm vnode_t *tvp = dnlc_lookup(dvp, nm); 1416211932Smm 1417211932Smm if (tvp) { 1418211932Smm error = zfs_fastaccesschk_execute(zdp, cr); 1419211932Smm if (error) { 1420211932Smm VN_RELE(tvp); 1421211932Smm return (error); 1422211932Smm } 1423211932Smm if (tvp == DNLC_NO_VNODE) { 1424211932Smm VN_RELE(tvp); 1425249195Smm return (SET_ERROR(ENOENT)); 1426211932Smm } else { 1427211932Smm *vpp = tvp; 1428211932Smm return (specvp_check(vpp, cr)); 1429211932Smm } 1430211932Smm } 1431211932Smm } 1432211932Smm } 1433211932Smm 1434211932Smm DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1435211932Smm 1436168404Spjd ZFS_ENTER(zfsvfs); 1437185029Spjd ZFS_VERIFY_ZP(zdp); 1438168404Spjd 1439168404Spjd *vpp = NULL; 1440168404Spjd 1441185029Spjd if (flags & LOOKUP_XATTR) { 1442168404Spjd#ifdef TODO 1443168404Spjd /* 1444168404Spjd * 
If the xattr property is off, refuse the lookup request. 1445168404Spjd */ 1446168404Spjd if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1447168404Spjd ZFS_EXIT(zfsvfs); 1448249195Smm return (SET_ERROR(EINVAL)); 1449168404Spjd } 1450185029Spjd#endif 1451168404Spjd 1452168404Spjd /* 1453168404Spjd * We don't allow recursive attributes.. 1454168404Spjd * Maybe someday we will. 1455168404Spjd */ 1456219089Spjd if (zdp->z_pflags & ZFS_XATTR) { 1457168404Spjd ZFS_EXIT(zfsvfs); 1458249195Smm return (SET_ERROR(EINVAL)); 1459168404Spjd } 1460168404Spjd 1461168404Spjd if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1462168404Spjd ZFS_EXIT(zfsvfs); 1463168404Spjd return (error); 1464168404Spjd } 1465168404Spjd 1466168404Spjd /* 1467168404Spjd * Do we have permission to get into attribute directory? 1468168404Spjd */ 1469168404Spjd 1470185029Spjd if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1471185029Spjd B_FALSE, cr)) { 1472168404Spjd VN_RELE(*vpp); 1473185029Spjd *vpp = NULL; 1474168404Spjd } 1475168404Spjd 1476168404Spjd ZFS_EXIT(zfsvfs); 1477168404Spjd return (error); 1478168404Spjd } 1479168404Spjd 1480168404Spjd if (dvp->v_type != VDIR) { 1481168404Spjd ZFS_EXIT(zfsvfs); 1482249195Smm return (SET_ERROR(ENOTDIR)); 1483168404Spjd } 1484168404Spjd 1485168404Spjd /* 1486168404Spjd * Check accessibility of directory. 
1487168404Spjd */ 1488168404Spjd 1489185029Spjd if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1490168404Spjd ZFS_EXIT(zfsvfs); 1491168404Spjd return (error); 1492168404Spjd } 1493168404Spjd 1494185029Spjd if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1495185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1496185029Spjd ZFS_EXIT(zfsvfs); 1497249195Smm return (SET_ERROR(EILSEQ)); 1498185029Spjd } 1499168404Spjd 1500185029Spjd error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1501211932Smm if (error == 0) 1502211932Smm error = specvp_check(vpp, cr); 1503168962Spjd 1504168404Spjd /* Translate errors and add SAVENAME when needed. */ 1505168404Spjd if (cnp->cn_flags & ISLASTCN) { 1506168404Spjd switch (nameiop) { 1507168404Spjd case CREATE: 1508168404Spjd case RENAME: 1509168404Spjd if (error == ENOENT) { 1510168404Spjd error = EJUSTRETURN; 1511168404Spjd cnp->cn_flags |= SAVENAME; 1512168404Spjd break; 1513168404Spjd } 1514168404Spjd /* FALLTHROUGH */ 1515168404Spjd case DELETE: 1516168404Spjd if (error == 0) 1517168404Spjd cnp->cn_flags |= SAVENAME; 1518168404Spjd break; 1519168404Spjd } 1520168404Spjd } 1521168404Spjd if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) { 1522169198Spjd int ltype = 0; 1523169198Spjd 1524169198Spjd if (cnp->cn_flags & ISDOTDOT) { 1525176559Sattilio ltype = VOP_ISLOCKED(dvp); 1526175294Sattilio VOP_UNLOCK(dvp, 0); 1527169198Spjd } 1528206667Spjd ZFS_EXIT(zfsvfs); 1529254711Savg error = vn_lock(*vpp, cnp->cn_lkflags); 1530168962Spjd if (cnp->cn_flags & ISDOTDOT) 1531175202Sattilio vn_lock(dvp, ltype | LK_RETRY); 1532169172Spjd if (error != 0) { 1533169172Spjd VN_RELE(*vpp); 1534169172Spjd *vpp = NULL; 1535169172Spjd return (error); 1536169172Spjd } 1537206667Spjd } else { 1538206667Spjd ZFS_EXIT(zfsvfs); 1539168404Spjd } 1540168404Spjd 1541168404Spjd#ifdef FREEBSD_NAMECACHE 1542168404Spjd /* 1543168404Spjd * Insert name into cache (as non-existent) if appropriate. 
1544168404Spjd */ 1545168404Spjd if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) 1546168404Spjd cache_enter(dvp, *vpp, cnp); 1547169170Spjd /* 1548169170Spjd * Insert name into cache if appropriate. 1549169170Spjd */ 1550168404Spjd if (error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1551168404Spjd if (!(cnp->cn_flags & ISLASTCN) || 1552168404Spjd (nameiop != DELETE && nameiop != RENAME)) { 1553168404Spjd cache_enter(dvp, *vpp, cnp); 1554168404Spjd } 1555168404Spjd } 1556168404Spjd#endif 1557168404Spjd 1558168404Spjd return (error); 1559168404Spjd} 1560168404Spjd 1561168404Spjd/* 1562168404Spjd * Attempt to create a new entry in a directory. If the entry 1563168404Spjd * already exists, truncate the file if permissible, else return 1564168404Spjd * an error. Return the vp of the created or trunc'd file. 1565168404Spjd * 1566168404Spjd * IN: dvp - vnode of directory to put new file entry in. 1567168404Spjd * name - name of new file entry. 1568168404Spjd * vap - attributes of new file. 1569168404Spjd * excl - flag indicating exclusive or non-exclusive mode. 1570168404Spjd * mode - mode to open file with. 1571168404Spjd * cr - credentials of caller. 1572168404Spjd * flag - large file flag [UNUSED]. 1573185029Spjd * ct - caller context 1574268464Sdelphij * vsecp - ACL to be set 1575168404Spjd * 1576168404Spjd * OUT: vpp - vnode of created or trunc'd entry. 1577168404Spjd * 1578251631Sdelphij * RETURN: 0 on success, error code on failure. 
1579168404Spjd * 1580168404Spjd * Timestamps: 1581168404Spjd * dvp - ctime|mtime updated if new entry created 1582168404Spjd * vp - ctime|mtime always, atime if new 1583168404Spjd */ 1584185029Spjd 1585168404Spjd/* ARGSUSED */ 1586168404Spjdstatic int 1587168962Spjdzfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 1588185029Spjd vnode_t **vpp, cred_t *cr, kthread_t *td) 1589168404Spjd{ 1590168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1591168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1592185029Spjd zilog_t *zilog; 1593185029Spjd objset_t *os; 1594168404Spjd zfs_dirlock_t *dl; 1595168404Spjd dmu_tx_t *tx; 1596168404Spjd int error; 1597209962Smm ksid_t *ksid; 1598209962Smm uid_t uid; 1599209962Smm gid_t gid = crgetgid(cr); 1600219089Spjd zfs_acl_ids_t acl_ids; 1601209962Smm boolean_t fuid_dirtied; 1602219089Spjd boolean_t have_acl = B_FALSE; 1603258632Savg boolean_t waited = B_FALSE; 1604185029Spjd void *vsecp = NULL; 1605185029Spjd int flag = 0; 1606168404Spjd 1607185029Spjd /* 1608185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 1609185029Spjd * make sure file system is at proper version 1610185029Spjd */ 1611185029Spjd 1612209962Smm ksid = crgetsid(cr, KSID_OWNER); 1613209962Smm if (ksid) 1614209962Smm uid = ksid_getid(ksid); 1615209962Smm else 1616209962Smm uid = crgetuid(cr); 1617219089Spjd 1618185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 1619185029Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 1620219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1621249195Smm return (SET_ERROR(EINVAL)); 1622185029Spjd 1623168404Spjd ZFS_ENTER(zfsvfs); 1624185029Spjd ZFS_VERIFY_ZP(dzp); 1625185029Spjd os = zfsvfs->z_os; 1626185029Spjd zilog = zfsvfs->z_log; 1627168404Spjd 1628185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1629185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1630185029Spjd ZFS_EXIT(zfsvfs); 1631249195Smm return (SET_ERROR(EILSEQ)); 1632185029Spjd } 1633185029Spjd 1634185029Spjd if (vap->va_mask & AT_XVATTR) { 
1635197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 1636185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 1637185029Spjd ZFS_EXIT(zfsvfs); 1638185029Spjd return (error); 1639185029Spjd } 1640185029Spjd } 1641260704Savg 1642260704Savg getnewvnode_reserve(1); 1643260704Savg 1644168404Spjdtop: 1645168404Spjd *vpp = NULL; 1646168404Spjd 1647182905Strasz if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 1648182905Strasz vap->va_mode &= ~S_ISVTX; 1649168404Spjd 1650168404Spjd if (*name == '\0') { 1651168404Spjd /* 1652168404Spjd * Null component name refers to the directory itself. 1653168404Spjd */ 1654168404Spjd VN_HOLD(dvp); 1655168404Spjd zp = dzp; 1656168404Spjd dl = NULL; 1657168404Spjd error = 0; 1658168404Spjd } else { 1659168404Spjd /* possible VN_HOLD(zp) */ 1660185029Spjd int zflg = 0; 1661185029Spjd 1662185029Spjd if (flag & FIGNORECASE) 1663185029Spjd zflg |= ZCILOOK; 1664185029Spjd 1665185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1666185029Spjd NULL, NULL); 1667185029Spjd if (error) { 1668219089Spjd if (have_acl) 1669219089Spjd zfs_acl_ids_free(&acl_ids); 1670168404Spjd if (strcmp(name, "..") == 0) 1671249195Smm error = SET_ERROR(EISDIR); 1672260704Savg getnewvnode_drop_reserve(); 1673168404Spjd ZFS_EXIT(zfsvfs); 1674168404Spjd return (error); 1675168404Spjd } 1676168404Spjd } 1677219089Spjd 1678185029Spjd if (zp == NULL) { 1679185029Spjd uint64_t txtype; 1680168404Spjd 1681168404Spjd /* 1682168404Spjd * Create a new file object and update the directory 1683168404Spjd * to reference it. 1684168404Spjd */ 1685185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1686219089Spjd if (have_acl) 1687219089Spjd zfs_acl_ids_free(&acl_ids); 1688168404Spjd goto out; 1689168404Spjd } 1690168404Spjd 1691168404Spjd /* 1692168404Spjd * We only support the creation of regular files in 1693168404Spjd * extended attribute directories. 
1694168404Spjd */ 1695219089Spjd 1696219089Spjd if ((dzp->z_pflags & ZFS_XATTR) && 1697168404Spjd (vap->va_type != VREG)) { 1698219089Spjd if (have_acl) 1699219089Spjd zfs_acl_ids_free(&acl_ids); 1700249195Smm error = SET_ERROR(EINVAL); 1701168404Spjd goto out; 1702168404Spjd } 1703168404Spjd 1704219089Spjd if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, 1705219089Spjd cr, vsecp, &acl_ids)) != 0) 1706219089Spjd goto out; 1707219089Spjd have_acl = B_TRUE; 1708209962Smm 1709209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 1710211932Smm zfs_acl_ids_free(&acl_ids); 1711249195Smm error = SET_ERROR(EDQUOT); 1712209962Smm goto out; 1713209962Smm } 1714209962Smm 1715168404Spjd tx = dmu_tx_create(os); 1716219089Spjd 1717219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 1718219089Spjd ZFS_SA_BASE_ATTR_SIZE); 1719219089Spjd 1720209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 1721209962Smm if (fuid_dirtied) 1722209962Smm zfs_fuid_txhold(zfsvfs, tx); 1723168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 1724219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 1725219089Spjd if (!zfsvfs->z_use_sa && 1726219089Spjd acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1727168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1728219089Spjd 0, acl_ids.z_aclp->z_acl_bytes); 1729185029Spjd } 1730258632Savg error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 1731168404Spjd if (error) { 1732168404Spjd zfs_dirent_unlock(dl); 1733209962Smm if (error == ERESTART) { 1734258632Savg waited = B_TRUE; 1735168404Spjd dmu_tx_wait(tx); 1736168404Spjd dmu_tx_abort(tx); 1737168404Spjd goto top; 1738168404Spjd } 1739219089Spjd zfs_acl_ids_free(&acl_ids); 1740168404Spjd dmu_tx_abort(tx); 1741260704Savg getnewvnode_drop_reserve(); 1742168404Spjd ZFS_EXIT(zfsvfs); 1743168404Spjd return (error); 1744168404Spjd } 1745219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1746209962Smm 1747209962Smm if (fuid_dirtied) 1748209962Smm zfs_fuid_sync(zfsvfs, tx); 1749209962Smm 1750168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 1751185029Spjd txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1752185029Spjd if (flag & FIGNORECASE) 1753185029Spjd txtype |= TX_CI; 1754185029Spjd zfs_log_create(zilog, tx, txtype, dzp, zp, name, 1755209962Smm vsecp, acl_ids.z_fuidp, vap); 1756209962Smm zfs_acl_ids_free(&acl_ids); 1757168404Spjd dmu_tx_commit(tx); 1758168404Spjd } else { 1759185029Spjd int aflags = (flag & FAPPEND) ? V_APPEND : 0; 1760185029Spjd 1761219089Spjd if (have_acl) 1762219089Spjd zfs_acl_ids_free(&acl_ids); 1763219089Spjd have_acl = B_FALSE; 1764219089Spjd 1765168404Spjd /* 1766168404Spjd * A directory entry already exists for this name. 1767168404Spjd */ 1768168404Spjd /* 1769168962Spjd * Can't truncate an existing file if in exclusive mode. 1770168962Spjd */ 1771168962Spjd if (excl == EXCL) { 1772249195Smm error = SET_ERROR(EEXIST); 1773168962Spjd goto out; 1774168962Spjd } 1775168962Spjd /* 1776168404Spjd * Can't open a directory for writing. 1777168404Spjd */ 1778168404Spjd if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1779249195Smm error = SET_ERROR(EISDIR); 1780168404Spjd goto out; 1781168404Spjd } 1782168404Spjd /* 1783168404Spjd * Verify requested access to file. 
1784168404Spjd */ 1785185029Spjd if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 1786168404Spjd goto out; 1787168404Spjd } 1788168404Spjd 1789168404Spjd mutex_enter(&dzp->z_lock); 1790168404Spjd dzp->z_seq++; 1791168404Spjd mutex_exit(&dzp->z_lock); 1792168404Spjd 1793168404Spjd /* 1794168404Spjd * Truncate regular files if requested. 1795168404Spjd */ 1796168404Spjd if ((ZTOV(zp)->v_type == VREG) && 1797168404Spjd (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 1798185029Spjd /* we can't hold any locks when calling zfs_freesp() */ 1799185029Spjd zfs_dirent_unlock(dl); 1800185029Spjd dl = NULL; 1801168404Spjd error = zfs_freesp(zp, 0, 0, mode, TRUE); 1802185029Spjd if (error == 0) { 1803185029Spjd vnevent_create(ZTOV(zp), ct); 1804168404Spjd } 1805168404Spjd } 1806168404Spjd } 1807168404Spjdout: 1808260704Savg getnewvnode_drop_reserve(); 1809168404Spjd if (dl) 1810168404Spjd zfs_dirent_unlock(dl); 1811168404Spjd 1812168404Spjd if (error) { 1813168404Spjd if (zp) 1814168404Spjd VN_RELE(ZTOV(zp)); 1815168962Spjd } else { 1816168962Spjd *vpp = ZTOV(zp); 1817211932Smm error = specvp_check(vpp, cr); 1818168404Spjd } 1819168404Spjd 1820219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1821219089Spjd zil_commit(zilog, 0); 1822219089Spjd 1823168404Spjd ZFS_EXIT(zfsvfs); 1824168404Spjd return (error); 1825168404Spjd} 1826168404Spjd 1827168404Spjd/* 1828168404Spjd * Remove an entry from a directory. 1829168404Spjd * 1830168404Spjd * IN: dvp - vnode of directory to remove entry from. 1831168404Spjd * name - name of entry to remove. 1832168404Spjd * cr - credentials of caller. 1833185029Spjd * ct - caller context 1834185029Spjd * flags - case flags 1835168404Spjd * 1836251631Sdelphij * RETURN: 0 on success, error code on failure. 
1837168404Spjd * 1838168404Spjd * Timestamps: 1839168404Spjd * dvp - ctime|mtime 1840168404Spjd * vp - ctime (if nlink > 0) 1841168404Spjd */ 1842219089Spjd 1843219089Spjduint64_t null_xattr = 0; 1844219089Spjd 1845185029Spjd/*ARGSUSED*/ 1846168404Spjdstatic int 1847185029Spjdzfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 1848185029Spjd int flags) 1849168404Spjd{ 1850168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1851219089Spjd znode_t *xzp; 1852168404Spjd vnode_t *vp; 1853168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1854185029Spjd zilog_t *zilog; 1855168962Spjd uint64_t acl_obj, xattr_obj; 1856268464Sdelphij uint64_t xattr_obj_unlinked = 0; 1857219089Spjd uint64_t obj = 0; 1858168404Spjd zfs_dirlock_t *dl; 1859168404Spjd dmu_tx_t *tx; 1860168962Spjd boolean_t may_delete_now, delete_now = FALSE; 1861185029Spjd boolean_t unlinked, toobig = FALSE; 1862185029Spjd uint64_t txtype; 1863185029Spjd pathname_t *realnmp = NULL; 1864185029Spjd pathname_t realnm; 1865168404Spjd int error; 1866185029Spjd int zflg = ZEXISTS; 1867258632Savg boolean_t waited = B_FALSE; 1868168404Spjd 1869168404Spjd ZFS_ENTER(zfsvfs); 1870185029Spjd ZFS_VERIFY_ZP(dzp); 1871185029Spjd zilog = zfsvfs->z_log; 1872168404Spjd 1873185029Spjd if (flags & FIGNORECASE) { 1874185029Spjd zflg |= ZCILOOK; 1875185029Spjd pn_alloc(&realnm); 1876185029Spjd realnmp = &realnm; 1877185029Spjd } 1878185029Spjd 1879168404Spjdtop: 1880219089Spjd xattr_obj = 0; 1881219089Spjd xzp = NULL; 1882168404Spjd /* 1883168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 
1884168404Spjd */ 1885185029Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1886185029Spjd NULL, realnmp)) { 1887185029Spjd if (realnmp) 1888185029Spjd pn_free(realnmp); 1889168404Spjd ZFS_EXIT(zfsvfs); 1890168404Spjd return (error); 1891168404Spjd } 1892168404Spjd 1893168404Spjd vp = ZTOV(zp); 1894168404Spjd 1895168962Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1896168404Spjd goto out; 1897168962Spjd } 1898168404Spjd 1899168962Spjd /* 1900168962Spjd * Need to use rmdir for removing directories. 1901168962Spjd */ 1902168962Spjd if (vp->v_type == VDIR) { 1903249195Smm error = SET_ERROR(EPERM); 1904168962Spjd goto out; 1905168962Spjd } 1906168962Spjd 1907185029Spjd vnevent_remove(vp, dvp, name, ct); 1908168962Spjd 1909185029Spjd if (realnmp) 1910185029Spjd dnlc_remove(dvp, realnmp->pn_buf); 1911185029Spjd else 1912185029Spjd dnlc_remove(dvp, name); 1913168404Spjd 1914219089Spjd VI_LOCK(vp); 1915219089Spjd may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); 1916219089Spjd VI_UNLOCK(vp); 1917168962Spjd 1918168404Spjd /* 1919168404Spjd * We may delete the znode now, or we may put it in the unlinked set; 1920168404Spjd * it depends on whether we're the last link, and on whether there are 1921168404Spjd * other holds on the vnode. So we dmu_tx_hold() the right things to 1922168404Spjd * allow for either case. 1923168404Spjd */ 1924219089Spjd obj = zp->z_id; 1925168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1926168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1927219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1928219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1929219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 1930185029Spjd if (may_delete_now) { 1931185029Spjd toobig = 1932219089Spjd zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 1933185029Spjd /* if the file is too big, only hold_free a token amount */ 1934185029Spjd dmu_tx_hold_free(tx, zp->z_id, 0, 1935185029Spjd (toobig ? 
DMU_MAX_ACCESS : DMU_OBJECT_END)); 1936185029Spjd } 1937168404Spjd 1938168404Spjd /* are there any extended attributes? */ 1939219089Spjd error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1940219089Spjd &xattr_obj, sizeof (xattr_obj)); 1941219089Spjd if (error == 0 && xattr_obj) { 1942219089Spjd error = zfs_zget(zfsvfs, xattr_obj, &xzp); 1943240415Smm ASSERT0(error); 1944219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 1945219089Spjd dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1946168404Spjd } 1947168404Spjd 1948219089Spjd mutex_enter(&zp->z_lock); 1949219089Spjd if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) 1950168962Spjd dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 1951219089Spjd mutex_exit(&zp->z_lock); 1952168962Spjd 1953168404Spjd /* charge as an update -- would be nice not to charge at all */ 1954168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1955168404Spjd 1956268464Sdelphij /* 1957268464Sdelphij * Mark this transaction as typically resulting in a net free of 1958268464Sdelphij * space, unless object removal will be delayed indefinitely 1959268464Sdelphij * (due to active holds on the vnode due to the file being open). 1960268464Sdelphij */ 1961268464Sdelphij if (may_delete_now) 1962268464Sdelphij dmu_tx_mark_netfree(tx); 1963268464Sdelphij 1964258632Savg error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 1965168404Spjd if (error) { 1966168404Spjd zfs_dirent_unlock(dl); 1967168962Spjd VN_RELE(vp); 1968219089Spjd if (xzp) 1969219089Spjd VN_RELE(ZTOV(xzp)); 1970209962Smm if (error == ERESTART) { 1971258632Savg waited = B_TRUE; 1972168404Spjd dmu_tx_wait(tx); 1973168404Spjd dmu_tx_abort(tx); 1974168404Spjd goto top; 1975168404Spjd } 1976185029Spjd if (realnmp) 1977185029Spjd pn_free(realnmp); 1978168404Spjd dmu_tx_abort(tx); 1979168404Spjd ZFS_EXIT(zfsvfs); 1980168404Spjd return (error); 1981168404Spjd } 1982168404Spjd 1983168404Spjd /* 1984168404Spjd * Remove the directory entry. 
1985168404Spjd */ 1986185029Spjd error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 1987168404Spjd 1988168404Spjd if (error) { 1989168404Spjd dmu_tx_commit(tx); 1990168404Spjd goto out; 1991168404Spjd } 1992168404Spjd 1993219089Spjd if (unlinked) { 1994219089Spjd /* 1995219089Spjd * Hold z_lock so that we can make sure that the ACL obj 1996219089Spjd * hasn't changed. Could have been deleted due to 1997219089Spjd * zfs_sa_upgrade(). 1998219089Spjd */ 1999219089Spjd mutex_enter(&zp->z_lock); 2000168962Spjd VI_LOCK(vp); 2001219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 2002219089Spjd &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); 2003185029Spjd delete_now = may_delete_now && !toobig && 2004168962Spjd vp->v_count == 1 && !vn_has_cached_data(vp) && 2005219089Spjd xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == 2006219089Spjd acl_obj; 2007168962Spjd VI_UNLOCK(vp); 2008168962Spjd } 2009168962Spjd 2010168962Spjd if (delete_now) { 2011243270Savg#ifdef __FreeBSD__ 2012243270Savg panic("zfs_remove: delete_now branch taken"); 2013243270Savg#endif 2014219089Spjd if (xattr_obj_unlinked) { 2015219089Spjd ASSERT3U(xzp->z_links, ==, 2); 2016168962Spjd mutex_enter(&xzp->z_lock); 2017168962Spjd xzp->z_unlinked = 1; 2018219089Spjd xzp->z_links = 0; 2019219089Spjd error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 2020219089Spjd &xzp->z_links, sizeof (xzp->z_links), tx); 2021219089Spjd ASSERT3U(error, ==, 0); 2022168962Spjd mutex_exit(&xzp->z_lock); 2023168962Spjd zfs_unlinked_add(xzp, tx); 2024219089Spjd 2025219089Spjd if (zp->z_is_sa) 2026219089Spjd error = sa_remove(zp->z_sa_hdl, 2027219089Spjd SA_ZPL_XATTR(zfsvfs), tx); 2028219089Spjd else 2029219089Spjd error = sa_update(zp->z_sa_hdl, 2030219089Spjd SA_ZPL_XATTR(zfsvfs), &null_xattr, 2031219089Spjd sizeof (uint64_t), tx); 2032240415Smm ASSERT0(error); 2033168962Spjd } 2034168962Spjd VI_LOCK(vp); 2035168962Spjd vp->v_count--; 2036240415Smm ASSERT0(vp->v_count); 2037168962Spjd VI_UNLOCK(vp); 
2038168962Spjd mutex_exit(&zp->z_lock); 2039168962Spjd zfs_znode_delete(zp, tx); 2040168962Spjd } else if (unlinked) { 2041219089Spjd mutex_exit(&zp->z_lock); 2042168404Spjd zfs_unlinked_add(zp, tx); 2043243268Savg#ifdef __FreeBSD__ 2044243268Savg vp->v_vflag |= VV_NOSYNC; 2045243268Savg#endif 2046168962Spjd } 2047168404Spjd 2048185029Spjd txtype = TX_REMOVE; 2049185029Spjd if (flags & FIGNORECASE) 2050185029Spjd txtype |= TX_CI; 2051219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 2052168404Spjd 2053168404Spjd dmu_tx_commit(tx); 2054168404Spjdout: 2055185029Spjd if (realnmp) 2056185029Spjd pn_free(realnmp); 2057185029Spjd 2058168404Spjd zfs_dirent_unlock(dl); 2059168404Spjd 2060219089Spjd if (!delete_now) 2061168962Spjd VN_RELE(vp); 2062219089Spjd if (xzp) 2063168962Spjd VN_RELE(ZTOV(xzp)); 2064168962Spjd 2065219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2066219089Spjd zil_commit(zilog, 0); 2067219089Spjd 2068168404Spjd ZFS_EXIT(zfsvfs); 2069168404Spjd return (error); 2070168404Spjd} 2071168404Spjd 2072168404Spjd/* 2073168404Spjd * Create a new directory and insert it into dvp using the name 2074168404Spjd * provided. Return a pointer to the inserted directory. 2075168404Spjd * 2076168404Spjd * IN: dvp - vnode of directory to add subdir to. 2077168404Spjd * dirname - name of new directory. 2078168404Spjd * vap - attributes of new directory. 2079168404Spjd * cr - credentials of caller. 2080185029Spjd * ct - caller context 2081251631Sdelphij * flags - case flags 2082185029Spjd * vsecp - ACL to be set 2083168404Spjd * 2084168404Spjd * OUT: vpp - vnode of created directory. 2085168404Spjd * 2086251631Sdelphij * RETURN: 0 on success, error code on failure. 
2087168404Spjd * 2088168404Spjd * Timestamps: 2089168404Spjd * dvp - ctime|mtime updated 2090168404Spjd * vp - ctime|mtime|atime updated 2091168404Spjd */ 2092185029Spjd/*ARGSUSED*/ 2093168404Spjdstatic int 2094185029Spjdzfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 2095185029Spjd caller_context_t *ct, int flags, vsecattr_t *vsecp) 2096168404Spjd{ 2097168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 2098168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2099185029Spjd zilog_t *zilog; 2100168404Spjd zfs_dirlock_t *dl; 2101185029Spjd uint64_t txtype; 2102168404Spjd dmu_tx_t *tx; 2103168404Spjd int error; 2104185029Spjd int zf = ZNEW; 2105209962Smm ksid_t *ksid; 2106209962Smm uid_t uid; 2107209962Smm gid_t gid = crgetgid(cr); 2108219089Spjd zfs_acl_ids_t acl_ids; 2109209962Smm boolean_t fuid_dirtied; 2110258632Savg boolean_t waited = B_FALSE; 2111168404Spjd 2112168404Spjd ASSERT(vap->va_type == VDIR); 2113168404Spjd 2114185029Spjd /* 2115185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 2116185029Spjd * make sure file system is at proper version 2117185029Spjd */ 2118185029Spjd 2119209962Smm ksid = crgetsid(cr, KSID_OWNER); 2120209962Smm if (ksid) 2121209962Smm uid = ksid_getid(ksid); 2122209962Smm else 2123209962Smm uid = crgetuid(cr); 2124185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2125219089Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 2126219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2127249195Smm return (SET_ERROR(EINVAL)); 2128185029Spjd 2129168404Spjd ZFS_ENTER(zfsvfs); 2130185029Spjd ZFS_VERIFY_ZP(dzp); 2131185029Spjd zilog = zfsvfs->z_log; 2132168404Spjd 2133219089Spjd if (dzp->z_pflags & ZFS_XATTR) { 2134168404Spjd ZFS_EXIT(zfsvfs); 2135249195Smm return (SET_ERROR(EINVAL)); 2136168404Spjd } 2137168404Spjd 2138185029Spjd if (zfsvfs->z_utf8 && u8_validate(dirname, 2139185029Spjd strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2140185029Spjd ZFS_EXIT(zfsvfs); 2141249195Smm return (SET_ERROR(EILSEQ)); 
2142185029Spjd } 2143185029Spjd if (flags & FIGNORECASE) 2144185029Spjd zf |= ZCILOOK; 2145185029Spjd 2146219089Spjd if (vap->va_mask & AT_XVATTR) { 2147197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2148185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 2149185029Spjd ZFS_EXIT(zfsvfs); 2150185029Spjd return (error); 2151185029Spjd } 2152219089Spjd } 2153185029Spjd 2154219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 2155219089Spjd vsecp, &acl_ids)) != 0) { 2156219089Spjd ZFS_EXIT(zfsvfs); 2157219089Spjd return (error); 2158219089Spjd } 2159260704Savg 2160260704Savg getnewvnode_reserve(1); 2161260704Savg 2162168404Spjd /* 2163168404Spjd * First make sure the new directory doesn't exist. 2164219089Spjd * 2165219089Spjd * Existence is checked first to make sure we don't return 2166219089Spjd * EACCES instead of EEXIST which can cause some applications 2167219089Spjd * to fail. 2168168404Spjd */ 2169185029Spjdtop: 2170185029Spjd *vpp = NULL; 2171185029Spjd 2172185029Spjd if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 2173185029Spjd NULL, NULL)) { 2174219089Spjd zfs_acl_ids_free(&acl_ids); 2175260704Savg getnewvnode_drop_reserve(); 2176168404Spjd ZFS_EXIT(zfsvfs); 2177168404Spjd return (error); 2178168404Spjd } 2179168404Spjd 2180185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 2181219089Spjd zfs_acl_ids_free(&acl_ids); 2182168404Spjd zfs_dirent_unlock(dl); 2183260704Savg getnewvnode_drop_reserve(); 2184168404Spjd ZFS_EXIT(zfsvfs); 2185168404Spjd return (error); 2186168404Spjd } 2187168404Spjd 2188209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2189211932Smm zfs_acl_ids_free(&acl_ids); 2190209962Smm zfs_dirent_unlock(dl); 2191260704Savg getnewvnode_drop_reserve(); 2192209962Smm ZFS_EXIT(zfsvfs); 2193249195Smm return (SET_ERROR(EDQUOT)); 2194209962Smm } 2195209962Smm 2196168404Spjd /* 2197168404Spjd * Add a new entry to the directory. 
2198168404Spjd */ 2199168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2200168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 2201168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2202209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 2203209962Smm if (fuid_dirtied) 2204209962Smm zfs_fuid_txhold(zfsvfs, tx); 2205219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2206219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2207219089Spjd acl_ids.z_aclp->z_acl_bytes); 2208219089Spjd } 2209219089Spjd 2210219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2211219089Spjd ZFS_SA_BASE_ATTR_SIZE); 2212219089Spjd 2213258632Savg error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 2214168404Spjd if (error) { 2215168404Spjd zfs_dirent_unlock(dl); 2216209962Smm if (error == ERESTART) { 2217258632Savg waited = B_TRUE; 2218168404Spjd dmu_tx_wait(tx); 2219168404Spjd dmu_tx_abort(tx); 2220168404Spjd goto top; 2221168404Spjd } 2222219089Spjd zfs_acl_ids_free(&acl_ids); 2223168404Spjd dmu_tx_abort(tx); 2224260704Savg getnewvnode_drop_reserve(); 2225168404Spjd ZFS_EXIT(zfsvfs); 2226168404Spjd return (error); 2227168404Spjd } 2228168404Spjd 2229168404Spjd /* 2230168404Spjd * Create new node. 2231168404Spjd */ 2232219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2233168404Spjd 2234209962Smm if (fuid_dirtied) 2235209962Smm zfs_fuid_sync(zfsvfs, tx); 2236219089Spjd 2237168404Spjd /* 2238168404Spjd * Now put new name in parent dir. 
2239168404Spjd */ 2240168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 2241168404Spjd 2242168404Spjd *vpp = ZTOV(zp); 2243168404Spjd 2244185029Spjd txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 2245185029Spjd if (flags & FIGNORECASE) 2246185029Spjd txtype |= TX_CI; 2247209962Smm zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 2248209962Smm acl_ids.z_fuidp, vap); 2249185029Spjd 2250209962Smm zfs_acl_ids_free(&acl_ids); 2251219089Spjd 2252168404Spjd dmu_tx_commit(tx); 2253168404Spjd 2254260704Savg getnewvnode_drop_reserve(); 2255260704Savg 2256168404Spjd zfs_dirent_unlock(dl); 2257168404Spjd 2258219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2259219089Spjd zil_commit(zilog, 0); 2260219089Spjd 2261168404Spjd ZFS_EXIT(zfsvfs); 2262168404Spjd return (0); 2263168404Spjd} 2264168404Spjd 2265168404Spjd/* 2266168404Spjd * Remove a directory subdir entry. If the current working 2267168404Spjd * directory is the same as the subdir to be removed, the 2268168404Spjd * remove will fail. 2269168404Spjd * 2270168404Spjd * IN: dvp - vnode of directory to remove from. 2271168404Spjd * name - name of directory to be removed. 2272168404Spjd * cwd - vnode of current working directory. 2273168404Spjd * cr - credentials of caller. 2274185029Spjd * ct - caller context 2275185029Spjd * flags - case flags 2276168404Spjd * 2277251631Sdelphij * RETURN: 0 on success, error code on failure. 
2278168404Spjd * 2279168404Spjd * Timestamps: 2280168404Spjd * dvp - ctime|mtime updated 2281168404Spjd */ 2282185029Spjd/*ARGSUSED*/ 2283168404Spjdstatic int 2284185029Spjdzfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 2285185029Spjd caller_context_t *ct, int flags) 2286168404Spjd{ 2287168404Spjd znode_t *dzp = VTOZ(dvp); 2288168404Spjd znode_t *zp; 2289168404Spjd vnode_t *vp; 2290168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2291185029Spjd zilog_t *zilog; 2292168404Spjd zfs_dirlock_t *dl; 2293168404Spjd dmu_tx_t *tx; 2294168404Spjd int error; 2295185029Spjd int zflg = ZEXISTS; 2296258632Savg boolean_t waited = B_FALSE; 2297168404Spjd 2298168962Spjd ZFS_ENTER(zfsvfs); 2299185029Spjd ZFS_VERIFY_ZP(dzp); 2300185029Spjd zilog = zfsvfs->z_log; 2301168404Spjd 2302185029Spjd if (flags & FIGNORECASE) 2303185029Spjd zflg |= ZCILOOK; 2304168404Spjdtop: 2305168404Spjd zp = NULL; 2306168404Spjd 2307168404Spjd /* 2308168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 2309168404Spjd */ 2310185029Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 2311185029Spjd NULL, NULL)) { 2312168404Spjd ZFS_EXIT(zfsvfs); 2313168404Spjd return (error); 2314168404Spjd } 2315168404Spjd 2316168404Spjd vp = ZTOV(zp); 2317168404Spjd 2318168404Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2319168404Spjd goto out; 2320168404Spjd } 2321168404Spjd 2322168962Spjd if (vp->v_type != VDIR) { 2323249195Smm error = SET_ERROR(ENOTDIR); 2324168962Spjd goto out; 2325168962Spjd } 2326168962Spjd 2327168962Spjd if (vp == cwd) { 2328249195Smm error = SET_ERROR(EINVAL); 2329168962Spjd goto out; 2330168962Spjd } 2331168962Spjd 2332185029Spjd vnevent_rmdir(vp, dvp, name, ct); 2333168962Spjd 2334168404Spjd /* 2335168404Spjd * Grab a lock on the directory to make sure that noone is 2336168404Spjd * trying to add (or lookup) entries while we are removing it. 
2337168404Spjd */ 2338168404Spjd rw_enter(&zp->z_name_lock, RW_WRITER); 2339168404Spjd 2340168404Spjd /* 2341168404Spjd * Grab a lock on the parent pointer to make sure we play well 2342168404Spjd * with the treewalk and directory rename code. 2343168404Spjd */ 2344168404Spjd rw_enter(&zp->z_parent_lock, RW_WRITER); 2345168404Spjd 2346168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2347168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2348219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2349168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2350219089Spjd zfs_sa_upgrade_txholds(tx, zp); 2351219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 2352258632Savg error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 2353168404Spjd if (error) { 2354168404Spjd rw_exit(&zp->z_parent_lock); 2355168404Spjd rw_exit(&zp->z_name_lock); 2356168404Spjd zfs_dirent_unlock(dl); 2357168962Spjd VN_RELE(vp); 2358209962Smm if (error == ERESTART) { 2359258632Savg waited = B_TRUE; 2360168404Spjd dmu_tx_wait(tx); 2361168404Spjd dmu_tx_abort(tx); 2362168404Spjd goto top; 2363168404Spjd } 2364168404Spjd dmu_tx_abort(tx); 2365168404Spjd ZFS_EXIT(zfsvfs); 2366168404Spjd return (error); 2367168404Spjd } 2368168404Spjd 2369168404Spjd#ifdef FREEBSD_NAMECACHE 2370168404Spjd cache_purge(dvp); 2371168404Spjd#endif 2372168404Spjd 2373185029Spjd error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 2374168404Spjd 2375185029Spjd if (error == 0) { 2376185029Spjd uint64_t txtype = TX_RMDIR; 2377185029Spjd if (flags & FIGNORECASE) 2378185029Spjd txtype |= TX_CI; 2379219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2380185029Spjd } 2381168404Spjd 2382168404Spjd dmu_tx_commit(tx); 2383168404Spjd 2384168404Spjd rw_exit(&zp->z_parent_lock); 2385168404Spjd rw_exit(&zp->z_name_lock); 2386168404Spjd#ifdef FREEBSD_NAMECACHE 2387168404Spjd cache_purge(vp); 2388168404Spjd#endif 2389168404Spjdout: 2390168404Spjd zfs_dirent_unlock(dl); 2391168404Spjd 2392168962Spjd 
VN_RELE(vp); 2393168962Spjd 2394219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2395219089Spjd zil_commit(zilog, 0); 2396219089Spjd 2397168404Spjd ZFS_EXIT(zfsvfs); 2398168404Spjd return (error); 2399168404Spjd} 2400168404Spjd 2401168404Spjd/* 2402168404Spjd * Read as many directory entries as will fit into the provided 2403168404Spjd * buffer from the given directory cursor position (specified in 2404251631Sdelphij * the uio structure). 2405168404Spjd * 2406168404Spjd * IN: vp - vnode of directory to read. 2407168404Spjd * uio - structure supplying read location, range info, 2408168404Spjd * and return buffer. 2409168404Spjd * cr - credentials of caller. 2410185029Spjd * ct - caller context 2411185029Spjd * flags - case flags 2412168404Spjd * 2413168404Spjd * OUT: uio - updated offset and range, buffer filled. 2414168404Spjd * eofp - set to true if end-of-file detected. 2415168404Spjd * 2416251631Sdelphij * RETURN: 0 on success, error code on failure. 2417168404Spjd * 2418168404Spjd * Timestamps: 2419168404Spjd * vp - atime updated 2420168404Spjd * 2421168404Spjd * Note that the low 4 bits of the cookie returned by zap is always zero. 2422168404Spjd * This allows us to use the low range for "special" directory entries: 2423168404Spjd * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2424168404Spjd * we use the offset 2 for the '.zfs' directory. 
2425168404Spjd */ 2426168404Spjd/* ARGSUSED */ 2427168404Spjdstatic int 2428168962Spjdzfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) 2429168404Spjd{ 2430168404Spjd znode_t *zp = VTOZ(vp); 2431168404Spjd iovec_t *iovp; 2432185029Spjd edirent_t *eodp; 2433168404Spjd dirent64_t *odp; 2434168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2435168404Spjd objset_t *os; 2436168404Spjd caddr_t outbuf; 2437168404Spjd size_t bufsize; 2438168404Spjd zap_cursor_t zc; 2439168404Spjd zap_attribute_t zap; 2440168404Spjd uint_t bytes_wanted; 2441168404Spjd uint64_t offset; /* must be unsigned; checks for < 1 */ 2442219089Spjd uint64_t parent; 2443168404Spjd int local_eof; 2444168404Spjd int outcount; 2445168404Spjd int error; 2446168404Spjd uint8_t prefetch; 2447185029Spjd boolean_t check_sysattrs; 2448168404Spjd uint8_t type; 2449168962Spjd int ncooks; 2450168962Spjd u_long *cooks = NULL; 2451185029Spjd int flags = 0; 2452168404Spjd 2453168404Spjd ZFS_ENTER(zfsvfs); 2454185029Spjd ZFS_VERIFY_ZP(zp); 2455168404Spjd 2456219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 2457219089Spjd &parent, sizeof (parent))) != 0) { 2458219089Spjd ZFS_EXIT(zfsvfs); 2459219089Spjd return (error); 2460219089Spjd } 2461219089Spjd 2462168404Spjd /* 2463168404Spjd * If we are not given an eof variable, 2464168404Spjd * use a local one. 2465168404Spjd */ 2466168404Spjd if (eofp == NULL) 2467168404Spjd eofp = &local_eof; 2468168404Spjd 2469168404Spjd /* 2470168404Spjd * Check for valid iov_len. 
2471168404Spjd */ 2472168404Spjd if (uio->uio_iov->iov_len <= 0) { 2473168404Spjd ZFS_EXIT(zfsvfs); 2474249195Smm return (SET_ERROR(EINVAL)); 2475168404Spjd } 2476168404Spjd 2477168404Spjd /* 2478168404Spjd * Quit if directory has been removed (posix) 2479168404Spjd */ 2480168404Spjd if ((*eofp = zp->z_unlinked) != 0) { 2481168404Spjd ZFS_EXIT(zfsvfs); 2482168404Spjd return (0); 2483168404Spjd } 2484168404Spjd 2485168404Spjd error = 0; 2486168404Spjd os = zfsvfs->z_os; 2487168404Spjd offset = uio->uio_loffset; 2488168404Spjd prefetch = zp->z_zn_prefetch; 2489168404Spjd 2490168404Spjd /* 2491168404Spjd * Initialize the iterator cursor. 2492168404Spjd */ 2493168404Spjd if (offset <= 3) { 2494168404Spjd /* 2495168404Spjd * Start iteration from the beginning of the directory. 2496168404Spjd */ 2497168404Spjd zap_cursor_init(&zc, os, zp->z_id); 2498168404Spjd } else { 2499168404Spjd /* 2500168404Spjd * The offset is a serialized cursor. 2501168404Spjd */ 2502168404Spjd zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2503168404Spjd } 2504168404Spjd 2505168404Spjd /* 2506168404Spjd * Get space to change directory entries into fs independent format. 2507168404Spjd */ 2508168404Spjd iovp = uio->uio_iov; 2509168404Spjd bytes_wanted = iovp->iov_len; 2510168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2511168404Spjd bufsize = bytes_wanted; 2512168404Spjd outbuf = kmem_alloc(bufsize, KM_SLEEP); 2513168404Spjd odp = (struct dirent64 *)outbuf; 2514168404Spjd } else { 2515168404Spjd bufsize = bytes_wanted; 2516247187Smm outbuf = NULL; 2517168404Spjd odp = (struct dirent64 *)iovp->iov_base; 2518168404Spjd } 2519185029Spjd eodp = (struct edirent *)odp; 2520168404Spjd 2521169170Spjd if (ncookies != NULL) { 2522168404Spjd /* 2523168404Spjd * Minimum entry size is dirent size and 1 byte for a file name. 
2524168404Spjd */ 2525168962Spjd ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2526219404Spjd cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2527219404Spjd *cookies = cooks; 2528168962Spjd *ncookies = ncooks; 2529168404Spjd } 2530185029Spjd /* 2531185029Spjd * If this VFS supports the system attribute view interface; and 2532185029Spjd * we're looking at an extended attribute directory; and we care 2533185029Spjd * about normalization conflicts on this vfs; then we must check 2534185029Spjd * for normalization conflicts with the sysattr name space. 2535185029Spjd */ 2536185029Spjd#ifdef TODO 2537185029Spjd check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2538185029Spjd (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2539185029Spjd (flags & V_RDDIR_ENTFLAGS); 2540185029Spjd#else 2541185029Spjd check_sysattrs = 0; 2542185029Spjd#endif 2543168404Spjd 2544168404Spjd /* 2545168404Spjd * Transform to file-system independent format 2546168404Spjd */ 2547168404Spjd outcount = 0; 2548168404Spjd while (outcount < bytes_wanted) { 2549168404Spjd ino64_t objnum; 2550168404Spjd ushort_t reclen; 2551219089Spjd off64_t *next = NULL; 2552168404Spjd 2553168404Spjd /* 2554168404Spjd * Special case `.', `..', and `.zfs'. 
2555168404Spjd */ 2556168404Spjd if (offset == 0) { 2557168404Spjd (void) strcpy(zap.za_name, "."); 2558185029Spjd zap.za_normalization_conflict = 0; 2559168404Spjd objnum = zp->z_id; 2560169108Spjd type = DT_DIR; 2561168404Spjd } else if (offset == 1) { 2562168404Spjd (void) strcpy(zap.za_name, ".."); 2563185029Spjd zap.za_normalization_conflict = 0; 2564219089Spjd objnum = parent; 2565169108Spjd type = DT_DIR; 2566168404Spjd } else if (offset == 2 && zfs_show_ctldir(zp)) { 2567168404Spjd (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2568185029Spjd zap.za_normalization_conflict = 0; 2569168404Spjd objnum = ZFSCTL_INO_ROOT; 2570169108Spjd type = DT_DIR; 2571168404Spjd } else { 2572168404Spjd /* 2573168404Spjd * Grab next entry. 2574168404Spjd */ 2575168404Spjd if (error = zap_cursor_retrieve(&zc, &zap)) { 2576168404Spjd if ((*eofp = (error == ENOENT)) != 0) 2577168404Spjd break; 2578168404Spjd else 2579168404Spjd goto update; 2580168404Spjd } 2581168404Spjd 2582168404Spjd if (zap.za_integer_length != 8 || 2583168404Spjd zap.za_num_integers != 1) { 2584168404Spjd cmn_err(CE_WARN, "zap_readdir: bad directory " 2585168404Spjd "entry, obj = %lld, offset = %lld\n", 2586168404Spjd (u_longlong_t)zp->z_id, 2587168404Spjd (u_longlong_t)offset); 2588249195Smm error = SET_ERROR(ENXIO); 2589168404Spjd goto update; 2590168404Spjd } 2591168404Spjd 2592168404Spjd objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2593168404Spjd /* 2594168404Spjd * MacOS X can extract the object type here such as: 2595168404Spjd * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2596168404Spjd */ 2597168404Spjd type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2598185029Spjd 2599185029Spjd if (check_sysattrs && !zap.za_normalization_conflict) { 2600185029Spjd#ifdef TODO 2601185029Spjd zap.za_normalization_conflict = 2602185029Spjd xattr_sysattr_casechk(zap.za_name); 2603185029Spjd#else 2604185029Spjd panic("%s:%u: TODO", __func__, __LINE__); 2605185029Spjd#endif 2606185029Spjd } 2607168404Spjd } 
2608168404Spjd 2609211932Smm if (flags & V_RDDIR_ACCFILTER) { 2610211932Smm /* 2611211932Smm * If we have no access at all, don't include 2612211932Smm * this entry in the returned information 2613211932Smm */ 2614211932Smm znode_t *ezp; 2615211932Smm if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2616211932Smm goto skip_entry; 2617211932Smm if (!zfs_has_access(ezp, cr)) { 2618211932Smm VN_RELE(ZTOV(ezp)); 2619211932Smm goto skip_entry; 2620211932Smm } 2621211932Smm VN_RELE(ZTOV(ezp)); 2622211932Smm } 2623211932Smm 2624185029Spjd if (flags & V_RDDIR_ENTFLAGS) 2625185029Spjd reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2626185029Spjd else 2627185029Spjd reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2628185029Spjd 2629168404Spjd /* 2630168404Spjd * Will this entry fit in the buffer? 2631168404Spjd */ 2632168404Spjd if (outcount + reclen > bufsize) { 2633168404Spjd /* 2634168404Spjd * Did we manage to fit anything in the buffer? 2635168404Spjd */ 2636168404Spjd if (!outcount) { 2637249195Smm error = SET_ERROR(EINVAL); 2638168404Spjd goto update; 2639168404Spjd } 2640168404Spjd break; 2641168404Spjd } 2642185029Spjd if (flags & V_RDDIR_ENTFLAGS) { 2643185029Spjd /* 2644185029Spjd * Add extended flag entry: 2645185029Spjd */ 2646185029Spjd eodp->ed_ino = objnum; 2647185029Spjd eodp->ed_reclen = reclen; 2648185029Spjd /* NOTE: ed_off is the offset for the *next* entry */ 2649185029Spjd next = &(eodp->ed_off); 2650185029Spjd eodp->ed_eflags = zap.za_normalization_conflict ? 
2651185029Spjd ED_CASE_CONFLICT : 0; 2652185029Spjd (void) strncpy(eodp->ed_name, zap.za_name, 2653185029Spjd EDIRENT_NAMELEN(reclen)); 2654185029Spjd eodp = (edirent_t *)((intptr_t)eodp + reclen); 2655185029Spjd } else { 2656185029Spjd /* 2657185029Spjd * Add normal entry: 2658185029Spjd */ 2659185029Spjd odp->d_ino = objnum; 2660185029Spjd odp->d_reclen = reclen; 2661185029Spjd odp->d_namlen = strlen(zap.za_name); 2662185029Spjd (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2663185029Spjd odp->d_type = type; 2664185029Spjd odp = (dirent64_t *)((intptr_t)odp + reclen); 2665185029Spjd } 2666168404Spjd outcount += reclen; 2667168404Spjd 2668168404Spjd ASSERT(outcount <= bufsize); 2669168404Spjd 2670168404Spjd /* Prefetch znode */ 2671168404Spjd if (prefetch) 2672168404Spjd dmu_prefetch(os, objnum, 0, 0); 2673168404Spjd 2674211932Smm skip_entry: 2675168404Spjd /* 2676168404Spjd * Move to the next entry, fill in the previous offset. 2677168404Spjd */ 2678168404Spjd if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2679168404Spjd zap_cursor_advance(&zc); 2680168404Spjd offset = zap_cursor_serialize(&zc); 2681168404Spjd } else { 2682168404Spjd offset += 1; 2683168404Spjd } 2684219404Spjd 2685219404Spjd if (cooks != NULL) { 2686219404Spjd *cooks++ = offset; 2687219404Spjd ncooks--; 2688219404Spjd KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2689219404Spjd } 2690168404Spjd } 2691168404Spjd zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2692168404Spjd 2693168404Spjd /* Subtract unused cookies */ 2694168962Spjd if (ncookies != NULL) 2695168962Spjd *ncookies -= ncooks; 2696168404Spjd 2697168404Spjd if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2698168404Spjd iovp->iov_base += outcount; 2699168404Spjd iovp->iov_len -= outcount; 2700168404Spjd uio->uio_resid -= outcount; 2701168404Spjd } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2702168404Spjd /* 2703168404Spjd * Reset the pointer. 
2704168404Spjd */ 2705168404Spjd offset = uio->uio_loffset; 2706168404Spjd } 2707168404Spjd 2708168404Spjdupdate: 2709168404Spjd zap_cursor_fini(&zc); 2710168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2711168404Spjd kmem_free(outbuf, bufsize); 2712168404Spjd 2713168404Spjd if (error == ENOENT) 2714168404Spjd error = 0; 2715168404Spjd 2716168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2717168404Spjd 2718168404Spjd uio->uio_loffset = offset; 2719168404Spjd ZFS_EXIT(zfsvfs); 2720169107Spjd if (error != 0 && cookies != NULL) { 2721168962Spjd free(*cookies, M_TEMP); 2722168962Spjd *cookies = NULL; 2723168962Spjd *ncookies = 0; 2724168404Spjd } 2725168404Spjd return (error); 2726168404Spjd} 2727168404Spjd 2728185029Spjdulong_t zfs_fsync_sync_cnt = 4; 2729185029Spjd 2730168404Spjdstatic int 2731185029Spjdzfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2732168404Spjd{ 2733168962Spjd znode_t *zp = VTOZ(vp); 2734168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2735168404Spjd 2736185029Spjd (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2737185029Spjd 2738219089Spjd if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 2739219089Spjd ZFS_ENTER(zfsvfs); 2740219089Spjd ZFS_VERIFY_ZP(zp); 2741219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 2742219089Spjd ZFS_EXIT(zfsvfs); 2743219089Spjd } 2744168404Spjd return (0); 2745168404Spjd} 2746168404Spjd 2747185029Spjd 2748168404Spjd/* 2749168404Spjd * Get the requested file attributes and place them in the provided 2750168404Spjd * vattr structure. 2751168404Spjd * 2752168404Spjd * IN: vp - vnode of file. 2753168404Spjd * vap - va_mask identifies requested attributes. 2754185029Spjd * If AT_XVATTR set, then optional attrs are requested 2755185029Spjd * flags - ATTR_NOACLCHECK (CIFS server context) 2756168404Spjd * cr - credentials of caller. 2757185029Spjd * ct - caller context 2758168404Spjd * 2759168404Spjd * OUT: vap - attribute values. 
2760168404Spjd * 2761251631Sdelphij * RETURN: 0 (always succeeds). 2762168404Spjd */ 2763168404Spjd/* ARGSUSED */ 2764168404Spjdstatic int 2765185029Spjdzfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2766185029Spjd caller_context_t *ct) 2767168404Spjd{ 2768168962Spjd znode_t *zp = VTOZ(vp); 2769168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2770185029Spjd int error = 0; 2771168962Spjd uint32_t blksize; 2772168962Spjd u_longlong_t nblocks; 2773185029Spjd uint64_t links; 2774224251Sdelphij uint64_t mtime[2], ctime[2], crtime[2], rdev; 2775185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2776185029Spjd xoptattr_t *xoap = NULL; 2777185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2778224251Sdelphij sa_bulk_attr_t bulk[4]; 2779219089Spjd int count = 0; 2780168404Spjd 2781168404Spjd ZFS_ENTER(zfsvfs); 2782185029Spjd ZFS_VERIFY_ZP(zp); 2783168404Spjd 2784219089Spjd zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2785219089Spjd 2786219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 2787219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 2788243807Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 2789224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2790224251Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 2791224251Sdelphij &rdev, 8); 2792219089Spjd 2793219089Spjd if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 2794219089Spjd ZFS_EXIT(zfsvfs); 2795219089Spjd return (error); 2796219089Spjd } 2797219089Spjd 2798168404Spjd /* 2799185029Spjd * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2800185029Spjd * Also, if we are the owner don't bother, since owner should 2801185029Spjd * always be allowed to read basic attributes of file. 
2802185029Spjd */ 2803219089Spjd if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2804219089Spjd (vap->va_uid != crgetuid(cr))) { 2805185029Spjd if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2806185029Spjd skipaclchk, cr)) { 2807185029Spjd ZFS_EXIT(zfsvfs); 2808185029Spjd return (error); 2809185029Spjd } 2810185029Spjd } 2811185029Spjd 2812185029Spjd /* 2813168404Spjd * Return all attributes. It's cheaper to provide the answer 2814168404Spjd * than to determine whether we were asked the question. 2815168404Spjd */ 2816168404Spjd 2817209097Smm mutex_enter(&zp->z_lock); 2818219089Spjd vap->va_type = IFTOVT(zp->z_mode); 2819219089Spjd vap->va_mode = zp->z_mode & ~S_IFMT; 2820224252Sdelphij#ifdef sun 2821224252Sdelphij vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2822224252Sdelphij#else 2823224252Sdelphij vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 2824224252Sdelphij#endif 2825168404Spjd vap->va_nodeid = zp->z_id; 2826185029Spjd if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2827219089Spjd links = zp->z_links + 1; 2828185029Spjd else 2829219089Spjd links = zp->z_links; 2830229425Sdim vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! */ 2831219089Spjd vap->va_size = zp->z_size; 2832224252Sdelphij#ifdef sun 2833224252Sdelphij vap->va_rdev = vp->v_rdev; 2834224252Sdelphij#else 2835224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2836224251Sdelphij vap->va_rdev = zfs_cmpldev(rdev); 2837224252Sdelphij#endif 2838168404Spjd vap->va_seq = zp->z_seq; 2839168404Spjd vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2840272467Saraujo vap->va_filerev = zp->z_seq; 2841168404Spjd 2842185029Spjd /* 2843185029Spjd * Add in any requested optional attributes and the create time. 2844185029Spjd * Also set the corresponding bits in the returned attribute bitmap. 
2845185029Spjd */ 2846185029Spjd if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2847185029Spjd if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2848185029Spjd xoap->xoa_archive = 2849219089Spjd ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2850185029Spjd XVA_SET_RTN(xvap, XAT_ARCHIVE); 2851185029Spjd } 2852185029Spjd 2853185029Spjd if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2854185029Spjd xoap->xoa_readonly = 2855219089Spjd ((zp->z_pflags & ZFS_READONLY) != 0); 2856185029Spjd XVA_SET_RTN(xvap, XAT_READONLY); 2857185029Spjd } 2858185029Spjd 2859185029Spjd if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2860185029Spjd xoap->xoa_system = 2861219089Spjd ((zp->z_pflags & ZFS_SYSTEM) != 0); 2862185029Spjd XVA_SET_RTN(xvap, XAT_SYSTEM); 2863185029Spjd } 2864185029Spjd 2865185029Spjd if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2866185029Spjd xoap->xoa_hidden = 2867219089Spjd ((zp->z_pflags & ZFS_HIDDEN) != 0); 2868185029Spjd XVA_SET_RTN(xvap, XAT_HIDDEN); 2869185029Spjd } 2870185029Spjd 2871185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2872185029Spjd xoap->xoa_nounlink = 2873219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2874185029Spjd XVA_SET_RTN(xvap, XAT_NOUNLINK); 2875185029Spjd } 2876185029Spjd 2877185029Spjd if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2878185029Spjd xoap->xoa_immutable = 2879219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2880185029Spjd XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2881185029Spjd } 2882185029Spjd 2883185029Spjd if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2884185029Spjd xoap->xoa_appendonly = 2885219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2886185029Spjd XVA_SET_RTN(xvap, XAT_APPENDONLY); 2887185029Spjd } 2888185029Spjd 2889185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2890185029Spjd xoap->xoa_nodump = 2891219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0); 2892185029Spjd XVA_SET_RTN(xvap, XAT_NODUMP); 2893185029Spjd } 2894185029Spjd 2895185029Spjd if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2896185029Spjd xoap->xoa_opaque = 2897219089Spjd 
((zp->z_pflags & ZFS_OPAQUE) != 0); 2898185029Spjd XVA_SET_RTN(xvap, XAT_OPAQUE); 2899185029Spjd } 2900185029Spjd 2901185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2902185029Spjd xoap->xoa_av_quarantined = 2903219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2904185029Spjd XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2905185029Spjd } 2906185029Spjd 2907185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2908185029Spjd xoap->xoa_av_modified = 2909219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2910185029Spjd XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2911185029Spjd } 2912185029Spjd 2913185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2914219089Spjd vp->v_type == VREG) { 2915219089Spjd zfs_sa_get_scanstamp(zp, xvap); 2916185029Spjd } 2917185029Spjd 2918185029Spjd if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 2919219089Spjd uint64_t times[2]; 2920219089Spjd 2921219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 2922219089Spjd times, sizeof (times)); 2923219089Spjd ZFS_TIME_DECODE(&xoap->xoa_createtime, times); 2924185029Spjd XVA_SET_RTN(xvap, XAT_CREATETIME); 2925185029Spjd } 2926219089Spjd 2927219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2928219089Spjd xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 2929219089Spjd XVA_SET_RTN(xvap, XAT_REPARSE); 2930219089Spjd } 2931219089Spjd if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 2932219089Spjd xoap->xoa_generation = zp->z_gen; 2933219089Spjd XVA_SET_RTN(xvap, XAT_GEN); 2934219089Spjd } 2935219089Spjd 2936219089Spjd if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2937219089Spjd xoap->xoa_offline = 2938219089Spjd ((zp->z_pflags & ZFS_OFFLINE) != 0); 2939219089Spjd XVA_SET_RTN(xvap, XAT_OFFLINE); 2940219089Spjd } 2941219089Spjd 2942219089Spjd if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2943219089Spjd xoap->xoa_sparse = 2944219089Spjd ((zp->z_pflags & ZFS_SPARSE) != 0); 2945219089Spjd XVA_SET_RTN(xvap, XAT_SPARSE); 2946219089Spjd } 2947185029Spjd } 2948185029Spjd 2949219089Spjd 
ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 2950219089Spjd ZFS_TIME_DECODE(&vap->va_mtime, mtime); 2951219089Spjd ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2952219089Spjd ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 2953168404Spjd 2954168404Spjd mutex_exit(&zp->z_lock); 2955168404Spjd 2956219089Spjd sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 2957168404Spjd vap->va_blksize = blksize; 2958168404Spjd vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 2959168404Spjd 2960168404Spjd if (zp->z_blksz == 0) { 2961168404Spjd /* 2962168404Spjd * Block size hasn't been set; suggest maximal I/O transfers. 2963168404Spjd */ 2964168404Spjd vap->va_blksize = zfsvfs->z_max_blksz; 2965168404Spjd } 2966168404Spjd 2967168404Spjd ZFS_EXIT(zfsvfs); 2968168404Spjd return (0); 2969168404Spjd} 2970168404Spjd 2971168404Spjd/* 2972168404Spjd * Set the file attributes to the values contained in the 2973168404Spjd * vattr structure. 2974168404Spjd * 2975168404Spjd * IN: vp - vnode of file to be modified. 2976168404Spjd * vap - new attribute values. 2977185029Spjd * If AT_XVATTR set, then optional attrs are being set 2978168404Spjd * flags - ATTR_UTIME set if non-default time values provided. 2979185029Spjd * - ATTR_NOACLCHECK (CIFS context only). 2980168404Spjd * cr - credentials of caller. 2981185029Spjd * ct - caller context 2982168404Spjd * 2983251631Sdelphij * RETURN: 0 on success, error code on failure. 2984168404Spjd * 2985168404Spjd * Timestamps: 2986168404Spjd * vp - ctime updated, mtime updated if size changed. 
2987168404Spjd */ 2988168404Spjd/* ARGSUSED */ 2989168404Spjdstatic int 2990168962Spjdzfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2991251631Sdelphij caller_context_t *ct) 2992168404Spjd{ 2993185029Spjd znode_t *zp = VTOZ(vp); 2994168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2995185029Spjd zilog_t *zilog; 2996168404Spjd dmu_tx_t *tx; 2997168404Spjd vattr_t oldva; 2998209962Smm xvattr_t tmpxvattr; 2999168962Spjd uint_t mask = vap->va_mask; 3000247187Smm uint_t saved_mask = 0; 3001197831Spjd uint64_t saved_mode; 3002168404Spjd int trim_mask = 0; 3003168404Spjd uint64_t new_mode; 3004209962Smm uint64_t new_uid, new_gid; 3005219089Spjd uint64_t xattr_obj; 3006219089Spjd uint64_t mtime[2], ctime[2]; 3007168404Spjd znode_t *attrzp; 3008168404Spjd int need_policy = FALSE; 3009219089Spjd int err, err2; 3010185029Spjd zfs_fuid_info_t *fuidp = NULL; 3011185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 3012185029Spjd xoptattr_t *xoap; 3013219089Spjd zfs_acl_t *aclp; 3014185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 3015219089Spjd boolean_t fuid_dirtied = B_FALSE; 3016219089Spjd sa_bulk_attr_t bulk[7], xattr_bulk[7]; 3017219089Spjd int count = 0, xattr_count = 0; 3018168404Spjd 3019168404Spjd if (mask == 0) 3020168404Spjd return (0); 3021168404Spjd 3022168962Spjd if (mask & AT_NOSET) 3023249195Smm return (SET_ERROR(EINVAL)); 3024168962Spjd 3025185029Spjd ZFS_ENTER(zfsvfs); 3026185029Spjd ZFS_VERIFY_ZP(zp); 3027185029Spjd 3028185029Spjd zilog = zfsvfs->z_log; 3029185029Spjd 3030185029Spjd /* 3031185029Spjd * Make sure that if we have ephemeral uid/gid or xvattr specified 3032185029Spjd * that file system is at proper version level 3033185029Spjd */ 3034185029Spjd 3035185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 3036185029Spjd (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 3037185029Spjd ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 3038185029Spjd (mask & AT_XVATTR))) { 3039185029Spjd ZFS_EXIT(zfsvfs); 3040249195Smm return (SET_ERROR(EINVAL)); 3041185029Spjd } 3042185029Spjd 3043185029Spjd if (mask & AT_SIZE && vp->v_type == VDIR) { 3044185029Spjd ZFS_EXIT(zfsvfs); 3045249195Smm return (SET_ERROR(EISDIR)); 3046185029Spjd } 3047168404Spjd 3048185029Spjd if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 3049185029Spjd ZFS_EXIT(zfsvfs); 3050249195Smm return (SET_ERROR(EINVAL)); 3051185029Spjd } 3052168404Spjd 3053185029Spjd /* 3054185029Spjd * If this is an xvattr_t, then get a pointer to the structure of 3055185029Spjd * optional attributes. If this is NULL, then we have a vattr_t. 
3056185029Spjd */ 3057185029Spjd xoap = xva_getxoptattr(xvap); 3058168404Spjd 3059209962Smm xva_init(&tmpxvattr); 3060209962Smm 3061185029Spjd /* 3062185029Spjd * Immutable files can only alter immutable bit and atime 3063185029Spjd */ 3064219089Spjd if ((zp->z_pflags & ZFS_IMMUTABLE) && 3065185029Spjd ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 3066185029Spjd ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 3067185029Spjd ZFS_EXIT(zfsvfs); 3068249195Smm return (SET_ERROR(EPERM)); 3069185029Spjd } 3070185029Spjd 3071219089Spjd if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { 3072185029Spjd ZFS_EXIT(zfsvfs); 3073249195Smm return (SET_ERROR(EPERM)); 3074185029Spjd } 3075185029Spjd 3076185029Spjd /* 3077185029Spjd * Verify timestamps doesn't overflow 32 bits. 3078185029Spjd * ZFS can handle large timestamps, but 32bit syscalls can't 3079185029Spjd * handle times greater than 2039. This check should be removed 3080185029Spjd * once large timestamps are fully supported. 3081185029Spjd */ 3082185029Spjd if (mask & (AT_ATIME | AT_MTIME)) { 3083185029Spjd if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 3084185029Spjd ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 3085185029Spjd ZFS_EXIT(zfsvfs); 3086249195Smm return (SET_ERROR(EOVERFLOW)); 3087185029Spjd } 3088185029Spjd } 3089185029Spjd 3090168404Spjdtop: 3091168404Spjd attrzp = NULL; 3092219089Spjd aclp = NULL; 3093168404Spjd 3094211932Smm /* Can this be moved to before the top label? 
*/ 3095168404Spjd if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 3096168404Spjd ZFS_EXIT(zfsvfs); 3097249195Smm return (SET_ERROR(EROFS)); 3098168404Spjd } 3099168404Spjd 3100168404Spjd /* 3101168404Spjd * First validate permissions 3102168404Spjd */ 3103168404Spjd 3104168404Spjd if (mask & AT_SIZE) { 3105168404Spjd /* 3106168404Spjd * XXX - Note, we are not providing any open 3107168404Spjd * mode flags here (like FNDELAY), so we may 3108168404Spjd * block if there are locks present... this 3109168404Spjd * should be addressed in openat(). 3110168404Spjd */ 3111185029Spjd /* XXX - would it be OK to generate a log record here? */ 3112185029Spjd err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 3113168404Spjd if (err) { 3114168404Spjd ZFS_EXIT(zfsvfs); 3115168404Spjd return (err); 3116168404Spjd } 3117168404Spjd } 3118168404Spjd 3119185029Spjd if (mask & (AT_ATIME|AT_MTIME) || 3120185029Spjd ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 3121185029Spjd XVA_ISSET_REQ(xvap, XAT_READONLY) || 3122185029Spjd XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 3123219089Spjd XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 3124219089Spjd XVA_ISSET_REQ(xvap, XAT_SPARSE) || 3125185029Spjd XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 3126219089Spjd XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 3127185029Spjd need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 3128185029Spjd skipaclchk, cr); 3129219089Spjd } 3130168404Spjd 3131168404Spjd if (mask & (AT_UID|AT_GID)) { 3132168404Spjd int idmask = (mask & (AT_UID|AT_GID)); 3133168404Spjd int take_owner; 3134168404Spjd int take_group; 3135168404Spjd 3136168404Spjd /* 3137168404Spjd * NOTE: even if a new mode is being set, 3138168404Spjd * we may clear S_ISUID/S_ISGID bits. 
3139168404Spjd */ 3140168404Spjd 3141168404Spjd if (!(mask & AT_MODE)) 3142219089Spjd vap->va_mode = zp->z_mode; 3143168404Spjd 3144168404Spjd /* 3145168404Spjd * Take ownership or chgrp to group we are a member of 3146168404Spjd */ 3147168404Spjd 3148168404Spjd take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 3149185029Spjd take_group = (mask & AT_GID) && 3150185029Spjd zfs_groupmember(zfsvfs, vap->va_gid, cr); 3151168404Spjd 3152168404Spjd /* 3153168404Spjd * If both AT_UID and AT_GID are set then take_owner and 3154168404Spjd * take_group must both be set in order to allow taking 3155168404Spjd * ownership. 3156168404Spjd * 3157168404Spjd * Otherwise, send the check through secpolicy_vnode_setattr() 3158168404Spjd * 3159168404Spjd */ 3160168404Spjd 3161168404Spjd if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 3162168404Spjd ((idmask == AT_UID) && take_owner) || 3163168404Spjd ((idmask == AT_GID) && take_group)) { 3164185029Spjd if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 3165185029Spjd skipaclchk, cr) == 0) { 3166168404Spjd /* 3167168404Spjd * Remove setuid/setgid for non-privileged users 3168168404Spjd */ 3169185029Spjd secpolicy_setid_clear(vap, vp, cr); 3170168404Spjd trim_mask = (mask & (AT_UID|AT_GID)); 3171168404Spjd } else { 3172168404Spjd need_policy = TRUE; 3173168404Spjd } 3174168404Spjd } else { 3175168404Spjd need_policy = TRUE; 3176168404Spjd } 3177168404Spjd } 3178168404Spjd 3179168404Spjd mutex_enter(&zp->z_lock); 3180219089Spjd oldva.va_mode = zp->z_mode; 3181185029Spjd zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 3182185029Spjd if (mask & AT_XVATTR) { 3183209962Smm /* 3184209962Smm * Update xvattr mask to include only those attributes 3185209962Smm * that are actually changing. 3186209962Smm * 3187209962Smm * the bits will be restored prior to actually setting 3188209962Smm * the attributes so the caller thinks they were set. 
3189209962Smm */ 3190209962Smm if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3191209962Smm if (xoap->xoa_appendonly != 3192219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 3193209962Smm need_policy = TRUE; 3194209962Smm } else { 3195209962Smm XVA_CLR_REQ(xvap, XAT_APPENDONLY); 3196209962Smm XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 3197209962Smm } 3198209962Smm } 3199209962Smm 3200209962Smm if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3201209962Smm if (xoap->xoa_nounlink != 3202219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 3203209962Smm need_policy = TRUE; 3204209962Smm } else { 3205209962Smm XVA_CLR_REQ(xvap, XAT_NOUNLINK); 3206209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 3207209962Smm } 3208209962Smm } 3209209962Smm 3210209962Smm if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3211209962Smm if (xoap->xoa_immutable != 3212219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 3213209962Smm need_policy = TRUE; 3214209962Smm } else { 3215209962Smm XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 3216209962Smm XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 3217209962Smm } 3218209962Smm } 3219209962Smm 3220209962Smm if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3221209962Smm if (xoap->xoa_nodump != 3222219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0)) { 3223209962Smm need_policy = TRUE; 3224209962Smm } else { 3225209962Smm XVA_CLR_REQ(xvap, XAT_NODUMP); 3226209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 3227209962Smm } 3228209962Smm } 3229209962Smm 3230209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3231209962Smm if (xoap->xoa_av_modified != 3232219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 3233209962Smm need_policy = TRUE; 3234209962Smm } else { 3235209962Smm XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 3236209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 3237209962Smm } 3238209962Smm } 3239209962Smm 3240209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3241209962Smm if ((vp->v_type != VREG && 3242209962Smm xoap->xoa_av_quarantined) || 3243209962Smm xoap->xoa_av_quarantined != 
3244219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 3245209962Smm need_policy = TRUE; 3246209962Smm } else { 3247209962Smm XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 3248209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 3249209962Smm } 3250209962Smm } 3251209962Smm 3252219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3253219089Spjd mutex_exit(&zp->z_lock); 3254219089Spjd ZFS_EXIT(zfsvfs); 3255249195Smm return (SET_ERROR(EPERM)); 3256219089Spjd } 3257219089Spjd 3258209962Smm if (need_policy == FALSE && 3259209962Smm (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3260209962Smm XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3261185029Spjd need_policy = TRUE; 3262185029Spjd } 3263185029Spjd } 3264185029Spjd 3265168404Spjd mutex_exit(&zp->z_lock); 3266168404Spjd 3267168404Spjd if (mask & AT_MODE) { 3268185029Spjd if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 3269168962Spjd err = secpolicy_setid_setsticky_clear(vp, vap, 3270168962Spjd &oldva, cr); 3271168962Spjd if (err) { 3272168962Spjd ZFS_EXIT(zfsvfs); 3273168962Spjd return (err); 3274168962Spjd } 3275168404Spjd trim_mask |= AT_MODE; 3276168404Spjd } else { 3277168404Spjd need_policy = TRUE; 3278168404Spjd } 3279168404Spjd } 3280168404Spjd 3281168404Spjd if (need_policy) { 3282168404Spjd /* 3283168404Spjd * If trim_mask is set then take ownership 3284168404Spjd * has been granted or write_acl is present and user 3285168404Spjd * has the ability to modify mode. In that case remove 3286168404Spjd * UID|GID and or MODE from mask so that 3287168404Spjd * secpolicy_vnode_setattr() doesn't revoke it. 3288168404Spjd */ 3289168404Spjd 3290168404Spjd if (trim_mask) { 3291168404Spjd saved_mask = vap->va_mask; 3292168404Spjd vap->va_mask &= ~trim_mask; 3293197831Spjd if (trim_mask & AT_MODE) { 3294197831Spjd /* 3295197831Spjd * Save the mode, as secpolicy_vnode_setattr() 3296197831Spjd * will overwrite it with ova.va_mode. 
3297197831Spjd */ 3298197831Spjd saved_mode = vap->va_mode; 3299197831Spjd } 3300168404Spjd } 3301168404Spjd err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3302185029Spjd (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3303168404Spjd if (err) { 3304168404Spjd ZFS_EXIT(zfsvfs); 3305168404Spjd return (err); 3306168404Spjd } 3307168404Spjd 3308197831Spjd if (trim_mask) { 3309168404Spjd vap->va_mask |= saved_mask; 3310197831Spjd if (trim_mask & AT_MODE) { 3311197831Spjd /* 3312197831Spjd * Recover the mode after 3313197831Spjd * secpolicy_vnode_setattr(). 3314197831Spjd */ 3315197831Spjd vap->va_mode = saved_mode; 3316197831Spjd } 3317197831Spjd } 3318168404Spjd } 3319168404Spjd 3320168404Spjd /* 3321168404Spjd * secpolicy_vnode_setattr, or take ownership may have 3322168404Spjd * changed va_mask 3323168404Spjd */ 3324168404Spjd mask = vap->va_mask; 3325168404Spjd 3326219089Spjd if ((mask & (AT_UID | AT_GID))) { 3327219089Spjd err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 3328219089Spjd &xattr_obj, sizeof (xattr_obj)); 3329168404Spjd 3330219089Spjd if (err == 0 && xattr_obj) { 3331219089Spjd err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 3332209962Smm if (err) 3333219089Spjd goto out2; 3334168404Spjd } 3335209962Smm if (mask & AT_UID) { 3336209962Smm new_uid = zfs_fuid_create(zfsvfs, 3337209962Smm (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3338219089Spjd if (new_uid != zp->z_uid && 3339219089Spjd zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 3340219089Spjd if (attrzp) 3341219089Spjd VN_RELE(ZTOV(attrzp)); 3342249195Smm err = SET_ERROR(EDQUOT); 3343219089Spjd goto out2; 3344209962Smm } 3345209962Smm } 3346209962Smm 3347209962Smm if (mask & AT_GID) { 3348209962Smm new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 3349209962Smm cr, ZFS_GROUP, &fuidp); 3350219089Spjd if (new_gid != zp->z_gid && 3351219089Spjd zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 3352219089Spjd if (attrzp) 3353219089Spjd VN_RELE(ZTOV(attrzp)); 3354249195Smm 
err = SET_ERROR(EDQUOT); 3355219089Spjd goto out2; 3356209962Smm } 3357209962Smm } 3358219089Spjd } 3359219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 3360219089Spjd 3361219089Spjd if (mask & AT_MODE) { 3362219089Spjd uint64_t pmode = zp->z_mode; 3363219089Spjd uint64_t acl_obj; 3364219089Spjd new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3365219089Spjd 3366243560Smm if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 3367243560Smm !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3368249195Smm err = SET_ERROR(EPERM); 3369243560Smm goto out; 3370243560Smm } 3371243560Smm 3372224174Smm if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3373224174Smm goto out; 3374219089Spjd 3375219089Spjd mutex_enter(&zp->z_lock); 3376219089Spjd if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 3377219089Spjd /* 3378219089Spjd * Are we upgrading ACL from old V0 format 3379219089Spjd * to V1 format? 3380219089Spjd */ 3381219089Spjd if (zfsvfs->z_version >= ZPL_VERSION_FUID && 3382219089Spjd zfs_znode_acl_version(zp) == 3383219089Spjd ZFS_ACL_VERSION_INITIAL) { 3384219089Spjd dmu_tx_hold_free(tx, acl_obj, 0, 3385219089Spjd DMU_OBJECT_END); 3386219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3387219089Spjd 0, aclp->z_acl_bytes); 3388209962Smm } else { 3389219089Spjd dmu_tx_hold_write(tx, acl_obj, 0, 3390219089Spjd aclp->z_acl_bytes); 3391209962Smm } 3392219089Spjd } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 3393219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3394219089Spjd 0, aclp->z_acl_bytes); 3395209962Smm } 3396219089Spjd mutex_exit(&zp->z_lock); 3397219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3398219089Spjd } else { 3399219089Spjd if ((mask & AT_XVATTR) && 3400219089Spjd XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3401219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3402219089Spjd else 3403219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3404168404Spjd } 3405168404Spjd 3406219089Spjd if (attrzp) { 3407219089Spjd dmu_tx_hold_sa(tx, 
attrzp->z_sa_hdl, B_FALSE); 3408219089Spjd } 3409219089Spjd 3410219089Spjd fuid_dirtied = zfsvfs->z_fuid_dirty; 3411219089Spjd if (fuid_dirtied) 3412219089Spjd zfs_fuid_txhold(zfsvfs, tx); 3413219089Spjd 3414219089Spjd zfs_sa_upgrade_txholds(tx, zp); 3415219089Spjd 3416258720Savg err = dmu_tx_assign(tx, TXG_WAIT); 3417258720Savg if (err) 3418209962Smm goto out; 3419168404Spjd 3420219089Spjd count = 0; 3421168404Spjd /* 3422168404Spjd * Set each attribute requested. 3423168404Spjd * We group settings according to the locks they need to acquire. 3424168404Spjd * 3425168404Spjd * Note: you cannot set ctime directly, although it will be 3426168404Spjd * updated as a side-effect of calling this function. 3427168404Spjd */ 3428168404Spjd 3429219089Spjd 3430219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3431219089Spjd mutex_enter(&zp->z_acl_lock); 3432168404Spjd mutex_enter(&zp->z_lock); 3433168404Spjd 3434219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3435219089Spjd &zp->z_pflags, sizeof (zp->z_pflags)); 3436219089Spjd 3437219089Spjd if (attrzp) { 3438219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3439219089Spjd mutex_enter(&attrzp->z_acl_lock); 3440219089Spjd mutex_enter(&attrzp->z_lock); 3441219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3442219089Spjd SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3443219089Spjd sizeof (attrzp->z_pflags)); 3444219089Spjd } 3445219089Spjd 3446219089Spjd if (mask & (AT_UID|AT_GID)) { 3447219089Spjd 3448219089Spjd if (mask & AT_UID) { 3449219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 3450219089Spjd &new_uid, sizeof (new_uid)); 3451219089Spjd zp->z_uid = new_uid; 3452219089Spjd if (attrzp) { 3453219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3454219089Spjd SA_ZPL_UID(zfsvfs), NULL, &new_uid, 3455219089Spjd sizeof (new_uid)); 3456219089Spjd attrzp->z_uid = new_uid; 3457219089Spjd } 3458219089Spjd } 3459219089Spjd 3460219089Spjd if (mask & AT_GID) { 3461219089Spjd SA_ADD_BULK_ATTR(bulk, 
count, SA_ZPL_GID(zfsvfs), 3462219089Spjd NULL, &new_gid, sizeof (new_gid)); 3463219089Spjd zp->z_gid = new_gid; 3464219089Spjd if (attrzp) { 3465219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3466219089Spjd SA_ZPL_GID(zfsvfs), NULL, &new_gid, 3467219089Spjd sizeof (new_gid)); 3468219089Spjd attrzp->z_gid = new_gid; 3469219089Spjd } 3470219089Spjd } 3471219089Spjd if (!(mask & AT_MODE)) { 3472219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 3473219089Spjd NULL, &new_mode, sizeof (new_mode)); 3474219089Spjd new_mode = zp->z_mode; 3475219089Spjd } 3476219089Spjd err = zfs_acl_chown_setattr(zp); 3477219089Spjd ASSERT(err == 0); 3478219089Spjd if (attrzp) { 3479219089Spjd err = zfs_acl_chown_setattr(attrzp); 3480219089Spjd ASSERT(err == 0); 3481219089Spjd } 3482219089Spjd } 3483219089Spjd 3484168404Spjd if (mask & AT_MODE) { 3485219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 3486219089Spjd &new_mode, sizeof (new_mode)); 3487219089Spjd zp->z_mode = new_mode; 3488219089Spjd ASSERT3U((uintptr_t)aclp, !=, 0); 3489209962Smm err = zfs_aclset_common(zp, aclp, cr, tx); 3490240415Smm ASSERT0(err); 3491219089Spjd if (zp->z_acl_cached) 3492219089Spjd zfs_acl_free(zp->z_acl_cached); 3493211932Smm zp->z_acl_cached = aclp; 3494211932Smm aclp = NULL; 3495168404Spjd } 3496168404Spjd 3497168404Spjd 3498219089Spjd if (mask & AT_ATIME) { 3499219089Spjd ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 3500219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 3501219089Spjd &zp->z_atime, sizeof (zp->z_atime)); 3502168404Spjd } 3503168404Spjd 3504219089Spjd if (mask & AT_MTIME) { 3505219089Spjd ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 3506219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 3507219089Spjd mtime, sizeof (mtime)); 3508168404Spjd } 3509168404Spjd 3510185029Spjd /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 3511219089Spjd if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3512219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3513219089Spjd NULL, mtime, sizeof (mtime)); 3514219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3515219089Spjd &ctime, sizeof (ctime)); 3516219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 3517219089Spjd B_TRUE); 3518219089Spjd } else if (mask != 0) { 3519219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3520219089Spjd &ctime, sizeof (ctime)); 3521219089Spjd zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 3522219089Spjd B_TRUE); 3523219089Spjd if (attrzp) { 3524219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3525219089Spjd SA_ZPL_CTIME(zfsvfs), NULL, 3526219089Spjd &ctime, sizeof (ctime)); 3527219089Spjd zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 3528219089Spjd mtime, ctime, B_TRUE); 3529219089Spjd } 3530219089Spjd } 3531185029Spjd /* 3532185029Spjd * Do this after setting timestamps to prevent timestamp 3533185029Spjd * update from toggling bit 3534185029Spjd */ 3535168404Spjd 3536185029Spjd if (xoap && (mask & AT_XVATTR)) { 3537209962Smm 3538209962Smm /* 3539209962Smm * restore trimmed off masks 3540209962Smm * so that return masks can be set for caller. 
3541209962Smm */ 3542209962Smm 3543209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3544209962Smm XVA_SET_REQ(xvap, XAT_APPENDONLY); 3545209962Smm } 3546209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3547209962Smm XVA_SET_REQ(xvap, XAT_NOUNLINK); 3548209962Smm } 3549209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3550209962Smm XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3551209962Smm } 3552209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3553209962Smm XVA_SET_REQ(xvap, XAT_NODUMP); 3554209962Smm } 3555209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3556209962Smm XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3557209962Smm } 3558209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3559209962Smm XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3560209962Smm } 3561209962Smm 3562219089Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3563185029Spjd ASSERT(vp->v_type == VREG); 3564185029Spjd 3565219089Spjd zfs_xvattr_set(zp, xvap, tx); 3566185029Spjd } 3567185029Spjd 3568209962Smm if (fuid_dirtied) 3569209962Smm zfs_fuid_sync(zfsvfs, tx); 3570209962Smm 3571168404Spjd if (mask != 0) 3572185029Spjd zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3573168404Spjd 3574168404Spjd mutex_exit(&zp->z_lock); 3575219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3576219089Spjd mutex_exit(&zp->z_acl_lock); 3577168404Spjd 3578219089Spjd if (attrzp) { 3579219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3580219089Spjd mutex_exit(&attrzp->z_acl_lock); 3581219089Spjd mutex_exit(&attrzp->z_lock); 3582219089Spjd } 3583209962Smmout: 3584219089Spjd if (err == 0 && attrzp) { 3585219089Spjd err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 3586219089Spjd xattr_count, tx); 3587219089Spjd ASSERT(err2 == 0); 3588219089Spjd } 3589219089Spjd 3590168404Spjd if (attrzp) 3591168404Spjd VN_RELE(ZTOV(attrzp)); 3592251631Sdelphij 3593211932Smm if (aclp) 3594209962Smm zfs_acl_free(aclp); 3595168404Spjd 3596209962Smm if (fuidp) { 3597209962Smm zfs_fuid_info_free(fuidp); 
3598209962Smm fuidp = NULL; 3599209962Smm } 3600209962Smm 3601219089Spjd if (err) { 3602209962Smm dmu_tx_abort(tx); 3603219089Spjd if (err == ERESTART) 3604219089Spjd goto top; 3605219089Spjd } else { 3606219089Spjd err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 3607209962Smm dmu_tx_commit(tx); 3608219089Spjd } 3609209962Smm 3610219089Spjdout2: 3611219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3612219089Spjd zil_commit(zilog, 0); 3613209962Smm 3614168404Spjd ZFS_EXIT(zfsvfs); 3615168404Spjd return (err); 3616168404Spjd} 3617168404Spjd 3618168404Spjdtypedef struct zfs_zlock { 3619168404Spjd krwlock_t *zl_rwlock; /* lock we acquired */ 3620168404Spjd znode_t *zl_znode; /* znode we held */ 3621168404Spjd struct zfs_zlock *zl_next; /* next in list */ 3622168404Spjd} zfs_zlock_t; 3623168404Spjd 3624168404Spjd/* 3625168404Spjd * Drop locks and release vnodes that were held by zfs_rename_lock(). 3626168404Spjd */ 3627168404Spjdstatic void 3628168404Spjdzfs_rename_unlock(zfs_zlock_t **zlpp) 3629168404Spjd{ 3630168404Spjd zfs_zlock_t *zl; 3631168404Spjd 3632168404Spjd while ((zl = *zlpp) != NULL) { 3633168404Spjd if (zl->zl_znode != NULL) 3634168404Spjd VN_RELE(ZTOV(zl->zl_znode)); 3635168404Spjd rw_exit(zl->zl_rwlock); 3636168404Spjd *zlpp = zl->zl_next; 3637168404Spjd kmem_free(zl, sizeof (*zl)); 3638168404Spjd } 3639168404Spjd} 3640168404Spjd 3641168404Spjd/* 3642168404Spjd * Search back through the directory tree, using the ".." entries. 3643168404Spjd * Lock each directory in the chain to prevent concurrent renames. 3644168404Spjd * Fail any attempt to move a directory into one of its own descendants. 
3645168404Spjd * XXX - z_parent_lock can overlap with map or grow locks 3646168404Spjd */ 3647168404Spjdstatic int 3648168404Spjdzfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 3649168404Spjd{ 3650168404Spjd zfs_zlock_t *zl; 3651168404Spjd znode_t *zp = tdzp; 3652168404Spjd uint64_t rootid = zp->z_zfsvfs->z_root; 3653219089Spjd uint64_t oidp = zp->z_id; 3654168404Spjd krwlock_t *rwlp = &szp->z_parent_lock; 3655168404Spjd krw_t rw = RW_WRITER; 3656168404Spjd 3657168404Spjd /* 3658168404Spjd * First pass write-locks szp and compares to zp->z_id. 3659168404Spjd * Later passes read-lock zp and compare to zp->z_parent. 3660168404Spjd */ 3661168404Spjd do { 3662168404Spjd if (!rw_tryenter(rwlp, rw)) { 3663168404Spjd /* 3664168404Spjd * Another thread is renaming in this path. 3665168404Spjd * Note that if we are a WRITER, we don't have any 3666168404Spjd * parent_locks held yet. 3667168404Spjd */ 3668168404Spjd if (rw == RW_READER && zp->z_id > szp->z_id) { 3669168404Spjd /* 3670168404Spjd * Drop our locks and restart 3671168404Spjd */ 3672168404Spjd zfs_rename_unlock(&zl); 3673168404Spjd *zlpp = NULL; 3674168404Spjd zp = tdzp; 3675219089Spjd oidp = zp->z_id; 3676168404Spjd rwlp = &szp->z_parent_lock; 3677168404Spjd rw = RW_WRITER; 3678168404Spjd continue; 3679168404Spjd } else { 3680168404Spjd /* 3681168404Spjd * Wait for other thread to drop its locks 3682168404Spjd */ 3683168404Spjd rw_enter(rwlp, rw); 3684168404Spjd } 3685168404Spjd } 3686168404Spjd 3687168404Spjd zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3688168404Spjd zl->zl_rwlock = rwlp; 3689168404Spjd zl->zl_znode = NULL; 3690168404Spjd zl->zl_next = *zlpp; 3691168404Spjd *zlpp = zl; 3692168404Spjd 3693219089Spjd if (oidp == szp->z_id) /* We're a descendant of szp */ 3694249195Smm return (SET_ERROR(EINVAL)); 3695168404Spjd 3696219089Spjd if (oidp == rootid) /* We've hit the top */ 3697168404Spjd return (0); 3698168404Spjd 3699168404Spjd if (rw == RW_READER) { /* i.e. 
not the first pass */ 3700219089Spjd int error = zfs_zget(zp->z_zfsvfs, oidp, &zp); 3701168404Spjd if (error) 3702168404Spjd return (error); 3703168404Spjd zl->zl_znode = zp; 3704168404Spjd } 3705219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs), 3706219089Spjd &oidp, sizeof (oidp)); 3707168404Spjd rwlp = &zp->z_parent_lock; 3708168404Spjd rw = RW_READER; 3709168404Spjd 3710168404Spjd } while (zp->z_id != sdzp->z_id); 3711168404Spjd 3712168404Spjd return (0); 3713168404Spjd} 3714168404Spjd 3715168404Spjd/* 3716168404Spjd * Move an entry from the provided source directory to the target 3717168404Spjd * directory. Change the entry name as indicated. 3718168404Spjd * 3719168404Spjd * IN: sdvp - Source directory containing the "old entry". 3720168404Spjd * snm - Old entry name. 3721168404Spjd * tdvp - Target directory to contain the "new entry". 3722168404Spjd * tnm - New entry name. 3723168404Spjd * cr - credentials of caller. 3724185029Spjd * ct - caller context 3725185029Spjd * flags - case flags 3726168404Spjd * 3727251631Sdelphij * RETURN: 0 on success, error code on failure. 
3728168404Spjd * 3729168404Spjd * Timestamps: 3730168404Spjd * sdvp,tdvp - ctime|mtime updated 3731168404Spjd */ 3732185029Spjd/*ARGSUSED*/ 3733168404Spjdstatic int 3734185029Spjdzfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, 3735185029Spjd caller_context_t *ct, int flags) 3736168404Spjd{ 3737264392Sdavide znode_t *tdzp, *sdzp, *szp, *tzp; 3738264392Sdavide zfsvfs_t *zfsvfs; 3739185029Spjd zilog_t *zilog; 3740168962Spjd vnode_t *realvp; 3741168404Spjd zfs_dirlock_t *sdl, *tdl; 3742168404Spjd dmu_tx_t *tx; 3743168404Spjd zfs_zlock_t *zl; 3744185029Spjd int cmp, serr, terr; 3745185029Spjd int error = 0; 3746185029Spjd int zflg = 0; 3747258632Savg boolean_t waited = B_FALSE; 3748168404Spjd 3749264392Sdavide tdzp = VTOZ(tdvp); 3750264392Sdavide ZFS_VERIFY_ZP(tdzp); 3751264392Sdavide zfsvfs = tdzp->z_zfsvfs; 3752168404Spjd ZFS_ENTER(zfsvfs); 3753185029Spjd zilog = zfsvfs->z_log; 3754264392Sdavide sdzp = VTOZ(sdvp); 3755168404Spjd 3756168962Spjd /* 3757264392Sdavide * In case sdzp is not valid, let's be sure to exit from the right 3758264392Sdavide * zfsvfs_t. 3759168962Spjd */ 3760264392Sdavide if (sdzp->z_sa_hdl == NULL) { 3761264392Sdavide ZFS_EXIT(zfsvfs); 3762264392Sdavide return (SET_ERROR(EIO)); 3763264392Sdavide } 3764168962Spjd 3765254585Sdelphij /* 3766254585Sdelphij * We check z_zfsvfs rather than v_vfsp here, because snapshots and the 3767254585Sdelphij * ctldir appear to have the same v_vfsp. 
3768254585Sdelphij */ 3769264392Sdavide if (sdzp->z_zfsvfs != zfsvfs || zfsctl_is_node(tdvp)) { 3770168404Spjd ZFS_EXIT(zfsvfs); 3771249195Smm return (SET_ERROR(EXDEV)); 3772168404Spjd } 3773168404Spjd 3774185029Spjd if (zfsvfs->z_utf8 && u8_validate(tnm, 3775185029Spjd strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3776185029Spjd ZFS_EXIT(zfsvfs); 3777249195Smm return (SET_ERROR(EILSEQ)); 3778185029Spjd } 3779185029Spjd 3780185029Spjd if (flags & FIGNORECASE) 3781185029Spjd zflg |= ZCILOOK; 3782185029Spjd 3783168404Spjdtop: 3784168404Spjd szp = NULL; 3785168404Spjd tzp = NULL; 3786168404Spjd zl = NULL; 3787168404Spjd 3788168404Spjd /* 3789168404Spjd * This is to prevent the creation of links into attribute space 3790168404Spjd * by renaming a linked file into/outof an attribute directory. 3791168404Spjd * See the comment in zfs_link() for why this is considered bad. 3792168404Spjd */ 3793219089Spjd if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3794168962Spjd ZFS_EXIT(zfsvfs); 3795249195Smm return (SET_ERROR(EINVAL)); 3796168404Spjd } 3797168404Spjd 3798168404Spjd /* 3799168404Spjd * Lock source and target directory entries. To prevent deadlock, 3800168404Spjd * a lock ordering must be defined. We lock the directory with 3801168404Spjd * the smallest object id first, or if it's a tie, the one with 3802168404Spjd * the lexically first name. 3803168404Spjd */ 3804168404Spjd if (sdzp->z_id < tdzp->z_id) { 3805168962Spjd cmp = -1; 3806168962Spjd } else if (sdzp->z_id > tdzp->z_id) { 3807168962Spjd cmp = 1; 3808168962Spjd } else { 3809185029Spjd /* 3810185029Spjd * First compare the two name arguments without 3811185029Spjd * considering any case folding. 
3812185029Spjd */ 3813185029Spjd int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); 3814185029Spjd 3815185029Spjd cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); 3816185029Spjd ASSERT(error == 0 || !zfsvfs->z_utf8); 3817168962Spjd if (cmp == 0) { 3818168962Spjd /* 3819168962Spjd * POSIX: "If the old argument and the new argument 3820168962Spjd * both refer to links to the same existing file, 3821168962Spjd * the rename() function shall return successfully 3822168962Spjd * and perform no other action." 3823168962Spjd */ 3824168962Spjd ZFS_EXIT(zfsvfs); 3825168962Spjd return (0); 3826168962Spjd } 3827185029Spjd /* 3828185029Spjd * If the file system is case-folding, then we may 3829185029Spjd * have some more checking to do. A case-folding file 3830185029Spjd * system is either supporting mixed case sensitivity 3831185029Spjd * access or is completely case-insensitive. Note 3832185029Spjd * that the file system is always case preserving. 3833185029Spjd * 3834185029Spjd * In mixed sensitivity mode case sensitive behavior 3835185029Spjd * is the default. FIGNORECASE must be used to 3836185029Spjd * explicitly request case insensitive behavior. 3837185029Spjd * 3838185029Spjd * If the source and target names provided differ only 3839185029Spjd * by case (e.g., a request to rename 'tim' to 'Tim'), 3840185029Spjd * we will treat this as a special case in the 3841185029Spjd * case-insensitive mode: as long as the source name 3842185029Spjd * is an exact match, we will allow this to proceed as 3843185029Spjd * a name-change request. 
3844185029Spjd */ 3845185029Spjd if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 3846185029Spjd (zfsvfs->z_case == ZFS_CASE_MIXED && 3847185029Spjd flags & FIGNORECASE)) && 3848185029Spjd u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, 3849185029Spjd &error) == 0) { 3850185029Spjd /* 3851185029Spjd * case preserving rename request, require exact 3852185029Spjd * name matches 3853185029Spjd */ 3854185029Spjd zflg |= ZCIEXACT; 3855185029Spjd zflg &= ~ZCILOOK; 3856185029Spjd } 3857168962Spjd } 3858185029Spjd 3859208131Smm /* 3860208131Smm * If the source and destination directories are the same, we should 3861208131Smm * grab the z_name_lock of that directory only once. 3862208131Smm */ 3863208131Smm if (sdzp == tdzp) { 3864208131Smm zflg |= ZHAVELOCK; 3865208131Smm rw_enter(&sdzp->z_name_lock, RW_READER); 3866208131Smm } 3867208131Smm 3868168962Spjd if (cmp < 0) { 3869185029Spjd serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, 3870185029Spjd ZEXISTS | zflg, NULL, NULL); 3871185029Spjd terr = zfs_dirent_lock(&tdl, 3872185029Spjd tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); 3873168962Spjd } else { 3874185029Spjd terr = zfs_dirent_lock(&tdl, 3875185029Spjd tdzp, tnm, &tzp, zflg, NULL, NULL); 3876185029Spjd serr = zfs_dirent_lock(&sdl, 3877185029Spjd sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, 3878185029Spjd NULL, NULL); 3879168404Spjd } 3880168404Spjd 3881168962Spjd if (serr) { 3882168404Spjd /* 3883168404Spjd * Source entry invalid or not there. 3884168404Spjd */ 3885168962Spjd if (!terr) { 3886168404Spjd zfs_dirent_unlock(tdl); 3887168962Spjd if (tzp) 3888168962Spjd VN_RELE(ZTOV(tzp)); 3889168962Spjd } 3890208131Smm 3891208131Smm if (sdzp == tdzp) 3892208131Smm rw_exit(&sdzp->z_name_lock); 3893208131Smm 3894219089Spjd /* 3895219089Spjd * FreeBSD: In OpenSolaris they only check if rename source is 3896219089Spjd * ".." here, because "." is handled in their lookup. This is 3897219089Spjd * not the case for FreeBSD, so we check for "." explicitly. 
3898219089Spjd */ 3899168404Spjd if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0) 3900249195Smm serr = SET_ERROR(EINVAL); 3901168962Spjd ZFS_EXIT(zfsvfs); 3902168962Spjd return (serr); 3903168404Spjd } 3904168404Spjd if (terr) { 3905168404Spjd zfs_dirent_unlock(sdl); 3906168962Spjd VN_RELE(ZTOV(szp)); 3907208131Smm 3908208131Smm if (sdzp == tdzp) 3909208131Smm rw_exit(&sdzp->z_name_lock); 3910208131Smm 3911168404Spjd if (strcmp(tnm, "..") == 0) 3912249195Smm terr = SET_ERROR(EINVAL); 3913168962Spjd ZFS_EXIT(zfsvfs); 3914168962Spjd return (terr); 3915168404Spjd } 3916168404Spjd 3917168404Spjd /* 3918168404Spjd * Must have write access at the source to remove the old entry 3919168404Spjd * and write access at the target to create the new entry. 3920168404Spjd * Note that if target and source are the same, this can be 3921168404Spjd * done in a single check. 3922168404Spjd */ 3923168404Spjd 3924168404Spjd if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3925168404Spjd goto out; 3926168404Spjd 3927168962Spjd if (ZTOV(szp)->v_type == VDIR) { 3928168404Spjd /* 3929168404Spjd * Check to make sure rename is valid. 3930168404Spjd * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3931168404Spjd */ 3932168404Spjd if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) 3933168404Spjd goto out; 3934168404Spjd } 3935168404Spjd 3936168404Spjd /* 3937168404Spjd * Does target exist? 3938168404Spjd */ 3939168404Spjd if (tzp) { 3940168404Spjd /* 3941168404Spjd * Source and target must be the same type. 
3942168404Spjd */ 3943168962Spjd if (ZTOV(szp)->v_type == VDIR) { 3944168962Spjd if (ZTOV(tzp)->v_type != VDIR) { 3945249195Smm error = SET_ERROR(ENOTDIR); 3946168404Spjd goto out; 3947168404Spjd } 3948168404Spjd } else { 3949168962Spjd if (ZTOV(tzp)->v_type == VDIR) { 3950249195Smm error = SET_ERROR(EISDIR); 3951168404Spjd goto out; 3952168404Spjd } 3953168404Spjd } 3954168404Spjd /* 3955168404Spjd * POSIX dictates that when the source and target 3956168404Spjd * entries refer to the same file object, rename 3957168404Spjd * must do nothing and exit without error. 3958168404Spjd */ 3959168404Spjd if (szp->z_id == tzp->z_id) { 3960168404Spjd error = 0; 3961168404Spjd goto out; 3962168404Spjd } 3963168404Spjd } 3964168404Spjd 3965185029Spjd vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); 3966168962Spjd if (tzp) 3967185029Spjd vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 3968168962Spjd 3969185029Spjd /* 3970185029Spjd * notify the target directory if it is not the same 3971185029Spjd * as source directory. 3972185029Spjd */ 3973185029Spjd if (tdvp != sdvp) { 3974185029Spjd vnevent_rename_dest_dir(tdvp, ct); 3975185029Spjd } 3976185029Spjd 3977168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 3978219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 3979219089Spjd dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3980168404Spjd dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3981168404Spjd dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 3982219089Spjd if (sdzp != tdzp) { 3983219089Spjd dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 3984219089Spjd zfs_sa_upgrade_txholds(tx, tdzp); 3985219089Spjd } 3986219089Spjd if (tzp) { 3987219089Spjd dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 3988219089Spjd zfs_sa_upgrade_txholds(tx, tzp); 3989219089Spjd } 3990219089Spjd 3991219089Spjd zfs_sa_upgrade_txholds(tx, szp); 3992168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3993258632Savg error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 3994168404Spjd if (error) { 3995168404Spjd if (zl != NULL) 3996168404Spjd zfs_rename_unlock(&zl); 3997168404Spjd zfs_dirent_unlock(sdl); 3998168404Spjd zfs_dirent_unlock(tdl); 3999208131Smm 4000208131Smm if (sdzp == tdzp) 4001208131Smm rw_exit(&sdzp->z_name_lock); 4002208131Smm 4003168962Spjd VN_RELE(ZTOV(szp)); 4004168962Spjd if (tzp) 4005168962Spjd VN_RELE(ZTOV(tzp)); 4006209962Smm if (error == ERESTART) { 4007258632Savg waited = B_TRUE; 4008168404Spjd dmu_tx_wait(tx); 4009168404Spjd dmu_tx_abort(tx); 4010168404Spjd goto top; 4011168404Spjd } 4012168404Spjd dmu_tx_abort(tx); 4013168962Spjd ZFS_EXIT(zfsvfs); 4014168962Spjd return (error); 4015168404Spjd } 4016168404Spjd 4017168404Spjd if (tzp) /* Attempt to remove the existing target */ 4018185029Spjd error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); 4019168404Spjd 4020168404Spjd if (error == 0) { 4021168404Spjd error = zfs_link_create(tdl, szp, tx, ZRENAMING); 4022168404Spjd if (error == 0) { 4023219089Spjd szp->z_pflags |= ZFS_AV_MODIFIED; 4024185029Spjd 4025219089Spjd error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 4026219089Spjd (void *)&szp->z_pflags, sizeof (uint64_t), tx); 4027240415Smm ASSERT0(error); 4028219089Spjd 4029168404Spjd error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); 4030219089Spjd if (error == 0) { 4031219089Spjd zfs_log_rename(zilog, tx, TX_RENAME | 4032219089Spjd (flags & FIGNORECASE ? TX_CI : 0), sdzp, 4033219089Spjd sdl->dl_name, tdzp, tdl->dl_name, szp); 4034185029Spjd 4035219089Spjd /* 4036219089Spjd * Update path information for the target vnode 4037219089Spjd */ 4038219089Spjd vn_renamepath(tdvp, ZTOV(szp), tnm, 4039219089Spjd strlen(tnm)); 4040219089Spjd } else { 4041219089Spjd /* 4042219089Spjd * At this point, we have successfully created 4043219089Spjd * the target name, but have failed to remove 4044219089Spjd * the source name. 
Since the create was done 4045219089Spjd * with the ZRENAMING flag, there are 4046219089Spjd * complications; for one, the link count is 4047219089Spjd * wrong. The easiest way to deal with this 4048219089Spjd * is to remove the newly created target, and 4049219089Spjd * return the original error. This must 4050219089Spjd * succeed; fortunately, it is very unlikely to 4051219089Spjd * fail, since we just created it. 4052219089Spjd */ 4053219089Spjd VERIFY3U(zfs_link_destroy(tdl, szp, tx, 4054219089Spjd ZRENAMING, NULL), ==, 0); 4055219089Spjd } 4056168404Spjd } 4057168404Spjd#ifdef FREEBSD_NAMECACHE 4058168404Spjd if (error == 0) { 4059168404Spjd cache_purge(sdvp); 4060168404Spjd cache_purge(tdvp); 4061240829Spjd cache_purge(ZTOV(szp)); 4062240829Spjd if (tzp) 4063240829Spjd cache_purge(ZTOV(tzp)); 4064168404Spjd } 4065168404Spjd#endif 4066168404Spjd } 4067168404Spjd 4068168404Spjd dmu_tx_commit(tx); 4069168404Spjdout: 4070168404Spjd if (zl != NULL) 4071168404Spjd zfs_rename_unlock(&zl); 4072168404Spjd 4073168404Spjd zfs_dirent_unlock(sdl); 4074168404Spjd zfs_dirent_unlock(tdl); 4075168404Spjd 4076208131Smm if (sdzp == tdzp) 4077208131Smm rw_exit(&sdzp->z_name_lock); 4078208131Smm 4079219089Spjd 4080168962Spjd VN_RELE(ZTOV(szp)); 4081168404Spjd if (tzp) 4082168962Spjd VN_RELE(ZTOV(tzp)); 4083168404Spjd 4084219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4085219089Spjd zil_commit(zilog, 0); 4086219089Spjd 4087168404Spjd ZFS_EXIT(zfsvfs); 4088168404Spjd 4089168404Spjd return (error); 4090168404Spjd} 4091168404Spjd 4092168404Spjd/* 4093168404Spjd * Insert the indicated symbolic reference entry into the directory. 4094168404Spjd * 4095168404Spjd * IN: dvp - Directory to contain new symbolic link. 4096168404Spjd * link - Name for new symlink entry. 4097168404Spjd * vap - Attributes of new entry. 4098168404Spjd * cr - credentials of caller. 
4099185029Spjd * ct - caller context 4100185029Spjd * flags - case flags 4101168404Spjd * 4102251631Sdelphij * RETURN: 0 on success, error code on failure. 4103168404Spjd * 4104168404Spjd * Timestamps: 4105168404Spjd * dvp - ctime|mtime updated 4106168404Spjd */ 4107185029Spjd/*ARGSUSED*/ 4108168404Spjdstatic int 4109185029Spjdzfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 4110185029Spjd cred_t *cr, kthread_t *td) 4111168404Spjd{ 4112168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 4113168404Spjd zfs_dirlock_t *dl; 4114168404Spjd dmu_tx_t *tx; 4115168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4116185029Spjd zilog_t *zilog; 4117219089Spjd uint64_t len = strlen(link); 4118168404Spjd int error; 4119185029Spjd int zflg = ZNEW; 4120209962Smm zfs_acl_ids_t acl_ids; 4121209962Smm boolean_t fuid_dirtied; 4122219089Spjd uint64_t txtype = TX_SYMLINK; 4123258632Savg boolean_t waited = B_FALSE; 4124185029Spjd int flags = 0; 4125168404Spjd 4126168962Spjd ASSERT(vap->va_type == VLNK); 4127168404Spjd 4128168404Spjd ZFS_ENTER(zfsvfs); 4129185029Spjd ZFS_VERIFY_ZP(dzp); 4130185029Spjd zilog = zfsvfs->z_log; 4131185029Spjd 4132185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 4133185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4134185029Spjd ZFS_EXIT(zfsvfs); 4135249195Smm return (SET_ERROR(EILSEQ)); 4136185029Spjd } 4137185029Spjd if (flags & FIGNORECASE) 4138185029Spjd zflg |= ZCILOOK; 4139168404Spjd 4140168404Spjd if (len > MAXPATHLEN) { 4141168404Spjd ZFS_EXIT(zfsvfs); 4142249195Smm return (SET_ERROR(ENAMETOOLONG)); 4143168404Spjd } 4144168404Spjd 4145219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, 4146219089Spjd vap, cr, NULL, &acl_ids)) != 0) { 4147219089Spjd ZFS_EXIT(zfsvfs); 4148219089Spjd return (error); 4149219089Spjd } 4150260704Savg 4151260704Savg getnewvnode_reserve(1); 4152260704Savg 4153219089Spjdtop: 4154168404Spjd /* 4155168404Spjd * Attempt to lock directory; fail if entry already exists. 
4156168404Spjd */ 4157185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 4158185029Spjd if (error) { 4159219089Spjd zfs_acl_ids_free(&acl_ids); 4160260704Savg getnewvnode_drop_reserve(); 4161168404Spjd ZFS_EXIT(zfsvfs); 4162168404Spjd return (error); 4163168404Spjd } 4164168404Spjd 4165219089Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4166219089Spjd zfs_acl_ids_free(&acl_ids); 4167219089Spjd zfs_dirent_unlock(dl); 4168260704Savg getnewvnode_drop_reserve(); 4169219089Spjd ZFS_EXIT(zfsvfs); 4170219089Spjd return (error); 4171219089Spjd } 4172219089Spjd 4173209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 4174209962Smm zfs_acl_ids_free(&acl_ids); 4175209962Smm zfs_dirent_unlock(dl); 4176260704Savg getnewvnode_drop_reserve(); 4177209962Smm ZFS_EXIT(zfsvfs); 4178249195Smm return (SET_ERROR(EDQUOT)); 4179209962Smm } 4180168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4181209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 4182168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 4183168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4184219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 4185219089Spjd ZFS_SA_BASE_ATTR_SIZE + len); 4186219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 4187219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 4188219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 4189219089Spjd acl_ids.z_aclp->z_acl_bytes); 4190219089Spjd } 4191209962Smm if (fuid_dirtied) 4192209962Smm zfs_fuid_txhold(zfsvfs, tx); 4193258632Savg error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 4194168404Spjd if (error) { 4195168404Spjd zfs_dirent_unlock(dl); 4196209962Smm if (error == ERESTART) { 4197258632Savg waited = B_TRUE; 4198168404Spjd dmu_tx_wait(tx); 4199168404Spjd dmu_tx_abort(tx); 4200168404Spjd goto top; 4201168404Spjd } 4202219089Spjd zfs_acl_ids_free(&acl_ids); 4203168404Spjd dmu_tx_abort(tx); 4204260704Savg getnewvnode_drop_reserve(); 4205168404Spjd ZFS_EXIT(zfsvfs); 4206168404Spjd return (error); 4207168404Spjd } 4208168404Spjd 4209168404Spjd /* 4210168404Spjd * Create a new object for the symlink. 4211219089Spjd * for version 4 ZPL datsets the symlink will be an SA attribute 4212168404Spjd */ 4213219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 4214168404Spjd 4215219089Spjd if (fuid_dirtied) 4216219089Spjd zfs_fuid_sync(zfsvfs, tx); 4217209962Smm 4218219089Spjd mutex_enter(&zp->z_lock); 4219219089Spjd if (zp->z_is_sa) 4220219089Spjd error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 4221219089Spjd link, len, tx); 4222219089Spjd else 4223219089Spjd zfs_sa_symlink(zp, link, len, tx); 4224219089Spjd mutex_exit(&zp->z_lock); 4225168404Spjd 4226219089Spjd zp->z_size = len; 4227219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 4228219089Spjd &zp->z_size, sizeof (zp->z_size), tx); 4229168404Spjd /* 4230168404Spjd * Insert the new object into the directory. 
4231168404Spjd */ 4232168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 4233168404Spjd 4234219089Spjd if (flags & FIGNORECASE) 4235219089Spjd txtype |= TX_CI; 4236219089Spjd zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 4237219089Spjd *vpp = ZTOV(zp); 4238219089Spjd 4239209962Smm zfs_acl_ids_free(&acl_ids); 4240209962Smm 4241168404Spjd dmu_tx_commit(tx); 4242168404Spjd 4243260704Savg getnewvnode_drop_reserve(); 4244260704Savg 4245168404Spjd zfs_dirent_unlock(dl); 4246168404Spjd 4247219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4248219089Spjd zil_commit(zilog, 0); 4249219089Spjd 4250168404Spjd ZFS_EXIT(zfsvfs); 4251168404Spjd return (error); 4252168404Spjd} 4253168404Spjd 4254168404Spjd/* 4255168404Spjd * Return, in the buffer contained in the provided uio structure, 4256168404Spjd * the symbolic path referred to by vp. 4257168404Spjd * 4258168404Spjd * IN: vp - vnode of symbolic link. 4259251631Sdelphij * uio - structure to contain the link path. 4260168404Spjd * cr - credentials of caller. 4261185029Spjd * ct - caller context 4262168404Spjd * 4263251631Sdelphij * OUT: uio - structure containing the link path. 4264168404Spjd * 4265251631Sdelphij * RETURN: 0 on success, error code on failure. 
4266168404Spjd * 4267168404Spjd * Timestamps: 4268168404Spjd * vp - atime updated 4269168404Spjd */ 4270168404Spjd/* ARGSUSED */ 4271168404Spjdstatic int 4272185029Spjdzfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 4273168404Spjd{ 4274168404Spjd znode_t *zp = VTOZ(vp); 4275168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4276168404Spjd int error; 4277168404Spjd 4278168404Spjd ZFS_ENTER(zfsvfs); 4279185029Spjd ZFS_VERIFY_ZP(zp); 4280168404Spjd 4281219089Spjd mutex_enter(&zp->z_lock); 4282219089Spjd if (zp->z_is_sa) 4283219089Spjd error = sa_lookup_uio(zp->z_sa_hdl, 4284219089Spjd SA_ZPL_SYMLINK(zfsvfs), uio); 4285219089Spjd else 4286219089Spjd error = zfs_sa_readlink(zp, uio); 4287219089Spjd mutex_exit(&zp->z_lock); 4288168404Spjd 4289168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4290219089Spjd 4291168404Spjd ZFS_EXIT(zfsvfs); 4292168404Spjd return (error); 4293168404Spjd} 4294168404Spjd 4295168404Spjd/* 4296168404Spjd * Insert a new entry into directory tdvp referencing svp. 4297168404Spjd * 4298168404Spjd * IN: tdvp - Directory to contain new entry. 4299168404Spjd * svp - vnode of new entry. 4300168404Spjd * name - name of new entry. 4301168404Spjd * cr - credentials of caller. 4302185029Spjd * ct - caller context 4303168404Spjd * 4304251631Sdelphij * RETURN: 0 on success, error code on failure. 
4305168404Spjd * 4306168404Spjd * Timestamps: 4307168404Spjd * tdvp - ctime|mtime updated 4308168404Spjd * svp - ctime updated 4309168404Spjd */ 4310168404Spjd/* ARGSUSED */ 4311168404Spjdstatic int 4312185029Spjdzfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 4313185029Spjd caller_context_t *ct, int flags) 4314168404Spjd{ 4315168404Spjd znode_t *dzp = VTOZ(tdvp); 4316168404Spjd znode_t *tzp, *szp; 4317168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4318185029Spjd zilog_t *zilog; 4319168404Spjd zfs_dirlock_t *dl; 4320168404Spjd dmu_tx_t *tx; 4321168962Spjd vnode_t *realvp; 4322168404Spjd int error; 4323185029Spjd int zf = ZNEW; 4324212694Smm uint64_t parent; 4325185029Spjd uid_t owner; 4326258632Savg boolean_t waited = B_FALSE; 4327168404Spjd 4328168404Spjd ASSERT(tdvp->v_type == VDIR); 4329168404Spjd 4330168404Spjd ZFS_ENTER(zfsvfs); 4331185029Spjd ZFS_VERIFY_ZP(dzp); 4332185029Spjd zilog = zfsvfs->z_log; 4333168404Spjd 4334185029Spjd if (VOP_REALVP(svp, &realvp, ct) == 0) 4335168962Spjd svp = realvp; 4336168962Spjd 4337212694Smm /* 4338212694Smm * POSIX dictates that we return EPERM here. 4339212694Smm * Better choices include ENOTSUP or EISDIR. 4340212694Smm */ 4341212694Smm if (svp->v_type == VDIR) { 4342168404Spjd ZFS_EXIT(zfsvfs); 4343249195Smm return (SET_ERROR(EPERM)); 4344212694Smm } 4345212694Smm 4346254585Sdelphij szp = VTOZ(svp); 4347254585Sdelphij ZFS_VERIFY_ZP(szp); 4348254585Sdelphij 4349258597Spjd if (szp->z_pflags & (ZFS_APPENDONLY | ZFS_IMMUTABLE | ZFS_READONLY)) { 4350258597Spjd ZFS_EXIT(zfsvfs); 4351258597Spjd return (SET_ERROR(EPERM)); 4352258597Spjd } 4353258597Spjd 4354254585Sdelphij /* 4355254585Sdelphij * We check z_zfsvfs rather than v_vfsp here, because snapshots and the 4356254585Sdelphij * ctldir appear to have the same v_vfsp. 
4357254585Sdelphij */ 4358254585Sdelphij if (szp->z_zfsvfs != zfsvfs || zfsctl_is_node(svp)) { 4359212694Smm ZFS_EXIT(zfsvfs); 4360249195Smm return (SET_ERROR(EXDEV)); 4361168404Spjd } 4362212694Smm 4363212694Smm /* Prevent links to .zfs/shares files */ 4364212694Smm 4365219089Spjd if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 4366219089Spjd &parent, sizeof (uint64_t))) != 0) { 4367212694Smm ZFS_EXIT(zfsvfs); 4368219089Spjd return (error); 4369219089Spjd } 4370219089Spjd if (parent == zfsvfs->z_shares_dir) { 4371219089Spjd ZFS_EXIT(zfsvfs); 4372249195Smm return (SET_ERROR(EPERM)); 4373212694Smm } 4374212694Smm 4375185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, 4376185029Spjd strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4377185029Spjd ZFS_EXIT(zfsvfs); 4378249195Smm return (SET_ERROR(EILSEQ)); 4379185029Spjd } 4380185029Spjd if (flags & FIGNORECASE) 4381185029Spjd zf |= ZCILOOK; 4382185029Spjd 4383168404Spjd /* 4384168404Spjd * We do not support links between attributes and non-attributes 4385168404Spjd * because of the potential security risk of creating links 4386168404Spjd * into "normal" file space in order to circumvent restrictions 4387168404Spjd * imposed in attribute space. 4388168404Spjd */ 4389219089Spjd if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4390168404Spjd ZFS_EXIT(zfsvfs); 4391249195Smm return (SET_ERROR(EINVAL)); 4392168404Spjd } 4393168404Spjd 4394168404Spjd 4395219089Spjd owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4396219089Spjd if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { 4397168404Spjd ZFS_EXIT(zfsvfs); 4398249195Smm return (SET_ERROR(EPERM)); 4399168404Spjd } 4400168404Spjd 4401185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4402168404Spjd ZFS_EXIT(zfsvfs); 4403168404Spjd return (error); 4404168404Spjd } 4405168404Spjd 4406212694Smmtop: 4407168404Spjd /* 4408168404Spjd * Attempt to lock directory; fail if entry already exists. 
4409168404Spjd */ 4410185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 4411185029Spjd if (error) { 4412168404Spjd ZFS_EXIT(zfsvfs); 4413168404Spjd return (error); 4414168404Spjd } 4415168404Spjd 4416168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4417219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4418168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4419219089Spjd zfs_sa_upgrade_txholds(tx, szp); 4420219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 4421258632Savg error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 4422168404Spjd if (error) { 4423168404Spjd zfs_dirent_unlock(dl); 4424209962Smm if (error == ERESTART) { 4425258632Savg waited = B_TRUE; 4426168404Spjd dmu_tx_wait(tx); 4427168404Spjd dmu_tx_abort(tx); 4428168404Spjd goto top; 4429168404Spjd } 4430168404Spjd dmu_tx_abort(tx); 4431168404Spjd ZFS_EXIT(zfsvfs); 4432168404Spjd return (error); 4433168404Spjd } 4434168404Spjd 4435168404Spjd error = zfs_link_create(dl, szp, tx, 0); 4436168404Spjd 4437185029Spjd if (error == 0) { 4438185029Spjd uint64_t txtype = TX_LINK; 4439185029Spjd if (flags & FIGNORECASE) 4440185029Spjd txtype |= TX_CI; 4441185029Spjd zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4442185029Spjd } 4443168404Spjd 4444168404Spjd dmu_tx_commit(tx); 4445168404Spjd 4446168404Spjd zfs_dirent_unlock(dl); 4447168404Spjd 4448185029Spjd if (error == 0) { 4449185029Spjd vnevent_link(svp, ct); 4450185029Spjd } 4451185029Spjd 4452219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4453219089Spjd zil_commit(zilog, 0); 4454219089Spjd 4455168404Spjd ZFS_EXIT(zfsvfs); 4456168404Spjd return (error); 4457168404Spjd} 4458168404Spjd 4459219089Spjd#ifdef sun 4460219089Spjd/* 4461219089Spjd * zfs_null_putapage() is used when the file system has been force 4462219089Spjd * unmounted. It just drops the pages. 
4463219089Spjd */ 4464219089Spjd/* ARGSUSED */ 4465219089Spjdstatic int 4466219089Spjdzfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4467219089Spjd size_t *lenp, int flags, cred_t *cr) 4468219089Spjd{ 4469219089Spjd pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); 4470219089Spjd return (0); 4471219089Spjd} 4472219089Spjd 4473219089Spjd/* 4474219089Spjd * Push a page out to disk, klustering if possible. 4475219089Spjd * 4476219089Spjd * IN: vp - file to push page to. 4477219089Spjd * pp - page to push. 4478219089Spjd * flags - additional flags. 4479219089Spjd * cr - credentials of caller. 4480219089Spjd * 4481219089Spjd * OUT: offp - start of range pushed. 4482219089Spjd * lenp - len of range pushed. 4483219089Spjd * 4484251631Sdelphij * RETURN: 0 on success, error code on failure. 4485219089Spjd * 4486219089Spjd * NOTE: callers must have locked the page to be pushed. On 4487219089Spjd * exit, the page (and all other pages in the kluster) must be 4488219089Spjd * unlocked. 4489219089Spjd */ 4490219089Spjd/* ARGSUSED */ 4491219089Spjdstatic int 4492219089Spjdzfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4493219089Spjd size_t *lenp, int flags, cred_t *cr) 4494219089Spjd{ 4495219089Spjd znode_t *zp = VTOZ(vp); 4496219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4497219089Spjd dmu_tx_t *tx; 4498219089Spjd u_offset_t off, koff; 4499219089Spjd size_t len, klen; 4500219089Spjd int err; 4501219089Spjd 4502219089Spjd off = pp->p_offset; 4503219089Spjd len = PAGESIZE; 4504219089Spjd /* 4505219089Spjd * If our blocksize is bigger than the page size, try to kluster 4506219089Spjd * multiple pages so that we write a full block (thus avoiding 4507219089Spjd * a read-modify-write). 4508219089Spjd */ 4509219089Spjd if (off < zp->z_size && zp->z_blksz > PAGESIZE) { 4510219089Spjd klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); 4511219089Spjd koff = ISP2(klen) ? 
P2ALIGN(off, (u_offset_t)klen) : 0; 4512219089Spjd ASSERT(koff <= zp->z_size); 4513219089Spjd if (koff + klen > zp->z_size) 4514219089Spjd klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); 4515219089Spjd pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); 4516219089Spjd } 4517219089Spjd ASSERT3U(btop(len), ==, btopr(len)); 4518219089Spjd 4519219089Spjd /* 4520219089Spjd * Can't push pages past end-of-file. 4521219089Spjd */ 4522219089Spjd if (off >= zp->z_size) { 4523219089Spjd /* ignore all pages */ 4524219089Spjd err = 0; 4525219089Spjd goto out; 4526219089Spjd } else if (off + len > zp->z_size) { 4527219089Spjd int npages = btopr(zp->z_size - off); 4528219089Spjd page_t *trunc; 4529219089Spjd 4530219089Spjd page_list_break(&pp, &trunc, npages); 4531219089Spjd /* ignore pages past end of file */ 4532219089Spjd if (trunc) 4533219089Spjd pvn_write_done(trunc, flags); 4534219089Spjd len = zp->z_size - off; 4535219089Spjd } 4536219089Spjd 4537219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 4538219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4539249195Smm err = SET_ERROR(EDQUOT); 4540219089Spjd goto out; 4541219089Spjd } 4542219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 4543219089Spjd dmu_tx_hold_write(tx, zp->z_id, off, len); 4544219089Spjd 4545219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4546219089Spjd zfs_sa_upgrade_txholds(tx, zp); 4547258720Savg err = dmu_tx_assign(tx, TXG_WAIT); 4548219089Spjd if (err != 0) { 4549219089Spjd dmu_tx_abort(tx); 4550219089Spjd goto out; 4551219089Spjd } 4552219089Spjd 4553219089Spjd if (zp->z_blksz <= PAGESIZE) { 4554219089Spjd caddr_t va = zfs_map_page(pp, S_READ); 4555219089Spjd ASSERT3U(len, <=, PAGESIZE); 4556219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 4557219089Spjd zfs_unmap_page(pp, va); 4558219089Spjd } else { 4559219089Spjd err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 4560219089Spjd } 4561219089Spjd 4562219089Spjd if (err == 0) { 
4563219089Spjd uint64_t mtime[2], ctime[2]; 4564219089Spjd sa_bulk_attr_t bulk[3]; 4565219089Spjd int count = 0; 4566219089Spjd 4567219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 4568219089Spjd &mtime, 16); 4569219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 4570219089Spjd &ctime, 16); 4571219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4572219089Spjd &zp->z_pflags, 8); 4573219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 4574219089Spjd B_TRUE); 4575219089Spjd zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 4576219089Spjd } 4577219089Spjd dmu_tx_commit(tx); 4578219089Spjd 4579219089Spjdout: 4580219089Spjd pvn_write_done(pp, (err ? B_ERROR : 0) | flags); 4581219089Spjd if (offp) 4582219089Spjd *offp = off; 4583219089Spjd if (lenp) 4584219089Spjd *lenp = len; 4585219089Spjd 4586219089Spjd return (err); 4587219089Spjd} 4588219089Spjd 4589219089Spjd/* 4590219089Spjd * Copy the portion of the file indicated from pages into the file. 4591219089Spjd * The pages are stored in a page list attached to the files vnode. 4592219089Spjd * 4593219089Spjd * IN: vp - vnode of file to push page data to. 4594219089Spjd * off - position in file to put data. 4595219089Spjd * len - amount of data to write. 4596219089Spjd * flags - flags to control the operation. 4597219089Spjd * cr - credentials of caller. 4598219089Spjd * ct - caller context. 4599219089Spjd * 4600251631Sdelphij * RETURN: 0 on success, error code on failure. 
4601219089Spjd * 4602219089Spjd * Timestamps: 4603219089Spjd * vp - ctime|mtime updated 4604219089Spjd */ 4605185029Spjd/*ARGSUSED*/ 4606219089Spjdstatic int 4607219089Spjdzfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, 4608219089Spjd caller_context_t *ct) 4609219089Spjd{ 4610219089Spjd znode_t *zp = VTOZ(vp); 4611219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4612219089Spjd page_t *pp; 4613219089Spjd size_t io_len; 4614219089Spjd u_offset_t io_off; 4615219089Spjd uint_t blksz; 4616219089Spjd rl_t *rl; 4617219089Spjd int error = 0; 4618219089Spjd 4619219089Spjd ZFS_ENTER(zfsvfs); 4620219089Spjd ZFS_VERIFY_ZP(zp); 4621219089Spjd 4622219089Spjd /* 4623219089Spjd * Align this request to the file block size in case we kluster. 4624219089Spjd * XXX - this can result in pretty aggresive locking, which can 4625219089Spjd * impact simultanious read/write access. One option might be 4626219089Spjd * to break up long requests (len == 0) into block-by-block 4627219089Spjd * operations to get narrower locking. 4628219089Spjd */ 4629219089Spjd blksz = zp->z_blksz; 4630219089Spjd if (ISP2(blksz)) 4631219089Spjd io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); 4632219089Spjd else 4633219089Spjd io_off = 0; 4634219089Spjd if (len > 0 && ISP2(blksz)) 4635219089Spjd io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); 4636219089Spjd else 4637219089Spjd io_len = 0; 4638219089Spjd 4639219089Spjd if (io_len == 0) { 4640219089Spjd /* 4641219089Spjd * Search the entire vp list for pages >= io_off. 
4642219089Spjd */ 4643219089Spjd rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); 4644219089Spjd error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); 4645219089Spjd goto out; 4646219089Spjd } 4647219089Spjd rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); 4648219089Spjd 4649219089Spjd if (off > zp->z_size) { 4650219089Spjd /* past end of file */ 4651219089Spjd zfs_range_unlock(rl); 4652219089Spjd ZFS_EXIT(zfsvfs); 4653219089Spjd return (0); 4654219089Spjd } 4655219089Spjd 4656219089Spjd len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); 4657219089Spjd 4658219089Spjd for (off = io_off; io_off < off + len; io_off += io_len) { 4659219089Spjd if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 4660219089Spjd pp = page_lookup(vp, io_off, 4661219089Spjd (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); 4662219089Spjd } else { 4663219089Spjd pp = page_lookup_nowait(vp, io_off, 4664219089Spjd (flags & B_FREE) ? SE_EXCL : SE_SHARED); 4665219089Spjd } 4666219089Spjd 4667219089Spjd if (pp != NULL && pvn_getdirty(pp, flags)) { 4668219089Spjd int err; 4669219089Spjd 4670219089Spjd /* 4671219089Spjd * Found a dirty page to push 4672219089Spjd */ 4673219089Spjd err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); 4674219089Spjd if (err) 4675219089Spjd error = err; 4676219089Spjd } else { 4677219089Spjd io_len = PAGESIZE; 4678219089Spjd } 4679219089Spjd } 4680219089Spjdout: 4681219089Spjd zfs_range_unlock(rl); 4682219089Spjd if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4683219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 4684219089Spjd ZFS_EXIT(zfsvfs); 4685219089Spjd return (error); 4686219089Spjd} 4687219089Spjd#endif /* sun */ 4688219089Spjd 4689219089Spjd/*ARGSUSED*/ 4690168962Spjdvoid 4691185029Spjdzfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4692168404Spjd{ 4693168962Spjd znode_t *zp = VTOZ(vp); 4694168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4695168962Spjd int error; 4696168404Spjd 4697185029Spjd 
rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4698219089Spjd if (zp->z_sa_hdl == NULL) { 4699185029Spjd /* 4700185029Spjd * The fs has been unmounted, or we did a 4701185029Spjd * suspend/resume and this file no longer exists. 4702185029Spjd */ 4703243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 4704234607Strasz vrecycle(vp); 4705243520Savg return; 4706243520Savg } 4707243520Savg 4708243520Savg mutex_enter(&zp->z_lock); 4709243520Savg if (zp->z_unlinked) { 4710243520Savg /* 4711243520Savg * Fast path to recycle a vnode of a removed file. 4712243520Savg */ 4713243520Savg mutex_exit(&zp->z_lock); 4714185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4715243520Savg vrecycle(vp); 4716168962Spjd return; 4717168404Spjd } 4718243520Savg mutex_exit(&zp->z_lock); 4719168404Spjd 4720168404Spjd if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4721168404Spjd dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4722168404Spjd 4723219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4724219089Spjd zfs_sa_upgrade_txholds(tx, zp); 4725168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 4726168404Spjd if (error) { 4727168404Spjd dmu_tx_abort(tx); 4728168404Spjd } else { 4729168404Spjd mutex_enter(&zp->z_lock); 4730219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 4731219089Spjd (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4732168404Spjd zp->z_atime_dirty = 0; 4733168404Spjd mutex_exit(&zp->z_lock); 4734168404Spjd dmu_tx_commit(tx); 4735168404Spjd } 4736168404Spjd } 4737185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4738168404Spjd} 4739168404Spjd 4740219089Spjd#ifdef sun 4741219089Spjd/* 4742219089Spjd * Bounds-check the seek operation. 4743219089Spjd * 4744219089Spjd * IN: vp - vnode seeking within 4745219089Spjd * ooff - old file offset 4746219089Spjd * noffp - pointer to new file offset 4747219089Spjd * ct - caller context 4748219089Spjd * 4749251631Sdelphij * RETURN: 0 on success, EINVAL if new offset invalid. 
4750219089Spjd */ 4751219089Spjd/* ARGSUSED */ 4752219089Spjdstatic int 4753219089Spjdzfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 4754219089Spjd caller_context_t *ct) 4755219089Spjd{ 4756219089Spjd if (vp->v_type == VDIR) 4757219089Spjd return (0); 4758219089Spjd return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 4759219089Spjd} 4760219089Spjd 4761219089Spjd/* 4762219089Spjd * Pre-filter the generic locking function to trap attempts to place 4763219089Spjd * a mandatory lock on a memory mapped file. 4764219089Spjd */ 4765219089Spjdstatic int 4766219089Spjdzfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 4767219089Spjd flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 4768219089Spjd{ 4769219089Spjd znode_t *zp = VTOZ(vp); 4770219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4771219089Spjd 4772219089Spjd ZFS_ENTER(zfsvfs); 4773219089Spjd ZFS_VERIFY_ZP(zp); 4774219089Spjd 4775219089Spjd /* 4776219089Spjd * We are following the UFS semantics with respect to mapcnt 4777219089Spjd * here: If we see that the file is mapped already, then we will 4778219089Spjd * return an error, but we don't worry about races between this 4779219089Spjd * function and zfs_map(). 4780219089Spjd */ 4781219089Spjd if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { 4782219089Spjd ZFS_EXIT(zfsvfs); 4783249195Smm return (SET_ERROR(EAGAIN)); 4784219089Spjd } 4785219089Spjd ZFS_EXIT(zfsvfs); 4786219089Spjd return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 4787219089Spjd} 4788219089Spjd 4789219089Spjd/* 4790219089Spjd * If we can't find a page in the cache, we will create a new page 4791219089Spjd * and fill it with file data. For efficiency, we may try to fill 4792219089Spjd * multiple pages at once (klustering) to fill up the supplied page 4793219089Spjd * list. Note that the pages to be filled are held with an exclusive 4794219089Spjd * lock to prevent access by other threads while they are being filled. 
4795219089Spjd */ 4796219089Spjdstatic int 4797219089Spjdzfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, 4798219089Spjd caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) 4799219089Spjd{ 4800219089Spjd znode_t *zp = VTOZ(vp); 4801219089Spjd page_t *pp, *cur_pp; 4802219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 4803219089Spjd u_offset_t io_off, total; 4804219089Spjd size_t io_len; 4805219089Spjd int err; 4806219089Spjd 4807219089Spjd if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { 4808219089Spjd /* 4809219089Spjd * We only have a single page, don't bother klustering 4810219089Spjd */ 4811219089Spjd io_off = off; 4812219089Spjd io_len = PAGESIZE; 4813219089Spjd pp = page_create_va(vp, io_off, io_len, 4814219089Spjd PG_EXCL | PG_WAIT, seg, addr); 4815219089Spjd } else { 4816219089Spjd /* 4817219089Spjd * Try to find enough pages to fill the page list 4818219089Spjd */ 4819219089Spjd pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 4820219089Spjd &io_len, off, plsz, 0); 4821219089Spjd } 4822219089Spjd if (pp == NULL) { 4823219089Spjd /* 4824219089Spjd * The page already exists, nothing to do here. 4825219089Spjd */ 4826219089Spjd *pl = NULL; 4827219089Spjd return (0); 4828219089Spjd } 4829219089Spjd 4830219089Spjd /* 4831219089Spjd * Fill the pages in the kluster. 
4832219089Spjd */ 4833219089Spjd cur_pp = pp; 4834219089Spjd for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { 4835219089Spjd caddr_t va; 4836219089Spjd 4837219089Spjd ASSERT3U(io_off, ==, cur_pp->p_offset); 4838219089Spjd va = zfs_map_page(cur_pp, S_WRITE); 4839219089Spjd err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, 4840219089Spjd DMU_READ_PREFETCH); 4841219089Spjd zfs_unmap_page(cur_pp, va); 4842219089Spjd if (err) { 4843219089Spjd /* On error, toss the entire kluster */ 4844219089Spjd pvn_read_done(pp, B_ERROR); 4845219089Spjd /* convert checksum errors into IO errors */ 4846219089Spjd if (err == ECKSUM) 4847249195Smm err = SET_ERROR(EIO); 4848219089Spjd return (err); 4849219089Spjd } 4850219089Spjd cur_pp = cur_pp->p_next; 4851219089Spjd } 4852219089Spjd 4853219089Spjd /* 4854219089Spjd * Fill in the page list array from the kluster starting 4855219089Spjd * from the desired offset `off'. 4856219089Spjd * NOTE: the page list will always be null terminated. 4857219089Spjd */ 4858219089Spjd pvn_plist_init(pp, pl, plsz, off, io_len, rw); 4859219089Spjd ASSERT(pl == NULL || (*pl)->p_offset == off); 4860219089Spjd 4861219089Spjd return (0); 4862219089Spjd} 4863219089Spjd 4864219089Spjd/* 4865219089Spjd * Return pointers to the pages for the file region [off, off + len] 4866219089Spjd * in the pl array. If plsz is greater than len, this function may 4867219089Spjd * also return page pointers from after the specified region 4868219089Spjd * (i.e. the region [off, off + plsz]). These additional pages are 4869219089Spjd * only returned if they are already in the cache, or were created as 4870219089Spjd * part of a klustered read. 4871219089Spjd * 4872219089Spjd * IN: vp - vnode of file to get data from. 4873219089Spjd * off - position in file to get data from. 4874219089Spjd * len - amount of data to retrieve. 4875219089Spjd * plsz - length of provided page list. 4876219089Spjd * seg - segment to obtain pages for. 
4877219089Spjd * addr - virtual address of fault. 4878219089Spjd * rw - mode of created pages. 4879219089Spjd * cr - credentials of caller. 4880219089Spjd * ct - caller context. 4881219089Spjd * 4882219089Spjd * OUT: protp - protection mode of created pages. 4883219089Spjd * pl - list of pages created. 4884219089Spjd * 4885251631Sdelphij * RETURN: 0 on success, error code on failure. 4886219089Spjd * 4887219089Spjd * Timestamps: 4888219089Spjd * vp - atime updated 4889219089Spjd */ 4890219089Spjd/* ARGSUSED */ 4891219089Spjdstatic int 4892219089Spjdzfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 4893251631Sdelphij page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, 4894251631Sdelphij enum seg_rw rw, cred_t *cr, caller_context_t *ct) 4895219089Spjd{ 4896219089Spjd znode_t *zp = VTOZ(vp); 4897219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4898219089Spjd page_t **pl0 = pl; 4899219089Spjd int err = 0; 4900219089Spjd 4901219089Spjd /* we do our own caching, faultahead is unnecessary */ 4902219089Spjd if (pl == NULL) 4903219089Spjd return (0); 4904219089Spjd else if (len > plsz) 4905219089Spjd len = plsz; 4906219089Spjd else 4907219089Spjd len = P2ROUNDUP(len, PAGESIZE); 4908219089Spjd ASSERT(plsz >= len); 4909219089Spjd 4910219089Spjd ZFS_ENTER(zfsvfs); 4911219089Spjd ZFS_VERIFY_ZP(zp); 4912219089Spjd 4913219089Spjd if (protp) 4914219089Spjd *protp = PROT_ALL; 4915219089Spjd 4916219089Spjd /* 4917219089Spjd * Loop through the requested range [off, off + len) looking 4918219089Spjd * for pages. If we don't find a page, we will need to create 4919219089Spjd * a new page and fill it with data from the file. 
4920219089Spjd */ 4921219089Spjd while (len > 0) { 4922219089Spjd if (*pl = page_lookup(vp, off, SE_SHARED)) 4923219089Spjd *(pl+1) = NULL; 4924219089Spjd else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) 4925219089Spjd goto out; 4926219089Spjd while (*pl) { 4927219089Spjd ASSERT3U((*pl)->p_offset, ==, off); 4928219089Spjd off += PAGESIZE; 4929219089Spjd addr += PAGESIZE; 4930219089Spjd if (len > 0) { 4931219089Spjd ASSERT3U(len, >=, PAGESIZE); 4932219089Spjd len -= PAGESIZE; 4933219089Spjd } 4934219089Spjd ASSERT3U(plsz, >=, PAGESIZE); 4935219089Spjd plsz -= PAGESIZE; 4936219089Spjd pl++; 4937219089Spjd } 4938219089Spjd } 4939219089Spjd 4940219089Spjd /* 4941219089Spjd * Fill out the page array with any pages already in the cache. 4942219089Spjd */ 4943219089Spjd while (plsz > 0 && 4944219089Spjd (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { 4945219089Spjd off += PAGESIZE; 4946219089Spjd plsz -= PAGESIZE; 4947219089Spjd } 4948219089Spjdout: 4949219089Spjd if (err) { 4950219089Spjd /* 4951219089Spjd * Release any pages we have previously locked. 4952219089Spjd */ 4953219089Spjd while (pl > pl0) 4954219089Spjd page_unlock(*--pl); 4955219089Spjd } else { 4956219089Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4957219089Spjd } 4958219089Spjd 4959219089Spjd *pl = NULL; 4960219089Spjd 4961219089Spjd ZFS_EXIT(zfsvfs); 4962219089Spjd return (err); 4963219089Spjd} 4964219089Spjd 4965219089Spjd/* 4966219089Spjd * Request a memory map for a section of a file. 
This code interacts 4967219089Spjd * with common code and the VM system as follows: 4968219089Spjd * 4969251631Sdelphij * - common code calls mmap(), which ends up in smmap_common() 4970251631Sdelphij * - this calls VOP_MAP(), which takes you into (say) zfs 4971251631Sdelphij * - zfs_map() calls as_map(), passing segvn_create() as the callback 4972251631Sdelphij * - segvn_create() creates the new segment and calls VOP_ADDMAP() 4973251631Sdelphij * - zfs_addmap() updates z_mapcnt 4974219089Spjd */ 4975219089Spjd/*ARGSUSED*/ 4976219089Spjdstatic int 4977219089Spjdzfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 4978219089Spjd size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4979219089Spjd caller_context_t *ct) 4980219089Spjd{ 4981219089Spjd znode_t *zp = VTOZ(vp); 4982219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4983219089Spjd segvn_crargs_t vn_a; 4984219089Spjd int error; 4985219089Spjd 4986219089Spjd ZFS_ENTER(zfsvfs); 4987219089Spjd ZFS_VERIFY_ZP(zp); 4988219089Spjd 4989219089Spjd if ((prot & PROT_WRITE) && (zp->z_pflags & 4990219089Spjd (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { 4991219089Spjd ZFS_EXIT(zfsvfs); 4992249195Smm return (SET_ERROR(EPERM)); 4993219089Spjd } 4994219089Spjd 4995219089Spjd if ((prot & (PROT_READ | PROT_EXEC)) && 4996219089Spjd (zp->z_pflags & ZFS_AV_QUARANTINED)) { 4997219089Spjd ZFS_EXIT(zfsvfs); 4998249195Smm return (SET_ERROR(EACCES)); 4999219089Spjd } 5000219089Spjd 5001219089Spjd if (vp->v_flag & VNOMAP) { 5002219089Spjd ZFS_EXIT(zfsvfs); 5003249195Smm return (SET_ERROR(ENOSYS)); 5004219089Spjd } 5005219089Spjd 5006219089Spjd if (off < 0 || len > MAXOFFSET_T - off) { 5007219089Spjd ZFS_EXIT(zfsvfs); 5008249195Smm return (SET_ERROR(ENXIO)); 5009219089Spjd } 5010219089Spjd 5011219089Spjd if (vp->v_type != VREG) { 5012219089Spjd ZFS_EXIT(zfsvfs); 5013249195Smm return (SET_ERROR(ENODEV)); 5014219089Spjd } 5015219089Spjd 5016219089Spjd /* 5017219089Spjd * If file is locked, disallow mapping. 
5018219089Spjd */ 5019219089Spjd if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) { 5020219089Spjd ZFS_EXIT(zfsvfs); 5021249195Smm return (SET_ERROR(EAGAIN)); 5022219089Spjd } 5023219089Spjd 5024219089Spjd as_rangelock(as); 5025219089Spjd error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 5026219089Spjd if (error != 0) { 5027219089Spjd as_rangeunlock(as); 5028219089Spjd ZFS_EXIT(zfsvfs); 5029219089Spjd return (error); 5030219089Spjd } 5031219089Spjd 5032219089Spjd vn_a.vp = vp; 5033219089Spjd vn_a.offset = (u_offset_t)off; 5034219089Spjd vn_a.type = flags & MAP_TYPE; 5035219089Spjd vn_a.prot = prot; 5036219089Spjd vn_a.maxprot = maxprot; 5037219089Spjd vn_a.cred = cr; 5038219089Spjd vn_a.amp = NULL; 5039219089Spjd vn_a.flags = flags & ~MAP_TYPE; 5040219089Spjd vn_a.szc = 0; 5041219089Spjd vn_a.lgrp_mem_policy_flags = 0; 5042219089Spjd 5043219089Spjd error = as_map(as, *addrp, len, segvn_create, &vn_a); 5044219089Spjd 5045219089Spjd as_rangeunlock(as); 5046219089Spjd ZFS_EXIT(zfsvfs); 5047219089Spjd return (error); 5048219089Spjd} 5049219089Spjd 5050219089Spjd/* ARGSUSED */ 5051219089Spjdstatic int 5052219089Spjdzfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 5053219089Spjd size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 5054219089Spjd caller_context_t *ct) 5055219089Spjd{ 5056219089Spjd uint64_t pages = btopr(len); 5057219089Spjd 5058219089Spjd atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); 5059219089Spjd return (0); 5060219089Spjd} 5061219089Spjd 5062219089Spjd/* 5063219089Spjd * The reason we push dirty pages as part of zfs_delmap() is so that we get a 5064219089Spjd * more accurate mtime for the associated file. Since we don't have a way of 5065219089Spjd * detecting when the data was actually modified, we have to resort to 5066219089Spjd * heuristics. If an explicit msync() is done, then we mark the mtime when the 5067219089Spjd * last page is pushed. 
The problem occurs when the msync() call is omitted, 5068219089Spjd * which by far the most common case: 5069219089Spjd * 5070268464Sdelphij * open() 5071268464Sdelphij * mmap() 5072268464Sdelphij * <modify memory> 5073268464Sdelphij * munmap() 5074268464Sdelphij * close() 5075268464Sdelphij * <time lapse> 5076268464Sdelphij * putpage() via fsflush 5077219089Spjd * 5078219089Spjd * If we wait until fsflush to come along, we can have a modification time that 5079219089Spjd * is some arbitrary point in the future. In order to prevent this in the 5080219089Spjd * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is 5081219089Spjd * torn down. 5082219089Spjd */ 5083219089Spjd/* ARGSUSED */ 5084219089Spjdstatic int 5085219089Spjdzfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 5086219089Spjd size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 5087219089Spjd caller_context_t *ct) 5088219089Spjd{ 5089219089Spjd uint64_t pages = btopr(len); 5090219089Spjd 5091219089Spjd ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 5092219089Spjd atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 5093219089Spjd 5094219089Spjd if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 5095219089Spjd vn_has_cached_data(vp)) 5096219089Spjd (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 5097219089Spjd 5098219089Spjd return (0); 5099219089Spjd} 5100219089Spjd 5101219089Spjd/* 5102219089Spjd * Free or allocate space in a file. Currently, this function only 5103219089Spjd * supports the `F_FREESP' command. However, this command is somewhat 5104219089Spjd * misnamed, as its functionality includes the ability to allocate as 5105219089Spjd * well as free space. 5106219089Spjd * 5107219089Spjd * IN: vp - vnode of file to free data in. 5108219089Spjd * cmd - action to take (only F_FREESP supported). 5109219089Spjd * bfp - section of file to free/alloc. 5110219089Spjd * flag - current file open mode flags. 5111219089Spjd * offset - current file offset. 
5112219089Spjd * cr - credentials of caller [UNUSED]. 5113219089Spjd * ct - caller context. 5114219089Spjd * 5115251631Sdelphij * RETURN: 0 on success, error code on failure. 5116219089Spjd * 5117219089Spjd * Timestamps: 5118219089Spjd * vp - ctime|mtime updated 5119219089Spjd */ 5120219089Spjd/* ARGSUSED */ 5121219089Spjdstatic int 5122219089Spjdzfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, 5123219089Spjd offset_t offset, cred_t *cr, caller_context_t *ct) 5124219089Spjd{ 5125219089Spjd znode_t *zp = VTOZ(vp); 5126219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5127219089Spjd uint64_t off, len; 5128219089Spjd int error; 5129219089Spjd 5130219089Spjd ZFS_ENTER(zfsvfs); 5131219089Spjd ZFS_VERIFY_ZP(zp); 5132219089Spjd 5133219089Spjd if (cmd != F_FREESP) { 5134219089Spjd ZFS_EXIT(zfsvfs); 5135249195Smm return (SET_ERROR(EINVAL)); 5136219089Spjd } 5137219089Spjd 5138262990Sdelphij /* 5139262990Sdelphij * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our 5140262990Sdelphij * callers might not be able to detect properly that we are read-only, 5141262990Sdelphij * so check it explicitly here. 
5142262990Sdelphij */ 5143262990Sdelphij if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 5144262990Sdelphij ZFS_EXIT(zfsvfs); 5145262990Sdelphij return (SET_ERROR(EROFS)); 5146262990Sdelphij } 5147262990Sdelphij 5148219089Spjd if (error = convoff(vp, bfp, 0, offset)) { 5149219089Spjd ZFS_EXIT(zfsvfs); 5150219089Spjd return (error); 5151219089Spjd } 5152219089Spjd 5153219089Spjd if (bfp->l_len < 0) { 5154219089Spjd ZFS_EXIT(zfsvfs); 5155249195Smm return (SET_ERROR(EINVAL)); 5156219089Spjd } 5157219089Spjd 5158219089Spjd off = bfp->l_start; 5159219089Spjd len = bfp->l_len; /* 0 means from off to end of file */ 5160219089Spjd 5161219089Spjd error = zfs_freesp(zp, off, len, flag, TRUE); 5162219089Spjd 5163219089Spjd ZFS_EXIT(zfsvfs); 5164219089Spjd return (error); 5165219089Spjd} 5166219089Spjd#endif /* sun */ 5167219089Spjd 5168168404SpjdCTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 5169168404SpjdCTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 5170168404Spjd 5171185029Spjd/*ARGSUSED*/ 5172168404Spjdstatic int 5173185029Spjdzfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 5174168404Spjd{ 5175168404Spjd znode_t *zp = VTOZ(vp); 5176168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5177185029Spjd uint32_t gen; 5178219089Spjd uint64_t gen64; 5179168404Spjd uint64_t object = zp->z_id; 5180168404Spjd zfid_short_t *zfid; 5181219089Spjd int size, i, error; 5182168404Spjd 5183168404Spjd ZFS_ENTER(zfsvfs); 5184185029Spjd ZFS_VERIFY_ZP(zp); 5185168404Spjd 5186219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 5187219089Spjd &gen64, sizeof (uint64_t))) != 0) { 5188219089Spjd ZFS_EXIT(zfsvfs); 5189219089Spjd return (error); 5190219089Spjd } 5191219089Spjd 5192219089Spjd gen = (uint32_t)gen64; 5193219089Spjd 5194168404Spjd size = (zfsvfs->z_parent != zfsvfs) ? 
LONG_FID_LEN : SHORT_FID_LEN; 5195249195Smm 5196249195Smm#ifdef illumos 5197249195Smm if (fidp->fid_len < size) { 5198249195Smm fidp->fid_len = size; 5199249195Smm ZFS_EXIT(zfsvfs); 5200249195Smm return (SET_ERROR(ENOSPC)); 5201249195Smm } 5202249195Smm#else 5203168404Spjd fidp->fid_len = size; 5204249195Smm#endif 5205168404Spjd 5206168404Spjd zfid = (zfid_short_t *)fidp; 5207168404Spjd 5208168404Spjd zfid->zf_len = size; 5209168404Spjd 5210168404Spjd for (i = 0; i < sizeof (zfid->zf_object); i++) 5211168404Spjd zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 5212168404Spjd 5213168404Spjd /* Must have a non-zero generation number to distinguish from .zfs */ 5214168404Spjd if (gen == 0) 5215168404Spjd gen = 1; 5216168404Spjd for (i = 0; i < sizeof (zfid->zf_gen); i++) 5217168404Spjd zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 5218168404Spjd 5219168404Spjd if (size == LONG_FID_LEN) { 5220168404Spjd uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 5221169023Spjd zfid_long_t *zlfid; 5222168404Spjd 5223168404Spjd zlfid = (zfid_long_t *)fidp; 5224168404Spjd 5225168404Spjd for (i = 0; i < sizeof (zlfid->zf_setid); i++) 5226168404Spjd zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 5227168404Spjd 5228168404Spjd /* XXX - this should be the generation number for the objset */ 5229168404Spjd for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 5230168404Spjd zlfid->zf_setgen[i] = 0; 5231168404Spjd } 5232168404Spjd 5233168404Spjd ZFS_EXIT(zfsvfs); 5234168404Spjd return (0); 5235168404Spjd} 5236168404Spjd 5237168404Spjdstatic int 5238185029Spjdzfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 5239185029Spjd caller_context_t *ct) 5240168404Spjd{ 5241168404Spjd znode_t *zp, *xzp; 5242168404Spjd zfsvfs_t *zfsvfs; 5243168404Spjd zfs_dirlock_t *dl; 5244168404Spjd int error; 5245168404Spjd 5246168404Spjd switch (cmd) { 5247168404Spjd case _PC_LINK_MAX: 5248168404Spjd *valp = INT_MAX; 5249168404Spjd return (0); 5250168404Spjd 5251168404Spjd case _PC_FILESIZEBITS: 
5252168404Spjd *valp = 64; 5253168404Spjd return (0); 5254219089Spjd#ifdef sun 5255168404Spjd case _PC_XATTR_EXISTS: 5256168404Spjd zp = VTOZ(vp); 5257168404Spjd zfsvfs = zp->z_zfsvfs; 5258168404Spjd ZFS_ENTER(zfsvfs); 5259185029Spjd ZFS_VERIFY_ZP(zp); 5260168404Spjd *valp = 0; 5261168404Spjd error = zfs_dirent_lock(&dl, zp, "", &xzp, 5262185029Spjd ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 5263168404Spjd if (error == 0) { 5264168404Spjd zfs_dirent_unlock(dl); 5265168404Spjd if (!zfs_dirempty(xzp)) 5266168404Spjd *valp = 1; 5267168404Spjd VN_RELE(ZTOV(xzp)); 5268168404Spjd } else if (error == ENOENT) { 5269168404Spjd /* 5270168404Spjd * If there aren't extended attributes, it's the 5271168404Spjd * same as having zero of them. 5272168404Spjd */ 5273168404Spjd error = 0; 5274168404Spjd } 5275168404Spjd ZFS_EXIT(zfsvfs); 5276168404Spjd return (error); 5277168404Spjd 5278219089Spjd case _PC_SATTR_ENABLED: 5279219089Spjd case _PC_SATTR_EXISTS: 5280219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 5281219089Spjd (vp->v_type == VREG || vp->v_type == VDIR); 5282219089Spjd return (0); 5283219089Spjd 5284219089Spjd case _PC_ACCESS_FILTERING: 5285219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 5286219089Spjd vp->v_type == VDIR; 5287219089Spjd return (0); 5288219089Spjd 5289219089Spjd case _PC_ACL_ENABLED: 5290219089Spjd *valp = _ACL_ACE_ENABLED; 5291219089Spjd return (0); 5292219089Spjd#endif /* sun */ 5293219089Spjd case _PC_MIN_HOLE_SIZE: 5294219089Spjd *valp = (int)SPA_MINBLOCKSIZE; 5295219089Spjd return (0); 5296219089Spjd#ifdef sun 5297219089Spjd case _PC_TIMESTAMP_RESOLUTION: 5298219089Spjd /* nanosecond timestamp resolution */ 5299219089Spjd *valp = 1L; 5300219089Spjd return (0); 5301219089Spjd#endif /* sun */ 5302168404Spjd case _PC_ACL_EXTENDED: 5303196949Strasz *valp = 0; 5304168404Spjd return (0); 5305168404Spjd 5306196949Strasz case _PC_ACL_NFS4: 5307196949Strasz *valp = 1; 5308196949Strasz return (0); 5309196949Strasz 
5310196949Strasz case _PC_ACL_PATH_MAX: 5311196949Strasz *valp = ACL_MAX_ENTRIES; 5312196949Strasz return (0); 5313196949Strasz 5314168404Spjd default: 5315168962Spjd return (EOPNOTSUPP); 5316168404Spjd } 5317168404Spjd} 5318168404Spjd 5319168404Spjd/*ARGSUSED*/ 5320168404Spjdstatic int 5321185029Spjdzfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5322185029Spjd caller_context_t *ct) 5323168404Spjd{ 5324168404Spjd znode_t *zp = VTOZ(vp); 5325168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5326168404Spjd int error; 5327185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 5328168404Spjd 5329168404Spjd ZFS_ENTER(zfsvfs); 5330185029Spjd ZFS_VERIFY_ZP(zp); 5331185029Spjd error = zfs_getacl(zp, vsecp, skipaclchk, cr); 5332168404Spjd ZFS_EXIT(zfsvfs); 5333168404Spjd 5334168404Spjd return (error); 5335168404Spjd} 5336168404Spjd 5337168404Spjd/*ARGSUSED*/ 5338228685Spjdint 5339185029Spjdzfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5340185029Spjd caller_context_t *ct) 5341168404Spjd{ 5342168404Spjd znode_t *zp = VTOZ(vp); 5343168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5344168404Spjd int error; 5345185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 5346219089Spjd zilog_t *zilog = zfsvfs->z_log; 5347168404Spjd 5348168404Spjd ZFS_ENTER(zfsvfs); 5349185029Spjd ZFS_VERIFY_ZP(zp); 5350219089Spjd 5351185029Spjd error = zfs_setacl(zp, vsecp, skipaclchk, cr); 5352219089Spjd 5353219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5354219089Spjd zil_commit(zilog, 0); 5355219089Spjd 5356168404Spjd ZFS_EXIT(zfsvfs); 5357168404Spjd return (error); 5358168404Spjd} 5359168404Spjd 5360219089Spjd#ifdef sun 5361219089Spjd/* 5362251631Sdelphij * The smallest read we may consider to loan out an arcbuf. 5363251631Sdelphij * This must be a power of 2. 
5364219089Spjd */ 5365219089Spjdint zcr_blksz_min = (1 << 10); /* 1K */ 5366251631Sdelphij/* 5367251631Sdelphij * If set to less than the file block size, allow loaning out of an 5368251631Sdelphij * arcbuf for a partial block read. This must be a power of 2. 5369251631Sdelphij */ 5370219089Spjdint zcr_blksz_max = (1 << 17); /* 128K */ 5371219089Spjd 5372219089Spjd/*ARGSUSED*/ 5373168962Spjdstatic int 5374219089Spjdzfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 5375219089Spjd caller_context_t *ct) 5376219089Spjd{ 5377219089Spjd znode_t *zp = VTOZ(vp); 5378219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5379219089Spjd int max_blksz = zfsvfs->z_max_blksz; 5380219089Spjd uio_t *uio = &xuio->xu_uio; 5381219089Spjd ssize_t size = uio->uio_resid; 5382219089Spjd offset_t offset = uio->uio_loffset; 5383219089Spjd int blksz; 5384219089Spjd int fullblk, i; 5385219089Spjd arc_buf_t *abuf; 5386219089Spjd ssize_t maxsize; 5387219089Spjd int preamble, postamble; 5388219089Spjd 5389219089Spjd if (xuio->xu_type != UIOTYPE_ZEROCOPY) 5390249195Smm return (SET_ERROR(EINVAL)); 5391219089Spjd 5392219089Spjd ZFS_ENTER(zfsvfs); 5393219089Spjd ZFS_VERIFY_ZP(zp); 5394219089Spjd switch (ioflag) { 5395219089Spjd case UIO_WRITE: 5396219089Spjd /* 5397219089Spjd * Loan out an arc_buf for write if write size is bigger than 5398219089Spjd * max_blksz, and the file's block size is also max_blksz. 5399219089Spjd */ 5400219089Spjd blksz = max_blksz; 5401219089Spjd if (size < blksz || zp->z_blksz != blksz) { 5402219089Spjd ZFS_EXIT(zfsvfs); 5403249195Smm return (SET_ERROR(EINVAL)); 5404219089Spjd } 5405219089Spjd /* 5406219089Spjd * Caller requests buffers for write before knowing where the 5407219089Spjd * write offset might be (e.g. NFS TCP write). 
5408219089Spjd */ 5409219089Spjd if (offset == -1) { 5410219089Spjd preamble = 0; 5411219089Spjd } else { 5412219089Spjd preamble = P2PHASE(offset, blksz); 5413219089Spjd if (preamble) { 5414219089Spjd preamble = blksz - preamble; 5415219089Spjd size -= preamble; 5416219089Spjd } 5417219089Spjd } 5418219089Spjd 5419219089Spjd postamble = P2PHASE(size, blksz); 5420219089Spjd size -= postamble; 5421219089Spjd 5422219089Spjd fullblk = size / blksz; 5423219089Spjd (void) dmu_xuio_init(xuio, 5424219089Spjd (preamble != 0) + fullblk + (postamble != 0)); 5425219089Spjd DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble, 5426219089Spjd int, postamble, int, 5427219089Spjd (preamble != 0) + fullblk + (postamble != 0)); 5428219089Spjd 5429219089Spjd /* 5430219089Spjd * Have to fix iov base/len for partial buffers. They 5431219089Spjd * currently represent full arc_buf's. 5432219089Spjd */ 5433219089Spjd if (preamble) { 5434219089Spjd /* data begins in the middle of the arc_buf */ 5435219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5436219089Spjd blksz); 5437219089Spjd ASSERT(abuf); 5438219089Spjd (void) dmu_xuio_add(xuio, abuf, 5439219089Spjd blksz - preamble, preamble); 5440219089Spjd } 5441219089Spjd 5442219089Spjd for (i = 0; i < fullblk; i++) { 5443219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5444219089Spjd blksz); 5445219089Spjd ASSERT(abuf); 5446219089Spjd (void) dmu_xuio_add(xuio, abuf, 0, blksz); 5447219089Spjd } 5448219089Spjd 5449219089Spjd if (postamble) { 5450219089Spjd /* data ends in the middle of the arc_buf */ 5451219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5452219089Spjd blksz); 5453219089Spjd ASSERT(abuf); 5454219089Spjd (void) dmu_xuio_add(xuio, abuf, 0, postamble); 5455219089Spjd } 5456219089Spjd break; 5457219089Spjd case UIO_READ: 5458219089Spjd /* 5459219089Spjd * Loan out an arc_buf for read if the read size is larger than 5460219089Spjd * the current file block size. 
Block alignment is not 5461219089Spjd * considered. Partial arc_buf will be loaned out for read. 5462219089Spjd */ 5463219089Spjd blksz = zp->z_blksz; 5464219089Spjd if (blksz < zcr_blksz_min) 5465219089Spjd blksz = zcr_blksz_min; 5466219089Spjd if (blksz > zcr_blksz_max) 5467219089Spjd blksz = zcr_blksz_max; 5468219089Spjd /* avoid potential complexity of dealing with it */ 5469219089Spjd if (blksz > max_blksz) { 5470219089Spjd ZFS_EXIT(zfsvfs); 5471249195Smm return (SET_ERROR(EINVAL)); 5472219089Spjd } 5473219089Spjd 5474219089Spjd maxsize = zp->z_size - uio->uio_loffset; 5475219089Spjd if (size > maxsize) 5476219089Spjd size = maxsize; 5477219089Spjd 5478219089Spjd if (size < blksz || vn_has_cached_data(vp)) { 5479219089Spjd ZFS_EXIT(zfsvfs); 5480249195Smm return (SET_ERROR(EINVAL)); 5481219089Spjd } 5482219089Spjd break; 5483219089Spjd default: 5484219089Spjd ZFS_EXIT(zfsvfs); 5485249195Smm return (SET_ERROR(EINVAL)); 5486219089Spjd } 5487219089Spjd 5488219089Spjd uio->uio_extflg = UIO_XUIO; 5489219089Spjd XUIO_XUZC_RW(xuio) = ioflag; 5490219089Spjd ZFS_EXIT(zfsvfs); 5491219089Spjd return (0); 5492219089Spjd} 5493219089Spjd 5494219089Spjd/*ARGSUSED*/ 5495219089Spjdstatic int 5496219089Spjdzfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 5497219089Spjd{ 5498219089Spjd int i; 5499219089Spjd arc_buf_t *abuf; 5500219089Spjd int ioflag = XUIO_XUZC_RW(xuio); 5501219089Spjd 5502219089Spjd ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 5503219089Spjd 5504219089Spjd i = dmu_xuio_cnt(xuio); 5505219089Spjd while (i-- > 0) { 5506219089Spjd abuf = dmu_xuio_arcbuf(xuio, i); 5507219089Spjd /* 5508219089Spjd * if abuf == NULL, it must be a write buffer 5509219089Spjd * that has been returned in zfs_write(). 
5510219089Spjd */ 5511219089Spjd if (abuf) 5512219089Spjd dmu_return_arcbuf(abuf); 5513219089Spjd ASSERT(abuf || ioflag == UIO_WRITE); 5514219089Spjd } 5515219089Spjd 5516219089Spjd dmu_xuio_fini(xuio); 5517219089Spjd return (0); 5518219089Spjd} 5519219089Spjd 5520219089Spjd/* 5521219089Spjd * Predeclare these here so that the compiler assumes that 5522219089Spjd * this is an "old style" function declaration that does 5523219089Spjd * not include arguments => we won't get type mismatch errors 5524219089Spjd * in the initializations that follow. 5525219089Spjd */ 5526219089Spjdstatic int zfs_inval(); 5527219089Spjdstatic int zfs_isdir(); 5528219089Spjd 5529219089Spjdstatic int 5530219089Spjdzfs_inval() 5531219089Spjd{ 5532249195Smm return (SET_ERROR(EINVAL)); 5533219089Spjd} 5534219089Spjd 5535219089Spjdstatic int 5536219089Spjdzfs_isdir() 5537219089Spjd{ 5538249195Smm return (SET_ERROR(EISDIR)); 5539219089Spjd} 5540219089Spjd/* 5541219089Spjd * Directory vnode operations template 5542219089Spjd */ 5543219089Spjdvnodeops_t *zfs_dvnodeops; 5544219089Spjdconst fs_operation_def_t zfs_dvnodeops_template[] = { 5545219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5546219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5547219089Spjd VOPNAME_READ, { .error = zfs_isdir }, 5548219089Spjd VOPNAME_WRITE, { .error = zfs_isdir }, 5549219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5550219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5551219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5552219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5553219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5554219089Spjd VOPNAME_CREATE, { .vop_create = zfs_create }, 5555219089Spjd VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5556219089Spjd VOPNAME_LINK, { .vop_link = zfs_link }, 5557219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5558219089Spjd VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, 5559219089Spjd VOPNAME_RMDIR, { .vop_rmdir = 
zfs_rmdir }, 5560219089Spjd VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5561219089Spjd VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, 5562219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5563219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5564219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5565219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5566219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5567219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5568219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5569268464Sdelphij VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5570219089Spjd NULL, NULL 5571219089Spjd}; 5572219089Spjd 5573219089Spjd/* 5574219089Spjd * Regular file vnode operations template 5575219089Spjd */ 5576219089Spjdvnodeops_t *zfs_fvnodeops; 5577219089Spjdconst fs_operation_def_t zfs_fvnodeops_template[] = { 5578219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5579219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5580219089Spjd VOPNAME_READ, { .vop_read = zfs_read }, 5581219089Spjd VOPNAME_WRITE, { .vop_write = zfs_write }, 5582219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5583219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5584219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5585219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5586219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5587219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5588219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5589219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5590219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5591219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5592219089Spjd VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, 5593219089Spjd VOPNAME_SPACE, { .vop_space = zfs_space }, 5594219089Spjd VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, 5595219089Spjd VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, 
5596219089Spjd VOPNAME_MAP, { .vop_map = zfs_map }, 5597219089Spjd VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, 5598219089Spjd VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, 5599219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5600219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5601219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5602219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5603268464Sdelphij VOPNAME_REQZCBUF, { .vop_reqzcbuf = zfs_reqzcbuf }, 5604268464Sdelphij VOPNAME_RETZCBUF, { .vop_retzcbuf = zfs_retzcbuf }, 5605219089Spjd NULL, NULL 5606219089Spjd}; 5607219089Spjd 5608219089Spjd/* 5609219089Spjd * Symbolic link vnode operations template 5610219089Spjd */ 5611219089Spjdvnodeops_t *zfs_symvnodeops; 5612219089Spjdconst fs_operation_def_t zfs_symvnodeops_template[] = { 5613219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5614219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5615219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5616219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5617219089Spjd VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, 5618219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5619219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5620219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5621219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5622219089Spjd NULL, NULL 5623219089Spjd}; 5624219089Spjd 5625219089Spjd/* 5626219089Spjd * special share hidden files vnode operations template 5627219089Spjd */ 5628219089Spjdvnodeops_t *zfs_sharevnodeops; 5629219089Spjdconst fs_operation_def_t zfs_sharevnodeops_template[] = { 5630219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5631219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5632219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5633219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5634219089Spjd VOPNAME_PATHCONF, { 
.vop_pathconf = zfs_pathconf }, 5635219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5636219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5637219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5638219089Spjd NULL, NULL 5639219089Spjd}; 5640219089Spjd 5641219089Spjd/* 5642219089Spjd * Extended attribute directory vnode operations template 5643251631Sdelphij * 5644251631Sdelphij * This template is identical to the directory vnodes 5645251631Sdelphij * operation template except for restricted operations: 5646251631Sdelphij * VOP_MKDIR() 5647251631Sdelphij * VOP_SYMLINK() 5648251631Sdelphij * 5649219089Spjd * Note that there are other restrictions embedded in: 5650219089Spjd * zfs_create() - restrict type to VREG 5651219089Spjd * zfs_link() - no links into/out of attribute space 5652219089Spjd * zfs_rename() - no moves into/out of attribute space 5653219089Spjd */ 5654219089Spjdvnodeops_t *zfs_xdvnodeops; 5655219089Spjdconst fs_operation_def_t zfs_xdvnodeops_template[] = { 5656219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5657219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5658219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5659219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5660219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5661219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5662219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5663219089Spjd VOPNAME_CREATE, { .vop_create = zfs_create }, 5664219089Spjd VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5665219089Spjd VOPNAME_LINK, { .vop_link = zfs_link }, 5666219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5667219089Spjd VOPNAME_MKDIR, { .error = zfs_inval }, 5668219089Spjd VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5669219089Spjd VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5670219089Spjd VOPNAME_SYMLINK, { .error = zfs_inval }, 5671219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 
5672219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5673219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5674219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5675219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5676219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5677219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5678219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5679219089Spjd NULL, NULL 5680219089Spjd}; 5681219089Spjd 5682219089Spjd/* 5683219089Spjd * Error vnode operations template 5684219089Spjd */ 5685219089Spjdvnodeops_t *zfs_evnodeops; 5686219089Spjdconst fs_operation_def_t zfs_evnodeops_template[] = { 5687219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5688219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5689219089Spjd NULL, NULL 5690219089Spjd}; 5691219089Spjd#endif /* sun */ 5692219089Spjd 5693219089Spjdstatic int 5694213673Spjdioflags(int ioflags) 5695213673Spjd{ 5696213673Spjd int flags = 0; 5697213673Spjd 5698213673Spjd if (ioflags & IO_APPEND) 5699213673Spjd flags |= FAPPEND; 5700213673Spjd if (ioflags & IO_NDELAY) 5701213673Spjd flags |= FNONBLOCK; 5702213673Spjd if (ioflags & IO_SYNC) 5703213673Spjd flags |= (FSYNC | FDSYNC | FRSYNC); 5704213673Spjd 5705213673Spjd return (flags); 5706213673Spjd} 5707213673Spjd 5708213673Spjdstatic int 5709213937Savgzfs_getpages(struct vnode *vp, vm_page_t *m, int count, int reqpage) 5710213937Savg{ 5711213937Savg znode_t *zp = VTOZ(vp); 5712213937Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5713213937Savg objset_t *os = zp->z_zfsvfs->z_os; 5714243517Savg vm_page_t mfirst, mlast, mreq; 5715213937Savg vm_object_t object; 5716213937Savg caddr_t va; 5717213937Savg struct sf_buf *sf; 5718243517Savg off_t startoff, endoff; 5719213937Savg int i, error; 5720243517Savg vm_pindex_t reqstart, reqend; 5721243517Savg int pcount, lsize, reqsize, size; 5722213937Savg 5723213937Savg ZFS_ENTER(zfsvfs); 5724213937Savg 
ZFS_VERIFY_ZP(zp); 5725213937Savg 5726243517Savg pcount = OFF_TO_IDX(round_page(count)); 5727213937Savg mreq = m[reqpage]; 5728213937Savg object = mreq->object; 5729213937Savg error = 0; 5730213937Savg 5731213937Savg KASSERT(vp->v_object == object, ("mismatching object")); 5732213937Savg 5733243517Savg if (pcount > 1 && zp->z_blksz > PAGESIZE) { 5734243517Savg startoff = rounddown(IDX_TO_OFF(mreq->pindex), zp->z_blksz); 5735243517Savg reqstart = OFF_TO_IDX(round_page(startoff)); 5736243517Savg if (reqstart < m[0]->pindex) 5737243517Savg reqstart = 0; 5738243517Savg else 5739243517Savg reqstart = reqstart - m[0]->pindex; 5740243517Savg endoff = roundup(IDX_TO_OFF(mreq->pindex) + PAGE_SIZE, 5741243517Savg zp->z_blksz); 5742243517Savg reqend = OFF_TO_IDX(trunc_page(endoff)) - 1; 5743243517Savg if (reqend > m[pcount - 1]->pindex) 5744243517Savg reqend = m[pcount - 1]->pindex; 5745243517Savg reqsize = reqend - m[reqstart]->pindex + 1; 5746243517Savg KASSERT(reqstart <= reqpage && reqpage < reqstart + reqsize, 5747243517Savg ("reqpage beyond [reqstart, reqstart + reqsize[ bounds")); 5748243517Savg } else { 5749243517Savg reqstart = reqpage; 5750243517Savg reqsize = 1; 5751243517Savg } 5752243517Savg mfirst = m[reqstart]; 5753243517Savg mlast = m[reqstart + reqsize - 1]; 5754243517Savg 5755248084Sattilio zfs_vmobject_wlock(object); 5756213937Savg 5757243517Savg for (i = 0; i < reqstart; i++) { 5758243517Savg vm_page_lock(m[i]); 5759243517Savg vm_page_free(m[i]); 5760243517Savg vm_page_unlock(m[i]); 5761213937Savg } 5762243517Savg for (i = reqstart + reqsize; i < pcount; i++) { 5763243517Savg vm_page_lock(m[i]); 5764243517Savg vm_page_free(m[i]); 5765243517Savg vm_page_unlock(m[i]); 5766243517Savg } 5767213937Savg 5768243517Savg if (mreq->valid && reqsize == 1) { 5769213937Savg if (mreq->valid != VM_PAGE_BITS_ALL) 5770213937Savg vm_page_zero_invalid(mreq, TRUE); 5771248084Sattilio zfs_vmobject_wunlock(object); 5772213937Savg ZFS_EXIT(zfsvfs); 5773248084Sattilio return 
(zfs_vm_pagerret_ok); 5774213937Savg } 5775213937Savg 5776213937Savg PCPU_INC(cnt.v_vnodein); 5777243517Savg PCPU_ADD(cnt.v_vnodepgsin, reqsize); 5778213937Savg 5779213937Savg if (IDX_TO_OFF(mreq->pindex) >= object->un_pager.vnp.vnp_size) { 5780243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5781243517Savg if (i != reqpage) { 5782243517Savg vm_page_lock(m[i]); 5783243517Savg vm_page_free(m[i]); 5784243517Savg vm_page_unlock(m[i]); 5785243517Savg } 5786243517Savg } 5787248084Sattilio zfs_vmobject_wunlock(object); 5788213937Savg ZFS_EXIT(zfsvfs); 5789248084Sattilio return (zfs_vm_pagerret_bad); 5790213937Savg } 5791213937Savg 5792243517Savg lsize = PAGE_SIZE; 5793243517Savg if (IDX_TO_OFF(mlast->pindex) + lsize > object->un_pager.vnp.vnp_size) 5794243517Savg lsize = object->un_pager.vnp.vnp_size - IDX_TO_OFF(mlast->pindex); 5795213937Savg 5796248084Sattilio zfs_vmobject_wunlock(object); 5797243517Savg 5798243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5799243517Savg size = PAGE_SIZE; 5800243517Savg if (i == (reqstart + reqsize - 1)) 5801243517Savg size = lsize; 5802243517Savg va = zfs_map_page(m[i], &sf); 5803243517Savg error = dmu_read(os, zp->z_id, IDX_TO_OFF(m[i]->pindex), 5804243517Savg size, va, DMU_READ_PREFETCH); 5805243517Savg if (size != PAGE_SIZE) 5806243517Savg bzero(va + size, PAGE_SIZE - size); 5807243517Savg zfs_unmap_page(sf); 5808243517Savg if (error != 0) 5809243517Savg break; 5810243517Savg } 5811243517Savg 5812248084Sattilio zfs_vmobject_wlock(object); 5813213937Savg 5814243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5815243763Savg if (!error) 5816243763Savg m[i]->valid = VM_PAGE_BITS_ALL; 5817243517Savg KASSERT(m[i]->dirty == 0, ("zfs_getpages: page %p is dirty", m[i])); 5818243763Savg if (i != reqpage) 5819243763Savg vm_page_readahead_finish(m[i]); 5820243517Savg } 5821243517Savg 5822248084Sattilio zfs_vmobject_wunlock(object); 5823213937Savg 5824213937Savg ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 5825213937Savg 
ZFS_EXIT(zfsvfs); 5826248084Sattilio return (error ? zfs_vm_pagerret_error : zfs_vm_pagerret_ok); 5827213937Savg} 5828213937Savg 5829213937Savgstatic int 5830213937Savgzfs_freebsd_getpages(ap) 5831213937Savg struct vop_getpages_args /* { 5832213937Savg struct vnode *a_vp; 5833213937Savg vm_page_t *a_m; 5834213937Savg int a_count; 5835213937Savg int a_reqpage; 5836213937Savg } */ *ap; 5837213937Savg{ 5838213937Savg 5839213937Savg return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage)); 5840213937Savg} 5841213937Savg 5842213937Savgstatic int 5843258746Savgzfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, 5844258746Savg int *rtvals) 5845258746Savg{ 5846258746Savg znode_t *zp = VTOZ(vp); 5847258746Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5848258746Savg rl_t *rl; 5849258746Savg dmu_tx_t *tx; 5850258746Savg struct sf_buf *sf; 5851258746Savg vm_object_t object; 5852258746Savg vm_page_t m; 5853258746Savg caddr_t va; 5854258746Savg size_t tocopy; 5855258746Savg size_t lo_len; 5856258746Savg vm_ooffset_t lo_off; 5857258746Savg vm_ooffset_t off; 5858258746Savg uint_t blksz; 5859258746Savg int ncount; 5860258746Savg int pcount; 5861258746Savg int err; 5862258746Savg int i; 5863258746Savg 5864258746Savg ZFS_ENTER(zfsvfs); 5865258746Savg ZFS_VERIFY_ZP(zp); 5866258746Savg 5867258746Savg object = vp->v_object; 5868258746Savg pcount = btoc(len); 5869258746Savg ncount = pcount; 5870258746Savg 5871258746Savg KASSERT(ma[0]->object == object, ("mismatching object")); 5872258746Savg KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length")); 5873258746Savg 5874258746Savg for (i = 0; i < pcount; i++) 5875258746Savg rtvals[i] = zfs_vm_pagerret_error; 5876258746Savg 5877258746Savg off = IDX_TO_OFF(ma[0]->pindex); 5878258746Savg blksz = zp->z_blksz; 5879258746Savg lo_off = rounddown(off, blksz); 5880258746Savg lo_len = roundup(len + (off - lo_off), blksz); 5881258746Savg rl = zfs_range_lock(zp, lo_off, lo_len, RL_WRITER); 5882258746Savg 5883258746Savg 
zfs_vmobject_wlock(object); 5884258746Savg if (len + off > object->un_pager.vnp.vnp_size) { 5885258746Savg if (object->un_pager.vnp.vnp_size > off) { 5886258746Savg int pgoff; 5887258746Savg 5888258746Savg len = object->un_pager.vnp.vnp_size - off; 5889258746Savg ncount = btoc(len); 5890258746Savg if ((pgoff = (int)len & PAGE_MASK) != 0) { 5891258746Savg /* 5892258746Savg * If the object is locked and the following 5893258746Savg * conditions hold, then the page's dirty 5894258746Savg * field cannot be concurrently changed by a 5895258746Savg * pmap operation. 5896258746Savg */ 5897258746Savg m = ma[ncount - 1]; 5898258746Savg vm_page_assert_sbusied(m); 5899258746Savg KASSERT(!pmap_page_is_write_mapped(m), 5900258746Savg ("zfs_putpages: page %p is not read-only", m)); 5901258746Savg vm_page_clear_dirty(m, pgoff, PAGE_SIZE - 5902258746Savg pgoff); 5903258746Savg } 5904258746Savg } else { 5905258746Savg len = 0; 5906258746Savg ncount = 0; 5907258746Savg } 5908258746Savg if (ncount < pcount) { 5909258746Savg for (i = ncount; i < pcount; i++) { 5910258746Savg rtvals[i] = zfs_vm_pagerret_bad; 5911258746Savg } 5912258746Savg } 5913258746Savg } 5914258746Savg zfs_vmobject_wunlock(object); 5915258746Savg 5916258746Savg if (ncount == 0) 5917258746Savg goto out; 5918258746Savg 5919258746Savg if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 5920258746Savg zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 5921258746Savg goto out; 5922258746Savg } 5923258746Savg 5924258746Savgtop: 5925258746Savg tx = dmu_tx_create(zfsvfs->z_os); 5926258746Savg dmu_tx_hold_write(tx, zp->z_id, off, len); 5927258746Savg 5928258746Savg dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 5929258746Savg zfs_sa_upgrade_txholds(tx, zp); 5930258746Savg err = dmu_tx_assign(tx, TXG_NOWAIT); 5931258746Savg if (err != 0) { 5932258746Savg if (err == ERESTART) { 5933258746Savg dmu_tx_wait(tx); 5934258746Savg dmu_tx_abort(tx); 5935258746Savg goto top; 5936258746Savg } 5937258746Savg dmu_tx_abort(tx); 5938258746Savg goto out; 
5939258746Savg } 5940258746Savg 5941258746Savg if (zp->z_blksz < PAGE_SIZE) { 5942258746Savg i = 0; 5943258746Savg for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) { 5944258746Savg tocopy = len > PAGE_SIZE ? PAGE_SIZE : len; 5945258746Savg va = zfs_map_page(ma[i], &sf); 5946258746Savg dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx); 5947258746Savg zfs_unmap_page(sf); 5948258746Savg } 5949258746Savg } else { 5950258746Savg err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx); 5951258746Savg } 5952258746Savg 5953258746Savg if (err == 0) { 5954258746Savg uint64_t mtime[2], ctime[2]; 5955258746Savg sa_bulk_attr_t bulk[3]; 5956258746Savg int count = 0; 5957258746Savg 5958258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 5959258746Savg &mtime, 16); 5960258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 5961258746Savg &ctime, 16); 5962258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 5963258746Savg &zp->z_pflags, 8); 5964258746Savg zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 5965258746Savg B_TRUE); 5966258746Savg zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 5967258746Savg 5968258746Savg zfs_vmobject_wlock(object); 5969258746Savg for (i = 0; i < ncount; i++) { 5970258746Savg rtvals[i] = zfs_vm_pagerret_ok; 5971258746Savg vm_page_undirty(ma[i]); 5972258746Savg } 5973258746Savg zfs_vmobject_wunlock(object); 5974258746Savg PCPU_INC(cnt.v_vnodeout); 5975258746Savg PCPU_ADD(cnt.v_vnodepgsout, ncount); 5976258746Savg } 5977258746Savg dmu_tx_commit(tx); 5978258746Savg 5979258746Savgout: 5980258746Savg zfs_range_unlock(rl); 5981258746Savg if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 || 5982258746Savg zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5983258746Savg zil_commit(zfsvfs->z_log, zp->z_id); 5984258746Savg ZFS_EXIT(zfsvfs); 5985258746Savg return (rtvals[0]); 5986258746Savg} 5987258746Savg 5988258746Savgint 5989258746Savgzfs_freebsd_putpages(ap) 
5990258746Savg struct vop_putpages_args /* { 5991258746Savg struct vnode *a_vp; 5992258746Savg vm_page_t *a_m; 5993258746Savg int a_count; 5994258746Savg int a_sync; 5995258746Savg int *a_rtvals; 5996258746Savg } */ *ap; 5997258746Savg{ 5998258746Savg 5999258746Savg return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, 6000258746Savg ap->a_rtvals)); 6001258746Savg} 6002258746Savg 6003258746Savgstatic int 6004243518Savgzfs_freebsd_bmap(ap) 6005243518Savg struct vop_bmap_args /* { 6006243518Savg struct vnode *a_vp; 6007243518Savg daddr_t a_bn; 6008243518Savg struct bufobj **a_bop; 6009243518Savg daddr_t *a_bnp; 6010243518Savg int *a_runp; 6011243518Savg int *a_runb; 6012243518Savg } */ *ap; 6013243518Savg{ 6014243518Savg 6015243518Savg if (ap->a_bop != NULL) 6016243518Savg *ap->a_bop = &ap->a_vp->v_bufobj; 6017243518Savg if (ap->a_bnp != NULL) 6018243518Savg *ap->a_bnp = ap->a_bn; 6019243518Savg if (ap->a_runp != NULL) 6020243518Savg *ap->a_runp = 0; 6021243518Savg if (ap->a_runb != NULL) 6022243518Savg *ap->a_runb = 0; 6023243518Savg 6024243518Savg return (0); 6025243518Savg} 6026243518Savg 6027243518Savgstatic int 6028168962Spjdzfs_freebsd_open(ap) 6029168962Spjd struct vop_open_args /* { 6030168962Spjd struct vnode *a_vp; 6031168962Spjd int a_mode; 6032168962Spjd struct ucred *a_cred; 6033168962Spjd struct thread *a_td; 6034168962Spjd } */ *ap; 6035168962Spjd{ 6036168962Spjd vnode_t *vp = ap->a_vp; 6037168962Spjd znode_t *zp = VTOZ(vp); 6038168962Spjd int error; 6039168962Spjd 6040185029Spjd error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); 6041168962Spjd if (error == 0) 6042219089Spjd vnode_create_vobject(vp, zp->z_size, ap->a_td); 6043168962Spjd return (error); 6044168962Spjd} 6045168962Spjd 6046168962Spjdstatic int 6047168962Spjdzfs_freebsd_close(ap) 6048168962Spjd struct vop_close_args /* { 6049168962Spjd struct vnode *a_vp; 6050168962Spjd int a_fflag; 6051168962Spjd struct ucred *a_cred; 6052168962Spjd struct thread *a_td; 6053168962Spjd } */ 
*ap; 6054168962Spjd{ 6055168962Spjd 6056242566Savg return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred, NULL)); 6057168962Spjd} 6058168962Spjd 6059168962Spjdstatic int 6060168962Spjdzfs_freebsd_ioctl(ap) 6061168962Spjd struct vop_ioctl_args /* { 6062168962Spjd struct vnode *a_vp; 6063168962Spjd u_long a_command; 6064168962Spjd caddr_t a_data; 6065168962Spjd int a_fflag; 6066168962Spjd struct ucred *cred; 6067168962Spjd struct thread *td; 6068168962Spjd } */ *ap; 6069168962Spjd{ 6070168962Spjd 6071168978Spjd return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 6072185029Spjd ap->a_fflag, ap->a_cred, NULL, NULL)); 6073168962Spjd} 6074168962Spjd 6075168962Spjdstatic int 6076168962Spjdzfs_freebsd_read(ap) 6077168962Spjd struct vop_read_args /* { 6078168962Spjd struct vnode *a_vp; 6079168962Spjd struct uio *a_uio; 6080168962Spjd int a_ioflag; 6081168962Spjd struct ucred *a_cred; 6082168962Spjd } */ *ap; 6083168962Spjd{ 6084168962Spjd 6085213673Spjd return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 6086213673Spjd ap->a_cred, NULL)); 6087168962Spjd} 6088168962Spjd 6089168962Spjdstatic int 6090168962Spjdzfs_freebsd_write(ap) 6091168962Spjd struct vop_write_args /* { 6092168962Spjd struct vnode *a_vp; 6093168962Spjd struct uio *a_uio; 6094168962Spjd int a_ioflag; 6095168962Spjd struct ucred *a_cred; 6096168962Spjd } */ *ap; 6097168962Spjd{ 6098168962Spjd 6099213673Spjd return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 6100213673Spjd ap->a_cred, NULL)); 6101168962Spjd} 6102168962Spjd 6103168962Spjdstatic int 6104168962Spjdzfs_freebsd_access(ap) 6105168962Spjd struct vop_access_args /* { 6106168962Spjd struct vnode *a_vp; 6107192689Strasz accmode_t a_accmode; 6108168962Spjd struct ucred *a_cred; 6109168962Spjd struct thread *a_td; 6110168962Spjd } */ *ap; 6111168962Spjd{ 6112212002Sjh vnode_t *vp = ap->a_vp; 6113212002Sjh znode_t *zp = VTOZ(vp); 6114198703Spjd accmode_t accmode; 6115198703Spjd int error = 0; 6116168962Spjd 
6117185172Spjd /* 6118198703Spjd * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND, 6119185172Spjd */ 6120198703Spjd accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 6121198703Spjd if (accmode != 0) 6122198703Spjd error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); 6123185172Spjd 6124198703Spjd /* 6125198703Spjd * VADMIN has to be handled by vaccess(). 6126198703Spjd */ 6127198703Spjd if (error == 0) { 6128198703Spjd accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 6129198703Spjd if (accmode != 0) { 6130219089Spjd error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 6131219089Spjd zp->z_gid, accmode, ap->a_cred, NULL); 6132198703Spjd } 6133185172Spjd } 6134185172Spjd 6135212002Sjh /* 6136212002Sjh * For VEXEC, ensure that at least one execute bit is set for 6137212002Sjh * non-directories. 6138212002Sjh */ 6139212002Sjh if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 6140219089Spjd (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 6141212002Sjh error = EACCES; 6142219089Spjd } 6143212002Sjh 6144198703Spjd return (error); 6145168962Spjd} 6146168962Spjd 6147168962Spjdstatic int 6148168962Spjdzfs_freebsd_lookup(ap) 6149168962Spjd struct vop_lookup_args /* { 6150168962Spjd struct vnode *a_dvp; 6151168962Spjd struct vnode **a_vpp; 6152168962Spjd struct componentname *a_cnp; 6153168962Spjd } */ *ap; 6154168962Spjd{ 6155168962Spjd struct componentname *cnp = ap->a_cnp; 6156168962Spjd char nm[NAME_MAX + 1]; 6157168962Spjd 6158168962Spjd ASSERT(cnp->cn_namelen < sizeof(nm)); 6159168962Spjd strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 6160168962Spjd 6161168962Spjd return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 6162185029Spjd cnp->cn_cred, cnp->cn_thread, 0)); 6163168962Spjd} 6164168962Spjd 6165168962Spjdstatic int 6166168962Spjdzfs_freebsd_create(ap) 6167168962Spjd struct vop_create_args /* { 6168168962Spjd struct vnode *a_dvp; 6169168962Spjd struct vnode **a_vpp; 
6170168962Spjd struct componentname *a_cnp; 6171168962Spjd struct vattr *a_vap; 6172168962Spjd } */ *ap; 6173168962Spjd{ 6174168962Spjd struct componentname *cnp = ap->a_cnp; 6175168962Spjd vattr_t *vap = ap->a_vap; 6176168962Spjd int mode; 6177168962Spjd 6178168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6179168962Spjd 6180168962Spjd vattr_init_mask(vap); 6181168962Spjd mode = vap->va_mode & ALLPERMS; 6182168962Spjd 6183168962Spjd return (zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 6184185029Spjd ap->a_vpp, cnp->cn_cred, cnp->cn_thread)); 6185168962Spjd} 6186168962Spjd 6187168962Spjdstatic int 6188168962Spjdzfs_freebsd_remove(ap) 6189168962Spjd struct vop_remove_args /* { 6190168962Spjd struct vnode *a_dvp; 6191168962Spjd struct vnode *a_vp; 6192168962Spjd struct componentname *a_cnp; 6193168962Spjd } */ *ap; 6194168962Spjd{ 6195168962Spjd 6196168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 6197168962Spjd 6198168962Spjd return (zfs_remove(ap->a_dvp, ap->a_cnp->cn_nameptr, 6199185029Spjd ap->a_cnp->cn_cred, NULL, 0)); 6200168962Spjd} 6201168962Spjd 6202168962Spjdstatic int 6203168962Spjdzfs_freebsd_mkdir(ap) 6204168962Spjd struct vop_mkdir_args /* { 6205168962Spjd struct vnode *a_dvp; 6206168962Spjd struct vnode **a_vpp; 6207168962Spjd struct componentname *a_cnp; 6208168962Spjd struct vattr *a_vap; 6209168962Spjd } */ *ap; 6210168962Spjd{ 6211168962Spjd vattr_t *vap = ap->a_vap; 6212168962Spjd 6213168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 6214168962Spjd 6215168962Spjd vattr_init_mask(vap); 6216168962Spjd 6217168962Spjd return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 6218185029Spjd ap->a_cnp->cn_cred, NULL, 0, NULL)); 6219168962Spjd} 6220168962Spjd 6221168962Spjdstatic int 6222168962Spjdzfs_freebsd_rmdir(ap) 6223168962Spjd struct vop_rmdir_args /* { 6224168962Spjd struct vnode *a_dvp; 6225168962Spjd struct vnode *a_vp; 6226168962Spjd struct componentname *a_cnp; 6227168962Spjd } */ *ap; 6228168962Spjd{ 6229168962Spjd 
struct componentname *cnp = ap->a_cnp; 6230168962Spjd 6231168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6232168962Spjd 6233185029Spjd return (zfs_rmdir(ap->a_dvp, cnp->cn_nameptr, NULL, cnp->cn_cred, NULL, 0)); 6234168962Spjd} 6235168962Spjd 6236168962Spjdstatic int 6237168962Spjdzfs_freebsd_readdir(ap) 6238168962Spjd struct vop_readdir_args /* { 6239168962Spjd struct vnode *a_vp; 6240168962Spjd struct uio *a_uio; 6241168962Spjd struct ucred *a_cred; 6242168962Spjd int *a_eofflag; 6243168962Spjd int *a_ncookies; 6244168962Spjd u_long **a_cookies; 6245168962Spjd } */ *ap; 6246168962Spjd{ 6247168962Spjd 6248168962Spjd return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 6249168962Spjd ap->a_ncookies, ap->a_cookies)); 6250168962Spjd} 6251168962Spjd 6252168962Spjdstatic int 6253168962Spjdzfs_freebsd_fsync(ap) 6254168962Spjd struct vop_fsync_args /* { 6255168962Spjd struct vnode *a_vp; 6256168962Spjd int a_waitfor; 6257168962Spjd struct thread *a_td; 6258168962Spjd } */ *ap; 6259168962Spjd{ 6260168962Spjd 6261168962Spjd vop_stdfsync(ap); 6262185029Spjd return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 6263168962Spjd} 6264168962Spjd 6265168962Spjdstatic int 6266168962Spjdzfs_freebsd_getattr(ap) 6267168962Spjd struct vop_getattr_args /* { 6268168962Spjd struct vnode *a_vp; 6269168962Spjd struct vattr *a_vap; 6270168962Spjd struct ucred *a_cred; 6271168962Spjd } */ *ap; 6272168962Spjd{ 6273185029Spjd vattr_t *vap = ap->a_vap; 6274185029Spjd xvattr_t xvap; 6275185029Spjd u_long fflags = 0; 6276185029Spjd int error; 6277168962Spjd 6278185029Spjd xva_init(&xvap); 6279185029Spjd xvap.xva_vattr = *vap; 6280185029Spjd xvap.xva_vattr.va_mask |= AT_XVATTR; 6281185029Spjd 6282185029Spjd /* Convert chflags into ZFS-type flags. */ 6283185029Spjd /* XXX: what about SF_SETTABLE?. 
*/ 6284185029Spjd XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 6285185029Spjd XVA_SET_REQ(&xvap, XAT_APPENDONLY); 6286185029Spjd XVA_SET_REQ(&xvap, XAT_NOUNLINK); 6287185029Spjd XVA_SET_REQ(&xvap, XAT_NODUMP); 6288254627Sken XVA_SET_REQ(&xvap, XAT_READONLY); 6289254627Sken XVA_SET_REQ(&xvap, XAT_ARCHIVE); 6290254627Sken XVA_SET_REQ(&xvap, XAT_SYSTEM); 6291254627Sken XVA_SET_REQ(&xvap, XAT_HIDDEN); 6292254627Sken XVA_SET_REQ(&xvap, XAT_REPARSE); 6293254627Sken XVA_SET_REQ(&xvap, XAT_OFFLINE); 6294254627Sken XVA_SET_REQ(&xvap, XAT_SPARSE); 6295254627Sken 6296185029Spjd error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 6297185029Spjd if (error != 0) 6298185029Spjd return (error); 6299185029Spjd 6300185029Spjd /* Convert ZFS xattr into chflags. */ 6301185029Spjd#define FLAG_CHECK(fflag, xflag, xfield) do { \ 6302185029Spjd if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 6303185029Spjd fflags |= (fflag); \ 6304185029Spjd} while (0) 6305185029Spjd FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 6306185029Spjd xvap.xva_xoptattrs.xoa_immutable); 6307185029Spjd FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 6308185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 6309185029Spjd FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 6310185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 6311254627Sken FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE, 6312254627Sken xvap.xva_xoptattrs.xoa_archive); 6313185029Spjd FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 6314185029Spjd xvap.xva_xoptattrs.xoa_nodump); 6315254627Sken FLAG_CHECK(UF_READONLY, XAT_READONLY, 6316254627Sken xvap.xva_xoptattrs.xoa_readonly); 6317254627Sken FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM, 6318254627Sken xvap.xva_xoptattrs.xoa_system); 6319254627Sken FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN, 6320254627Sken xvap.xva_xoptattrs.xoa_hidden); 6321254627Sken FLAG_CHECK(UF_REPARSE, XAT_REPARSE, 6322254627Sken xvap.xva_xoptattrs.xoa_reparse); 6323254627Sken FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE, 6324254627Sken xvap.xva_xoptattrs.xoa_offline); 6325254627Sken FLAG_CHECK(UF_SPARSE, 
XAT_SPARSE, 6326254627Sken xvap.xva_xoptattrs.xoa_sparse); 6327254627Sken 6328185029Spjd#undef FLAG_CHECK 6329185029Spjd *vap = xvap.xva_vattr; 6330185029Spjd vap->va_flags = fflags; 6331185029Spjd return (0); 6332168962Spjd} 6333168962Spjd 6334168962Spjdstatic int 6335168962Spjdzfs_freebsd_setattr(ap) 6336168962Spjd struct vop_setattr_args /* { 6337168962Spjd struct vnode *a_vp; 6338168962Spjd struct vattr *a_vap; 6339168962Spjd struct ucred *a_cred; 6340168962Spjd } */ *ap; 6341168962Spjd{ 6342185172Spjd vnode_t *vp = ap->a_vp; 6343168962Spjd vattr_t *vap = ap->a_vap; 6344185172Spjd cred_t *cred = ap->a_cred; 6345185029Spjd xvattr_t xvap; 6346185029Spjd u_long fflags; 6347185029Spjd uint64_t zflags; 6348168962Spjd 6349168962Spjd vattr_init_mask(vap); 6350170044Spjd vap->va_mask &= ~AT_NOSET; 6351168962Spjd 6352185029Spjd xva_init(&xvap); 6353185029Spjd xvap.xva_vattr = *vap; 6354185029Spjd 6355219089Spjd zflags = VTOZ(vp)->z_pflags; 6356185172Spjd 6357185029Spjd if (vap->va_flags != VNOVAL) { 6358197683Sdelphij zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 6359185172Spjd int error; 6360185172Spjd 6361197683Sdelphij if (zfsvfs->z_use_fuids == B_FALSE) 6362197683Sdelphij return (EOPNOTSUPP); 6363197683Sdelphij 6364185029Spjd fflags = vap->va_flags; 6365254627Sken /* 6366254627Sken * XXX KDM 6367254627Sken * We need to figure out whether it makes sense to allow 6368254627Sken * UF_REPARSE through, since we don't really have other 6369254627Sken * facilities to handle reparse points and zfs_setattr() 6370254627Sken * doesn't currently allow setting that attribute anyway. 6371254627Sken */ 6372254627Sken if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE| 6373254627Sken UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE| 6374254627Sken UF_OFFLINE|UF_SPARSE)) != 0) 6375185029Spjd return (EOPNOTSUPP); 6376185172Spjd /* 6377185172Spjd * Unprivileged processes are not permitted to unset system 6378185172Spjd * flags, or modify flags if any system flags are set. 
6379185172Spjd * Privileged non-jail processes may not modify system flags 6380185172Spjd * if securelevel > 0 and any existing system flags are set. 6381185172Spjd * Privileged jail processes behave like privileged non-jail 6382185172Spjd * processes if the security.jail.chflags_allowed sysctl is 6383185172Spjd * is non-zero; otherwise, they behave like unprivileged 6384185172Spjd * processes. 6385185172Spjd */ 6386197861Spjd if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 6387197861Spjd priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) { 6388185172Spjd if (zflags & 6389185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 6390185172Spjd error = securelevel_gt(cred, 0); 6391197861Spjd if (error != 0) 6392185172Spjd return (error); 6393185172Spjd } 6394185172Spjd } else { 6395197861Spjd /* 6396197861Spjd * Callers may only modify the file flags on objects they 6397197861Spjd * have VADMIN rights for. 6398197861Spjd */ 6399197861Spjd if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0) 6400197861Spjd return (error); 6401185172Spjd if (zflags & 6402185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 6403185172Spjd return (EPERM); 6404185172Spjd } 6405185172Spjd if (fflags & 6406185172Spjd (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 6407185172Spjd return (EPERM); 6408185172Spjd } 6409185172Spjd } 6410185029Spjd 6411185029Spjd#define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 6412185029Spjd if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 6413185029Spjd ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 6414185029Spjd XVA_SET_REQ(&xvap, (xflag)); \ 6415185029Spjd (xfield) = ((fflags & (fflag)) != 0); \ 6416185029Spjd } \ 6417185029Spjd} while (0) 6418185029Spjd /* Convert chflags into ZFS-type flags. */ 6419185029Spjd /* XXX: what about SF_SETTABLE?. 
*/ 6420185029Spjd FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 6421185029Spjd xvap.xva_xoptattrs.xoa_immutable); 6422185029Spjd FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 6423185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 6424185029Spjd FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 6425185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 6426254627Sken FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE, 6427254627Sken xvap.xva_xoptattrs.xoa_archive); 6428185029Spjd FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 6429185172Spjd xvap.xva_xoptattrs.xoa_nodump); 6430254627Sken FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY, 6431254627Sken xvap.xva_xoptattrs.xoa_readonly); 6432254627Sken FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM, 6433254627Sken xvap.xva_xoptattrs.xoa_system); 6434254627Sken FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN, 6435254627Sken xvap.xva_xoptattrs.xoa_hidden); 6436254627Sken FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE, 6437254627Sken xvap.xva_xoptattrs.xoa_hidden); 6438254627Sken FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE, 6439254627Sken xvap.xva_xoptattrs.xoa_offline); 6440254627Sken FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE, 6441254627Sken xvap.xva_xoptattrs.xoa_sparse); 6442185029Spjd#undef FLAG_CHANGE 6443185029Spjd } 6444185172Spjd return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL)); 6445168962Spjd} 6446168962Spjd 6447168962Spjdstatic int 6448168962Spjdzfs_freebsd_rename(ap) 6449168962Spjd struct vop_rename_args /* { 6450168962Spjd struct vnode *a_fdvp; 6451168962Spjd struct vnode *a_fvp; 6452168962Spjd struct componentname *a_fcnp; 6453168962Spjd struct vnode *a_tdvp; 6454168962Spjd struct vnode *a_tvp; 6455168962Spjd struct componentname *a_tcnp; 6456168962Spjd } */ *ap; 6457168962Spjd{ 6458168962Spjd vnode_t *fdvp = ap->a_fdvp; 6459168962Spjd vnode_t *fvp = ap->a_fvp; 6460168962Spjd vnode_t *tdvp = ap->a_tdvp; 6461168962Spjd vnode_t *tvp = ap->a_tvp; 6462168962Spjd int error; 6463168962Spjd 
6464192237Skmacy ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); 6465192237Skmacy ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); 6466168962Spjd 6467255748Sdavide /* 6468255748Sdavide * Check for cross-device rename. 6469255748Sdavide */ 6470255748Sdavide if ((fdvp->v_mount != tdvp->v_mount) || 6471255748Sdavide (tvp && (fdvp->v_mount != tvp->v_mount))) 6472255748Sdavide error = EXDEV; 6473255748Sdavide else 6474254982Sdelphij error = zfs_rename(fdvp, ap->a_fcnp->cn_nameptr, tdvp, 6475254982Sdelphij ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred, NULL, 0); 6476168962Spjd if (tdvp == tvp) 6477168962Spjd VN_RELE(tdvp); 6478168962Spjd else 6479168962Spjd VN_URELE(tdvp); 6480168962Spjd if (tvp) 6481168962Spjd VN_URELE(tvp); 6482168962Spjd VN_RELE(fdvp); 6483168962Spjd VN_RELE(fvp); 6484168962Spjd 6485168962Spjd return (error); 6486168962Spjd} 6487168962Spjd 6488168962Spjdstatic int 6489168962Spjdzfs_freebsd_symlink(ap) 6490168962Spjd struct vop_symlink_args /* { 6491168962Spjd struct vnode *a_dvp; 6492168962Spjd struct vnode **a_vpp; 6493168962Spjd struct componentname *a_cnp; 6494168962Spjd struct vattr *a_vap; 6495168962Spjd char *a_target; 6496168962Spjd } */ *ap; 6497168962Spjd{ 6498168962Spjd struct componentname *cnp = ap->a_cnp; 6499168962Spjd vattr_t *vap = ap->a_vap; 6500168962Spjd 6501168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6502168962Spjd 6503168962Spjd vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. 
*/ 6504168962Spjd vattr_init_mask(vap); 6505168962Spjd 6506168962Spjd return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 6507168962Spjd ap->a_target, cnp->cn_cred, cnp->cn_thread)); 6508168962Spjd} 6509168962Spjd 6510168962Spjdstatic int 6511168962Spjdzfs_freebsd_readlink(ap) 6512168962Spjd struct vop_readlink_args /* { 6513168962Spjd struct vnode *a_vp; 6514168962Spjd struct uio *a_uio; 6515168962Spjd struct ucred *a_cred; 6516168962Spjd } */ *ap; 6517168962Spjd{ 6518168962Spjd 6519185029Spjd return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 6520168962Spjd} 6521168962Spjd 6522168962Spjdstatic int 6523168962Spjdzfs_freebsd_link(ap) 6524168962Spjd struct vop_link_args /* { 6525168962Spjd struct vnode *a_tdvp; 6526168962Spjd struct vnode *a_vp; 6527168962Spjd struct componentname *a_cnp; 6528168962Spjd } */ *ap; 6529168962Spjd{ 6530168962Spjd struct componentname *cnp = ap->a_cnp; 6531254982Sdelphij vnode_t *vp = ap->a_vp; 6532254982Sdelphij vnode_t *tdvp = ap->a_tdvp; 6533168962Spjd 6534254982Sdelphij if (tdvp->v_mount != vp->v_mount) 6535254982Sdelphij return (EXDEV); 6536254982Sdelphij 6537168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6538168962Spjd 6539254982Sdelphij return (zfs_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 6540168962Spjd} 6541168962Spjd 6542168962Spjdstatic int 6543168962Spjdzfs_freebsd_inactive(ap) 6544169170Spjd struct vop_inactive_args /* { 6545169170Spjd struct vnode *a_vp; 6546169170Spjd struct thread *a_td; 6547169170Spjd } */ *ap; 6548168962Spjd{ 6549168962Spjd vnode_t *vp = ap->a_vp; 6550168962Spjd 6551185029Spjd zfs_inactive(vp, ap->a_td->td_ucred, NULL); 6552168962Spjd return (0); 6553168962Spjd} 6554168962Spjd 6555168962Spjdstatic int 6556168962Spjdzfs_freebsd_reclaim(ap) 6557168962Spjd struct vop_reclaim_args /* { 6558168962Spjd struct vnode *a_vp; 6559168962Spjd struct thread *a_td; 6560168962Spjd } */ *ap; 6561168962Spjd{ 6562169170Spjd vnode_t *vp = ap->a_vp; 6563168962Spjd znode_t *zp = 
VTOZ(vp); 6564197133Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6565168962Spjd 6566169025Spjd ASSERT(zp != NULL); 6567169025Spjd 6568243520Savg /* Destroy the vm object and flush associated pages. */ 6569243520Savg vnode_destroy_vobject(vp); 6570243520Savg 6571168962Spjd /* 6572243520Savg * z_teardown_inactive_lock protects from a race with 6573243520Savg * zfs_znode_dmu_fini in zfsvfs_teardown during 6574243520Savg * force unmount. 6575168962Spjd */ 6576243520Savg rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 6577243520Savg if (zp->z_sa_hdl == NULL) 6578196301Spjd zfs_znode_free(zp); 6579243520Savg else 6580243520Savg zfs_zinactive(zp); 6581243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 6582185029Spjd 6583168962Spjd vp->v_data = NULL; 6584168962Spjd return (0); 6585168962Spjd} 6586168962Spjd 6587168962Spjdstatic int 6588168962Spjdzfs_freebsd_fid(ap) 6589168962Spjd struct vop_fid_args /* { 6590168962Spjd struct vnode *a_vp; 6591168962Spjd struct fid *a_fid; 6592168962Spjd } */ *ap; 6593168962Spjd{ 6594168962Spjd 6595185029Spjd return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 6596168962Spjd} 6597168962Spjd 6598168962Spjdstatic int 6599168962Spjdzfs_freebsd_pathconf(ap) 6600168962Spjd struct vop_pathconf_args /* { 6601168962Spjd struct vnode *a_vp; 6602168962Spjd int a_name; 6603168962Spjd register_t *a_retval; 6604168962Spjd } */ *ap; 6605168962Spjd{ 6606168962Spjd ulong_t val; 6607168962Spjd int error; 6608168962Spjd 6609185029Spjd error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 6610168962Spjd if (error == 0) 6611168962Spjd *ap->a_retval = val; 6612168962Spjd else if (error == EOPNOTSUPP) 6613168962Spjd error = vop_stdpathconf(ap); 6614168962Spjd return (error); 6615168962Spjd} 6616168962Spjd 6617196949Straszstatic int 6618196949Straszzfs_freebsd_fifo_pathconf(ap) 6619196949Strasz struct vop_pathconf_args /* { 6620196949Strasz struct vnode *a_vp; 6621196949Strasz int a_name; 6622196949Strasz register_t *a_retval; 
6623196949Strasz } */ *ap; 6624196949Strasz{ 6625196949Strasz 6626196949Strasz switch (ap->a_name) { 6627196949Strasz case _PC_ACL_EXTENDED: 6628196949Strasz case _PC_ACL_NFS4: 6629196949Strasz case _PC_ACL_PATH_MAX: 6630196949Strasz case _PC_MAC_PRESENT: 6631196949Strasz return (zfs_freebsd_pathconf(ap)); 6632196949Strasz default: 6633196949Strasz return (fifo_specops.vop_pathconf(ap)); 6634196949Strasz } 6635196949Strasz} 6636196949Strasz 6637185029Spjd/* 6638185029Spjd * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 6639185029Spjd * extended attribute name: 6640185029Spjd * 6641185029Spjd * NAMESPACE PREFIX 6642185029Spjd * system freebsd:system: 6643185029Spjd * user (none, can be used to access ZFS fsattr(5) attributes 6644185029Spjd * created on Solaris) 6645185029Spjd */ 6646185029Spjdstatic int 6647185029Spjdzfs_create_attrname(int attrnamespace, const char *name, char *attrname, 6648185029Spjd size_t size) 6649185029Spjd{ 6650185029Spjd const char *namespace, *prefix, *suffix; 6651185029Spjd 6652185029Spjd /* We don't allow '/' character in attribute name. */ 6653185029Spjd if (strchr(name, '/') != NULL) 6654185029Spjd return (EINVAL); 6655185029Spjd /* We don't allow attribute names that start with "freebsd:" string. */ 6656185029Spjd if (strncmp(name, "freebsd:", 8) == 0) 6657185029Spjd return (EINVAL); 6658185029Spjd 6659185029Spjd bzero(attrname, size); 6660185029Spjd 6661185029Spjd switch (attrnamespace) { 6662185029Spjd case EXTATTR_NAMESPACE_USER: 6663185029Spjd#if 0 6664185029Spjd prefix = "freebsd:"; 6665185029Spjd namespace = EXTATTR_NAMESPACE_USER_STRING; 6666185029Spjd suffix = ":"; 6667185029Spjd#else 6668185029Spjd /* 6669185029Spjd * This is the default namespace by which we can access all 6670185029Spjd * attributes created on Solaris. 
6671185029Spjd */ 6672185029Spjd prefix = namespace = suffix = ""; 6673185029Spjd#endif 6674185029Spjd break; 6675185029Spjd case EXTATTR_NAMESPACE_SYSTEM: 6676185029Spjd prefix = "freebsd:"; 6677185029Spjd namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 6678185029Spjd suffix = ":"; 6679185029Spjd break; 6680185029Spjd case EXTATTR_NAMESPACE_EMPTY: 6681185029Spjd default: 6682185029Spjd return (EINVAL); 6683185029Spjd } 6684185029Spjd if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 6685185029Spjd name) >= size) { 6686185029Spjd return (ENAMETOOLONG); 6687185029Spjd } 6688185029Spjd return (0); 6689185029Spjd} 6690185029Spjd 6691185029Spjd/* 6692185029Spjd * Vnode operating to retrieve a named extended attribute. 6693185029Spjd */ 6694185029Spjdstatic int 6695185029Spjdzfs_getextattr(struct vop_getextattr_args *ap) 6696185029Spjd/* 6697185029Spjdvop_getextattr { 6698185029Spjd IN struct vnode *a_vp; 6699185029Spjd IN int a_attrnamespace; 6700185029Spjd IN const char *a_name; 6701185029Spjd INOUT struct uio *a_uio; 6702185029Spjd OUT size_t *a_size; 6703185029Spjd IN struct ucred *a_cred; 6704185029Spjd IN struct thread *a_td; 6705185029Spjd}; 6706185029Spjd*/ 6707185029Spjd{ 6708185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6709185029Spjd struct thread *td = ap->a_td; 6710185029Spjd struct nameidata nd; 6711185029Spjd char attrname[255]; 6712185029Spjd struct vattr va; 6713185029Spjd vnode_t *xvp = NULL, *vp; 6714185029Spjd int error, flags; 6715185029Spjd 6716195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6717195785Strasz ap->a_cred, ap->a_td, VREAD); 6718195785Strasz if (error != 0) 6719195785Strasz return (error); 6720195785Strasz 6721185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6722185029Spjd sizeof(attrname)); 6723185029Spjd if (error != 0) 6724185029Spjd return (error); 6725185029Spjd 6726185029Spjd ZFS_ENTER(zfsvfs); 6727185029Spjd 6728185029Spjd error = 
zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6729185029Spjd LOOKUP_XATTR); 6730185029Spjd if (error != 0) { 6731185029Spjd ZFS_EXIT(zfsvfs); 6732185029Spjd return (error); 6733185029Spjd } 6734185029Spjd 6735185029Spjd flags = FREAD; 6736241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 6737185029Spjd xvp, td); 6738194586Skib error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL); 6739185029Spjd vp = nd.ni_vp; 6740185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6741185029Spjd if (error != 0) { 6742196303Spjd ZFS_EXIT(zfsvfs); 6743195785Strasz if (error == ENOENT) 6744195785Strasz error = ENOATTR; 6745185029Spjd return (error); 6746185029Spjd } 6747185029Spjd 6748185029Spjd if (ap->a_size != NULL) { 6749185029Spjd error = VOP_GETATTR(vp, &va, ap->a_cred); 6750185029Spjd if (error == 0) 6751185029Spjd *ap->a_size = (size_t)va.va_size; 6752185029Spjd } else if (ap->a_uio != NULL) 6753224605Smm error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); 6754185029Spjd 6755185029Spjd VOP_UNLOCK(vp, 0); 6756185029Spjd vn_close(vp, flags, ap->a_cred, td); 6757185029Spjd ZFS_EXIT(zfsvfs); 6758185029Spjd 6759185029Spjd return (error); 6760185029Spjd} 6761185029Spjd 6762185029Spjd/* 6763185029Spjd * Vnode operation to remove a named attribute. 
6764185029Spjd */ 6765185029Spjdint 6766185029Spjdzfs_deleteextattr(struct vop_deleteextattr_args *ap) 6767185029Spjd/* 6768185029Spjdvop_deleteextattr { 6769185029Spjd IN struct vnode *a_vp; 6770185029Spjd IN int a_attrnamespace; 6771185029Spjd IN const char *a_name; 6772185029Spjd IN struct ucred *a_cred; 6773185029Spjd IN struct thread *a_td; 6774185029Spjd}; 6775185029Spjd*/ 6776185029Spjd{ 6777185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6778185029Spjd struct thread *td = ap->a_td; 6779185029Spjd struct nameidata nd; 6780185029Spjd char attrname[255]; 6781185029Spjd struct vattr va; 6782185029Spjd vnode_t *xvp = NULL, *vp; 6783185029Spjd int error, flags; 6784185029Spjd 6785195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6786195785Strasz ap->a_cred, ap->a_td, VWRITE); 6787195785Strasz if (error != 0) 6788195785Strasz return (error); 6789195785Strasz 6790185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6791185029Spjd sizeof(attrname)); 6792185029Spjd if (error != 0) 6793185029Spjd return (error); 6794185029Spjd 6795185029Spjd ZFS_ENTER(zfsvfs); 6796185029Spjd 6797185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6798185029Spjd LOOKUP_XATTR); 6799185029Spjd if (error != 0) { 6800185029Spjd ZFS_EXIT(zfsvfs); 6801185029Spjd return (error); 6802185029Spjd } 6803185029Spjd 6804241896Skib NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 6805185029Spjd UIO_SYSSPACE, attrname, xvp, td); 6806185029Spjd error = namei(&nd); 6807185029Spjd vp = nd.ni_vp; 6808185029Spjd if (error != 0) { 6809196303Spjd ZFS_EXIT(zfsvfs); 6810260706Savg NDFREE(&nd, NDF_ONLY_PNBUF); 6811195785Strasz if (error == ENOENT) 6812195785Strasz error = ENOATTR; 6813185029Spjd return (error); 6814185029Spjd } 6815260706Savg 6816185029Spjd error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 6817260706Savg NDFREE(&nd, NDF_ONLY_PNBUF); 6818185029Spjd 6819185029Spjd vput(nd.ni_dvp); 6820185029Spjd if (vp == 
nd.ni_dvp) 6821185029Spjd vrele(vp); 6822185029Spjd else 6823185029Spjd vput(vp); 6824185029Spjd ZFS_EXIT(zfsvfs); 6825185029Spjd 6826185029Spjd return (error); 6827185029Spjd} 6828185029Spjd 6829185029Spjd/* 6830185029Spjd * Vnode operation to set a named attribute. 6831185029Spjd */ 6832185029Spjdstatic int 6833185029Spjdzfs_setextattr(struct vop_setextattr_args *ap) 6834185029Spjd/* 6835185029Spjdvop_setextattr { 6836185029Spjd IN struct vnode *a_vp; 6837185029Spjd IN int a_attrnamespace; 6838185029Spjd IN const char *a_name; 6839185029Spjd INOUT struct uio *a_uio; 6840185029Spjd IN struct ucred *a_cred; 6841185029Spjd IN struct thread *a_td; 6842185029Spjd}; 6843185029Spjd*/ 6844185029Spjd{ 6845185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6846185029Spjd struct thread *td = ap->a_td; 6847185029Spjd struct nameidata nd; 6848185029Spjd char attrname[255]; 6849185029Spjd struct vattr va; 6850185029Spjd vnode_t *xvp = NULL, *vp; 6851185029Spjd int error, flags; 6852185029Spjd 6853195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6854195785Strasz ap->a_cred, ap->a_td, VWRITE); 6855195785Strasz if (error != 0) 6856195785Strasz return (error); 6857195785Strasz 6858185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6859185029Spjd sizeof(attrname)); 6860185029Spjd if (error != 0) 6861185029Spjd return (error); 6862185029Spjd 6863185029Spjd ZFS_ENTER(zfsvfs); 6864185029Spjd 6865185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6866195785Strasz LOOKUP_XATTR | CREATE_XATTR_DIR); 6867185029Spjd if (error != 0) { 6868185029Spjd ZFS_EXIT(zfsvfs); 6869185029Spjd return (error); 6870185029Spjd } 6871185029Spjd 6872185029Spjd flags = FFLAGS(O_WRONLY | O_CREAT); 6873241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 6874185029Spjd xvp, td); 6875194586Skib error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); 6876185029Spjd vp = nd.ni_vp; 6877185029Spjd NDFREE(&nd, 
NDF_ONLY_PNBUF); 6878185029Spjd if (error != 0) { 6879185029Spjd ZFS_EXIT(zfsvfs); 6880185029Spjd return (error); 6881185029Spjd } 6882185029Spjd 6883185029Spjd VATTR_NULL(&va); 6884185029Spjd va.va_size = 0; 6885185029Spjd error = VOP_SETATTR(vp, &va, ap->a_cred); 6886185029Spjd if (error == 0) 6887268420Smav VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred); 6888185029Spjd 6889185029Spjd VOP_UNLOCK(vp, 0); 6890185029Spjd vn_close(vp, flags, ap->a_cred, td); 6891185029Spjd ZFS_EXIT(zfsvfs); 6892185029Spjd 6893185029Spjd return (error); 6894185029Spjd} 6895185029Spjd 6896185029Spjd/* 6897185029Spjd * Vnode operation to retrieve extended attributes on a vnode. 6898185029Spjd */ 6899185029Spjdstatic int 6900185029Spjdzfs_listextattr(struct vop_listextattr_args *ap) 6901185029Spjd/* 6902185029Spjdvop_listextattr { 6903185029Spjd IN struct vnode *a_vp; 6904185029Spjd IN int a_attrnamespace; 6905185029Spjd INOUT struct uio *a_uio; 6906185029Spjd OUT size_t *a_size; 6907185029Spjd IN struct ucred *a_cred; 6908185029Spjd IN struct thread *a_td; 6909185029Spjd}; 6910185029Spjd*/ 6911185029Spjd{ 6912185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6913185029Spjd struct thread *td = ap->a_td; 6914185029Spjd struct nameidata nd; 6915185029Spjd char attrprefix[16]; 6916185029Spjd u_char dirbuf[sizeof(struct dirent)]; 6917185029Spjd struct dirent *dp; 6918185029Spjd struct iovec aiov; 6919185029Spjd struct uio auio, *uio = ap->a_uio; 6920185029Spjd size_t *sizep = ap->a_size; 6921185029Spjd size_t plen; 6922185029Spjd vnode_t *xvp = NULL, *vp; 6923185029Spjd int done, error, eof, pos; 6924185029Spjd 6925195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6926195785Strasz ap->a_cred, ap->a_td, VREAD); 6927196303Spjd if (error != 0) 6928195785Strasz return (error); 6929195785Strasz 6930185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 6931185029Spjd sizeof(attrprefix)); 6932185029Spjd if (error != 0) 6933185029Spjd return (error); 
6934185029Spjd plen = strlen(attrprefix); 6935185029Spjd 6936185029Spjd ZFS_ENTER(zfsvfs); 6937185029Spjd 6938195822Strasz if (sizep != NULL) 6939195822Strasz *sizep = 0; 6940195822Strasz 6941185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6942185029Spjd LOOKUP_XATTR); 6943185029Spjd if (error != 0) { 6944196303Spjd ZFS_EXIT(zfsvfs); 6945195785Strasz /* 6946195785Strasz * ENOATTR means that the EA directory does not yet exist, 6947195785Strasz * i.e. there are no extended attributes there. 6948195785Strasz */ 6949195785Strasz if (error == ENOATTR) 6950195785Strasz error = 0; 6951185029Spjd return (error); 6952185029Spjd } 6953185029Spjd 6954241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 6955188588Sjhb UIO_SYSSPACE, ".", xvp, td); 6956185029Spjd error = namei(&nd); 6957185029Spjd vp = nd.ni_vp; 6958185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6959185029Spjd if (error != 0) { 6960185029Spjd ZFS_EXIT(zfsvfs); 6961185029Spjd return (error); 6962185029Spjd } 6963185029Spjd 6964185029Spjd auio.uio_iov = &aiov; 6965185029Spjd auio.uio_iovcnt = 1; 6966185029Spjd auio.uio_segflg = UIO_SYSSPACE; 6967185029Spjd auio.uio_td = td; 6968185029Spjd auio.uio_rw = UIO_READ; 6969185029Spjd auio.uio_offset = 0; 6970185029Spjd 6971185029Spjd do { 6972185029Spjd u_char nlen; 6973185029Spjd 6974185029Spjd aiov.iov_base = (void *)dirbuf; 6975185029Spjd aiov.iov_len = sizeof(dirbuf); 6976185029Spjd auio.uio_resid = sizeof(dirbuf); 6977185029Spjd error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 6978185029Spjd done = sizeof(dirbuf) - auio.uio_resid; 6979185029Spjd if (error != 0) 6980185029Spjd break; 6981185029Spjd for (pos = 0; pos < done;) { 6982185029Spjd dp = (struct dirent *)(dirbuf + pos); 6983185029Spjd pos += dp->d_reclen; 6984185029Spjd /* 6985185029Spjd * XXX: Temporarily we also accept DT_UNKNOWN, as this 6986185029Spjd * is what we get when attribute was created on Solaris. 
6987185029Spjd */ 6988185029Spjd if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 6989185029Spjd continue; 6990185029Spjd if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) 6991185029Spjd continue; 6992185029Spjd else if (strncmp(dp->d_name, attrprefix, plen) != 0) 6993185029Spjd continue; 6994185029Spjd nlen = dp->d_namlen - plen; 6995185029Spjd if (sizep != NULL) 6996185029Spjd *sizep += 1 + nlen; 6997185029Spjd else if (uio != NULL) { 6998185029Spjd /* 6999185029Spjd * Format of extattr name entry is one byte for 7000185029Spjd * length and the rest for name. 7001185029Spjd */ 7002185029Spjd error = uiomove(&nlen, 1, uio->uio_rw, uio); 7003185029Spjd if (error == 0) { 7004185029Spjd error = uiomove(dp->d_name + plen, nlen, 7005185029Spjd uio->uio_rw, uio); 7006185029Spjd } 7007185029Spjd if (error != 0) 7008185029Spjd break; 7009185029Spjd } 7010185029Spjd } 7011185029Spjd } while (!eof && error == 0); 7012185029Spjd 7013185029Spjd vput(vp); 7014185029Spjd ZFS_EXIT(zfsvfs); 7015185029Spjd 7016185029Spjd return (error); 7017185029Spjd} 7018185029Spjd 7019192800Straszint 7020192800Straszzfs_freebsd_getacl(ap) 7021192800Strasz struct vop_getacl_args /* { 7022192800Strasz struct vnode *vp; 7023192800Strasz acl_type_t type; 7024192800Strasz struct acl *aclp; 7025192800Strasz struct ucred *cred; 7026192800Strasz struct thread *td; 7027192800Strasz } */ *ap; 7028192800Strasz{ 7029192800Strasz int error; 7030192800Strasz vsecattr_t vsecattr; 7031192800Strasz 7032192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 7033197435Strasz return (EINVAL); 7034192800Strasz 7035192800Strasz vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 7036192800Strasz if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)) 7037192800Strasz return (error); 7038192800Strasz 7039192800Strasz error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt); 7040196303Spjd if (vsecattr.vsa_aclentp != NULL) 7041196303Spjd kmem_free(vsecattr.vsa_aclentp, 
vsecattr.vsa_aclentsz); 7042192800Strasz 7043196303Spjd return (error); 7044192800Strasz} 7045192800Strasz 7046192800Straszint 7047192800Straszzfs_freebsd_setacl(ap) 7048192800Strasz struct vop_setacl_args /* { 7049192800Strasz struct vnode *vp; 7050192800Strasz acl_type_t type; 7051192800Strasz struct acl *aclp; 7052192800Strasz struct ucred *cred; 7053192800Strasz struct thread *td; 7054192800Strasz } */ *ap; 7055192800Strasz{ 7056192800Strasz int error; 7057192800Strasz vsecattr_t vsecattr; 7058192800Strasz int aclbsize; /* size of acl list in bytes */ 7059192800Strasz aclent_t *aaclp; 7060192800Strasz 7061192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 7062197435Strasz return (EINVAL); 7063192800Strasz 7064192800Strasz if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 7065192800Strasz return (EINVAL); 7066192800Strasz 7067192800Strasz /* 7068196949Strasz * With NFSv4 ACLs, chmod(2) may need to add additional entries, 7069192800Strasz * splitting every entry into two and appending "canonical six" 7070192800Strasz * entries at the end. Don't allow for setting an ACL that would 7071192800Strasz * cause chmod(2) to run out of ACL entries. 
7072192800Strasz */ 7073192800Strasz if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 7074192800Strasz return (ENOSPC); 7075192800Strasz 7076208030Strasz error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 7077208030Strasz if (error != 0) 7078208030Strasz return (error); 7079208030Strasz 7080192800Strasz vsecattr.vsa_mask = VSA_ACE; 7081192800Strasz aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t); 7082192800Strasz vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 7083192800Strasz aaclp = vsecattr.vsa_aclentp; 7084192800Strasz vsecattr.vsa_aclentsz = aclbsize; 7085192800Strasz 7086192800Strasz aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 7087192800Strasz error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL); 7088192800Strasz kmem_free(aaclp, aclbsize); 7089192800Strasz 7090192800Strasz return (error); 7091192800Strasz} 7092192800Strasz 7093192800Straszint 7094192800Straszzfs_freebsd_aclcheck(ap) 7095192800Strasz struct vop_aclcheck_args /* { 7096192800Strasz struct vnode *vp; 7097192800Strasz acl_type_t type; 7098192800Strasz struct acl *aclp; 7099192800Strasz struct ucred *cred; 7100192800Strasz struct thread *td; 7101192800Strasz } */ *ap; 7102192800Strasz{ 7103192800Strasz 7104192800Strasz return (EOPNOTSUPP); 7105192800Strasz} 7106192800Strasz 7107168404Spjdstruct vop_vector zfs_vnodeops; 7108168404Spjdstruct vop_vector zfs_fifoops; 7109209962Smmstruct vop_vector zfs_shareops; 7110168404Spjd 7111168404Spjdstruct vop_vector zfs_vnodeops = { 7112185029Spjd .vop_default = &default_vnodeops, 7113185029Spjd .vop_inactive = zfs_freebsd_inactive, 7114185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 7115185029Spjd .vop_access = zfs_freebsd_access, 7116168404Spjd#ifdef FREEBSD_NAMECACHE 7117185029Spjd .vop_lookup = vfs_cache_lookup, 7118185029Spjd .vop_cachedlookup = zfs_freebsd_lookup, 7119168404Spjd#else 7120185029Spjd .vop_lookup = zfs_freebsd_lookup, 7121168404Spjd#endif 7122185029Spjd .vop_getattr = 
zfs_freebsd_getattr, 7123185029Spjd .vop_setattr = zfs_freebsd_setattr, 7124185029Spjd .vop_create = zfs_freebsd_create, 7125185029Spjd .vop_mknod = zfs_freebsd_create, 7126185029Spjd .vop_mkdir = zfs_freebsd_mkdir, 7127185029Spjd .vop_readdir = zfs_freebsd_readdir, 7128185029Spjd .vop_fsync = zfs_freebsd_fsync, 7129185029Spjd .vop_open = zfs_freebsd_open, 7130185029Spjd .vop_close = zfs_freebsd_close, 7131185029Spjd .vop_rmdir = zfs_freebsd_rmdir, 7132185029Spjd .vop_ioctl = zfs_freebsd_ioctl, 7133185029Spjd .vop_link = zfs_freebsd_link, 7134185029Spjd .vop_symlink = zfs_freebsd_symlink, 7135185029Spjd .vop_readlink = zfs_freebsd_readlink, 7136185029Spjd .vop_read = zfs_freebsd_read, 7137185029Spjd .vop_write = zfs_freebsd_write, 7138185029Spjd .vop_remove = zfs_freebsd_remove, 7139185029Spjd .vop_rename = zfs_freebsd_rename, 7140185029Spjd .vop_pathconf = zfs_freebsd_pathconf, 7141243518Savg .vop_bmap = zfs_freebsd_bmap, 7142185029Spjd .vop_fid = zfs_freebsd_fid, 7143185029Spjd .vop_getextattr = zfs_getextattr, 7144185029Spjd .vop_deleteextattr = zfs_deleteextattr, 7145185029Spjd .vop_setextattr = zfs_setextattr, 7146185029Spjd .vop_listextattr = zfs_listextattr, 7147192800Strasz .vop_getacl = zfs_freebsd_getacl, 7148192800Strasz .vop_setacl = zfs_freebsd_setacl, 7149192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 7150213937Savg .vop_getpages = zfs_freebsd_getpages, 7151258746Savg .vop_putpages = zfs_freebsd_putpages, 7152168404Spjd}; 7153168404Spjd 7154169170Spjdstruct vop_vector zfs_fifoops = { 7155185029Spjd .vop_default = &fifo_specops, 7156200162Skib .vop_fsync = zfs_freebsd_fsync, 7157185029Spjd .vop_access = zfs_freebsd_access, 7158185029Spjd .vop_getattr = zfs_freebsd_getattr, 7159185029Spjd .vop_inactive = zfs_freebsd_inactive, 7160185029Spjd .vop_read = VOP_PANIC, 7161185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 7162185029Spjd .vop_setattr = zfs_freebsd_setattr, 7163185029Spjd .vop_write = VOP_PANIC, 7164196949Strasz .vop_pathconf = 
zfs_freebsd_fifo_pathconf, 7165185029Spjd .vop_fid = zfs_freebsd_fid, 7166192800Strasz .vop_getacl = zfs_freebsd_getacl, 7167192800Strasz .vop_setacl = zfs_freebsd_setacl, 7168192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 7169168404Spjd}; 7170209962Smm 7171209962Smm/* 7172209962Smm * special share hidden files vnode operations template 7173209962Smm */ 7174209962Smmstruct vop_vector zfs_shareops = { 7175209962Smm .vop_default = &default_vnodeops, 7176209962Smm .vop_access = zfs_freebsd_access, 7177209962Smm .vop_inactive = zfs_freebsd_inactive, 7178209962Smm .vop_reclaim = zfs_freebsd_reclaim, 7179209962Smm .vop_fid = zfs_freebsd_fid, 7180209962Smm .vop_pathconf = zfs_freebsd_pathconf, 7181209962Smm}; 7182