1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21321545Smav 22168404Spjd/* 23212694Smm * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24289562Smav * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 25296519Smav * Copyright (c) 2014 Integros [integros.com] 26321545Smav * Copyright 2017 Nexenta Systems, Inc. 
27168404Spjd */ 28168404Spjd 29169195Spjd/* Portions Copyright 2007 Jeremy Teo */ 30219089Spjd/* Portions Copyright 2010 Robert Milkowski */ 31169195Spjd 32168404Spjd#include <sys/types.h> 33168404Spjd#include <sys/param.h> 34168404Spjd#include <sys/time.h> 35168404Spjd#include <sys/systm.h> 36168404Spjd#include <sys/sysmacros.h> 37168404Spjd#include <sys/resource.h> 38168404Spjd#include <sys/vfs.h> 39248084Sattilio#include <sys/vm.h> 40168404Spjd#include <sys/vnode.h> 41168404Spjd#include <sys/file.h> 42168404Spjd#include <sys/stat.h> 43168404Spjd#include <sys/kmem.h> 44168404Spjd#include <sys/taskq.h> 45168404Spjd#include <sys/uio.h> 46168404Spjd#include <sys/atomic.h> 47168404Spjd#include <sys/namei.h> 48168404Spjd#include <sys/mman.h> 49168404Spjd#include <sys/cmn_err.h> 50168404Spjd#include <sys/errno.h> 51168404Spjd#include <sys/unistd.h> 52168404Spjd#include <sys/zfs_dir.h> 53168404Spjd#include <sys/zfs_ioctl.h> 54168404Spjd#include <sys/fs/zfs.h> 55168404Spjd#include <sys/dmu.h> 56219089Spjd#include <sys/dmu_objset.h> 57168404Spjd#include <sys/spa.h> 58168404Spjd#include <sys/txg.h> 59168404Spjd#include <sys/dbuf.h> 60168404Spjd#include <sys/zap.h> 61219089Spjd#include <sys/sa.h> 62168404Spjd#include <sys/dirent.h> 63168962Spjd#include <sys/policy.h> 64168962Spjd#include <sys/sunddi.h> 65168404Spjd#include <sys/filio.h> 66209962Smm#include <sys/sid.h> 67168404Spjd#include <sys/zfs_ctldir.h> 68185029Spjd#include <sys/zfs_fuid.h> 69219089Spjd#include <sys/zfs_sa.h> 70168404Spjd#include <sys/zfs_rlock.h> 71185029Spjd#include <sys/extdirent.h> 72185029Spjd#include <sys/kidmap.h> 73168404Spjd#include <sys/bio.h> 74168404Spjd#include <sys/buf.h> 75168404Spjd#include <sys/sched.h> 76192800Strasz#include <sys/acl.h> 77331017Skevans#include <sys/vmmeter.h> 78239077Smarius#include <vm/vm_param.h> 79325132Savg#include <sys/zil.h> 80168404Spjd 81168404Spjd/* 82168404Spjd * Programming rules. 
83168404Spjd * 84168404Spjd * Each vnode op performs some logical unit of work. To do this, the ZPL must 85168404Spjd * properly lock its in-core state, create a DMU transaction, do the work, 86168404Spjd * record this work in the intent log (ZIL), commit the DMU transaction, 87185029Spjd * and wait for the intent log to commit if it is a synchronous operation. 88185029Spjd * Moreover, the vnode ops must work in both normal and log replay context. 89168404Spjd * The ordering of events is important to avoid deadlocks and references 90168404Spjd * to freed memory. The example below illustrates the following Big Rules: 91168404Spjd * 92251631Sdelphij * (1) A check must be made in each zfs thread for a mounted file system. 93168404Spjd * This is done avoiding races using ZFS_ENTER(zfsvfs). 94251631Sdelphij * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 95251631Sdelphij * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 96251631Sdelphij * can return EIO from the calling function. 97168404Spjd * 98168404Spjd * (2) VN_RELE() should always be the last thing except for zil_commit() 99168404Spjd * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 100168404Spjd * First, if it's the last reference, the vnode/znode 101168404Spjd * can be freed, so the zp may point to freed memory. Second, the last 102168404Spjd * reference will call zfs_zinactive(), which may induce a lot of work -- 103168404Spjd * pushing cached pages (which acquires range locks) and syncing out 104168404Spjd * cached atime changes. Third, zfs_zinactive() may require a new tx, 105168404Spjd * which could deadlock the system if you were already holding one. 106191900Skmacy * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 107168404Spjd * 108168404Spjd * (3) All range locks must be grabbed before calling dmu_tx_assign(), 109168404Spjd * as they can span dmu_tx_assign() calls. 
110168404Spjd * 111258720Savg * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to 112258720Savg * dmu_tx_assign(). This is critical because we don't want to block 113258720Savg * while holding locks. 114168404Spjd * 115258720Savg * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This 116258720Savg * reduces lock contention and CPU usage when we must wait (note that if 117258720Savg * throughput is constrained by the storage, nearly every transaction 118258720Savg * must wait). 119258720Savg * 120258720Savg * Note, in particular, that if a lock is sometimes acquired before 121258720Savg * the tx assigns, and sometimes after (e.g. z_lock), then failing 122258720Savg * to use a non-blocking assign can deadlock the system. The scenario: 123258720Savg * 124168404Spjd * Thread A has grabbed a lock before calling dmu_tx_assign(). 125168404Spjd * Thread B is in an already-assigned tx, and blocks for this lock. 126168404Spjd * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 127168404Spjd * forever, because the previous txg can't quiesce until B's tx commits. 128168404Spjd * 129168404Spjd * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 130258632Savg * then drop all locks, call dmu_tx_wait(), and try again. On subsequent 131330986Savg * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT, 132258632Savg * to indicate that this operation has already called dmu_tx_wait(). 133258632Savg * This will ensure that we don't retry forever, waiting a short bit 134258632Savg * each time. 135168404Spjd * 136168404Spjd * (5) If the operation succeeded, generate the intent log entry for it 137168404Spjd * before dropping locks. This ensures that the ordering of events 138168404Spjd * in the intent log matches the order in which they actually occurred. 
139251631Sdelphij * During ZIL replay the zfs_log_* functions will update the sequence 140209962Smm * number to indicate the zil transaction has replayed. 141168404Spjd * 142168404Spjd * (6) At the end of each vnode op, the DMU tx must always commit, 143168404Spjd * regardless of whether there were any errors. 144168404Spjd * 145219089Spjd * (7) After dropping all locks, invoke zil_commit(zilog, foid) 146168404Spjd * to ensure that synchronous semantics are provided when necessary. 147168404Spjd * 148168404Spjd * In general, this is how things should be ordered in each vnode op: 149168404Spjd * 150168404Spjd * ZFS_ENTER(zfsvfs); // exit if unmounted 151168404Spjd * top: 152303970Savg * zfs_dirent_lookup(&dl, ...) // lock directory entry (may VN_HOLD()) 153168404Spjd * rw_enter(...); // grab any other locks you need 154168404Spjd * tx = dmu_tx_create(...); // get DMU tx 155168404Spjd * dmu_tx_hold_*(); // hold each object you might modify 156330986Savg * error = dmu_tx_assign(tx, (waited ? 
TXG_NOTHROTTLE : 0) | TXG_NOWAIT); 157168404Spjd * if (error) { 158168404Spjd * rw_exit(...); // drop locks 159168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 160168404Spjd * VN_RELE(...); // release held vnodes 161209962Smm * if (error == ERESTART) { 162258632Savg * waited = B_TRUE; 163168404Spjd * dmu_tx_wait(tx); 164168404Spjd * dmu_tx_abort(tx); 165168404Spjd * goto top; 166168404Spjd * } 167168404Spjd * dmu_tx_abort(tx); // abort DMU tx 168168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 169168404Spjd * return (error); // really out of space 170168404Spjd * } 171168404Spjd * error = do_real_work(); // do whatever this VOP does 172168404Spjd * if (error == 0) 173168404Spjd * zfs_log_*(...); // on success, make ZIL entry 174168404Spjd * dmu_tx_commit(tx); // commit DMU tx -- error or not 175168404Spjd * rw_exit(...); // drop locks 176168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 177168404Spjd * VN_RELE(...); // release held vnodes 178219089Spjd * zil_commit(zilog, foid); // synchronous when necessary 179168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 180168404Spjd * return (error); // done, report error 181168404Spjd */ 182185029Spjd 183168404Spjd/* ARGSUSED */ 184168404Spjdstatic int 185185029Spjdzfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 186168404Spjd{ 187168962Spjd znode_t *zp = VTOZ(*vpp); 188209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 189168404Spjd 190209962Smm ZFS_ENTER(zfsvfs); 191209962Smm ZFS_VERIFY_ZP(zp); 192209962Smm 193219089Spjd if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 194185029Spjd ((flag & FAPPEND) == 0)) { 195209962Smm ZFS_EXIT(zfsvfs); 196249195Smm return (SET_ERROR(EPERM)); 197185029Spjd } 198185029Spjd 199185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 200185029Spjd ZTOV(zp)->v_type == VREG && 201219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 202209962Smm if (fs_vscan(*vpp, cr, 0) != 0) { 203209962Smm ZFS_EXIT(zfsvfs); 204249195Smm 
return (SET_ERROR(EACCES)); 205209962Smm } 206209962Smm } 207185029Spjd 208168404Spjd /* Keep a count of the synchronous opens in the znode */ 209168962Spjd if (flag & (FSYNC | FDSYNC)) 210168404Spjd atomic_inc_32(&zp->z_sync_cnt); 211185029Spjd 212209962Smm ZFS_EXIT(zfsvfs); 213168404Spjd return (0); 214168404Spjd} 215168404Spjd 216168404Spjd/* ARGSUSED */ 217168404Spjdstatic int 218185029Spjdzfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 219185029Spjd caller_context_t *ct) 220168404Spjd{ 221168962Spjd znode_t *zp = VTOZ(vp); 222209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 223168404Spjd 224210470Smm /* 225210470Smm * Clean up any locks held by this process on the vp. 226210470Smm */ 227210470Smm cleanlocks(vp, ddi_get_pid(), 0); 228210470Smm cleanshares(vp, ddi_get_pid()); 229210470Smm 230209962Smm ZFS_ENTER(zfsvfs); 231209962Smm ZFS_VERIFY_ZP(zp); 232209962Smm 233168404Spjd /* Decrement the synchronous opens in the znode */ 234185029Spjd if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 235168404Spjd atomic_dec_32(&zp->z_sync_cnt); 236168404Spjd 237185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 238185029Spjd ZTOV(zp)->v_type == VREG && 239219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 240185029Spjd VERIFY(fs_vscan(vp, cr, 1) == 0); 241185029Spjd 242209962Smm ZFS_EXIT(zfsvfs); 243168404Spjd return (0); 244168404Spjd} 245168404Spjd 246168404Spjd/* 247168404Spjd * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 248168404Spjd * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 
249168404Spjd */ 250168404Spjdstatic int 251168978Spjdzfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 252168404Spjd{ 253168404Spjd znode_t *zp = VTOZ(vp); 254168404Spjd uint64_t noff = (uint64_t)*off; /* new offset */ 255168404Spjd uint64_t file_sz; 256168404Spjd int error; 257168404Spjd boolean_t hole; 258168404Spjd 259219089Spjd file_sz = zp->z_size; 260168404Spjd if (noff >= file_sz) { 261249195Smm return (SET_ERROR(ENXIO)); 262168404Spjd } 263168404Spjd 264168962Spjd if (cmd == _FIO_SEEK_HOLE) 265168404Spjd hole = B_TRUE; 266168404Spjd else 267168404Spjd hole = B_FALSE; 268168404Spjd 269168404Spjd error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 270168404Spjd 271271536Sdelphij if (error == ESRCH) 272249195Smm return (SET_ERROR(ENXIO)); 273271536Sdelphij 274271536Sdelphij /* 275271536Sdelphij * We could find a hole that begins after the logical end-of-file, 276271536Sdelphij * because dmu_offset_next() only works on whole blocks. If the 277271536Sdelphij * EOF falls mid-block, then indicate that the "virtual hole" 278271536Sdelphij * at the end of the file begins at the logical EOF, rather than 279271536Sdelphij * at the end of the last block. 
280271536Sdelphij */ 281271536Sdelphij if (noff > file_sz) { 282271536Sdelphij ASSERT(hole); 283271536Sdelphij noff = file_sz; 284168404Spjd } 285168404Spjd 286168404Spjd if (noff < *off) 287168404Spjd return (error); 288168404Spjd *off = noff; 289168404Spjd return (error); 290168404Spjd} 291168404Spjd 292168404Spjd/* ARGSUSED */ 293168404Spjdstatic int 294168978Spjdzfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 295185029Spjd int *rvalp, caller_context_t *ct) 296168404Spjd{ 297168962Spjd offset_t off; 298287103Savg offset_t ndata; 299287103Savg dmu_object_info_t doi; 300168962Spjd int error; 301168962Spjd zfsvfs_t *zfsvfs; 302185029Spjd znode_t *zp; 303168404Spjd 304168404Spjd switch (com) { 305185029Spjd case _FIOFFS: 306287103Savg { 307168962Spjd return (0); 308168404Spjd 309168962Spjd /* 310168962Spjd * The following two ioctls are used by bfu. Faking out, 311168962Spjd * necessary to avoid bfu errors. 312168962Spjd */ 313287103Savg } 314185029Spjd case _FIOGDIO: 315185029Spjd case _FIOSDIO: 316287103Savg { 317168962Spjd return (0); 318287103Savg } 319168962Spjd 320185029Spjd case _FIO_SEEK_DATA: 321185029Spjd case _FIO_SEEK_HOLE: 322287103Savg { 323277300Ssmh#ifdef illumos 324168962Spjd if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 325249195Smm return (SET_ERROR(EFAULT)); 326233918Savg#else 327233918Savg off = *(offset_t *)data; 328233918Savg#endif 329185029Spjd zp = VTOZ(vp); 330185029Spjd zfsvfs = zp->z_zfsvfs; 331168404Spjd ZFS_ENTER(zfsvfs); 332185029Spjd ZFS_VERIFY_ZP(zp); 333168404Spjd 334168404Spjd /* offset parameter is in/out */ 335168404Spjd error = zfs_holey(vp, com, &off); 336168404Spjd ZFS_EXIT(zfsvfs); 337168404Spjd if (error) 338168404Spjd return (error); 339277300Ssmh#ifdef illumos 340168962Spjd if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 341249195Smm return (SET_ERROR(EFAULT)); 342233918Savg#else 343233918Savg *(offset_t *)data = off; 344233918Savg#endif 345168404Spjd return (0); 346168404Spjd } 
347287103Savg#ifdef illumos 348287103Savg case _FIO_COUNT_FILLED: 349287103Savg { 350287103Savg /* 351287103Savg * _FIO_COUNT_FILLED adds a new ioctl command which 352287103Savg * exposes the number of filled blocks in a 353287103Savg * ZFS object. 354287103Savg */ 355287103Savg zp = VTOZ(vp); 356287103Savg zfsvfs = zp->z_zfsvfs; 357287103Savg ZFS_ENTER(zfsvfs); 358287103Savg ZFS_VERIFY_ZP(zp); 359287103Savg 360287103Savg /* 361287103Savg * Wait for all dirty blocks for this object 362287103Savg * to get synced out to disk, and the DMU info 363287103Savg * updated. 364287103Savg */ 365287103Savg error = dmu_object_wait_synced(zfsvfs->z_os, zp->z_id); 366287103Savg if (error) { 367287103Savg ZFS_EXIT(zfsvfs); 368287103Savg return (error); 369287103Savg } 370287103Savg 371287103Savg /* 372287103Savg * Retrieve fill count from DMU object. 373287103Savg */ 374287103Savg error = dmu_object_info(zfsvfs->z_os, zp->z_id, &doi); 375287103Savg if (error) { 376287103Savg ZFS_EXIT(zfsvfs); 377287103Savg return (error); 378287103Savg } 379287103Savg 380287103Savg ndata = doi.doi_fill_count; 381287103Savg 382287103Savg ZFS_EXIT(zfsvfs); 383287103Savg if (ddi_copyout(&ndata, (void *)data, sizeof (ndata), flag)) 384287103Savg return (SET_ERROR(EFAULT)); 385287103Savg return (0); 386287103Savg } 387287103Savg#endif 388287103Savg } 389249195Smm return (SET_ERROR(ENOTTY)); 390168404Spjd} 391168404Spjd 392209962Smmstatic vm_page_t 393253953Sattiliopage_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 394209962Smm{ 395209962Smm vm_object_t obj; 396209962Smm vm_page_t pp; 397258353Savg int64_t end; 398209962Smm 399258353Savg /* 400258353Savg * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE 401258353Savg * aligned boundaries, if the range is not aligned. As a result a 402258353Savg * DEV_BSIZE subrange with partially dirty data may get marked as clean. 
403258353Savg * It may happen that all DEV_BSIZE subranges are marked clean and thus 404258353Savg * the whole page would be considred clean despite have some dirty data. 405258353Savg * For this reason we should shrink the range to DEV_BSIZE aligned 406258353Savg * boundaries before calling vm_page_clear_dirty. 407258353Savg */ 408258353Savg end = rounddown2(off + nbytes, DEV_BSIZE); 409258353Savg off = roundup2(off, DEV_BSIZE); 410258353Savg nbytes = end - off; 411258353Savg 412209962Smm obj = vp->v_object; 413248084Sattilio zfs_vmobject_assert_wlocked(obj); 414209962Smm 415209962Smm for (;;) { 416209962Smm if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 417246293Savg pp->valid) { 418254138Sattilio if (vm_page_xbusied(pp)) { 419212652Savg /* 420212652Savg * Reference the page before unlocking and 421212652Savg * sleeping so that the page daemon is less 422212652Savg * likely to reclaim it. 423212652Savg */ 424225418Skib vm_page_reference(pp); 425254138Sattilio vm_page_lock(pp); 426254138Sattilio zfs_vmobject_wunlock(obj); 427307671Skib vm_page_busy_sleep(pp, "zfsmwb", true); 428254138Sattilio zfs_vmobject_wlock(obj); 429209962Smm continue; 430212652Savg } 431254138Sattilio vm_page_sbusy(pp); 432319091Savg } else if (pp != NULL) { 433319091Savg ASSERT(!pp->valid); 434252337Sgavin pp = NULL; 435209962Smm } 436246293Savg 437246293Savg if (pp != NULL) { 438246293Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 439253953Sattilio vm_object_pip_add(obj, 1); 440246293Savg pmap_remove_write(pp); 441258353Savg if (nbytes != 0) 442258353Savg vm_page_clear_dirty(pp, off, nbytes); 443246293Savg } 444209962Smm break; 445209962Smm } 446209962Smm return (pp); 447209962Smm} 448209962Smm 449209962Smmstatic void 450253953Sattiliopage_unbusy(vm_page_t pp) 451209962Smm{ 452209962Smm 453254138Sattilio vm_page_sunbusy(pp); 454253953Sattilio vm_object_pip_subtract(pp->object, 1); 455209962Smm} 456209962Smm 457253953Sattiliostatic vm_page_t 458253953Sattiliopage_hold(vnode_t 
*vp, int64_t start) 459253953Sattilio{ 460253953Sattilio vm_object_t obj; 461253953Sattilio vm_page_t pp; 462253953Sattilio 463253953Sattilio obj = vp->v_object; 464253953Sattilio zfs_vmobject_assert_wlocked(obj); 465253953Sattilio 466253953Sattilio for (;;) { 467253953Sattilio if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 468253953Sattilio pp->valid) { 469254138Sattilio if (vm_page_xbusied(pp)) { 470253953Sattilio /* 471253953Sattilio * Reference the page before unlocking and 472253953Sattilio * sleeping so that the page daemon is less 473253953Sattilio * likely to reclaim it. 474253953Sattilio */ 475253953Sattilio vm_page_reference(pp); 476254138Sattilio vm_page_lock(pp); 477254138Sattilio zfs_vmobject_wunlock(obj); 478307671Skib vm_page_busy_sleep(pp, "zfsmwb", true); 479254138Sattilio zfs_vmobject_wlock(obj); 480253953Sattilio continue; 481253953Sattilio } 482253953Sattilio 483253953Sattilio ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 484253953Sattilio vm_page_lock(pp); 485253953Sattilio vm_page_hold(pp); 486253953Sattilio vm_page_unlock(pp); 487253953Sattilio 488253953Sattilio } else 489253953Sattilio pp = NULL; 490253953Sattilio break; 491253953Sattilio } 492253953Sattilio return (pp); 493253953Sattilio} 494253953Sattilio 495253953Sattiliostatic void 496253953Sattiliopage_unhold(vm_page_t pp) 497253953Sattilio{ 498253953Sattilio 499253953Sattilio vm_page_lock(pp); 500253953Sattilio vm_page_unhold(pp); 501253953Sattilio vm_page_unlock(pp); 502253953Sattilio} 503253953Sattilio 504168404Spjd/* 505168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 506168404Spjd * between the DMU cache and the memory mapped pages. What this means: 507168404Spjd * 508168404Spjd * On Write: If we find a memory mapped page, we write to *both* 509168404Spjd * the page and the dmu buffer. 
510168404Spjd */ 511209962Smmstatic void 512209962Smmupdate_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 513209962Smm int segflg, dmu_tx_t *tx) 514168404Spjd{ 515168404Spjd vm_object_t obj; 516168404Spjd struct sf_buf *sf; 517246293Savg caddr_t va; 518212655Savg int off; 519168404Spjd 520258746Savg ASSERT(segflg != UIO_NOCOPY); 521168404Spjd ASSERT(vp->v_mount != NULL); 522168404Spjd obj = vp->v_object; 523168404Spjd ASSERT(obj != NULL); 524168404Spjd 525168404Spjd off = start & PAGEOFFSET; 526248084Sattilio zfs_vmobject_wlock(obj); 527168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 528209962Smm vm_page_t pp; 529246293Savg int nbytes = imin(PAGESIZE - off, len); 530168404Spjd 531258746Savg if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { 532248084Sattilio zfs_vmobject_wunlock(obj); 533168404Spjd 534246293Savg va = zfs_map_page(pp, &sf); 535246293Savg (void) dmu_read(os, oid, start+off, nbytes, 536246293Savg va+off, DMU_READ_PREFETCH);; 537209962Smm zfs_unmap_page(sf); 538246293Savg 539248084Sattilio zfs_vmobject_wlock(obj); 540253953Sattilio page_unbusy(pp); 541168404Spjd } 542209962Smm len -= nbytes; 543168404Spjd off = 0; 544168404Spjd } 545258746Savg vm_object_pip_wakeupn(obj, 0); 546248084Sattilio zfs_vmobject_wunlock(obj); 547168404Spjd} 548168404Spjd 549168404Spjd/* 550219089Spjd * Read with UIO_NOCOPY flag means that sendfile(2) requests 551219089Spjd * ZFS to populate a range of page cache pages with data. 552219089Spjd * 553219089Spjd * NOTE: this function could be optimized to pre-allocate 554254138Sattilio * all pages in advance, drain exclusive busy on all of them, 555219089Spjd * map them into contiguous KVA region and populate them 556219089Spjd * in one single dmu_read() call. 
557219089Spjd */ 558219089Spjdstatic int 559219089Spjdmappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) 560219089Spjd{ 561219089Spjd znode_t *zp = VTOZ(vp); 562219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 563219089Spjd struct sf_buf *sf; 564219089Spjd vm_object_t obj; 565219089Spjd vm_page_t pp; 566219089Spjd int64_t start; 567219089Spjd caddr_t va; 568219089Spjd int len = nbytes; 569219089Spjd int off; 570219089Spjd int error = 0; 571219089Spjd 572219089Spjd ASSERT(uio->uio_segflg == UIO_NOCOPY); 573219089Spjd ASSERT(vp->v_mount != NULL); 574219089Spjd obj = vp->v_object; 575219089Spjd ASSERT(obj != NULL); 576219089Spjd ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); 577219089Spjd 578248084Sattilio zfs_vmobject_wlock(obj); 579219089Spjd for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { 580219089Spjd int bytes = MIN(PAGESIZE, len); 581219089Spjd 582254138Sattilio pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY | 583254649Skib VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); 584219089Spjd if (pp->valid == 0) { 585248084Sattilio zfs_vmobject_wunlock(obj); 586219089Spjd va = zfs_map_page(pp, &sf); 587219089Spjd error = dmu_read(os, zp->z_id, start, bytes, va, 588219089Spjd DMU_READ_PREFETCH); 589219089Spjd if (bytes != PAGESIZE && error == 0) 590219089Spjd bzero(va + bytes, PAGESIZE - bytes); 591219089Spjd zfs_unmap_page(sf); 592248084Sattilio zfs_vmobject_wlock(obj); 593254138Sattilio vm_page_sunbusy(pp); 594219089Spjd vm_page_lock(pp); 595219089Spjd if (error) { 596253073Savg if (pp->wire_count == 0 && pp->valid == 0 && 597254138Sattilio !vm_page_busied(pp)) 598253073Savg vm_page_free(pp); 599219089Spjd } else { 600219089Spjd pp->valid = VM_PAGE_BITS_ALL; 601219089Spjd vm_page_activate(pp); 602219089Spjd } 603219089Spjd vm_page_unlock(pp); 604258739Savg } else { 605258739Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 606254138Sattilio vm_page_sunbusy(pp); 607258739Savg } 608219089Spjd if (error) 609219089Spjd break; 610219089Spjd uio->uio_resid -= bytes; 
611219089Spjd uio->uio_offset += bytes; 612219089Spjd len -= bytes; 613219089Spjd } 614248084Sattilio zfs_vmobject_wunlock(obj); 615219089Spjd return (error); 616219089Spjd} 617219089Spjd 618219089Spjd/* 619168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 620168404Spjd * between the DMU cache and the memory mapped pages. What this means: 621168404Spjd * 622168404Spjd * On Read: We "read" preferentially from memory mapped pages, 623168404Spjd * else we default from the dmu buffer. 624168404Spjd * 625168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 626251631Sdelphij * the file is memory mapped. 627168404Spjd */ 628168404Spjdstatic int 629168404Spjdmappedread(vnode_t *vp, int nbytes, uio_t *uio) 630168404Spjd{ 631168404Spjd znode_t *zp = VTOZ(vp); 632168404Spjd vm_object_t obj; 633212655Savg int64_t start; 634168926Spjd caddr_t va; 635168404Spjd int len = nbytes; 636212655Savg int off; 637168404Spjd int error = 0; 638168404Spjd 639168404Spjd ASSERT(vp->v_mount != NULL); 640168404Spjd obj = vp->v_object; 641168404Spjd ASSERT(obj != NULL); 642168404Spjd 643168404Spjd start = uio->uio_loffset; 644168404Spjd off = start & PAGEOFFSET; 645248084Sattilio zfs_vmobject_wlock(obj); 646168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 647219089Spjd vm_page_t pp; 648219089Spjd uint64_t bytes = MIN(PAGESIZE - off, len); 649168404Spjd 650253953Sattilio if (pp = page_hold(vp, start)) { 651219089Spjd struct sf_buf *sf; 652219089Spjd caddr_t va; 653212652Savg 654248084Sattilio zfs_vmobject_wunlock(obj); 655219089Spjd va = zfs_map_page(pp, &sf); 656298105Savg#ifdef illumos 657219089Spjd error = uiomove(va + off, bytes, UIO_READ, uio); 658298105Savg#else 659298105Savg error = vn_io_fault_uiomove(va + off, bytes, uio); 660298105Savg#endif 661219089Spjd zfs_unmap_page(sf); 662248084Sattilio zfs_vmobject_wlock(obj); 663253953Sattilio page_unhold(pp); 664219089Spjd } else { 665248084Sattilio 
zfs_vmobject_wunlock(obj); 666272809Sdelphij error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 667272809Sdelphij uio, bytes); 668248084Sattilio zfs_vmobject_wlock(obj); 669168404Spjd } 670168404Spjd len -= bytes; 671168404Spjd off = 0; 672168404Spjd if (error) 673168404Spjd break; 674168404Spjd } 675248084Sattilio zfs_vmobject_wunlock(obj); 676168404Spjd return (error); 677168404Spjd} 678168404Spjd 679168404Spjdoffset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 680168404Spjd 681168404Spjd/* 682168404Spjd * Read bytes from specified file into supplied buffer. 683168404Spjd * 684168404Spjd * IN: vp - vnode of file to be read from. 685168404Spjd * uio - structure supplying read location, range info, 686168404Spjd * and return buffer. 687168404Spjd * ioflag - SYNC flags; used to provide FRSYNC semantics. 688168404Spjd * cr - credentials of caller. 689185029Spjd * ct - caller context 690168404Spjd * 691168404Spjd * OUT: uio - updated offset and range, buffer filled. 692168404Spjd * 693251631Sdelphij * RETURN: 0 on success, error code on failure. 
694168404Spjd * 695168404Spjd * Side Effects: 696168404Spjd * vp - atime updated if byte count > 0 697168404Spjd */ 698168404Spjd/* ARGSUSED */ 699168404Spjdstatic int 700168962Spjdzfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 701168404Spjd{ 702168404Spjd znode_t *zp = VTOZ(vp); 703168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 704168404Spjd ssize_t n, nbytes; 705247187Smm int error = 0; 706168404Spjd rl_t *rl; 707219089Spjd xuio_t *xuio = NULL; 708168404Spjd 709168404Spjd ZFS_ENTER(zfsvfs); 710185029Spjd ZFS_VERIFY_ZP(zp); 711168404Spjd 712219089Spjd if (zp->z_pflags & ZFS_AV_QUARANTINED) { 713185029Spjd ZFS_EXIT(zfsvfs); 714249195Smm return (SET_ERROR(EACCES)); 715185029Spjd } 716185029Spjd 717168404Spjd /* 718168404Spjd * Validate file offset 719168404Spjd */ 720168404Spjd if (uio->uio_loffset < (offset_t)0) { 721168404Spjd ZFS_EXIT(zfsvfs); 722249195Smm return (SET_ERROR(EINVAL)); 723168404Spjd } 724168404Spjd 725168404Spjd /* 726168404Spjd * Fasttrack empty reads 727168404Spjd */ 728168404Spjd if (uio->uio_resid == 0) { 729168404Spjd ZFS_EXIT(zfsvfs); 730168404Spjd return (0); 731168404Spjd } 732168404Spjd 733168404Spjd /* 734168962Spjd * Check for mandatory locks 735168962Spjd */ 736219089Spjd if (MANDMODE(zp->z_mode)) { 737168962Spjd if (error = chklock(vp, FREAD, 738168962Spjd uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 739168962Spjd ZFS_EXIT(zfsvfs); 740168962Spjd return (error); 741168962Spjd } 742168962Spjd } 743168962Spjd 744168962Spjd /* 745168404Spjd * If we're in FRSYNC mode, sync out this znode before reading it. 746168404Spjd */ 747224605Smm if (zfsvfs->z_log && 748224605Smm (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) 749219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 750168404Spjd 751168404Spjd /* 752168404Spjd * Lock the range against changes. 
753168404Spjd */ 754168404Spjd rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 755168404Spjd 756168404Spjd /* 757168404Spjd * If we are reading past end-of-file we can skip 758168404Spjd * to the end; but we might still need to set atime. 759168404Spjd */ 760219089Spjd if (uio->uio_loffset >= zp->z_size) { 761168404Spjd error = 0; 762168404Spjd goto out; 763168404Spjd } 764168404Spjd 765219089Spjd ASSERT(uio->uio_loffset < zp->z_size); 766219089Spjd n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 767168404Spjd 768277300Ssmh#ifdef illumos 769219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 770219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 771219089Spjd int nblk; 772219089Spjd int blksz = zp->z_blksz; 773219089Spjd uint64_t offset = uio->uio_loffset; 774219089Spjd 775219089Spjd xuio = (xuio_t *)uio; 776219089Spjd if ((ISP2(blksz))) { 777219089Spjd nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 778219089Spjd blksz)) / blksz; 779219089Spjd } else { 780219089Spjd ASSERT(offset + n <= blksz); 781219089Spjd nblk = 1; 782219089Spjd } 783219089Spjd (void) dmu_xuio_init(xuio, nblk); 784219089Spjd 785219089Spjd if (vn_has_cached_data(vp)) { 786219089Spjd /* 787219089Spjd * For simplicity, we always allocate a full buffer 788219089Spjd * even if we only expect to read a portion of a block. 
789219089Spjd */ 790219089Spjd while (--nblk >= 0) { 791219089Spjd (void) dmu_xuio_add(xuio, 792219089Spjd dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 793219089Spjd blksz), 0, blksz); 794219089Spjd } 795219089Spjd } 796219089Spjd } 797277300Ssmh#endif /* illumos */ 798219089Spjd 799168404Spjd while (n > 0) { 800168404Spjd nbytes = MIN(n, zfs_read_chunk_size - 801168404Spjd P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 802168404Spjd 803219089Spjd#ifdef __FreeBSD__ 804219089Spjd if (uio->uio_segflg == UIO_NOCOPY) 805219089Spjd error = mappedread_sf(vp, nbytes, uio); 806219089Spjd else 807219089Spjd#endif /* __FreeBSD__ */ 808272809Sdelphij if (vn_has_cached_data(vp)) { 809168404Spjd error = mappedread(vp, nbytes, uio); 810272809Sdelphij } else { 811272809Sdelphij error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 812272809Sdelphij uio, nbytes); 813272809Sdelphij } 814185029Spjd if (error) { 815185029Spjd /* convert checksum errors into IO errors */ 816185029Spjd if (error == ECKSUM) 817249195Smm error = SET_ERROR(EIO); 818168404Spjd break; 819185029Spjd } 820168962Spjd 821168404Spjd n -= nbytes; 822168404Spjd } 823168404Spjdout: 824168404Spjd zfs_range_unlock(rl); 825168404Spjd 826168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 827168404Spjd ZFS_EXIT(zfsvfs); 828168404Spjd return (error); 829168404Spjd} 830168404Spjd 831168404Spjd/* 832168404Spjd * Write the bytes to a file. 833168404Spjd * 834168404Spjd * IN: vp - vnode of file to be written to. 835168404Spjd * uio - structure supplying write location, range info, 836168404Spjd * and data buffer. 837251631Sdelphij * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is 838251631Sdelphij * set if in append mode. 839168404Spjd * cr - credentials of caller. 840185029Spjd * ct - caller context (NFS/CIFS fem monitor only) 841168404Spjd * 842168404Spjd * OUT: uio - updated offset and range. 843168404Spjd * 844251631Sdelphij * RETURN: 0 on success, error code on failure. 
845168404Spjd * 846168404Spjd * Timestamps: 847168404Spjd * vp - ctime|mtime updated if byte count > 0 848168404Spjd */ 849219089Spjd 850168404Spjd/* ARGSUSED */ 851168404Spjdstatic int 852168962Spjdzfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 853168404Spjd{ 854168404Spjd znode_t *zp = VTOZ(vp); 855168962Spjd rlim64_t limit = MAXOFFSET_T; 856168404Spjd ssize_t start_resid = uio->uio_resid; 857168404Spjd ssize_t tx_bytes; 858168404Spjd uint64_t end_size; 859168404Spjd dmu_tx_t *tx; 860168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 861185029Spjd zilog_t *zilog; 862168404Spjd offset_t woff; 863168404Spjd ssize_t n, nbytes; 864168404Spjd rl_t *rl; 865168404Spjd int max_blksz = zfsvfs->z_max_blksz; 866247187Smm int error = 0; 867209962Smm arc_buf_t *abuf; 868247187Smm iovec_t *aiov = NULL; 869219089Spjd xuio_t *xuio = NULL; 870219089Spjd int i_iov = 0; 871219089Spjd int iovcnt = uio->uio_iovcnt; 872219089Spjd iovec_t *iovp = uio->uio_iov; 873219089Spjd int write_eof; 874219089Spjd int count = 0; 875219089Spjd sa_bulk_attr_t bulk[4]; 876219089Spjd uint64_t mtime[2], ctime[2]; 877168404Spjd 878168404Spjd /* 879168404Spjd * Fasttrack empty write 880168404Spjd */ 881168404Spjd n = start_resid; 882168404Spjd if (n == 0) 883168404Spjd return (0); 884168404Spjd 885168962Spjd if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 886168962Spjd limit = MAXOFFSET_T; 887168962Spjd 888168404Spjd ZFS_ENTER(zfsvfs); 889185029Spjd ZFS_VERIFY_ZP(zp); 890168404Spjd 891219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 892219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 893219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 894219089Spjd &zp->z_size, 8); 895219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 896219089Spjd &zp->z_pflags, 8); 897219089Spjd 898168404Spjd /* 899262990Sdelphij * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. 
snapshots) our 900262990Sdelphij * callers might not be able to detect properly that we are read-only, 901262990Sdelphij * so check it explicitly here. 902262990Sdelphij */ 903262990Sdelphij if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 904262990Sdelphij ZFS_EXIT(zfsvfs); 905262990Sdelphij return (SET_ERROR(EROFS)); 906262990Sdelphij } 907262990Sdelphij 908262990Sdelphij /* 909321579Smav * If immutable or not appending then return EPERM. 910321579Smav * Intentionally allow ZFS_READONLY through here. 911321579Smav * See zfs_zaccess_common() 912185029Spjd */ 913321579Smav if ((zp->z_pflags & ZFS_IMMUTABLE) || 914219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 915219089Spjd (uio->uio_loffset < zp->z_size))) { 916185029Spjd ZFS_EXIT(zfsvfs); 917249195Smm return (SET_ERROR(EPERM)); 918185029Spjd } 919185029Spjd 920185029Spjd zilog = zfsvfs->z_log; 921185029Spjd 922185029Spjd /* 923219089Spjd * Validate file offset 924219089Spjd */ 925219089Spjd woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 926219089Spjd if (woff < 0) { 927219089Spjd ZFS_EXIT(zfsvfs); 928249195Smm return (SET_ERROR(EINVAL)); 929219089Spjd } 930219089Spjd 931219089Spjd /* 932219089Spjd * Check for mandatory locks before calling zfs_range_lock() 933219089Spjd * in order to prevent a deadlock with locks set via fcntl(). 934219089Spjd */ 935219089Spjd if (MANDMODE((mode_t)zp->z_mode) && 936219089Spjd (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 937219089Spjd ZFS_EXIT(zfsvfs); 938219089Spjd return (error); 939219089Spjd } 940219089Spjd 941277300Ssmh#ifdef illumos 942219089Spjd /* 943168404Spjd * Pre-fault the pages to ensure slow (eg NFS) pages 944168404Spjd * don't hold up txg. 945219089Spjd * Skip this if uio contains loaned arc_buf. 
946168404Spjd */ 947219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 948219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 949219089Spjd xuio = (xuio_t *)uio; 950219089Spjd else 951219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 952277300Ssmh#endif 953168404Spjd 954168404Spjd /* 955168404Spjd * If in append mode, set the io offset pointer to eof. 956168404Spjd */ 957213673Spjd if (ioflag & FAPPEND) { 958168404Spjd /* 959219089Spjd * Obtain an appending range lock to guarantee file append 960219089Spjd * semantics. We reset the write offset once we have the lock. 961168404Spjd */ 962168404Spjd rl = zfs_range_lock(zp, 0, n, RL_APPEND); 963219089Spjd woff = rl->r_off; 964168404Spjd if (rl->r_len == UINT64_MAX) { 965219089Spjd /* 966219089Spjd * We overlocked the file because this write will cause 967219089Spjd * the file block size to increase. 968219089Spjd * Note that zp_size cannot change with this lock held. 969219089Spjd */ 970219089Spjd woff = zp->z_size; 971168404Spjd } 972219089Spjd uio->uio_loffset = woff; 973168404Spjd } else { 974168404Spjd /* 975219089Spjd * Note that if the file block size will change as a result of 976219089Spjd * this write, then this range lock will lock the entire file 977219089Spjd * so that we can re-write the block safely. 978168404Spjd */ 979168404Spjd rl = zfs_range_lock(zp, woff, n, RL_WRITER); 980168404Spjd } 981168404Spjd 982235781Strasz if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { 983235781Strasz zfs_range_unlock(rl); 984235781Strasz ZFS_EXIT(zfsvfs); 985235781Strasz return (EFBIG); 986235781Strasz } 987235781Strasz 988168962Spjd if (woff >= limit) { 989168962Spjd zfs_range_unlock(rl); 990168962Spjd ZFS_EXIT(zfsvfs); 991249195Smm return (SET_ERROR(EFBIG)); 992168962Spjd } 993168962Spjd 994168962Spjd if ((woff + n) > limit || woff > (limit - n)) 995168962Spjd n = limit - woff; 996168962Spjd 997219089Spjd /* Will this write extend the file length? 
*/ 998219089Spjd write_eof = (woff + n > zp->z_size); 999168404Spjd 1000219089Spjd end_size = MAX(zp->z_size, woff + n); 1001219089Spjd 1002168404Spjd /* 1003168404Spjd * Write the file in reasonable size chunks. Each chunk is written 1004168404Spjd * in a separate transaction; this keeps the intent log records small 1005168404Spjd * and allows us to do more fine-grained space accounting. 1006168404Spjd */ 1007168404Spjd while (n > 0) { 1008209962Smm abuf = NULL; 1009209962Smm woff = uio->uio_loffset; 1010219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 1011219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 1012209962Smm if (abuf != NULL) 1013209962Smm dmu_return_arcbuf(abuf); 1014249195Smm error = SET_ERROR(EDQUOT); 1015209962Smm break; 1016209962Smm } 1017209962Smm 1018219089Spjd if (xuio && abuf == NULL) { 1019219089Spjd ASSERT(i_iov < iovcnt); 1020219089Spjd aiov = &iovp[i_iov]; 1021219089Spjd abuf = dmu_xuio_arcbuf(xuio, i_iov); 1022219089Spjd dmu_xuio_clear(xuio, i_iov); 1023219089Spjd DTRACE_PROBE3(zfs_cp_write, int, i_iov, 1024219089Spjd iovec_t *, aiov, arc_buf_t *, abuf); 1025219089Spjd ASSERT((aiov->iov_base == abuf->b_data) || 1026219089Spjd ((char *)aiov->iov_base - (char *)abuf->b_data + 1027219089Spjd aiov->iov_len == arc_buf_size(abuf))); 1028219089Spjd i_iov++; 1029219089Spjd } else if (abuf == NULL && n >= max_blksz && 1030219089Spjd woff >= zp->z_size && 1031209962Smm P2PHASE(woff, max_blksz) == 0 && 1032209962Smm zp->z_blksz == max_blksz) { 1033219089Spjd /* 1034219089Spjd * This write covers a full block. "Borrow" a buffer 1035219089Spjd * from the dmu so that we can fill it before we enter 1036219089Spjd * a transaction. This avoids the possibility of 1037219089Spjd * holding up the transaction if the data copy hangs 1038219089Spjd * up on a pagefault (e.g., from an NFS server mapping). 
1039219089Spjd */ 1040209962Smm size_t cbytes; 1041209962Smm 1042219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 1043219089Spjd max_blksz); 1044209962Smm ASSERT(abuf != NULL); 1045209962Smm ASSERT(arc_buf_size(abuf) == max_blksz); 1046209962Smm if (error = uiocopy(abuf->b_data, max_blksz, 1047209962Smm UIO_WRITE, uio, &cbytes)) { 1048209962Smm dmu_return_arcbuf(abuf); 1049209962Smm break; 1050209962Smm } 1051209962Smm ASSERT(cbytes == max_blksz); 1052209962Smm } 1053209962Smm 1054209962Smm /* 1055168404Spjd * Start a transaction. 1056168404Spjd */ 1057168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1058219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1059168404Spjd dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 1060219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1061258720Savg error = dmu_tx_assign(tx, TXG_WAIT); 1062168404Spjd if (error) { 1063168404Spjd dmu_tx_abort(tx); 1064209962Smm if (abuf != NULL) 1065209962Smm dmu_return_arcbuf(abuf); 1066168404Spjd break; 1067168404Spjd } 1068168404Spjd 1069168404Spjd /* 1070168404Spjd * If zfs_range_lock() over-locked we grow the blocksize 1071168404Spjd * and then reduce the lock range. This will only happen 1072168404Spjd * on the first iteration since zfs_range_reduce() will 1073168404Spjd * shrink down r_len to the appropriate size. 1074168404Spjd */ 1075168404Spjd if (rl->r_len == UINT64_MAX) { 1076168404Spjd uint64_t new_blksz; 1077168404Spjd 1078168404Spjd if (zp->z_blksz > max_blksz) { 1079274337Sdelphij /* 1080274337Sdelphij * File's blocksize is already larger than the 1081274337Sdelphij * "recordsize" property. Only let it grow to 1082274337Sdelphij * the next power of 2. 
1083274337Sdelphij */ 1084168404Spjd ASSERT(!ISP2(zp->z_blksz)); 1085274337Sdelphij new_blksz = MIN(end_size, 1086274337Sdelphij 1 << highbit64(zp->z_blksz)); 1087168404Spjd } else { 1088168404Spjd new_blksz = MIN(end_size, max_blksz); 1089168404Spjd } 1090168404Spjd zfs_grow_blocksize(zp, new_blksz, tx); 1091168404Spjd zfs_range_reduce(rl, woff, n); 1092168404Spjd } 1093168404Spjd 1094168404Spjd /* 1095168404Spjd * XXX - should we really limit each write to z_max_blksz? 1096168404Spjd * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 1097168404Spjd */ 1098168404Spjd nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 1099168404Spjd 1100219089Spjd if (woff + nbytes > zp->z_size) 1101168404Spjd vnode_pager_setsize(vp, woff + nbytes); 1102168404Spjd 1103209962Smm if (abuf == NULL) { 1104209962Smm tx_bytes = uio->uio_resid; 1105219089Spjd error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 1106219089Spjd uio, nbytes, tx); 1107209962Smm tx_bytes -= uio->uio_resid; 1108168404Spjd } else { 1109209962Smm tx_bytes = nbytes; 1110219089Spjd ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 1111219089Spjd /* 1112219089Spjd * If this is not a full block write, but we are 1113219089Spjd * extending the file past EOF and this data starts 1114219089Spjd * block-aligned, use assign_arcbuf(). Otherwise, 1115219089Spjd * write via dmu_write(). 
1116219089Spjd */ 1117219089Spjd if (tx_bytes < max_blksz && (!write_eof || 1118219089Spjd aiov->iov_base != abuf->b_data)) { 1119219089Spjd ASSERT(xuio); 1120219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, woff, 1121219089Spjd aiov->iov_len, aiov->iov_base, tx); 1122219089Spjd dmu_return_arcbuf(abuf); 1123219089Spjd xuio_stat_wbuf_copied(); 1124219089Spjd } else { 1125219089Spjd ASSERT(xuio || tx_bytes == max_blksz); 1126219089Spjd dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 1127219089Spjd woff, abuf, tx); 1128219089Spjd } 1129209962Smm ASSERT(tx_bytes <= uio->uio_resid); 1130209962Smm uioskip(uio, tx_bytes); 1131168404Spjd } 1132212657Savg if (tx_bytes && vn_has_cached_data(vp)) { 1133209962Smm update_pages(vp, woff, tx_bytes, zfsvfs->z_os, 1134209962Smm zp->z_id, uio->uio_segflg, tx); 1135209962Smm } 1136209962Smm 1137209962Smm /* 1138168404Spjd * If we made no progress, we're done. If we made even 1139168404Spjd * partial progress, update the znode and ZIL accordingly. 1140168404Spjd */ 1141168404Spjd if (tx_bytes == 0) { 1142219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 1143219089Spjd (void *)&zp->z_size, sizeof (uint64_t), tx); 1144168404Spjd dmu_tx_commit(tx); 1145168404Spjd ASSERT(error != 0); 1146168404Spjd break; 1147168404Spjd } 1148168404Spjd 1149168404Spjd /* 1150168404Spjd * Clear Set-UID/Set-GID bits on successful write if not 1151168404Spjd * privileged and at least one of the excute bits is set. 1152168404Spjd * 1153168404Spjd * It would be nice to to this after all writes have 1154168404Spjd * been done, but that would still expose the ISUID/ISGID 1155168404Spjd * to another app after the partial write is committed. 1156185029Spjd * 1157185029Spjd * Note: we don't call zfs_fuid_map_id() here because 1158185029Spjd * user 0 is not an ephemeral uid. 
1159168404Spjd */ 1160168404Spjd mutex_enter(&zp->z_acl_lock); 1161219089Spjd if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 1162168404Spjd (S_IXUSR >> 6))) != 0 && 1163219089Spjd (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 1164185029Spjd secpolicy_vnode_setid_retain(vp, cr, 1165219089Spjd (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 1166219089Spjd uint64_t newmode; 1167219089Spjd zp->z_mode &= ~(S_ISUID | S_ISGID); 1168219089Spjd newmode = zp->z_mode; 1169219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 1170219089Spjd (void *)&newmode, sizeof (uint64_t), tx); 1171168404Spjd } 1172168404Spjd mutex_exit(&zp->z_acl_lock); 1173168404Spjd 1174219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 1175219089Spjd B_TRUE); 1176168404Spjd 1177168404Spjd /* 1178168404Spjd * Update the file size (zp_size) if it has changed; 1179168404Spjd * account for possible concurrent updates. 1180168404Spjd */ 1181219089Spjd while ((end_size = zp->z_size) < uio->uio_loffset) { 1182219089Spjd (void) atomic_cas_64(&zp->z_size, end_size, 1183168404Spjd uio->uio_loffset); 1184298105Savg#ifdef illumos 1185219089Spjd ASSERT(error == 0); 1186298105Savg#else 1187298105Savg ASSERT(error == 0 || error == EFAULT); 1188298105Savg#endif 1189219089Spjd } 1190219089Spjd /* 1191219089Spjd * If we are replaying and eof is non zero then force 1192219089Spjd * the file size to the specified eof. Note, there's no 1193219089Spjd * concurrency during replay. 
1194219089Spjd */ 1195219089Spjd if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 1196219089Spjd zp->z_size = zfsvfs->z_replay_eof; 1197219089Spjd 1198298105Savg if (error == 0) 1199298105Savg error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1200298105Savg else 1201298105Savg (void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1202219089Spjd 1203168404Spjd zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 1204168404Spjd dmu_tx_commit(tx); 1205168404Spjd 1206168404Spjd if (error != 0) 1207168404Spjd break; 1208168404Spjd ASSERT(tx_bytes == nbytes); 1209168404Spjd n -= nbytes; 1210219089Spjd 1211277300Ssmh#ifdef illumos 1212219089Spjd if (!xuio && n > 0) 1213219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 1214277300Ssmh#endif 1215168404Spjd } 1216168404Spjd 1217168404Spjd zfs_range_unlock(rl); 1218168404Spjd 1219168404Spjd /* 1220168404Spjd * If we're in replay mode, or we made no progress, return error. 1221168404Spjd * Otherwise, it's at least a partial write, so it's successful. 1222168404Spjd */ 1223209962Smm if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1224168404Spjd ZFS_EXIT(zfsvfs); 1225168404Spjd return (error); 1226168404Spjd } 1227168404Spjd 1228298105Savg#ifdef __FreeBSD__ 1229298105Savg /* 1230298105Savg * EFAULT means that at least one page of the source buffer was not 1231298105Savg * available. VFS will re-try remaining I/O upon this error. 
1232298105Savg */ 1233298105Savg if (error == EFAULT) { 1234298105Savg ZFS_EXIT(zfsvfs); 1235298105Savg return (error); 1236298105Savg } 1237298105Savg#endif 1238298105Savg 1239219089Spjd if (ioflag & (FSYNC | FDSYNC) || 1240219089Spjd zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1241219089Spjd zil_commit(zilog, zp->z_id); 1242168404Spjd 1243168404Spjd ZFS_EXIT(zfsvfs); 1244168404Spjd return (0); 1245168404Spjd} 1246168404Spjd 1247168404Spjdvoid 1248219089Spjdzfs_get_done(zgd_t *zgd, int error) 1249168404Spjd{ 1250219089Spjd znode_t *zp = zgd->zgd_private; 1251219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 1252168404Spjd 1253219089Spjd if (zgd->zgd_db) 1254219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1255219089Spjd 1256219089Spjd zfs_range_unlock(zgd->zgd_rl); 1257219089Spjd 1258191900Skmacy /* 1259191900Skmacy * Release the vnode asynchronously as we currently have the 1260191900Skmacy * txg stopped from syncing. 1261191900Skmacy */ 1262219089Spjd VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1263219089Spjd 1264219089Spjd if (error == 0 && zgd->zgd_bp) 1265325132Savg zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp); 1266219089Spjd 1267168404Spjd kmem_free(zgd, sizeof (zgd_t)); 1268168404Spjd} 1269168404Spjd 1270214378Smm#ifdef DEBUG 1271214378Smmstatic int zil_fault_io = 0; 1272214378Smm#endif 1273214378Smm 1274168404Spjd/* 1275168404Spjd * Get data to generate a TX_WRITE intent log record. 
1276168404Spjd */ 1277168404Spjdint 1278325132Savgzfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) 1279168404Spjd{ 1280168404Spjd zfsvfs_t *zfsvfs = arg; 1281168404Spjd objset_t *os = zfsvfs->z_os; 1282168404Spjd znode_t *zp; 1283219089Spjd uint64_t object = lr->lr_foid; 1284219089Spjd uint64_t offset = lr->lr_offset; 1285219089Spjd uint64_t size = lr->lr_length; 1286168404Spjd dmu_buf_t *db; 1287168404Spjd zgd_t *zgd; 1288168404Spjd int error = 0; 1289168404Spjd 1290325132Savg ASSERT3P(lwb, !=, NULL); 1291325132Savg ASSERT3P(zio, !=, NULL); 1292325132Savg ASSERT3U(size, !=, 0); 1293168404Spjd 1294168404Spjd /* 1295168404Spjd * Nothing to do if the file has been removed 1296168404Spjd */ 1297219089Spjd if (zfs_zget(zfsvfs, object, &zp) != 0) 1298249195Smm return (SET_ERROR(ENOENT)); 1299168404Spjd if (zp->z_unlinked) { 1300191900Skmacy /* 1301191900Skmacy * Release the vnode asynchronously as we currently have the 1302191900Skmacy * txg stopped from syncing. 1303191900Skmacy */ 1304196307Spjd VN_RELE_ASYNC(ZTOV(zp), 1305196307Spjd dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1306249195Smm return (SET_ERROR(ENOENT)); 1307168404Spjd } 1308168404Spjd 1309219089Spjd zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1310325132Savg zgd->zgd_lwb = lwb; 1311219089Spjd zgd->zgd_private = zp; 1312219089Spjd 1313168404Spjd /* 1314168404Spjd * Write records come in two flavors: immediate and indirect. 1315168404Spjd * For small writes it's cheaper to store the data with the 1316168404Spjd * log record (immediate); for large writes it's cheaper to 1317168404Spjd * sync the data and get a pointer to it (indirect) so that 1318168404Spjd * we don't have to write the data twice. 
1319168404Spjd */ 1320168404Spjd if (buf != NULL) { /* immediate write */ 1321219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1322168404Spjd /* test for truncation needs to be done while range locked */ 1323219089Spjd if (offset >= zp->z_size) { 1324249195Smm error = SET_ERROR(ENOENT); 1325219089Spjd } else { 1326219089Spjd error = dmu_read(os, object, offset, size, buf, 1327219089Spjd DMU_READ_NO_PREFETCH); 1328168404Spjd } 1329219089Spjd ASSERT(error == 0 || error == ENOENT); 1330168404Spjd } else { /* indirect write */ 1331168404Spjd /* 1332168404Spjd * Have to lock the whole block to ensure when it's 1333324203Savg * written out and its checksum is being calculated 1334168404Spjd * that no one can change the data. We need to re-check 1335168404Spjd * blocksize after we get the lock in case it's changed! 1336168404Spjd */ 1337168404Spjd for (;;) { 1338219089Spjd uint64_t blkoff; 1339219089Spjd size = zp->z_blksz; 1340219089Spjd blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; 1341219089Spjd offset -= blkoff; 1342219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1343219089Spjd RL_READER); 1344219089Spjd if (zp->z_blksz == size) 1345168404Spjd break; 1346219089Spjd offset += blkoff; 1347219089Spjd zfs_range_unlock(zgd->zgd_rl); 1348168404Spjd } 1349168404Spjd /* test for truncation needs to be done while range locked */ 1350219089Spjd if (lr->lr_offset >= zp->z_size) 1351249195Smm error = SET_ERROR(ENOENT); 1352214378Smm#ifdef DEBUG 1353214378Smm if (zil_fault_io) { 1354249195Smm error = SET_ERROR(EIO); 1355214378Smm zil_fault_io = 0; 1356214378Smm } 1357214378Smm#endif 1358219089Spjd if (error == 0) 1359219089Spjd error = dmu_buf_hold(os, object, offset, zgd, &db, 1360219089Spjd DMU_READ_NO_PREFETCH); 1361214378Smm 1362209962Smm if (error == 0) { 1363323748Savg blkptr_t *bp = &lr->lr_blkptr; 1364243524Smm 1365219089Spjd zgd->zgd_db = db; 1366219089Spjd zgd->zgd_bp = bp; 1367219089Spjd 1368219089Spjd ASSERT(db->db_offset == 
offset); 1369219089Spjd ASSERT(db->db_size == size); 1370219089Spjd 1371219089Spjd error = dmu_sync(zio, lr->lr_common.lrc_txg, 1372219089Spjd zfs_get_done, zgd); 1373321559Smav ASSERT(error || lr->lr_length <= size); 1374219089Spjd 1375209962Smm /* 1376219089Spjd * On success, we need to wait for the write I/O 1377219089Spjd * initiated by dmu_sync() to complete before we can 1378219089Spjd * release this dbuf. We will finish everything up 1379219089Spjd * in the zfs_get_done() callback. 1380209962Smm */ 1381219089Spjd if (error == 0) 1382219089Spjd return (0); 1383209962Smm 1384219089Spjd if (error == EALREADY) { 1385219089Spjd lr->lr_common.lrc_txtype = TX_WRITE2; 1386332525Smav /* 1387332525Smav * TX_WRITE2 relies on the data previously 1388332525Smav * written by the TX_WRITE that caused 1389332525Smav * EALREADY. We zero out the BP because 1390332525Smav * it is the old, currently-on-disk BP, 1391332525Smav * so there's no need to zio_flush() its 1392332525Smav * vdevs (flushing would needlesly hurt 1393332525Smav * performance, and doesn't work on 1394332525Smav * indirect vdevs). 
1395332525Smav */ 1396332525Smav zgd->zgd_bp = NULL; 1397332525Smav BP_ZERO(bp); 1398219089Spjd error = 0; 1399219089Spjd } 1400209962Smm } 1401168404Spjd } 1402219089Spjd 1403219089Spjd zfs_get_done(zgd, error); 1404219089Spjd 1405168404Spjd return (error); 1406168404Spjd} 1407168404Spjd 1408168404Spjd/*ARGSUSED*/ 1409168404Spjdstatic int 1410185029Spjdzfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1411185029Spjd caller_context_t *ct) 1412168404Spjd{ 1413168404Spjd znode_t *zp = VTOZ(vp); 1414168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1415168404Spjd int error; 1416168404Spjd 1417168404Spjd ZFS_ENTER(zfsvfs); 1418185029Spjd ZFS_VERIFY_ZP(zp); 1419185029Spjd 1420185029Spjd if (flag & V_ACE_MASK) 1421185029Spjd error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1422185029Spjd else 1423185029Spjd error = zfs_zaccess_rwx(zp, mode, flag, cr); 1424185029Spjd 1425168404Spjd ZFS_EXIT(zfsvfs); 1426168404Spjd return (error); 1427168404Spjd} 1428168404Spjd 1429211932Smmstatic int 1430303970Savgzfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp) 1431211932Smm{ 1432303970Savg int error; 1433211932Smm 1434303970Savg *vpp = arg; 1435303970Savg error = vn_lock(*vpp, lkflags); 1436303970Savg if (error != 0) 1437303970Savg vrele(*vpp); 1438303970Savg return (error); 1439303970Savg} 1440211932Smm 1441303970Savgstatic int 1442303970Savgzfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags) 1443303970Savg{ 1444303970Savg znode_t *zdp = VTOZ(dvp); 1445303970Savg zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1446303970Savg int error; 1447303970Savg int ltype; 1448303970Savg 1449303970Savg ASSERT_VOP_LOCKED(dvp, __func__); 1450303970Savg#ifdef DIAGNOSTIC 1451307142Savg if ((zdp->z_pflags & ZFS_XATTR) == 0) 1452307142Savg VERIFY(!RRM_LOCK_HELD(&zfsvfs->z_teardown_lock)); 1453303970Savg#endif 1454303970Savg 1455303970Savg if (name[0] == 0 || (name[0] == '.' 
&& name[1] == 0)) { 1456303970Savg ASSERT3P(dvp, ==, vp); 1457303970Savg vref(dvp); 1458303970Savg ltype = lkflags & LK_TYPE_MASK; 1459303970Savg if (ltype != VOP_ISLOCKED(dvp)) { 1460303970Savg if (ltype == LK_EXCLUSIVE) 1461303970Savg vn_lock(dvp, LK_UPGRADE | LK_RETRY); 1462303970Savg else /* if (ltype == LK_SHARED) */ 1463303970Savg vn_lock(dvp, LK_DOWNGRADE | LK_RETRY); 1464303970Savg 1465303970Savg /* 1466303970Savg * Relock for the "." case could leave us with 1467303970Savg * reclaimed vnode. 1468303970Savg */ 1469303970Savg if (dvp->v_iflag & VI_DOOMED) { 1470303970Savg vrele(dvp); 1471303970Savg return (SET_ERROR(ENOENT)); 1472303970Savg } 1473303970Savg } 1474303970Savg return (0); 1475303970Savg } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { 1476303970Savg /* 1477303970Savg * Note that in this case, dvp is the child vnode, and we 1478303970Savg * are looking up the parent vnode - exactly reverse from 1479303970Savg * normal operation. Unlocking dvp requires some rather 1480303970Savg * tricky unlock/relock dance to prevent mp from being freed; 1481303970Savg * use vn_vget_ino_gen() which takes care of all that. 1482303970Savg * 1483303970Savg * XXX Note that there is a time window when both vnodes are 1484303970Savg * unlocked. It is possible, although highly unlikely, that 1485303970Savg * during that window the parent-child relationship between 1486303970Savg * the vnodes may change, for example, get reversed. 1487303970Savg * In that case we would have a wrong lock order for the vnodes. 1488303970Savg * All other filesystems seem to ignore this problem, so we 1489303970Savg * do the same here. 
1490303970Savg * A potential solution could be implemented as follows: 1491303970Savg * - using LK_NOWAIT when locking the second vnode and retrying 1492303970Savg * if necessary 1493303970Savg * - checking that the parent-child relationship still holds 1494303970Savg * after locking both vnodes and retrying if it doesn't 1495303970Savg */ 1496303970Savg error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp); 1497303970Savg return (error); 1498303970Savg } else { 1499303970Savg error = vn_lock(vp, lkflags); 1500303970Savg if (error != 0) 1501303970Savg vrele(vp); 1502303970Savg return (error); 1503211932Smm } 1504211932Smm} 1505211932Smm 1506211932Smm/* 1507168404Spjd * Lookup an entry in a directory, or an extended attribute directory. 1508168404Spjd * If it exists, return a held vnode reference for it. 1509168404Spjd * 1510168404Spjd * IN: dvp - vnode of directory to search. 1511168404Spjd * nm - name of entry to lookup. 1512168404Spjd * pnp - full pathname to lookup [UNUSED]. 1513168404Spjd * flags - LOOKUP_XATTR set if looking for an attribute. 1514168404Spjd * rdir - root directory vnode [UNUSED]. 1515168404Spjd * cr - credentials of caller. 1516185029Spjd * ct - caller context 1517168404Spjd * 1518168404Spjd * OUT: vpp - vnode of located entry, NULL if not found. 1519168404Spjd * 1520251631Sdelphij * RETURN: 0 on success, error code on failure. 
1521168404Spjd * 1522168404Spjd * Timestamps: 1523168404Spjd * NA 1524168404Spjd */ 1525168404Spjd/* ARGSUSED */ 1526168962Spjdstatic int 1527168962Spjdzfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1528357605Skevans int nameiop, cred_t *cr, kthread_t *td, int flags, boolean_t cached) 1529168404Spjd{ 1530168962Spjd znode_t *zdp = VTOZ(dvp); 1531303970Savg znode_t *zp; 1532168962Spjd zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1533211932Smm int error = 0; 1534168404Spjd 1535321545Smav /* 1536321545Smav * Fast path lookup, however we must skip DNLC lookup 1537321545Smav * for case folding or normalizing lookups because the 1538321545Smav * DNLC code only stores the passed in name. This means 1539321545Smav * creating 'a' and removing 'A' on a case insensitive 1540321545Smav * file system would work, but DNLC still thinks 'a' 1541321545Smav * exists and won't let you create it again on the next 1542321545Smav * pass through fast path. 1543321545Smav */ 1544303970Savg if (!(flags & LOOKUP_XATTR)) { 1545211932Smm if (dvp->v_type != VDIR) { 1546249195Smm return (SET_ERROR(ENOTDIR)); 1547219089Spjd } else if (zdp->z_sa_hdl == NULL) { 1548249195Smm return (SET_ERROR(EIO)); 1549211932Smm } 1550211932Smm } 1551211932Smm 1552211932Smm DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1553211932Smm 1554168404Spjd ZFS_ENTER(zfsvfs); 1555185029Spjd ZFS_VERIFY_ZP(zdp); 1556168404Spjd 1557168404Spjd *vpp = NULL; 1558168404Spjd 1559185029Spjd if (flags & LOOKUP_XATTR) { 1560168404Spjd#ifdef TODO 1561168404Spjd /* 1562168404Spjd * If the xattr property is off, refuse the lookup request. 1563168404Spjd */ 1564168404Spjd if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1565168404Spjd ZFS_EXIT(zfsvfs); 1566249195Smm return (SET_ERROR(EINVAL)); 1567168404Spjd } 1568185029Spjd#endif 1569168404Spjd 1570168404Spjd /* 1571168404Spjd * We don't allow recursive attributes.. 1572168404Spjd * Maybe someday we will. 
1573168404Spjd */ 1574219089Spjd if (zdp->z_pflags & ZFS_XATTR) { 1575168404Spjd ZFS_EXIT(zfsvfs); 1576249195Smm return (SET_ERROR(EINVAL)); 1577168404Spjd } 1578168404Spjd 1579168404Spjd if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1580168404Spjd ZFS_EXIT(zfsvfs); 1581168404Spjd return (error); 1582168404Spjd } 1583168404Spjd 1584168404Spjd /* 1585168404Spjd * Do we have permission to get into attribute directory? 1586168404Spjd */ 1587185029Spjd if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1588185029Spjd B_FALSE, cr)) { 1589303970Savg vrele(*vpp); 1590185029Spjd *vpp = NULL; 1591168404Spjd } 1592168404Spjd 1593168404Spjd ZFS_EXIT(zfsvfs); 1594168404Spjd return (error); 1595168404Spjd } 1596168404Spjd 1597168404Spjd /* 1598168404Spjd * Check accessibility of directory. 1599168404Spjd */ 1600357605Skevans if (!cached) { 1601357706Skevans if ((cnp->cn_flags & NOEXECCHECK) != 0) { 1602357706Skevans cnp->cn_flags &= ~NOEXECCHECK; 1603357706Skevans } else { 1604357706Skevans error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr); 1605357706Skevans if (error != 0) { 1606357706Skevans ZFS_EXIT(zfsvfs); 1607357706Skevans return (error); 1608357706Skevans } 1609357605Skevans } 1610168404Spjd } 1611168404Spjd 1612185029Spjd if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1613185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1614185029Spjd ZFS_EXIT(zfsvfs); 1615249195Smm return (SET_ERROR(EILSEQ)); 1616185029Spjd } 1617168404Spjd 1618168962Spjd 1619303970Savg /* 1620303970Savg * First handle the special cases. 1621303970Savg */ 1622303970Savg if ((cnp->cn_flags & ISDOTDOT) != 0) { 1623303970Savg /* 1624303970Savg * If we are a snapshot mounted under .zfs, return 1625303970Savg * the vp for the snapshot directory. 
1626303970Savg */ 1627303970Savg if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) { 1628315842Savg struct componentname cn; 1629315842Savg vnode_t *zfsctl_vp; 1630315842Savg int ltype; 1631315842Savg 1632303970Savg ZFS_EXIT(zfsvfs); 1633315842Savg ltype = VOP_ISLOCKED(dvp); 1634315842Savg VOP_UNLOCK(dvp, 0); 1635315842Savg error = zfsctl_root(zfsvfs->z_parent, LK_SHARED, 1636315842Savg &zfsctl_vp); 1637303970Savg if (error == 0) { 1638315842Savg cn.cn_nameptr = "snapshot"; 1639315842Savg cn.cn_namelen = strlen(cn.cn_nameptr); 1640315842Savg cn.cn_nameiop = cnp->cn_nameiop; 1641319415Savg cn.cn_flags = cnp->cn_flags & ~ISDOTDOT; 1642315842Savg cn.cn_lkflags = cnp->cn_lkflags; 1643315842Savg error = VOP_LOOKUP(zfsctl_vp, vpp, &cn); 1644315842Savg vput(zfsctl_vp); 1645303970Savg } 1646315842Savg vn_lock(dvp, ltype | LK_RETRY); 1647315842Savg return (error); 1648303970Savg } 1649303970Savg } 1650303970Savg if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) { 1651315842Savg ZFS_EXIT(zfsvfs); 1652303970Savg if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP) 1653315842Savg return (SET_ERROR(ENOTSUP)); 1654315842Savg error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp); 1655315842Savg return (error); 1656303970Savg } 1657303970Savg 1658303970Savg /* 1659303970Savg * The loop is retry the lookup if the parent-child relationship 1660303970Savg * changes during the dot-dot locking complexities. 1661303970Savg */ 1662303970Savg for (;;) { 1663303970Savg uint64_t parent; 1664303970Savg 1665303970Savg error = zfs_dirlook(zdp, nm, &zp); 1666303970Savg if (error == 0) 1667303970Savg *vpp = ZTOV(zp); 1668303970Savg 1669303970Savg ZFS_EXIT(zfsvfs); 1670303970Savg if (error != 0) 1671303970Savg break; 1672303970Savg 1673303970Savg error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags); 1674303970Savg if (error != 0) { 1675303970Savg /* 1676303970Savg * If we've got a locking error, then the vnode 1677303970Savg * got reclaimed because of a force unmount. 
1678303970Savg * We never enter doomed vnodes into the name cache. 1679303970Savg */ 1680303970Savg *vpp = NULL; 1681303970Savg return (error); 1682303970Savg } 1683303970Savg 1684303970Savg if ((cnp->cn_flags & ISDOTDOT) == 0) 1685303970Savg break; 1686303970Savg 1687303970Savg ZFS_ENTER(zfsvfs); 1688303970Savg if (zdp->z_sa_hdl == NULL) { 1689303970Savg error = SET_ERROR(EIO); 1690303970Savg } else { 1691303970Savg error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 1692303970Savg &parent, sizeof (parent)); 1693303970Savg } 1694303970Savg if (error != 0) { 1695303970Savg ZFS_EXIT(zfsvfs); 1696303970Savg vput(ZTOV(zp)); 1697303970Savg break; 1698303970Savg } 1699303970Savg if (zp->z_id == parent) { 1700303970Savg ZFS_EXIT(zfsvfs); 1701303970Savg break; 1702303970Savg } 1703303970Savg vput(ZTOV(zp)); 1704303970Savg } 1705303970Savg 1706303970Savgout: 1707303970Savg if (error != 0) 1708303970Savg *vpp = NULL; 1709303970Savg 1710168404Spjd /* Translate errors and add SAVENAME when needed. */ 1711168404Spjd if (cnp->cn_flags & ISLASTCN) { 1712168404Spjd switch (nameiop) { 1713168404Spjd case CREATE: 1714168404Spjd case RENAME: 1715168404Spjd if (error == ENOENT) { 1716168404Spjd error = EJUSTRETURN; 1717168404Spjd cnp->cn_flags |= SAVENAME; 1718168404Spjd break; 1719168404Spjd } 1720168404Spjd /* FALLTHROUGH */ 1721168404Spjd case DELETE: 1722168404Spjd if (error == 0) 1723168404Spjd cnp->cn_flags |= SAVENAME; 1724168404Spjd break; 1725168404Spjd } 1726168404Spjd } 1727169198Spjd 1728303970Savg /* Insert name into cache (as non-existent) if appropriate. */ 1729303970Savg if (zfsvfs->z_use_namecache && 1730303970Savg error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0) 1731303970Savg cache_enter(dvp, NULL, cnp); 1732168404Spjd 1733303970Savg /* Insert name into cache if appropriate. 
*/ 1734303970Savg if (zfsvfs->z_use_namecache && 1735303970Savg error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1736168404Spjd if (!(cnp->cn_flags & ISLASTCN) || 1737168404Spjd (nameiop != DELETE && nameiop != RENAME)) { 1738168404Spjd cache_enter(dvp, *vpp, cnp); 1739168404Spjd } 1740168404Spjd } 1741168404Spjd 1742168404Spjd return (error); 1743168404Spjd} 1744168404Spjd 1745168404Spjd/* 1746168404Spjd * Attempt to create a new entry in a directory. If the entry 1747168404Spjd * already exists, truncate the file if permissible, else return 1748168404Spjd * an error. Return the vp of the created or trunc'd file. 1749168404Spjd * 1750168404Spjd * IN: dvp - vnode of directory to put new file entry in. 1751168404Spjd * name - name of new file entry. 1752168404Spjd * vap - attributes of new file. 1753168404Spjd * excl - flag indicating exclusive or non-exclusive mode. 1754168404Spjd * mode - mode to open file with. 1755168404Spjd * cr - credentials of caller. 1756168404Spjd * flag - large file flag [UNUSED]. 1757185029Spjd * ct - caller context 1758268464Sdelphij * vsecp - ACL to be set 1759168404Spjd * 1760168404Spjd * OUT: vpp - vnode of created or trunc'd entry. 1761168404Spjd * 1762251631Sdelphij * RETURN: 0 on success, error code on failure. 
1763168404Spjd * 1764168404Spjd * Timestamps: 1765168404Spjd * dvp - ctime|mtime updated if new entry created 1766168404Spjd * vp - ctime|mtime always, atime if new 1767168404Spjd */ 1768185029Spjd 1769168404Spjd/* ARGSUSED */ 1770168404Spjdstatic int 1771168962Spjdzfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 1772185029Spjd vnode_t **vpp, cred_t *cr, kthread_t *td) 1773168404Spjd{ 1774168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1775168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1776185029Spjd zilog_t *zilog; 1777185029Spjd objset_t *os; 1778168404Spjd dmu_tx_t *tx; 1779168404Spjd int error; 1780209962Smm ksid_t *ksid; 1781209962Smm uid_t uid; 1782209962Smm gid_t gid = crgetgid(cr); 1783219089Spjd zfs_acl_ids_t acl_ids; 1784209962Smm boolean_t fuid_dirtied; 1785185029Spjd void *vsecp = NULL; 1786185029Spjd int flag = 0; 1787303970Savg uint64_t txtype; 1788168404Spjd 1789185029Spjd /* 1790185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 1791185029Spjd * make sure file system is at proper version 1792185029Spjd */ 1793185029Spjd 1794209962Smm ksid = crgetsid(cr, KSID_OWNER); 1795209962Smm if (ksid) 1796209962Smm uid = ksid_getid(ksid); 1797209962Smm else 1798209962Smm uid = crgetuid(cr); 1799219089Spjd 1800185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 1801185029Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 1802219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1803249195Smm return (SET_ERROR(EINVAL)); 1804185029Spjd 1805168404Spjd ZFS_ENTER(zfsvfs); 1806185029Spjd ZFS_VERIFY_ZP(dzp); 1807185029Spjd os = zfsvfs->z_os; 1808185029Spjd zilog = zfsvfs->z_log; 1809168404Spjd 1810185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1811185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1812185029Spjd ZFS_EXIT(zfsvfs); 1813249195Smm return (SET_ERROR(EILSEQ)); 1814185029Spjd } 1815185029Spjd 1816185029Spjd if (vap->va_mask & AT_XVATTR) { 1817197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 1818185029Spjd 
crgetuid(cr), cr, vap->va_type)) != 0) { 1819185029Spjd ZFS_EXIT(zfsvfs); 1820185029Spjd return (error); 1821185029Spjd } 1822185029Spjd } 1823260704Savg 1824168404Spjd *vpp = NULL; 1825168404Spjd 1826182905Strasz if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 1827182905Strasz vap->va_mode &= ~S_ISVTX; 1828168404Spjd 1829303970Savg error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 1830303970Savg if (error) { 1831303970Savg ZFS_EXIT(zfsvfs); 1832303970Savg return (error); 1833303970Savg } 1834303970Savg ASSERT3P(zp, ==, NULL); 1835185029Spjd 1836303970Savg /* 1837303970Savg * Create a new file object and update the directory 1838303970Savg * to reference it. 1839303970Savg */ 1840303970Savg if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1841303970Savg goto out; 1842168404Spjd } 1843219089Spjd 1844303970Savg /* 1845303970Savg * We only support the creation of regular files in 1846303970Savg * extended attribute directories. 1847303970Savg */ 1848168404Spjd 1849303970Savg if ((dzp->z_pflags & ZFS_XATTR) && 1850303970Savg (vap->va_type != VREG)) { 1851303970Savg error = SET_ERROR(EINVAL); 1852303970Savg goto out; 1853303970Savg } 1854168404Spjd 1855303970Savg if ((error = zfs_acl_ids_create(dzp, 0, vap, 1856303970Savg cr, vsecp, &acl_ids)) != 0) 1857303970Savg goto out; 1858219089Spjd 1859303970Savg if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 1860303970Savg zfs_acl_ids_free(&acl_ids); 1861303970Savg error = SET_ERROR(EDQUOT); 1862303970Savg goto out; 1863303970Savg } 1864168404Spjd 1865303970Savg getnewvnode_reserve(1); 1866209962Smm 1867303970Savg tx = dmu_tx_create(os); 1868209962Smm 1869303970Savg dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 1870303970Savg ZFS_SA_BASE_ATTR_SIZE); 1871219089Spjd 1872303970Savg fuid_dirtied = zfsvfs->z_fuid_dirty; 1873303970Savg if (fuid_dirtied) 1874303970Savg zfs_fuid_txhold(zfsvfs, tx); 1875303970Savg dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 1876303970Savg dmu_tx_hold_sa(tx, 
dzp->z_sa_hdl, B_FALSE); 1877303970Savg if (!zfsvfs->z_use_sa && 1878303970Savg acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1879303970Savg dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1880303970Savg 0, acl_ids.z_aclp->z_acl_bytes); 1881303970Savg } 1882303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 1883303970Savg if (error) { 1884209962Smm zfs_acl_ids_free(&acl_ids); 1885303970Savg dmu_tx_abort(tx); 1886303970Savg getnewvnode_drop_reserve(); 1887303970Savg ZFS_EXIT(zfsvfs); 1888303970Savg return (error); 1889303970Savg } 1890303970Savg zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1891185029Spjd 1892303970Savg if (fuid_dirtied) 1893303970Savg zfs_fuid_sync(zfsvfs, tx); 1894219089Spjd 1895303970Savg (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 1896303970Savg txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1897303970Savg zfs_log_create(zilog, tx, txtype, dzp, zp, name, 1898303970Savg vsecp, acl_ids.z_fuidp, vap); 1899303970Savg zfs_acl_ids_free(&acl_ids); 1900303970Savg dmu_tx_commit(tx); 1901168404Spjd 1902303970Savg getnewvnode_drop_reserve(); 1903168404Spjd 1904168404Spjdout: 1905303970Savg if (error == 0) { 1906168962Spjd *vpp = ZTOV(zp); 1907168404Spjd } 1908168404Spjd 1909219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1910219089Spjd zil_commit(zilog, 0); 1911219089Spjd 1912168404Spjd ZFS_EXIT(zfsvfs); 1913168404Spjd return (error); 1914168404Spjd} 1915168404Spjd 1916168404Spjd/* 1917168404Spjd * Remove an entry from a directory. 1918168404Spjd * 1919168404Spjd * IN: dvp - vnode of directory to remove entry from. 1920168404Spjd * name - name of entry to remove. 1921168404Spjd * cr - credentials of caller. 1922185029Spjd * ct - caller context 1923185029Spjd * flags - case flags 1924168404Spjd * 1925251631Sdelphij * RETURN: 0 on success, error code on failure. 
1926168404Spjd * 1927168404Spjd * Timestamps: 1928168404Spjd * dvp - ctime|mtime 1929168404Spjd * vp - ctime (if nlink > 0) 1930168404Spjd */ 1931219089Spjd 1932185029Spjd/*ARGSUSED*/ 1933168404Spjdstatic int 1934303970Savgzfs_remove(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) 1935168404Spjd{ 1936303970Savg znode_t *dzp = VTOZ(dvp); 1937303970Savg znode_t *zp = VTOZ(vp); 1938219089Spjd znode_t *xzp; 1939168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1940185029Spjd zilog_t *zilog; 1941168962Spjd uint64_t acl_obj, xattr_obj; 1942219089Spjd uint64_t obj = 0; 1943168404Spjd dmu_tx_t *tx; 1944185029Spjd boolean_t unlinked, toobig = FALSE; 1945185029Spjd uint64_t txtype; 1946168404Spjd int error; 1947168404Spjd 1948168404Spjd ZFS_ENTER(zfsvfs); 1949185029Spjd ZFS_VERIFY_ZP(dzp); 1950303970Savg ZFS_VERIFY_ZP(zp); 1951185029Spjd zilog = zfsvfs->z_log; 1952303970Savg zp = VTOZ(vp); 1953168404Spjd 1954219089Spjd xattr_obj = 0; 1955219089Spjd xzp = NULL; 1956168404Spjd 1957168962Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1958168404Spjd goto out; 1959168962Spjd } 1960168404Spjd 1961168962Spjd /* 1962168962Spjd * Need to use rmdir for removing directories. 1963168962Spjd */ 1964168962Spjd if (vp->v_type == VDIR) { 1965249195Smm error = SET_ERROR(EPERM); 1966168962Spjd goto out; 1967168962Spjd } 1968168962Spjd 1969185029Spjd vnevent_remove(vp, dvp, name, ct); 1970168962Spjd 1971303970Savg obj = zp->z_id; 1972168404Spjd 1973303970Savg /* are there any extended attributes? 
*/ 1974303970Savg error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1975303970Savg &xattr_obj, sizeof (xattr_obj)); 1976303970Savg if (error == 0 && xattr_obj) { 1977303970Savg error = zfs_zget(zfsvfs, xattr_obj, &xzp); 1978303970Savg ASSERT0(error); 1979303970Savg } 1980168962Spjd 1981168404Spjd /* 1982168404Spjd * We may delete the znode now, or we may put it in the unlinked set; 1983168404Spjd * it depends on whether we're the last link, and on whether there are 1984168404Spjd * other holds on the vnode. So we dmu_tx_hold() the right things to 1985168404Spjd * allow for either case. 1986168404Spjd */ 1987168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1988168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1989219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1990219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1991219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 1992168404Spjd 1993303970Savg if (xzp) { 1994219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 1995219089Spjd dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1996168404Spjd } 1997168404Spjd 1998168404Spjd /* charge as an update -- would be nice not to charge at all */ 1999168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2000168404Spjd 2001268464Sdelphij /* 2002294803Smav * Mark this transaction as typically resulting in a net free of space 2003268464Sdelphij */ 2004294803Smav dmu_tx_mark_netfree(tx); 2005268464Sdelphij 2006303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 2007168404Spjd if (error) { 2008168404Spjd dmu_tx_abort(tx); 2009168404Spjd ZFS_EXIT(zfsvfs); 2010168404Spjd return (error); 2011168404Spjd } 2012168404Spjd 2013168404Spjd /* 2014168404Spjd * Remove the directory entry. 
2015168404Spjd */ 2016303970Savg error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked); 2017168404Spjd 2018168404Spjd if (error) { 2019168404Spjd dmu_tx_commit(tx); 2020168404Spjd goto out; 2021168404Spjd } 2022168404Spjd 2023219089Spjd if (unlinked) { 2024168404Spjd zfs_unlinked_add(zp, tx); 2025243268Savg vp->v_vflag |= VV_NOSYNC; 2026168962Spjd } 2027168404Spjd 2028185029Spjd txtype = TX_REMOVE; 2029219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 2030168404Spjd 2031168404Spjd dmu_tx_commit(tx); 2032168404Spjdout: 2033185029Spjd 2034219089Spjd if (xzp) 2035303970Savg vrele(ZTOV(xzp)); 2036168962Spjd 2037219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2038219089Spjd zil_commit(zilog, 0); 2039219089Spjd 2040168404Spjd ZFS_EXIT(zfsvfs); 2041168404Spjd return (error); 2042168404Spjd} 2043168404Spjd 2044168404Spjd/* 2045168404Spjd * Create a new directory and insert it into dvp using the name 2046168404Spjd * provided. Return a pointer to the inserted directory. 2047168404Spjd * 2048168404Spjd * IN: dvp - vnode of directory to add subdir to. 2049168404Spjd * dirname - name of new directory. 2050168404Spjd * vap - attributes of new directory. 2051168404Spjd * cr - credentials of caller. 2052185029Spjd * ct - caller context 2053251631Sdelphij * flags - case flags 2054185029Spjd * vsecp - ACL to be set 2055168404Spjd * 2056168404Spjd * OUT: vpp - vnode of created directory. 2057168404Spjd * 2058251631Sdelphij * RETURN: 0 on success, error code on failure. 
2059168404Spjd * 2060168404Spjd * Timestamps: 2061168404Spjd * dvp - ctime|mtime updated 2062168404Spjd * vp - ctime|mtime|atime updated 2063168404Spjd */ 2064185029Spjd/*ARGSUSED*/ 2065168404Spjdstatic int 2066303970Savgzfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr) 2067168404Spjd{ 2068168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 2069168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2070185029Spjd zilog_t *zilog; 2071185029Spjd uint64_t txtype; 2072168404Spjd dmu_tx_t *tx; 2073168404Spjd int error; 2074209962Smm ksid_t *ksid; 2075209962Smm uid_t uid; 2076209962Smm gid_t gid = crgetgid(cr); 2077219089Spjd zfs_acl_ids_t acl_ids; 2078209962Smm boolean_t fuid_dirtied; 2079168404Spjd 2080168404Spjd ASSERT(vap->va_type == VDIR); 2081168404Spjd 2082185029Spjd /* 2083185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 2084185029Spjd * make sure file system is at proper version 2085185029Spjd */ 2086185029Spjd 2087209962Smm ksid = crgetsid(cr, KSID_OWNER); 2088209962Smm if (ksid) 2089209962Smm uid = ksid_getid(ksid); 2090209962Smm else 2091209962Smm uid = crgetuid(cr); 2092185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2093303970Savg ((vap->va_mask & AT_XVATTR) || 2094219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2095249195Smm return (SET_ERROR(EINVAL)); 2096185029Spjd 2097168404Spjd ZFS_ENTER(zfsvfs); 2098185029Spjd ZFS_VERIFY_ZP(dzp); 2099185029Spjd zilog = zfsvfs->z_log; 2100168404Spjd 2101219089Spjd if (dzp->z_pflags & ZFS_XATTR) { 2102168404Spjd ZFS_EXIT(zfsvfs); 2103249195Smm return (SET_ERROR(EINVAL)); 2104168404Spjd } 2105168404Spjd 2106185029Spjd if (zfsvfs->z_utf8 && u8_validate(dirname, 2107185029Spjd strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2108185029Spjd ZFS_EXIT(zfsvfs); 2109249195Smm return (SET_ERROR(EILSEQ)); 2110185029Spjd } 2111185029Spjd 2112219089Spjd if (vap->va_mask & AT_XVATTR) { 2113197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2114185029Spjd crgetuid(cr), cr, 
vap->va_type)) != 0) { 2115185029Spjd ZFS_EXIT(zfsvfs); 2116185029Spjd return (error); 2117185029Spjd } 2118219089Spjd } 2119185029Spjd 2120219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 2121303970Savg NULL, &acl_ids)) != 0) { 2122219089Spjd ZFS_EXIT(zfsvfs); 2123219089Spjd return (error); 2124219089Spjd } 2125260704Savg 2126168404Spjd /* 2127168404Spjd * First make sure the new directory doesn't exist. 2128219089Spjd * 2129219089Spjd * Existence is checked first to make sure we don't return 2130219089Spjd * EACCES instead of EEXIST which can cause some applications 2131219089Spjd * to fail. 2132168404Spjd */ 2133185029Spjd *vpp = NULL; 2134185029Spjd 2135303970Savg if (error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW)) { 2136219089Spjd zfs_acl_ids_free(&acl_ids); 2137168404Spjd ZFS_EXIT(zfsvfs); 2138168404Spjd return (error); 2139168404Spjd } 2140303970Savg ASSERT3P(zp, ==, NULL); 2141168404Spjd 2142185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 2143219089Spjd zfs_acl_ids_free(&acl_ids); 2144168404Spjd ZFS_EXIT(zfsvfs); 2145168404Spjd return (error); 2146168404Spjd } 2147168404Spjd 2148209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2149211932Smm zfs_acl_ids_free(&acl_ids); 2150209962Smm ZFS_EXIT(zfsvfs); 2151249195Smm return (SET_ERROR(EDQUOT)); 2152209962Smm } 2153209962Smm 2154168404Spjd /* 2155168404Spjd * Add a new entry to the directory. 
2156168404Spjd */ 2157303970Savg getnewvnode_reserve(1); 2158168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2159168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 2160168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2161209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 2162209962Smm if (fuid_dirtied) 2163209962Smm zfs_fuid_txhold(zfsvfs, tx); 2164219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2165219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2166219089Spjd acl_ids.z_aclp->z_acl_bytes); 2167219089Spjd } 2168219089Spjd 2169219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2170219089Spjd ZFS_SA_BASE_ATTR_SIZE); 2171219089Spjd 2172303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 2173168404Spjd if (error) { 2174219089Spjd zfs_acl_ids_free(&acl_ids); 2175168404Spjd dmu_tx_abort(tx); 2176260704Savg getnewvnode_drop_reserve(); 2177168404Spjd ZFS_EXIT(zfsvfs); 2178168404Spjd return (error); 2179168404Spjd } 2180168404Spjd 2181168404Spjd /* 2182168404Spjd * Create new node. 2183168404Spjd */ 2184219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2185168404Spjd 2186209962Smm if (fuid_dirtied) 2187209962Smm zfs_fuid_sync(zfsvfs, tx); 2188219089Spjd 2189168404Spjd /* 2190168404Spjd * Now put new name in parent dir. 
2191168404Spjd */ 2192303970Savg (void) zfs_link_create(dzp, dirname, zp, tx, ZNEW); 2193168404Spjd 2194168404Spjd *vpp = ZTOV(zp); 2195168404Spjd 2196303970Savg txtype = zfs_log_create_txtype(Z_DIR, NULL, vap); 2197303970Savg zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL, 2198209962Smm acl_ids.z_fuidp, vap); 2199185029Spjd 2200209962Smm zfs_acl_ids_free(&acl_ids); 2201219089Spjd 2202168404Spjd dmu_tx_commit(tx); 2203168404Spjd 2204260704Savg getnewvnode_drop_reserve(); 2205260704Savg 2206219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2207219089Spjd zil_commit(zilog, 0); 2208219089Spjd 2209168404Spjd ZFS_EXIT(zfsvfs); 2210168404Spjd return (0); 2211168404Spjd} 2212168404Spjd 2213168404Spjd/* 2214168404Spjd * Remove a directory subdir entry. If the current working 2215168404Spjd * directory is the same as the subdir to be removed, the 2216168404Spjd * remove will fail. 2217168404Spjd * 2218168404Spjd * IN: dvp - vnode of directory to remove from. 2219168404Spjd * name - name of directory to be removed. 2220168404Spjd * cwd - vnode of current working directory. 2221168404Spjd * cr - credentials of caller. 2222185029Spjd * ct - caller context 2223185029Spjd * flags - case flags 2224168404Spjd * 2225251631Sdelphij * RETURN: 0 on success, error code on failure. 
2226168404Spjd * 2227168404Spjd * Timestamps: 2228168404Spjd * dvp - ctime|mtime updated 2229168404Spjd */ 2230185029Spjd/*ARGSUSED*/ 2231168404Spjdstatic int 2232303970Savgzfs_rmdir(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) 2233168404Spjd{ 2234168404Spjd znode_t *dzp = VTOZ(dvp); 2235303970Savg znode_t *zp = VTOZ(vp); 2236168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2237185029Spjd zilog_t *zilog; 2238168404Spjd dmu_tx_t *tx; 2239168404Spjd int error; 2240168404Spjd 2241168962Spjd ZFS_ENTER(zfsvfs); 2242185029Spjd ZFS_VERIFY_ZP(dzp); 2243303970Savg ZFS_VERIFY_ZP(zp); 2244185029Spjd zilog = zfsvfs->z_log; 2245168404Spjd 2246168404Spjd 2247168404Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2248168404Spjd goto out; 2249168404Spjd } 2250168404Spjd 2251168962Spjd if (vp->v_type != VDIR) { 2252249195Smm error = SET_ERROR(ENOTDIR); 2253168962Spjd goto out; 2254168962Spjd } 2255168962Spjd 2256185029Spjd vnevent_rmdir(vp, dvp, name, ct); 2257168962Spjd 2258168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2259168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2260219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2261168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2262219089Spjd zfs_sa_upgrade_txholds(tx, zp); 2263219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 2264304122Savg dmu_tx_mark_netfree(tx); 2265303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 2266168404Spjd if (error) { 2267168404Spjd dmu_tx_abort(tx); 2268168404Spjd ZFS_EXIT(zfsvfs); 2269168404Spjd return (error); 2270168404Spjd } 2271168404Spjd 2272168404Spjd cache_purge(dvp); 2273168404Spjd 2274303970Savg error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL); 2275168404Spjd 2276185029Spjd if (error == 0) { 2277185029Spjd uint64_t txtype = TX_RMDIR; 2278219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2279185029Spjd } 2280168404Spjd 2281168404Spjd dmu_tx_commit(tx); 2282168404Spjd 2283168404Spjd cache_purge(vp); 2284168404Spjdout: 2285219089Spjd if 
(zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2286219089Spjd zil_commit(zilog, 0); 2287219089Spjd 2288168404Spjd ZFS_EXIT(zfsvfs); 2289168404Spjd return (error); 2290168404Spjd} 2291168404Spjd 2292168404Spjd/* 2293168404Spjd * Read as many directory entries as will fit into the provided 2294168404Spjd * buffer from the given directory cursor position (specified in 2295251631Sdelphij * the uio structure). 2296168404Spjd * 2297168404Spjd * IN: vp - vnode of directory to read. 2298168404Spjd * uio - structure supplying read location, range info, 2299168404Spjd * and return buffer. 2300168404Spjd * cr - credentials of caller. 2301185029Spjd * ct - caller context 2302185029Spjd * flags - case flags 2303168404Spjd * 2304168404Spjd * OUT: uio - updated offset and range, buffer filled. 2305168404Spjd * eofp - set to true if end-of-file detected. 2306168404Spjd * 2307251631Sdelphij * RETURN: 0 on success, error code on failure. 2308168404Spjd * 2309168404Spjd * Timestamps: 2310168404Spjd * vp - atime updated 2311168404Spjd * 2312168404Spjd * Note that the low 4 bits of the cookie returned by zap is always zero. 2313168404Spjd * This allows us to use the low range for "special" directory entries: 2314168404Spjd * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2315168404Spjd * we use the offset 2 for the '.zfs' directory. 
2316168404Spjd */ 2317168404Spjd/* ARGSUSED */ 2318168404Spjdstatic int 2319168962Spjdzfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) 2320168404Spjd{ 2321168404Spjd znode_t *zp = VTOZ(vp); 2322168404Spjd iovec_t *iovp; 2323185029Spjd edirent_t *eodp; 2324168404Spjd dirent64_t *odp; 2325168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2326168404Spjd objset_t *os; 2327168404Spjd caddr_t outbuf; 2328168404Spjd size_t bufsize; 2329168404Spjd zap_cursor_t zc; 2330168404Spjd zap_attribute_t zap; 2331168404Spjd uint_t bytes_wanted; 2332168404Spjd uint64_t offset; /* must be unsigned; checks for < 1 */ 2333219089Spjd uint64_t parent; 2334168404Spjd int local_eof; 2335168404Spjd int outcount; 2336168404Spjd int error; 2337168404Spjd uint8_t prefetch; 2338185029Spjd boolean_t check_sysattrs; 2339168404Spjd uint8_t type; 2340168962Spjd int ncooks; 2341168962Spjd u_long *cooks = NULL; 2342185029Spjd int flags = 0; 2343168404Spjd 2344168404Spjd ZFS_ENTER(zfsvfs); 2345185029Spjd ZFS_VERIFY_ZP(zp); 2346168404Spjd 2347219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 2348219089Spjd &parent, sizeof (parent))) != 0) { 2349219089Spjd ZFS_EXIT(zfsvfs); 2350219089Spjd return (error); 2351219089Spjd } 2352219089Spjd 2353168404Spjd /* 2354168404Spjd * If we are not given an eof variable, 2355168404Spjd * use a local one. 2356168404Spjd */ 2357168404Spjd if (eofp == NULL) 2358168404Spjd eofp = &local_eof; 2359168404Spjd 2360168404Spjd /* 2361168404Spjd * Check for valid iov_len. 
2362168404Spjd */ 2363168404Spjd if (uio->uio_iov->iov_len <= 0) { 2364168404Spjd ZFS_EXIT(zfsvfs); 2365249195Smm return (SET_ERROR(EINVAL)); 2366168404Spjd } 2367168404Spjd 2368168404Spjd /* 2369168404Spjd * Quit if directory has been removed (posix) 2370168404Spjd */ 2371168404Spjd if ((*eofp = zp->z_unlinked) != 0) { 2372168404Spjd ZFS_EXIT(zfsvfs); 2373168404Spjd return (0); 2374168404Spjd } 2375168404Spjd 2376168404Spjd error = 0; 2377168404Spjd os = zfsvfs->z_os; 2378168404Spjd offset = uio->uio_loffset; 2379168404Spjd prefetch = zp->z_zn_prefetch; 2380168404Spjd 2381168404Spjd /* 2382168404Spjd * Initialize the iterator cursor. 2383168404Spjd */ 2384168404Spjd if (offset <= 3) { 2385168404Spjd /* 2386168404Spjd * Start iteration from the beginning of the directory. 2387168404Spjd */ 2388168404Spjd zap_cursor_init(&zc, os, zp->z_id); 2389168404Spjd } else { 2390168404Spjd /* 2391168404Spjd * The offset is a serialized cursor. 2392168404Spjd */ 2393168404Spjd zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2394168404Spjd } 2395168404Spjd 2396168404Spjd /* 2397168404Spjd * Get space to change directory entries into fs independent format. 2398168404Spjd */ 2399168404Spjd iovp = uio->uio_iov; 2400168404Spjd bytes_wanted = iovp->iov_len; 2401168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2402168404Spjd bufsize = bytes_wanted; 2403168404Spjd outbuf = kmem_alloc(bufsize, KM_SLEEP); 2404168404Spjd odp = (struct dirent64 *)outbuf; 2405168404Spjd } else { 2406168404Spjd bufsize = bytes_wanted; 2407247187Smm outbuf = NULL; 2408168404Spjd odp = (struct dirent64 *)iovp->iov_base; 2409168404Spjd } 2410185029Spjd eodp = (struct edirent *)odp; 2411168404Spjd 2412169170Spjd if (ncookies != NULL) { 2413168404Spjd /* 2414168404Spjd * Minimum entry size is dirent size and 1 byte for a file name. 
2415168404Spjd */ 2416168962Spjd ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2417219404Spjd cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2418219404Spjd *cookies = cooks; 2419168962Spjd *ncookies = ncooks; 2420168404Spjd } 2421185029Spjd /* 2422185029Spjd * If this VFS supports the system attribute view interface; and 2423185029Spjd * we're looking at an extended attribute directory; and we care 2424185029Spjd * about normalization conflicts on this vfs; then we must check 2425185029Spjd * for normalization conflicts with the sysattr name space. 2426185029Spjd */ 2427185029Spjd#ifdef TODO 2428185029Spjd check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2429185029Spjd (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2430185029Spjd (flags & V_RDDIR_ENTFLAGS); 2431185029Spjd#else 2432185029Spjd check_sysattrs = 0; 2433185029Spjd#endif 2434168404Spjd 2435168404Spjd /* 2436168404Spjd * Transform to file-system independent format 2437168404Spjd */ 2438168404Spjd outcount = 0; 2439168404Spjd while (outcount < bytes_wanted) { 2440168404Spjd ino64_t objnum; 2441168404Spjd ushort_t reclen; 2442219089Spjd off64_t *next = NULL; 2443168404Spjd 2444168404Spjd /* 2445168404Spjd * Special case `.', `..', and `.zfs'. 
2446168404Spjd */ 2447168404Spjd if (offset == 0) { 2448168404Spjd (void) strcpy(zap.za_name, "."); 2449185029Spjd zap.za_normalization_conflict = 0; 2450168404Spjd objnum = zp->z_id; 2451169108Spjd type = DT_DIR; 2452168404Spjd } else if (offset == 1) { 2453168404Spjd (void) strcpy(zap.za_name, ".."); 2454185029Spjd zap.za_normalization_conflict = 0; 2455219089Spjd objnum = parent; 2456169108Spjd type = DT_DIR; 2457168404Spjd } else if (offset == 2 && zfs_show_ctldir(zp)) { 2458168404Spjd (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2459185029Spjd zap.za_normalization_conflict = 0; 2460168404Spjd objnum = ZFSCTL_INO_ROOT; 2461169108Spjd type = DT_DIR; 2462168404Spjd } else { 2463168404Spjd /* 2464168404Spjd * Grab next entry. 2465168404Spjd */ 2466168404Spjd if (error = zap_cursor_retrieve(&zc, &zap)) { 2467168404Spjd if ((*eofp = (error == ENOENT)) != 0) 2468168404Spjd break; 2469168404Spjd else 2470168404Spjd goto update; 2471168404Spjd } 2472168404Spjd 2473168404Spjd if (zap.za_integer_length != 8 || 2474168404Spjd zap.za_num_integers != 1) { 2475168404Spjd cmn_err(CE_WARN, "zap_readdir: bad directory " 2476168404Spjd "entry, obj = %lld, offset = %lld\n", 2477168404Spjd (u_longlong_t)zp->z_id, 2478168404Spjd (u_longlong_t)offset); 2479249195Smm error = SET_ERROR(ENXIO); 2480168404Spjd goto update; 2481168404Spjd } 2482168404Spjd 2483168404Spjd objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2484168404Spjd /* 2485168404Spjd * MacOS X can extract the object type here such as: 2486168404Spjd * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2487168404Spjd */ 2488168404Spjd type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2489185029Spjd 2490185029Spjd if (check_sysattrs && !zap.za_normalization_conflict) { 2491185029Spjd#ifdef TODO 2492185029Spjd zap.za_normalization_conflict = 2493185029Spjd xattr_sysattr_casechk(zap.za_name); 2494185029Spjd#else 2495185029Spjd panic("%s:%u: TODO", __func__, __LINE__); 2496185029Spjd#endif 2497185029Spjd } 2498168404Spjd } 
2499168404Spjd 2500211932Smm if (flags & V_RDDIR_ACCFILTER) { 2501211932Smm /* 2502211932Smm * If we have no access at all, don't include 2503211932Smm * this entry in the returned information 2504211932Smm */ 2505211932Smm znode_t *ezp; 2506211932Smm if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2507211932Smm goto skip_entry; 2508211932Smm if (!zfs_has_access(ezp, cr)) { 2509303970Savg vrele(ZTOV(ezp)); 2510211932Smm goto skip_entry; 2511211932Smm } 2512303970Savg vrele(ZTOV(ezp)); 2513211932Smm } 2514211932Smm 2515185029Spjd if (flags & V_RDDIR_ENTFLAGS) 2516185029Spjd reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2517185029Spjd else 2518185029Spjd reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2519185029Spjd 2520168404Spjd /* 2521168404Spjd * Will this entry fit in the buffer? 2522168404Spjd */ 2523168404Spjd if (outcount + reclen > bufsize) { 2524168404Spjd /* 2525168404Spjd * Did we manage to fit anything in the buffer? 2526168404Spjd */ 2527168404Spjd if (!outcount) { 2528249195Smm error = SET_ERROR(EINVAL); 2529168404Spjd goto update; 2530168404Spjd } 2531168404Spjd break; 2532168404Spjd } 2533185029Spjd if (flags & V_RDDIR_ENTFLAGS) { 2534185029Spjd /* 2535185029Spjd * Add extended flag entry: 2536185029Spjd */ 2537185029Spjd eodp->ed_ino = objnum; 2538185029Spjd eodp->ed_reclen = reclen; 2539185029Spjd /* NOTE: ed_off is the offset for the *next* entry */ 2540185029Spjd next = &(eodp->ed_off); 2541185029Spjd eodp->ed_eflags = zap.za_normalization_conflict ? 
2542185029Spjd ED_CASE_CONFLICT : 0; 2543185029Spjd (void) strncpy(eodp->ed_name, zap.za_name, 2544185029Spjd EDIRENT_NAMELEN(reclen)); 2545185029Spjd eodp = (edirent_t *)((intptr_t)eodp + reclen); 2546185029Spjd } else { 2547185029Spjd /* 2548185029Spjd * Add normal entry: 2549185029Spjd */ 2550185029Spjd odp->d_ino = objnum; 2551185029Spjd odp->d_reclen = reclen; 2552185029Spjd odp->d_namlen = strlen(zap.za_name); 2553185029Spjd (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2554185029Spjd odp->d_type = type; 2555341074Smarkj dirent_terminate(odp); 2556185029Spjd odp = (dirent64_t *)((intptr_t)odp + reclen); 2557185029Spjd } 2558168404Spjd outcount += reclen; 2559168404Spjd 2560168404Spjd ASSERT(outcount <= bufsize); 2561168404Spjd 2562168404Spjd /* Prefetch znode */ 2563168404Spjd if (prefetch) 2564286705Smav dmu_prefetch(os, objnum, 0, 0, 0, 2565286705Smav ZIO_PRIORITY_SYNC_READ); 2566168404Spjd 2567211932Smm skip_entry: 2568168404Spjd /* 2569168404Spjd * Move to the next entry, fill in the previous offset. 
2570168404Spjd */ 2571168404Spjd if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2572168404Spjd zap_cursor_advance(&zc); 2573168404Spjd offset = zap_cursor_serialize(&zc); 2574168404Spjd } else { 2575168404Spjd offset += 1; 2576168404Spjd } 2577219404Spjd 2578219404Spjd if (cooks != NULL) { 2579219404Spjd *cooks++ = offset; 2580219404Spjd ncooks--; 2581219404Spjd KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2582219404Spjd } 2583168404Spjd } 2584168404Spjd zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2585168404Spjd 2586168404Spjd /* Subtract unused cookies */ 2587168962Spjd if (ncookies != NULL) 2588168962Spjd *ncookies -= ncooks; 2589168404Spjd 2590168404Spjd if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2591168404Spjd iovp->iov_base += outcount; 2592168404Spjd iovp->iov_len -= outcount; 2593168404Spjd uio->uio_resid -= outcount; 2594168404Spjd } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2595168404Spjd /* 2596168404Spjd * Reset the pointer. 
2597168404Spjd */ 2598168404Spjd offset = uio->uio_loffset; 2599168404Spjd } 2600168404Spjd 2601168404Spjdupdate: 2602168404Spjd zap_cursor_fini(&zc); 2603168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2604168404Spjd kmem_free(outbuf, bufsize); 2605168404Spjd 2606168404Spjd if (error == ENOENT) 2607168404Spjd error = 0; 2608168404Spjd 2609168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2610168404Spjd 2611168404Spjd uio->uio_loffset = offset; 2612168404Spjd ZFS_EXIT(zfsvfs); 2613169107Spjd if (error != 0 && cookies != NULL) { 2614168962Spjd free(*cookies, M_TEMP); 2615168962Spjd *cookies = NULL; 2616168962Spjd *ncookies = 0; 2617168404Spjd } 2618168404Spjd return (error); 2619168404Spjd} 2620168404Spjd 2621185029Spjdulong_t zfs_fsync_sync_cnt = 4; 2622185029Spjd 2623168404Spjdstatic int 2624185029Spjdzfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2625168404Spjd{ 2626168962Spjd znode_t *zp = VTOZ(vp); 2627168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2628168404Spjd 2629185029Spjd (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2630185029Spjd 2631219089Spjd if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 2632219089Spjd ZFS_ENTER(zfsvfs); 2633219089Spjd ZFS_VERIFY_ZP(zp); 2634219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 2635219089Spjd ZFS_EXIT(zfsvfs); 2636219089Spjd } 2637168404Spjd return (0); 2638168404Spjd} 2639168404Spjd 2640185029Spjd 2641168404Spjd/* 2642168404Spjd * Get the requested file attributes and place them in the provided 2643168404Spjd * vattr structure. 2644168404Spjd * 2645168404Spjd * IN: vp - vnode of file. 2646168404Spjd * vap - va_mask identifies requested attributes. 2647185029Spjd * If AT_XVATTR set, then optional attrs are requested 2648185029Spjd * flags - ATTR_NOACLCHECK (CIFS server context) 2649168404Spjd * cr - credentials of caller. 2650185029Spjd * ct - caller context 2651168404Spjd * 2652168404Spjd * OUT: vap - attribute values. 
2653168404Spjd * 2654251631Sdelphij * RETURN: 0 (always succeeds). 2655168404Spjd */ 2656168404Spjd/* ARGSUSED */ 2657168404Spjdstatic int 2658185029Spjdzfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2659185029Spjd caller_context_t *ct) 2660168404Spjd{ 2661168962Spjd znode_t *zp = VTOZ(vp); 2662168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2663185029Spjd int error = 0; 2664168962Spjd uint32_t blksize; 2665168962Spjd u_longlong_t nblocks; 2666185029Spjd uint64_t links; 2667224251Sdelphij uint64_t mtime[2], ctime[2], crtime[2], rdev; 2668185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2669185029Spjd xoptattr_t *xoap = NULL; 2670185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2671224251Sdelphij sa_bulk_attr_t bulk[4]; 2672219089Spjd int count = 0; 2673168404Spjd 2674168404Spjd ZFS_ENTER(zfsvfs); 2675185029Spjd ZFS_VERIFY_ZP(zp); 2676168404Spjd 2677219089Spjd zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2678219089Spjd 2679219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 2680219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 2681243807Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 2682224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2683224251Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 2684224251Sdelphij &rdev, 8); 2685219089Spjd 2686219089Spjd if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 2687219089Spjd ZFS_EXIT(zfsvfs); 2688219089Spjd return (error); 2689219089Spjd } 2690219089Spjd 2691168404Spjd /* 2692185029Spjd * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2693185029Spjd * Also, if we are the owner don't bother, since owner should 2694185029Spjd * always be allowed to read basic attributes of file. 
2695185029Spjd */ 2696219089Spjd if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2697219089Spjd (vap->va_uid != crgetuid(cr))) { 2698185029Spjd if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2699185029Spjd skipaclchk, cr)) { 2700185029Spjd ZFS_EXIT(zfsvfs); 2701185029Spjd return (error); 2702185029Spjd } 2703185029Spjd } 2704185029Spjd 2705185029Spjd /* 2706168404Spjd * Return all attributes. It's cheaper to provide the answer 2707168404Spjd * than to determine whether we were asked the question. 2708168404Spjd */ 2709168404Spjd 2710219089Spjd vap->va_type = IFTOVT(zp->z_mode); 2711219089Spjd vap->va_mode = zp->z_mode & ~S_IFMT; 2712277300Ssmh#ifdef illumos 2713224252Sdelphij vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2714224252Sdelphij#else 2715224252Sdelphij vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 2716224252Sdelphij#endif 2717168404Spjd vap->va_nodeid = zp->z_id; 2718185029Spjd if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2719219089Spjd links = zp->z_links + 1; 2720185029Spjd else 2721219089Spjd links = zp->z_links; 2722229425Sdim vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! */ 2723219089Spjd vap->va_size = zp->z_size; 2724277300Ssmh#ifdef illumos 2725224252Sdelphij vap->va_rdev = vp->v_rdev; 2726224252Sdelphij#else 2727224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2728224251Sdelphij vap->va_rdev = zfs_cmpldev(rdev); 2729224252Sdelphij#endif 2730168404Spjd vap->va_seq = zp->z_seq; 2731168404Spjd vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2732272467Saraujo vap->va_filerev = zp->z_seq; 2733168404Spjd 2734185029Spjd /* 2735185029Spjd * Add in any requested optional attributes and the create time. 2736185029Spjd * Also set the corresponding bits in the returned attribute bitmap. 
2737185029Spjd */ 2738185029Spjd if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2739185029Spjd if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2740185029Spjd xoap->xoa_archive = 2741219089Spjd ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2742185029Spjd XVA_SET_RTN(xvap, XAT_ARCHIVE); 2743185029Spjd } 2744185029Spjd 2745185029Spjd if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2746185029Spjd xoap->xoa_readonly = 2747219089Spjd ((zp->z_pflags & ZFS_READONLY) != 0); 2748185029Spjd XVA_SET_RTN(xvap, XAT_READONLY); 2749185029Spjd } 2750185029Spjd 2751185029Spjd if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2752185029Spjd xoap->xoa_system = 2753219089Spjd ((zp->z_pflags & ZFS_SYSTEM) != 0); 2754185029Spjd XVA_SET_RTN(xvap, XAT_SYSTEM); 2755185029Spjd } 2756185029Spjd 2757185029Spjd if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2758185029Spjd xoap->xoa_hidden = 2759219089Spjd ((zp->z_pflags & ZFS_HIDDEN) != 0); 2760185029Spjd XVA_SET_RTN(xvap, XAT_HIDDEN); 2761185029Spjd } 2762185029Spjd 2763185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2764185029Spjd xoap->xoa_nounlink = 2765219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2766185029Spjd XVA_SET_RTN(xvap, XAT_NOUNLINK); 2767185029Spjd } 2768185029Spjd 2769185029Spjd if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2770185029Spjd xoap->xoa_immutable = 2771219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2772185029Spjd XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2773185029Spjd } 2774185029Spjd 2775185029Spjd if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2776185029Spjd xoap->xoa_appendonly = 2777219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2778185029Spjd XVA_SET_RTN(xvap, XAT_APPENDONLY); 2779185029Spjd } 2780185029Spjd 2781185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2782185029Spjd xoap->xoa_nodump = 2783219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0); 2784185029Spjd XVA_SET_RTN(xvap, XAT_NODUMP); 2785185029Spjd } 2786185029Spjd 2787185029Spjd if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2788185029Spjd xoap->xoa_opaque = 2789219089Spjd 
((zp->z_pflags & ZFS_OPAQUE) != 0); 2790185029Spjd XVA_SET_RTN(xvap, XAT_OPAQUE); 2791185029Spjd } 2792185029Spjd 2793185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2794185029Spjd xoap->xoa_av_quarantined = 2795219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2796185029Spjd XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2797185029Spjd } 2798185029Spjd 2799185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2800185029Spjd xoap->xoa_av_modified = 2801219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2802185029Spjd XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2803185029Spjd } 2804185029Spjd 2805185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2806219089Spjd vp->v_type == VREG) { 2807219089Spjd zfs_sa_get_scanstamp(zp, xvap); 2808185029Spjd } 2809185029Spjd 2810219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2811219089Spjd xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 2812219089Spjd XVA_SET_RTN(xvap, XAT_REPARSE); 2813219089Spjd } 2814219089Spjd if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 2815219089Spjd xoap->xoa_generation = zp->z_gen; 2816219089Spjd XVA_SET_RTN(xvap, XAT_GEN); 2817219089Spjd } 2818219089Spjd 2819219089Spjd if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2820219089Spjd xoap->xoa_offline = 2821219089Spjd ((zp->z_pflags & ZFS_OFFLINE) != 0); 2822219089Spjd XVA_SET_RTN(xvap, XAT_OFFLINE); 2823219089Spjd } 2824219089Spjd 2825219089Spjd if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2826219089Spjd xoap->xoa_sparse = 2827219089Spjd ((zp->z_pflags & ZFS_SPARSE) != 0); 2828219089Spjd XVA_SET_RTN(xvap, XAT_SPARSE); 2829219089Spjd } 2830185029Spjd } 2831185029Spjd 2832219089Spjd ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 2833219089Spjd ZFS_TIME_DECODE(&vap->va_mtime, mtime); 2834219089Spjd ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2835219089Spjd ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 2836168404Spjd 2837168404Spjd 2838219089Spjd sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 2839168404Spjd vap->va_blksize = blksize; 2840168404Spjd vap->va_bytes = 
nblocks << 9; /* nblocks * 512 */ 2841168404Spjd 2842168404Spjd if (zp->z_blksz == 0) { 2843168404Spjd /* 2844168404Spjd * Block size hasn't been set; suggest maximal I/O transfers. 2845168404Spjd */ 2846168404Spjd vap->va_blksize = zfsvfs->z_max_blksz; 2847168404Spjd } 2848168404Spjd 2849168404Spjd ZFS_EXIT(zfsvfs); 2850168404Spjd return (0); 2851168404Spjd} 2852168404Spjd 2853168404Spjd/* 2854168404Spjd * Set the file attributes to the values contained in the 2855168404Spjd * vattr structure. 2856168404Spjd * 2857168404Spjd * IN: vp - vnode of file to be modified. 2858168404Spjd * vap - new attribute values. 2859185029Spjd * If AT_XVATTR set, then optional attrs are being set 2860168404Spjd * flags - ATTR_UTIME set if non-default time values provided. 2861185029Spjd * - ATTR_NOACLCHECK (CIFS context only). 2862168404Spjd * cr - credentials of caller. 2863185029Spjd * ct - caller context 2864168404Spjd * 2865251631Sdelphij * RETURN: 0 on success, error code on failure. 2866168404Spjd * 2867168404Spjd * Timestamps: 2868168404Spjd * vp - ctime updated, mtime updated if size changed. 
2869168404Spjd */ 2870168404Spjd/* ARGSUSED */ 2871168404Spjdstatic int 2872168962Spjdzfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2873251631Sdelphij caller_context_t *ct) 2874168404Spjd{ 2875185029Spjd znode_t *zp = VTOZ(vp); 2876168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2877185029Spjd zilog_t *zilog; 2878168404Spjd dmu_tx_t *tx; 2879168404Spjd vattr_t oldva; 2880209962Smm xvattr_t tmpxvattr; 2881168962Spjd uint_t mask = vap->va_mask; 2882247187Smm uint_t saved_mask = 0; 2883197831Spjd uint64_t saved_mode; 2884168404Spjd int trim_mask = 0; 2885168404Spjd uint64_t new_mode; 2886209962Smm uint64_t new_uid, new_gid; 2887219089Spjd uint64_t xattr_obj; 2888219089Spjd uint64_t mtime[2], ctime[2]; 2889168404Spjd znode_t *attrzp; 2890168404Spjd int need_policy = FALSE; 2891219089Spjd int err, err2; 2892185029Spjd zfs_fuid_info_t *fuidp = NULL; 2893185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2894185029Spjd xoptattr_t *xoap; 2895219089Spjd zfs_acl_t *aclp; 2896185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 2897219089Spjd boolean_t fuid_dirtied = B_FALSE; 2898219089Spjd sa_bulk_attr_t bulk[7], xattr_bulk[7]; 2899219089Spjd int count = 0, xattr_count = 0; 2900168404Spjd 2901168404Spjd if (mask == 0) 2902168404Spjd return (0); 2903168404Spjd 2904168962Spjd if (mask & AT_NOSET) 2905249195Smm return (SET_ERROR(EINVAL)); 2906168962Spjd 2907185029Spjd ZFS_ENTER(zfsvfs); 2908185029Spjd ZFS_VERIFY_ZP(zp); 2909185029Spjd 2910185029Spjd zilog = zfsvfs->z_log; 2911185029Spjd 2912185029Spjd /* 2913185029Spjd * Make sure that if we have ephemeral uid/gid or xvattr specified 2914185029Spjd * that file system is at proper version level 2915185029Spjd */ 2916185029Spjd 2917185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2918185029Spjd (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2919185029Spjd ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 2920185029Spjd (mask & AT_XVATTR))) { 2921185029Spjd ZFS_EXIT(zfsvfs); 2922249195Smm return (SET_ERROR(EINVAL)); 2923185029Spjd } 2924185029Spjd 2925185029Spjd if (mask & AT_SIZE && vp->v_type == VDIR) { 2926185029Spjd ZFS_EXIT(zfsvfs); 2927249195Smm return (SET_ERROR(EISDIR)); 2928185029Spjd } 2929168404Spjd 2930185029Spjd if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 2931185029Spjd ZFS_EXIT(zfsvfs); 2932249195Smm return (SET_ERROR(EINVAL)); 2933185029Spjd } 2934168404Spjd 2935185029Spjd /* 2936185029Spjd * If this is an xvattr_t, then get a pointer to the structure of 2937185029Spjd * optional attributes. If this is NULL, then we have a vattr_t. 
2938185029Spjd */ 2939185029Spjd xoap = xva_getxoptattr(xvap); 2940168404Spjd 2941209962Smm xva_init(&tmpxvattr); 2942209962Smm 2943185029Spjd /* 2944185029Spjd * Immutable files can only alter immutable bit and atime 2945185029Spjd */ 2946219089Spjd if ((zp->z_pflags & ZFS_IMMUTABLE) && 2947185029Spjd ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 2948185029Spjd ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 2949185029Spjd ZFS_EXIT(zfsvfs); 2950249195Smm return (SET_ERROR(EPERM)); 2951185029Spjd } 2952185029Spjd 2953321579Smav /* 2954321579Smav * Note: ZFS_READONLY is handled in zfs_zaccess_common. 2955321579Smav */ 2956185029Spjd 2957185029Spjd /* 2958185029Spjd * Verify timestamps doesn't overflow 32 bits. 2959185029Spjd * ZFS can handle large timestamps, but 32bit syscalls can't 2960185029Spjd * handle times greater than 2039. This check should be removed 2961185029Spjd * once large timestamps are fully supported. 2962185029Spjd */ 2963185029Spjd if (mask & (AT_ATIME | AT_MTIME)) { 2964185029Spjd if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 2965185029Spjd ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 2966185029Spjd ZFS_EXIT(zfsvfs); 2967249195Smm return (SET_ERROR(EOVERFLOW)); 2968185029Spjd } 2969185029Spjd } 2970316391Sasomers if (xoap && (mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME) && 2971316391Sasomers TIMESPEC_OVERFLOW(&vap->va_birthtime)) { 2972316391Sasomers ZFS_EXIT(zfsvfs); 2973316391Sasomers return (SET_ERROR(EOVERFLOW)); 2974316391Sasomers } 2975185029Spjd 2976168404Spjd attrzp = NULL; 2977219089Spjd aclp = NULL; 2978168404Spjd 2979211932Smm /* Can this be moved to before the top label? 
*/ 2980168404Spjd if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2981168404Spjd ZFS_EXIT(zfsvfs); 2982249195Smm return (SET_ERROR(EROFS)); 2983168404Spjd } 2984168404Spjd 2985168404Spjd /* 2986168404Spjd * First validate permissions 2987168404Spjd */ 2988168404Spjd 2989168404Spjd if (mask & AT_SIZE) { 2990168404Spjd /* 2991168404Spjd * XXX - Note, we are not providing any open 2992168404Spjd * mode flags here (like FNDELAY), so we may 2993168404Spjd * block if there are locks present... this 2994168404Spjd * should be addressed in openat(). 2995168404Spjd */ 2996185029Spjd /* XXX - would it be OK to generate a log record here? */ 2997185029Spjd err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 2998168404Spjd if (err) { 2999168404Spjd ZFS_EXIT(zfsvfs); 3000168404Spjd return (err); 3001168404Spjd } 3002168404Spjd } 3003168404Spjd 3004185029Spjd if (mask & (AT_ATIME|AT_MTIME) || 3005185029Spjd ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 3006185029Spjd XVA_ISSET_REQ(xvap, XAT_READONLY) || 3007185029Spjd XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 3008219089Spjd XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 3009219089Spjd XVA_ISSET_REQ(xvap, XAT_SPARSE) || 3010185029Spjd XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 3011219089Spjd XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 3012185029Spjd need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 3013185029Spjd skipaclchk, cr); 3014219089Spjd } 3015168404Spjd 3016168404Spjd if (mask & (AT_UID|AT_GID)) { 3017168404Spjd int idmask = (mask & (AT_UID|AT_GID)); 3018168404Spjd int take_owner; 3019168404Spjd int take_group; 3020168404Spjd 3021168404Spjd /* 3022168404Spjd * NOTE: even if a new mode is being set, 3023168404Spjd * we may clear S_ISUID/S_ISGID bits. 
3024168404Spjd */ 3025168404Spjd 3026168404Spjd if (!(mask & AT_MODE)) 3027219089Spjd vap->va_mode = zp->z_mode; 3028168404Spjd 3029168404Spjd /* 3030168404Spjd * Take ownership or chgrp to group we are a member of 3031168404Spjd */ 3032168404Spjd 3033168404Spjd take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 3034185029Spjd take_group = (mask & AT_GID) && 3035185029Spjd zfs_groupmember(zfsvfs, vap->va_gid, cr); 3036168404Spjd 3037168404Spjd /* 3038168404Spjd * If both AT_UID and AT_GID are set then take_owner and 3039168404Spjd * take_group must both be set in order to allow taking 3040168404Spjd * ownership. 3041168404Spjd * 3042168404Spjd * Otherwise, send the check through secpolicy_vnode_setattr() 3043168404Spjd * 3044168404Spjd */ 3045168404Spjd 3046168404Spjd if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 3047168404Spjd ((idmask == AT_UID) && take_owner) || 3048168404Spjd ((idmask == AT_GID) && take_group)) { 3049185029Spjd if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 3050185029Spjd skipaclchk, cr) == 0) { 3051168404Spjd /* 3052168404Spjd * Remove setuid/setgid for non-privileged users 3053168404Spjd */ 3054185029Spjd secpolicy_setid_clear(vap, vp, cr); 3055168404Spjd trim_mask = (mask & (AT_UID|AT_GID)); 3056168404Spjd } else { 3057168404Spjd need_policy = TRUE; 3058168404Spjd } 3059168404Spjd } else { 3060168404Spjd need_policy = TRUE; 3061168404Spjd } 3062168404Spjd } 3063168404Spjd 3064219089Spjd oldva.va_mode = zp->z_mode; 3065185029Spjd zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 3066185029Spjd if (mask & AT_XVATTR) { 3067209962Smm /* 3068209962Smm * Update xvattr mask to include only those attributes 3069209962Smm * that are actually changing. 3070209962Smm * 3071209962Smm * the bits will be restored prior to actually setting 3072209962Smm * the attributes so the caller thinks they were set. 
3073209962Smm */ 3074209962Smm if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3075209962Smm if (xoap->xoa_appendonly != 3076219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 3077209962Smm need_policy = TRUE; 3078209962Smm } else { 3079209962Smm XVA_CLR_REQ(xvap, XAT_APPENDONLY); 3080209962Smm XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 3081209962Smm } 3082209962Smm } 3083209962Smm 3084209962Smm if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3085209962Smm if (xoap->xoa_nounlink != 3086219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 3087209962Smm need_policy = TRUE; 3088209962Smm } else { 3089209962Smm XVA_CLR_REQ(xvap, XAT_NOUNLINK); 3090209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 3091209962Smm } 3092209962Smm } 3093209962Smm 3094209962Smm if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3095209962Smm if (xoap->xoa_immutable != 3096219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 3097209962Smm need_policy = TRUE; 3098209962Smm } else { 3099209962Smm XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 3100209962Smm XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 3101209962Smm } 3102209962Smm } 3103209962Smm 3104209962Smm if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3105209962Smm if (xoap->xoa_nodump != 3106219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0)) { 3107209962Smm need_policy = TRUE; 3108209962Smm } else { 3109209962Smm XVA_CLR_REQ(xvap, XAT_NODUMP); 3110209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 3111209962Smm } 3112209962Smm } 3113209962Smm 3114209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3115209962Smm if (xoap->xoa_av_modified != 3116219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 3117209962Smm need_policy = TRUE; 3118209962Smm } else { 3119209962Smm XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 3120209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 3121209962Smm } 3122209962Smm } 3123209962Smm 3124209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3125209962Smm if ((vp->v_type != VREG && 3126209962Smm xoap->xoa_av_quarantined) || 3127209962Smm xoap->xoa_av_quarantined != 
3128219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 3129209962Smm need_policy = TRUE; 3130209962Smm } else { 3131209962Smm XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 3132209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 3133209962Smm } 3134209962Smm } 3135209962Smm 3136219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3137219089Spjd ZFS_EXIT(zfsvfs); 3138249195Smm return (SET_ERROR(EPERM)); 3139219089Spjd } 3140219089Spjd 3141209962Smm if (need_policy == FALSE && 3142209962Smm (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3143209962Smm XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3144185029Spjd need_policy = TRUE; 3145185029Spjd } 3146185029Spjd } 3147185029Spjd 3148168404Spjd if (mask & AT_MODE) { 3149185029Spjd if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 3150168962Spjd err = secpolicy_setid_setsticky_clear(vp, vap, 3151168962Spjd &oldva, cr); 3152168962Spjd if (err) { 3153168962Spjd ZFS_EXIT(zfsvfs); 3154168962Spjd return (err); 3155168962Spjd } 3156168404Spjd trim_mask |= AT_MODE; 3157168404Spjd } else { 3158168404Spjd need_policy = TRUE; 3159168404Spjd } 3160168404Spjd } 3161168404Spjd 3162168404Spjd if (need_policy) { 3163168404Spjd /* 3164168404Spjd * If trim_mask is set then take ownership 3165168404Spjd * has been granted or write_acl is present and user 3166168404Spjd * has the ability to modify mode. In that case remove 3167168404Spjd * UID|GID and or MODE from mask so that 3168168404Spjd * secpolicy_vnode_setattr() doesn't revoke it. 3169168404Spjd */ 3170168404Spjd 3171168404Spjd if (trim_mask) { 3172168404Spjd saved_mask = vap->va_mask; 3173168404Spjd vap->va_mask &= ~trim_mask; 3174197831Spjd if (trim_mask & AT_MODE) { 3175197831Spjd /* 3176197831Spjd * Save the mode, as secpolicy_vnode_setattr() 3177197831Spjd * will overwrite it with ova.va_mode. 
3178197831Spjd */ 3179197831Spjd saved_mode = vap->va_mode; 3180197831Spjd } 3181168404Spjd } 3182168404Spjd err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3183185029Spjd (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3184168404Spjd if (err) { 3185168404Spjd ZFS_EXIT(zfsvfs); 3186168404Spjd return (err); 3187168404Spjd } 3188168404Spjd 3189197831Spjd if (trim_mask) { 3190168404Spjd vap->va_mask |= saved_mask; 3191197831Spjd if (trim_mask & AT_MODE) { 3192197831Spjd /* 3193197831Spjd * Recover the mode after 3194197831Spjd * secpolicy_vnode_setattr(). 3195197831Spjd */ 3196197831Spjd vap->va_mode = saved_mode; 3197197831Spjd } 3198197831Spjd } 3199168404Spjd } 3200168404Spjd 3201168404Spjd /* 3202168404Spjd * secpolicy_vnode_setattr, or take ownership may have 3203168404Spjd * changed va_mask 3204168404Spjd */ 3205168404Spjd mask = vap->va_mask; 3206168404Spjd 3207219089Spjd if ((mask & (AT_UID | AT_GID))) { 3208219089Spjd err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 3209219089Spjd &xattr_obj, sizeof (xattr_obj)); 3210168404Spjd 3211219089Spjd if (err == 0 && xattr_obj) { 3212219089Spjd err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 3213306818Savg if (err == 0) { 3214306818Savg err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE); 3215306818Savg if (err != 0) 3216306818Savg vrele(ZTOV(attrzp)); 3217306818Savg } 3218209962Smm if (err) 3219219089Spjd goto out2; 3220168404Spjd } 3221209962Smm if (mask & AT_UID) { 3222209962Smm new_uid = zfs_fuid_create(zfsvfs, 3223209962Smm (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3224219089Spjd if (new_uid != zp->z_uid && 3225219089Spjd zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 3226219089Spjd if (attrzp) 3227306818Savg vput(ZTOV(attrzp)); 3228249195Smm err = SET_ERROR(EDQUOT); 3229219089Spjd goto out2; 3230209962Smm } 3231209962Smm } 3232209962Smm 3233209962Smm if (mask & AT_GID) { 3234209962Smm new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 3235209962Smm cr, ZFS_GROUP, &fuidp); 3236219089Spjd 
if (new_gid != zp->z_gid && 3237219089Spjd zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 3238219089Spjd if (attrzp) 3239306818Savg vput(ZTOV(attrzp)); 3240249195Smm err = SET_ERROR(EDQUOT); 3241219089Spjd goto out2; 3242209962Smm } 3243209962Smm } 3244219089Spjd } 3245219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 3246219089Spjd 3247219089Spjd if (mask & AT_MODE) { 3248219089Spjd uint64_t pmode = zp->z_mode; 3249219089Spjd uint64_t acl_obj; 3250219089Spjd new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3251219089Spjd 3252243560Smm if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 3253243560Smm !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3254249195Smm err = SET_ERROR(EPERM); 3255243560Smm goto out; 3256243560Smm } 3257243560Smm 3258224174Smm if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3259224174Smm goto out; 3260219089Spjd 3261219089Spjd if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 3262219089Spjd /* 3263219089Spjd * Are we upgrading ACL from old V0 format 3264219089Spjd * to V1 format? 
3265219089Spjd */ 3266219089Spjd if (zfsvfs->z_version >= ZPL_VERSION_FUID && 3267219089Spjd zfs_znode_acl_version(zp) == 3268219089Spjd ZFS_ACL_VERSION_INITIAL) { 3269219089Spjd dmu_tx_hold_free(tx, acl_obj, 0, 3270219089Spjd DMU_OBJECT_END); 3271219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3272219089Spjd 0, aclp->z_acl_bytes); 3273209962Smm } else { 3274219089Spjd dmu_tx_hold_write(tx, acl_obj, 0, 3275219089Spjd aclp->z_acl_bytes); 3276209962Smm } 3277219089Spjd } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 3278219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3279219089Spjd 0, aclp->z_acl_bytes); 3280209962Smm } 3281219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3282219089Spjd } else { 3283219089Spjd if ((mask & AT_XVATTR) && 3284219089Spjd XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3285219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3286219089Spjd else 3287219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3288168404Spjd } 3289168404Spjd 3290219089Spjd if (attrzp) { 3291219089Spjd dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 3292219089Spjd } 3293219089Spjd 3294219089Spjd fuid_dirtied = zfsvfs->z_fuid_dirty; 3295219089Spjd if (fuid_dirtied) 3296219089Spjd zfs_fuid_txhold(zfsvfs, tx); 3297219089Spjd 3298219089Spjd zfs_sa_upgrade_txholds(tx, zp); 3299219089Spjd 3300258720Savg err = dmu_tx_assign(tx, TXG_WAIT); 3301258720Savg if (err) 3302209962Smm goto out; 3303168404Spjd 3304219089Spjd count = 0; 3305168404Spjd /* 3306168404Spjd * Set each attribute requested. 3307168404Spjd * We group settings according to the locks they need to acquire. 3308168404Spjd * 3309168404Spjd * Note: you cannot set ctime directly, although it will be 3310168404Spjd * updated as a side-effect of calling this function. 
3311168404Spjd */ 3312168404Spjd 3313219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3314219089Spjd mutex_enter(&zp->z_acl_lock); 3315168404Spjd 3316219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3317219089Spjd &zp->z_pflags, sizeof (zp->z_pflags)); 3318219089Spjd 3319219089Spjd if (attrzp) { 3320219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3321219089Spjd mutex_enter(&attrzp->z_acl_lock); 3322219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3323219089Spjd SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3324219089Spjd sizeof (attrzp->z_pflags)); 3325219089Spjd } 3326219089Spjd 3327219089Spjd if (mask & (AT_UID|AT_GID)) { 3328219089Spjd 3329219089Spjd if (mask & AT_UID) { 3330219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 3331219089Spjd &new_uid, sizeof (new_uid)); 3332219089Spjd zp->z_uid = new_uid; 3333219089Spjd if (attrzp) { 3334219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3335219089Spjd SA_ZPL_UID(zfsvfs), NULL, &new_uid, 3336219089Spjd sizeof (new_uid)); 3337219089Spjd attrzp->z_uid = new_uid; 3338219089Spjd } 3339219089Spjd } 3340219089Spjd 3341219089Spjd if (mask & AT_GID) { 3342219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 3343219089Spjd NULL, &new_gid, sizeof (new_gid)); 3344219089Spjd zp->z_gid = new_gid; 3345219089Spjd if (attrzp) { 3346219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3347219089Spjd SA_ZPL_GID(zfsvfs), NULL, &new_gid, 3348219089Spjd sizeof (new_gid)); 3349219089Spjd attrzp->z_gid = new_gid; 3350219089Spjd } 3351219089Spjd } 3352219089Spjd if (!(mask & AT_MODE)) { 3353219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 3354219089Spjd NULL, &new_mode, sizeof (new_mode)); 3355219089Spjd new_mode = zp->z_mode; 3356219089Spjd } 3357219089Spjd err = zfs_acl_chown_setattr(zp); 3358219089Spjd ASSERT(err == 0); 3359219089Spjd if (attrzp) { 3360219089Spjd err = zfs_acl_chown_setattr(attrzp); 3361219089Spjd ASSERT(err == 0); 3362219089Spjd } 3363219089Spjd } 
3364219089Spjd 3365168404Spjd if (mask & AT_MODE) { 3366219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 3367219089Spjd &new_mode, sizeof (new_mode)); 3368219089Spjd zp->z_mode = new_mode; 3369219089Spjd ASSERT3U((uintptr_t)aclp, !=, 0); 3370209962Smm err = zfs_aclset_common(zp, aclp, cr, tx); 3371240415Smm ASSERT0(err); 3372219089Spjd if (zp->z_acl_cached) 3373219089Spjd zfs_acl_free(zp->z_acl_cached); 3374211932Smm zp->z_acl_cached = aclp; 3375211932Smm aclp = NULL; 3376168404Spjd } 3377168404Spjd 3378168404Spjd 3379219089Spjd if (mask & AT_ATIME) { 3380219089Spjd ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 3381219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 3382219089Spjd &zp->z_atime, sizeof (zp->z_atime)); 3383168404Spjd } 3384168404Spjd 3385219089Spjd if (mask & AT_MTIME) { 3386219089Spjd ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 3387219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 3388219089Spjd mtime, sizeof (mtime)); 3389168404Spjd } 3390168404Spjd 3391185029Spjd /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 3392219089Spjd if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3393219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3394219089Spjd NULL, mtime, sizeof (mtime)); 3395219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3396219089Spjd &ctime, sizeof (ctime)); 3397219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 3398219089Spjd B_TRUE); 3399219089Spjd } else if (mask != 0) { 3400219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3401219089Spjd &ctime, sizeof (ctime)); 3402219089Spjd zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 3403219089Spjd B_TRUE); 3404219089Spjd if (attrzp) { 3405219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3406219089Spjd SA_ZPL_CTIME(zfsvfs), NULL, 3407219089Spjd &ctime, sizeof (ctime)); 3408219089Spjd zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 3409219089Spjd mtime, ctime, B_TRUE); 3410219089Spjd } 3411219089Spjd } 3412185029Spjd /* 3413185029Spjd * Do this after setting timestamps to prevent timestamp 3414185029Spjd * update from toggling bit 3415185029Spjd */ 3416168404Spjd 3417185029Spjd if (xoap && (mask & AT_XVATTR)) { 3418209962Smm 3419316391Sasomers if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) 3420316391Sasomers xoap->xoa_createtime = vap->va_birthtime; 3421209962Smm /* 3422209962Smm * restore trimmed off masks 3423209962Smm * so that return masks can be set for caller. 
3424209962Smm */ 3425209962Smm 3426209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3427209962Smm XVA_SET_REQ(xvap, XAT_APPENDONLY); 3428209962Smm } 3429209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3430209962Smm XVA_SET_REQ(xvap, XAT_NOUNLINK); 3431209962Smm } 3432209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3433209962Smm XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3434209962Smm } 3435209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3436209962Smm XVA_SET_REQ(xvap, XAT_NODUMP); 3437209962Smm } 3438209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3439209962Smm XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3440209962Smm } 3441209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3442209962Smm XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3443209962Smm } 3444209962Smm 3445219089Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3446185029Spjd ASSERT(vp->v_type == VREG); 3447185029Spjd 3448219089Spjd zfs_xvattr_set(zp, xvap, tx); 3449185029Spjd } 3450185029Spjd 3451209962Smm if (fuid_dirtied) 3452209962Smm zfs_fuid_sync(zfsvfs, tx); 3453209962Smm 3454168404Spjd if (mask != 0) 3455185029Spjd zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3456168404Spjd 3457219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3458219089Spjd mutex_exit(&zp->z_acl_lock); 3459168404Spjd 3460219089Spjd if (attrzp) { 3461219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3462219089Spjd mutex_exit(&attrzp->z_acl_lock); 3463219089Spjd } 3464209962Smmout: 3465219089Spjd if (err == 0 && attrzp) { 3466219089Spjd err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 3467219089Spjd xattr_count, tx); 3468219089Spjd ASSERT(err2 == 0); 3469219089Spjd } 3470219089Spjd 3471168404Spjd if (attrzp) 3472306818Savg vput(ZTOV(attrzp)); 3473251631Sdelphij 3474211932Smm if (aclp) 3475209962Smm zfs_acl_free(aclp); 3476168404Spjd 3477209962Smm if (fuidp) { 3478209962Smm zfs_fuid_info_free(fuidp); 3479209962Smm fuidp = NULL; 3480209962Smm } 3481209962Smm 3482219089Spjd if (err) { 
/*
 * Acquire the four vnode locks needed by a rename and re-resolve the
 * source and target entries under those locks.
 *
 * We acquire all but fdvp locks using non-blocking acquisitions.  If we
 * fail to acquire any lock in the path we will drop all held locks,
 * acquire the new lock in a blocking fashion, and then release it and
 * restart the rename.  This acquire/release step ensures that we do not
 * spin on a lock waiting for release.  On error release all vnode locks
 * and decrement references the way tmpfs_rename() would do.
 * ("fdvp" above presumably refers to the source directory, named sdvp
 * in this function -- it is the only vnode locked with a blocking
 * vn_lock() on the main path.)
 *
 *	IN:	sdvp	- source directory vnode.
 *		tdvp	- target directory vnode; unlocked first thing here,
 *			  so presumably locked by the caller on entry.
 *		scnp	- source component name (cn_nameptr is looked up
 *			  in sdvp).
 *		tcnp	- target component name (cn_nameptr is looked up
 *			  in tdvp).
 *
 *	IN/OUT:	svpp	- source vnode; replaced by the freshly resolved
 *			  vnode, the reference on the old one is dropped.
 *		tvpp	- target vnode or NULL; unlocked on entry when it is
 *			  non-NULL and differs from tdvp.  Replaced by the
 *			  freshly resolved vnode, or set to NULL when the
 *			  target name no longer resolves to a znode.
 *
 *	RETURN:	0 with sdvp, tdvp, *svpp and (if non-NULL) *tvpp locked
 *		exclusively; otherwise an error code with every lock taken
 *		here released again.
 */
static int
zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
    struct vnode *tdvp, struct vnode **tvpp,
    const struct componentname *scnp, const struct componentname *tcnp)
{
	zfsvfs_t	*zfsvfs;
	struct vnode	*nvp, *svp, *tvp;
	znode_t		*sdzp, *tdzp, *szp, *tzp;
	const char	*snm = scnp->cn_nameptr;
	const char	*tnm = tcnp->cn_nameptr;
	int error;

	/* Start from a state where none of the four vnodes is locked by us. */
	VOP_UNLOCK(tdvp, 0);
	if (*tvpp != NULL && *tvpp != tdvp)
		VOP_UNLOCK(*tvpp, 0);

relock:
	/* The source directory is the one lock we are willing to sleep on. */
	error = vn_lock(sdvp, LK_EXCLUSIVE);
	if (error)
		goto out;
	sdzp = VTOZ(sdvp);

	error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		VOP_UNLOCK(sdvp, 0);
		if (error != EBUSY)
			goto out;
		/*
		 * Contended: wait for tdvp while holding nothing else, then
		 * drop it again and restart from the top.
		 */
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (error)
			goto out;
		VOP_UNLOCK(tdvp, 0);
		goto relock;
	}
	tdzp = VTOZ(tdvp);

	/*
	 * Before using sdzp and tdzp we must ensure that they are live.
	 * As a porting legacy from illumos we have two things to worry
	 * about.  One is typical for FreeBSD and it is that the vnode is
	 * not reclaimed (doomed).  The other is that the znode is live.
	 * The current code can invalidate the znode without acquiring the
	 * corresponding vnode lock if the object represented by the znode
	 * and vnode is no longer valid after a rollback or receive operation.
	 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock
	 * that protects the znodes from the invalidation.
	 */
	zfsvfs = sdzp->z_zfsvfs;
	ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
	/*
	 * NOTE(review): if ZFS_ENTER() can return directly on failure (as the
	 * comment below says ZFS_VERIFY_ZP() can), sdvp and tdvp would stay
	 * locked on that path -- confirm against the macro definition.
	 */
	ZFS_ENTER(zfsvfs);

	/*
	 * We can not use ZFS_VERIFY_ZP() here because it could directly return
	 * bypassing the cleanup code in the case of an error.
	 */
	if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) {
		ZFS_EXIT(zfsvfs);
		VOP_UNLOCK(sdvp, 0);
		VOP_UNLOCK(tdvp, 0);
		error = SET_ERROR(EIO);
		goto out;
	}

	/*
	 * Re-resolve svp to be certain it still exists and fetch the
	 * correct vnode.
	 */
	error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS);
	if (error != 0) {
		/* Source entry invalid or not there. */
		ZFS_EXIT(zfsvfs);
		VOP_UNLOCK(sdvp, 0);
		VOP_UNLOCK(tdvp, 0);
		/* A failed lookup of "." or ".." is reported as EINVAL. */
		if ((scnp->cn_flags & ISDOTDOT) != 0 ||
		    (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
			error = SET_ERROR(EINVAL);
		goto out;
	}
	/* svp is released with vrele() on every failure path below. */
	svp = ZTOV(szp);

	/*
	 * Re-resolve tvp, if it disappeared we just carry on.
	 */
	error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0);
	if (error != 0) {
		ZFS_EXIT(zfsvfs);
		VOP_UNLOCK(sdvp, 0);
		VOP_UNLOCK(tdvp, 0);
		vrele(svp);
		if ((tcnp->cn_flags & ISDOTDOT) != 0)
			error = SET_ERROR(EINVAL);
		goto out;
	}
	/* A successful lookup with tzp == NULL means there is no target. */
	if (tzp != NULL)
		tvp = ZTOV(tzp);
	else
		tvp = NULL;

	/*
	 * At present the vnode locks must be acquired before z_teardown_lock,
	 * although it would be more logical to use the opposite order.
	 */
	ZFS_EXIT(zfsvfs);

	/*
	 * Now try acquire locks on svp and tvp.
	 */
	nvp = svp;
	error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		/* Back out completely before sleeping or failing. */
		VOP_UNLOCK(sdvp, 0);
		VOP_UNLOCK(tdvp, 0);
		if (tvp != NULL)
			vrele(tvp);
		if (error != EBUSY) {
			vrele(nvp);
			goto out;
		}
		/* Wait for the lock to become free, then let go of it. */
		error = vn_lock(nvp, LK_EXCLUSIVE);
		if (error != 0) {
			vrele(nvp);
			goto out;
		}
		VOP_UNLOCK(nvp, 0);
		/*
		 * Concurrent rename race.
		 * XXX ?
		 */
		if (nvp == tdvp) {
			vrele(nvp);
			error = SET_ERROR(EINVAL);
			goto out;
		}
		/* Hand the new reference to the caller's slot and retry. */
		vrele(*svpp);
		*svpp = nvp;
		goto relock;
	}
	/* Source locked: swap it into *svpp, dropping the old reference. */
	vrele(*svpp);
	*svpp = nvp;

	/* The caller's old target reference is dropped unconditionally. */
	if (*tvpp != NULL)
		vrele(*tvpp);
	*tvpp = NULL;
	if (tvp != NULL) {
		nvp = tvp;
		error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
		if (error != 0) {
			/* Release all three locks held so far. */
			VOP_UNLOCK(sdvp, 0);
			VOP_UNLOCK(tdvp, 0);
			VOP_UNLOCK(*svpp, 0);
			if (error != EBUSY) {
				vrele(nvp);
				goto out;
			}
			error = vn_lock(nvp, LK_EXCLUSIVE);
			if (error != 0) {
				vrele(nvp);
				goto out;
			}
			/* Unlock and release in one step, then start over. */
			vput(nvp);
			goto relock;
		}
		*tvpp = nvp;
	}

	return (0);

out:
	return (error);
}
3679303970Savg */ 3680303970Savgstatic int 3681303970Savgzfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp) 3682303970Savg{ 3683303970Savg zfsvfs_t *zfsvfs; 3684303970Savg znode_t *zp, *zp1; 3685303970Savg uint64_t parent; 3686303970Savg int error; 3687168404Spjd 3688303970Savg zfsvfs = tdzp->z_zfsvfs; 3689303970Savg if (tdzp == szp) 3690303970Savg return (SET_ERROR(EINVAL)); 3691303970Savg if (tdzp == sdzp) 3692303970Savg return (0); 3693303970Savg if (tdzp->z_id == zfsvfs->z_root) 3694303970Savg return (0); 3695303970Savg zp = tdzp; 3696303970Savg for (;;) { 3697303970Savg ASSERT(!zp->z_unlinked); 3698303970Savg if ((error = sa_lookup(zp->z_sa_hdl, 3699303970Savg SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) 3700303970Savg break; 3701303970Savg 3702303970Savg if (parent == szp->z_id) { 3703303970Savg error = SET_ERROR(EINVAL); 3704303970Savg break; 3705168404Spjd } 3706303970Savg if (parent == zfsvfs->z_root) 3707303970Savg break; 3708303970Savg if (parent == sdzp->z_id) 3709303970Savg break; 3710168404Spjd 3711303970Savg error = zfs_zget(zfsvfs, parent, &zp1); 3712303970Savg if (error != 0) 3713303970Savg break; 3714168404Spjd 3715303970Savg if (zp != tdzp) 3716303970Savg VN_RELE_ASYNC(ZTOV(zp), 3717303970Savg dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 3718303970Savg zp = zp1; 3719303970Savg } 3720303970Savg 3721303970Savg if (error == ENOTDIR) 3722303970Savg panic("checkpath: .. not a directory\n"); 3723303970Savg if (zp != tdzp) 3724303970Savg VN_RELE_ASYNC(ZTOV(zp), 3725303970Savg dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 3726303970Savg return (error); 3727168404Spjd} 3728168404Spjd 3729168404Spjd/* 3730168404Spjd * Move an entry from the provided source directory to the target 3731168404Spjd * directory. Change the entry name as indicated. 3732168404Spjd * 3733168404Spjd * IN: sdvp - Source directory containing the "old entry". 3734168404Spjd * snm - Old entry name. 
3735168404Spjd * tdvp - Target directory to contain the "new entry". 3736168404Spjd * tnm - New entry name. 3737168404Spjd * cr - credentials of caller. 3738185029Spjd * ct - caller context 3739185029Spjd * flags - case flags 3740168404Spjd * 3741251631Sdelphij * RETURN: 0 on success, error code on failure. 3742168404Spjd * 3743168404Spjd * Timestamps: 3744168404Spjd * sdvp,tdvp - ctime|mtime updated 3745168404Spjd */ 3746185029Spjd/*ARGSUSED*/ 3747168404Spjdstatic int 3748303970Savgzfs_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, 3749303970Savg vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp, 3750303970Savg cred_t *cr) 3751168404Spjd{ 3752303970Savg zfsvfs_t *zfsvfs; 3753303970Savg znode_t *sdzp, *tdzp, *szp, *tzp; 3754303970Savg zilog_t *zilog = NULL; 3755168404Spjd dmu_tx_t *tx; 3756303970Savg char *snm = scnp->cn_nameptr; 3757303970Savg char *tnm = tcnp->cn_nameptr; 3758185029Spjd int error = 0; 3759168404Spjd 3760303970Savg /* Reject renames across filesystems. */ 3761303970Savg if ((*svpp)->v_mount != tdvp->v_mount || 3762303970Savg ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) { 3763303970Savg error = SET_ERROR(EXDEV); 3764303970Savg goto out; 3765303970Savg } 3766168404Spjd 3767303970Savg if (zfsctl_is_node(tdvp)) { 3768303970Savg error = SET_ERROR(EXDEV); 3769303970Savg goto out; 3770303970Savg } 3771303970Savg 3772168962Spjd /* 3773303970Savg * Lock all four vnodes to ensure safety and semantics of renaming. 
3774168962Spjd */ 3775303970Savg error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp); 3776303970Savg if (error != 0) { 3777303970Savg /* no vnodes are locked in the case of error here */ 3778303970Savg return (error); 3779264392Sdavide } 3780168962Spjd 3781303970Savg tdzp = VTOZ(tdvp); 3782303970Savg sdzp = VTOZ(sdvp); 3783303970Savg zfsvfs = tdzp->z_zfsvfs; 3784303970Savg zilog = zfsvfs->z_log; 3785303970Savg 3786254585Sdelphij /* 3787303970Savg * After we re-enter ZFS_ENTER() we will have to revalidate all 3788303970Savg * znodes involved. 3789254585Sdelphij */ 3790303970Savg ZFS_ENTER(zfsvfs); 3791168404Spjd 3792185029Spjd if (zfsvfs->z_utf8 && u8_validate(tnm, 3793185029Spjd strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3794303970Savg error = SET_ERROR(EILSEQ); 3795303970Savg goto unlockout; 3796185029Spjd } 3797185029Spjd 3798303970Savg /* If source and target are the same file, there is nothing to do. */ 3799303970Savg if ((*svpp) == (*tvpp)) { 3800303970Savg error = 0; 3801303970Savg goto unlockout; 3802303970Savg } 3803185029Spjd 3804303970Savg if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) || 3805303970Savg ((*tvpp) != NULL && (*tvpp)->v_type == VDIR && 3806303970Savg (*tvpp)->v_mountedhere != NULL)) { 3807303970Savg error = SET_ERROR(EXDEV); 3808303970Savg goto unlockout; 3809303970Savg } 3810168404Spjd 3811168404Spjd /* 3812303970Savg * We can not use ZFS_VERIFY_ZP() here because it could directly return 3813303970Savg * bypassing the cleanup code in the case of an error. 3814168404Spjd */ 3815303970Savg if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 3816303970Savg error = SET_ERROR(EIO); 3817303970Savg goto unlockout; 3818168404Spjd } 3819168404Spjd 3820303970Savg szp = VTOZ(*svpp); 3821303970Savg tzp = *tvpp == NULL ? 
NULL : VTOZ(*tvpp); 3822303970Savg if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) { 3823303970Savg error = SET_ERROR(EIO); 3824303970Savg goto unlockout; 3825168962Spjd } 3826185029Spjd 3827208131Smm /* 3828303970Savg * This is to prevent the creation of links into attribute space 3829303970Savg * by renaming a linked file into/outof an attribute directory. 3830303970Savg * See the comment in zfs_link() for why this is considered bad. 3831208131Smm */ 3832303970Savg if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3833303970Savg error = SET_ERROR(EINVAL); 3834303970Savg goto unlockout; 3835208131Smm } 3836208131Smm 3837168404Spjd /* 3838168404Spjd * Must have write access at the source to remove the old entry 3839168404Spjd * and write access at the target to create the new entry. 3840168404Spjd * Note that if target and source are the same, this can be 3841168404Spjd * done in a single check. 3842168404Spjd */ 3843168404Spjd if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3844303970Savg goto unlockout; 3845168404Spjd 3846303970Savg if ((*svpp)->v_type == VDIR) { 3847168404Spjd /* 3848303970Savg * Avoid ".", "..", and aliases of "." for obvious reasons. 3849303970Savg */ 3850303970Savg if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') || 3851303970Savg sdzp == szp || 3852303970Savg (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { 3853303970Savg error = EINVAL; 3854303970Savg goto unlockout; 3855303970Savg } 3856303970Savg 3857303970Savg /* 3858168404Spjd * Check to make sure rename is valid. 3859168404Spjd * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3860168404Spjd */ 3861303970Savg if (error = zfs_rename_check(szp, sdzp, tdzp)) 3862303970Savg goto unlockout; 3863168404Spjd } 3864168404Spjd 3865168404Spjd /* 3866168404Spjd * Does target exist? 3867168404Spjd */ 3868168404Spjd if (tzp) { 3869168404Spjd /* 3870168404Spjd * Source and target must be the same type. 
3871168404Spjd */ 3872303970Savg if ((*svpp)->v_type == VDIR) { 3873303970Savg if ((*tvpp)->v_type != VDIR) { 3874249195Smm error = SET_ERROR(ENOTDIR); 3875303970Savg goto unlockout; 3876303970Savg } else { 3877303970Savg cache_purge(tdvp); 3878303970Savg if (sdvp != tdvp) 3879303970Savg cache_purge(sdvp); 3880168404Spjd } 3881168404Spjd } else { 3882303970Savg if ((*tvpp)->v_type == VDIR) { 3883249195Smm error = SET_ERROR(EISDIR); 3884303970Savg goto unlockout; 3885168404Spjd } 3886168404Spjd } 3887168404Spjd } 3888168404Spjd 3889303970Savg vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct); 3890168962Spjd if (tzp) 3891303970Savg vnevent_rename_dest(*tvpp, tdvp, tnm, ct); 3892168962Spjd 3893185029Spjd /* 3894185029Spjd * notify the target directory if it is not the same 3895185029Spjd * as source directory. 3896185029Spjd */ 3897185029Spjd if (tdvp != sdvp) { 3898185029Spjd vnevent_rename_dest_dir(tdvp, ct); 3899185029Spjd } 3900185029Spjd 3901168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 3902219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 3903219089Spjd dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3904168404Spjd dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3905168404Spjd dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 3906219089Spjd if (sdzp != tdzp) { 3907219089Spjd dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 3908219089Spjd zfs_sa_upgrade_txholds(tx, tdzp); 3909219089Spjd } 3910219089Spjd if (tzp) { 3911219089Spjd dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 3912219089Spjd zfs_sa_upgrade_txholds(tx, tzp); 3913219089Spjd } 3914219089Spjd 3915219089Spjd zfs_sa_upgrade_txholds(tx, szp); 3916168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3917303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 3918168404Spjd if (error) { 3919168404Spjd dmu_tx_abort(tx); 3920303970Savg goto unlockout; 3921168404Spjd } 3922168404Spjd 3923303970Savg 3924168404Spjd if (tzp) /* Attempt to remove the existing target */ 3925303970Savg error = zfs_link_destroy(tdzp, 
tnm, tzp, tx, 0, NULL); 3926168404Spjd 3927168404Spjd if (error == 0) { 3928303970Savg error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING); 3929168404Spjd if (error == 0) { 3930219089Spjd szp->z_pflags |= ZFS_AV_MODIFIED; 3931185029Spjd 3932219089Spjd error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 3933219089Spjd (void *)&szp->z_pflags, sizeof (uint64_t), tx); 3934240415Smm ASSERT0(error); 3935219089Spjd 3936303970Savg error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING, 3937303970Savg NULL); 3938219089Spjd if (error == 0) { 3939303970Savg zfs_log_rename(zilog, tx, TX_RENAME, sdzp, 3940303970Savg snm, tdzp, tnm, szp); 3941185029Spjd 3942219089Spjd /* 3943219089Spjd * Update path information for the target vnode 3944219089Spjd */ 3945303970Savg vn_renamepath(tdvp, *svpp, tnm, strlen(tnm)); 3946219089Spjd } else { 3947219089Spjd /* 3948219089Spjd * At this point, we have successfully created 3949219089Spjd * the target name, but have failed to remove 3950219089Spjd * the source name. Since the create was done 3951219089Spjd * with the ZRENAMING flag, there are 3952219089Spjd * complications; for one, the link count is 3953219089Spjd * wrong. The easiest way to deal with this 3954219089Spjd * is to remove the newly created target, and 3955219089Spjd * return the original error. This must 3956219089Spjd * succeed; fortunately, it is very unlikely to 3957219089Spjd * fail, since we just created it. 
3958219089Spjd */ 3959303970Savg VERIFY3U(zfs_link_destroy(tdzp, tnm, szp, tx, 3960219089Spjd ZRENAMING, NULL), ==, 0); 3961219089Spjd } 3962168404Spjd } 3963168404Spjd if (error == 0) { 3964303970Savg cache_purge(*svpp); 3965303970Savg if (*tvpp != NULL) 3966303970Savg cache_purge(*tvpp); 3967303970Savg cache_purge_negative(tdvp); 3968168404Spjd } 3969168404Spjd } 3970168404Spjd 3971168404Spjd dmu_tx_commit(tx); 3972168404Spjd 3973303970Savgunlockout: /* all 4 vnodes are locked, ZFS_ENTER called */ 3974303970Savg ZFS_EXIT(zfsvfs); 3975303970Savg VOP_UNLOCK(*svpp, 0); 3976303970Savg VOP_UNLOCK(sdvp, 0); 3977168404Spjd 3978303970Savgout: /* original two vnodes are locked */ 3979303970Savg if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3980219089Spjd zil_commit(zilog, 0); 3981219089Spjd 3982303970Savg if (*tvpp != NULL) 3983303970Savg VOP_UNLOCK(*tvpp, 0); 3984303970Savg if (tdvp != *tvpp) 3985303970Savg VOP_UNLOCK(tdvp, 0); 3986168404Spjd return (error); 3987168404Spjd} 3988168404Spjd 3989168404Spjd/* 3990168404Spjd * Insert the indicated symbolic reference entry into the directory. 3991168404Spjd * 3992168404Spjd * IN: dvp - Directory to contain new symbolic link. 3993168404Spjd * link - Name for new symlink entry. 3994168404Spjd * vap - Attributes of new entry. 3995168404Spjd * cr - credentials of caller. 3996185029Spjd * ct - caller context 3997185029Spjd * flags - case flags 3998168404Spjd * 3999251631Sdelphij * RETURN: 0 on success, error code on failure. 
4000168404Spjd * 4001168404Spjd * Timestamps: 4002168404Spjd * dvp - ctime|mtime updated 4003168404Spjd */ 4004185029Spjd/*ARGSUSED*/ 4005168404Spjdstatic int 4006185029Spjdzfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 4007185029Spjd cred_t *cr, kthread_t *td) 4008168404Spjd{ 4009168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 4010168404Spjd dmu_tx_t *tx; 4011168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4012185029Spjd zilog_t *zilog; 4013219089Spjd uint64_t len = strlen(link); 4014168404Spjd int error; 4015209962Smm zfs_acl_ids_t acl_ids; 4016209962Smm boolean_t fuid_dirtied; 4017219089Spjd uint64_t txtype = TX_SYMLINK; 4018185029Spjd int flags = 0; 4019168404Spjd 4020168962Spjd ASSERT(vap->va_type == VLNK); 4021168404Spjd 4022168404Spjd ZFS_ENTER(zfsvfs); 4023185029Spjd ZFS_VERIFY_ZP(dzp); 4024185029Spjd zilog = zfsvfs->z_log; 4025185029Spjd 4026185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 4027185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4028185029Spjd ZFS_EXIT(zfsvfs); 4029249195Smm return (SET_ERROR(EILSEQ)); 4030185029Spjd } 4031168404Spjd 4032168404Spjd if (len > MAXPATHLEN) { 4033168404Spjd ZFS_EXIT(zfsvfs); 4034249195Smm return (SET_ERROR(ENAMETOOLONG)); 4035168404Spjd } 4036168404Spjd 4037219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, 4038219089Spjd vap, cr, NULL, &acl_ids)) != 0) { 4039219089Spjd ZFS_EXIT(zfsvfs); 4040219089Spjd return (error); 4041219089Spjd } 4042260704Savg 4043168404Spjd /* 4044168404Spjd * Attempt to lock directory; fail if entry already exists. 
4045168404Spjd */ 4046303970Savg error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 4047185029Spjd if (error) { 4048219089Spjd zfs_acl_ids_free(&acl_ids); 4049168404Spjd ZFS_EXIT(zfsvfs); 4050168404Spjd return (error); 4051168404Spjd } 4052168404Spjd 4053219089Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4054219089Spjd zfs_acl_ids_free(&acl_ids); 4055219089Spjd ZFS_EXIT(zfsvfs); 4056219089Spjd return (error); 4057219089Spjd } 4058219089Spjd 4059209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 4060209962Smm zfs_acl_ids_free(&acl_ids); 4061209962Smm ZFS_EXIT(zfsvfs); 4062249195Smm return (SET_ERROR(EDQUOT)); 4063209962Smm } 4064303970Savg 4065303970Savg getnewvnode_reserve(1); 4066168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4067209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 4068168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 4069168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4070219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 4071219089Spjd ZFS_SA_BASE_ATTR_SIZE + len); 4072219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 4073219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 4074219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 4075219089Spjd acl_ids.z_aclp->z_acl_bytes); 4076219089Spjd } 4077209962Smm if (fuid_dirtied) 4078209962Smm zfs_fuid_txhold(zfsvfs, tx); 4079303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 4080168404Spjd if (error) { 4081219089Spjd zfs_acl_ids_free(&acl_ids); 4082168404Spjd dmu_tx_abort(tx); 4083260704Savg getnewvnode_drop_reserve(); 4084168404Spjd ZFS_EXIT(zfsvfs); 4085168404Spjd return (error); 4086168404Spjd } 4087168404Spjd 4088168404Spjd /* 4089168404Spjd * Create a new object for the symlink. 
4090219089Spjd * for version 4 ZPL datsets the symlink will be an SA attribute 4091168404Spjd */ 4092219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 4093168404Spjd 4094219089Spjd if (fuid_dirtied) 4095219089Spjd zfs_fuid_sync(zfsvfs, tx); 4096209962Smm 4097219089Spjd if (zp->z_is_sa) 4098219089Spjd error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 4099219089Spjd link, len, tx); 4100219089Spjd else 4101219089Spjd zfs_sa_symlink(zp, link, len, tx); 4102168404Spjd 4103219089Spjd zp->z_size = len; 4104219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 4105219089Spjd &zp->z_size, sizeof (zp->z_size), tx); 4106168404Spjd /* 4107168404Spjd * Insert the new object into the directory. 4108168404Spjd */ 4109303970Savg (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 4110168404Spjd 4111219089Spjd zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 4112219089Spjd *vpp = ZTOV(zp); 4113219089Spjd 4114209962Smm zfs_acl_ids_free(&acl_ids); 4115209962Smm 4116168404Spjd dmu_tx_commit(tx); 4117168404Spjd 4118260704Savg getnewvnode_drop_reserve(); 4119260704Savg 4120219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4121219089Spjd zil_commit(zilog, 0); 4122219089Spjd 4123168404Spjd ZFS_EXIT(zfsvfs); 4124168404Spjd return (error); 4125168404Spjd} 4126168404Spjd 4127168404Spjd/* 4128168404Spjd * Return, in the buffer contained in the provided uio structure, 4129168404Spjd * the symbolic path referred to by vp. 4130168404Spjd * 4131168404Spjd * IN: vp - vnode of symbolic link. 4132251631Sdelphij * uio - structure to contain the link path. 4133168404Spjd * cr - credentials of caller. 4134185029Spjd * ct - caller context 4135168404Spjd * 4136251631Sdelphij * OUT: uio - structure containing the link path. 4137168404Spjd * 4138251631Sdelphij * RETURN: 0 on success, error code on failure. 
4139168404Spjd * 4140168404Spjd * Timestamps: 4141168404Spjd * vp - atime updated 4142168404Spjd */ 4143168404Spjd/* ARGSUSED */ 4144168404Spjdstatic int 4145185029Spjdzfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 4146168404Spjd{ 4147168404Spjd znode_t *zp = VTOZ(vp); 4148168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4149168404Spjd int error; 4150168404Spjd 4151168404Spjd ZFS_ENTER(zfsvfs); 4152185029Spjd ZFS_VERIFY_ZP(zp); 4153168404Spjd 4154219089Spjd if (zp->z_is_sa) 4155219089Spjd error = sa_lookup_uio(zp->z_sa_hdl, 4156219089Spjd SA_ZPL_SYMLINK(zfsvfs), uio); 4157219089Spjd else 4158219089Spjd error = zfs_sa_readlink(zp, uio); 4159168404Spjd 4160168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4161219089Spjd 4162168404Spjd ZFS_EXIT(zfsvfs); 4163168404Spjd return (error); 4164168404Spjd} 4165168404Spjd 4166168404Spjd/* 4167168404Spjd * Insert a new entry into directory tdvp referencing svp. 4168168404Spjd * 4169168404Spjd * IN: tdvp - Directory to contain new entry. 4170168404Spjd * svp - vnode of new entry. 4171168404Spjd * name - name of new entry. 4172168404Spjd * cr - credentials of caller. 4173185029Spjd * ct - caller context 4174168404Spjd * 4175251631Sdelphij * RETURN: 0 on success, error code on failure. 
4176168404Spjd * 4177168404Spjd * Timestamps: 4178168404Spjd * tdvp - ctime|mtime updated 4179168404Spjd * svp - ctime updated 4180168404Spjd */ 4181168404Spjd/* ARGSUSED */ 4182168404Spjdstatic int 4183185029Spjdzfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 4184185029Spjd caller_context_t *ct, int flags) 4185168404Spjd{ 4186168404Spjd znode_t *dzp = VTOZ(tdvp); 4187168404Spjd znode_t *tzp, *szp; 4188168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4189185029Spjd zilog_t *zilog; 4190168404Spjd dmu_tx_t *tx; 4191168404Spjd int error; 4192212694Smm uint64_t parent; 4193185029Spjd uid_t owner; 4194168404Spjd 4195168404Spjd ASSERT(tdvp->v_type == VDIR); 4196168404Spjd 4197168404Spjd ZFS_ENTER(zfsvfs); 4198185029Spjd ZFS_VERIFY_ZP(dzp); 4199185029Spjd zilog = zfsvfs->z_log; 4200168404Spjd 4201212694Smm /* 4202212694Smm * POSIX dictates that we return EPERM here. 4203212694Smm * Better choices include ENOTSUP or EISDIR. 4204212694Smm */ 4205212694Smm if (svp->v_type == VDIR) { 4206168404Spjd ZFS_EXIT(zfsvfs); 4207249195Smm return (SET_ERROR(EPERM)); 4208212694Smm } 4209212694Smm 4210254585Sdelphij szp = VTOZ(svp); 4211254585Sdelphij ZFS_VERIFY_ZP(szp); 4212254585Sdelphij 4213258597Spjd if (szp->z_pflags & (ZFS_APPENDONLY | ZFS_IMMUTABLE | ZFS_READONLY)) { 4214258597Spjd ZFS_EXIT(zfsvfs); 4215258597Spjd return (SET_ERROR(EPERM)); 4216258597Spjd } 4217258597Spjd 4218212694Smm /* Prevent links to .zfs/shares files */ 4219212694Smm 4220219089Spjd if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 4221219089Spjd &parent, sizeof (uint64_t))) != 0) { 4222212694Smm ZFS_EXIT(zfsvfs); 4223219089Spjd return (error); 4224219089Spjd } 4225219089Spjd if (parent == zfsvfs->z_shares_dir) { 4226219089Spjd ZFS_EXIT(zfsvfs); 4227249195Smm return (SET_ERROR(EPERM)); 4228212694Smm } 4229212694Smm 4230185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, 4231185029Spjd strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4232185029Spjd ZFS_EXIT(zfsvfs); 4233249195Smm 
return (SET_ERROR(EILSEQ)); 4234185029Spjd } 4235185029Spjd 4236168404Spjd /* 4237168404Spjd * We do not support links between attributes and non-attributes 4238168404Spjd * because of the potential security risk of creating links 4239168404Spjd * into "normal" file space in order to circumvent restrictions 4240168404Spjd * imposed in attribute space. 4241168404Spjd */ 4242219089Spjd if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4243168404Spjd ZFS_EXIT(zfsvfs); 4244249195Smm return (SET_ERROR(EINVAL)); 4245168404Spjd } 4246168404Spjd 4247168404Spjd 4248219089Spjd owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4249219089Spjd if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { 4250168404Spjd ZFS_EXIT(zfsvfs); 4251249195Smm return (SET_ERROR(EPERM)); 4252168404Spjd } 4253168404Spjd 4254185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4255168404Spjd ZFS_EXIT(zfsvfs); 4256168404Spjd return (error); 4257168404Spjd } 4258168404Spjd 4259168404Spjd /* 4260168404Spjd * Attempt to lock directory; fail if entry already exists. 
4261168404Spjd */ 4262303970Savg error = zfs_dirent_lookup(dzp, name, &tzp, ZNEW); 4263185029Spjd if (error) { 4264168404Spjd ZFS_EXIT(zfsvfs); 4265168404Spjd return (error); 4266168404Spjd } 4267168404Spjd 4268168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4269219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4270168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4271219089Spjd zfs_sa_upgrade_txholds(tx, szp); 4272219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 4273303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 4274168404Spjd if (error) { 4275168404Spjd dmu_tx_abort(tx); 4276168404Spjd ZFS_EXIT(zfsvfs); 4277168404Spjd return (error); 4278168404Spjd } 4279168404Spjd 4280303970Savg error = zfs_link_create(dzp, name, szp, tx, 0); 4281168404Spjd 4282185029Spjd if (error == 0) { 4283185029Spjd uint64_t txtype = TX_LINK; 4284185029Spjd zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4285185029Spjd } 4286168404Spjd 4287168404Spjd dmu_tx_commit(tx); 4288168404Spjd 4289185029Spjd if (error == 0) { 4290185029Spjd vnevent_link(svp, ct); 4291185029Spjd } 4292185029Spjd 4293219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4294219089Spjd zil_commit(zilog, 0); 4295219089Spjd 4296168404Spjd ZFS_EXIT(zfsvfs); 4297168404Spjd return (error); 4298168404Spjd} 4299168404Spjd 4300219089Spjd 4301185029Spjd/*ARGSUSED*/ 4302168962Spjdvoid 4303185029Spjdzfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4304168404Spjd{ 4305168962Spjd znode_t *zp = VTOZ(vp); 4306168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4307168962Spjd int error; 4308168404Spjd 4309185029Spjd rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4310219089Spjd if (zp->z_sa_hdl == NULL) { 4311185029Spjd /* 4312185029Spjd * The fs has been unmounted, or we did a 4313185029Spjd * suspend/resume and this file no longer exists. 
4314185029Spjd */ 4315243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 4316234607Strasz vrecycle(vp); 4317243520Savg return; 4318243520Savg } 4319243520Savg 4320243520Savg if (zp->z_unlinked) { 4321243520Savg /* 4322243520Savg * Fast path to recycle a vnode of a removed file. 4323243520Savg */ 4324185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4325243520Savg vrecycle(vp); 4326168962Spjd return; 4327168404Spjd } 4328168404Spjd 4329168404Spjd if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4330168404Spjd dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4331168404Spjd 4332219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4333219089Spjd zfs_sa_upgrade_txholds(tx, zp); 4334168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 4335168404Spjd if (error) { 4336168404Spjd dmu_tx_abort(tx); 4337168404Spjd } else { 4338219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 4339219089Spjd (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4340168404Spjd zp->z_atime_dirty = 0; 4341168404Spjd dmu_tx_commit(tx); 4342168404Spjd } 4343168404Spjd } 4344185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4345168404Spjd} 4346168404Spjd 4347219089Spjd 4348168404SpjdCTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 4349168404SpjdCTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 4350168404Spjd 4351185029Spjd/*ARGSUSED*/ 4352168404Spjdstatic int 4353185029Spjdzfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 4354168404Spjd{ 4355168404Spjd znode_t *zp = VTOZ(vp); 4356168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4357185029Spjd uint32_t gen; 4358219089Spjd uint64_t gen64; 4359168404Spjd uint64_t object = zp->z_id; 4360168404Spjd zfid_short_t *zfid; 4361219089Spjd int size, i, error; 4362168404Spjd 4363168404Spjd ZFS_ENTER(zfsvfs); 4364185029Spjd ZFS_VERIFY_ZP(zp); 4365168404Spjd 4366219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 4367219089Spjd &gen64, sizeof (uint64_t))) != 0) { 4368219089Spjd ZFS_EXIT(zfsvfs); 4369219089Spjd return 
(error); 4370219089Spjd } 4371219089Spjd 4372219089Spjd gen = (uint32_t)gen64; 4373219089Spjd 4374168404Spjd size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 4375249195Smm 4376249195Smm#ifdef illumos 4377249195Smm if (fidp->fid_len < size) { 4378249195Smm fidp->fid_len = size; 4379249195Smm ZFS_EXIT(zfsvfs); 4380249195Smm return (SET_ERROR(ENOSPC)); 4381249195Smm } 4382249195Smm#else 4383168404Spjd fidp->fid_len = size; 4384249195Smm#endif 4385168404Spjd 4386168404Spjd zfid = (zfid_short_t *)fidp; 4387168404Spjd 4388168404Spjd zfid->zf_len = size; 4389168404Spjd 4390168404Spjd for (i = 0; i < sizeof (zfid->zf_object); i++) 4391168404Spjd zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 4392168404Spjd 4393168404Spjd /* Must have a non-zero generation number to distinguish from .zfs */ 4394168404Spjd if (gen == 0) 4395168404Spjd gen = 1; 4396168404Spjd for (i = 0; i < sizeof (zfid->zf_gen); i++) 4397168404Spjd zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 4398168404Spjd 4399168404Spjd if (size == LONG_FID_LEN) { 4400168404Spjd uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 4401169023Spjd zfid_long_t *zlfid; 4402168404Spjd 4403168404Spjd zlfid = (zfid_long_t *)fidp; 4404168404Spjd 4405168404Spjd for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4406168404Spjd zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4407168404Spjd 4408168404Spjd /* XXX - this should be the generation number for the objset */ 4409168404Spjd for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 4410168404Spjd zlfid->zf_setgen[i] = 0; 4411168404Spjd } 4412168404Spjd 4413168404Spjd ZFS_EXIT(zfsvfs); 4414168404Spjd return (0); 4415168404Spjd} 4416168404Spjd 4417168404Spjdstatic int 4418185029Spjdzfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 4419185029Spjd caller_context_t *ct) 4420168404Spjd{ 4421168404Spjd znode_t *zp, *xzp; 4422168404Spjd zfsvfs_t *zfsvfs; 4423168404Spjd int error; 4424168404Spjd 4425168404Spjd switch (cmd) { 4426168404Spjd case _PC_LINK_MAX: 
4427168404Spjd *valp = INT_MAX; 4428168404Spjd return (0); 4429168404Spjd 4430168404Spjd case _PC_FILESIZEBITS: 4431168404Spjd *valp = 64; 4432168404Spjd return (0); 4433277300Ssmh#ifdef illumos 4434168404Spjd case _PC_XATTR_EXISTS: 4435168404Spjd zp = VTOZ(vp); 4436168404Spjd zfsvfs = zp->z_zfsvfs; 4437168404Spjd ZFS_ENTER(zfsvfs); 4438185029Spjd ZFS_VERIFY_ZP(zp); 4439168404Spjd *valp = 0; 4440303970Savg error = zfs_dirent_lookup(zp, "", &xzp, 4441303970Savg ZXATTR | ZEXISTS | ZSHARED); 4442168404Spjd if (error == 0) { 4443168404Spjd if (!zfs_dirempty(xzp)) 4444168404Spjd *valp = 1; 4445303970Savg vrele(ZTOV(xzp)); 4446168404Spjd } else if (error == ENOENT) { 4447168404Spjd /* 4448168404Spjd * If there aren't extended attributes, it's the 4449168404Spjd * same as having zero of them. 4450168404Spjd */ 4451168404Spjd error = 0; 4452168404Spjd } 4453168404Spjd ZFS_EXIT(zfsvfs); 4454168404Spjd return (error); 4455168404Spjd 4456219089Spjd case _PC_SATTR_ENABLED: 4457219089Spjd case _PC_SATTR_EXISTS: 4458219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 4459219089Spjd (vp->v_type == VREG || vp->v_type == VDIR); 4460219089Spjd return (0); 4461219089Spjd 4462219089Spjd case _PC_ACCESS_FILTERING: 4463219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 4464219089Spjd vp->v_type == VDIR; 4465219089Spjd return (0); 4466219089Spjd 4467219089Spjd case _PC_ACL_ENABLED: 4468219089Spjd *valp = _ACL_ACE_ENABLED; 4469219089Spjd return (0); 4470277300Ssmh#endif /* illumos */ 4471219089Spjd case _PC_MIN_HOLE_SIZE: 4472219089Spjd *valp = (int)SPA_MINBLOCKSIZE; 4473219089Spjd return (0); 4474277300Ssmh#ifdef illumos 4475219089Spjd case _PC_TIMESTAMP_RESOLUTION: 4476219089Spjd /* nanosecond timestamp resolution */ 4477219089Spjd *valp = 1L; 4478219089Spjd return (0); 4479277300Ssmh#endif 4480168404Spjd case _PC_ACL_EXTENDED: 4481196949Strasz *valp = 0; 4482168404Spjd return (0); 4483168404Spjd 4484196949Strasz case _PC_ACL_NFS4: 4485196949Strasz 
*valp = 1; 4486196949Strasz return (0); 4487196949Strasz 4488196949Strasz case _PC_ACL_PATH_MAX: 4489196949Strasz *valp = ACL_MAX_ENTRIES; 4490196949Strasz return (0); 4491196949Strasz 4492168404Spjd default: 4493168962Spjd return (EOPNOTSUPP); 4494168404Spjd } 4495168404Spjd} 4496168404Spjd 4497168404Spjd/*ARGSUSED*/ 4498168404Spjdstatic int 4499185029Spjdzfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4500185029Spjd caller_context_t *ct) 4501168404Spjd{ 4502168404Spjd znode_t *zp = VTOZ(vp); 4503168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4504168404Spjd int error; 4505185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 4506168404Spjd 4507168404Spjd ZFS_ENTER(zfsvfs); 4508185029Spjd ZFS_VERIFY_ZP(zp); 4509185029Spjd error = zfs_getacl(zp, vsecp, skipaclchk, cr); 4510168404Spjd ZFS_EXIT(zfsvfs); 4511168404Spjd 4512168404Spjd return (error); 4513168404Spjd} 4514168404Spjd 4515168404Spjd/*ARGSUSED*/ 4516228685Spjdint 4517185029Spjdzfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4518185029Spjd caller_context_t *ct) 4519168404Spjd{ 4520168404Spjd znode_t *zp = VTOZ(vp); 4521168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4522168404Spjd int error; 4523185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 4524219089Spjd zilog_t *zilog = zfsvfs->z_log; 4525168404Spjd 4526168404Spjd ZFS_ENTER(zfsvfs); 4527185029Spjd ZFS_VERIFY_ZP(zp); 4528219089Spjd 4529185029Spjd error = zfs_setacl(zp, vsecp, skipaclchk, cr); 4530219089Spjd 4531219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4532219089Spjd zil_commit(zilog, 0); 4533219089Spjd 4534168404Spjd ZFS_EXIT(zfsvfs); 4535168404Spjd return (error); 4536168404Spjd} 4537168404Spjd 4538168962Spjdstatic int 4539330991Savgzfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, 4540292373Sglebius int *rahead) 4541213937Savg{ 4542213937Savg znode_t *zp = VTOZ(vp); 4543213937Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4544213937Savg objset_t *os = zp->z_zfsvfs->z_os; 4545330991Savg rl_t *rl; 4546213937Savg vm_object_t object; 4547330991Savg off_t start, end, obj_size; 4548330991Savg uint_t blksz; 4549330991Savg int pgsin_b, pgsin_a; 4550330991Savg int error; 4551213937Savg 4552213937Savg ZFS_ENTER(zfsvfs); 4553213937Savg ZFS_VERIFY_ZP(zp); 4554213937Savg 4555330991Savg start = IDX_TO_OFF(ma[0]->pindex); 4556330991Savg end = IDX_TO_OFF(ma[count - 1]->pindex + 1); 4557330991Savg 4558330991Savg /* 4559330991Savg * Lock a range covering all required and optional pages. 4560330991Savg * Note that we need to handle the case of the block size growing. 
4561330991Savg */ 4562330991Savg for (;;) { 4563330991Savg blksz = zp->z_blksz; 4564330991Savg rl = zfs_range_lock(zp, rounddown(start, blksz), 4565330991Savg roundup(end, blksz) - rounddown(start, blksz), RL_READER); 4566330991Savg if (blksz == zp->z_blksz) 4567330991Savg break; 4568330991Savg zfs_range_unlock(rl); 4569213937Savg } 4570213937Savg 4571330991Savg object = ma[0]->object; 4572330991Savg zfs_vmobject_wlock(object); 4573330991Savg obj_size = object->un_pager.vnp.vnp_size; 4574330991Savg zfs_vmobject_wunlock(object); 4575330991Savg if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) { 4576330991Savg zfs_range_unlock(rl); 4577213937Savg ZFS_EXIT(zfsvfs); 4578248084Sattilio return (zfs_vm_pagerret_bad); 4579213937Savg } 4580213937Savg 4581330991Savg pgsin_b = 0; 4582330991Savg if (rbehind != NULL) { 4583330991Savg pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz)); 4584330991Savg pgsin_b = MIN(*rbehind, pgsin_b); 4585330991Savg } 4586292373Sglebius 4587330991Savg pgsin_a = 0; 4588330991Savg if (rahead != NULL) { 4589330991Savg pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end); 4590330991Savg if (end + IDX_TO_OFF(pgsin_a) >= obj_size) 4591330991Savg pgsin_a = OFF_TO_IDX(round_page(obj_size) - end); 4592330991Savg pgsin_a = MIN(*rahead, pgsin_a); 4593243517Savg } 4594243517Savg 4595330991Savg /* 4596330991Savg * NB: we need to pass the exact byte size of the data that we expect 4597330991Savg * to read after accounting for the file size. This is required because 4598330991Savg * ZFS will panic if we request DMU to read beyond the end of the last 4599330991Savg * allocated block. 
4600330991Savg */ 4601330991Savg error = dmu_read_pages(os, zp->z_id, ma, count, &pgsin_b, &pgsin_a, 4602330991Savg MIN(end, obj_size) - (end - PAGE_SIZE)); 4603213937Savg 4604330991Savg zfs_range_unlock(rl); 4605213937Savg ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4606213937Savg ZFS_EXIT(zfsvfs); 4607330991Savg 4608330991Savg if (error != 0) 4609292386Sglebius return (zfs_vm_pagerret_error); 4610330991Savg 4611330991Savg PCPU_INC(cnt.v_vnodein); 4612330991Savg PCPU_ADD(cnt.v_vnodepgsin, count + pgsin_b + pgsin_a); 4613330991Savg if (rbehind != NULL) 4614330991Savg *rbehind = pgsin_b; 4615330991Savg if (rahead != NULL) 4616330991Savg *rahead = pgsin_a; 4617330991Savg return (zfs_vm_pagerret_ok); 4618213937Savg} 4619213937Savg 4620213937Savgstatic int 4621213937Savgzfs_freebsd_getpages(ap) 4622213937Savg struct vop_getpages_args /* { 4623213937Savg struct vnode *a_vp; 4624213937Savg vm_page_t *a_m; 4625213937Savg int a_count; 4626292373Sglebius int *a_rbehind; 4627292373Sglebius int *a_rahead; 4628213937Savg } */ *ap; 4629213937Savg{ 4630213937Savg 4631292373Sglebius return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, 4632292373Sglebius ap->a_rahead)); 4633213937Savg} 4634213937Savg 4635213937Savgstatic int 4636258746Savgzfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, 4637258746Savg int *rtvals) 4638258746Savg{ 4639258746Savg znode_t *zp = VTOZ(vp); 4640258746Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4641258746Savg rl_t *rl; 4642258746Savg dmu_tx_t *tx; 4643258746Savg struct sf_buf *sf; 4644258746Savg vm_object_t object; 4645258746Savg vm_page_t m; 4646258746Savg caddr_t va; 4647258746Savg size_t tocopy; 4648258746Savg size_t lo_len; 4649258746Savg vm_ooffset_t lo_off; 4650258746Savg vm_ooffset_t off; 4651258746Savg uint_t blksz; 4652258746Savg int ncount; 4653258746Savg int pcount; 4654258746Savg int err; 4655258746Savg int i; 4656258746Savg 4657258746Savg ZFS_ENTER(zfsvfs); 4658258746Savg ZFS_VERIFY_ZP(zp); 4659258746Savg 4660258746Savg 
object = vp->v_object; 4661258746Savg pcount = btoc(len); 4662258746Savg ncount = pcount; 4663258746Savg 4664258746Savg KASSERT(ma[0]->object == object, ("mismatching object")); 4665258746Savg KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length")); 4666258746Savg 4667258746Savg for (i = 0; i < pcount; i++) 4668258746Savg rtvals[i] = zfs_vm_pagerret_error; 4669258746Savg 4670258746Savg off = IDX_TO_OFF(ma[0]->pindex); 4671258746Savg blksz = zp->z_blksz; 4672258746Savg lo_off = rounddown(off, blksz); 4673258746Savg lo_len = roundup(len + (off - lo_off), blksz); 4674258746Savg rl = zfs_range_lock(zp, lo_off, lo_len, RL_WRITER); 4675258746Savg 4676258746Savg zfs_vmobject_wlock(object); 4677258746Savg if (len + off > object->un_pager.vnp.vnp_size) { 4678258746Savg if (object->un_pager.vnp.vnp_size > off) { 4679258746Savg int pgoff; 4680258746Savg 4681258746Savg len = object->un_pager.vnp.vnp_size - off; 4682258746Savg ncount = btoc(len); 4683258746Savg if ((pgoff = (int)len & PAGE_MASK) != 0) { 4684258746Savg /* 4685258746Savg * If the object is locked and the following 4686258746Savg * conditions hold, then the page's dirty 4687258746Savg * field cannot be concurrently changed by a 4688258746Savg * pmap operation. 
4689258746Savg */ 4690258746Savg m = ma[ncount - 1]; 4691258746Savg vm_page_assert_sbusied(m); 4692258746Savg KASSERT(!pmap_page_is_write_mapped(m), 4693258746Savg ("zfs_putpages: page %p is not read-only", m)); 4694258746Savg vm_page_clear_dirty(m, pgoff, PAGE_SIZE - 4695258746Savg pgoff); 4696258746Savg } 4697258746Savg } else { 4698258746Savg len = 0; 4699258746Savg ncount = 0; 4700258746Savg } 4701258746Savg if (ncount < pcount) { 4702258746Savg for (i = ncount; i < pcount; i++) { 4703258746Savg rtvals[i] = zfs_vm_pagerret_bad; 4704258746Savg } 4705258746Savg } 4706258746Savg } 4707258746Savg zfs_vmobject_wunlock(object); 4708258746Savg 4709258746Savg if (ncount == 0) 4710258746Savg goto out; 4711258746Savg 4712258746Savg if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 4713258746Savg zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4714258746Savg goto out; 4715258746Savg } 4716258746Savg 4717258746Savg tx = dmu_tx_create(zfsvfs->z_os); 4718258746Savg dmu_tx_hold_write(tx, zp->z_id, off, len); 4719258746Savg 4720258746Savg dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4721258746Savg zfs_sa_upgrade_txholds(tx, zp); 4722316847Savg err = dmu_tx_assign(tx, TXG_WAIT); 4723258746Savg if (err != 0) { 4724258746Savg dmu_tx_abort(tx); 4725258746Savg goto out; 4726258746Savg } 4727258746Savg 4728258746Savg if (zp->z_blksz < PAGE_SIZE) { 4729258746Savg for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) { 4730258746Savg tocopy = len > PAGE_SIZE ? 
PAGE_SIZE : len; 4731258746Savg va = zfs_map_page(ma[i], &sf); 4732258746Savg dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx); 4733258746Savg zfs_unmap_page(sf); 4734258746Savg } 4735258746Savg } else { 4736258746Savg err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx); 4737258746Savg } 4738258746Savg 4739258746Savg if (err == 0) { 4740258746Savg uint64_t mtime[2], ctime[2]; 4741258746Savg sa_bulk_attr_t bulk[3]; 4742258746Savg int count = 0; 4743258746Savg 4744258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 4745258746Savg &mtime, 16); 4746258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 4747258746Savg &ctime, 16); 4748258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4749258746Savg &zp->z_pflags, 8); 4750258746Savg zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 4751258746Savg B_TRUE); 4752321561Smav err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 4753321561Smav ASSERT0(err); 4754258746Savg zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 4755258746Savg 4756258746Savg zfs_vmobject_wlock(object); 4757258746Savg for (i = 0; i < ncount; i++) { 4758258746Savg rtvals[i] = zfs_vm_pagerret_ok; 4759258746Savg vm_page_undirty(ma[i]); 4760258746Savg } 4761258746Savg zfs_vmobject_wunlock(object); 4762258746Savg PCPU_INC(cnt.v_vnodeout); 4763258746Savg PCPU_ADD(cnt.v_vnodepgsout, ncount); 4764258746Savg } 4765258746Savg dmu_tx_commit(tx); 4766258746Savg 4767258746Savgout: 4768258746Savg zfs_range_unlock(rl); 4769258746Savg if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 || 4770258746Savg zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4771258746Savg zil_commit(zfsvfs->z_log, zp->z_id); 4772258746Savg ZFS_EXIT(zfsvfs); 4773258746Savg return (rtvals[0]); 4774258746Savg} 4775258746Savg 4776258746Savgint 4777258746Savgzfs_freebsd_putpages(ap) 4778258746Savg struct vop_putpages_args /* { 4779258746Savg struct vnode *a_vp; 4780258746Savg vm_page_t *a_m; 
4781258746Savg int a_count; 4782258746Savg int a_sync; 4783258746Savg int *a_rtvals; 4784258746Savg } */ *ap; 4785258746Savg{ 4786258746Savg 4787258746Savg return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, 4788258746Savg ap->a_rtvals)); 4789258746Savg} 4790258746Savg 4791258746Savgstatic int 4792243518Savgzfs_freebsd_bmap(ap) 4793243518Savg struct vop_bmap_args /* { 4794243518Savg struct vnode *a_vp; 4795243518Savg daddr_t a_bn; 4796243518Savg struct bufobj **a_bop; 4797243518Savg daddr_t *a_bnp; 4798243518Savg int *a_runp; 4799243518Savg int *a_runb; 4800243518Savg } */ *ap; 4801243518Savg{ 4802243518Savg 4803243518Savg if (ap->a_bop != NULL) 4804243518Savg *ap->a_bop = &ap->a_vp->v_bufobj; 4805243518Savg if (ap->a_bnp != NULL) 4806243518Savg *ap->a_bnp = ap->a_bn; 4807243518Savg if (ap->a_runp != NULL) 4808243518Savg *ap->a_runp = 0; 4809243518Savg if (ap->a_runb != NULL) 4810243518Savg *ap->a_runb = 0; 4811243518Savg 4812243518Savg return (0); 4813243518Savg} 4814243518Savg 4815243518Savgstatic int 4816168962Spjdzfs_freebsd_open(ap) 4817168962Spjd struct vop_open_args /* { 4818168962Spjd struct vnode *a_vp; 4819168962Spjd int a_mode; 4820168962Spjd struct ucred *a_cred; 4821168962Spjd struct thread *a_td; 4822168962Spjd } */ *ap; 4823168962Spjd{ 4824168962Spjd vnode_t *vp = ap->a_vp; 4825168962Spjd znode_t *zp = VTOZ(vp); 4826168962Spjd int error; 4827168962Spjd 4828185029Spjd error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); 4829168962Spjd if (error == 0) 4830219089Spjd vnode_create_vobject(vp, zp->z_size, ap->a_td); 4831168962Spjd return (error); 4832168962Spjd} 4833168962Spjd 4834168962Spjdstatic int 4835168962Spjdzfs_freebsd_close(ap) 4836168962Spjd struct vop_close_args /* { 4837168962Spjd struct vnode *a_vp; 4838168962Spjd int a_fflag; 4839168962Spjd struct ucred *a_cred; 4840168962Spjd struct thread *a_td; 4841168962Spjd } */ *ap; 4842168962Spjd{ 4843168962Spjd 4844242566Savg return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred, 
NULL)); 4845168962Spjd} 4846168962Spjd 4847168962Spjdstatic int 4848168962Spjdzfs_freebsd_ioctl(ap) 4849168962Spjd struct vop_ioctl_args /* { 4850168962Spjd struct vnode *a_vp; 4851168962Spjd u_long a_command; 4852168962Spjd caddr_t a_data; 4853168962Spjd int a_fflag; 4854168962Spjd struct ucred *cred; 4855168962Spjd struct thread *td; 4856168962Spjd } */ *ap; 4857168962Spjd{ 4858168962Spjd 4859168978Spjd return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 4860185029Spjd ap->a_fflag, ap->a_cred, NULL, NULL)); 4861168962Spjd} 4862168962Spjd 4863168962Spjdstatic int 4864330062Savgioflags(int ioflags) 4865330062Savg{ 4866330062Savg int flags = 0; 4867330062Savg 4868330062Savg if (ioflags & IO_APPEND) 4869330062Savg flags |= FAPPEND; 4870330062Savg if (ioflags & IO_NDELAY) 4871330062Savg flags |= FNONBLOCK; 4872330062Savg if (ioflags & IO_SYNC) 4873330062Savg flags |= (FSYNC | FDSYNC | FRSYNC); 4874330062Savg 4875330062Savg return (flags); 4876330062Savg} 4877330062Savg 4878330062Savgstatic int 4879168962Spjdzfs_freebsd_read(ap) 4880168962Spjd struct vop_read_args /* { 4881168962Spjd struct vnode *a_vp; 4882168962Spjd struct uio *a_uio; 4883168962Spjd int a_ioflag; 4884168962Spjd struct ucred *a_cred; 4885168962Spjd } */ *ap; 4886168962Spjd{ 4887168962Spjd 4888213673Spjd return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 4889213673Spjd ap->a_cred, NULL)); 4890168962Spjd} 4891168962Spjd 4892168962Spjdstatic int 4893168962Spjdzfs_freebsd_write(ap) 4894168962Spjd struct vop_write_args /* { 4895168962Spjd struct vnode *a_vp; 4896168962Spjd struct uio *a_uio; 4897168962Spjd int a_ioflag; 4898168962Spjd struct ucred *a_cred; 4899168962Spjd } */ *ap; 4900168962Spjd{ 4901168962Spjd 4902213673Spjd return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 4903213673Spjd ap->a_cred, NULL)); 4904168962Spjd} 4905168962Spjd 4906168962Spjdstatic int 4907168962Spjdzfs_freebsd_access(ap) 4908168962Spjd struct vop_access_args /* { 4909168962Spjd struct vnode 
*a_vp; 4910192689Strasz accmode_t a_accmode; 4911168962Spjd struct ucred *a_cred; 4912168962Spjd struct thread *a_td; 4913168962Spjd } */ *ap; 4914168962Spjd{ 4915212002Sjh vnode_t *vp = ap->a_vp; 4916212002Sjh znode_t *zp = VTOZ(vp); 4917198703Spjd accmode_t accmode; 4918198703Spjd int error = 0; 4919168962Spjd 4920185172Spjd /* 4921198703Spjd * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND, 4922185172Spjd */ 4923198703Spjd accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 4924198703Spjd if (accmode != 0) 4925198703Spjd error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); 4926185172Spjd 4927198703Spjd /* 4928198703Spjd * VADMIN has to be handled by vaccess(). 4929198703Spjd */ 4930198703Spjd if (error == 0) { 4931198703Spjd accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 4932198703Spjd if (accmode != 0) { 4933219089Spjd error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 4934219089Spjd zp->z_gid, accmode, ap->a_cred, NULL); 4935198703Spjd } 4936185172Spjd } 4937185172Spjd 4938212002Sjh /* 4939212002Sjh * For VEXEC, ensure that at least one execute bit is set for 4940212002Sjh * non-directories. 
4941212002Sjh */ 4942212002Sjh if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 4943219089Spjd (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 4944212002Sjh error = EACCES; 4945219089Spjd } 4946212002Sjh 4947198703Spjd return (error); 4948168962Spjd} 4949168962Spjd 4950168962Spjdstatic int 4951357605Skevanszfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached) 4952168962Spjd{ 4953168962Spjd struct componentname *cnp = ap->a_cnp; 4954168962Spjd char nm[NAME_MAX + 1]; 4955168962Spjd 4956168962Spjd ASSERT(cnp->cn_namelen < sizeof(nm)); 4957168962Spjd strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 4958168962Spjd 4959168962Spjd return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 4960357605Skevans cnp->cn_cred, cnp->cn_thread, 0, cached)); 4961168962Spjd} 4962168962Spjd 4963168962Spjdstatic int 4964357605Skevanszfs_freebsd_cachedlookup(struct vop_cachedlookup_args *ap) 4965357605Skevans{ 4966357605Skevans 4967357605Skevans return (zfs_freebsd_lookup((struct vop_lookup_args *)ap, B_TRUE)); 4968357605Skevans} 4969357605Skevans 4970357605Skevansstatic int 4971303970Savgzfs_cache_lookup(ap) 4972303970Savg struct vop_lookup_args /* { 4973303970Savg struct vnode *a_dvp; 4974303970Savg struct vnode **a_vpp; 4975303970Savg struct componentname *a_cnp; 4976303970Savg } */ *ap; 4977303970Savg{ 4978303970Savg zfsvfs_t *zfsvfs; 4979303970Savg 4980303970Savg zfsvfs = ap->a_dvp->v_mount->mnt_data; 4981303970Savg if (zfsvfs->z_use_namecache) 4982303970Savg return (vfs_cache_lookup(ap)); 4983303970Savg else 4984357605Skevans return (zfs_freebsd_lookup(ap, B_FALSE)); 4985303970Savg} 4986303970Savg 4987303970Savgstatic int 4988168962Spjdzfs_freebsd_create(ap) 4989168962Spjd struct vop_create_args /* { 4990168962Spjd struct vnode *a_dvp; 4991168962Spjd struct vnode **a_vpp; 4992168962Spjd struct componentname *a_cnp; 4993168962Spjd struct vattr *a_vap; 4994168962Spjd } */ *ap; 4995168962Spjd{ 4996303970Savg zfsvfs_t 
*zfsvfs; 4997168962Spjd struct componentname *cnp = ap->a_cnp; 4998168962Spjd vattr_t *vap = ap->a_vap; 4999276007Skib int error, mode; 5000168962Spjd 5001168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5002168962Spjd 5003168962Spjd vattr_init_mask(vap); 5004168962Spjd mode = vap->va_mode & ALLPERMS; 5005303970Savg zfsvfs = ap->a_dvp->v_mount->mnt_data; 5006168962Spjd 5007276007Skib error = zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 5008276007Skib ap->a_vpp, cnp->cn_cred, cnp->cn_thread); 5009303970Savg if (zfsvfs->z_use_namecache && 5010303970Savg error == 0 && (cnp->cn_flags & MAKEENTRY) != 0) 5011276007Skib cache_enter(ap->a_dvp, *ap->a_vpp, cnp); 5012276007Skib return (error); 5013168962Spjd} 5014168962Spjd 5015168962Spjdstatic int 5016168962Spjdzfs_freebsd_remove(ap) 5017168962Spjd struct vop_remove_args /* { 5018168962Spjd struct vnode *a_dvp; 5019168962Spjd struct vnode *a_vp; 5020168962Spjd struct componentname *a_cnp; 5021168962Spjd } */ *ap; 5022168962Spjd{ 5023168962Spjd 5024168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 5025168962Spjd 5026303970Savg return (zfs_remove(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr, 5027303970Savg ap->a_cnp->cn_cred)); 5028168962Spjd} 5029168962Spjd 5030168962Spjdstatic int 5031168962Spjdzfs_freebsd_mkdir(ap) 5032168962Spjd struct vop_mkdir_args /* { 5033168962Spjd struct vnode *a_dvp; 5034168962Spjd struct vnode **a_vpp; 5035168962Spjd struct componentname *a_cnp; 5036168962Spjd struct vattr *a_vap; 5037168962Spjd } */ *ap; 5038168962Spjd{ 5039168962Spjd vattr_t *vap = ap->a_vap; 5040168962Spjd 5041168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 5042168962Spjd 5043168962Spjd vattr_init_mask(vap); 5044168962Spjd 5045168962Spjd return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 5046303970Savg ap->a_cnp->cn_cred)); 5047168962Spjd} 5048168962Spjd 5049168962Spjdstatic int 5050168962Spjdzfs_freebsd_rmdir(ap) 5051168962Spjd struct vop_rmdir_args /* { 5052168962Spjd struct vnode *a_dvp; 
5053168962Spjd struct vnode *a_vp; 5054168962Spjd struct componentname *a_cnp; 5055168962Spjd } */ *ap; 5056168962Spjd{ 5057168962Spjd struct componentname *cnp = ap->a_cnp; 5058168962Spjd 5059168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5060168962Spjd 5061303970Savg return (zfs_rmdir(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred)); 5062168962Spjd} 5063168962Spjd 5064168962Spjdstatic int 5065168962Spjdzfs_freebsd_readdir(ap) 5066168962Spjd struct vop_readdir_args /* { 5067168962Spjd struct vnode *a_vp; 5068168962Spjd struct uio *a_uio; 5069168962Spjd struct ucred *a_cred; 5070168962Spjd int *a_eofflag; 5071168962Spjd int *a_ncookies; 5072168962Spjd u_long **a_cookies; 5073168962Spjd } */ *ap; 5074168962Spjd{ 5075168962Spjd 5076168962Spjd return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 5077168962Spjd ap->a_ncookies, ap->a_cookies)); 5078168962Spjd} 5079168962Spjd 5080168962Spjdstatic int 5081168962Spjdzfs_freebsd_fsync(ap) 5082168962Spjd struct vop_fsync_args /* { 5083168962Spjd struct vnode *a_vp; 5084168962Spjd int a_waitfor; 5085168962Spjd struct thread *a_td; 5086168962Spjd } */ *ap; 5087168962Spjd{ 5088168962Spjd 5089168962Spjd vop_stdfsync(ap); 5090185029Spjd return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 5091168962Spjd} 5092168962Spjd 5093168962Spjdstatic int 5094168962Spjdzfs_freebsd_getattr(ap) 5095168962Spjd struct vop_getattr_args /* { 5096168962Spjd struct vnode *a_vp; 5097168962Spjd struct vattr *a_vap; 5098168962Spjd struct ucred *a_cred; 5099168962Spjd } */ *ap; 5100168962Spjd{ 5101185029Spjd vattr_t *vap = ap->a_vap; 5102185029Spjd xvattr_t xvap; 5103185029Spjd u_long fflags = 0; 5104185029Spjd int error; 5105168962Spjd 5106185029Spjd xva_init(&xvap); 5107185029Spjd xvap.xva_vattr = *vap; 5108185029Spjd xvap.xva_vattr.va_mask |= AT_XVATTR; 5109185029Spjd 5110185029Spjd /* Convert chflags into ZFS-type flags. */ 5111185029Spjd /* XXX: what about SF_SETTABLE?. 
*/ 5112185029Spjd XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 5113185029Spjd XVA_SET_REQ(&xvap, XAT_APPENDONLY); 5114185029Spjd XVA_SET_REQ(&xvap, XAT_NOUNLINK); 5115185029Spjd XVA_SET_REQ(&xvap, XAT_NODUMP); 5116254627Sken XVA_SET_REQ(&xvap, XAT_READONLY); 5117254627Sken XVA_SET_REQ(&xvap, XAT_ARCHIVE); 5118254627Sken XVA_SET_REQ(&xvap, XAT_SYSTEM); 5119254627Sken XVA_SET_REQ(&xvap, XAT_HIDDEN); 5120254627Sken XVA_SET_REQ(&xvap, XAT_REPARSE); 5121254627Sken XVA_SET_REQ(&xvap, XAT_OFFLINE); 5122254627Sken XVA_SET_REQ(&xvap, XAT_SPARSE); 5123254627Sken 5124185029Spjd error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 5125185029Spjd if (error != 0) 5126185029Spjd return (error); 5127185029Spjd 5128185029Spjd /* Convert ZFS xattr into chflags. */ 5129185029Spjd#define FLAG_CHECK(fflag, xflag, xfield) do { \ 5130185029Spjd if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 5131185029Spjd fflags |= (fflag); \ 5132185029Spjd} while (0) 5133185029Spjd FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 5134185029Spjd xvap.xva_xoptattrs.xoa_immutable); 5135185029Spjd FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 5136185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 5137185029Spjd FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 5138185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 5139254627Sken FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE, 5140254627Sken xvap.xva_xoptattrs.xoa_archive); 5141185029Spjd FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 5142185029Spjd xvap.xva_xoptattrs.xoa_nodump); 5143254627Sken FLAG_CHECK(UF_READONLY, XAT_READONLY, 5144254627Sken xvap.xva_xoptattrs.xoa_readonly); 5145254627Sken FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM, 5146254627Sken xvap.xva_xoptattrs.xoa_system); 5147254627Sken FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN, 5148254627Sken xvap.xva_xoptattrs.xoa_hidden); 5149254627Sken FLAG_CHECK(UF_REPARSE, XAT_REPARSE, 5150254627Sken xvap.xva_xoptattrs.xoa_reparse); 5151254627Sken FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE, 5152254627Sken xvap.xva_xoptattrs.xoa_offline); 5153254627Sken FLAG_CHECK(UF_SPARSE, 
XAT_SPARSE, 5154254627Sken xvap.xva_xoptattrs.xoa_sparse); 5155254627Sken 5156185029Spjd#undef FLAG_CHECK 5157185029Spjd *vap = xvap.xva_vattr; 5158185029Spjd vap->va_flags = fflags; 5159185029Spjd return (0); 5160168962Spjd} 5161168962Spjd 5162168962Spjdstatic int 5163168962Spjdzfs_freebsd_setattr(ap) 5164168962Spjd struct vop_setattr_args /* { 5165168962Spjd struct vnode *a_vp; 5166168962Spjd struct vattr *a_vap; 5167168962Spjd struct ucred *a_cred; 5168168962Spjd } */ *ap; 5169168962Spjd{ 5170185172Spjd vnode_t *vp = ap->a_vp; 5171168962Spjd vattr_t *vap = ap->a_vap; 5172185172Spjd cred_t *cred = ap->a_cred; 5173185029Spjd xvattr_t xvap; 5174185029Spjd u_long fflags; 5175185029Spjd uint64_t zflags; 5176168962Spjd 5177168962Spjd vattr_init_mask(vap); 5178170044Spjd vap->va_mask &= ~AT_NOSET; 5179168962Spjd 5180185029Spjd xva_init(&xvap); 5181185029Spjd xvap.xva_vattr = *vap; 5182185029Spjd 5183219089Spjd zflags = VTOZ(vp)->z_pflags; 5184185172Spjd 5185185029Spjd if (vap->va_flags != VNOVAL) { 5186197683Sdelphij zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 5187185172Spjd int error; 5188185172Spjd 5189197683Sdelphij if (zfsvfs->z_use_fuids == B_FALSE) 5190197683Sdelphij return (EOPNOTSUPP); 5191197683Sdelphij 5192185029Spjd fflags = vap->va_flags; 5193254627Sken /* 5194254627Sken * XXX KDM 5195254627Sken * We need to figure out whether it makes sense to allow 5196254627Sken * UF_REPARSE through, since we don't really have other 5197254627Sken * facilities to handle reparse points and zfs_setattr() 5198254627Sken * doesn't currently allow setting that attribute anyway. 5199254627Sken */ 5200254627Sken if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE| 5201254627Sken UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE| 5202254627Sken UF_OFFLINE|UF_SPARSE)) != 0) 5203185029Spjd return (EOPNOTSUPP); 5204185172Spjd /* 5205185172Spjd * Unprivileged processes are not permitted to unset system 5206185172Spjd * flags, or modify flags if any system flags are set. 
5207185172Spjd * Privileged non-jail processes may not modify system flags 5208185172Spjd * if securelevel > 0 and any existing system flags are set. 5209185172Spjd * Privileged jail processes behave like privileged non-jail 5210185172Spjd * processes if the security.jail.chflags_allowed sysctl is 5211185172Spjd * is non-zero; otherwise, they behave like unprivileged 5212185172Spjd * processes. 5213185172Spjd */ 5214197861Spjd if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 5215197861Spjd priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) { 5216185172Spjd if (zflags & 5217185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 5218185172Spjd error = securelevel_gt(cred, 0); 5219197861Spjd if (error != 0) 5220185172Spjd return (error); 5221185172Spjd } 5222185172Spjd } else { 5223197861Spjd /* 5224197861Spjd * Callers may only modify the file flags on objects they 5225197861Spjd * have VADMIN rights for. 5226197861Spjd */ 5227197861Spjd if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0) 5228197861Spjd return (error); 5229185172Spjd if (zflags & 5230185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 5231185172Spjd return (EPERM); 5232185172Spjd } 5233185172Spjd if (fflags & 5234185172Spjd (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 5235185172Spjd return (EPERM); 5236185172Spjd } 5237185172Spjd } 5238185029Spjd 5239185029Spjd#define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 5240185029Spjd if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 5241185029Spjd ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 5242185029Spjd XVA_SET_REQ(&xvap, (xflag)); \ 5243185029Spjd (xfield) = ((fflags & (fflag)) != 0); \ 5244185029Spjd } \ 5245185029Spjd} while (0) 5246185029Spjd /* Convert chflags into ZFS-type flags. */ 5247185029Spjd /* XXX: what about SF_SETTABLE?. 
*/ 5248185029Spjd FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 5249185029Spjd xvap.xva_xoptattrs.xoa_immutable); 5250185029Spjd FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 5251185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 5252185029Spjd FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 5253185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 5254254627Sken FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE, 5255254627Sken xvap.xva_xoptattrs.xoa_archive); 5256185029Spjd FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 5257185172Spjd xvap.xva_xoptattrs.xoa_nodump); 5258254627Sken FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY, 5259254627Sken xvap.xva_xoptattrs.xoa_readonly); 5260254627Sken FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM, 5261254627Sken xvap.xva_xoptattrs.xoa_system); 5262254627Sken FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN, 5263254627Sken xvap.xva_xoptattrs.xoa_hidden); 5264254627Sken FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE, 5265352687Smav xvap.xva_xoptattrs.xoa_reparse); 5266254627Sken FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE, 5267254627Sken xvap.xva_xoptattrs.xoa_offline); 5268254627Sken FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE, 5269254627Sken xvap.xva_xoptattrs.xoa_sparse); 5270185029Spjd#undef FLAG_CHANGE 5271185029Spjd } 5272316391Sasomers if (vap->va_birthtime.tv_sec != VNOVAL) { 5273316391Sasomers xvap.xva_vattr.va_mask |= AT_XVATTR; 5274316391Sasomers XVA_SET_REQ(&xvap, XAT_CREATETIME); 5275316391Sasomers } 5276185172Spjd return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL)); 5277168962Spjd} 5278168962Spjd 5279168962Spjdstatic int 5280168962Spjdzfs_freebsd_rename(ap) 5281168962Spjd struct vop_rename_args /* { 5282168962Spjd struct vnode *a_fdvp; 5283168962Spjd struct vnode *a_fvp; 5284168962Spjd struct componentname *a_fcnp; 5285168962Spjd struct vnode *a_tdvp; 5286168962Spjd struct vnode *a_tvp; 5287168962Spjd struct componentname *a_tcnp; 5288168962Spjd } */ *ap; 5289168962Spjd{ 5290168962Spjd 
vnode_t *fdvp = ap->a_fdvp; 5291168962Spjd vnode_t *fvp = ap->a_fvp; 5292168962Spjd vnode_t *tdvp = ap->a_tdvp; 5293168962Spjd vnode_t *tvp = ap->a_tvp; 5294168962Spjd int error; 5295168962Spjd 5296192237Skmacy ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); 5297192237Skmacy ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); 5298168962Spjd 5299303970Savg error = zfs_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp, 5300303970Savg ap->a_tcnp, ap->a_fcnp->cn_cred); 5301168962Spjd 5302303970Savg vrele(fdvp); 5303303970Savg vrele(fvp); 5304303970Savg vrele(tdvp); 5305303970Savg if (tvp != NULL) 5306303970Savg vrele(tvp); 5307303970Savg 5308168962Spjd return (error); 5309168962Spjd} 5310168962Spjd 5311168962Spjdstatic int 5312168962Spjdzfs_freebsd_symlink(ap) 5313168962Spjd struct vop_symlink_args /* { 5314168962Spjd struct vnode *a_dvp; 5315168962Spjd struct vnode **a_vpp; 5316168962Spjd struct componentname *a_cnp; 5317168962Spjd struct vattr *a_vap; 5318168962Spjd char *a_target; 5319168962Spjd } */ *ap; 5320168962Spjd{ 5321168962Spjd struct componentname *cnp = ap->a_cnp; 5322168962Spjd vattr_t *vap = ap->a_vap; 5323168962Spjd 5324168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5325168962Spjd 5326168962Spjd vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. 
*/ 5327168962Spjd vattr_init_mask(vap); 5328168962Spjd 5329168962Spjd return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 5330168962Spjd ap->a_target, cnp->cn_cred, cnp->cn_thread)); 5331168962Spjd} 5332168962Spjd 5333168962Spjdstatic int 5334168962Spjdzfs_freebsd_readlink(ap) 5335168962Spjd struct vop_readlink_args /* { 5336168962Spjd struct vnode *a_vp; 5337168962Spjd struct uio *a_uio; 5338168962Spjd struct ucred *a_cred; 5339168962Spjd } */ *ap; 5340168962Spjd{ 5341168962Spjd 5342185029Spjd return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 5343168962Spjd} 5344168962Spjd 5345168962Spjdstatic int 5346168962Spjdzfs_freebsd_link(ap) 5347168962Spjd struct vop_link_args /* { 5348168962Spjd struct vnode *a_tdvp; 5349168962Spjd struct vnode *a_vp; 5350168962Spjd struct componentname *a_cnp; 5351168962Spjd } */ *ap; 5352168962Spjd{ 5353168962Spjd struct componentname *cnp = ap->a_cnp; 5354254982Sdelphij vnode_t *vp = ap->a_vp; 5355254982Sdelphij vnode_t *tdvp = ap->a_tdvp; 5356168962Spjd 5357254982Sdelphij if (tdvp->v_mount != vp->v_mount) 5358254982Sdelphij return (EXDEV); 5359254982Sdelphij 5360168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5361168962Spjd 5362254982Sdelphij return (zfs_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 5363168962Spjd} 5364168962Spjd 5365168962Spjdstatic int 5366168962Spjdzfs_freebsd_inactive(ap) 5367169170Spjd struct vop_inactive_args /* { 5368169170Spjd struct vnode *a_vp; 5369169170Spjd struct thread *a_td; 5370169170Spjd } */ *ap; 5371168962Spjd{ 5372168962Spjd vnode_t *vp = ap->a_vp; 5373168962Spjd 5374185029Spjd zfs_inactive(vp, ap->a_td->td_ucred, NULL); 5375168962Spjd return (0); 5376168962Spjd} 5377168962Spjd 5378168962Spjdstatic int 5379168962Spjdzfs_freebsd_reclaim(ap) 5380168962Spjd struct vop_reclaim_args /* { 5381168962Spjd struct vnode *a_vp; 5382168962Spjd struct thread *a_td; 5383168962Spjd } */ *ap; 5384168962Spjd{ 5385169170Spjd vnode_t *vp = ap->a_vp; 5386168962Spjd znode_t *zp = 
VTOZ(vp); 5387197133Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5388168962Spjd 5389169025Spjd ASSERT(zp != NULL); 5390169025Spjd 5391243520Savg /* Destroy the vm object and flush associated pages. */ 5392243520Savg vnode_destroy_vobject(vp); 5393243520Savg 5394168962Spjd /* 5395243520Savg * z_teardown_inactive_lock protects from a race with 5396243520Savg * zfs_znode_dmu_fini in zfsvfs_teardown during 5397243520Savg * force unmount. 5398168962Spjd */ 5399243520Savg rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 5400243520Savg if (zp->z_sa_hdl == NULL) 5401196301Spjd zfs_znode_free(zp); 5402243520Savg else 5403243520Savg zfs_zinactive(zp); 5404243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 5405185029Spjd 5406168962Spjd vp->v_data = NULL; 5407168962Spjd return (0); 5408168962Spjd} 5409168962Spjd 5410168962Spjdstatic int 5411168962Spjdzfs_freebsd_fid(ap) 5412168962Spjd struct vop_fid_args /* { 5413168962Spjd struct vnode *a_vp; 5414168962Spjd struct fid *a_fid; 5415168962Spjd } */ *ap; 5416168962Spjd{ 5417168962Spjd 5418185029Spjd return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 5419168962Spjd} 5420168962Spjd 5421168962Spjdstatic int 5422168962Spjdzfs_freebsd_pathconf(ap) 5423168962Spjd struct vop_pathconf_args /* { 5424168962Spjd struct vnode *a_vp; 5425168962Spjd int a_name; 5426168962Spjd register_t *a_retval; 5427168962Spjd } */ *ap; 5428168962Spjd{ 5429168962Spjd ulong_t val; 5430168962Spjd int error; 5431168962Spjd 5432185029Spjd error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 5433328298Sjhb if (error == 0) { 5434168962Spjd *ap->a_retval = val; 5435328298Sjhb return (error); 5436328298Sjhb } 5437328298Sjhb if (error != EOPNOTSUPP) 5438328298Sjhb return (error); 5439168962Spjd 5440196949Strasz switch (ap->a_name) { 5441328298Sjhb case _PC_NAME_MAX: 5442328298Sjhb *ap->a_retval = NAME_MAX; 5443328298Sjhb return (0); 5444328298Sjhb case _PC_PIPE_BUF: 5445328298Sjhb if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == 
VFIFO) { 5446328298Sjhb *ap->a_retval = PIPE_BUF; 5447328298Sjhb return (0); 5448328298Sjhb } 5449328298Sjhb return (EINVAL); 5450196949Strasz default: 5451328298Sjhb return (vop_stdpathconf(ap)); 5452196949Strasz } 5453196949Strasz} 5454196949Strasz 5455185029Spjd/* 5456185029Spjd * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 5457185029Spjd * extended attribute name: 5458185029Spjd * 5459185029Spjd * NAMESPACE PREFIX 5460185029Spjd * system freebsd:system: 5461185029Spjd * user (none, can be used to access ZFS fsattr(5) attributes 5462185029Spjd * created on Solaris) 5463185029Spjd */ 5464185029Spjdstatic int 5465185029Spjdzfs_create_attrname(int attrnamespace, const char *name, char *attrname, 5466185029Spjd size_t size) 5467185029Spjd{ 5468185029Spjd const char *namespace, *prefix, *suffix; 5469185029Spjd 5470185029Spjd /* We don't allow '/' character in attribute name. */ 5471185029Spjd if (strchr(name, '/') != NULL) 5472185029Spjd return (EINVAL); 5473185029Spjd /* We don't allow attribute names that start with "freebsd:" string. */ 5474185029Spjd if (strncmp(name, "freebsd:", 8) == 0) 5475185029Spjd return (EINVAL); 5476185029Spjd 5477185029Spjd bzero(attrname, size); 5478185029Spjd 5479185029Spjd switch (attrnamespace) { 5480185029Spjd case EXTATTR_NAMESPACE_USER: 5481185029Spjd#if 0 5482185029Spjd prefix = "freebsd:"; 5483185029Spjd namespace = EXTATTR_NAMESPACE_USER_STRING; 5484185029Spjd suffix = ":"; 5485185029Spjd#else 5486185029Spjd /* 5487185029Spjd * This is the default namespace by which we can access all 5488185029Spjd * attributes created on Solaris. 
5489185029Spjd */ 5490185029Spjd prefix = namespace = suffix = ""; 5491185029Spjd#endif 5492185029Spjd break; 5493185029Spjd case EXTATTR_NAMESPACE_SYSTEM: 5494185029Spjd prefix = "freebsd:"; 5495185029Spjd namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 5496185029Spjd suffix = ":"; 5497185029Spjd break; 5498185029Spjd case EXTATTR_NAMESPACE_EMPTY: 5499185029Spjd default: 5500185029Spjd return (EINVAL); 5501185029Spjd } 5502185029Spjd if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 5503185029Spjd name) >= size) { 5504185029Spjd return (ENAMETOOLONG); 5505185029Spjd } 5506185029Spjd return (0); 5507185029Spjd} 5508185029Spjd 5509185029Spjd/* 5510185029Spjd * Vnode operating to retrieve a named extended attribute. 5511185029Spjd */ 5512185029Spjdstatic int 5513185029Spjdzfs_getextattr(struct vop_getextattr_args *ap) 5514185029Spjd/* 5515185029Spjdvop_getextattr { 5516185029Spjd IN struct vnode *a_vp; 5517185029Spjd IN int a_attrnamespace; 5518185029Spjd IN const char *a_name; 5519185029Spjd INOUT struct uio *a_uio; 5520185029Spjd OUT size_t *a_size; 5521185029Spjd IN struct ucred *a_cred; 5522185029Spjd IN struct thread *a_td; 5523185029Spjd}; 5524185029Spjd*/ 5525185029Spjd{ 5526185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5527185029Spjd struct thread *td = ap->a_td; 5528185029Spjd struct nameidata nd; 5529185029Spjd char attrname[255]; 5530185029Spjd struct vattr va; 5531185029Spjd vnode_t *xvp = NULL, *vp; 5532185029Spjd int error, flags; 5533185029Spjd 5534195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5535195785Strasz ap->a_cred, ap->a_td, VREAD); 5536195785Strasz if (error != 0) 5537195785Strasz return (error); 5538195785Strasz 5539185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5540185029Spjd sizeof(attrname)); 5541185029Spjd if (error != 0) 5542185029Spjd return (error); 5543185029Spjd 5544185029Spjd ZFS_ENTER(zfsvfs); 5545185029Spjd 5546185029Spjd error = 
zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5547357605Skevans LOOKUP_XATTR, B_FALSE); 5548185029Spjd if (error != 0) { 5549185029Spjd ZFS_EXIT(zfsvfs); 5550185029Spjd return (error); 5551185029Spjd } 5552185029Spjd 5553185029Spjd flags = FREAD; 5554241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 5555185029Spjd xvp, td); 5556355443Skib error = vn_open_cred(&nd, &flags, VN_OPEN_INVFS, 0, ap->a_cred, NULL); 5557185029Spjd vp = nd.ni_vp; 5558185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 5559185029Spjd if (error != 0) { 5560196303Spjd ZFS_EXIT(zfsvfs); 5561195785Strasz if (error == ENOENT) 5562195785Strasz error = ENOATTR; 5563185029Spjd return (error); 5564185029Spjd } 5565185029Spjd 5566185029Spjd if (ap->a_size != NULL) { 5567185029Spjd error = VOP_GETATTR(vp, &va, ap->a_cred); 5568185029Spjd if (error == 0) 5569185029Spjd *ap->a_size = (size_t)va.va_size; 5570185029Spjd } else if (ap->a_uio != NULL) 5571224605Smm error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); 5572185029Spjd 5573185029Spjd VOP_UNLOCK(vp, 0); 5574185029Spjd vn_close(vp, flags, ap->a_cred, td); 5575185029Spjd ZFS_EXIT(zfsvfs); 5576185029Spjd 5577185029Spjd return (error); 5578185029Spjd} 5579185029Spjd 5580185029Spjd/* 5581185029Spjd * Vnode operation to remove a named attribute. 
5582185029Spjd */ 5583185029Spjdint 5584185029Spjdzfs_deleteextattr(struct vop_deleteextattr_args *ap) 5585185029Spjd/* 5586185029Spjdvop_deleteextattr { 5587185029Spjd IN struct vnode *a_vp; 5588185029Spjd IN int a_attrnamespace; 5589185029Spjd IN const char *a_name; 5590185029Spjd IN struct ucred *a_cred; 5591185029Spjd IN struct thread *a_td; 5592185029Spjd}; 5593185029Spjd*/ 5594185029Spjd{ 5595185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5596185029Spjd struct thread *td = ap->a_td; 5597185029Spjd struct nameidata nd; 5598185029Spjd char attrname[255]; 5599185029Spjd struct vattr va; 5600185029Spjd vnode_t *xvp = NULL, *vp; 5601185029Spjd int error, flags; 5602185029Spjd 5603195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5604195785Strasz ap->a_cred, ap->a_td, VWRITE); 5605195785Strasz if (error != 0) 5606195785Strasz return (error); 5607195785Strasz 5608185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5609185029Spjd sizeof(attrname)); 5610185029Spjd if (error != 0) 5611185029Spjd return (error); 5612185029Spjd 5613185029Spjd ZFS_ENTER(zfsvfs); 5614185029Spjd 5615185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5616357605Skevans LOOKUP_XATTR, B_FALSE); 5617185029Spjd if (error != 0) { 5618185029Spjd ZFS_EXIT(zfsvfs); 5619185029Spjd return (error); 5620185029Spjd } 5621185029Spjd 5622241896Skib NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 5623185029Spjd UIO_SYSSPACE, attrname, xvp, td); 5624185029Spjd error = namei(&nd); 5625185029Spjd vp = nd.ni_vp; 5626185029Spjd if (error != 0) { 5627196303Spjd ZFS_EXIT(zfsvfs); 5628260706Savg NDFREE(&nd, NDF_ONLY_PNBUF); 5629195785Strasz if (error == ENOENT) 5630195785Strasz error = ENOATTR; 5631185029Spjd return (error); 5632185029Spjd } 5633260706Savg 5634185029Spjd error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 5635260706Savg NDFREE(&nd, NDF_ONLY_PNBUF); 5636185029Spjd 5637185029Spjd vput(nd.ni_dvp); 5638185029Spjd 
if (vp == nd.ni_dvp) 5639185029Spjd vrele(vp); 5640185029Spjd else 5641185029Spjd vput(vp); 5642185029Spjd ZFS_EXIT(zfsvfs); 5643185029Spjd 5644185029Spjd return (error); 5645185029Spjd} 5646185029Spjd 5647185029Spjd/* 5648185029Spjd * Vnode operation to set a named attribute. 5649185029Spjd */ 5650185029Spjdstatic int 5651185029Spjdzfs_setextattr(struct vop_setextattr_args *ap) 5652185029Spjd/* 5653185029Spjdvop_setextattr { 5654185029Spjd IN struct vnode *a_vp; 5655185029Spjd IN int a_attrnamespace; 5656185029Spjd IN const char *a_name; 5657185029Spjd INOUT struct uio *a_uio; 5658185029Spjd IN struct ucred *a_cred; 5659185029Spjd IN struct thread *a_td; 5660185029Spjd}; 5661185029Spjd*/ 5662185029Spjd{ 5663185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5664185029Spjd struct thread *td = ap->a_td; 5665185029Spjd struct nameidata nd; 5666185029Spjd char attrname[255]; 5667185029Spjd struct vattr va; 5668185029Spjd vnode_t *xvp = NULL, *vp; 5669185029Spjd int error, flags; 5670185029Spjd 5671195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5672195785Strasz ap->a_cred, ap->a_td, VWRITE); 5673195785Strasz if (error != 0) 5674195785Strasz return (error); 5675195785Strasz 5676185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5677185029Spjd sizeof(attrname)); 5678185029Spjd if (error != 0) 5679185029Spjd return (error); 5680185029Spjd 5681185029Spjd ZFS_ENTER(zfsvfs); 5682185029Spjd 5683185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5684357605Skevans LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE); 5685185029Spjd if (error != 0) { 5686185029Spjd ZFS_EXIT(zfsvfs); 5687185029Spjd return (error); 5688185029Spjd } 5689185029Spjd 5690185029Spjd flags = FFLAGS(O_WRONLY | O_CREAT); 5691241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 5692185029Spjd xvp, td); 5693355443Skib error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred, 5694355443Skib NULL); 
5695185029Spjd vp = nd.ni_vp; 5696185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 5697185029Spjd if (error != 0) { 5698185029Spjd ZFS_EXIT(zfsvfs); 5699185029Spjd return (error); 5700185029Spjd } 5701185029Spjd 5702185029Spjd VATTR_NULL(&va); 5703185029Spjd va.va_size = 0; 5704185029Spjd error = VOP_SETATTR(vp, &va, ap->a_cred); 5705185029Spjd if (error == 0) 5706268420Smav VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred); 5707185029Spjd 5708185029Spjd VOP_UNLOCK(vp, 0); 5709185029Spjd vn_close(vp, flags, ap->a_cred, td); 5710185029Spjd ZFS_EXIT(zfsvfs); 5711185029Spjd 5712185029Spjd return (error); 5713185029Spjd} 5714185029Spjd 5715185029Spjd/* 5716185029Spjd * Vnode operation to retrieve extended attributes on a vnode. 5717185029Spjd */ 5718185029Spjdstatic int 5719185029Spjdzfs_listextattr(struct vop_listextattr_args *ap) 5720185029Spjd/* 5721185029Spjdvop_listextattr { 5722185029Spjd IN struct vnode *a_vp; 5723185029Spjd IN int a_attrnamespace; 5724185029Spjd INOUT struct uio *a_uio; 5725185029Spjd OUT size_t *a_size; 5726185029Spjd IN struct ucred *a_cred; 5727185029Spjd IN struct thread *a_td; 5728185029Spjd}; 5729185029Spjd*/ 5730185029Spjd{ 5731185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5732185029Spjd struct thread *td = ap->a_td; 5733185029Spjd struct nameidata nd; 5734185029Spjd char attrprefix[16]; 5735185029Spjd u_char dirbuf[sizeof(struct dirent)]; 5736185029Spjd struct dirent *dp; 5737185029Spjd struct iovec aiov; 5738185029Spjd struct uio auio, *uio = ap->a_uio; 5739185029Spjd size_t *sizep = ap->a_size; 5740185029Spjd size_t plen; 5741185029Spjd vnode_t *xvp = NULL, *vp; 5742185029Spjd int done, error, eof, pos; 5743185029Spjd 5744195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5745195785Strasz ap->a_cred, ap->a_td, VREAD); 5746196303Spjd if (error != 0) 5747195785Strasz return (error); 5748195785Strasz 5749185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 5750185029Spjd sizeof(attrprefix)); 
5751185029Spjd if (error != 0) 5752185029Spjd return (error); 5753185029Spjd plen = strlen(attrprefix); 5754185029Spjd 5755185029Spjd ZFS_ENTER(zfsvfs); 5756185029Spjd 5757195822Strasz if (sizep != NULL) 5758195822Strasz *sizep = 0; 5759195822Strasz 5760185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5761357605Skevans LOOKUP_XATTR, B_FALSE); 5762185029Spjd if (error != 0) { 5763196303Spjd ZFS_EXIT(zfsvfs); 5764195785Strasz /* 5765195785Strasz * ENOATTR means that the EA directory does not yet exist, 5766195785Strasz * i.e. there are no extended attributes there. 5767195785Strasz */ 5768195785Strasz if (error == ENOATTR) 5769195785Strasz error = 0; 5770185029Spjd return (error); 5771185029Spjd } 5772185029Spjd 5773241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 5774188588Sjhb UIO_SYSSPACE, ".", xvp, td); 5775185029Spjd error = namei(&nd); 5776185029Spjd vp = nd.ni_vp; 5777185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 5778185029Spjd if (error != 0) { 5779185029Spjd ZFS_EXIT(zfsvfs); 5780185029Spjd return (error); 5781185029Spjd } 5782185029Spjd 5783185029Spjd auio.uio_iov = &aiov; 5784185029Spjd auio.uio_iovcnt = 1; 5785185029Spjd auio.uio_segflg = UIO_SYSSPACE; 5786185029Spjd auio.uio_td = td; 5787185029Spjd auio.uio_rw = UIO_READ; 5788185029Spjd auio.uio_offset = 0; 5789185029Spjd 5790185029Spjd do { 5791185029Spjd u_char nlen; 5792185029Spjd 5793185029Spjd aiov.iov_base = (void *)dirbuf; 5794185029Spjd aiov.iov_len = sizeof(dirbuf); 5795185029Spjd auio.uio_resid = sizeof(dirbuf); 5796185029Spjd error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 5797185029Spjd done = sizeof(dirbuf) - auio.uio_resid; 5798185029Spjd if (error != 0) 5799185029Spjd break; 5800185029Spjd for (pos = 0; pos < done;) { 5801185029Spjd dp = (struct dirent *)(dirbuf + pos); 5802185029Spjd pos += dp->d_reclen; 5803185029Spjd /* 5804185029Spjd * XXX: Temporarily we also accept DT_UNKNOWN, as this 5805185029Spjd * is what we get when 
attribute was created on Solaris. 5806185029Spjd */ 5807185029Spjd if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 5808185029Spjd continue; 5809185029Spjd if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) 5810185029Spjd continue; 5811185029Spjd else if (strncmp(dp->d_name, attrprefix, plen) != 0) 5812185029Spjd continue; 5813185029Spjd nlen = dp->d_namlen - plen; 5814185029Spjd if (sizep != NULL) 5815185029Spjd *sizep += 1 + nlen; 5816185029Spjd else if (uio != NULL) { 5817185029Spjd /* 5818185029Spjd * Format of extattr name entry is one byte for 5819185029Spjd * length and the rest for name. 5820185029Spjd */ 5821185029Spjd error = uiomove(&nlen, 1, uio->uio_rw, uio); 5822185029Spjd if (error == 0) { 5823185029Spjd error = uiomove(dp->d_name + plen, nlen, 5824185029Spjd uio->uio_rw, uio); 5825185029Spjd } 5826185029Spjd if (error != 0) 5827185029Spjd break; 5828185029Spjd } 5829185029Spjd } 5830185029Spjd } while (!eof && error == 0); 5831185029Spjd 5832185029Spjd vput(vp); 5833185029Spjd ZFS_EXIT(zfsvfs); 5834185029Spjd 5835185029Spjd return (error); 5836185029Spjd} 5837185029Spjd 5838192800Straszint 5839192800Straszzfs_freebsd_getacl(ap) 5840192800Strasz struct vop_getacl_args /* { 5841192800Strasz struct vnode *vp; 5842192800Strasz acl_type_t type; 5843192800Strasz struct acl *aclp; 5844192800Strasz struct ucred *cred; 5845192800Strasz struct thread *td; 5846192800Strasz } */ *ap; 5847192800Strasz{ 5848192800Strasz int error; 5849192800Strasz vsecattr_t vsecattr; 5850192800Strasz 5851192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 5852197435Strasz return (EINVAL); 5853192800Strasz 5854192800Strasz vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 5855192800Strasz if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)) 5856192800Strasz return (error); 5857192800Strasz 5858192800Strasz error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt); 5859196303Spjd if (vsecattr.vsa_aclentp != NULL) 5860196303Spjd 
kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz); 5861192800Strasz 5862196303Spjd return (error); 5863192800Strasz} 5864192800Strasz 5865192800Straszint 5866192800Straszzfs_freebsd_setacl(ap) 5867192800Strasz struct vop_setacl_args /* { 5868192800Strasz struct vnode *vp; 5869192800Strasz acl_type_t type; 5870192800Strasz struct acl *aclp; 5871192800Strasz struct ucred *cred; 5872192800Strasz struct thread *td; 5873192800Strasz } */ *ap; 5874192800Strasz{ 5875192800Strasz int error; 5876192800Strasz vsecattr_t vsecattr; 5877192800Strasz int aclbsize; /* size of acl list in bytes */ 5878192800Strasz aclent_t *aaclp; 5879192800Strasz 5880192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 5881197435Strasz return (EINVAL); 5882192800Strasz 5883314710Smm if (ap->a_aclp == NULL) 5884314710Smm return (EINVAL); 5885314710Smm 5886192800Strasz if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 5887192800Strasz return (EINVAL); 5888192800Strasz 5889192800Strasz /* 5890196949Strasz * With NFSv4 ACLs, chmod(2) may need to add additional entries, 5891192800Strasz * splitting every entry into two and appending "canonical six" 5892192800Strasz * entries at the end. Don't allow for setting an ACL that would 5893192800Strasz * cause chmod(2) to run out of ACL entries. 
5894192800Strasz */ 5895192800Strasz if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 5896192800Strasz return (ENOSPC); 5897192800Strasz 5898208030Strasz error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 5899208030Strasz if (error != 0) 5900208030Strasz return (error); 5901208030Strasz 5902192800Strasz vsecattr.vsa_mask = VSA_ACE; 5903192800Strasz aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t); 5904192800Strasz vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 5905192800Strasz aaclp = vsecattr.vsa_aclentp; 5906192800Strasz vsecattr.vsa_aclentsz = aclbsize; 5907192800Strasz 5908192800Strasz aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 5909192800Strasz error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL); 5910192800Strasz kmem_free(aaclp, aclbsize); 5911192800Strasz 5912192800Strasz return (error); 5913192800Strasz} 5914192800Strasz 5915192800Straszint 5916192800Straszzfs_freebsd_aclcheck(ap) 5917192800Strasz struct vop_aclcheck_args /* { 5918192800Strasz struct vnode *vp; 5919192800Strasz acl_type_t type; 5920192800Strasz struct acl *aclp; 5921192800Strasz struct ucred *cred; 5922192800Strasz struct thread *td; 5923192800Strasz } */ *ap; 5924192800Strasz{ 5925192800Strasz 5926192800Strasz return (EOPNOTSUPP); 5927192800Strasz} 5928192800Strasz 5929299906Savgstatic int 5930299906Savgzfs_vptocnp(struct vop_vptocnp_args *ap) 5931299906Savg{ 5932299906Savg vnode_t *covered_vp; 5933299906Savg vnode_t *vp = ap->a_vp;; 5934299906Savg zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 5935299906Savg znode_t *zp = VTOZ(vp); 5936299906Savg int ltype; 5937299906Savg int error; 5938299906Savg 5939301870Savg ZFS_ENTER(zfsvfs); 5940301870Savg ZFS_VERIFY_ZP(zp); 5941301870Savg 5942299906Savg /* 5943299906Savg * If we are a snapshot mounted under .zfs, run the operation 5944299906Savg * on the covered vnode. 
5945299906Savg */ 5946324158Savg if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) { 5947307995Savg char name[MAXNAMLEN + 1]; 5948307995Savg znode_t *dzp; 5949307995Savg size_t len; 5950307995Savg 5951307995Savg error = zfs_znode_parent_and_name(zp, &dzp, name); 5952307995Savg if (error == 0) { 5953307995Savg len = strlen(name); 5954314030Savg if (*ap->a_buflen < len) 5955314030Savg error = SET_ERROR(ENOMEM); 5956314030Savg } 5957314030Savg if (error == 0) { 5958307995Savg *ap->a_buflen -= len; 5959307995Savg bcopy(name, ap->a_buf + *ap->a_buflen, len); 5960307995Savg *ap->a_vpp = ZTOV(dzp); 5961307995Savg } 5962301870Savg ZFS_EXIT(zfsvfs); 5963307995Savg return (error); 5964301870Savg } 5965301870Savg ZFS_EXIT(zfsvfs); 5966299906Savg 5967299906Savg covered_vp = vp->v_mount->mnt_vnodecovered; 5968299906Savg vhold(covered_vp); 5969299906Savg ltype = VOP_ISLOCKED(vp); 5970299906Savg VOP_UNLOCK(vp, 0); 5971315842Savg error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread); 5972299906Savg if (error == 0) { 5973299906Savg error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred, 5974299906Savg ap->a_buf, ap->a_buflen); 5975299906Savg vput(covered_vp); 5976299906Savg } 5977299906Savg vn_lock(vp, ltype | LK_RETRY); 5978299906Savg if ((vp->v_iflag & VI_DOOMED) != 0) 5979299906Savg error = SET_ERROR(ENOENT); 5980299906Savg return (error); 5981299906Savg} 5982299906Savg 5983303970Savg#ifdef DIAGNOSTIC 5984303970Savgstatic int 5985303970Savgzfs_lock(ap) 5986303970Savg struct vop_lock1_args /* { 5987303970Savg struct vnode *a_vp; 5988303970Savg int a_flags; 5989303970Savg char *file; 5990303970Savg int line; 5991303970Savg } */ *ap; 5992303970Savg{ 5993310066Savg vnode_t *vp; 5994303970Savg znode_t *zp; 5995303970Savg int err; 5996303970Savg 5997303970Savg err = vop_stdlock(ap); 5998310066Savg if (err == 0 && (ap->a_flags & LK_NOWAIT) == 0) { 5999310066Savg vp = ap->a_vp; 6000310066Savg zp = vp->v_data; 6001310066Savg if (vp->v_mount != NULL && (vp->v_iflag & 
VI_DOOMED) == 0 && 6002310066Savg zp != NULL && (zp->z_pflags & ZFS_XATTR) == 0) 6003310066Savg VERIFY(!RRM_LOCK_HELD(&zp->z_zfsvfs->z_teardown_lock)); 6004303970Savg } 6005303970Savg return (err); 6006303970Savg} 6007303970Savg#endif 6008303970Savg 6009168404Spjdstruct vop_vector zfs_vnodeops; 6010168404Spjdstruct vop_vector zfs_fifoops; 6011209962Smmstruct vop_vector zfs_shareops; 6012168404Spjd 6013168404Spjdstruct vop_vector zfs_vnodeops = { 6014185029Spjd .vop_default = &default_vnodeops, 6015185029Spjd .vop_inactive = zfs_freebsd_inactive, 6016185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 6017185029Spjd .vop_access = zfs_freebsd_access, 6018303970Savg .vop_lookup = zfs_cache_lookup, 6019357605Skevans .vop_cachedlookup = zfs_freebsd_cachedlookup, 6020185029Spjd .vop_getattr = zfs_freebsd_getattr, 6021185029Spjd .vop_setattr = zfs_freebsd_setattr, 6022185029Spjd .vop_create = zfs_freebsd_create, 6023185029Spjd .vop_mknod = zfs_freebsd_create, 6024185029Spjd .vop_mkdir = zfs_freebsd_mkdir, 6025185029Spjd .vop_readdir = zfs_freebsd_readdir, 6026185029Spjd .vop_fsync = zfs_freebsd_fsync, 6027185029Spjd .vop_open = zfs_freebsd_open, 6028185029Spjd .vop_close = zfs_freebsd_close, 6029185029Spjd .vop_rmdir = zfs_freebsd_rmdir, 6030185029Spjd .vop_ioctl = zfs_freebsd_ioctl, 6031185029Spjd .vop_link = zfs_freebsd_link, 6032185029Spjd .vop_symlink = zfs_freebsd_symlink, 6033185029Spjd .vop_readlink = zfs_freebsd_readlink, 6034185029Spjd .vop_read = zfs_freebsd_read, 6035185029Spjd .vop_write = zfs_freebsd_write, 6036185029Spjd .vop_remove = zfs_freebsd_remove, 6037185029Spjd .vop_rename = zfs_freebsd_rename, 6038185029Spjd .vop_pathconf = zfs_freebsd_pathconf, 6039243518Savg .vop_bmap = zfs_freebsd_bmap, 6040185029Spjd .vop_fid = zfs_freebsd_fid, 6041185029Spjd .vop_getextattr = zfs_getextattr, 6042185029Spjd .vop_deleteextattr = zfs_deleteextattr, 6043185029Spjd .vop_setextattr = zfs_setextattr, 6044185029Spjd .vop_listextattr = zfs_listextattr, 6045192800Strasz 
.vop_getacl = zfs_freebsd_getacl, 6046192800Strasz .vop_setacl = zfs_freebsd_setacl, 6047192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6048213937Savg .vop_getpages = zfs_freebsd_getpages, 6049258746Savg .vop_putpages = zfs_freebsd_putpages, 6050299906Savg .vop_vptocnp = zfs_vptocnp, 6051303970Savg#ifdef DIAGNOSTIC 6052303970Savg .vop_lock1 = zfs_lock, 6053303970Savg#endif 6054168404Spjd}; 6055168404Spjd 6056169170Spjdstruct vop_vector zfs_fifoops = { 6057185029Spjd .vop_default = &fifo_specops, 6058200162Skib .vop_fsync = zfs_freebsd_fsync, 6059185029Spjd .vop_access = zfs_freebsd_access, 6060185029Spjd .vop_getattr = zfs_freebsd_getattr, 6061185029Spjd .vop_inactive = zfs_freebsd_inactive, 6062185029Spjd .vop_read = VOP_PANIC, 6063185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 6064185029Spjd .vop_setattr = zfs_freebsd_setattr, 6065185029Spjd .vop_write = VOP_PANIC, 6066328298Sjhb .vop_pathconf = zfs_freebsd_pathconf, 6067185029Spjd .vop_fid = zfs_freebsd_fid, 6068192800Strasz .vop_getacl = zfs_freebsd_getacl, 6069192800Strasz .vop_setacl = zfs_freebsd_setacl, 6070192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6071168404Spjd}; 6072209962Smm 6073209962Smm/* 6074209962Smm * special share hidden files vnode operations template 6075209962Smm */ 6076209962Smmstruct vop_vector zfs_shareops = { 6077209962Smm .vop_default = &default_vnodeops, 6078209962Smm .vop_access = zfs_freebsd_access, 6079209962Smm .vop_inactive = zfs_freebsd_inactive, 6080209962Smm .vop_reclaim = zfs_freebsd_reclaim, 6081209962Smm .vop_fid = zfs_freebsd_fid, 6082209962Smm .vop_pathconf = zfs_freebsd_pathconf, 6083209962Smm}; 6084