zfs_vnops.c revision 352687
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21321545Smav 22168404Spjd/* 23212694Smm * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24289562Smav * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 25296519Smav * Copyright (c) 2014 Integros [integros.com] 26321545Smav * Copyright 2017 Nexenta Systems, Inc. 
27168404Spjd */ 28168404Spjd 29169195Spjd/* Portions Copyright 2007 Jeremy Teo */ 30219089Spjd/* Portions Copyright 2010 Robert Milkowski */ 31169195Spjd 32168404Spjd#include <sys/types.h> 33168404Spjd#include <sys/param.h> 34168404Spjd#include <sys/time.h> 35168404Spjd#include <sys/systm.h> 36168404Spjd#include <sys/sysmacros.h> 37168404Spjd#include <sys/resource.h> 38168404Spjd#include <sys/vfs.h> 39248084Sattilio#include <sys/vm.h> 40168404Spjd#include <sys/vnode.h> 41168404Spjd#include <sys/file.h> 42168404Spjd#include <sys/stat.h> 43168404Spjd#include <sys/kmem.h> 44168404Spjd#include <sys/taskq.h> 45168404Spjd#include <sys/uio.h> 46168404Spjd#include <sys/atomic.h> 47168404Spjd#include <sys/namei.h> 48168404Spjd#include <sys/mman.h> 49168404Spjd#include <sys/cmn_err.h> 50168404Spjd#include <sys/errno.h> 51168404Spjd#include <sys/unistd.h> 52168404Spjd#include <sys/zfs_dir.h> 53168404Spjd#include <sys/zfs_ioctl.h> 54168404Spjd#include <sys/fs/zfs.h> 55168404Spjd#include <sys/dmu.h> 56219089Spjd#include <sys/dmu_objset.h> 57168404Spjd#include <sys/spa.h> 58168404Spjd#include <sys/txg.h> 59168404Spjd#include <sys/dbuf.h> 60168404Spjd#include <sys/zap.h> 61219089Spjd#include <sys/sa.h> 62168404Spjd#include <sys/dirent.h> 63168962Spjd#include <sys/policy.h> 64168962Spjd#include <sys/sunddi.h> 65168404Spjd#include <sys/filio.h> 66209962Smm#include <sys/sid.h> 67168404Spjd#include <sys/zfs_ctldir.h> 68185029Spjd#include <sys/zfs_fuid.h> 69219089Spjd#include <sys/zfs_sa.h> 70168404Spjd#include <sys/zfs_rlock.h> 71185029Spjd#include <sys/extdirent.h> 72185029Spjd#include <sys/kidmap.h> 73168404Spjd#include <sys/bio.h> 74168404Spjd#include <sys/buf.h> 75168404Spjd#include <sys/sched.h> 76192800Strasz#include <sys/acl.h> 77331017Skevans#include <sys/vmmeter.h> 78239077Smarius#include <vm/vm_param.h> 79325132Savg#include <sys/zil.h> 80168404Spjd 81168404Spjd/* 82168404Spjd * Programming rules. 
83168404Spjd * 84168404Spjd * Each vnode op performs some logical unit of work. To do this, the ZPL must 85168404Spjd * properly lock its in-core state, create a DMU transaction, do the work, 86168404Spjd * record this work in the intent log (ZIL), commit the DMU transaction, 87185029Spjd * and wait for the intent log to commit if it is a synchronous operation. 88185029Spjd * Moreover, the vnode ops must work in both normal and log replay context. 89168404Spjd * The ordering of events is important to avoid deadlocks and references 90168404Spjd * to freed memory. The example below illustrates the following Big Rules: 91168404Spjd * 92251631Sdelphij * (1) A check must be made in each zfs thread for a mounted file system. 93168404Spjd * This is done avoiding races using ZFS_ENTER(zfsvfs). 94251631Sdelphij * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 95251631Sdelphij * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 96251631Sdelphij * can return EIO from the calling function. 97168404Spjd * 98168404Spjd * (2) VN_RELE() should always be the last thing except for zil_commit() 99168404Spjd * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 100168404Spjd * First, if it's the last reference, the vnode/znode 101168404Spjd * can be freed, so the zp may point to freed memory. Second, the last 102168404Spjd * reference will call zfs_zinactive(), which may induce a lot of work -- 103168404Spjd * pushing cached pages (which acquires range locks) and syncing out 104168404Spjd * cached atime changes. Third, zfs_zinactive() may require a new tx, 105168404Spjd * which could deadlock the system if you were already holding one. 106191900Skmacy * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 107168404Spjd * 108168404Spjd * (3) All range locks must be grabbed before calling dmu_tx_assign(), 109168404Spjd * as they can span dmu_tx_assign() calls. 
110168404Spjd * 111258720Savg * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to 112258720Savg * dmu_tx_assign(). This is critical because we don't want to block 113258720Savg * while holding locks. 114168404Spjd * 115258720Savg * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This 116258720Savg * reduces lock contention and CPU usage when we must wait (note that if 117258720Savg * throughput is constrained by the storage, nearly every transaction 118258720Savg * must wait). 119258720Savg * 120258720Savg * Note, in particular, that if a lock is sometimes acquired before 121258720Savg * the tx assigns, and sometimes after (e.g. z_lock), then failing 122258720Savg * to use a non-blocking assign can deadlock the system. The scenario: 123258720Savg * 124168404Spjd * Thread A has grabbed a lock before calling dmu_tx_assign(). 125168404Spjd * Thread B is in an already-assigned tx, and blocks for this lock. 126168404Spjd * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 127168404Spjd * forever, because the previous txg can't quiesce until B's tx commits. 128168404Spjd * 129168404Spjd * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 130258632Savg * then drop all locks, call dmu_tx_wait(), and try again. On subsequent 131330986Savg * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT, 132258632Savg * to indicate that this operation has already called dmu_tx_wait(). 133258632Savg * This will ensure that we don't retry forever, waiting a short bit 134258632Savg * each time. 135168404Spjd * 136168404Spjd * (5) If the operation succeeded, generate the intent log entry for it 137168404Spjd * before dropping locks. This ensures that the ordering of events 138168404Spjd * in the intent log matches the order in which they actually occurred. 
139251631Sdelphij * During ZIL replay the zfs_log_* functions will update the sequence 140209962Smm * number to indicate the zil transaction has replayed. 141168404Spjd * 142168404Spjd * (6) At the end of each vnode op, the DMU tx must always commit, 143168404Spjd * regardless of whether there were any errors. 144168404Spjd * 145219089Spjd * (7) After dropping all locks, invoke zil_commit(zilog, foid) 146168404Spjd * to ensure that synchronous semantics are provided when necessary. 147168404Spjd * 148168404Spjd * In general, this is how things should be ordered in each vnode op: 149168404Spjd * 150168404Spjd * ZFS_ENTER(zfsvfs); // exit if unmounted 151168404Spjd * top: 152303970Savg * zfs_dirent_lookup(&dl, ...) // lock directory entry (may VN_HOLD()) 153168404Spjd * rw_enter(...); // grab any other locks you need 154168404Spjd * tx = dmu_tx_create(...); // get DMU tx 155168404Spjd * dmu_tx_hold_*(); // hold each object you might modify 156330986Savg * error = dmu_tx_assign(tx, (waited ? 
TXG_NOTHROTTLE : 0) | TXG_NOWAIT); 157168404Spjd * if (error) { 158168404Spjd * rw_exit(...); // drop locks 159168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 160168404Spjd * VN_RELE(...); // release held vnodes 161209962Smm * if (error == ERESTART) { 162258632Savg * waited = B_TRUE; 163168404Spjd * dmu_tx_wait(tx); 164168404Spjd * dmu_tx_abort(tx); 165168404Spjd * goto top; 166168404Spjd * } 167168404Spjd * dmu_tx_abort(tx); // abort DMU tx 168168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 169168404Spjd * return (error); // really out of space 170168404Spjd * } 171168404Spjd * error = do_real_work(); // do whatever this VOP does 172168404Spjd * if (error == 0) 173168404Spjd * zfs_log_*(...); // on success, make ZIL entry 174168404Spjd * dmu_tx_commit(tx); // commit DMU tx -- error or not 175168404Spjd * rw_exit(...); // drop locks 176168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 177168404Spjd * VN_RELE(...); // release held vnodes 178219089Spjd * zil_commit(zilog, foid); // synchronous when necessary 179168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 180168404Spjd * return (error); // done, report error 181168404Spjd */ 182185029Spjd 183168404Spjd/* ARGSUSED */ 184168404Spjdstatic int 185185029Spjdzfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 186168404Spjd{ 187168962Spjd znode_t *zp = VTOZ(*vpp); 188209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 189168404Spjd 190209962Smm ZFS_ENTER(zfsvfs); 191209962Smm ZFS_VERIFY_ZP(zp); 192209962Smm 193219089Spjd if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 194185029Spjd ((flag & FAPPEND) == 0)) { 195209962Smm ZFS_EXIT(zfsvfs); 196249195Smm return (SET_ERROR(EPERM)); 197185029Spjd } 198185029Spjd 199185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 200185029Spjd ZTOV(zp)->v_type == VREG && 201219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 202209962Smm if (fs_vscan(*vpp, cr, 0) != 0) { 203209962Smm ZFS_EXIT(zfsvfs); 204249195Smm 
return (SET_ERROR(EACCES)); 205209962Smm } 206209962Smm } 207185029Spjd 208168404Spjd /* Keep a count of the synchronous opens in the znode */ 209168962Spjd if (flag & (FSYNC | FDSYNC)) 210168404Spjd atomic_inc_32(&zp->z_sync_cnt); 211185029Spjd 212209962Smm ZFS_EXIT(zfsvfs); 213168404Spjd return (0); 214168404Spjd} 215168404Spjd 216168404Spjd/* ARGSUSED */ 217168404Spjdstatic int 218185029Spjdzfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 219185029Spjd caller_context_t *ct) 220168404Spjd{ 221168962Spjd znode_t *zp = VTOZ(vp); 222209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 223168404Spjd 224210470Smm /* 225210470Smm * Clean up any locks held by this process on the vp. 226210470Smm */ 227210470Smm cleanlocks(vp, ddi_get_pid(), 0); 228210470Smm cleanshares(vp, ddi_get_pid()); 229210470Smm 230209962Smm ZFS_ENTER(zfsvfs); 231209962Smm ZFS_VERIFY_ZP(zp); 232209962Smm 233168404Spjd /* Decrement the synchronous opens in the znode */ 234185029Spjd if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 235168404Spjd atomic_dec_32(&zp->z_sync_cnt); 236168404Spjd 237185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 238185029Spjd ZTOV(zp)->v_type == VREG && 239219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 240185029Spjd VERIFY(fs_vscan(vp, cr, 1) == 0); 241185029Spjd 242209962Smm ZFS_EXIT(zfsvfs); 243168404Spjd return (0); 244168404Spjd} 245168404Spjd 246168404Spjd/* 247168404Spjd * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 248168404Spjd * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 
249168404Spjd */ 250168404Spjdstatic int 251168978Spjdzfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 252168404Spjd{ 253168404Spjd znode_t *zp = VTOZ(vp); 254168404Spjd uint64_t noff = (uint64_t)*off; /* new offset */ 255168404Spjd uint64_t file_sz; 256168404Spjd int error; 257168404Spjd boolean_t hole; 258168404Spjd 259219089Spjd file_sz = zp->z_size; 260168404Spjd if (noff >= file_sz) { 261249195Smm return (SET_ERROR(ENXIO)); 262168404Spjd } 263168404Spjd 264168962Spjd if (cmd == _FIO_SEEK_HOLE) 265168404Spjd hole = B_TRUE; 266168404Spjd else 267168404Spjd hole = B_FALSE; 268168404Spjd 269168404Spjd error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 270168404Spjd 271271536Sdelphij if (error == ESRCH) 272249195Smm return (SET_ERROR(ENXIO)); 273271536Sdelphij 274271536Sdelphij /* 275271536Sdelphij * We could find a hole that begins after the logical end-of-file, 276271536Sdelphij * because dmu_offset_next() only works on whole blocks. If the 277271536Sdelphij * EOF falls mid-block, then indicate that the "virtual hole" 278271536Sdelphij * at the end of the file begins at the logical EOF, rather than 279271536Sdelphij * at the end of the last block. 
280271536Sdelphij */ 281271536Sdelphij if (noff > file_sz) { 282271536Sdelphij ASSERT(hole); 283271536Sdelphij noff = file_sz; 284168404Spjd } 285168404Spjd 286168404Spjd if (noff < *off) 287168404Spjd return (error); 288168404Spjd *off = noff; 289168404Spjd return (error); 290168404Spjd} 291168404Spjd 292168404Spjd/* ARGSUSED */ 293168404Spjdstatic int 294168978Spjdzfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 295185029Spjd int *rvalp, caller_context_t *ct) 296168404Spjd{ 297168962Spjd offset_t off; 298287103Savg offset_t ndata; 299287103Savg dmu_object_info_t doi; 300168962Spjd int error; 301168962Spjd zfsvfs_t *zfsvfs; 302185029Spjd znode_t *zp; 303168404Spjd 304168404Spjd switch (com) { 305185029Spjd case _FIOFFS: 306287103Savg { 307168962Spjd return (0); 308168404Spjd 309168962Spjd /* 310168962Spjd * The following two ioctls are used by bfu. Faking out, 311168962Spjd * necessary to avoid bfu errors. 312168962Spjd */ 313287103Savg } 314185029Spjd case _FIOGDIO: 315185029Spjd case _FIOSDIO: 316287103Savg { 317168962Spjd return (0); 318287103Savg } 319168962Spjd 320185029Spjd case _FIO_SEEK_DATA: 321185029Spjd case _FIO_SEEK_HOLE: 322287103Savg { 323277300Ssmh#ifdef illumos 324168962Spjd if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 325249195Smm return (SET_ERROR(EFAULT)); 326233918Savg#else 327233918Savg off = *(offset_t *)data; 328233918Savg#endif 329185029Spjd zp = VTOZ(vp); 330185029Spjd zfsvfs = zp->z_zfsvfs; 331168404Spjd ZFS_ENTER(zfsvfs); 332185029Spjd ZFS_VERIFY_ZP(zp); 333168404Spjd 334168404Spjd /* offset parameter is in/out */ 335168404Spjd error = zfs_holey(vp, com, &off); 336168404Spjd ZFS_EXIT(zfsvfs); 337168404Spjd if (error) 338168404Spjd return (error); 339277300Ssmh#ifdef illumos 340168962Spjd if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 341249195Smm return (SET_ERROR(EFAULT)); 342233918Savg#else 343233918Savg *(offset_t *)data = off; 344233918Savg#endif 345168404Spjd return (0); 346168404Spjd } 
347287103Savg#ifdef illumos 348287103Savg case _FIO_COUNT_FILLED: 349287103Savg { 350287103Savg /* 351287103Savg * _FIO_COUNT_FILLED adds a new ioctl command which 352287103Savg * exposes the number of filled blocks in a 353287103Savg * ZFS object. 354287103Savg */ 355287103Savg zp = VTOZ(vp); 356287103Savg zfsvfs = zp->z_zfsvfs; 357287103Savg ZFS_ENTER(zfsvfs); 358287103Savg ZFS_VERIFY_ZP(zp); 359287103Savg 360287103Savg /* 361287103Savg * Wait for all dirty blocks for this object 362287103Savg * to get synced out to disk, and the DMU info 363287103Savg * updated. 364287103Savg */ 365287103Savg error = dmu_object_wait_synced(zfsvfs->z_os, zp->z_id); 366287103Savg if (error) { 367287103Savg ZFS_EXIT(zfsvfs); 368287103Savg return (error); 369287103Savg } 370287103Savg 371287103Savg /* 372287103Savg * Retrieve fill count from DMU object. 373287103Savg */ 374287103Savg error = dmu_object_info(zfsvfs->z_os, zp->z_id, &doi); 375287103Savg if (error) { 376287103Savg ZFS_EXIT(zfsvfs); 377287103Savg return (error); 378287103Savg } 379287103Savg 380287103Savg ndata = doi.doi_fill_count; 381287103Savg 382287103Savg ZFS_EXIT(zfsvfs); 383287103Savg if (ddi_copyout(&ndata, (void *)data, sizeof (ndata), flag)) 384287103Savg return (SET_ERROR(EFAULT)); 385287103Savg return (0); 386287103Savg } 387287103Savg#endif 388287103Savg } 389249195Smm return (SET_ERROR(ENOTTY)); 390168404Spjd} 391168404Spjd 392209962Smmstatic vm_page_t 393253953Sattiliopage_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 394209962Smm{ 395209962Smm vm_object_t obj; 396209962Smm vm_page_t pp; 397258353Savg int64_t end; 398209962Smm 399258353Savg /* 400258353Savg * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE 401258353Savg * aligned boundaries, if the range is not aligned. As a result a 402258353Savg * DEV_BSIZE subrange with partially dirty data may get marked as clean. 
403258353Savg * It may happen that all DEV_BSIZE subranges are marked clean and thus 404258353Savg * the whole page would be considred clean despite have some dirty data. 405258353Savg * For this reason we should shrink the range to DEV_BSIZE aligned 406258353Savg * boundaries before calling vm_page_clear_dirty. 407258353Savg */ 408258353Savg end = rounddown2(off + nbytes, DEV_BSIZE); 409258353Savg off = roundup2(off, DEV_BSIZE); 410258353Savg nbytes = end - off; 411258353Savg 412209962Smm obj = vp->v_object; 413248084Sattilio zfs_vmobject_assert_wlocked(obj); 414209962Smm 415209962Smm for (;;) { 416209962Smm if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 417246293Savg pp->valid) { 418254138Sattilio if (vm_page_xbusied(pp)) { 419212652Savg /* 420212652Savg * Reference the page before unlocking and 421212652Savg * sleeping so that the page daemon is less 422212652Savg * likely to reclaim it. 423212652Savg */ 424225418Skib vm_page_reference(pp); 425254138Sattilio vm_page_lock(pp); 426254138Sattilio zfs_vmobject_wunlock(obj); 427307671Skib vm_page_busy_sleep(pp, "zfsmwb", true); 428254138Sattilio zfs_vmobject_wlock(obj); 429209962Smm continue; 430212652Savg } 431254138Sattilio vm_page_sbusy(pp); 432319091Savg } else if (pp != NULL) { 433319091Savg ASSERT(!pp->valid); 434252337Sgavin pp = NULL; 435209962Smm } 436246293Savg 437246293Savg if (pp != NULL) { 438246293Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 439253953Sattilio vm_object_pip_add(obj, 1); 440246293Savg pmap_remove_write(pp); 441258353Savg if (nbytes != 0) 442258353Savg vm_page_clear_dirty(pp, off, nbytes); 443246293Savg } 444209962Smm break; 445209962Smm } 446209962Smm return (pp); 447209962Smm} 448209962Smm 449209962Smmstatic void 450253953Sattiliopage_unbusy(vm_page_t pp) 451209962Smm{ 452209962Smm 453254138Sattilio vm_page_sunbusy(pp); 454253953Sattilio vm_object_pip_subtract(pp->object, 1); 455209962Smm} 456209962Smm 457253953Sattiliostatic vm_page_t 458253953Sattiliopage_hold(vnode_t 
*vp, int64_t start) 459253953Sattilio{ 460253953Sattilio vm_object_t obj; 461253953Sattilio vm_page_t pp; 462253953Sattilio 463253953Sattilio obj = vp->v_object; 464253953Sattilio zfs_vmobject_assert_wlocked(obj); 465253953Sattilio 466253953Sattilio for (;;) { 467253953Sattilio if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 468253953Sattilio pp->valid) { 469254138Sattilio if (vm_page_xbusied(pp)) { 470253953Sattilio /* 471253953Sattilio * Reference the page before unlocking and 472253953Sattilio * sleeping so that the page daemon is less 473253953Sattilio * likely to reclaim it. 474253953Sattilio */ 475253953Sattilio vm_page_reference(pp); 476254138Sattilio vm_page_lock(pp); 477254138Sattilio zfs_vmobject_wunlock(obj); 478307671Skib vm_page_busy_sleep(pp, "zfsmwb", true); 479254138Sattilio zfs_vmobject_wlock(obj); 480253953Sattilio continue; 481253953Sattilio } 482253953Sattilio 483253953Sattilio ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 484253953Sattilio vm_page_lock(pp); 485253953Sattilio vm_page_hold(pp); 486253953Sattilio vm_page_unlock(pp); 487253953Sattilio 488253953Sattilio } else 489253953Sattilio pp = NULL; 490253953Sattilio break; 491253953Sattilio } 492253953Sattilio return (pp); 493253953Sattilio} 494253953Sattilio 495253953Sattiliostatic void 496253953Sattiliopage_unhold(vm_page_t pp) 497253953Sattilio{ 498253953Sattilio 499253953Sattilio vm_page_lock(pp); 500253953Sattilio vm_page_unhold(pp); 501253953Sattilio vm_page_unlock(pp); 502253953Sattilio} 503253953Sattilio 504168404Spjd/* 505168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 506168404Spjd * between the DMU cache and the memory mapped pages. What this means: 507168404Spjd * 508168404Spjd * On Write: If we find a memory mapped page, we write to *both* 509168404Spjd * the page and the dmu buffer. 
510168404Spjd */ 511209962Smmstatic void 512209962Smmupdate_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 513209962Smm int segflg, dmu_tx_t *tx) 514168404Spjd{ 515168404Spjd vm_object_t obj; 516168404Spjd struct sf_buf *sf; 517246293Savg caddr_t va; 518212655Savg int off; 519168404Spjd 520258746Savg ASSERT(segflg != UIO_NOCOPY); 521168404Spjd ASSERT(vp->v_mount != NULL); 522168404Spjd obj = vp->v_object; 523168404Spjd ASSERT(obj != NULL); 524168404Spjd 525168404Spjd off = start & PAGEOFFSET; 526248084Sattilio zfs_vmobject_wlock(obj); 527168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 528209962Smm vm_page_t pp; 529246293Savg int nbytes = imin(PAGESIZE - off, len); 530168404Spjd 531258746Savg if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { 532248084Sattilio zfs_vmobject_wunlock(obj); 533168404Spjd 534246293Savg va = zfs_map_page(pp, &sf); 535246293Savg (void) dmu_read(os, oid, start+off, nbytes, 536246293Savg va+off, DMU_READ_PREFETCH);; 537209962Smm zfs_unmap_page(sf); 538246293Savg 539248084Sattilio zfs_vmobject_wlock(obj); 540253953Sattilio page_unbusy(pp); 541168404Spjd } 542209962Smm len -= nbytes; 543168404Spjd off = 0; 544168404Spjd } 545258746Savg vm_object_pip_wakeupn(obj, 0); 546248084Sattilio zfs_vmobject_wunlock(obj); 547168404Spjd} 548168404Spjd 549168404Spjd/* 550219089Spjd * Read with UIO_NOCOPY flag means that sendfile(2) requests 551219089Spjd * ZFS to populate a range of page cache pages with data. 552219089Spjd * 553219089Spjd * NOTE: this function could be optimized to pre-allocate 554254138Sattilio * all pages in advance, drain exclusive busy on all of them, 555219089Spjd * map them into contiguous KVA region and populate them 556219089Spjd * in one single dmu_read() call. 
557219089Spjd */ 558219089Spjdstatic int 559219089Spjdmappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) 560219089Spjd{ 561219089Spjd znode_t *zp = VTOZ(vp); 562219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 563219089Spjd struct sf_buf *sf; 564219089Spjd vm_object_t obj; 565219089Spjd vm_page_t pp; 566219089Spjd int64_t start; 567219089Spjd caddr_t va; 568219089Spjd int len = nbytes; 569219089Spjd int off; 570219089Spjd int error = 0; 571219089Spjd 572219089Spjd ASSERT(uio->uio_segflg == UIO_NOCOPY); 573219089Spjd ASSERT(vp->v_mount != NULL); 574219089Spjd obj = vp->v_object; 575219089Spjd ASSERT(obj != NULL); 576219089Spjd ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); 577219089Spjd 578248084Sattilio zfs_vmobject_wlock(obj); 579219089Spjd for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { 580219089Spjd int bytes = MIN(PAGESIZE, len); 581219089Spjd 582254138Sattilio pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY | 583254649Skib VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); 584219089Spjd if (pp->valid == 0) { 585248084Sattilio zfs_vmobject_wunlock(obj); 586219089Spjd va = zfs_map_page(pp, &sf); 587219089Spjd error = dmu_read(os, zp->z_id, start, bytes, va, 588219089Spjd DMU_READ_PREFETCH); 589219089Spjd if (bytes != PAGESIZE && error == 0) 590219089Spjd bzero(va + bytes, PAGESIZE - bytes); 591219089Spjd zfs_unmap_page(sf); 592248084Sattilio zfs_vmobject_wlock(obj); 593254138Sattilio vm_page_sunbusy(pp); 594219089Spjd vm_page_lock(pp); 595219089Spjd if (error) { 596253073Savg if (pp->wire_count == 0 && pp->valid == 0 && 597254138Sattilio !vm_page_busied(pp)) 598253073Savg vm_page_free(pp); 599219089Spjd } else { 600219089Spjd pp->valid = VM_PAGE_BITS_ALL; 601219089Spjd vm_page_activate(pp); 602219089Spjd } 603219089Spjd vm_page_unlock(pp); 604258739Savg } else { 605258739Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 606254138Sattilio vm_page_sunbusy(pp); 607258739Savg } 608219089Spjd if (error) 609219089Spjd break; 610219089Spjd uio->uio_resid -= bytes; 
611219089Spjd uio->uio_offset += bytes; 612219089Spjd len -= bytes; 613219089Spjd } 614248084Sattilio zfs_vmobject_wunlock(obj); 615219089Spjd return (error); 616219089Spjd} 617219089Spjd 618219089Spjd/* 619168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 620168404Spjd * between the DMU cache and the memory mapped pages. What this means: 621168404Spjd * 622168404Spjd * On Read: We "read" preferentially from memory mapped pages, 623168404Spjd * else we default from the dmu buffer. 624168404Spjd * 625168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 626251631Sdelphij * the file is memory mapped. 627168404Spjd */ 628168404Spjdstatic int 629168404Spjdmappedread(vnode_t *vp, int nbytes, uio_t *uio) 630168404Spjd{ 631168404Spjd znode_t *zp = VTOZ(vp); 632168404Spjd vm_object_t obj; 633212655Savg int64_t start; 634168926Spjd caddr_t va; 635168404Spjd int len = nbytes; 636212655Savg int off; 637168404Spjd int error = 0; 638168404Spjd 639168404Spjd ASSERT(vp->v_mount != NULL); 640168404Spjd obj = vp->v_object; 641168404Spjd ASSERT(obj != NULL); 642168404Spjd 643168404Spjd start = uio->uio_loffset; 644168404Spjd off = start & PAGEOFFSET; 645248084Sattilio zfs_vmobject_wlock(obj); 646168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 647219089Spjd vm_page_t pp; 648219089Spjd uint64_t bytes = MIN(PAGESIZE - off, len); 649168404Spjd 650253953Sattilio if (pp = page_hold(vp, start)) { 651219089Spjd struct sf_buf *sf; 652219089Spjd caddr_t va; 653212652Savg 654248084Sattilio zfs_vmobject_wunlock(obj); 655219089Spjd va = zfs_map_page(pp, &sf); 656298105Savg#ifdef illumos 657219089Spjd error = uiomove(va + off, bytes, UIO_READ, uio); 658298105Savg#else 659298105Savg error = vn_io_fault_uiomove(va + off, bytes, uio); 660298105Savg#endif 661219089Spjd zfs_unmap_page(sf); 662248084Sattilio zfs_vmobject_wlock(obj); 663253953Sattilio page_unhold(pp); 664219089Spjd } else { 665248084Sattilio 
zfs_vmobject_wunlock(obj); 666272809Sdelphij error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 667272809Sdelphij uio, bytes); 668248084Sattilio zfs_vmobject_wlock(obj); 669168404Spjd } 670168404Spjd len -= bytes; 671168404Spjd off = 0; 672168404Spjd if (error) 673168404Spjd break; 674168404Spjd } 675248084Sattilio zfs_vmobject_wunlock(obj); 676168404Spjd return (error); 677168404Spjd} 678168404Spjd 679168404Spjdoffset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 680168404Spjd 681168404Spjd/* 682168404Spjd * Read bytes from specified file into supplied buffer. 683168404Spjd * 684168404Spjd * IN: vp - vnode of file to be read from. 685168404Spjd * uio - structure supplying read location, range info, 686168404Spjd * and return buffer. 687168404Spjd * ioflag - SYNC flags; used to provide FRSYNC semantics. 688168404Spjd * cr - credentials of caller. 689185029Spjd * ct - caller context 690168404Spjd * 691168404Spjd * OUT: uio - updated offset and range, buffer filled. 692168404Spjd * 693251631Sdelphij * RETURN: 0 on success, error code on failure. 
694168404Spjd * 695168404Spjd * Side Effects: 696168404Spjd * vp - atime updated if byte count > 0 697168404Spjd */ 698168404Spjd/* ARGSUSED */ 699168404Spjdstatic int 700168962Spjdzfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 701168404Spjd{ 702168404Spjd znode_t *zp = VTOZ(vp); 703168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 704168404Spjd ssize_t n, nbytes; 705247187Smm int error = 0; 706168404Spjd rl_t *rl; 707219089Spjd xuio_t *xuio = NULL; 708168404Spjd 709168404Spjd ZFS_ENTER(zfsvfs); 710185029Spjd ZFS_VERIFY_ZP(zp); 711168404Spjd 712219089Spjd if (zp->z_pflags & ZFS_AV_QUARANTINED) { 713185029Spjd ZFS_EXIT(zfsvfs); 714249195Smm return (SET_ERROR(EACCES)); 715185029Spjd } 716185029Spjd 717168404Spjd /* 718168404Spjd * Validate file offset 719168404Spjd */ 720168404Spjd if (uio->uio_loffset < (offset_t)0) { 721168404Spjd ZFS_EXIT(zfsvfs); 722249195Smm return (SET_ERROR(EINVAL)); 723168404Spjd } 724168404Spjd 725168404Spjd /* 726168404Spjd * Fasttrack empty reads 727168404Spjd */ 728168404Spjd if (uio->uio_resid == 0) { 729168404Spjd ZFS_EXIT(zfsvfs); 730168404Spjd return (0); 731168404Spjd } 732168404Spjd 733168404Spjd /* 734168962Spjd * Check for mandatory locks 735168962Spjd */ 736219089Spjd if (MANDMODE(zp->z_mode)) { 737168962Spjd if (error = chklock(vp, FREAD, 738168962Spjd uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 739168962Spjd ZFS_EXIT(zfsvfs); 740168962Spjd return (error); 741168962Spjd } 742168962Spjd } 743168962Spjd 744168962Spjd /* 745168404Spjd * If we're in FRSYNC mode, sync out this znode before reading it. 746168404Spjd */ 747224605Smm if (zfsvfs->z_log && 748224605Smm (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) 749219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 750168404Spjd 751168404Spjd /* 752168404Spjd * Lock the range against changes. 
753168404Spjd */ 754168404Spjd rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 755168404Spjd 756168404Spjd /* 757168404Spjd * If we are reading past end-of-file we can skip 758168404Spjd * to the end; but we might still need to set atime. 759168404Spjd */ 760219089Spjd if (uio->uio_loffset >= zp->z_size) { 761168404Spjd error = 0; 762168404Spjd goto out; 763168404Spjd } 764168404Spjd 765219089Spjd ASSERT(uio->uio_loffset < zp->z_size); 766219089Spjd n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 767168404Spjd 768277300Ssmh#ifdef illumos 769219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 770219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 771219089Spjd int nblk; 772219089Spjd int blksz = zp->z_blksz; 773219089Spjd uint64_t offset = uio->uio_loffset; 774219089Spjd 775219089Spjd xuio = (xuio_t *)uio; 776219089Spjd if ((ISP2(blksz))) { 777219089Spjd nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 778219089Spjd blksz)) / blksz; 779219089Spjd } else { 780219089Spjd ASSERT(offset + n <= blksz); 781219089Spjd nblk = 1; 782219089Spjd } 783219089Spjd (void) dmu_xuio_init(xuio, nblk); 784219089Spjd 785219089Spjd if (vn_has_cached_data(vp)) { 786219089Spjd /* 787219089Spjd * For simplicity, we always allocate a full buffer 788219089Spjd * even if we only expect to read a portion of a block. 
789219089Spjd */ 790219089Spjd while (--nblk >= 0) { 791219089Spjd (void) dmu_xuio_add(xuio, 792219089Spjd dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 793219089Spjd blksz), 0, blksz); 794219089Spjd } 795219089Spjd } 796219089Spjd } 797277300Ssmh#endif /* illumos */ 798219089Spjd 799168404Spjd while (n > 0) { 800168404Spjd nbytes = MIN(n, zfs_read_chunk_size - 801168404Spjd P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 802168404Spjd 803219089Spjd#ifdef __FreeBSD__ 804219089Spjd if (uio->uio_segflg == UIO_NOCOPY) 805219089Spjd error = mappedread_sf(vp, nbytes, uio); 806219089Spjd else 807219089Spjd#endif /* __FreeBSD__ */ 808272809Sdelphij if (vn_has_cached_data(vp)) { 809168404Spjd error = mappedread(vp, nbytes, uio); 810272809Sdelphij } else { 811272809Sdelphij error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 812272809Sdelphij uio, nbytes); 813272809Sdelphij } 814185029Spjd if (error) { 815185029Spjd /* convert checksum errors into IO errors */ 816185029Spjd if (error == ECKSUM) 817249195Smm error = SET_ERROR(EIO); 818168404Spjd break; 819185029Spjd } 820168962Spjd 821168404Spjd n -= nbytes; 822168404Spjd } 823168404Spjdout: 824168404Spjd zfs_range_unlock(rl); 825168404Spjd 826168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 827168404Spjd ZFS_EXIT(zfsvfs); 828168404Spjd return (error); 829168404Spjd} 830168404Spjd 831168404Spjd/* 832168404Spjd * Write the bytes to a file. 833168404Spjd * 834168404Spjd * IN: vp - vnode of file to be written to. 835168404Spjd * uio - structure supplying write location, range info, 836168404Spjd * and data buffer. 837251631Sdelphij * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is 838251631Sdelphij * set if in append mode. 839168404Spjd * cr - credentials of caller. 840185029Spjd * ct - caller context (NFS/CIFS fem monitor only) 841168404Spjd * 842168404Spjd * OUT: uio - updated offset and range. 843168404Spjd * 844251631Sdelphij * RETURN: 0 on success, error code on failure. 
845168404Spjd * 846168404Spjd * Timestamps: 847168404Spjd * vp - ctime|mtime updated if byte count > 0 848168404Spjd */ 849219089Spjd 850168404Spjd/* ARGSUSED */ 851168404Spjdstatic int 852168962Spjdzfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 853168404Spjd{ 854168404Spjd znode_t *zp = VTOZ(vp); 855168962Spjd rlim64_t limit = MAXOFFSET_T; 856168404Spjd ssize_t start_resid = uio->uio_resid; 857168404Spjd ssize_t tx_bytes; 858168404Spjd uint64_t end_size; 859168404Spjd dmu_tx_t *tx; 860168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 861185029Spjd zilog_t *zilog; 862168404Spjd offset_t woff; 863168404Spjd ssize_t n, nbytes; 864168404Spjd rl_t *rl; 865168404Spjd int max_blksz = zfsvfs->z_max_blksz; 866247187Smm int error = 0; 867209962Smm arc_buf_t *abuf; 868247187Smm iovec_t *aiov = NULL; 869219089Spjd xuio_t *xuio = NULL; 870219089Spjd int i_iov = 0; 871219089Spjd int iovcnt = uio->uio_iovcnt; 872219089Spjd iovec_t *iovp = uio->uio_iov; 873219089Spjd int write_eof; 874219089Spjd int count = 0; 875219089Spjd sa_bulk_attr_t bulk[4]; 876219089Spjd uint64_t mtime[2], ctime[2]; 877168404Spjd 878168404Spjd /* 879168404Spjd * Fasttrack empty write 880168404Spjd */ 881168404Spjd n = start_resid; 882168404Spjd if (n == 0) 883168404Spjd return (0); 884168404Spjd 885168962Spjd if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 886168962Spjd limit = MAXOFFSET_T; 887168962Spjd 888168404Spjd ZFS_ENTER(zfsvfs); 889185029Spjd ZFS_VERIFY_ZP(zp); 890168404Spjd 891219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 892219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 893219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 894219089Spjd &zp->z_size, 8); 895219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 896219089Spjd &zp->z_pflags, 8); 897219089Spjd 898168404Spjd /* 899262990Sdelphij * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. 
snapshots) our 900262990Sdelphij * callers might not be able to detect properly that we are read-only, 901262990Sdelphij * so check it explicitly here. 902262990Sdelphij */ 903262990Sdelphij if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 904262990Sdelphij ZFS_EXIT(zfsvfs); 905262990Sdelphij return (SET_ERROR(EROFS)); 906262990Sdelphij } 907262990Sdelphij 908262990Sdelphij /* 909321579Smav * If immutable or not appending then return EPERM. 910321579Smav * Intentionally allow ZFS_READONLY through here. 911321579Smav * See zfs_zaccess_common() 912185029Spjd */ 913321579Smav if ((zp->z_pflags & ZFS_IMMUTABLE) || 914219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 915219089Spjd (uio->uio_loffset < zp->z_size))) { 916185029Spjd ZFS_EXIT(zfsvfs); 917249195Smm return (SET_ERROR(EPERM)); 918185029Spjd } 919185029Spjd 920185029Spjd zilog = zfsvfs->z_log; 921185029Spjd 922185029Spjd /* 923219089Spjd * Validate file offset 924219089Spjd */ 925219089Spjd woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 926219089Spjd if (woff < 0) { 927219089Spjd ZFS_EXIT(zfsvfs); 928249195Smm return (SET_ERROR(EINVAL)); 929219089Spjd } 930219089Spjd 931219089Spjd /* 932219089Spjd * Check for mandatory locks before calling zfs_range_lock() 933219089Spjd * in order to prevent a deadlock with locks set via fcntl(). 934219089Spjd */ 935219089Spjd if (MANDMODE((mode_t)zp->z_mode) && 936219089Spjd (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 937219089Spjd ZFS_EXIT(zfsvfs); 938219089Spjd return (error); 939219089Spjd } 940219089Spjd 941277300Ssmh#ifdef illumos 942219089Spjd /* 943168404Spjd * Pre-fault the pages to ensure slow (eg NFS) pages 944168404Spjd * don't hold up txg. 945219089Spjd * Skip this if uio contains loaned arc_buf. 
946168404Spjd */ 947219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 948219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 949219089Spjd xuio = (xuio_t *)uio; 950219089Spjd else 951219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 952277300Ssmh#endif 953168404Spjd 954168404Spjd /* 955168404Spjd * If in append mode, set the io offset pointer to eof. 956168404Spjd */ 957213673Spjd if (ioflag & FAPPEND) { 958168404Spjd /* 959219089Spjd * Obtain an appending range lock to guarantee file append 960219089Spjd * semantics. We reset the write offset once we have the lock. 961168404Spjd */ 962168404Spjd rl = zfs_range_lock(zp, 0, n, RL_APPEND); 963219089Spjd woff = rl->r_off; 964168404Spjd if (rl->r_len == UINT64_MAX) { 965219089Spjd /* 966219089Spjd * We overlocked the file because this write will cause 967219089Spjd * the file block size to increase. 968219089Spjd * Note that zp_size cannot change with this lock held. 969219089Spjd */ 970219089Spjd woff = zp->z_size; 971168404Spjd } 972219089Spjd uio->uio_loffset = woff; 973168404Spjd } else { 974168404Spjd /* 975219089Spjd * Note that if the file block size will change as a result of 976219089Spjd * this write, then this range lock will lock the entire file 977219089Spjd * so that we can re-write the block safely. 978168404Spjd */ 979168404Spjd rl = zfs_range_lock(zp, woff, n, RL_WRITER); 980168404Spjd } 981168404Spjd 982235781Strasz if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { 983235781Strasz zfs_range_unlock(rl); 984235781Strasz ZFS_EXIT(zfsvfs); 985235781Strasz return (EFBIG); 986235781Strasz } 987235781Strasz 988168962Spjd if (woff >= limit) { 989168962Spjd zfs_range_unlock(rl); 990168962Spjd ZFS_EXIT(zfsvfs); 991249195Smm return (SET_ERROR(EFBIG)); 992168962Spjd } 993168962Spjd 994168962Spjd if ((woff + n) > limit || woff > (limit - n)) 995168962Spjd n = limit - woff; 996168962Spjd 997219089Spjd /* Will this write extend the file length? 
*/ 998219089Spjd write_eof = (woff + n > zp->z_size); 999168404Spjd 1000219089Spjd end_size = MAX(zp->z_size, woff + n); 1001219089Spjd 1002168404Spjd /* 1003168404Spjd * Write the file in reasonable size chunks. Each chunk is written 1004168404Spjd * in a separate transaction; this keeps the intent log records small 1005168404Spjd * and allows us to do more fine-grained space accounting. 1006168404Spjd */ 1007168404Spjd while (n > 0) { 1008209962Smm abuf = NULL; 1009209962Smm woff = uio->uio_loffset; 1010219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 1011219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 1012209962Smm if (abuf != NULL) 1013209962Smm dmu_return_arcbuf(abuf); 1014249195Smm error = SET_ERROR(EDQUOT); 1015209962Smm break; 1016209962Smm } 1017209962Smm 1018219089Spjd if (xuio && abuf == NULL) { 1019219089Spjd ASSERT(i_iov < iovcnt); 1020219089Spjd aiov = &iovp[i_iov]; 1021219089Spjd abuf = dmu_xuio_arcbuf(xuio, i_iov); 1022219089Spjd dmu_xuio_clear(xuio, i_iov); 1023219089Spjd DTRACE_PROBE3(zfs_cp_write, int, i_iov, 1024219089Spjd iovec_t *, aiov, arc_buf_t *, abuf); 1025219089Spjd ASSERT((aiov->iov_base == abuf->b_data) || 1026219089Spjd ((char *)aiov->iov_base - (char *)abuf->b_data + 1027219089Spjd aiov->iov_len == arc_buf_size(abuf))); 1028219089Spjd i_iov++; 1029219089Spjd } else if (abuf == NULL && n >= max_blksz && 1030219089Spjd woff >= zp->z_size && 1031209962Smm P2PHASE(woff, max_blksz) == 0 && 1032209962Smm zp->z_blksz == max_blksz) { 1033219089Spjd /* 1034219089Spjd * This write covers a full block. "Borrow" a buffer 1035219089Spjd * from the dmu so that we can fill it before we enter 1036219089Spjd * a transaction. This avoids the possibility of 1037219089Spjd * holding up the transaction if the data copy hangs 1038219089Spjd * up on a pagefault (e.g., from an NFS server mapping). 
1039219089Spjd */ 1040209962Smm size_t cbytes; 1041209962Smm 1042219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 1043219089Spjd max_blksz); 1044209962Smm ASSERT(abuf != NULL); 1045209962Smm ASSERT(arc_buf_size(abuf) == max_blksz); 1046209962Smm if (error = uiocopy(abuf->b_data, max_blksz, 1047209962Smm UIO_WRITE, uio, &cbytes)) { 1048209962Smm dmu_return_arcbuf(abuf); 1049209962Smm break; 1050209962Smm } 1051209962Smm ASSERT(cbytes == max_blksz); 1052209962Smm } 1053209962Smm 1054209962Smm /* 1055168404Spjd * Start a transaction. 1056168404Spjd */ 1057168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1058219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1059168404Spjd dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 1060219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1061258720Savg error = dmu_tx_assign(tx, TXG_WAIT); 1062168404Spjd if (error) { 1063168404Spjd dmu_tx_abort(tx); 1064209962Smm if (abuf != NULL) 1065209962Smm dmu_return_arcbuf(abuf); 1066168404Spjd break; 1067168404Spjd } 1068168404Spjd 1069168404Spjd /* 1070168404Spjd * If zfs_range_lock() over-locked we grow the blocksize 1071168404Spjd * and then reduce the lock range. This will only happen 1072168404Spjd * on the first iteration since zfs_range_reduce() will 1073168404Spjd * shrink down r_len to the appropriate size. 1074168404Spjd */ 1075168404Spjd if (rl->r_len == UINT64_MAX) { 1076168404Spjd uint64_t new_blksz; 1077168404Spjd 1078168404Spjd if (zp->z_blksz > max_blksz) { 1079274337Sdelphij /* 1080274337Sdelphij * File's blocksize is already larger than the 1081274337Sdelphij * "recordsize" property. Only let it grow to 1082274337Sdelphij * the next power of 2. 
1083274337Sdelphij */ 1084168404Spjd ASSERT(!ISP2(zp->z_blksz)); 1085274337Sdelphij new_blksz = MIN(end_size, 1086274337Sdelphij 1 << highbit64(zp->z_blksz)); 1087168404Spjd } else { 1088168404Spjd new_blksz = MIN(end_size, max_blksz); 1089168404Spjd } 1090168404Spjd zfs_grow_blocksize(zp, new_blksz, tx); 1091168404Spjd zfs_range_reduce(rl, woff, n); 1092168404Spjd } 1093168404Spjd 1094168404Spjd /* 1095168404Spjd * XXX - should we really limit each write to z_max_blksz? 1096168404Spjd * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 1097168404Spjd */ 1098168404Spjd nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 1099168404Spjd 1100219089Spjd if (woff + nbytes > zp->z_size) 1101168404Spjd vnode_pager_setsize(vp, woff + nbytes); 1102168404Spjd 1103209962Smm if (abuf == NULL) { 1104209962Smm tx_bytes = uio->uio_resid; 1105219089Spjd error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 1106219089Spjd uio, nbytes, tx); 1107209962Smm tx_bytes -= uio->uio_resid; 1108168404Spjd } else { 1109209962Smm tx_bytes = nbytes; 1110219089Spjd ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 1111219089Spjd /* 1112219089Spjd * If this is not a full block write, but we are 1113219089Spjd * extending the file past EOF and this data starts 1114219089Spjd * block-aligned, use assign_arcbuf(). Otherwise, 1115219089Spjd * write via dmu_write(). 
1116219089Spjd */ 1117219089Spjd if (tx_bytes < max_blksz && (!write_eof || 1118219089Spjd aiov->iov_base != abuf->b_data)) { 1119219089Spjd ASSERT(xuio); 1120219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, woff, 1121219089Spjd aiov->iov_len, aiov->iov_base, tx); 1122219089Spjd dmu_return_arcbuf(abuf); 1123219089Spjd xuio_stat_wbuf_copied(); 1124219089Spjd } else { 1125219089Spjd ASSERT(xuio || tx_bytes == max_blksz); 1126219089Spjd dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 1127219089Spjd woff, abuf, tx); 1128219089Spjd } 1129209962Smm ASSERT(tx_bytes <= uio->uio_resid); 1130209962Smm uioskip(uio, tx_bytes); 1131168404Spjd } 1132212657Savg if (tx_bytes && vn_has_cached_data(vp)) { 1133209962Smm update_pages(vp, woff, tx_bytes, zfsvfs->z_os, 1134209962Smm zp->z_id, uio->uio_segflg, tx); 1135209962Smm } 1136209962Smm 1137209962Smm /* 1138168404Spjd * If we made no progress, we're done. If we made even 1139168404Spjd * partial progress, update the znode and ZIL accordingly. 1140168404Spjd */ 1141168404Spjd if (tx_bytes == 0) { 1142219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 1143219089Spjd (void *)&zp->z_size, sizeof (uint64_t), tx); 1144168404Spjd dmu_tx_commit(tx); 1145168404Spjd ASSERT(error != 0); 1146168404Spjd break; 1147168404Spjd } 1148168404Spjd 1149168404Spjd /* 1150168404Spjd * Clear Set-UID/Set-GID bits on successful write if not 1151168404Spjd * privileged and at least one of the excute bits is set. 1152168404Spjd * 1153168404Spjd * It would be nice to to this after all writes have 1154168404Spjd * been done, but that would still expose the ISUID/ISGID 1155168404Spjd * to another app after the partial write is committed. 1156185029Spjd * 1157185029Spjd * Note: we don't call zfs_fuid_map_id() here because 1158185029Spjd * user 0 is not an ephemeral uid. 
1159168404Spjd */ 1160168404Spjd mutex_enter(&zp->z_acl_lock); 1161219089Spjd if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 1162168404Spjd (S_IXUSR >> 6))) != 0 && 1163219089Spjd (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 1164185029Spjd secpolicy_vnode_setid_retain(vp, cr, 1165219089Spjd (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 1166219089Spjd uint64_t newmode; 1167219089Spjd zp->z_mode &= ~(S_ISUID | S_ISGID); 1168219089Spjd newmode = zp->z_mode; 1169219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 1170219089Spjd (void *)&newmode, sizeof (uint64_t), tx); 1171168404Spjd } 1172168404Spjd mutex_exit(&zp->z_acl_lock); 1173168404Spjd 1174219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 1175219089Spjd B_TRUE); 1176168404Spjd 1177168404Spjd /* 1178168404Spjd * Update the file size (zp_size) if it has changed; 1179168404Spjd * account for possible concurrent updates. 1180168404Spjd */ 1181219089Spjd while ((end_size = zp->z_size) < uio->uio_loffset) { 1182219089Spjd (void) atomic_cas_64(&zp->z_size, end_size, 1183168404Spjd uio->uio_loffset); 1184298105Savg#ifdef illumos 1185219089Spjd ASSERT(error == 0); 1186298105Savg#else 1187298105Savg ASSERT(error == 0 || error == EFAULT); 1188298105Savg#endif 1189219089Spjd } 1190219089Spjd /* 1191219089Spjd * If we are replaying and eof is non zero then force 1192219089Spjd * the file size to the specified eof. Note, there's no 1193219089Spjd * concurrency during replay. 
1194219089Spjd */ 1195219089Spjd if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 1196219089Spjd zp->z_size = zfsvfs->z_replay_eof; 1197219089Spjd 1198298105Savg if (error == 0) 1199298105Savg error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1200298105Savg else 1201298105Savg (void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1202219089Spjd 1203168404Spjd zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 1204168404Spjd dmu_tx_commit(tx); 1205168404Spjd 1206168404Spjd if (error != 0) 1207168404Spjd break; 1208168404Spjd ASSERT(tx_bytes == nbytes); 1209168404Spjd n -= nbytes; 1210219089Spjd 1211277300Ssmh#ifdef illumos 1212219089Spjd if (!xuio && n > 0) 1213219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 1214277300Ssmh#endif 1215168404Spjd } 1216168404Spjd 1217168404Spjd zfs_range_unlock(rl); 1218168404Spjd 1219168404Spjd /* 1220168404Spjd * If we're in replay mode, or we made no progress, return error. 1221168404Spjd * Otherwise, it's at least a partial write, so it's successful. 1222168404Spjd */ 1223209962Smm if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1224168404Spjd ZFS_EXIT(zfsvfs); 1225168404Spjd return (error); 1226168404Spjd } 1227168404Spjd 1228298105Savg#ifdef __FreeBSD__ 1229298105Savg /* 1230298105Savg * EFAULT means that at least one page of the source buffer was not 1231298105Savg * available. VFS will re-try remaining I/O upon this error. 
1232298105Savg */ 1233298105Savg if (error == EFAULT) { 1234298105Savg ZFS_EXIT(zfsvfs); 1235298105Savg return (error); 1236298105Savg } 1237298105Savg#endif 1238298105Savg 1239219089Spjd if (ioflag & (FSYNC | FDSYNC) || 1240219089Spjd zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1241219089Spjd zil_commit(zilog, zp->z_id); 1242168404Spjd 1243168404Spjd ZFS_EXIT(zfsvfs); 1244168404Spjd return (0); 1245168404Spjd} 1246168404Spjd 1247168404Spjdvoid 1248219089Spjdzfs_get_done(zgd_t *zgd, int error) 1249168404Spjd{ 1250219089Spjd znode_t *zp = zgd->zgd_private; 1251219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 1252168404Spjd 1253219089Spjd if (zgd->zgd_db) 1254219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1255219089Spjd 1256219089Spjd zfs_range_unlock(zgd->zgd_rl); 1257219089Spjd 1258191900Skmacy /* 1259191900Skmacy * Release the vnode asynchronously as we currently have the 1260191900Skmacy * txg stopped from syncing. 1261191900Skmacy */ 1262219089Spjd VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1263219089Spjd 1264219089Spjd if (error == 0 && zgd->zgd_bp) 1265325132Savg zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp); 1266219089Spjd 1267168404Spjd kmem_free(zgd, sizeof (zgd_t)); 1268168404Spjd} 1269168404Spjd 1270214378Smm#ifdef DEBUG 1271214378Smmstatic int zil_fault_io = 0; 1272214378Smm#endif 1273214378Smm 1274168404Spjd/* 1275168404Spjd * Get data to generate a TX_WRITE intent log record. 
1276168404Spjd */ 1277168404Spjdint 1278325132Savgzfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) 1279168404Spjd{ 1280168404Spjd zfsvfs_t *zfsvfs = arg; 1281168404Spjd objset_t *os = zfsvfs->z_os; 1282168404Spjd znode_t *zp; 1283219089Spjd uint64_t object = lr->lr_foid; 1284219089Spjd uint64_t offset = lr->lr_offset; 1285219089Spjd uint64_t size = lr->lr_length; 1286168404Spjd dmu_buf_t *db; 1287168404Spjd zgd_t *zgd; 1288168404Spjd int error = 0; 1289168404Spjd 1290325132Savg ASSERT3P(lwb, !=, NULL); 1291325132Savg ASSERT3P(zio, !=, NULL); 1292325132Savg ASSERT3U(size, !=, 0); 1293168404Spjd 1294168404Spjd /* 1295168404Spjd * Nothing to do if the file has been removed 1296168404Spjd */ 1297219089Spjd if (zfs_zget(zfsvfs, object, &zp) != 0) 1298249195Smm return (SET_ERROR(ENOENT)); 1299168404Spjd if (zp->z_unlinked) { 1300191900Skmacy /* 1301191900Skmacy * Release the vnode asynchronously as we currently have the 1302191900Skmacy * txg stopped from syncing. 1303191900Skmacy */ 1304196307Spjd VN_RELE_ASYNC(ZTOV(zp), 1305196307Spjd dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1306249195Smm return (SET_ERROR(ENOENT)); 1307168404Spjd } 1308168404Spjd 1309219089Spjd zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1310325132Savg zgd->zgd_lwb = lwb; 1311219089Spjd zgd->zgd_private = zp; 1312219089Spjd 1313168404Spjd /* 1314168404Spjd * Write records come in two flavors: immediate and indirect. 1315168404Spjd * For small writes it's cheaper to store the data with the 1316168404Spjd * log record (immediate); for large writes it's cheaper to 1317168404Spjd * sync the data and get a pointer to it (indirect) so that 1318168404Spjd * we don't have to write the data twice. 
1319168404Spjd */ 1320168404Spjd if (buf != NULL) { /* immediate write */ 1321219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1322168404Spjd /* test for truncation needs to be done while range locked */ 1323219089Spjd if (offset >= zp->z_size) { 1324249195Smm error = SET_ERROR(ENOENT); 1325219089Spjd } else { 1326219089Spjd error = dmu_read(os, object, offset, size, buf, 1327219089Spjd DMU_READ_NO_PREFETCH); 1328168404Spjd } 1329219089Spjd ASSERT(error == 0 || error == ENOENT); 1330168404Spjd } else { /* indirect write */ 1331168404Spjd /* 1332168404Spjd * Have to lock the whole block to ensure when it's 1333324203Savg * written out and its checksum is being calculated 1334168404Spjd * that no one can change the data. We need to re-check 1335168404Spjd * blocksize after we get the lock in case it's changed! 1336168404Spjd */ 1337168404Spjd for (;;) { 1338219089Spjd uint64_t blkoff; 1339219089Spjd size = zp->z_blksz; 1340219089Spjd blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; 1341219089Spjd offset -= blkoff; 1342219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1343219089Spjd RL_READER); 1344219089Spjd if (zp->z_blksz == size) 1345168404Spjd break; 1346219089Spjd offset += blkoff; 1347219089Spjd zfs_range_unlock(zgd->zgd_rl); 1348168404Spjd } 1349168404Spjd /* test for truncation needs to be done while range locked */ 1350219089Spjd if (lr->lr_offset >= zp->z_size) 1351249195Smm error = SET_ERROR(ENOENT); 1352214378Smm#ifdef DEBUG 1353214378Smm if (zil_fault_io) { 1354249195Smm error = SET_ERROR(EIO); 1355214378Smm zil_fault_io = 0; 1356214378Smm } 1357214378Smm#endif 1358219089Spjd if (error == 0) 1359219089Spjd error = dmu_buf_hold(os, object, offset, zgd, &db, 1360219089Spjd DMU_READ_NO_PREFETCH); 1361214378Smm 1362209962Smm if (error == 0) { 1363323748Savg blkptr_t *bp = &lr->lr_blkptr; 1364243524Smm 1365219089Spjd zgd->zgd_db = db; 1366219089Spjd zgd->zgd_bp = bp; 1367219089Spjd 1368219089Spjd ASSERT(db->db_offset == 
offset); 1369219089Spjd ASSERT(db->db_size == size); 1370219089Spjd 1371219089Spjd error = dmu_sync(zio, lr->lr_common.lrc_txg, 1372219089Spjd zfs_get_done, zgd); 1373321559Smav ASSERT(error || lr->lr_length <= size); 1374219089Spjd 1375209962Smm /* 1376219089Spjd * On success, we need to wait for the write I/O 1377219089Spjd * initiated by dmu_sync() to complete before we can 1378219089Spjd * release this dbuf. We will finish everything up 1379219089Spjd * in the zfs_get_done() callback. 1380209962Smm */ 1381219089Spjd if (error == 0) 1382219089Spjd return (0); 1383209962Smm 1384219089Spjd if (error == EALREADY) { 1385219089Spjd lr->lr_common.lrc_txtype = TX_WRITE2; 1386332525Smav /* 1387332525Smav * TX_WRITE2 relies on the data previously 1388332525Smav * written by the TX_WRITE that caused 1389332525Smav * EALREADY. We zero out the BP because 1390332525Smav * it is the old, currently-on-disk BP, 1391332525Smav * so there's no need to zio_flush() its 1392332525Smav * vdevs (flushing would needlesly hurt 1393332525Smav * performance, and doesn't work on 1394332525Smav * indirect vdevs). 
1395332525Smav */ 1396332525Smav zgd->zgd_bp = NULL; 1397332525Smav BP_ZERO(bp); 1398219089Spjd error = 0; 1399219089Spjd } 1400209962Smm } 1401168404Spjd } 1402219089Spjd 1403219089Spjd zfs_get_done(zgd, error); 1404219089Spjd 1405168404Spjd return (error); 1406168404Spjd} 1407168404Spjd 1408168404Spjd/*ARGSUSED*/ 1409168404Spjdstatic int 1410185029Spjdzfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1411185029Spjd caller_context_t *ct) 1412168404Spjd{ 1413168404Spjd znode_t *zp = VTOZ(vp); 1414168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1415168404Spjd int error; 1416168404Spjd 1417168404Spjd ZFS_ENTER(zfsvfs); 1418185029Spjd ZFS_VERIFY_ZP(zp); 1419185029Spjd 1420185029Spjd if (flag & V_ACE_MASK) 1421185029Spjd error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1422185029Spjd else 1423185029Spjd error = zfs_zaccess_rwx(zp, mode, flag, cr); 1424185029Spjd 1425168404Spjd ZFS_EXIT(zfsvfs); 1426168404Spjd return (error); 1427168404Spjd} 1428168404Spjd 1429211932Smmstatic int 1430303970Savgzfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp) 1431211932Smm{ 1432303970Savg int error; 1433211932Smm 1434303970Savg *vpp = arg; 1435303970Savg error = vn_lock(*vpp, lkflags); 1436303970Savg if (error != 0) 1437303970Savg vrele(*vpp); 1438303970Savg return (error); 1439303970Savg} 1440211932Smm 1441303970Savgstatic int 1442303970Savgzfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags) 1443303970Savg{ 1444303970Savg znode_t *zdp = VTOZ(dvp); 1445303970Savg zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1446303970Savg int error; 1447303970Savg int ltype; 1448303970Savg 1449303970Savg ASSERT_VOP_LOCKED(dvp, __func__); 1450303970Savg#ifdef DIAGNOSTIC 1451307142Savg if ((zdp->z_pflags & ZFS_XATTR) == 0) 1452307142Savg VERIFY(!RRM_LOCK_HELD(&zfsvfs->z_teardown_lock)); 1453303970Savg#endif 1454303970Savg 1455303970Savg if (name[0] == 0 || (name[0] == '.' 
&& name[1] == 0)) { 1456303970Savg ASSERT3P(dvp, ==, vp); 1457303970Savg vref(dvp); 1458303970Savg ltype = lkflags & LK_TYPE_MASK; 1459303970Savg if (ltype != VOP_ISLOCKED(dvp)) { 1460303970Savg if (ltype == LK_EXCLUSIVE) 1461303970Savg vn_lock(dvp, LK_UPGRADE | LK_RETRY); 1462303970Savg else /* if (ltype == LK_SHARED) */ 1463303970Savg vn_lock(dvp, LK_DOWNGRADE | LK_RETRY); 1464303970Savg 1465303970Savg /* 1466303970Savg * Relock for the "." case could leave us with 1467303970Savg * reclaimed vnode. 1468303970Savg */ 1469303970Savg if (dvp->v_iflag & VI_DOOMED) { 1470303970Savg vrele(dvp); 1471303970Savg return (SET_ERROR(ENOENT)); 1472303970Savg } 1473303970Savg } 1474303970Savg return (0); 1475303970Savg } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { 1476303970Savg /* 1477303970Savg * Note that in this case, dvp is the child vnode, and we 1478303970Savg * are looking up the parent vnode - exactly reverse from 1479303970Savg * normal operation. Unlocking dvp requires some rather 1480303970Savg * tricky unlock/relock dance to prevent mp from being freed; 1481303970Savg * use vn_vget_ino_gen() which takes care of all that. 1482303970Savg * 1483303970Savg * XXX Note that there is a time window when both vnodes are 1484303970Savg * unlocked. It is possible, although highly unlikely, that 1485303970Savg * during that window the parent-child relationship between 1486303970Savg * the vnodes may change, for example, get reversed. 1487303970Savg * In that case we would have a wrong lock order for the vnodes. 1488303970Savg * All other filesystems seem to ignore this problem, so we 1489303970Savg * do the same here. 
1490303970Savg * A potential solution could be implemented as follows: 1491303970Savg * - using LK_NOWAIT when locking the second vnode and retrying 1492303970Savg * if necessary 1493303970Savg * - checking that the parent-child relationship still holds 1494303970Savg * after locking both vnodes and retrying if it doesn't 1495303970Savg */ 1496303970Savg error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp); 1497303970Savg return (error); 1498303970Savg } else { 1499303970Savg error = vn_lock(vp, lkflags); 1500303970Savg if (error != 0) 1501303970Savg vrele(vp); 1502303970Savg return (error); 1503211932Smm } 1504211932Smm} 1505211932Smm 1506211932Smm/* 1507168404Spjd * Lookup an entry in a directory, or an extended attribute directory. 1508168404Spjd * If it exists, return a held vnode reference for it. 1509168404Spjd * 1510168404Spjd * IN: dvp - vnode of directory to search. 1511168404Spjd * nm - name of entry to lookup. 1512168404Spjd * pnp - full pathname to lookup [UNUSED]. 1513168404Spjd * flags - LOOKUP_XATTR set if looking for an attribute. 1514168404Spjd * rdir - root directory vnode [UNUSED]. 1515168404Spjd * cr - credentials of caller. 1516185029Spjd * ct - caller context 1517168404Spjd * 1518168404Spjd * OUT: vpp - vnode of located entry, NULL if not found. 1519168404Spjd * 1520251631Sdelphij * RETURN: 0 on success, error code on failure. 
1521168404Spjd * 1522168404Spjd * Timestamps: 1523168404Spjd * NA 1524168404Spjd */ 1525168404Spjd/* ARGSUSED */ 1526168962Spjdstatic int 1527168962Spjdzfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1528185029Spjd int nameiop, cred_t *cr, kthread_t *td, int flags) 1529168404Spjd{ 1530168962Spjd znode_t *zdp = VTOZ(dvp); 1531303970Savg znode_t *zp; 1532168962Spjd zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1533211932Smm int error = 0; 1534168404Spjd 1535321545Smav /* 1536321545Smav * Fast path lookup, however we must skip DNLC lookup 1537321545Smav * for case folding or normalizing lookups because the 1538321545Smav * DNLC code only stores the passed in name. This means 1539321545Smav * creating 'a' and removing 'A' on a case insensitive 1540321545Smav * file system would work, but DNLC still thinks 'a' 1541321545Smav * exists and won't let you create it again on the next 1542321545Smav * pass through fast path. 1543321545Smav */ 1544303970Savg if (!(flags & LOOKUP_XATTR)) { 1545211932Smm if (dvp->v_type != VDIR) { 1546249195Smm return (SET_ERROR(ENOTDIR)); 1547219089Spjd } else if (zdp->z_sa_hdl == NULL) { 1548249195Smm return (SET_ERROR(EIO)); 1549211932Smm } 1550211932Smm } 1551211932Smm 1552211932Smm DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1553211932Smm 1554168404Spjd ZFS_ENTER(zfsvfs); 1555185029Spjd ZFS_VERIFY_ZP(zdp); 1556168404Spjd 1557168404Spjd *vpp = NULL; 1558168404Spjd 1559185029Spjd if (flags & LOOKUP_XATTR) { 1560168404Spjd#ifdef TODO 1561168404Spjd /* 1562168404Spjd * If the xattr property is off, refuse the lookup request. 1563168404Spjd */ 1564168404Spjd if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1565168404Spjd ZFS_EXIT(zfsvfs); 1566249195Smm return (SET_ERROR(EINVAL)); 1567168404Spjd } 1568185029Spjd#endif 1569168404Spjd 1570168404Spjd /* 1571168404Spjd * We don't allow recursive attributes.. 1572168404Spjd * Maybe someday we will. 
1573168404Spjd */ 1574219089Spjd if (zdp->z_pflags & ZFS_XATTR) { 1575168404Spjd ZFS_EXIT(zfsvfs); 1576249195Smm return (SET_ERROR(EINVAL)); 1577168404Spjd } 1578168404Spjd 1579168404Spjd if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1580168404Spjd ZFS_EXIT(zfsvfs); 1581168404Spjd return (error); 1582168404Spjd } 1583168404Spjd 1584168404Spjd /* 1585168404Spjd * Do we have permission to get into attribute directory? 1586168404Spjd */ 1587185029Spjd if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1588185029Spjd B_FALSE, cr)) { 1589303970Savg vrele(*vpp); 1590185029Spjd *vpp = NULL; 1591168404Spjd } 1592168404Spjd 1593168404Spjd ZFS_EXIT(zfsvfs); 1594168404Spjd return (error); 1595168404Spjd } 1596168404Spjd 1597168404Spjd /* 1598168404Spjd * Check accessibility of directory. 1599168404Spjd */ 1600185029Spjd if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1601168404Spjd ZFS_EXIT(zfsvfs); 1602168404Spjd return (error); 1603168404Spjd } 1604168404Spjd 1605185029Spjd if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1606185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1607185029Spjd ZFS_EXIT(zfsvfs); 1608249195Smm return (SET_ERROR(EILSEQ)); 1609185029Spjd } 1610168404Spjd 1611168962Spjd 1612303970Savg /* 1613303970Savg * First handle the special cases. 1614303970Savg */ 1615303970Savg if ((cnp->cn_flags & ISDOTDOT) != 0) { 1616303970Savg /* 1617303970Savg * If we are a snapshot mounted under .zfs, return 1618303970Savg * the vp for the snapshot directory. 
1619303970Savg */ 1620303970Savg if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) { 1621315842Savg struct componentname cn; 1622315842Savg vnode_t *zfsctl_vp; 1623315842Savg int ltype; 1624315842Savg 1625303970Savg ZFS_EXIT(zfsvfs); 1626315842Savg ltype = VOP_ISLOCKED(dvp); 1627315842Savg VOP_UNLOCK(dvp, 0); 1628315842Savg error = zfsctl_root(zfsvfs->z_parent, LK_SHARED, 1629315842Savg &zfsctl_vp); 1630303970Savg if (error == 0) { 1631315842Savg cn.cn_nameptr = "snapshot"; 1632315842Savg cn.cn_namelen = strlen(cn.cn_nameptr); 1633315842Savg cn.cn_nameiop = cnp->cn_nameiop; 1634319415Savg cn.cn_flags = cnp->cn_flags & ~ISDOTDOT; 1635315842Savg cn.cn_lkflags = cnp->cn_lkflags; 1636315842Savg error = VOP_LOOKUP(zfsctl_vp, vpp, &cn); 1637315842Savg vput(zfsctl_vp); 1638303970Savg } 1639315842Savg vn_lock(dvp, ltype | LK_RETRY); 1640315842Savg return (error); 1641303970Savg } 1642303970Savg } 1643303970Savg if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) { 1644315842Savg ZFS_EXIT(zfsvfs); 1645303970Savg if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP) 1646315842Savg return (SET_ERROR(ENOTSUP)); 1647315842Savg error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp); 1648315842Savg return (error); 1649303970Savg } 1650303970Savg 1651303970Savg /* 1652303970Savg * The loop is retry the lookup if the parent-child relationship 1653303970Savg * changes during the dot-dot locking complexities. 1654303970Savg */ 1655303970Savg for (;;) { 1656303970Savg uint64_t parent; 1657303970Savg 1658303970Savg error = zfs_dirlook(zdp, nm, &zp); 1659303970Savg if (error == 0) 1660303970Savg *vpp = ZTOV(zp); 1661303970Savg 1662303970Savg ZFS_EXIT(zfsvfs); 1663303970Savg if (error != 0) 1664303970Savg break; 1665303970Savg 1666303970Savg error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags); 1667303970Savg if (error != 0) { 1668303970Savg /* 1669303970Savg * If we've got a locking error, then the vnode 1670303970Savg * got reclaimed because of a force unmount. 
1671303970Savg * We never enter doomed vnodes into the name cache. 1672303970Savg */ 1673303970Savg *vpp = NULL; 1674303970Savg return (error); 1675303970Savg } 1676303970Savg 1677303970Savg if ((cnp->cn_flags & ISDOTDOT) == 0) 1678303970Savg break; 1679303970Savg 1680303970Savg ZFS_ENTER(zfsvfs); 1681303970Savg if (zdp->z_sa_hdl == NULL) { 1682303970Savg error = SET_ERROR(EIO); 1683303970Savg } else { 1684303970Savg error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 1685303970Savg &parent, sizeof (parent)); 1686303970Savg } 1687303970Savg if (error != 0) { 1688303970Savg ZFS_EXIT(zfsvfs); 1689303970Savg vput(ZTOV(zp)); 1690303970Savg break; 1691303970Savg } 1692303970Savg if (zp->z_id == parent) { 1693303970Savg ZFS_EXIT(zfsvfs); 1694303970Savg break; 1695303970Savg } 1696303970Savg vput(ZTOV(zp)); 1697303970Savg } 1698303970Savg 1699303970Savgout: 1700303970Savg if (error != 0) 1701303970Savg *vpp = NULL; 1702303970Savg 1703168404Spjd /* Translate errors and add SAVENAME when needed. */ 1704168404Spjd if (cnp->cn_flags & ISLASTCN) { 1705168404Spjd switch (nameiop) { 1706168404Spjd case CREATE: 1707168404Spjd case RENAME: 1708168404Spjd if (error == ENOENT) { 1709168404Spjd error = EJUSTRETURN; 1710168404Spjd cnp->cn_flags |= SAVENAME; 1711168404Spjd break; 1712168404Spjd } 1713168404Spjd /* FALLTHROUGH */ 1714168404Spjd case DELETE: 1715168404Spjd if (error == 0) 1716168404Spjd cnp->cn_flags |= SAVENAME; 1717168404Spjd break; 1718168404Spjd } 1719168404Spjd } 1720169198Spjd 1721303970Savg /* Insert name into cache (as non-existent) if appropriate. */ 1722303970Savg if (zfsvfs->z_use_namecache && 1723303970Savg error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0) 1724303970Savg cache_enter(dvp, NULL, cnp); 1725168404Spjd 1726303970Savg /* Insert name into cache if appropriate. 
*/ 1727303970Savg if (zfsvfs->z_use_namecache && 1728303970Savg error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1729168404Spjd if (!(cnp->cn_flags & ISLASTCN) || 1730168404Spjd (nameiop != DELETE && nameiop != RENAME)) { 1731168404Spjd cache_enter(dvp, *vpp, cnp); 1732168404Spjd } 1733168404Spjd } 1734168404Spjd 1735168404Spjd return (error); 1736168404Spjd} 1737168404Spjd 1738168404Spjd/* 1739168404Spjd * Attempt to create a new entry in a directory. If the entry 1740168404Spjd * already exists, truncate the file if permissible, else return 1741168404Spjd * an error. Return the vp of the created or trunc'd file. 1742168404Spjd * 1743168404Spjd * IN: dvp - vnode of directory to put new file entry in. 1744168404Spjd * name - name of new file entry. 1745168404Spjd * vap - attributes of new file. 1746168404Spjd * excl - flag indicating exclusive or non-exclusive mode. 1747168404Spjd * mode - mode to open file with. 1748168404Spjd * cr - credentials of caller. 1749168404Spjd * flag - large file flag [UNUSED]. 1750185029Spjd * ct - caller context 1751268464Sdelphij * vsecp - ACL to be set 1752168404Spjd * 1753168404Spjd * OUT: vpp - vnode of created or trunc'd entry. 1754168404Spjd * 1755251631Sdelphij * RETURN: 0 on success, error code on failure. 
1756168404Spjd * 1757168404Spjd * Timestamps: 1758168404Spjd * dvp - ctime|mtime updated if new entry created 1759168404Spjd * vp - ctime|mtime always, atime if new 1760168404Spjd */ 1761185029Spjd 1762168404Spjd/* ARGSUSED */ 1763168404Spjdstatic int 1764168962Spjdzfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 1765185029Spjd vnode_t **vpp, cred_t *cr, kthread_t *td) 1766168404Spjd{ 1767168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1768168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1769185029Spjd zilog_t *zilog; 1770185029Spjd objset_t *os; 1771168404Spjd dmu_tx_t *tx; 1772168404Spjd int error; 1773209962Smm ksid_t *ksid; 1774209962Smm uid_t uid; 1775209962Smm gid_t gid = crgetgid(cr); 1776219089Spjd zfs_acl_ids_t acl_ids; 1777209962Smm boolean_t fuid_dirtied; 1778185029Spjd void *vsecp = NULL; 1779185029Spjd int flag = 0; 1780303970Savg uint64_t txtype; 1781168404Spjd 1782185029Spjd /* 1783185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 1784185029Spjd * make sure file system is at proper version 1785185029Spjd */ 1786185029Spjd 1787209962Smm ksid = crgetsid(cr, KSID_OWNER); 1788209962Smm if (ksid) 1789209962Smm uid = ksid_getid(ksid); 1790209962Smm else 1791209962Smm uid = crgetuid(cr); 1792219089Spjd 1793185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 1794185029Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 1795219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1796249195Smm return (SET_ERROR(EINVAL)); 1797185029Spjd 1798168404Spjd ZFS_ENTER(zfsvfs); 1799185029Spjd ZFS_VERIFY_ZP(dzp); 1800185029Spjd os = zfsvfs->z_os; 1801185029Spjd zilog = zfsvfs->z_log; 1802168404Spjd 1803185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1804185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1805185029Spjd ZFS_EXIT(zfsvfs); 1806249195Smm return (SET_ERROR(EILSEQ)); 1807185029Spjd } 1808185029Spjd 1809185029Spjd if (vap->va_mask & AT_XVATTR) { 1810197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 1811185029Spjd 
crgetuid(cr), cr, vap->va_type)) != 0) { 1812185029Spjd ZFS_EXIT(zfsvfs); 1813185029Spjd return (error); 1814185029Spjd } 1815185029Spjd } 1816260704Savg 1817168404Spjd *vpp = NULL; 1818168404Spjd 1819182905Strasz if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 1820182905Strasz vap->va_mode &= ~S_ISVTX; 1821168404Spjd 1822303970Savg error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 1823303970Savg if (error) { 1824303970Savg ZFS_EXIT(zfsvfs); 1825303970Savg return (error); 1826303970Savg } 1827303970Savg ASSERT3P(zp, ==, NULL); 1828185029Spjd 1829303970Savg /* 1830303970Savg * Create a new file object and update the directory 1831303970Savg * to reference it. 1832303970Savg */ 1833303970Savg if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1834303970Savg goto out; 1835168404Spjd } 1836219089Spjd 1837303970Savg /* 1838303970Savg * We only support the creation of regular files in 1839303970Savg * extended attribute directories. 1840303970Savg */ 1841168404Spjd 1842303970Savg if ((dzp->z_pflags & ZFS_XATTR) && 1843303970Savg (vap->va_type != VREG)) { 1844303970Savg error = SET_ERROR(EINVAL); 1845303970Savg goto out; 1846303970Savg } 1847168404Spjd 1848303970Savg if ((error = zfs_acl_ids_create(dzp, 0, vap, 1849303970Savg cr, vsecp, &acl_ids)) != 0) 1850303970Savg goto out; 1851219089Spjd 1852303970Savg if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 1853303970Savg zfs_acl_ids_free(&acl_ids); 1854303970Savg error = SET_ERROR(EDQUOT); 1855303970Savg goto out; 1856303970Savg } 1857168404Spjd 1858303970Savg getnewvnode_reserve(1); 1859209962Smm 1860303970Savg tx = dmu_tx_create(os); 1861209962Smm 1862303970Savg dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 1863303970Savg ZFS_SA_BASE_ATTR_SIZE); 1864219089Spjd 1865303970Savg fuid_dirtied = zfsvfs->z_fuid_dirty; 1866303970Savg if (fuid_dirtied) 1867303970Savg zfs_fuid_txhold(zfsvfs, tx); 1868303970Savg dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 1869303970Savg dmu_tx_hold_sa(tx, 
dzp->z_sa_hdl, B_FALSE); 1870303970Savg if (!zfsvfs->z_use_sa && 1871303970Savg acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1872303970Savg dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1873303970Savg 0, acl_ids.z_aclp->z_acl_bytes); 1874303970Savg } 1875303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 1876303970Savg if (error) { 1877209962Smm zfs_acl_ids_free(&acl_ids); 1878303970Savg dmu_tx_abort(tx); 1879303970Savg getnewvnode_drop_reserve(); 1880303970Savg ZFS_EXIT(zfsvfs); 1881303970Savg return (error); 1882303970Savg } 1883303970Savg zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1884185029Spjd 1885303970Savg if (fuid_dirtied) 1886303970Savg zfs_fuid_sync(zfsvfs, tx); 1887219089Spjd 1888303970Savg (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 1889303970Savg txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1890303970Savg zfs_log_create(zilog, tx, txtype, dzp, zp, name, 1891303970Savg vsecp, acl_ids.z_fuidp, vap); 1892303970Savg zfs_acl_ids_free(&acl_ids); 1893303970Savg dmu_tx_commit(tx); 1894168404Spjd 1895303970Savg getnewvnode_drop_reserve(); 1896168404Spjd 1897168404Spjdout: 1898303970Savg if (error == 0) { 1899168962Spjd *vpp = ZTOV(zp); 1900168404Spjd } 1901168404Spjd 1902219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1903219089Spjd zil_commit(zilog, 0); 1904219089Spjd 1905168404Spjd ZFS_EXIT(zfsvfs); 1906168404Spjd return (error); 1907168404Spjd} 1908168404Spjd 1909168404Spjd/* 1910168404Spjd * Remove an entry from a directory. 1911168404Spjd * 1912168404Spjd * IN: dvp - vnode of directory to remove entry from. 1913168404Spjd * name - name of entry to remove. 1914168404Spjd * cr - credentials of caller. 1915185029Spjd * ct - caller context 1916185029Spjd * flags - case flags 1917168404Spjd * 1918251631Sdelphij * RETURN: 0 on success, error code on failure. 
1919168404Spjd * 1920168404Spjd * Timestamps: 1921168404Spjd * dvp - ctime|mtime 1922168404Spjd * vp - ctime (if nlink > 0) 1923168404Spjd */ 1924219089Spjd 1925185029Spjd/*ARGSUSED*/ 1926168404Spjdstatic int 1927303970Savgzfs_remove(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) 1928168404Spjd{ 1929303970Savg znode_t *dzp = VTOZ(dvp); 1930303970Savg znode_t *zp = VTOZ(vp); 1931219089Spjd znode_t *xzp; 1932168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1933185029Spjd zilog_t *zilog; 1934168962Spjd uint64_t acl_obj, xattr_obj; 1935219089Spjd uint64_t obj = 0; 1936168404Spjd dmu_tx_t *tx; 1937185029Spjd boolean_t unlinked, toobig = FALSE; 1938185029Spjd uint64_t txtype; 1939168404Spjd int error; 1940168404Spjd 1941168404Spjd ZFS_ENTER(zfsvfs); 1942185029Spjd ZFS_VERIFY_ZP(dzp); 1943303970Savg ZFS_VERIFY_ZP(zp); 1944185029Spjd zilog = zfsvfs->z_log; 1945303970Savg zp = VTOZ(vp); 1946168404Spjd 1947219089Spjd xattr_obj = 0; 1948219089Spjd xzp = NULL; 1949168404Spjd 1950168962Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1951168404Spjd goto out; 1952168962Spjd } 1953168404Spjd 1954168962Spjd /* 1955168962Spjd * Need to use rmdir for removing directories. 1956168962Spjd */ 1957168962Spjd if (vp->v_type == VDIR) { 1958249195Smm error = SET_ERROR(EPERM); 1959168962Spjd goto out; 1960168962Spjd } 1961168962Spjd 1962185029Spjd vnevent_remove(vp, dvp, name, ct); 1963168962Spjd 1964303970Savg obj = zp->z_id; 1965168404Spjd 1966303970Savg /* are there any extended attributes? 
*/ 1967303970Savg error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1968303970Savg &xattr_obj, sizeof (xattr_obj)); 1969303970Savg if (error == 0 && xattr_obj) { 1970303970Savg error = zfs_zget(zfsvfs, xattr_obj, &xzp); 1971303970Savg ASSERT0(error); 1972303970Savg } 1973168962Spjd 1974168404Spjd /* 1975168404Spjd * We may delete the znode now, or we may put it in the unlinked set; 1976168404Spjd * it depends on whether we're the last link, and on whether there are 1977168404Spjd * other holds on the vnode. So we dmu_tx_hold() the right things to 1978168404Spjd * allow for either case. 1979168404Spjd */ 1980168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1981168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1982219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1983219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1984219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 1985168404Spjd 1986303970Savg if (xzp) { 1987219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 1988219089Spjd dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1989168404Spjd } 1990168404Spjd 1991168404Spjd /* charge as an update -- would be nice not to charge at all */ 1992168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1993168404Spjd 1994268464Sdelphij /* 1995294803Smav * Mark this transaction as typically resulting in a net free of space 1996268464Sdelphij */ 1997294803Smav dmu_tx_mark_netfree(tx); 1998268464Sdelphij 1999303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 2000168404Spjd if (error) { 2001168404Spjd dmu_tx_abort(tx); 2002168404Spjd ZFS_EXIT(zfsvfs); 2003168404Spjd return (error); 2004168404Spjd } 2005168404Spjd 2006168404Spjd /* 2007168404Spjd * Remove the directory entry. 
2008168404Spjd */ 2009303970Savg error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked); 2010168404Spjd 2011168404Spjd if (error) { 2012168404Spjd dmu_tx_commit(tx); 2013168404Spjd goto out; 2014168404Spjd } 2015168404Spjd 2016219089Spjd if (unlinked) { 2017168404Spjd zfs_unlinked_add(zp, tx); 2018243268Savg vp->v_vflag |= VV_NOSYNC; 2019168962Spjd } 2020168404Spjd 2021185029Spjd txtype = TX_REMOVE; 2022219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 2023168404Spjd 2024168404Spjd dmu_tx_commit(tx); 2025168404Spjdout: 2026185029Spjd 2027219089Spjd if (xzp) 2028303970Savg vrele(ZTOV(xzp)); 2029168962Spjd 2030219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2031219089Spjd zil_commit(zilog, 0); 2032219089Spjd 2033168404Spjd ZFS_EXIT(zfsvfs); 2034168404Spjd return (error); 2035168404Spjd} 2036168404Spjd 2037168404Spjd/* 2038168404Spjd * Create a new directory and insert it into dvp using the name 2039168404Spjd * provided. Return a pointer to the inserted directory. 2040168404Spjd * 2041168404Spjd * IN: dvp - vnode of directory to add subdir to. 2042168404Spjd * dirname - name of new directory. 2043168404Spjd * vap - attributes of new directory. 2044168404Spjd * cr - credentials of caller. 2045185029Spjd * ct - caller context 2046251631Sdelphij * flags - case flags 2047185029Spjd * vsecp - ACL to be set 2048168404Spjd * 2049168404Spjd * OUT: vpp - vnode of created directory. 2050168404Spjd * 2051251631Sdelphij * RETURN: 0 on success, error code on failure. 
2052168404Spjd * 2053168404Spjd * Timestamps: 2054168404Spjd * dvp - ctime|mtime updated 2055168404Spjd * vp - ctime|mtime|atime updated 2056168404Spjd */ 2057185029Spjd/*ARGSUSED*/ 2058168404Spjdstatic int 2059303970Savgzfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr) 2060168404Spjd{ 2061168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 2062168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2063185029Spjd zilog_t *zilog; 2064185029Spjd uint64_t txtype; 2065168404Spjd dmu_tx_t *tx; 2066168404Spjd int error; 2067209962Smm ksid_t *ksid; 2068209962Smm uid_t uid; 2069209962Smm gid_t gid = crgetgid(cr); 2070219089Spjd zfs_acl_ids_t acl_ids; 2071209962Smm boolean_t fuid_dirtied; 2072168404Spjd 2073168404Spjd ASSERT(vap->va_type == VDIR); 2074168404Spjd 2075185029Spjd /* 2076185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 2077185029Spjd * make sure file system is at proper version 2078185029Spjd */ 2079185029Spjd 2080209962Smm ksid = crgetsid(cr, KSID_OWNER); 2081209962Smm if (ksid) 2082209962Smm uid = ksid_getid(ksid); 2083209962Smm else 2084209962Smm uid = crgetuid(cr); 2085185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2086303970Savg ((vap->va_mask & AT_XVATTR) || 2087219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2088249195Smm return (SET_ERROR(EINVAL)); 2089185029Spjd 2090168404Spjd ZFS_ENTER(zfsvfs); 2091185029Spjd ZFS_VERIFY_ZP(dzp); 2092185029Spjd zilog = zfsvfs->z_log; 2093168404Spjd 2094219089Spjd if (dzp->z_pflags & ZFS_XATTR) { 2095168404Spjd ZFS_EXIT(zfsvfs); 2096249195Smm return (SET_ERROR(EINVAL)); 2097168404Spjd } 2098168404Spjd 2099185029Spjd if (zfsvfs->z_utf8 && u8_validate(dirname, 2100185029Spjd strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2101185029Spjd ZFS_EXIT(zfsvfs); 2102249195Smm return (SET_ERROR(EILSEQ)); 2103185029Spjd } 2104185029Spjd 2105219089Spjd if (vap->va_mask & AT_XVATTR) { 2106197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2107185029Spjd crgetuid(cr), cr, 
vap->va_type)) != 0) { 2108185029Spjd ZFS_EXIT(zfsvfs); 2109185029Spjd return (error); 2110185029Spjd } 2111219089Spjd } 2112185029Spjd 2113219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 2114303970Savg NULL, &acl_ids)) != 0) { 2115219089Spjd ZFS_EXIT(zfsvfs); 2116219089Spjd return (error); 2117219089Spjd } 2118260704Savg 2119168404Spjd /* 2120168404Spjd * First make sure the new directory doesn't exist. 2121219089Spjd * 2122219089Spjd * Existence is checked first to make sure we don't return 2123219089Spjd * EACCES instead of EEXIST which can cause some applications 2124219089Spjd * to fail. 2125168404Spjd */ 2126185029Spjd *vpp = NULL; 2127185029Spjd 2128303970Savg if (error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW)) { 2129219089Spjd zfs_acl_ids_free(&acl_ids); 2130168404Spjd ZFS_EXIT(zfsvfs); 2131168404Spjd return (error); 2132168404Spjd } 2133303970Savg ASSERT3P(zp, ==, NULL); 2134168404Spjd 2135185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 2136219089Spjd zfs_acl_ids_free(&acl_ids); 2137168404Spjd ZFS_EXIT(zfsvfs); 2138168404Spjd return (error); 2139168404Spjd } 2140168404Spjd 2141209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2142211932Smm zfs_acl_ids_free(&acl_ids); 2143209962Smm ZFS_EXIT(zfsvfs); 2144249195Smm return (SET_ERROR(EDQUOT)); 2145209962Smm } 2146209962Smm 2147168404Spjd /* 2148168404Spjd * Add a new entry to the directory. 
2149168404Spjd */ 2150303970Savg getnewvnode_reserve(1); 2151168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2152168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 2153168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2154209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 2155209962Smm if (fuid_dirtied) 2156209962Smm zfs_fuid_txhold(zfsvfs, tx); 2157219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2158219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2159219089Spjd acl_ids.z_aclp->z_acl_bytes); 2160219089Spjd } 2161219089Spjd 2162219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2163219089Spjd ZFS_SA_BASE_ATTR_SIZE); 2164219089Spjd 2165303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 2166168404Spjd if (error) { 2167219089Spjd zfs_acl_ids_free(&acl_ids); 2168168404Spjd dmu_tx_abort(tx); 2169260704Savg getnewvnode_drop_reserve(); 2170168404Spjd ZFS_EXIT(zfsvfs); 2171168404Spjd return (error); 2172168404Spjd } 2173168404Spjd 2174168404Spjd /* 2175168404Spjd * Create new node. 2176168404Spjd */ 2177219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2178168404Spjd 2179209962Smm if (fuid_dirtied) 2180209962Smm zfs_fuid_sync(zfsvfs, tx); 2181219089Spjd 2182168404Spjd /* 2183168404Spjd * Now put new name in parent dir. 
2184168404Spjd */ 2185303970Savg (void) zfs_link_create(dzp, dirname, zp, tx, ZNEW); 2186168404Spjd 2187168404Spjd *vpp = ZTOV(zp); 2188168404Spjd 2189303970Savg txtype = zfs_log_create_txtype(Z_DIR, NULL, vap); 2190303970Savg zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL, 2191209962Smm acl_ids.z_fuidp, vap); 2192185029Spjd 2193209962Smm zfs_acl_ids_free(&acl_ids); 2194219089Spjd 2195168404Spjd dmu_tx_commit(tx); 2196168404Spjd 2197260704Savg getnewvnode_drop_reserve(); 2198260704Savg 2199219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2200219089Spjd zil_commit(zilog, 0); 2201219089Spjd 2202168404Spjd ZFS_EXIT(zfsvfs); 2203168404Spjd return (0); 2204168404Spjd} 2205168404Spjd 2206168404Spjd/* 2207168404Spjd * Remove a directory subdir entry. If the current working 2208168404Spjd * directory is the same as the subdir to be removed, the 2209168404Spjd * remove will fail. 2210168404Spjd * 2211168404Spjd * IN: dvp - vnode of directory to remove from. 2212168404Spjd * name - name of directory to be removed. 2213168404Spjd * cwd - vnode of current working directory. 2214168404Spjd * cr - credentials of caller. 2215185029Spjd * ct - caller context 2216185029Spjd * flags - case flags 2217168404Spjd * 2218251631Sdelphij * RETURN: 0 on success, error code on failure. 
2219168404Spjd * 2220168404Spjd * Timestamps: 2221168404Spjd * dvp - ctime|mtime updated 2222168404Spjd */ 2223185029Spjd/*ARGSUSED*/ 2224168404Spjdstatic int 2225303970Savgzfs_rmdir(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) 2226168404Spjd{ 2227168404Spjd znode_t *dzp = VTOZ(dvp); 2228303970Savg znode_t *zp = VTOZ(vp); 2229168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2230185029Spjd zilog_t *zilog; 2231168404Spjd dmu_tx_t *tx; 2232168404Spjd int error; 2233168404Spjd 2234168962Spjd ZFS_ENTER(zfsvfs); 2235185029Spjd ZFS_VERIFY_ZP(dzp); 2236303970Savg ZFS_VERIFY_ZP(zp); 2237185029Spjd zilog = zfsvfs->z_log; 2238168404Spjd 2239168404Spjd 2240168404Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2241168404Spjd goto out; 2242168404Spjd } 2243168404Spjd 2244168962Spjd if (vp->v_type != VDIR) { 2245249195Smm error = SET_ERROR(ENOTDIR); 2246168962Spjd goto out; 2247168962Spjd } 2248168962Spjd 2249185029Spjd vnevent_rmdir(vp, dvp, name, ct); 2250168962Spjd 2251168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2252168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2253219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2254168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2255219089Spjd zfs_sa_upgrade_txholds(tx, zp); 2256219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 2257304122Savg dmu_tx_mark_netfree(tx); 2258303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 2259168404Spjd if (error) { 2260168404Spjd dmu_tx_abort(tx); 2261168404Spjd ZFS_EXIT(zfsvfs); 2262168404Spjd return (error); 2263168404Spjd } 2264168404Spjd 2265168404Spjd cache_purge(dvp); 2266168404Spjd 2267303970Savg error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL); 2268168404Spjd 2269185029Spjd if (error == 0) { 2270185029Spjd uint64_t txtype = TX_RMDIR; 2271219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2272185029Spjd } 2273168404Spjd 2274168404Spjd dmu_tx_commit(tx); 2275168404Spjd 2276168404Spjd cache_purge(vp); 2277168404Spjdout: 2278219089Spjd if 
(zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2279219089Spjd zil_commit(zilog, 0); 2280219089Spjd 2281168404Spjd ZFS_EXIT(zfsvfs); 2282168404Spjd return (error); 2283168404Spjd} 2284168404Spjd 2285168404Spjd/* 2286168404Spjd * Read as many directory entries as will fit into the provided 2287168404Spjd * buffer from the given directory cursor position (specified in 2288251631Sdelphij * the uio structure). 2289168404Spjd * 2290168404Spjd * IN: vp - vnode of directory to read. 2291168404Spjd * uio - structure supplying read location, range info, 2292168404Spjd * and return buffer. 2293168404Spjd * cr - credentials of caller. 2294185029Spjd * ct - caller context 2295185029Spjd * flags - case flags 2296168404Spjd * 2297168404Spjd * OUT: uio - updated offset and range, buffer filled. 2298168404Spjd * eofp - set to true if end-of-file detected. 2299168404Spjd * 2300251631Sdelphij * RETURN: 0 on success, error code on failure. 2301168404Spjd * 2302168404Spjd * Timestamps: 2303168404Spjd * vp - atime updated 2304168404Spjd * 2305168404Spjd * Note that the low 4 bits of the cookie returned by zap is always zero. 2306168404Spjd * This allows us to use the low range for "special" directory entries: 2307168404Spjd * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2308168404Spjd * we use the offset 2 for the '.zfs' directory. 
2309168404Spjd */ 2310168404Spjd/* ARGSUSED */ 2311168404Spjdstatic int 2312168962Spjdzfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) 2313168404Spjd{ 2314168404Spjd znode_t *zp = VTOZ(vp); 2315168404Spjd iovec_t *iovp; 2316185029Spjd edirent_t *eodp; 2317168404Spjd dirent64_t *odp; 2318168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2319168404Spjd objset_t *os; 2320168404Spjd caddr_t outbuf; 2321168404Spjd size_t bufsize; 2322168404Spjd zap_cursor_t zc; 2323168404Spjd zap_attribute_t zap; 2324168404Spjd uint_t bytes_wanted; 2325168404Spjd uint64_t offset; /* must be unsigned; checks for < 1 */ 2326219089Spjd uint64_t parent; 2327168404Spjd int local_eof; 2328168404Spjd int outcount; 2329168404Spjd int error; 2330168404Spjd uint8_t prefetch; 2331185029Spjd boolean_t check_sysattrs; 2332168404Spjd uint8_t type; 2333168962Spjd int ncooks; 2334168962Spjd u_long *cooks = NULL; 2335185029Spjd int flags = 0; 2336168404Spjd 2337168404Spjd ZFS_ENTER(zfsvfs); 2338185029Spjd ZFS_VERIFY_ZP(zp); 2339168404Spjd 2340219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 2341219089Spjd &parent, sizeof (parent))) != 0) { 2342219089Spjd ZFS_EXIT(zfsvfs); 2343219089Spjd return (error); 2344219089Spjd } 2345219089Spjd 2346168404Spjd /* 2347168404Spjd * If we are not given an eof variable, 2348168404Spjd * use a local one. 2349168404Spjd */ 2350168404Spjd if (eofp == NULL) 2351168404Spjd eofp = &local_eof; 2352168404Spjd 2353168404Spjd /* 2354168404Spjd * Check for valid iov_len. 
2355168404Spjd */ 2356168404Spjd if (uio->uio_iov->iov_len <= 0) { 2357168404Spjd ZFS_EXIT(zfsvfs); 2358249195Smm return (SET_ERROR(EINVAL)); 2359168404Spjd } 2360168404Spjd 2361168404Spjd /* 2362168404Spjd * Quit if directory has been removed (posix) 2363168404Spjd */ 2364168404Spjd if ((*eofp = zp->z_unlinked) != 0) { 2365168404Spjd ZFS_EXIT(zfsvfs); 2366168404Spjd return (0); 2367168404Spjd } 2368168404Spjd 2369168404Spjd error = 0; 2370168404Spjd os = zfsvfs->z_os; 2371168404Spjd offset = uio->uio_loffset; 2372168404Spjd prefetch = zp->z_zn_prefetch; 2373168404Spjd 2374168404Spjd /* 2375168404Spjd * Initialize the iterator cursor. 2376168404Spjd */ 2377168404Spjd if (offset <= 3) { 2378168404Spjd /* 2379168404Spjd * Start iteration from the beginning of the directory. 2380168404Spjd */ 2381168404Spjd zap_cursor_init(&zc, os, zp->z_id); 2382168404Spjd } else { 2383168404Spjd /* 2384168404Spjd * The offset is a serialized cursor. 2385168404Spjd */ 2386168404Spjd zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2387168404Spjd } 2388168404Spjd 2389168404Spjd /* 2390168404Spjd * Get space to change directory entries into fs independent format. 2391168404Spjd */ 2392168404Spjd iovp = uio->uio_iov; 2393168404Spjd bytes_wanted = iovp->iov_len; 2394168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2395168404Spjd bufsize = bytes_wanted; 2396168404Spjd outbuf = kmem_alloc(bufsize, KM_SLEEP); 2397168404Spjd odp = (struct dirent64 *)outbuf; 2398168404Spjd } else { 2399168404Spjd bufsize = bytes_wanted; 2400247187Smm outbuf = NULL; 2401168404Spjd odp = (struct dirent64 *)iovp->iov_base; 2402168404Spjd } 2403185029Spjd eodp = (struct edirent *)odp; 2404168404Spjd 2405169170Spjd if (ncookies != NULL) { 2406168404Spjd /* 2407168404Spjd * Minimum entry size is dirent size and 1 byte for a file name. 
2408168404Spjd */ 2409168962Spjd ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2410219404Spjd cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2411219404Spjd *cookies = cooks; 2412168962Spjd *ncookies = ncooks; 2413168404Spjd } 2414185029Spjd /* 2415185029Spjd * If this VFS supports the system attribute view interface; and 2416185029Spjd * we're looking at an extended attribute directory; and we care 2417185029Spjd * about normalization conflicts on this vfs; then we must check 2418185029Spjd * for normalization conflicts with the sysattr name space. 2419185029Spjd */ 2420185029Spjd#ifdef TODO 2421185029Spjd check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2422185029Spjd (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2423185029Spjd (flags & V_RDDIR_ENTFLAGS); 2424185029Spjd#else 2425185029Spjd check_sysattrs = 0; 2426185029Spjd#endif 2427168404Spjd 2428168404Spjd /* 2429168404Spjd * Transform to file-system independent format 2430168404Spjd */ 2431168404Spjd outcount = 0; 2432168404Spjd while (outcount < bytes_wanted) { 2433168404Spjd ino64_t objnum; 2434168404Spjd ushort_t reclen; 2435219089Spjd off64_t *next = NULL; 2436168404Spjd 2437168404Spjd /* 2438168404Spjd * Special case `.', `..', and `.zfs'. 
2439168404Spjd */ 2440168404Spjd if (offset == 0) { 2441168404Spjd (void) strcpy(zap.za_name, "."); 2442185029Spjd zap.za_normalization_conflict = 0; 2443168404Spjd objnum = zp->z_id; 2444169108Spjd type = DT_DIR; 2445168404Spjd } else if (offset == 1) { 2446168404Spjd (void) strcpy(zap.za_name, ".."); 2447185029Spjd zap.za_normalization_conflict = 0; 2448219089Spjd objnum = parent; 2449169108Spjd type = DT_DIR; 2450168404Spjd } else if (offset == 2 && zfs_show_ctldir(zp)) { 2451168404Spjd (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2452185029Spjd zap.za_normalization_conflict = 0; 2453168404Spjd objnum = ZFSCTL_INO_ROOT; 2454169108Spjd type = DT_DIR; 2455168404Spjd } else { 2456168404Spjd /* 2457168404Spjd * Grab next entry. 2458168404Spjd */ 2459168404Spjd if (error = zap_cursor_retrieve(&zc, &zap)) { 2460168404Spjd if ((*eofp = (error == ENOENT)) != 0) 2461168404Spjd break; 2462168404Spjd else 2463168404Spjd goto update; 2464168404Spjd } 2465168404Spjd 2466168404Spjd if (zap.za_integer_length != 8 || 2467168404Spjd zap.za_num_integers != 1) { 2468168404Spjd cmn_err(CE_WARN, "zap_readdir: bad directory " 2469168404Spjd "entry, obj = %lld, offset = %lld\n", 2470168404Spjd (u_longlong_t)zp->z_id, 2471168404Spjd (u_longlong_t)offset); 2472249195Smm error = SET_ERROR(ENXIO); 2473168404Spjd goto update; 2474168404Spjd } 2475168404Spjd 2476168404Spjd objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2477168404Spjd /* 2478168404Spjd * MacOS X can extract the object type here such as: 2479168404Spjd * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2480168404Spjd */ 2481168404Spjd type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2482185029Spjd 2483185029Spjd if (check_sysattrs && !zap.za_normalization_conflict) { 2484185029Spjd#ifdef TODO 2485185029Spjd zap.za_normalization_conflict = 2486185029Spjd xattr_sysattr_casechk(zap.za_name); 2487185029Spjd#else 2488185029Spjd panic("%s:%u: TODO", __func__, __LINE__); 2489185029Spjd#endif 2490185029Spjd } 2491168404Spjd } 
2492168404Spjd 2493211932Smm if (flags & V_RDDIR_ACCFILTER) { 2494211932Smm /* 2495211932Smm * If we have no access at all, don't include 2496211932Smm * this entry in the returned information 2497211932Smm */ 2498211932Smm znode_t *ezp; 2499211932Smm if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2500211932Smm goto skip_entry; 2501211932Smm if (!zfs_has_access(ezp, cr)) { 2502303970Savg vrele(ZTOV(ezp)); 2503211932Smm goto skip_entry; 2504211932Smm } 2505303970Savg vrele(ZTOV(ezp)); 2506211932Smm } 2507211932Smm 2508185029Spjd if (flags & V_RDDIR_ENTFLAGS) 2509185029Spjd reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2510185029Spjd else 2511185029Spjd reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2512185029Spjd 2513168404Spjd /* 2514168404Spjd * Will this entry fit in the buffer? 2515168404Spjd */ 2516168404Spjd if (outcount + reclen > bufsize) { 2517168404Spjd /* 2518168404Spjd * Did we manage to fit anything in the buffer? 2519168404Spjd */ 2520168404Spjd if (!outcount) { 2521249195Smm error = SET_ERROR(EINVAL); 2522168404Spjd goto update; 2523168404Spjd } 2524168404Spjd break; 2525168404Spjd } 2526185029Spjd if (flags & V_RDDIR_ENTFLAGS) { 2527185029Spjd /* 2528185029Spjd * Add extended flag entry: 2529185029Spjd */ 2530185029Spjd eodp->ed_ino = objnum; 2531185029Spjd eodp->ed_reclen = reclen; 2532185029Spjd /* NOTE: ed_off is the offset for the *next* entry */ 2533185029Spjd next = &(eodp->ed_off); 2534185029Spjd eodp->ed_eflags = zap.za_normalization_conflict ? 
2535185029Spjd ED_CASE_CONFLICT : 0; 2536185029Spjd (void) strncpy(eodp->ed_name, zap.za_name, 2537185029Spjd EDIRENT_NAMELEN(reclen)); 2538185029Spjd eodp = (edirent_t *)((intptr_t)eodp + reclen); 2539185029Spjd } else { 2540185029Spjd /* 2541185029Spjd * Add normal entry: 2542185029Spjd */ 2543185029Spjd odp->d_ino = objnum; 2544185029Spjd odp->d_reclen = reclen; 2545185029Spjd odp->d_namlen = strlen(zap.za_name); 2546185029Spjd (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2547185029Spjd odp->d_type = type; 2548341074Smarkj dirent_terminate(odp); 2549185029Spjd odp = (dirent64_t *)((intptr_t)odp + reclen); 2550185029Spjd } 2551168404Spjd outcount += reclen; 2552168404Spjd 2553168404Spjd ASSERT(outcount <= bufsize); 2554168404Spjd 2555168404Spjd /* Prefetch znode */ 2556168404Spjd if (prefetch) 2557286705Smav dmu_prefetch(os, objnum, 0, 0, 0, 2558286705Smav ZIO_PRIORITY_SYNC_READ); 2559168404Spjd 2560211932Smm skip_entry: 2561168404Spjd /* 2562168404Spjd * Move to the next entry, fill in the previous offset. 
2563168404Spjd */ 2564168404Spjd if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2565168404Spjd zap_cursor_advance(&zc); 2566168404Spjd offset = zap_cursor_serialize(&zc); 2567168404Spjd } else { 2568168404Spjd offset += 1; 2569168404Spjd } 2570219404Spjd 2571219404Spjd if (cooks != NULL) { 2572219404Spjd *cooks++ = offset; 2573219404Spjd ncooks--; 2574219404Spjd KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2575219404Spjd } 2576168404Spjd } 2577168404Spjd zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2578168404Spjd 2579168404Spjd /* Subtract unused cookies */ 2580168962Spjd if (ncookies != NULL) 2581168962Spjd *ncookies -= ncooks; 2582168404Spjd 2583168404Spjd if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2584168404Spjd iovp->iov_base += outcount; 2585168404Spjd iovp->iov_len -= outcount; 2586168404Spjd uio->uio_resid -= outcount; 2587168404Spjd } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2588168404Spjd /* 2589168404Spjd * Reset the pointer. 
2590168404Spjd */ 2591168404Spjd offset = uio->uio_loffset; 2592168404Spjd } 2593168404Spjd 2594168404Spjdupdate: 2595168404Spjd zap_cursor_fini(&zc); 2596168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2597168404Spjd kmem_free(outbuf, bufsize); 2598168404Spjd 2599168404Spjd if (error == ENOENT) 2600168404Spjd error = 0; 2601168404Spjd 2602168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2603168404Spjd 2604168404Spjd uio->uio_loffset = offset; 2605168404Spjd ZFS_EXIT(zfsvfs); 2606169107Spjd if (error != 0 && cookies != NULL) { 2607168962Spjd free(*cookies, M_TEMP); 2608168962Spjd *cookies = NULL; 2609168962Spjd *ncookies = 0; 2610168404Spjd } 2611168404Spjd return (error); 2612168404Spjd} 2613168404Spjd 2614185029Spjdulong_t zfs_fsync_sync_cnt = 4; 2615185029Spjd 2616168404Spjdstatic int 2617185029Spjdzfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2618168404Spjd{ 2619168962Spjd znode_t *zp = VTOZ(vp); 2620168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2621168404Spjd 2622185029Spjd (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2623185029Spjd 2624219089Spjd if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 2625219089Spjd ZFS_ENTER(zfsvfs); 2626219089Spjd ZFS_VERIFY_ZP(zp); 2627219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 2628219089Spjd ZFS_EXIT(zfsvfs); 2629219089Spjd } 2630168404Spjd return (0); 2631168404Spjd} 2632168404Spjd 2633185029Spjd 2634168404Spjd/* 2635168404Spjd * Get the requested file attributes and place them in the provided 2636168404Spjd * vattr structure. 2637168404Spjd * 2638168404Spjd * IN: vp - vnode of file. 2639168404Spjd * vap - va_mask identifies requested attributes. 2640185029Spjd * If AT_XVATTR set, then optional attrs are requested 2641185029Spjd * flags - ATTR_NOACLCHECK (CIFS server context) 2642168404Spjd * cr - credentials of caller. 2643185029Spjd * ct - caller context 2644168404Spjd * 2645168404Spjd * OUT: vap - attribute values. 
2646168404Spjd * 2647251631Sdelphij * RETURN: 0 (always succeeds). 2648168404Spjd */ 2649168404Spjd/* ARGSUSED */ 2650168404Spjdstatic int 2651185029Spjdzfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2652185029Spjd caller_context_t *ct) 2653168404Spjd{ 2654168962Spjd znode_t *zp = VTOZ(vp); 2655168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2656185029Spjd int error = 0; 2657168962Spjd uint32_t blksize; 2658168962Spjd u_longlong_t nblocks; 2659185029Spjd uint64_t links; 2660224251Sdelphij uint64_t mtime[2], ctime[2], crtime[2], rdev; 2661185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2662185029Spjd xoptattr_t *xoap = NULL; 2663185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2664224251Sdelphij sa_bulk_attr_t bulk[4]; 2665219089Spjd int count = 0; 2666168404Spjd 2667168404Spjd ZFS_ENTER(zfsvfs); 2668185029Spjd ZFS_VERIFY_ZP(zp); 2669168404Spjd 2670219089Spjd zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2671219089Spjd 2672219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 2673219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 2674243807Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 2675224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2676224251Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 2677224251Sdelphij &rdev, 8); 2678219089Spjd 2679219089Spjd if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 2680219089Spjd ZFS_EXIT(zfsvfs); 2681219089Spjd return (error); 2682219089Spjd } 2683219089Spjd 2684168404Spjd /* 2685185029Spjd * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2686185029Spjd * Also, if we are the owner don't bother, since owner should 2687185029Spjd * always be allowed to read basic attributes of file. 
2688185029Spjd */ 2689219089Spjd if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2690219089Spjd (vap->va_uid != crgetuid(cr))) { 2691185029Spjd if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2692185029Spjd skipaclchk, cr)) { 2693185029Spjd ZFS_EXIT(zfsvfs); 2694185029Spjd return (error); 2695185029Spjd } 2696185029Spjd } 2697185029Spjd 2698185029Spjd /* 2699168404Spjd * Return all attributes. It's cheaper to provide the answer 2700168404Spjd * than to determine whether we were asked the question. 2701168404Spjd */ 2702168404Spjd 2703219089Spjd vap->va_type = IFTOVT(zp->z_mode); 2704219089Spjd vap->va_mode = zp->z_mode & ~S_IFMT; 2705277300Ssmh#ifdef illumos 2706224252Sdelphij vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2707224252Sdelphij#else 2708224252Sdelphij vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 2709224252Sdelphij#endif 2710168404Spjd vap->va_nodeid = zp->z_id; 2711185029Spjd if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2712219089Spjd links = zp->z_links + 1; 2713185029Spjd else 2714219089Spjd links = zp->z_links; 2715229425Sdim vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! */ 2716219089Spjd vap->va_size = zp->z_size; 2717277300Ssmh#ifdef illumos 2718224252Sdelphij vap->va_rdev = vp->v_rdev; 2719224252Sdelphij#else 2720224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2721224251Sdelphij vap->va_rdev = zfs_cmpldev(rdev); 2722224252Sdelphij#endif 2723168404Spjd vap->va_seq = zp->z_seq; 2724168404Spjd vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2725272467Saraujo vap->va_filerev = zp->z_seq; 2726168404Spjd 2727185029Spjd /* 2728185029Spjd * Add in any requested optional attributes and the create time. 2729185029Spjd * Also set the corresponding bits in the returned attribute bitmap. 
2730185029Spjd */ 2731185029Spjd if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2732185029Spjd if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2733185029Spjd xoap->xoa_archive = 2734219089Spjd ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2735185029Spjd XVA_SET_RTN(xvap, XAT_ARCHIVE); 2736185029Spjd } 2737185029Spjd 2738185029Spjd if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2739185029Spjd xoap->xoa_readonly = 2740219089Spjd ((zp->z_pflags & ZFS_READONLY) != 0); 2741185029Spjd XVA_SET_RTN(xvap, XAT_READONLY); 2742185029Spjd } 2743185029Spjd 2744185029Spjd if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2745185029Spjd xoap->xoa_system = 2746219089Spjd ((zp->z_pflags & ZFS_SYSTEM) != 0); 2747185029Spjd XVA_SET_RTN(xvap, XAT_SYSTEM); 2748185029Spjd } 2749185029Spjd 2750185029Spjd if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2751185029Spjd xoap->xoa_hidden = 2752219089Spjd ((zp->z_pflags & ZFS_HIDDEN) != 0); 2753185029Spjd XVA_SET_RTN(xvap, XAT_HIDDEN); 2754185029Spjd } 2755185029Spjd 2756185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2757185029Spjd xoap->xoa_nounlink = 2758219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2759185029Spjd XVA_SET_RTN(xvap, XAT_NOUNLINK); 2760185029Spjd } 2761185029Spjd 2762185029Spjd if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2763185029Spjd xoap->xoa_immutable = 2764219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2765185029Spjd XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2766185029Spjd } 2767185029Spjd 2768185029Spjd if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2769185029Spjd xoap->xoa_appendonly = 2770219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2771185029Spjd XVA_SET_RTN(xvap, XAT_APPENDONLY); 2772185029Spjd } 2773185029Spjd 2774185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2775185029Spjd xoap->xoa_nodump = 2776219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0); 2777185029Spjd XVA_SET_RTN(xvap, XAT_NODUMP); 2778185029Spjd } 2779185029Spjd 2780185029Spjd if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2781185029Spjd xoap->xoa_opaque = 2782219089Spjd 
((zp->z_pflags & ZFS_OPAQUE) != 0); 2783185029Spjd XVA_SET_RTN(xvap, XAT_OPAQUE); 2784185029Spjd } 2785185029Spjd 2786185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2787185029Spjd xoap->xoa_av_quarantined = 2788219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2789185029Spjd XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2790185029Spjd } 2791185029Spjd 2792185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2793185029Spjd xoap->xoa_av_modified = 2794219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2795185029Spjd XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2796185029Spjd } 2797185029Spjd 2798185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2799219089Spjd vp->v_type == VREG) { 2800219089Spjd zfs_sa_get_scanstamp(zp, xvap); 2801185029Spjd } 2802185029Spjd 2803219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2804219089Spjd xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 2805219089Spjd XVA_SET_RTN(xvap, XAT_REPARSE); 2806219089Spjd } 2807219089Spjd if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 2808219089Spjd xoap->xoa_generation = zp->z_gen; 2809219089Spjd XVA_SET_RTN(xvap, XAT_GEN); 2810219089Spjd } 2811219089Spjd 2812219089Spjd if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2813219089Spjd xoap->xoa_offline = 2814219089Spjd ((zp->z_pflags & ZFS_OFFLINE) != 0); 2815219089Spjd XVA_SET_RTN(xvap, XAT_OFFLINE); 2816219089Spjd } 2817219089Spjd 2818219089Spjd if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2819219089Spjd xoap->xoa_sparse = 2820219089Spjd ((zp->z_pflags & ZFS_SPARSE) != 0); 2821219089Spjd XVA_SET_RTN(xvap, XAT_SPARSE); 2822219089Spjd } 2823185029Spjd } 2824185029Spjd 2825219089Spjd ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 2826219089Spjd ZFS_TIME_DECODE(&vap->va_mtime, mtime); 2827219089Spjd ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2828219089Spjd ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 2829168404Spjd 2830168404Spjd 2831219089Spjd sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 2832168404Spjd vap->va_blksize = blksize; 2833168404Spjd vap->va_bytes = 
nblocks << 9; /* nblocks * 512 */ 2834168404Spjd 2835168404Spjd if (zp->z_blksz == 0) { 2836168404Spjd /* 2837168404Spjd * Block size hasn't been set; suggest maximal I/O transfers. 2838168404Spjd */ 2839168404Spjd vap->va_blksize = zfsvfs->z_max_blksz; 2840168404Spjd } 2841168404Spjd 2842168404Spjd ZFS_EXIT(zfsvfs); 2843168404Spjd return (0); 2844168404Spjd} 2845168404Spjd 2846168404Spjd/* 2847168404Spjd * Set the file attributes to the values contained in the 2848168404Spjd * vattr structure. 2849168404Spjd * 2850168404Spjd * IN: vp - vnode of file to be modified. 2851168404Spjd * vap - new attribute values. 2852185029Spjd * If AT_XVATTR set, then optional attrs are being set 2853168404Spjd * flags - ATTR_UTIME set if non-default time values provided. 2854185029Spjd * - ATTR_NOACLCHECK (CIFS context only). 2855168404Spjd * cr - credentials of caller. 2856185029Spjd * ct - caller context 2857168404Spjd * 2858251631Sdelphij * RETURN: 0 on success, error code on failure. 2859168404Spjd * 2860168404Spjd * Timestamps: 2861168404Spjd * vp - ctime updated, mtime updated if size changed. 
2862168404Spjd */ 2863168404Spjd/* ARGSUSED */ 2864168404Spjdstatic int 2865168962Spjdzfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2866251631Sdelphij caller_context_t *ct) 2867168404Spjd{ 2868185029Spjd znode_t *zp = VTOZ(vp); 2869168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2870185029Spjd zilog_t *zilog; 2871168404Spjd dmu_tx_t *tx; 2872168404Spjd vattr_t oldva; 2873209962Smm xvattr_t tmpxvattr; 2874168962Spjd uint_t mask = vap->va_mask; 2875247187Smm uint_t saved_mask = 0; 2876197831Spjd uint64_t saved_mode; 2877168404Spjd int trim_mask = 0; 2878168404Spjd uint64_t new_mode; 2879209962Smm uint64_t new_uid, new_gid; 2880219089Spjd uint64_t xattr_obj; 2881219089Spjd uint64_t mtime[2], ctime[2]; 2882168404Spjd znode_t *attrzp; 2883168404Spjd int need_policy = FALSE; 2884219089Spjd int err, err2; 2885185029Spjd zfs_fuid_info_t *fuidp = NULL; 2886185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2887185029Spjd xoptattr_t *xoap; 2888219089Spjd zfs_acl_t *aclp; 2889185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 2890219089Spjd boolean_t fuid_dirtied = B_FALSE; 2891219089Spjd sa_bulk_attr_t bulk[7], xattr_bulk[7]; 2892219089Spjd int count = 0, xattr_count = 0; 2893168404Spjd 2894168404Spjd if (mask == 0) 2895168404Spjd return (0); 2896168404Spjd 2897168962Spjd if (mask & AT_NOSET) 2898249195Smm return (SET_ERROR(EINVAL)); 2899168962Spjd 2900185029Spjd ZFS_ENTER(zfsvfs); 2901185029Spjd ZFS_VERIFY_ZP(zp); 2902185029Spjd 2903185029Spjd zilog = zfsvfs->z_log; 2904185029Spjd 2905185029Spjd /* 2906185029Spjd * Make sure that if we have ephemeral uid/gid or xvattr specified 2907185029Spjd * that file system is at proper version level 2908185029Spjd */ 2909185029Spjd 2910185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2911185029Spjd (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2912185029Spjd ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 2913185029Spjd (mask & AT_XVATTR))) { 2914185029Spjd ZFS_EXIT(zfsvfs); 2915249195Smm return (SET_ERROR(EINVAL)); 2916185029Spjd } 2917185029Spjd 2918185029Spjd if (mask & AT_SIZE && vp->v_type == VDIR) { 2919185029Spjd ZFS_EXIT(zfsvfs); 2920249195Smm return (SET_ERROR(EISDIR)); 2921185029Spjd } 2922168404Spjd 2923185029Spjd if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 2924185029Spjd ZFS_EXIT(zfsvfs); 2925249195Smm return (SET_ERROR(EINVAL)); 2926185029Spjd } 2927168404Spjd 2928185029Spjd /* 2929185029Spjd * If this is an xvattr_t, then get a pointer to the structure of 2930185029Spjd * optional attributes. If this is NULL, then we have a vattr_t. 
2931185029Spjd */ 2932185029Spjd xoap = xva_getxoptattr(xvap); 2933168404Spjd 2934209962Smm xva_init(&tmpxvattr); 2935209962Smm 2936185029Spjd /* 2937185029Spjd * Immutable files can only alter immutable bit and atime 2938185029Spjd */ 2939219089Spjd if ((zp->z_pflags & ZFS_IMMUTABLE) && 2940185029Spjd ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 2941185029Spjd ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 2942185029Spjd ZFS_EXIT(zfsvfs); 2943249195Smm return (SET_ERROR(EPERM)); 2944185029Spjd } 2945185029Spjd 2946321579Smav /* 2947321579Smav * Note: ZFS_READONLY is handled in zfs_zaccess_common. 2948321579Smav */ 2949185029Spjd 2950185029Spjd /* 2951185029Spjd * Verify timestamps doesn't overflow 32 bits. 2952185029Spjd * ZFS can handle large timestamps, but 32bit syscalls can't 2953185029Spjd * handle times greater than 2039. This check should be removed 2954185029Spjd * once large timestamps are fully supported. 2955185029Spjd */ 2956185029Spjd if (mask & (AT_ATIME | AT_MTIME)) { 2957185029Spjd if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 2958185029Spjd ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 2959185029Spjd ZFS_EXIT(zfsvfs); 2960249195Smm return (SET_ERROR(EOVERFLOW)); 2961185029Spjd } 2962185029Spjd } 2963316391Sasomers if (xoap && (mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME) && 2964316391Sasomers TIMESPEC_OVERFLOW(&vap->va_birthtime)) { 2965316391Sasomers ZFS_EXIT(zfsvfs); 2966316391Sasomers return (SET_ERROR(EOVERFLOW)); 2967316391Sasomers } 2968185029Spjd 2969168404Spjd attrzp = NULL; 2970219089Spjd aclp = NULL; 2971168404Spjd 2972211932Smm /* Can this be moved to before the top label? 
*/ 2973168404Spjd if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2974168404Spjd ZFS_EXIT(zfsvfs); 2975249195Smm return (SET_ERROR(EROFS)); 2976168404Spjd } 2977168404Spjd 2978168404Spjd /* 2979168404Spjd * First validate permissions 2980168404Spjd */ 2981168404Spjd 2982168404Spjd if (mask & AT_SIZE) { 2983168404Spjd /* 2984168404Spjd * XXX - Note, we are not providing any open 2985168404Spjd * mode flags here (like FNDELAY), so we may 2986168404Spjd * block if there are locks present... this 2987168404Spjd * should be addressed in openat(). 2988168404Spjd */ 2989185029Spjd /* XXX - would it be OK to generate a log record here? */ 2990185029Spjd err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 2991168404Spjd if (err) { 2992168404Spjd ZFS_EXIT(zfsvfs); 2993168404Spjd return (err); 2994168404Spjd } 2995168404Spjd } 2996168404Spjd 2997185029Spjd if (mask & (AT_ATIME|AT_MTIME) || 2998185029Spjd ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2999185029Spjd XVA_ISSET_REQ(xvap, XAT_READONLY) || 3000185029Spjd XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 3001219089Spjd XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 3002219089Spjd XVA_ISSET_REQ(xvap, XAT_SPARSE) || 3003185029Spjd XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 3004219089Spjd XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 3005185029Spjd need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 3006185029Spjd skipaclchk, cr); 3007219089Spjd } 3008168404Spjd 3009168404Spjd if (mask & (AT_UID|AT_GID)) { 3010168404Spjd int idmask = (mask & (AT_UID|AT_GID)); 3011168404Spjd int take_owner; 3012168404Spjd int take_group; 3013168404Spjd 3014168404Spjd /* 3015168404Spjd * NOTE: even if a new mode is being set, 3016168404Spjd * we may clear S_ISUID/S_ISGID bits. 
3017168404Spjd */ 3018168404Spjd 3019168404Spjd if (!(mask & AT_MODE)) 3020219089Spjd vap->va_mode = zp->z_mode; 3021168404Spjd 3022168404Spjd /* 3023168404Spjd * Take ownership or chgrp to group we are a member of 3024168404Spjd */ 3025168404Spjd 3026168404Spjd take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 3027185029Spjd take_group = (mask & AT_GID) && 3028185029Spjd zfs_groupmember(zfsvfs, vap->va_gid, cr); 3029168404Spjd 3030168404Spjd /* 3031168404Spjd * If both AT_UID and AT_GID are set then take_owner and 3032168404Spjd * take_group must both be set in order to allow taking 3033168404Spjd * ownership. 3034168404Spjd * 3035168404Spjd * Otherwise, send the check through secpolicy_vnode_setattr() 3036168404Spjd * 3037168404Spjd */ 3038168404Spjd 3039168404Spjd if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 3040168404Spjd ((idmask == AT_UID) && take_owner) || 3041168404Spjd ((idmask == AT_GID) && take_group)) { 3042185029Spjd if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 3043185029Spjd skipaclchk, cr) == 0) { 3044168404Spjd /* 3045168404Spjd * Remove setuid/setgid for non-privileged users 3046168404Spjd */ 3047185029Spjd secpolicy_setid_clear(vap, vp, cr); 3048168404Spjd trim_mask = (mask & (AT_UID|AT_GID)); 3049168404Spjd } else { 3050168404Spjd need_policy = TRUE; 3051168404Spjd } 3052168404Spjd } else { 3053168404Spjd need_policy = TRUE; 3054168404Spjd } 3055168404Spjd } 3056168404Spjd 3057219089Spjd oldva.va_mode = zp->z_mode; 3058185029Spjd zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 3059185029Spjd if (mask & AT_XVATTR) { 3060209962Smm /* 3061209962Smm * Update xvattr mask to include only those attributes 3062209962Smm * that are actually changing. 3063209962Smm * 3064209962Smm * the bits will be restored prior to actually setting 3065209962Smm * the attributes so the caller thinks they were set. 
3066209962Smm */ 3067209962Smm if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3068209962Smm if (xoap->xoa_appendonly != 3069219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 3070209962Smm need_policy = TRUE; 3071209962Smm } else { 3072209962Smm XVA_CLR_REQ(xvap, XAT_APPENDONLY); 3073209962Smm XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 3074209962Smm } 3075209962Smm } 3076209962Smm 3077209962Smm if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3078209962Smm if (xoap->xoa_nounlink != 3079219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 3080209962Smm need_policy = TRUE; 3081209962Smm } else { 3082209962Smm XVA_CLR_REQ(xvap, XAT_NOUNLINK); 3083209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 3084209962Smm } 3085209962Smm } 3086209962Smm 3087209962Smm if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3088209962Smm if (xoap->xoa_immutable != 3089219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 3090209962Smm need_policy = TRUE; 3091209962Smm } else { 3092209962Smm XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 3093209962Smm XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 3094209962Smm } 3095209962Smm } 3096209962Smm 3097209962Smm if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3098209962Smm if (xoap->xoa_nodump != 3099219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0)) { 3100209962Smm need_policy = TRUE; 3101209962Smm } else { 3102209962Smm XVA_CLR_REQ(xvap, XAT_NODUMP); 3103209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 3104209962Smm } 3105209962Smm } 3106209962Smm 3107209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3108209962Smm if (xoap->xoa_av_modified != 3109219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 3110209962Smm need_policy = TRUE; 3111209962Smm } else { 3112209962Smm XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 3113209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 3114209962Smm } 3115209962Smm } 3116209962Smm 3117209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3118209962Smm if ((vp->v_type != VREG && 3119209962Smm xoap->xoa_av_quarantined) || 3120209962Smm xoap->xoa_av_quarantined != 
3121219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 3122209962Smm need_policy = TRUE; 3123209962Smm } else { 3124209962Smm XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 3125209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 3126209962Smm } 3127209962Smm } 3128209962Smm 3129219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3130219089Spjd ZFS_EXIT(zfsvfs); 3131249195Smm return (SET_ERROR(EPERM)); 3132219089Spjd } 3133219089Spjd 3134209962Smm if (need_policy == FALSE && 3135209962Smm (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3136209962Smm XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3137185029Spjd need_policy = TRUE; 3138185029Spjd } 3139185029Spjd } 3140185029Spjd 3141168404Spjd if (mask & AT_MODE) { 3142185029Spjd if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 3143168962Spjd err = secpolicy_setid_setsticky_clear(vp, vap, 3144168962Spjd &oldva, cr); 3145168962Spjd if (err) { 3146168962Spjd ZFS_EXIT(zfsvfs); 3147168962Spjd return (err); 3148168962Spjd } 3149168404Spjd trim_mask |= AT_MODE; 3150168404Spjd } else { 3151168404Spjd need_policy = TRUE; 3152168404Spjd } 3153168404Spjd } 3154168404Spjd 3155168404Spjd if (need_policy) { 3156168404Spjd /* 3157168404Spjd * If trim_mask is set then take ownership 3158168404Spjd * has been granted or write_acl is present and user 3159168404Spjd * has the ability to modify mode. In that case remove 3160168404Spjd * UID|GID and or MODE from mask so that 3161168404Spjd * secpolicy_vnode_setattr() doesn't revoke it. 3162168404Spjd */ 3163168404Spjd 3164168404Spjd if (trim_mask) { 3165168404Spjd saved_mask = vap->va_mask; 3166168404Spjd vap->va_mask &= ~trim_mask; 3167197831Spjd if (trim_mask & AT_MODE) { 3168197831Spjd /* 3169197831Spjd * Save the mode, as secpolicy_vnode_setattr() 3170197831Spjd * will overwrite it with ova.va_mode. 
3171197831Spjd */ 3172197831Spjd saved_mode = vap->va_mode; 3173197831Spjd } 3174168404Spjd } 3175168404Spjd err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3176185029Spjd (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3177168404Spjd if (err) { 3178168404Spjd ZFS_EXIT(zfsvfs); 3179168404Spjd return (err); 3180168404Spjd } 3181168404Spjd 3182197831Spjd if (trim_mask) { 3183168404Spjd vap->va_mask |= saved_mask; 3184197831Spjd if (trim_mask & AT_MODE) { 3185197831Spjd /* 3186197831Spjd * Recover the mode after 3187197831Spjd * secpolicy_vnode_setattr(). 3188197831Spjd */ 3189197831Spjd vap->va_mode = saved_mode; 3190197831Spjd } 3191197831Spjd } 3192168404Spjd } 3193168404Spjd 3194168404Spjd /* 3195168404Spjd * secpolicy_vnode_setattr, or take ownership may have 3196168404Spjd * changed va_mask 3197168404Spjd */ 3198168404Spjd mask = vap->va_mask; 3199168404Spjd 3200219089Spjd if ((mask & (AT_UID | AT_GID))) { 3201219089Spjd err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 3202219089Spjd &xattr_obj, sizeof (xattr_obj)); 3203168404Spjd 3204219089Spjd if (err == 0 && xattr_obj) { 3205219089Spjd err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 3206306818Savg if (err == 0) { 3207306818Savg err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE); 3208306818Savg if (err != 0) 3209306818Savg vrele(ZTOV(attrzp)); 3210306818Savg } 3211209962Smm if (err) 3212219089Spjd goto out2; 3213168404Spjd } 3214209962Smm if (mask & AT_UID) { 3215209962Smm new_uid = zfs_fuid_create(zfsvfs, 3216209962Smm (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3217219089Spjd if (new_uid != zp->z_uid && 3218219089Spjd zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 3219219089Spjd if (attrzp) 3220306818Savg vput(ZTOV(attrzp)); 3221249195Smm err = SET_ERROR(EDQUOT); 3222219089Spjd goto out2; 3223209962Smm } 3224209962Smm } 3225209962Smm 3226209962Smm if (mask & AT_GID) { 3227209962Smm new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 3228209962Smm cr, ZFS_GROUP, &fuidp); 3229219089Spjd 
if (new_gid != zp->z_gid && 3230219089Spjd zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 3231219089Spjd if (attrzp) 3232306818Savg vput(ZTOV(attrzp)); 3233249195Smm err = SET_ERROR(EDQUOT); 3234219089Spjd goto out2; 3235209962Smm } 3236209962Smm } 3237219089Spjd } 3238219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 3239219089Spjd 3240219089Spjd if (mask & AT_MODE) { 3241219089Spjd uint64_t pmode = zp->z_mode; 3242219089Spjd uint64_t acl_obj; 3243219089Spjd new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3244219089Spjd 3245243560Smm if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 3246243560Smm !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3247249195Smm err = SET_ERROR(EPERM); 3248243560Smm goto out; 3249243560Smm } 3250243560Smm 3251224174Smm if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3252224174Smm goto out; 3253219089Spjd 3254219089Spjd if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 3255219089Spjd /* 3256219089Spjd * Are we upgrading ACL from old V0 format 3257219089Spjd * to V1 format? 
3258219089Spjd */ 3259219089Spjd if (zfsvfs->z_version >= ZPL_VERSION_FUID && 3260219089Spjd zfs_znode_acl_version(zp) == 3261219089Spjd ZFS_ACL_VERSION_INITIAL) { 3262219089Spjd dmu_tx_hold_free(tx, acl_obj, 0, 3263219089Spjd DMU_OBJECT_END); 3264219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3265219089Spjd 0, aclp->z_acl_bytes); 3266209962Smm } else { 3267219089Spjd dmu_tx_hold_write(tx, acl_obj, 0, 3268219089Spjd aclp->z_acl_bytes); 3269209962Smm } 3270219089Spjd } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 3271219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3272219089Spjd 0, aclp->z_acl_bytes); 3273209962Smm } 3274219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3275219089Spjd } else { 3276219089Spjd if ((mask & AT_XVATTR) && 3277219089Spjd XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3278219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3279219089Spjd else 3280219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3281168404Spjd } 3282168404Spjd 3283219089Spjd if (attrzp) { 3284219089Spjd dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 3285219089Spjd } 3286219089Spjd 3287219089Spjd fuid_dirtied = zfsvfs->z_fuid_dirty; 3288219089Spjd if (fuid_dirtied) 3289219089Spjd zfs_fuid_txhold(zfsvfs, tx); 3290219089Spjd 3291219089Spjd zfs_sa_upgrade_txholds(tx, zp); 3292219089Spjd 3293258720Savg err = dmu_tx_assign(tx, TXG_WAIT); 3294258720Savg if (err) 3295209962Smm goto out; 3296168404Spjd 3297219089Spjd count = 0; 3298168404Spjd /* 3299168404Spjd * Set each attribute requested. 3300168404Spjd * We group settings according to the locks they need to acquire. 3301168404Spjd * 3302168404Spjd * Note: you cannot set ctime directly, although it will be 3303168404Spjd * updated as a side-effect of calling this function. 
3304168404Spjd */ 3305168404Spjd 3306219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3307219089Spjd mutex_enter(&zp->z_acl_lock); 3308168404Spjd 3309219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3310219089Spjd &zp->z_pflags, sizeof (zp->z_pflags)); 3311219089Spjd 3312219089Spjd if (attrzp) { 3313219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3314219089Spjd mutex_enter(&attrzp->z_acl_lock); 3315219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3316219089Spjd SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3317219089Spjd sizeof (attrzp->z_pflags)); 3318219089Spjd } 3319219089Spjd 3320219089Spjd if (mask & (AT_UID|AT_GID)) { 3321219089Spjd 3322219089Spjd if (mask & AT_UID) { 3323219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 3324219089Spjd &new_uid, sizeof (new_uid)); 3325219089Spjd zp->z_uid = new_uid; 3326219089Spjd if (attrzp) { 3327219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3328219089Spjd SA_ZPL_UID(zfsvfs), NULL, &new_uid, 3329219089Spjd sizeof (new_uid)); 3330219089Spjd attrzp->z_uid = new_uid; 3331219089Spjd } 3332219089Spjd } 3333219089Spjd 3334219089Spjd if (mask & AT_GID) { 3335219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 3336219089Spjd NULL, &new_gid, sizeof (new_gid)); 3337219089Spjd zp->z_gid = new_gid; 3338219089Spjd if (attrzp) { 3339219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3340219089Spjd SA_ZPL_GID(zfsvfs), NULL, &new_gid, 3341219089Spjd sizeof (new_gid)); 3342219089Spjd attrzp->z_gid = new_gid; 3343219089Spjd } 3344219089Spjd } 3345219089Spjd if (!(mask & AT_MODE)) { 3346219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 3347219089Spjd NULL, &new_mode, sizeof (new_mode)); 3348219089Spjd new_mode = zp->z_mode; 3349219089Spjd } 3350219089Spjd err = zfs_acl_chown_setattr(zp); 3351219089Spjd ASSERT(err == 0); 3352219089Spjd if (attrzp) { 3353219089Spjd err = zfs_acl_chown_setattr(attrzp); 3354219089Spjd ASSERT(err == 0); 3355219089Spjd } 3356219089Spjd } 
3357219089Spjd 3358168404Spjd if (mask & AT_MODE) { 3359219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 3360219089Spjd &new_mode, sizeof (new_mode)); 3361219089Spjd zp->z_mode = new_mode; 3362219089Spjd ASSERT3U((uintptr_t)aclp, !=, 0); 3363209962Smm err = zfs_aclset_common(zp, aclp, cr, tx); 3364240415Smm ASSERT0(err); 3365219089Spjd if (zp->z_acl_cached) 3366219089Spjd zfs_acl_free(zp->z_acl_cached); 3367211932Smm zp->z_acl_cached = aclp; 3368211932Smm aclp = NULL; 3369168404Spjd } 3370168404Spjd 3371168404Spjd 3372219089Spjd if (mask & AT_ATIME) { 3373219089Spjd ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 3374219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 3375219089Spjd &zp->z_atime, sizeof (zp->z_atime)); 3376168404Spjd } 3377168404Spjd 3378219089Spjd if (mask & AT_MTIME) { 3379219089Spjd ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 3380219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 3381219089Spjd mtime, sizeof (mtime)); 3382168404Spjd } 3383168404Spjd 3384185029Spjd /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 3385219089Spjd if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3386219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3387219089Spjd NULL, mtime, sizeof (mtime)); 3388219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3389219089Spjd &ctime, sizeof (ctime)); 3390219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 3391219089Spjd B_TRUE); 3392219089Spjd } else if (mask != 0) { 3393219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3394219089Spjd &ctime, sizeof (ctime)); 3395219089Spjd zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 3396219089Spjd B_TRUE); 3397219089Spjd if (attrzp) { 3398219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3399219089Spjd SA_ZPL_CTIME(zfsvfs), NULL, 3400219089Spjd &ctime, sizeof (ctime)); 3401219089Spjd zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 3402219089Spjd mtime, ctime, B_TRUE); 3403219089Spjd } 3404219089Spjd } 3405185029Spjd /* 3406185029Spjd * Do this after setting timestamps to prevent timestamp 3407185029Spjd * update from toggling bit 3408185029Spjd */ 3409168404Spjd 3410185029Spjd if (xoap && (mask & AT_XVATTR)) { 3411209962Smm 3412316391Sasomers if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) 3413316391Sasomers xoap->xoa_createtime = vap->va_birthtime; 3414209962Smm /* 3415209962Smm * restore trimmed off masks 3416209962Smm * so that return masks can be set for caller. 
3417209962Smm */ 3418209962Smm 3419209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3420209962Smm XVA_SET_REQ(xvap, XAT_APPENDONLY); 3421209962Smm } 3422209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3423209962Smm XVA_SET_REQ(xvap, XAT_NOUNLINK); 3424209962Smm } 3425209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3426209962Smm XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3427209962Smm } 3428209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3429209962Smm XVA_SET_REQ(xvap, XAT_NODUMP); 3430209962Smm } 3431209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3432209962Smm XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3433209962Smm } 3434209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3435209962Smm XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3436209962Smm } 3437209962Smm 3438219089Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3439185029Spjd ASSERT(vp->v_type == VREG); 3440185029Spjd 3441219089Spjd zfs_xvattr_set(zp, xvap, tx); 3442185029Spjd } 3443185029Spjd 3444209962Smm if (fuid_dirtied) 3445209962Smm zfs_fuid_sync(zfsvfs, tx); 3446209962Smm 3447168404Spjd if (mask != 0) 3448185029Spjd zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3449168404Spjd 3450219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3451219089Spjd mutex_exit(&zp->z_acl_lock); 3452168404Spjd 3453219089Spjd if (attrzp) { 3454219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3455219089Spjd mutex_exit(&attrzp->z_acl_lock); 3456219089Spjd } 3457209962Smmout: 3458219089Spjd if (err == 0 && attrzp) { 3459219089Spjd err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 3460219089Spjd xattr_count, tx); 3461219089Spjd ASSERT(err2 == 0); 3462219089Spjd } 3463219089Spjd 3464168404Spjd if (attrzp) 3465306818Savg vput(ZTOV(attrzp)); 3466251631Sdelphij 3467211932Smm if (aclp) 3468209962Smm zfs_acl_free(aclp); 3469168404Spjd 3470209962Smm if (fuidp) { 3471209962Smm zfs_fuid_info_free(fuidp); 3472209962Smm fuidp = NULL; 3473209962Smm } 3474209962Smm 3475219089Spjd if (err) { 
3476209962Smm dmu_tx_abort(tx); 3477219089Spjd } else { 3478219089Spjd err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 3479209962Smm dmu_tx_commit(tx); 3480219089Spjd } 3481209962Smm 3482219089Spjdout2: 3483219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3484219089Spjd zil_commit(zilog, 0); 3485209962Smm 3486168404Spjd ZFS_EXIT(zfsvfs); 3487168404Spjd return (err); 3488168404Spjd} 3489168404Spjd 3490168404Spjd/* 3491303970Savg * We acquire all but fdvp locks using non-blocking acquisitions. If we 3492303970Savg * fail to acquire any lock in the path we will drop all held locks, 3493303970Savg * acquire the new lock in a blocking fashion, and then release it and 3494303970Savg * restart the rename. This acquire/release step ensures that we do not 3495303970Savg * spin on a lock waiting for release. On error release all vnode locks 3496303970Savg * and decrement references the way tmpfs_rename() would do. 3497168404Spjd */ 3498303970Savgstatic int 3499303970Savgzfs_rename_relock(struct vnode *sdvp, struct vnode **svpp, 3500303970Savg struct vnode *tdvp, struct vnode **tvpp, 3501303970Savg const struct componentname *scnp, const struct componentname *tcnp) 3502168404Spjd{ 3503303970Savg zfsvfs_t *zfsvfs; 3504303970Savg struct vnode *nvp, *svp, *tvp; 3505303970Savg znode_t *sdzp, *tdzp, *szp, *tzp; 3506303970Savg const char *snm = scnp->cn_nameptr; 3507303970Savg const char *tnm = tcnp->cn_nameptr; 3508303970Savg int error; 3509168404Spjd 3510303970Savg VOP_UNLOCK(tdvp, 0); 3511303970Savg if (*tvpp != NULL && *tvpp != tdvp) 3512303970Savg VOP_UNLOCK(*tvpp, 0); 3513303970Savg 3514303970Savgrelock: 3515303970Savg error = vn_lock(sdvp, LK_EXCLUSIVE); 3516303970Savg if (error) 3517303970Savg goto out; 3518303970Savg sdzp = VTOZ(sdvp); 3519303970Savg 3520303970Savg error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT); 3521303970Savg if (error != 0) { 3522303970Savg VOP_UNLOCK(sdvp, 0); 3523303970Savg if (error != EBUSY) 3524303970Savg goto out; 3525303970Savg 
error = vn_lock(tdvp, LK_EXCLUSIVE); 3526303970Savg if (error) 3527303970Savg goto out; 3528303970Savg VOP_UNLOCK(tdvp, 0); 3529303970Savg goto relock; 3530168404Spjd } 3531303970Savg tdzp = VTOZ(tdvp); 3532168404Spjd 3533303970Savg /* 3534303970Savg * Before using sdzp and tdzp we must ensure that they are live. 3535303970Savg * As a porting legacy from illumos we have two things to worry 3536303970Savg * about. One is typical for FreeBSD and it is that the vnode is 3537303970Savg * not reclaimed (doomed). The other is that the znode is live. 3538303970Savg * The current code can invalidate the znode without acquiring the 3539303970Savg * corresponding vnode lock if the object represented by the znode 3540303970Savg * and vnode is no longer valid after a rollback or receive operation. 3541303970Savg * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock 3542303970Savg * that protects the znodes from the invalidation. 3543303970Savg */ 3544303970Savg zfsvfs = sdzp->z_zfsvfs; 3545303970Savg ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs); 3546303970Savg ZFS_ENTER(zfsvfs); 3547168404Spjd 3548168404Spjd /* 3549303970Savg * We can not use ZFS_VERIFY_ZP() here because it could directly return 3550303970Savg * bypassing the cleanup code in the case of an error. 3551168404Spjd */ 3552303970Savg if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 3553303970Savg ZFS_EXIT(zfsvfs); 3554303970Savg VOP_UNLOCK(sdvp, 0); 3555303970Savg VOP_UNLOCK(tdvp, 0); 3556303970Savg error = SET_ERROR(EIO); 3557303970Savg goto out; 3558303970Savg } 3559303970Savg 3560303970Savg /* 3561303970Savg * Re-resolve svp to be certain it still exists and fetch the 3562303970Savg * correct vnode. 3563303970Savg */ 3564303970Savg error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS); 3565303970Savg if (error != 0) { 3566303970Savg /* Source entry invalid or not there. 
*/ 3567303970Savg ZFS_EXIT(zfsvfs); 3568303970Savg VOP_UNLOCK(sdvp, 0); 3569303970Savg VOP_UNLOCK(tdvp, 0); 3570303970Savg if ((scnp->cn_flags & ISDOTDOT) != 0 || 3571303970Savg (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.')) 3572303970Savg error = SET_ERROR(EINVAL); 3573303970Savg goto out; 3574303970Savg } 3575303970Savg svp = ZTOV(szp); 3576303970Savg 3577303970Savg /* 3578303970Savg * Re-resolve tvp, if it disappeared we just carry on. 3579303970Savg */ 3580303970Savg error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0); 3581303970Savg if (error != 0) { 3582303970Savg ZFS_EXIT(zfsvfs); 3583303970Savg VOP_UNLOCK(sdvp, 0); 3584303970Savg VOP_UNLOCK(tdvp, 0); 3585303970Savg vrele(svp); 3586303970Savg if ((tcnp->cn_flags & ISDOTDOT) != 0) 3587303970Savg error = SET_ERROR(EINVAL); 3588303970Savg goto out; 3589303970Savg } 3590303970Savg if (tzp != NULL) 3591303970Savg tvp = ZTOV(tzp); 3592303970Savg else 3593303970Savg tvp = NULL; 3594303970Savg 3595303970Savg /* 3596303970Savg * At present the vnode locks must be acquired before z_teardown_lock, 3597303970Savg * although it would be more logical to use the opposite order. 3598303970Savg */ 3599303970Savg ZFS_EXIT(zfsvfs); 3600303970Savg 3601303970Savg /* 3602303970Savg * Now try acquire locks on svp and tvp. 3603303970Savg */ 3604303970Savg nvp = svp; 3605303970Savg error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 3606303970Savg if (error != 0) { 3607303970Savg VOP_UNLOCK(sdvp, 0); 3608303970Savg VOP_UNLOCK(tdvp, 0); 3609303970Savg if (tvp != NULL) 3610303970Savg vrele(tvp); 3611303970Savg if (error != EBUSY) { 3612303970Savg vrele(nvp); 3613303970Savg goto out; 3614303970Savg } 3615303970Savg error = vn_lock(nvp, LK_EXCLUSIVE); 3616303970Savg if (error != 0) { 3617303970Savg vrele(nvp); 3618303970Savg goto out; 3619303970Savg } 3620303970Savg VOP_UNLOCK(nvp, 0); 3621303970Savg /* 3622303970Savg * Concurrent rename race. 3623303970Savg * XXX ? 
3624303970Savg */ 3625303970Savg if (nvp == tdvp) { 3626303970Savg vrele(nvp); 3627303970Savg error = SET_ERROR(EINVAL); 3628303970Savg goto out; 3629303970Savg } 3630303970Savg vrele(*svpp); 3631303970Savg *svpp = nvp; 3632303970Savg goto relock; 3633303970Savg } 3634303970Savg vrele(*svpp); 3635303970Savg *svpp = nvp; 3636303970Savg 3637303970Savg if (*tvpp != NULL) 3638303970Savg vrele(*tvpp); 3639303970Savg *tvpp = NULL; 3640303970Savg if (tvp != NULL) { 3641303970Savg nvp = tvp; 3642303970Savg error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 3643303970Savg if (error != 0) { 3644303970Savg VOP_UNLOCK(sdvp, 0); 3645303970Savg VOP_UNLOCK(tdvp, 0); 3646303970Savg VOP_UNLOCK(*svpp, 0); 3647303970Savg if (error != EBUSY) { 3648303970Savg vrele(nvp); 3649303970Savg goto out; 3650168404Spjd } 3651303970Savg error = vn_lock(nvp, LK_EXCLUSIVE); 3652303970Savg if (error != 0) { 3653303970Savg vrele(nvp); 3654303970Savg goto out; 3655303970Savg } 3656303970Savg vput(nvp); 3657303970Savg goto relock; 3658168404Spjd } 3659303970Savg *tvpp = nvp; 3660303970Savg } 3661168404Spjd 3662303970Savg return (0); 3663168404Spjd 3664303970Savgout: 3665303970Savg return (error); 3666303970Savg} 3667168404Spjd 3668303970Savg/* 3669303970Savg * Note that we must use VRELE_ASYNC in this function as it walks 3670303970Savg * up the directory tree and vrele may need to acquire an exclusive 3671303970Savg * lock if a last reference to a vnode is dropped. 
3672303970Savg */ 3673303970Savgstatic int 3674303970Savgzfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp) 3675303970Savg{ 3676303970Savg zfsvfs_t *zfsvfs; 3677303970Savg znode_t *zp, *zp1; 3678303970Savg uint64_t parent; 3679303970Savg int error; 3680168404Spjd 3681303970Savg zfsvfs = tdzp->z_zfsvfs; 3682303970Savg if (tdzp == szp) 3683303970Savg return (SET_ERROR(EINVAL)); 3684303970Savg if (tdzp == sdzp) 3685303970Savg return (0); 3686303970Savg if (tdzp->z_id == zfsvfs->z_root) 3687303970Savg return (0); 3688303970Savg zp = tdzp; 3689303970Savg for (;;) { 3690303970Savg ASSERT(!zp->z_unlinked); 3691303970Savg if ((error = sa_lookup(zp->z_sa_hdl, 3692303970Savg SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) 3693303970Savg break; 3694303970Savg 3695303970Savg if (parent == szp->z_id) { 3696303970Savg error = SET_ERROR(EINVAL); 3697303970Savg break; 3698168404Spjd } 3699303970Savg if (parent == zfsvfs->z_root) 3700303970Savg break; 3701303970Savg if (parent == sdzp->z_id) 3702303970Savg break; 3703168404Spjd 3704303970Savg error = zfs_zget(zfsvfs, parent, &zp1); 3705303970Savg if (error != 0) 3706303970Savg break; 3707168404Spjd 3708303970Savg if (zp != tdzp) 3709303970Savg VN_RELE_ASYNC(ZTOV(zp), 3710303970Savg dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 3711303970Savg zp = zp1; 3712303970Savg } 3713303970Savg 3714303970Savg if (error == ENOTDIR) 3715303970Savg panic("checkpath: .. not a directory\n"); 3716303970Savg if (zp != tdzp) 3717303970Savg VN_RELE_ASYNC(ZTOV(zp), 3718303970Savg dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 3719303970Savg return (error); 3720168404Spjd} 3721168404Spjd 3722168404Spjd/* 3723168404Spjd * Move an entry from the provided source directory to the target 3724168404Spjd * directory. Change the entry name as indicated. 3725168404Spjd * 3726168404Spjd * IN: sdvp - Source directory containing the "old entry". 3727168404Spjd * snm - Old entry name. 
3728168404Spjd * tdvp - Target directory to contain the "new entry". 3729168404Spjd * tnm - New entry name. 3730168404Spjd * cr - credentials of caller. 3731185029Spjd * ct - caller context 3732185029Spjd * flags - case flags 3733168404Spjd * 3734251631Sdelphij * RETURN: 0 on success, error code on failure. 3735168404Spjd * 3736168404Spjd * Timestamps: 3737168404Spjd * sdvp,tdvp - ctime|mtime updated 3738168404Spjd */ 3739185029Spjd/*ARGSUSED*/ 3740168404Spjdstatic int 3741303970Savgzfs_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, 3742303970Savg vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp, 3743303970Savg cred_t *cr) 3744168404Spjd{ 3745303970Savg zfsvfs_t *zfsvfs; 3746303970Savg znode_t *sdzp, *tdzp, *szp, *tzp; 3747303970Savg zilog_t *zilog = NULL; 3748168404Spjd dmu_tx_t *tx; 3749303970Savg char *snm = scnp->cn_nameptr; 3750303970Savg char *tnm = tcnp->cn_nameptr; 3751185029Spjd int error = 0; 3752168404Spjd 3753303970Savg /* Reject renames across filesystems. */ 3754303970Savg if ((*svpp)->v_mount != tdvp->v_mount || 3755303970Savg ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) { 3756303970Savg error = SET_ERROR(EXDEV); 3757303970Savg goto out; 3758303970Savg } 3759168404Spjd 3760303970Savg if (zfsctl_is_node(tdvp)) { 3761303970Savg error = SET_ERROR(EXDEV); 3762303970Savg goto out; 3763303970Savg } 3764303970Savg 3765168962Spjd /* 3766303970Savg * Lock all four vnodes to ensure safety and semantics of renaming. 
3767168962Spjd */ 3768303970Savg error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp); 3769303970Savg if (error != 0) { 3770303970Savg /* no vnodes are locked in the case of error here */ 3771303970Savg return (error); 3772264392Sdavide } 3773168962Spjd 3774303970Savg tdzp = VTOZ(tdvp); 3775303970Savg sdzp = VTOZ(sdvp); 3776303970Savg zfsvfs = tdzp->z_zfsvfs; 3777303970Savg zilog = zfsvfs->z_log; 3778303970Savg 3779254585Sdelphij /* 3780303970Savg * After we re-enter ZFS_ENTER() we will have to revalidate all 3781303970Savg * znodes involved. 3782254585Sdelphij */ 3783303970Savg ZFS_ENTER(zfsvfs); 3784168404Spjd 3785185029Spjd if (zfsvfs->z_utf8 && u8_validate(tnm, 3786185029Spjd strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3787303970Savg error = SET_ERROR(EILSEQ); 3788303970Savg goto unlockout; 3789185029Spjd } 3790185029Spjd 3791303970Savg /* If source and target are the same file, there is nothing to do. */ 3792303970Savg if ((*svpp) == (*tvpp)) { 3793303970Savg error = 0; 3794303970Savg goto unlockout; 3795303970Savg } 3796185029Spjd 3797303970Savg if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) || 3798303970Savg ((*tvpp) != NULL && (*tvpp)->v_type == VDIR && 3799303970Savg (*tvpp)->v_mountedhere != NULL)) { 3800303970Savg error = SET_ERROR(EXDEV); 3801303970Savg goto unlockout; 3802303970Savg } 3803168404Spjd 3804168404Spjd /* 3805303970Savg * We can not use ZFS_VERIFY_ZP() here because it could directly return 3806303970Savg * bypassing the cleanup code in the case of an error. 3807168404Spjd */ 3808303970Savg if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 3809303970Savg error = SET_ERROR(EIO); 3810303970Savg goto unlockout; 3811168404Spjd } 3812168404Spjd 3813303970Savg szp = VTOZ(*svpp); 3814303970Savg tzp = *tvpp == NULL ? 
NULL : VTOZ(*tvpp); 3815303970Savg if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) { 3816303970Savg error = SET_ERROR(EIO); 3817303970Savg goto unlockout; 3818168962Spjd } 3819185029Spjd 3820208131Smm /* 3821303970Savg * This is to prevent the creation of links into attribute space 3822303970Savg * by renaming a linked file into/outof an attribute directory. 3823303970Savg * See the comment in zfs_link() for why this is considered bad. 3824208131Smm */ 3825303970Savg if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3826303970Savg error = SET_ERROR(EINVAL); 3827303970Savg goto unlockout; 3828208131Smm } 3829208131Smm 3830168404Spjd /* 3831168404Spjd * Must have write access at the source to remove the old entry 3832168404Spjd * and write access at the target to create the new entry. 3833168404Spjd * Note that if target and source are the same, this can be 3834168404Spjd * done in a single check. 3835168404Spjd */ 3836168404Spjd if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3837303970Savg goto unlockout; 3838168404Spjd 3839303970Savg if ((*svpp)->v_type == VDIR) { 3840168404Spjd /* 3841303970Savg * Avoid ".", "..", and aliases of "." for obvious reasons. 3842303970Savg */ 3843303970Savg if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') || 3844303970Savg sdzp == szp || 3845303970Savg (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { 3846303970Savg error = EINVAL; 3847303970Savg goto unlockout; 3848303970Savg } 3849303970Savg 3850303970Savg /* 3851168404Spjd * Check to make sure rename is valid. 3852168404Spjd * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3853168404Spjd */ 3854303970Savg if (error = zfs_rename_check(szp, sdzp, tdzp)) 3855303970Savg goto unlockout; 3856168404Spjd } 3857168404Spjd 3858168404Spjd /* 3859168404Spjd * Does target exist? 3860168404Spjd */ 3861168404Spjd if (tzp) { 3862168404Spjd /* 3863168404Spjd * Source and target must be the same type. 
3864168404Spjd */ 3865303970Savg if ((*svpp)->v_type == VDIR) { 3866303970Savg if ((*tvpp)->v_type != VDIR) { 3867249195Smm error = SET_ERROR(ENOTDIR); 3868303970Savg goto unlockout; 3869303970Savg } else { 3870303970Savg cache_purge(tdvp); 3871303970Savg if (sdvp != tdvp) 3872303970Savg cache_purge(sdvp); 3873168404Spjd } 3874168404Spjd } else { 3875303970Savg if ((*tvpp)->v_type == VDIR) { 3876249195Smm error = SET_ERROR(EISDIR); 3877303970Savg goto unlockout; 3878168404Spjd } 3879168404Spjd } 3880168404Spjd } 3881168404Spjd 3882303970Savg vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct); 3883168962Spjd if (tzp) 3884303970Savg vnevent_rename_dest(*tvpp, tdvp, tnm, ct); 3885168962Spjd 3886185029Spjd /* 3887185029Spjd * notify the target directory if it is not the same 3888185029Spjd * as source directory. 3889185029Spjd */ 3890185029Spjd if (tdvp != sdvp) { 3891185029Spjd vnevent_rename_dest_dir(tdvp, ct); 3892185029Spjd } 3893185029Spjd 3894168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 3895219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 3896219089Spjd dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3897168404Spjd dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3898168404Spjd dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 3899219089Spjd if (sdzp != tdzp) { 3900219089Spjd dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 3901219089Spjd zfs_sa_upgrade_txholds(tx, tdzp); 3902219089Spjd } 3903219089Spjd if (tzp) { 3904219089Spjd dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 3905219089Spjd zfs_sa_upgrade_txholds(tx, tzp); 3906219089Spjd } 3907219089Spjd 3908219089Spjd zfs_sa_upgrade_txholds(tx, szp); 3909168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3910303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 3911168404Spjd if (error) { 3912168404Spjd dmu_tx_abort(tx); 3913303970Savg goto unlockout; 3914168404Spjd } 3915168404Spjd 3916303970Savg 3917168404Spjd if (tzp) /* Attempt to remove the existing target */ 3918303970Savg error = zfs_link_destroy(tdzp, 
tnm, tzp, tx, 0, NULL); 3919168404Spjd 3920168404Spjd if (error == 0) { 3921303970Savg error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING); 3922168404Spjd if (error == 0) { 3923219089Spjd szp->z_pflags |= ZFS_AV_MODIFIED; 3924185029Spjd 3925219089Spjd error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 3926219089Spjd (void *)&szp->z_pflags, sizeof (uint64_t), tx); 3927240415Smm ASSERT0(error); 3928219089Spjd 3929303970Savg error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING, 3930303970Savg NULL); 3931219089Spjd if (error == 0) { 3932303970Savg zfs_log_rename(zilog, tx, TX_RENAME, sdzp, 3933303970Savg snm, tdzp, tnm, szp); 3934185029Spjd 3935219089Spjd /* 3936219089Spjd * Update path information for the target vnode 3937219089Spjd */ 3938303970Savg vn_renamepath(tdvp, *svpp, tnm, strlen(tnm)); 3939219089Spjd } else { 3940219089Spjd /* 3941219089Spjd * At this point, we have successfully created 3942219089Spjd * the target name, but have failed to remove 3943219089Spjd * the source name. Since the create was done 3944219089Spjd * with the ZRENAMING flag, there are 3945219089Spjd * complications; for one, the link count is 3946219089Spjd * wrong. The easiest way to deal with this 3947219089Spjd * is to remove the newly created target, and 3948219089Spjd * return the original error. This must 3949219089Spjd * succeed; fortunately, it is very unlikely to 3950219089Spjd * fail, since we just created it. 
3951219089Spjd */ 3952303970Savg VERIFY3U(zfs_link_destroy(tdzp, tnm, szp, tx, 3953219089Spjd ZRENAMING, NULL), ==, 0); 3954219089Spjd } 3955168404Spjd } 3956168404Spjd if (error == 0) { 3957303970Savg cache_purge(*svpp); 3958303970Savg if (*tvpp != NULL) 3959303970Savg cache_purge(*tvpp); 3960303970Savg cache_purge_negative(tdvp); 3961168404Spjd } 3962168404Spjd } 3963168404Spjd 3964168404Spjd dmu_tx_commit(tx); 3965168404Spjd 3966303970Savgunlockout: /* all 4 vnodes are locked, ZFS_ENTER called */ 3967303970Savg ZFS_EXIT(zfsvfs); 3968303970Savg VOP_UNLOCK(*svpp, 0); 3969303970Savg VOP_UNLOCK(sdvp, 0); 3970168404Spjd 3971303970Savgout: /* original two vnodes are locked */ 3972303970Savg if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3973219089Spjd zil_commit(zilog, 0); 3974219089Spjd 3975303970Savg if (*tvpp != NULL) 3976303970Savg VOP_UNLOCK(*tvpp, 0); 3977303970Savg if (tdvp != *tvpp) 3978303970Savg VOP_UNLOCK(tdvp, 0); 3979168404Spjd return (error); 3980168404Spjd} 3981168404Spjd 3982168404Spjd/* 3983168404Spjd * Insert the indicated symbolic reference entry into the directory. 3984168404Spjd * 3985168404Spjd * IN: dvp - Directory to contain new symbolic link. 3986168404Spjd * link - Name for new symlink entry. 3987168404Spjd * vap - Attributes of new entry. 3988168404Spjd * cr - credentials of caller. 3989185029Spjd * ct - caller context 3990185029Spjd * flags - case flags 3991168404Spjd * 3992251631Sdelphij * RETURN: 0 on success, error code on failure. 
3993168404Spjd * 3994168404Spjd * Timestamps: 3995168404Spjd * dvp - ctime|mtime updated 3996168404Spjd */ 3997185029Spjd/*ARGSUSED*/ 3998168404Spjdstatic int 3999185029Spjdzfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 4000185029Spjd cred_t *cr, kthread_t *td) 4001168404Spjd{ 4002168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 4003168404Spjd dmu_tx_t *tx; 4004168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4005185029Spjd zilog_t *zilog; 4006219089Spjd uint64_t len = strlen(link); 4007168404Spjd int error; 4008209962Smm zfs_acl_ids_t acl_ids; 4009209962Smm boolean_t fuid_dirtied; 4010219089Spjd uint64_t txtype = TX_SYMLINK; 4011185029Spjd int flags = 0; 4012168404Spjd 4013168962Spjd ASSERT(vap->va_type == VLNK); 4014168404Spjd 4015168404Spjd ZFS_ENTER(zfsvfs); 4016185029Spjd ZFS_VERIFY_ZP(dzp); 4017185029Spjd zilog = zfsvfs->z_log; 4018185029Spjd 4019185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 4020185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4021185029Spjd ZFS_EXIT(zfsvfs); 4022249195Smm return (SET_ERROR(EILSEQ)); 4023185029Spjd } 4024168404Spjd 4025168404Spjd if (len > MAXPATHLEN) { 4026168404Spjd ZFS_EXIT(zfsvfs); 4027249195Smm return (SET_ERROR(ENAMETOOLONG)); 4028168404Spjd } 4029168404Spjd 4030219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, 4031219089Spjd vap, cr, NULL, &acl_ids)) != 0) { 4032219089Spjd ZFS_EXIT(zfsvfs); 4033219089Spjd return (error); 4034219089Spjd } 4035260704Savg 4036168404Spjd /* 4037168404Spjd * Attempt to lock directory; fail if entry already exists. 
4038168404Spjd */ 4039303970Savg error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 4040185029Spjd if (error) { 4041219089Spjd zfs_acl_ids_free(&acl_ids); 4042168404Spjd ZFS_EXIT(zfsvfs); 4043168404Spjd return (error); 4044168404Spjd } 4045168404Spjd 4046219089Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4047219089Spjd zfs_acl_ids_free(&acl_ids); 4048219089Spjd ZFS_EXIT(zfsvfs); 4049219089Spjd return (error); 4050219089Spjd } 4051219089Spjd 4052209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 4053209962Smm zfs_acl_ids_free(&acl_ids); 4054209962Smm ZFS_EXIT(zfsvfs); 4055249195Smm return (SET_ERROR(EDQUOT)); 4056209962Smm } 4057303970Savg 4058303970Savg getnewvnode_reserve(1); 4059168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4060209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 4061168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 4062168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4063219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 4064219089Spjd ZFS_SA_BASE_ATTR_SIZE + len); 4065219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 4066219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 4067219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 4068219089Spjd acl_ids.z_aclp->z_acl_bytes); 4069219089Spjd } 4070209962Smm if (fuid_dirtied) 4071209962Smm zfs_fuid_txhold(zfsvfs, tx); 4072303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 4073168404Spjd if (error) { 4074219089Spjd zfs_acl_ids_free(&acl_ids); 4075168404Spjd dmu_tx_abort(tx); 4076260704Savg getnewvnode_drop_reserve(); 4077168404Spjd ZFS_EXIT(zfsvfs); 4078168404Spjd return (error); 4079168404Spjd } 4080168404Spjd 4081168404Spjd /* 4082168404Spjd * Create a new object for the symlink. 
4083219089Spjd * for version 4 ZPL datsets the symlink will be an SA attribute 4084168404Spjd */ 4085219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 4086168404Spjd 4087219089Spjd if (fuid_dirtied) 4088219089Spjd zfs_fuid_sync(zfsvfs, tx); 4089209962Smm 4090219089Spjd if (zp->z_is_sa) 4091219089Spjd error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 4092219089Spjd link, len, tx); 4093219089Spjd else 4094219089Spjd zfs_sa_symlink(zp, link, len, tx); 4095168404Spjd 4096219089Spjd zp->z_size = len; 4097219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 4098219089Spjd &zp->z_size, sizeof (zp->z_size), tx); 4099168404Spjd /* 4100168404Spjd * Insert the new object into the directory. 4101168404Spjd */ 4102303970Savg (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 4103168404Spjd 4104219089Spjd zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 4105219089Spjd *vpp = ZTOV(zp); 4106219089Spjd 4107209962Smm zfs_acl_ids_free(&acl_ids); 4108209962Smm 4109168404Spjd dmu_tx_commit(tx); 4110168404Spjd 4111260704Savg getnewvnode_drop_reserve(); 4112260704Savg 4113219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4114219089Spjd zil_commit(zilog, 0); 4115219089Spjd 4116168404Spjd ZFS_EXIT(zfsvfs); 4117168404Spjd return (error); 4118168404Spjd} 4119168404Spjd 4120168404Spjd/* 4121168404Spjd * Return, in the buffer contained in the provided uio structure, 4122168404Spjd * the symbolic path referred to by vp. 4123168404Spjd * 4124168404Spjd * IN: vp - vnode of symbolic link. 4125251631Sdelphij * uio - structure to contain the link path. 4126168404Spjd * cr - credentials of caller. 4127185029Spjd * ct - caller context 4128168404Spjd * 4129251631Sdelphij * OUT: uio - structure containing the link path. 4130168404Spjd * 4131251631Sdelphij * RETURN: 0 on success, error code on failure. 
4132168404Spjd * 4133168404Spjd * Timestamps: 4134168404Spjd * vp - atime updated 4135168404Spjd */ 4136168404Spjd/* ARGSUSED */ 4137168404Spjdstatic int 4138185029Spjdzfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 4139168404Spjd{ 4140168404Spjd znode_t *zp = VTOZ(vp); 4141168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4142168404Spjd int error; 4143168404Spjd 4144168404Spjd ZFS_ENTER(zfsvfs); 4145185029Spjd ZFS_VERIFY_ZP(zp); 4146168404Spjd 4147219089Spjd if (zp->z_is_sa) 4148219089Spjd error = sa_lookup_uio(zp->z_sa_hdl, 4149219089Spjd SA_ZPL_SYMLINK(zfsvfs), uio); 4150219089Spjd else 4151219089Spjd error = zfs_sa_readlink(zp, uio); 4152168404Spjd 4153168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4154219089Spjd 4155168404Spjd ZFS_EXIT(zfsvfs); 4156168404Spjd return (error); 4157168404Spjd} 4158168404Spjd 4159168404Spjd/* 4160168404Spjd * Insert a new entry into directory tdvp referencing svp. 4161168404Spjd * 4162168404Spjd * IN: tdvp - Directory to contain new entry. 4163168404Spjd * svp - vnode of new entry. 4164168404Spjd * name - name of new entry. 4165168404Spjd * cr - credentials of caller. 4166185029Spjd * ct - caller context 4167168404Spjd * 4168251631Sdelphij * RETURN: 0 on success, error code on failure. 
4169168404Spjd * 4170168404Spjd * Timestamps: 4171168404Spjd * tdvp - ctime|mtime updated 4172168404Spjd * svp - ctime updated 4173168404Spjd */ 4174168404Spjd/* ARGSUSED */ 4175168404Spjdstatic int 4176185029Spjdzfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 4177185029Spjd caller_context_t *ct, int flags) 4178168404Spjd{ 4179168404Spjd znode_t *dzp = VTOZ(tdvp); 4180168404Spjd znode_t *tzp, *szp; 4181168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4182185029Spjd zilog_t *zilog; 4183168404Spjd dmu_tx_t *tx; 4184168404Spjd int error; 4185212694Smm uint64_t parent; 4186185029Spjd uid_t owner; 4187168404Spjd 4188168404Spjd ASSERT(tdvp->v_type == VDIR); 4189168404Spjd 4190168404Spjd ZFS_ENTER(zfsvfs); 4191185029Spjd ZFS_VERIFY_ZP(dzp); 4192185029Spjd zilog = zfsvfs->z_log; 4193168404Spjd 4194212694Smm /* 4195212694Smm * POSIX dictates that we return EPERM here. 4196212694Smm * Better choices include ENOTSUP or EISDIR. 4197212694Smm */ 4198212694Smm if (svp->v_type == VDIR) { 4199168404Spjd ZFS_EXIT(zfsvfs); 4200249195Smm return (SET_ERROR(EPERM)); 4201212694Smm } 4202212694Smm 4203254585Sdelphij szp = VTOZ(svp); 4204254585Sdelphij ZFS_VERIFY_ZP(szp); 4205254585Sdelphij 4206258597Spjd if (szp->z_pflags & (ZFS_APPENDONLY | ZFS_IMMUTABLE | ZFS_READONLY)) { 4207258597Spjd ZFS_EXIT(zfsvfs); 4208258597Spjd return (SET_ERROR(EPERM)); 4209258597Spjd } 4210258597Spjd 4211212694Smm /* Prevent links to .zfs/shares files */ 4212212694Smm 4213219089Spjd if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 4214219089Spjd &parent, sizeof (uint64_t))) != 0) { 4215212694Smm ZFS_EXIT(zfsvfs); 4216219089Spjd return (error); 4217219089Spjd } 4218219089Spjd if (parent == zfsvfs->z_shares_dir) { 4219219089Spjd ZFS_EXIT(zfsvfs); 4220249195Smm return (SET_ERROR(EPERM)); 4221212694Smm } 4222212694Smm 4223185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, 4224185029Spjd strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4225185029Spjd ZFS_EXIT(zfsvfs); 4226249195Smm 
return (SET_ERROR(EILSEQ)); 4227185029Spjd } 4228185029Spjd 4229168404Spjd /* 4230168404Spjd * We do not support links between attributes and non-attributes 4231168404Spjd * because of the potential security risk of creating links 4232168404Spjd * into "normal" file space in order to circumvent restrictions 4233168404Spjd * imposed in attribute space. 4234168404Spjd */ 4235219089Spjd if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4236168404Spjd ZFS_EXIT(zfsvfs); 4237249195Smm return (SET_ERROR(EINVAL)); 4238168404Spjd } 4239168404Spjd 4240168404Spjd 4241219089Spjd owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4242219089Spjd if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { 4243168404Spjd ZFS_EXIT(zfsvfs); 4244249195Smm return (SET_ERROR(EPERM)); 4245168404Spjd } 4246168404Spjd 4247185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4248168404Spjd ZFS_EXIT(zfsvfs); 4249168404Spjd return (error); 4250168404Spjd } 4251168404Spjd 4252168404Spjd /* 4253168404Spjd * Attempt to lock directory; fail if entry already exists. 
4254168404Spjd */ 4255303970Savg error = zfs_dirent_lookup(dzp, name, &tzp, ZNEW); 4256185029Spjd if (error) { 4257168404Spjd ZFS_EXIT(zfsvfs); 4258168404Spjd return (error); 4259168404Spjd } 4260168404Spjd 4261168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4262219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4263168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4264219089Spjd zfs_sa_upgrade_txholds(tx, szp); 4265219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 4266303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 4267168404Spjd if (error) { 4268168404Spjd dmu_tx_abort(tx); 4269168404Spjd ZFS_EXIT(zfsvfs); 4270168404Spjd return (error); 4271168404Spjd } 4272168404Spjd 4273303970Savg error = zfs_link_create(dzp, name, szp, tx, 0); 4274168404Spjd 4275185029Spjd if (error == 0) { 4276185029Spjd uint64_t txtype = TX_LINK; 4277185029Spjd zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4278185029Spjd } 4279168404Spjd 4280168404Spjd dmu_tx_commit(tx); 4281168404Spjd 4282185029Spjd if (error == 0) { 4283185029Spjd vnevent_link(svp, ct); 4284185029Spjd } 4285185029Spjd 4286219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4287219089Spjd zil_commit(zilog, 0); 4288219089Spjd 4289168404Spjd ZFS_EXIT(zfsvfs); 4290168404Spjd return (error); 4291168404Spjd} 4292168404Spjd 4293219089Spjd 4294185029Spjd/*ARGSUSED*/ 4295168962Spjdvoid 4296185029Spjdzfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4297168404Spjd{ 4298168962Spjd znode_t *zp = VTOZ(vp); 4299168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4300168962Spjd int error; 4301168404Spjd 4302185029Spjd rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4303219089Spjd if (zp->z_sa_hdl == NULL) { 4304185029Spjd /* 4305185029Spjd * The fs has been unmounted, or we did a 4306185029Spjd * suspend/resume and this file no longer exists. 
4307185029Spjd */ 4308243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 4309234607Strasz vrecycle(vp); 4310243520Savg return; 4311243520Savg } 4312243520Savg 4313243520Savg if (zp->z_unlinked) { 4314243520Savg /* 4315243520Savg * Fast path to recycle a vnode of a removed file. 4316243520Savg */ 4317185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4318243520Savg vrecycle(vp); 4319168962Spjd return; 4320168404Spjd } 4321168404Spjd 4322168404Spjd if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4323168404Spjd dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4324168404Spjd 4325219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4326219089Spjd zfs_sa_upgrade_txholds(tx, zp); 4327168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 4328168404Spjd if (error) { 4329168404Spjd dmu_tx_abort(tx); 4330168404Spjd } else { 4331219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 4332219089Spjd (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4333168404Spjd zp->z_atime_dirty = 0; 4334168404Spjd dmu_tx_commit(tx); 4335168404Spjd } 4336168404Spjd } 4337185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4338168404Spjd} 4339168404Spjd 4340219089Spjd 4341168404SpjdCTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 4342168404SpjdCTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 4343168404Spjd 4344185029Spjd/*ARGSUSED*/ 4345168404Spjdstatic int 4346185029Spjdzfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 4347168404Spjd{ 4348168404Spjd znode_t *zp = VTOZ(vp); 4349168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4350185029Spjd uint32_t gen; 4351219089Spjd uint64_t gen64; 4352168404Spjd uint64_t object = zp->z_id; 4353168404Spjd zfid_short_t *zfid; 4354219089Spjd int size, i, error; 4355168404Spjd 4356168404Spjd ZFS_ENTER(zfsvfs); 4357185029Spjd ZFS_VERIFY_ZP(zp); 4358168404Spjd 4359219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 4360219089Spjd &gen64, sizeof (uint64_t))) != 0) { 4361219089Spjd ZFS_EXIT(zfsvfs); 4362219089Spjd return 
(error); 4363219089Spjd } 4364219089Spjd 4365219089Spjd gen = (uint32_t)gen64; 4366219089Spjd 4367168404Spjd size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 4368249195Smm 4369249195Smm#ifdef illumos 4370249195Smm if (fidp->fid_len < size) { 4371249195Smm fidp->fid_len = size; 4372249195Smm ZFS_EXIT(zfsvfs); 4373249195Smm return (SET_ERROR(ENOSPC)); 4374249195Smm } 4375249195Smm#else 4376168404Spjd fidp->fid_len = size; 4377249195Smm#endif 4378168404Spjd 4379168404Spjd zfid = (zfid_short_t *)fidp; 4380168404Spjd 4381168404Spjd zfid->zf_len = size; 4382168404Spjd 4383168404Spjd for (i = 0; i < sizeof (zfid->zf_object); i++) 4384168404Spjd zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 4385168404Spjd 4386168404Spjd /* Must have a non-zero generation number to distinguish from .zfs */ 4387168404Spjd if (gen == 0) 4388168404Spjd gen = 1; 4389168404Spjd for (i = 0; i < sizeof (zfid->zf_gen); i++) 4390168404Spjd zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 4391168404Spjd 4392168404Spjd if (size == LONG_FID_LEN) { 4393168404Spjd uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 4394169023Spjd zfid_long_t *zlfid; 4395168404Spjd 4396168404Spjd zlfid = (zfid_long_t *)fidp; 4397168404Spjd 4398168404Spjd for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4399168404Spjd zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4400168404Spjd 4401168404Spjd /* XXX - this should be the generation number for the objset */ 4402168404Spjd for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 4403168404Spjd zlfid->zf_setgen[i] = 0; 4404168404Spjd } 4405168404Spjd 4406168404Spjd ZFS_EXIT(zfsvfs); 4407168404Spjd return (0); 4408168404Spjd} 4409168404Spjd 4410168404Spjdstatic int 4411185029Spjdzfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 4412185029Spjd caller_context_t *ct) 4413168404Spjd{ 4414168404Spjd znode_t *zp, *xzp; 4415168404Spjd zfsvfs_t *zfsvfs; 4416168404Spjd int error; 4417168404Spjd 4418168404Spjd switch (cmd) { 4419168404Spjd case _PC_LINK_MAX: 
4420168404Spjd *valp = INT_MAX; 4421168404Spjd return (0); 4422168404Spjd 4423168404Spjd case _PC_FILESIZEBITS: 4424168404Spjd *valp = 64; 4425168404Spjd return (0); /* The names from here to the matching #endif are compiled only on illumos. */ 4426277300Ssmh#ifdef illumos 4427168404Spjd case _PC_XATTR_EXISTS: 4428168404Spjd zp = VTOZ(vp); 4429168404Spjd zfsvfs = zp->z_zfsvfs; 4430168404Spjd ZFS_ENTER(zfsvfs); 4431185029Spjd ZFS_VERIFY_ZP(zp); 4432168404Spjd *valp = 0; /* Look up the xattr directory (ZXATTR); *valp becomes 1 only if it exists and is not empty. */ 4433303970Savg error = zfs_dirent_lookup(zp, "", &xzp, 4434303970Savg ZXATTR | ZEXISTS | ZSHARED); 4435168404Spjd if (error == 0) { 4436168404Spjd if (!zfs_dirempty(xzp)) 4437168404Spjd *valp = 1; 4438303970Savg vrele(ZTOV(xzp)); 4439168404Spjd } else if (error == ENOENT) { 4440168404Spjd /* 4441168404Spjd * If there aren't extended attributes, it's the 4442168404Spjd * same as having zero of them. 4443168404Spjd */ 4444168404Spjd error = 0; 4445168404Spjd } 4446168404Spjd ZFS_EXIT(zfsvfs); 4447168404Spjd return (error); 4448168404Spjd 4449219089Spjd case _PC_SATTR_ENABLED: 4450219089Spjd case _PC_SATTR_EXISTS: 4451219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 4452219089Spjd (vp->v_type == VREG || vp->v_type == VDIR); 4453219089Spjd return (0); 4454219089Spjd 4455219089Spjd case _PC_ACCESS_FILTERING: 4456219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 4457219089Spjd vp->v_type == VDIR; 4458219089Spjd return (0); 4459219089Spjd 4460219089Spjd case _PC_ACL_ENABLED: 4461219089Spjd *valp = _ACL_ACE_ENABLED; 4462219089Spjd return (0); 4463277300Ssmh#endif /* illumos */ 4464219089Spjd case _PC_MIN_HOLE_SIZE: 4465219089Spjd *valp = (int)SPA_MINBLOCKSIZE; 4466219089Spjd return (0); 4467277300Ssmh#ifdef illumos 4468219089Spjd case _PC_TIMESTAMP_RESOLUTION: 4469219089Spjd /* nanosecond timestamp resolution */ 4470219089Spjd *valp = 1L; 4471219089Spjd return (0); 4472277300Ssmh#endif /* _PC_ACL_EXTENDED reports 0, whereas _PC_ACL_NFS4 below reports 1. */ 4473168404Spjd case _PC_ACL_EXTENDED: 4474196949Strasz *valp = 0; 4475168404Spjd return (0); 4476168404Spjd 4477196949Strasz case _PC_ACL_NFS4: 4478196949Strasz 
*valp = 1; 4479196949Strasz return (0); 4480196949Strasz 4481196949Strasz case _PC_ACL_PATH_MAX: 4482196949Strasz *valp = ACL_MAX_ENTRIES; 4483196949Strasz return (0); 4484196949Strasz 4485168404Spjd default: 4486168962Spjd return (EOPNOTSUPP); 4487168404Spjd } 4488168404Spjd} 4489168404Spjd /* zfs_getsecattr: read the ACL of vp into *vsecp through zfs_getacl(). ATTR_NOACLCHECK in flag is forwarded as skipaclchk. Returns the zfs_getacl() result. */ 4490168404Spjd/*ARGSUSED*/ 4491168404Spjdstatic int 4492185029Spjdzfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4493185029Spjd caller_context_t *ct) 4494168404Spjd{ 4495168404Spjd znode_t *zp = VTOZ(vp); 4496168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4497168404Spjd int error; 4498185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 4499168404Spjd 4500168404Spjd ZFS_ENTER(zfsvfs); 4501185029Spjd ZFS_VERIFY_ZP(zp); 4502185029Spjd error = zfs_getacl(zp, vsecp, skipaclchk, cr); 4503168404Spjd ZFS_EXIT(zfsvfs); 4504168404Spjd 4505168404Spjd return (error); 4506168404Spjd} 4507168404Spjd /* zfs_setsecattr: apply the ACL in *vsecp through zfs_setacl(), then commit the ZIL if the objset has os_sync == ZFS_SYNC_ALWAYS. ATTR_NOACLCHECK in flag is forwarded as skipaclchk. Returns the zfs_setacl() result. */ 4508168404Spjd/*ARGSUSED*/ 4509228685Spjdint 4510185029Spjdzfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4511185029Spjd caller_context_t *ct) 4512168404Spjd{ 4513168404Spjd znode_t *zp = VTOZ(vp); 4514168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4515168404Spjd int error; 4516185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 4517219089Spjd zilog_t *zilog = zfsvfs->z_log; 4518168404Spjd 4519168404Spjd ZFS_ENTER(zfsvfs); 4520185029Spjd ZFS_VERIFY_ZP(zp); 4521219089Spjd 4522185029Spjd error = zfs_setacl(zp, vsecp, skipaclchk, cr); 4523219089Spjd /* The commit is issued whether or not zfs_setacl() succeeded. */ 4524219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4525219089Spjd zil_commit(zilog, 0); 4526219089Spjd 4527168404Spjd ZFS_EXIT(zfsvfs); 4528168404Spjd return (error); 4529168404Spjd} 4530168404Spjd /* zfs_getpages: fill the pages ma[0] .. ma[count - 1] of vp from the DMU while holding a reader range lock. When rbehind or rahead is non-NULL it is read as the maximum number of extra pages wanted and rewritten with the count passed back from dmu_read_pages(). Returns zfs_vm_pagerret_ok, zfs_vm_pagerret_bad when the last page lies at or past the object size, or zfs_vm_pagerret_error when dmu_read_pages() fails. */ 4531168962Spjdstatic int 4532330991Savgzfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, 4533292373Sglebius int *rahead) 4534213937Savg{ 4535213937Savg znode_t *zp = VTOZ(vp); 4536213937Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4537213937Savg objset_t *os = zp->z_zfsvfs->z_os; 4538330991Savg rl_t *rl; 4539213937Savg vm_object_t object; 4540330991Savg off_t start, end, obj_size; 4541330991Savg uint_t blksz; 4542330991Savg int pgsin_b, pgsin_a; 4543330991Savg int error; 4544213937Savg 4545213937Savg ZFS_ENTER(zfsvfs); 4546213937Savg ZFS_VERIFY_ZP(zp); 4547213937Savg /* Byte range spanned by the requested pages: [start, end). */ 4548330991Savg start = IDX_TO_OFF(ma[0]->pindex); 4549330991Savg end = IDX_TO_OFF(ma[count - 1]->pindex + 1); 4550330991Savg 4551330991Savg /* 4552330991Savg * Lock a range covering all required and optional pages. 4553330991Savg * Note that we need to handle the case of the block size growing. 
4554330991Savg */ /* Retry until z_blksz is unchanged across taking the lock, so the locked range is aligned to the current block size. */ 4555330991Savg for (;;) { 4556330991Savg blksz = zp->z_blksz; 4557330991Savg rl = zfs_range_lock(zp, rounddown(start, blksz), 4558330991Savg roundup(end, blksz) - rounddown(start, blksz), RL_READER); 4559330991Savg if (blksz == zp->z_blksz) 4560330991Savg break; 4561330991Savg zfs_range_unlock(rl); 4562213937Savg } 4563213937Savg /* Requests whose last page starts at or beyond the pager size are refused. */ 4564330991Savg object = ma[0]->object; 4565330991Savg zfs_vmobject_wlock(object); 4566330991Savg obj_size = object->un_pager.vnp.vnp_size; 4567330991Savg zfs_vmobject_wunlock(object); 4568330991Savg if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) { 4569330991Savg zfs_range_unlock(rl); 4570213937Savg ZFS_EXIT(zfsvfs); 4571248084Sattilio return (zfs_vm_pagerret_bad); 4572213937Savg } 4573213937Savg /* Read-behind is limited to the pages between the block start and the request, and to the caller hint. */ 4574330991Savg pgsin_b = 0; 4575330991Savg if (rbehind != NULL) { 4576330991Savg pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz)); 4577330991Savg pgsin_b = MIN(*rbehind, pgsin_b); 4578330991Savg } 4579292373Sglebius /* Read-ahead is limited to the rest of the block, to the page-rounded object size, and to the caller hint. */ 4580330991Savg pgsin_a = 0; 4581330991Savg if (rahead != NULL) { 4582330991Savg pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end); 4583330991Savg if (end + IDX_TO_OFF(pgsin_a) >= obj_size) 4584330991Savg pgsin_a = OFF_TO_IDX(round_page(obj_size) - end); 4585330991Savg pgsin_a = MIN(*rahead, pgsin_a); 4586243517Savg } 4587243517Savg 4588330991Savg /* 4589330991Savg * NB: we need to pass the exact byte size of the data that we expect 4590330991Savg * to read after accounting for the file size. This is required because 4591330991Savg * ZFS will panic if we request DMU to read beyond the end of the last 4592330991Savg * allocated block. 
4593330991Savg */ 4594330991Savg error = dmu_read_pages(os, zp->z_id, ma, count, &pgsin_b, &pgsin_a, 4595330991Savg MIN(end, obj_size) - (end - PAGE_SIZE)); 4596213937Savg 4597330991Savg zfs_range_unlock(rl); 4598213937Savg ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4599213937Savg ZFS_EXIT(zfsvfs); 4600330991Savg 4601330991Savg if (error != 0) 4602292386Sglebius return (zfs_vm_pagerret_error); 4603330991Savg 4604330991Savg PCPU_INC(cnt.v_vnodein); 4605330991Savg PCPU_ADD(cnt.v_vnodepgsin, count + pgsin_b + pgsin_a); 4606330991Savg if (rbehind != NULL) 4607330991Savg *rbehind = pgsin_b; 4608330991Savg if (rahead != NULL) 4609330991Savg *rahead = pgsin_a; 4610330991Savg return (zfs_vm_pagerret_ok); 4611213937Savg} 4612213937Savg 4613213937Savgstatic int 4614213937Savgzfs_freebsd_getpages(ap) 4615213937Savg struct vop_getpages_args /* { 4616213937Savg struct vnode *a_vp; 4617213937Savg vm_page_t *a_m; 4618213937Savg int a_count; 4619292373Sglebius int *a_rbehind; 4620292373Sglebius int *a_rahead; 4621213937Savg } */ *ap; 4622213937Savg{ 4623213937Savg 4624292373Sglebius return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, 4625292373Sglebius ap->a_rahead)); 4626213937Savg} 4627213937Savg 4628213937Savgstatic int 4629258746Savgzfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, 4630258746Savg int *rtvals) 4631258746Savg{ 4632258746Savg znode_t *zp = VTOZ(vp); 4633258746Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4634258746Savg rl_t *rl; 4635258746Savg dmu_tx_t *tx; 4636258746Savg struct sf_buf *sf; 4637258746Savg vm_object_t object; 4638258746Savg vm_page_t m; 4639258746Savg caddr_t va; 4640258746Savg size_t tocopy; 4641258746Savg size_t lo_len; 4642258746Savg vm_ooffset_t lo_off; 4643258746Savg vm_ooffset_t off; 4644258746Savg uint_t blksz; 4645258746Savg int ncount; 4646258746Savg int pcount; 4647258746Savg int err; 4648258746Savg int i; 4649258746Savg 4650258746Savg ZFS_ENTER(zfsvfs); 4651258746Savg ZFS_VERIFY_ZP(zp); 4652258746Savg 4653258746Savg 
object = vp->v_object; 4654258746Savg pcount = btoc(len); 4655258746Savg ncount = pcount; 4656258746Savg 4657258746Savg KASSERT(ma[0]->object == object, ("mismatching object")); 4658258746Savg KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length")); 4659258746Savg 4660258746Savg for (i = 0; i < pcount; i++) 4661258746Savg rtvals[i] = zfs_vm_pagerret_error; 4662258746Savg 4663258746Savg off = IDX_TO_OFF(ma[0]->pindex); 4664258746Savg blksz = zp->z_blksz; 4665258746Savg lo_off = rounddown(off, blksz); 4666258746Savg lo_len = roundup(len + (off - lo_off), blksz); 4667258746Savg rl = zfs_range_lock(zp, lo_off, lo_len, RL_WRITER); 4668258746Savg 4669258746Savg zfs_vmobject_wlock(object); 4670258746Savg if (len + off > object->un_pager.vnp.vnp_size) { 4671258746Savg if (object->un_pager.vnp.vnp_size > off) { 4672258746Savg int pgoff; 4673258746Savg 4674258746Savg len = object->un_pager.vnp.vnp_size - off; 4675258746Savg ncount = btoc(len); 4676258746Savg if ((pgoff = (int)len & PAGE_MASK) != 0) { 4677258746Savg /* 4678258746Savg * If the object is locked and the following 4679258746Savg * conditions hold, then the page's dirty 4680258746Savg * field cannot be concurrently changed by a 4681258746Savg * pmap operation. 
4682258746Savg */ 4683258746Savg m = ma[ncount - 1]; 4684258746Savg vm_page_assert_sbusied(m); 4685258746Savg KASSERT(!pmap_page_is_write_mapped(m), 4686258746Savg ("zfs_putpages: page %p is not read-only", m)); 4687258746Savg vm_page_clear_dirty(m, pgoff, PAGE_SIZE - 4688258746Savg pgoff); 4689258746Savg } 4690258746Savg } else { 4691258746Savg len = 0; 4692258746Savg ncount = 0; 4693258746Savg } 4694258746Savg if (ncount < pcount) { 4695258746Savg for (i = ncount; i < pcount; i++) { 4696258746Savg rtvals[i] = zfs_vm_pagerret_bad; 4697258746Savg } 4698258746Savg } 4699258746Savg } 4700258746Savg zfs_vmobject_wunlock(object); 4701258746Savg 4702258746Savg if (ncount == 0) 4703258746Savg goto out; 4704258746Savg 4705258746Savg if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 4706258746Savg zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4707258746Savg goto out; 4708258746Savg } 4709258746Savg 4710258746Savg tx = dmu_tx_create(zfsvfs->z_os); 4711258746Savg dmu_tx_hold_write(tx, zp->z_id, off, len); 4712258746Savg 4713258746Savg dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4714258746Savg zfs_sa_upgrade_txholds(tx, zp); 4715316847Savg err = dmu_tx_assign(tx, TXG_WAIT); 4716258746Savg if (err != 0) { 4717258746Savg dmu_tx_abort(tx); 4718258746Savg goto out; 4719258746Savg } 4720258746Savg 4721258746Savg if (zp->z_blksz < PAGE_SIZE) { 4722258746Savg for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) { 4723258746Savg tocopy = len > PAGE_SIZE ? 
PAGE_SIZE : len; 4724258746Savg va = zfs_map_page(ma[i], &sf); 4725258746Savg dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx); 4726258746Savg zfs_unmap_page(sf); 4727258746Savg } 4728258746Savg } else { 4729258746Savg err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx); 4730258746Savg } 4731258746Savg 4732258746Savg if (err == 0) { 4733258746Savg uint64_t mtime[2], ctime[2]; 4734258746Savg sa_bulk_attr_t bulk[3]; 4735258746Savg int count = 0; 4736258746Savg 4737258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 4738258746Savg &mtime, 16); 4739258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 4740258746Savg &ctime, 16); 4741258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4742258746Savg &zp->z_pflags, 8); 4743258746Savg zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 4744258746Savg B_TRUE); 4745321561Smav err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 4746321561Smav ASSERT0(err); 4747258746Savg zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 4748258746Savg 4749258746Savg zfs_vmobject_wlock(object); 4750258746Savg for (i = 0; i < ncount; i++) { 4751258746Savg rtvals[i] = zfs_vm_pagerret_ok; 4752258746Savg vm_page_undirty(ma[i]); 4753258746Savg } 4754258746Savg zfs_vmobject_wunlock(object); 4755258746Savg PCPU_INC(cnt.v_vnodeout); 4756258746Savg PCPU_ADD(cnt.v_vnodepgsout, ncount); 4757258746Savg } 4758258746Savg dmu_tx_commit(tx); 4759258746Savg 4760258746Savgout: 4761258746Savg zfs_range_unlock(rl); 4762258746Savg if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 || 4763258746Savg zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4764258746Savg zil_commit(zfsvfs->z_log, zp->z_id); 4765258746Savg ZFS_EXIT(zfsvfs); 4766258746Savg return (rtvals[0]); 4767258746Savg} 4768258746Savg 4769258746Savgint 4770258746Savgzfs_freebsd_putpages(ap) 4771258746Savg struct vop_putpages_args /* { 4772258746Savg struct vnode *a_vp; 4773258746Savg vm_page_t *a_m; 
4774258746Savg int a_count; 4775258746Savg int a_sync; 4776258746Savg int *a_rtvals; 4777258746Savg } */ *ap; 4778258746Savg{ 4779258746Savg 4780258746Savg return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, 4781258746Savg ap->a_rtvals)); 4782258746Savg} 4783258746Savg /* zfs_freebsd_bmap: trivial block mapping. Each non-NULL output is filled in: *a_bop with the bufobj of the vnode, *a_bnp with a_bn unchanged, and *a_runp and *a_runb with 0. Always returns 0. */ 4784258746Savgstatic int 4785243518Savgzfs_freebsd_bmap(ap) 4786243518Savg struct vop_bmap_args /* { 4787243518Savg struct vnode *a_vp; 4788243518Savg daddr_t a_bn; 4789243518Savg struct bufobj **a_bop; 4790243518Savg daddr_t *a_bnp; 4791243518Savg int *a_runp; 4792243518Savg int *a_runb; 4793243518Savg } */ *ap; 4794243518Savg{ 4795243518Savg 4796243518Savg if (ap->a_bop != NULL) 4797243518Savg *ap->a_bop = &ap->a_vp->v_bufobj; 4798243518Savg if (ap->a_bnp != NULL) 4799243518Savg *ap->a_bnp = ap->a_bn; 4800243518Savg if (ap->a_runp != NULL) 4801243518Savg *ap->a_runp = 0; 4802243518Savg if (ap->a_runb != NULL) 4803243518Savg *ap->a_runb = 0; 4804243518Savg 4805243518Savg return (0); 4806243518Savg} 4807243518Savg /* zfs_freebsd_open: call zfs_open() and, if it succeeds, create the VM object for the vnode using zp->z_size. Returns the zfs_open() result. */ 4808243518Savgstatic int 4809168962Spjdzfs_freebsd_open(ap) 4810168962Spjd struct vop_open_args /* { 4811168962Spjd struct vnode *a_vp; 4812168962Spjd int a_mode; 4813168962Spjd struct ucred *a_cred; 4814168962Spjd struct thread *a_td; 4815168962Spjd } */ *ap; 4816168962Spjd{ 4817168962Spjd vnode_t *vp = ap->a_vp; 4818168962Spjd znode_t *zp = VTOZ(vp); 4819168962Spjd int error; 4820168962Spjd 4821185029Spjd error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); 4822168962Spjd if (error == 0) 4823219089Spjd vnode_create_vobject(vp, zp->z_size, ap->a_td); 4824168962Spjd return (error); 4825168962Spjd} 4826168962Spjd /* zfs_freebsd_close: forward to zfs_close(), passing the constants 1 and 0 as its third and fourth arguments. */ 4827168962Spjdstatic int 4828168962Spjdzfs_freebsd_close(ap) 4829168962Spjd struct vop_close_args /* { 4830168962Spjd struct vnode *a_vp; 4831168962Spjd int a_fflag; 4832168962Spjd struct ucred *a_cred; 4833168962Spjd struct thread *a_td; 4834168962Spjd } */ *ap; 4835168962Spjd{ 4836168962Spjd 4837242566Savg return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred, 
NULL)); 4838168962Spjd} 4839168962Spjd /* zfs_freebsd_ioctl: forward the ioctl to zfs_ioctl(), passing a_data as an intptr_t. */ 4840168962Spjdstatic int 4841168962Spjdzfs_freebsd_ioctl(ap) 4842168962Spjd struct vop_ioctl_args /* { 4843168962Spjd struct vnode *a_vp; 4844168962Spjd u_long a_command; 4845168962Spjd caddr_t a_data; 4846168962Spjd int a_fflag; 4847168962Spjd struct ucred *a_cred; 4848168962Spjd struct thread *a_td; 4849168962Spjd } */ *ap; 4850168962Spjd{ 4851168962Spjd 4852168978Spjd return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 4853185029Spjd ap->a_fflag, ap->a_cred, NULL, NULL)); 4854168962Spjd} 4855168962Spjd /* ioflags: translate IO_* flags into F* file flags. IO_APPEND maps to FAPPEND, IO_NDELAY to FNONBLOCK, and IO_SYNC to FSYNC | FDSYNC | FRSYNC; all other bits are dropped. */ 4856168962Spjdstatic int 4857330062Savgioflags(int ioflags) 4858330062Savg{ 4859330062Savg int flags = 0; 4860330062Savg 4861330062Savg if (ioflags & IO_APPEND) 4862330062Savg flags |= FAPPEND; 4863330062Savg if (ioflags & IO_NDELAY) 4864330062Savg flags |= FNONBLOCK; 4865330062Savg if (ioflags & IO_SYNC) 4866330062Savg flags |= (FSYNC | FDSYNC | FRSYNC); 4867330062Savg 4868330062Savg return (flags); 4869330062Savg} 4870330062Savg /* zfs_freebsd_read: forward to zfs_read() with a_ioflag converted by ioflags(). */ 4871330062Savgstatic int 4872168962Spjdzfs_freebsd_read(ap) 4873168962Spjd struct vop_read_args /* { 4874168962Spjd struct vnode *a_vp; 4875168962Spjd struct uio *a_uio; 4876168962Spjd int a_ioflag; 4877168962Spjd struct ucred *a_cred; 4878168962Spjd } */ *ap; 4879168962Spjd{ 4880168962Spjd 4881213673Spjd return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 4882213673Spjd ap->a_cred, NULL)); 4883168962Spjd} 4884168962Spjd /* zfs_freebsd_write: forward to zfs_write() with a_ioflag converted by ioflags(). */ 4885168962Spjdstatic int 4886168962Spjdzfs_freebsd_write(ap) 4887168962Spjd struct vop_write_args /* { 4888168962Spjd struct vnode *a_vp; 4889168962Spjd struct uio *a_uio; 4890168962Spjd int a_ioflag; 4891168962Spjd struct ucred *a_cred; 4892168962Spjd } */ *ap; 4893168962Spjd{ 4894168962Spjd 4895213673Spjd return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 4896213673Spjd ap->a_cred, NULL)); 4897168962Spjd} 4898168962Spjd /* zfs_freebsd_access: check a_accmode against the vnode. VREAD, VWRITE, VEXEC and VAPPEND go to zfs_access(); any remaining bits go to vaccess(); VEXEC on a non-directory additionally requires at least one execute bit in z_mode. Returns 0 or an errno value. */ 4899168962Spjdstatic int 4900168962Spjdzfs_freebsd_access(ap) 4901168962Spjd struct vop_access_args /* { 4902168962Spjd struct vnode 
*a_vp; 4903192689Strasz accmode_t a_accmode; 4904168962Spjd struct ucred *a_cred; 4905168962Spjd struct thread *a_td; 4906168962Spjd } */ *ap; 4907168962Spjd{ 4908212002Sjh vnode_t *vp = ap->a_vp; 4909212002Sjh znode_t *zp = VTOZ(vp); 4910198703Spjd accmode_t accmode; 4911198703Spjd int error = 0; 4912168962Spjd 4913185172Spjd /* 4914198703Spjd * ZFS itself only knows about VREAD, VWRITE, VEXEC and VAPPEND, so only those bits are passed to zfs_access(). 4915185172Spjd */ 4916198703Spjd accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 4917198703Spjd if (accmode != 0) 4918198703Spjd error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); 4919185172Spjd 4920198703Spjd /* 4921198703Spjd * Any other requested bits (such as VADMIN) have to be handled by vaccess(). 4922198703Spjd */ 4923198703Spjd if (error == 0) { 4924198703Spjd accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 4925198703Spjd if (accmode != 0) { 4926219089Spjd error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 4927219089Spjd zp->z_gid, accmode, ap->a_cred, NULL); 4928198703Spjd } 4929185172Spjd } 4930185172Spjd 4931212002Sjh /* 4932212002Sjh * For VEXEC, ensure that at least one execute bit is set for 4933212002Sjh * non-directories. 
4934212002Sjh */ 4935212002Sjh if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 4936219089Spjd (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 4937212002Sjh error = EACCES; 4938219089Spjd } 4939212002Sjh 4940198703Spjd return (error); 4941168962Spjd} 4942168962Spjd /* zfs_freebsd_lookup: copy the component name into a NUL-terminated local buffer (at most NAME_MAX characters) and call zfs_lookup() with it. */ 4943168962Spjdstatic int 4944168962Spjdzfs_freebsd_lookup(ap) 4945168962Spjd struct vop_lookup_args /* { 4946168962Spjd struct vnode *a_dvp; 4947168962Spjd struct vnode **a_vpp; 4948168962Spjd struct componentname *a_cnp; 4949168962Spjd } */ *ap; 4950168962Spjd{ 4951168962Spjd struct componentname *cnp = ap->a_cnp; 4952168962Spjd char nm[NAME_MAX + 1]; 4953168962Spjd /* strlcpy() copies at most cn_namelen characters, and never more than the buffer holds. */ 4954168962Spjd ASSERT(cnp->cn_namelen < sizeof(nm)); 4955168962Spjd strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 4956168962Spjd 4957168962Spjd return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 4958185029Spjd cnp->cn_cred, cnp->cn_thread, 0)); 4959168962Spjd} 4960168962Spjd /* zfs_cache_lookup: use vfs_cache_lookup() when zfsvfs->z_use_namecache is set, otherwise call zfs_freebsd_lookup() directly. */ 4961168962Spjdstatic int 4962303970Savgzfs_cache_lookup(ap) 4963303970Savg struct vop_lookup_args /* { 4964303970Savg struct vnode *a_dvp; 4965303970Savg struct vnode **a_vpp; 4966303970Savg struct componentname *a_cnp; 4967303970Savg } */ *ap; 4968303970Savg{ 4969303970Savg zfsvfs_t *zfsvfs; 4970303970Savg 4971303970Savg zfsvfs = ap->a_dvp->v_mount->mnt_data; 4972303970Savg if (zfsvfs->z_use_namecache) 4973303970Savg return (vfs_cache_lookup(ap)); 4974303970Savg else 4975303970Savg return (zfs_freebsd_lookup(ap)); 4976303970Savg} 4977303970Savg /* zfs_freebsd_create: create a file through zfs_create() using the permission bits of va_mode (masked with ALLPERMS). On success, and if the name cache is in use and MAKEENTRY is set, the new vnode is entered into the name cache. Returns the zfs_create() result. */ 4978303970Savgstatic int 4979168962Spjdzfs_freebsd_create(ap) 4980168962Spjd struct vop_create_args /* { 4981168962Spjd struct vnode *a_dvp; 4982168962Spjd struct vnode **a_vpp; 4983168962Spjd struct componentname *a_cnp; 4984168962Spjd struct vattr *a_vap; 4985168962Spjd } */ *ap; 4986168962Spjd{ 4987303970Savg zfsvfs_t *zfsvfs; 4988168962Spjd struct componentname *cnp = ap->a_cnp; 4989168962Spjd vattr_t *vap = ap->a_vap; 4990276007Skib int error, mode; 
4991168962Spjd 4992168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 4993168962Spjd 4994168962Spjd vattr_init_mask(vap); 4995168962Spjd mode = vap->va_mode & ALLPERMS; 4996303970Savg zfsvfs = ap->a_dvp->v_mount->mnt_data; 4997168962Spjd 4998276007Skib error = zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 4999276007Skib ap->a_vpp, cnp->cn_cred, cnp->cn_thread); /* Enter the new vnode into the name cache only when the cache is in use, creation succeeded and MAKEENTRY is set. */ 5000303970Savg if (zfsvfs->z_use_namecache && 5001303970Savg error == 0 && (cnp->cn_flags & MAKEENTRY) != 0) 5002276007Skib cache_enter(ap->a_dvp, *ap->a_vpp, cnp); 5003276007Skib return (error); 5004168962Spjd} 5005168962Spjd /* zfs_freebsd_remove: forward to zfs_remove() with the component name and credential taken from a_cnp. */ 5006168962Spjdstatic int 5007168962Spjdzfs_freebsd_remove(ap) 5008168962Spjd struct vop_remove_args /* { 5009168962Spjd struct vnode *a_dvp; 5010168962Spjd struct vnode *a_vp; 5011168962Spjd struct componentname *a_cnp; 5012168962Spjd } */ *ap; 5013168962Spjd{ 5014168962Spjd 5015168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 5016168962Spjd 5017303970Savg return (zfs_remove(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr, 5018303970Savg ap->a_cnp->cn_cred)); 5019168962Spjd} 5020168962Spjd /* zfs_freebsd_mkdir: initialize the attribute mask of a_vap and forward to zfs_mkdir(). */ 5021168962Spjdstatic int 5022168962Spjdzfs_freebsd_mkdir(ap) 5023168962Spjd struct vop_mkdir_args /* { 5024168962Spjd struct vnode *a_dvp; 5025168962Spjd struct vnode **a_vpp; 5026168962Spjd struct componentname *a_cnp; 5027168962Spjd struct vattr *a_vap; 5028168962Spjd } */ *ap; 5029168962Spjd{ 5030168962Spjd vattr_t *vap = ap->a_vap; 5031168962Spjd 5032168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 5033168962Spjd 5034168962Spjd vattr_init_mask(vap); 5035168962Spjd 5036168962Spjd return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 5037303970Savg ap->a_cnp->cn_cred)); 5038168962Spjd} 5039168962Spjd /* zfs_freebsd_rmdir: forward to zfs_rmdir() with the component name and credential taken from a_cnp. */ 5040168962Spjdstatic int 5041168962Spjdzfs_freebsd_rmdir(ap) 5042168962Spjd struct vop_rmdir_args /* { 5043168962Spjd struct vnode *a_dvp; 5044168962Spjd struct vnode *a_vp; 5045168962Spjd struct componentname *a_cnp; 5046168962Spjd } */ *ap; 5047168962Spjd{ 5048168962Spjd struct 
componentname *cnp = ap->a_cnp; 5049168962Spjd 5050168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5051168962Spjd 5052303970Savg return (zfs_rmdir(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred)); 5053168962Spjd} 5054168962Spjd /* zfs_freebsd_readdir: forward all arguments to zfs_readdir(). */ 5055168962Spjdstatic int 5056168962Spjdzfs_freebsd_readdir(ap) 5057168962Spjd struct vop_readdir_args /* { 5058168962Spjd struct vnode *a_vp; 5059168962Spjd struct uio *a_uio; 5060168962Spjd struct ucred *a_cred; 5061168962Spjd int *a_eofflag; 5062168962Spjd int *a_ncookies; 5063168962Spjd u_long **a_cookies; 5064168962Spjd } */ *ap; 5065168962Spjd{ 5066168962Spjd 5067168962Spjd return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 5068168962Spjd ap->a_ncookies, ap->a_cookies)); 5069168962Spjd} 5070168962Spjd /* zfs_freebsd_fsync: run vop_stdfsync() (its result is ignored), then return the result of zfs_fsync() called with the credential of a_td. */ 5071168962Spjdstatic int 5072168962Spjdzfs_freebsd_fsync(ap) 5073168962Spjd struct vop_fsync_args /* { 5074168962Spjd struct vnode *a_vp; 5075168962Spjd int a_waitfor; 5076168962Spjd struct thread *a_td; 5077168962Spjd } */ *ap; 5078168962Spjd{ 5079168962Spjd 5080168962Spjd vop_stdfsync(ap); 5081185029Spjd return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 5082168962Spjd} 5083168962Spjd /* zfs_freebsd_getattr: fetch attributes through zfs_getattr() using an xvattr_t so the optional ZFS attributes can be requested, then translate those that were returned and are set into SF_ and UF_ bits stored in va_flags. Returns 0 or the zfs_getattr() error. */ 5084168962Spjdstatic int 5085168962Spjdzfs_freebsd_getattr(ap) 5086168962Spjd struct vop_getattr_args /* { 5087168962Spjd struct vnode *a_vp; 5088168962Spjd struct vattr *a_vap; 5089168962Spjd struct ucred *a_cred; 5090168962Spjd } */ *ap; 5091168962Spjd{ 5092185029Spjd vattr_t *vap = ap->a_vap; 5093185029Spjd xvattr_t xvap; 5094185029Spjd u_long fflags = 0; 5095185029Spjd int error; 5096168962Spjd 5097185029Spjd xva_init(&xvap); 5098185029Spjd xvap.xva_vattr = *vap; 5099185029Spjd xvap.xva_vattr.va_mask |= AT_XVATTR; 5100185029Spjd 5101185029Spjd /* Request every optional ZFS attribute that has a chflags equivalent. */ 5102185029Spjd /* XXX: what about SF_SETTABLE?. 
*/ 5103185029Spjd XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 5104185029Spjd XVA_SET_REQ(&xvap, XAT_APPENDONLY); 5105185029Spjd XVA_SET_REQ(&xvap, XAT_NOUNLINK); 5106185029Spjd XVA_SET_REQ(&xvap, XAT_NODUMP); 5107254627Sken XVA_SET_REQ(&xvap, XAT_READONLY); 5108254627Sken XVA_SET_REQ(&xvap, XAT_ARCHIVE); 5109254627Sken XVA_SET_REQ(&xvap, XAT_SYSTEM); 5110254627Sken XVA_SET_REQ(&xvap, XAT_HIDDEN); 5111254627Sken XVA_SET_REQ(&xvap, XAT_REPARSE); 5112254627Sken XVA_SET_REQ(&xvap, XAT_OFFLINE); 5113254627Sken XVA_SET_REQ(&xvap, XAT_SPARSE); 5114254627Sken 5115185029Spjd error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 5116185029Spjd if (error != 0) 5117185029Spjd return (error); 5118185029Spjd 5119185029Spjd /* Convert the returned ZFS optional attributes into chflags bits: a bit is set only when the attribute was returned and is non-zero. */ 5120185029Spjd#define FLAG_CHECK(fflag, xflag, xfield) do { \ 5121185029Spjd if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 5122185029Spjd fflags |= (fflag); \ 5123185029Spjd} while (0) 5124185029Spjd FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 5125185029Spjd xvap.xva_xoptattrs.xoa_immutable); 5126185029Spjd FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 5127185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 5128185029Spjd FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 5129185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 5130254627Sken FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE, 5131254627Sken xvap.xva_xoptattrs.xoa_archive); 5132185029Spjd FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 5133185029Spjd xvap.xva_xoptattrs.xoa_nodump); 5134254627Sken FLAG_CHECK(UF_READONLY, XAT_READONLY, 5135254627Sken xvap.xva_xoptattrs.xoa_readonly); 5136254627Sken FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM, 5137254627Sken xvap.xva_xoptattrs.xoa_system); 5138254627Sken FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN, 5139254627Sken xvap.xva_xoptattrs.xoa_hidden); 5140254627Sken FLAG_CHECK(UF_REPARSE, XAT_REPARSE, 5141254627Sken xvap.xva_xoptattrs.xoa_reparse); 5142254627Sken FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE, 5143254627Sken xvap.xva_xoptattrs.xoa_offline); 5144254627Sken FLAG_CHECK(UF_SPARSE, 
XAT_SPARSE, 5145254627Sken xvap.xva_xoptattrs.xoa_sparse); 5146254627Sken 5147185029Spjd#undef FLAG_CHECK 5148185029Spjd *vap = xvap.xva_vattr; 5149185029Spjd vap->va_flags = fflags; 5150185029Spjd return (0); 5151168962Spjd} 5152168962Spjd /* zfs_freebsd_setattr: apply a_vap through zfs_setattr(). When va_flags is set, the request is rejected with EOPNOTSUPP if z_use_fuids is false or if unsupported chflags bits are present; the caller privileges are then checked, and every flag whose requested state differs from z_pflags is turned into an optional-attribute request. A valid va_birthtime additionally requests XAT_CREATETIME. */ 5153168962Spjdstatic int 5154168962Spjdzfs_freebsd_setattr(ap) 5155168962Spjd struct vop_setattr_args /* { 5156168962Spjd struct vnode *a_vp; 5157168962Spjd struct vattr *a_vap; 5158168962Spjd struct ucred *a_cred; 5159168962Spjd } */ *ap; 5160168962Spjd{ 5161185172Spjd vnode_t *vp = ap->a_vp; 5162168962Spjd vattr_t *vap = ap->a_vap; 5163185172Spjd cred_t *cred = ap->a_cred; 5164185029Spjd xvattr_t xvap; 5165185029Spjd u_long fflags; 5166185029Spjd uint64_t zflags; 5167168962Spjd 5168168962Spjd vattr_init_mask(vap); 5169170044Spjd vap->va_mask &= ~AT_NOSET; 5170168962Spjd 5171185029Spjd xva_init(&xvap); 5172185029Spjd xvap.xva_vattr = *vap; 5173185029Spjd /* Snapshot of the current ZFS flags, used below to detect changes. */ 5174219089Spjd zflags = VTOZ(vp)->z_pflags; 5175185172Spjd 5176185029Spjd if (vap->va_flags != VNOVAL) { 5177197683Sdelphij zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 5178185172Spjd int error; 5179185172Spjd 5180197683Sdelphij if (zfsvfs->z_use_fuids == B_FALSE) 5181197683Sdelphij return (EOPNOTSUPP); 5182197683Sdelphij 5183185029Spjd fflags = vap->va_flags; 5184254627Sken /* 5185254627Sken * XXX KDM 5186254627Sken * We need to figure out whether it makes sense to allow 5187254627Sken * UF_REPARSE through, since we don't really have other 5188254627Sken * facilities to handle reparse points and zfs_setattr() 5189254627Sken * doesn't currently allow setting that attribute anyway. 5190254627Sken */ 5191254627Sken if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE| 5192254627Sken UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE| 5193254627Sken UF_OFFLINE|UF_SPARSE)) != 0) 5194185029Spjd return (EOPNOTSUPP); 5195185172Spjd /* 5196185172Spjd * Unprivileged processes are not permitted to unset system 5197185172Spjd * flags, or modify flags if any system flags are set. 
5198185172Spjd * Privileged non-jail processes may not modify system flags 5199185172Spjd * if securelevel > 0 and any existing system flags are set. 5200185172Spjd * Privileged jail processes behave like privileged non-jail 5201185172Spjd * processes if the security.jail.chflags_allowed sysctl 5202185172Spjd * is non-zero; otherwise, they behave like unprivileged 5203185172Spjd * processes. 5204185172Spjd */ 5205197861Spjd if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 5206197861Spjd priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) { 5207185172Spjd if (zflags & 5208185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 5209185172Spjd error = securelevel_gt(cred, 0); 5210197861Spjd if (error != 0) 5211185172Spjd return (error); 5212185172Spjd } 5213185172Spjd } else { 5214197861Spjd /* 5215197861Spjd * Callers may only modify the file flags on objects they 5216197861Spjd * have VADMIN rights for. 5217197861Spjd */ 5218197861Spjd if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0) 5219197861Spjd return (error); 5220185172Spjd if (zflags & 5221185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 5222185172Spjd return (EPERM); 5223185172Spjd } 5224185172Spjd if (fflags & 5225185172Spjd (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 5226185172Spjd return (EPERM); 5227185172Spjd } 5228185172Spjd } 5229185029Spjd /* FLAG_CHANGE requests an attribute update only when the chflags bit and the current ZFS flag disagree, and records the wanted value. */ 5230185029Spjd#define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 5231185029Spjd if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 5232185029Spjd ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 5233185029Spjd XVA_SET_REQ(&xvap, (xflag)); \ 5234185029Spjd (xfield) = ((fflags & (fflag)) != 0); \ 5235185029Spjd } \ 5236185029Spjd} while (0) 5237185029Spjd /* Convert chflags into ZFS-type flags. */ 5238185029Spjd /* XXX: what about SF_SETTABLE?. 
*/ 5239185029Spjd FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 5240185029Spjd xvap.xva_xoptattrs.xoa_immutable); 5241185029Spjd FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 5242185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 5243185029Spjd FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 5244185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 5245254627Sken FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE, 5246254627Sken xvap.xva_xoptattrs.xoa_archive); 5247185029Spjd FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 5248185172Spjd xvap.xva_xoptattrs.xoa_nodump); 5249254627Sken FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY, 5250254627Sken xvap.xva_xoptattrs.xoa_readonly); 5251254627Sken FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM, 5252254627Sken xvap.xva_xoptattrs.xoa_system); 5253254627Sken FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN, 5254254627Sken xvap.xva_xoptattrs.xoa_hidden); 5255254627Sken FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE, 5256352687Smav xvap.xva_xoptattrs.xoa_reparse); 5257254627Sken FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE, 5258254627Sken xvap.xva_xoptattrs.xoa_offline); 5259254627Sken FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE, 5260254627Sken xvap.xva_xoptattrs.xoa_sparse); 5261185029Spjd#undef FLAG_CHANGE 5262185029Spjd } /* A caller-supplied birth time is passed on as a creation-time request. */ 5263316391Sasomers if (vap->va_birthtime.tv_sec != VNOVAL) { 5264316391Sasomers xvap.xva_vattr.va_mask |= AT_XVATTR; 5265316391Sasomers XVA_SET_REQ(&xvap, XAT_CREATETIME); 5266316391Sasomers } 5267185172Spjd return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL)); 5268168962Spjd} 5269168962Spjd /* zfs_freebsd_rename: call zfs_rename() and then release one reference on each of fdvp, fvp, tdvp and, when non-NULL, tvp, whatever the outcome. Returns the zfs_rename() result. */ 5270168962Spjdstatic int 5271168962Spjdzfs_freebsd_rename(ap) 5272168962Spjd struct vop_rename_args /* { 5273168962Spjd struct vnode *a_fdvp; 5274168962Spjd struct vnode *a_fvp; 5275168962Spjd struct componentname *a_fcnp; 5276168962Spjd struct vnode *a_tdvp; 5277168962Spjd struct vnode *a_tvp; 5278168962Spjd struct componentname *a_tcnp; 5279168962Spjd } */ *ap; 5280168962Spjd{ 5281168962Spjd 
vnode_t *fdvp = ap->a_fdvp; 5282168962Spjd vnode_t *fvp = ap->a_fvp; 5283168962Spjd vnode_t *tdvp = ap->a_tdvp; 5284168962Spjd vnode_t *tvp = ap->a_tvp; 5285168962Spjd int error; 5286168962Spjd 5287192237Skmacy ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); 5288192237Skmacy ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); 5289168962Spjd 5290303970Savg error = zfs_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp, 5291303970Savg ap->a_tcnp, ap->a_fcnp->cn_cred); 5292168962Spjd /* Release the references on all vnodes regardless of the outcome; fvp and tvp are the values left by zfs_rename(). */ 5293303970Savg vrele(fdvp); 5294303970Savg vrele(fvp); 5295303970Savg vrele(tdvp); 5296303970Savg if (tvp != NULL) 5297303970Savg vrele(tvp); 5298303970Savg 5299168962Spjd return (error); 5300168962Spjd} 5301168962Spjd /* zfs_freebsd_symlink: force va_type to VLNK, initialize the attribute mask and forward to zfs_symlink(). */ 5302168962Spjdstatic int 5303168962Spjdzfs_freebsd_symlink(ap) 5304168962Spjd struct vop_symlink_args /* { 5305168962Spjd struct vnode *a_dvp; 5306168962Spjd struct vnode **a_vpp; 5307168962Spjd struct componentname *a_cnp; 5308168962Spjd struct vattr *a_vap; 5309168962Spjd char *a_target; 5310168962Spjd } */ *ap; 5311168962Spjd{ 5312168962Spjd struct componentname *cnp = ap->a_cnp; 5313168962Spjd vattr_t *vap = ap->a_vap; 5314168962Spjd 5315168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5316168962Spjd 5317168962Spjd vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. 
*/ 5318168962Spjd vattr_init_mask(vap); 5319168962Spjd 5320168962Spjd return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 5321168962Spjd ap->a_target, cnp->cn_cred, cnp->cn_thread)); 5322168962Spjd} 5323168962Spjd 5324168962Spjdstatic int 5325168962Spjdzfs_freebsd_readlink(ap) 5326168962Spjd struct vop_readlink_args /* { 5327168962Spjd struct vnode *a_vp; 5328168962Spjd struct uio *a_uio; 5329168962Spjd struct ucred *a_cred; 5330168962Spjd } */ *ap; 5331168962Spjd{ 5332168962Spjd 5333185029Spjd return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 5334168962Spjd} 5335168962Spjd 5336168962Spjdstatic int 5337168962Spjdzfs_freebsd_link(ap) 5338168962Spjd struct vop_link_args /* { 5339168962Spjd struct vnode *a_tdvp; 5340168962Spjd struct vnode *a_vp; 5341168962Spjd struct componentname *a_cnp; 5342168962Spjd } */ *ap; 5343168962Spjd{ 5344168962Spjd struct componentname *cnp = ap->a_cnp; 5345254982Sdelphij vnode_t *vp = ap->a_vp; 5346254982Sdelphij vnode_t *tdvp = ap->a_tdvp; 5347168962Spjd 5348254982Sdelphij if (tdvp->v_mount != vp->v_mount) 5349254982Sdelphij return (EXDEV); 5350254982Sdelphij 5351168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5352168962Spjd 5353254982Sdelphij return (zfs_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 5354168962Spjd} 5355168962Spjd 5356168962Spjdstatic int 5357168962Spjdzfs_freebsd_inactive(ap) 5358169170Spjd struct vop_inactive_args /* { 5359169170Spjd struct vnode *a_vp; 5360169170Spjd struct thread *a_td; 5361169170Spjd } */ *ap; 5362168962Spjd{ 5363168962Spjd vnode_t *vp = ap->a_vp; 5364168962Spjd 5365185029Spjd zfs_inactive(vp, ap->a_td->td_ucred, NULL); 5366168962Spjd return (0); 5367168962Spjd} 5368168962Spjd 5369168962Spjdstatic int 5370168962Spjdzfs_freebsd_reclaim(ap) 5371168962Spjd struct vop_reclaim_args /* { 5372168962Spjd struct vnode *a_vp; 5373168962Spjd struct thread *a_td; 5374168962Spjd } */ *ap; 5375168962Spjd{ 5376169170Spjd vnode_t *vp = ap->a_vp; 5377168962Spjd znode_t *zp = 
VTOZ(vp); 5378197133Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5379168962Spjd 5380169025Spjd ASSERT(zp != NULL); 5381169025Spjd 5382243520Savg /* Destroy the vm object and flush associated pages. */ 5383243520Savg vnode_destroy_vobject(vp); 5384243520Savg 5385168962Spjd /* 5386243520Savg * z_teardown_inactive_lock protects from a race with 5387243520Savg * zfs_znode_dmu_fini in zfsvfs_teardown during 5388243520Savg * force unmount. 5389168962Spjd */ 5390243520Savg rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 5391243520Savg if (zp->z_sa_hdl == NULL) 5392196301Spjd zfs_znode_free(zp); 5393243520Savg else 5394243520Savg zfs_zinactive(zp); 5395243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 5396185029Spjd 5397168962Spjd vp->v_data = NULL; 5398168962Spjd return (0); 5399168962Spjd} 5400168962Spjd 5401168962Spjdstatic int 5402168962Spjdzfs_freebsd_fid(ap) 5403168962Spjd struct vop_fid_args /* { 5404168962Spjd struct vnode *a_vp; 5405168962Spjd struct fid *a_fid; 5406168962Spjd } */ *ap; 5407168962Spjd{ 5408168962Spjd 5409185029Spjd return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 5410168962Spjd} 5411168962Spjd 5412168962Spjdstatic int 5413168962Spjdzfs_freebsd_pathconf(ap) 5414168962Spjd struct vop_pathconf_args /* { 5415168962Spjd struct vnode *a_vp; 5416168962Spjd int a_name; 5417168962Spjd register_t *a_retval; 5418168962Spjd } */ *ap; 5419168962Spjd{ 5420168962Spjd ulong_t val; 5421168962Spjd int error; 5422168962Spjd 5423185029Spjd error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 5424328298Sjhb if (error == 0) { 5425168962Spjd *ap->a_retval = val; 5426328298Sjhb return (error); 5427328298Sjhb } 5428328298Sjhb if (error != EOPNOTSUPP) 5429328298Sjhb return (error); 5430168962Spjd 5431196949Strasz switch (ap->a_name) { 5432328298Sjhb case _PC_NAME_MAX: 5433328298Sjhb *ap->a_retval = NAME_MAX; 5434328298Sjhb return (0); 5435328298Sjhb case _PC_PIPE_BUF: 5436328298Sjhb if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == 
VFIFO) { 5437328298Sjhb *ap->a_retval = PIPE_BUF; 5438328298Sjhb return (0); 5439328298Sjhb } 5440328298Sjhb return (EINVAL); 5441196949Strasz default: 5442328298Sjhb return (vop_stdpathconf(ap)); 5443196949Strasz } 5444196949Strasz} 5445196949Strasz 5446185029Spjd/* 5447185029Spjd * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 5448185029Spjd * extended attribute name: 5449185029Spjd * 5450185029Spjd * NAMESPACE PREFIX 5451185029Spjd * system freebsd:system: 5452185029Spjd * user (none, can be used to access ZFS fsattr(5) attributes 5453185029Spjd * created on Solaris) 5454185029Spjd */ 5455185029Spjdstatic int 5456185029Spjdzfs_create_attrname(int attrnamespace, const char *name, char *attrname, 5457185029Spjd size_t size) 5458185029Spjd{ 5459185029Spjd const char *namespace, *prefix, *suffix; 5460185029Spjd 5461185029Spjd /* We don't allow '/' character in attribute name. */ 5462185029Spjd if (strchr(name, '/') != NULL) 5463185029Spjd return (EINVAL); 5464185029Spjd /* We don't allow attribute names that start with "freebsd:" string. */ 5465185029Spjd if (strncmp(name, "freebsd:", 8) == 0) 5466185029Spjd return (EINVAL); 5467185029Spjd 5468185029Spjd bzero(attrname, size); 5469185029Spjd 5470185029Spjd switch (attrnamespace) { 5471185029Spjd case EXTATTR_NAMESPACE_USER: 5472185029Spjd#if 0 5473185029Spjd prefix = "freebsd:"; 5474185029Spjd namespace = EXTATTR_NAMESPACE_USER_STRING; 5475185029Spjd suffix = ":"; 5476185029Spjd#else 5477185029Spjd /* 5478185029Spjd * This is the default namespace by which we can access all 5479185029Spjd * attributes created on Solaris. 
5480185029Spjd */ 5481185029Spjd prefix = namespace = suffix = ""; 5482185029Spjd#endif 5483185029Spjd break; 5484185029Spjd case EXTATTR_NAMESPACE_SYSTEM: 5485185029Spjd prefix = "freebsd:"; 5486185029Spjd namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 5487185029Spjd suffix = ":"; 5488185029Spjd break; 5489185029Spjd case EXTATTR_NAMESPACE_EMPTY: 5490185029Spjd default: 5491185029Spjd return (EINVAL); 5492185029Spjd } 5493185029Spjd if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 5494185029Spjd name) >= size) { 5495185029Spjd return (ENAMETOOLONG); 5496185029Spjd } 5497185029Spjd return (0); 5498185029Spjd} 5499185029Spjd 5500185029Spjd/* 5501185029Spjd * Vnode operating to retrieve a named extended attribute. 5502185029Spjd */ 5503185029Spjdstatic int 5504185029Spjdzfs_getextattr(struct vop_getextattr_args *ap) 5505185029Spjd/* 5506185029Spjdvop_getextattr { 5507185029Spjd IN struct vnode *a_vp; 5508185029Spjd IN int a_attrnamespace; 5509185029Spjd IN const char *a_name; 5510185029Spjd INOUT struct uio *a_uio; 5511185029Spjd OUT size_t *a_size; 5512185029Spjd IN struct ucred *a_cred; 5513185029Spjd IN struct thread *a_td; 5514185029Spjd}; 5515185029Spjd*/ 5516185029Spjd{ 5517185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5518185029Spjd struct thread *td = ap->a_td; 5519185029Spjd struct nameidata nd; 5520185029Spjd char attrname[255]; 5521185029Spjd struct vattr va; 5522185029Spjd vnode_t *xvp = NULL, *vp; 5523185029Spjd int error, flags; 5524185029Spjd 5525195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5526195785Strasz ap->a_cred, ap->a_td, VREAD); 5527195785Strasz if (error != 0) 5528195785Strasz return (error); 5529195785Strasz 5530185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5531185029Spjd sizeof(attrname)); 5532185029Spjd if (error != 0) 5533185029Spjd return (error); 5534185029Spjd 5535185029Spjd ZFS_ENTER(zfsvfs); 5536185029Spjd 5537185029Spjd error = 
zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5538185029Spjd LOOKUP_XATTR); 5539185029Spjd if (error != 0) { 5540185029Spjd ZFS_EXIT(zfsvfs); 5541185029Spjd return (error); 5542185029Spjd } 5543185029Spjd 5544185029Spjd flags = FREAD; 5545241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 5546185029Spjd xvp, td); 5547194586Skib error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL); 5548185029Spjd vp = nd.ni_vp; 5549185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 5550185029Spjd if (error != 0) { 5551196303Spjd ZFS_EXIT(zfsvfs); 5552195785Strasz if (error == ENOENT) 5553195785Strasz error = ENOATTR; 5554185029Spjd return (error); 5555185029Spjd } 5556185029Spjd 5557185029Spjd if (ap->a_size != NULL) { 5558185029Spjd error = VOP_GETATTR(vp, &va, ap->a_cred); 5559185029Spjd if (error == 0) 5560185029Spjd *ap->a_size = (size_t)va.va_size; 5561185029Spjd } else if (ap->a_uio != NULL) 5562224605Smm error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); 5563185029Spjd 5564185029Spjd VOP_UNLOCK(vp, 0); 5565185029Spjd vn_close(vp, flags, ap->a_cred, td); 5566185029Spjd ZFS_EXIT(zfsvfs); 5567185029Spjd 5568185029Spjd return (error); 5569185029Spjd} 5570185029Spjd 5571185029Spjd/* 5572185029Spjd * Vnode operation to remove a named attribute. 
5573185029Spjd */ 5574185029Spjdint 5575185029Spjdzfs_deleteextattr(struct vop_deleteextattr_args *ap) 5576185029Spjd/* 5577185029Spjdvop_deleteextattr { 5578185029Spjd IN struct vnode *a_vp; 5579185029Spjd IN int a_attrnamespace; 5580185029Spjd IN const char *a_name; 5581185029Spjd IN struct ucred *a_cred; 5582185029Spjd IN struct thread *a_td; 5583185029Spjd}; 5584185029Spjd*/ 5585185029Spjd{ 5586185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5587185029Spjd struct thread *td = ap->a_td; 5588185029Spjd struct nameidata nd; 5589185029Spjd char attrname[255]; 5590185029Spjd struct vattr va; 5591185029Spjd vnode_t *xvp = NULL, *vp; 5592185029Spjd int error, flags; 5593185029Spjd 5594195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5595195785Strasz ap->a_cred, ap->a_td, VWRITE); 5596195785Strasz if (error != 0) 5597195785Strasz return (error); 5598195785Strasz 5599185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5600185029Spjd sizeof(attrname)); 5601185029Spjd if (error != 0) 5602185029Spjd return (error); 5603185029Spjd 5604185029Spjd ZFS_ENTER(zfsvfs); 5605185029Spjd 5606185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5607185029Spjd LOOKUP_XATTR); 5608185029Spjd if (error != 0) { 5609185029Spjd ZFS_EXIT(zfsvfs); 5610185029Spjd return (error); 5611185029Spjd } 5612185029Spjd 5613241896Skib NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 5614185029Spjd UIO_SYSSPACE, attrname, xvp, td); 5615185029Spjd error = namei(&nd); 5616185029Spjd vp = nd.ni_vp; 5617185029Spjd if (error != 0) { 5618196303Spjd ZFS_EXIT(zfsvfs); 5619260706Savg NDFREE(&nd, NDF_ONLY_PNBUF); 5620195785Strasz if (error == ENOENT) 5621195785Strasz error = ENOATTR; 5622185029Spjd return (error); 5623185029Spjd } 5624260706Savg 5625185029Spjd error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 5626260706Savg NDFREE(&nd, NDF_ONLY_PNBUF); 5627185029Spjd 5628185029Spjd vput(nd.ni_dvp); 5629185029Spjd if (vp == 
nd.ni_dvp) 5630185029Spjd vrele(vp); 5631185029Spjd else 5632185029Spjd vput(vp); 5633185029Spjd ZFS_EXIT(zfsvfs); 5634185029Spjd 5635185029Spjd return (error); 5636185029Spjd} 5637185029Spjd 5638185029Spjd/* 5639185029Spjd * Vnode operation to set a named attribute. 5640185029Spjd */ 5641185029Spjdstatic int 5642185029Spjdzfs_setextattr(struct vop_setextattr_args *ap) 5643185029Spjd/* 5644185029Spjdvop_setextattr { 5645185029Spjd IN struct vnode *a_vp; 5646185029Spjd IN int a_attrnamespace; 5647185029Spjd IN const char *a_name; 5648185029Spjd INOUT struct uio *a_uio; 5649185029Spjd IN struct ucred *a_cred; 5650185029Spjd IN struct thread *a_td; 5651185029Spjd}; 5652185029Spjd*/ 5653185029Spjd{ 5654185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5655185029Spjd struct thread *td = ap->a_td; 5656185029Spjd struct nameidata nd; 5657185029Spjd char attrname[255]; 5658185029Spjd struct vattr va; 5659185029Spjd vnode_t *xvp = NULL, *vp; 5660185029Spjd int error, flags; 5661185029Spjd 5662195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5663195785Strasz ap->a_cred, ap->a_td, VWRITE); 5664195785Strasz if (error != 0) 5665195785Strasz return (error); 5666195785Strasz 5667185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5668185029Spjd sizeof(attrname)); 5669185029Spjd if (error != 0) 5670185029Spjd return (error); 5671185029Spjd 5672185029Spjd ZFS_ENTER(zfsvfs); 5673185029Spjd 5674185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5675195785Strasz LOOKUP_XATTR | CREATE_XATTR_DIR); 5676185029Spjd if (error != 0) { 5677185029Spjd ZFS_EXIT(zfsvfs); 5678185029Spjd return (error); 5679185029Spjd } 5680185029Spjd 5681185029Spjd flags = FFLAGS(O_WRONLY | O_CREAT); 5682241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 5683185029Spjd xvp, td); 5684194586Skib error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); 5685185029Spjd vp = nd.ni_vp; 5686185029Spjd NDFREE(&nd, 
NDF_ONLY_PNBUF); 5687185029Spjd if (error != 0) { 5688185029Spjd ZFS_EXIT(zfsvfs); 5689185029Spjd return (error); 5690185029Spjd } 5691185029Spjd 5692185029Spjd VATTR_NULL(&va); 5693185029Spjd va.va_size = 0; 5694185029Spjd error = VOP_SETATTR(vp, &va, ap->a_cred); 5695185029Spjd if (error == 0) 5696268420Smav VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred); 5697185029Spjd 5698185029Spjd VOP_UNLOCK(vp, 0); 5699185029Spjd vn_close(vp, flags, ap->a_cred, td); 5700185029Spjd ZFS_EXIT(zfsvfs); 5701185029Spjd 5702185029Spjd return (error); 5703185029Spjd} 5704185029Spjd 5705185029Spjd/* 5706185029Spjd * Vnode operation to retrieve extended attributes on a vnode. 5707185029Spjd */ 5708185029Spjdstatic int 5709185029Spjdzfs_listextattr(struct vop_listextattr_args *ap) 5710185029Spjd/* 5711185029Spjdvop_listextattr { 5712185029Spjd IN struct vnode *a_vp; 5713185029Spjd IN int a_attrnamespace; 5714185029Spjd INOUT struct uio *a_uio; 5715185029Spjd OUT size_t *a_size; 5716185029Spjd IN struct ucred *a_cred; 5717185029Spjd IN struct thread *a_td; 5718185029Spjd}; 5719185029Spjd*/ 5720185029Spjd{ 5721185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5722185029Spjd struct thread *td = ap->a_td; 5723185029Spjd struct nameidata nd; 5724185029Spjd char attrprefix[16]; 5725185029Spjd u_char dirbuf[sizeof(struct dirent)]; 5726185029Spjd struct dirent *dp; 5727185029Spjd struct iovec aiov; 5728185029Spjd struct uio auio, *uio = ap->a_uio; 5729185029Spjd size_t *sizep = ap->a_size; 5730185029Spjd size_t plen; 5731185029Spjd vnode_t *xvp = NULL, *vp; 5732185029Spjd int done, error, eof, pos; 5733185029Spjd 5734195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5735195785Strasz ap->a_cred, ap->a_td, VREAD); 5736196303Spjd if (error != 0) 5737195785Strasz return (error); 5738195785Strasz 5739185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 5740185029Spjd sizeof(attrprefix)); 5741185029Spjd if (error != 0) 5742185029Spjd return (error); 
5743185029Spjd plen = strlen(attrprefix); 5744185029Spjd 5745185029Spjd ZFS_ENTER(zfsvfs); 5746185029Spjd 5747195822Strasz if (sizep != NULL) 5748195822Strasz *sizep = 0; 5749195822Strasz 5750185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5751185029Spjd LOOKUP_XATTR); 5752185029Spjd if (error != 0) { 5753196303Spjd ZFS_EXIT(zfsvfs); 5754195785Strasz /* 5755195785Strasz * ENOATTR means that the EA directory does not yet exist, 5756195785Strasz * i.e. there are no extended attributes there. 5757195785Strasz */ 5758195785Strasz if (error == ENOATTR) 5759195785Strasz error = 0; 5760185029Spjd return (error); 5761185029Spjd } 5762185029Spjd 5763241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 5764188588Sjhb UIO_SYSSPACE, ".", xvp, td); 5765185029Spjd error = namei(&nd); 5766185029Spjd vp = nd.ni_vp; 5767185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 5768185029Spjd if (error != 0) { 5769185029Spjd ZFS_EXIT(zfsvfs); 5770185029Spjd return (error); 5771185029Spjd } 5772185029Spjd 5773185029Spjd auio.uio_iov = &aiov; 5774185029Spjd auio.uio_iovcnt = 1; 5775185029Spjd auio.uio_segflg = UIO_SYSSPACE; 5776185029Spjd auio.uio_td = td; 5777185029Spjd auio.uio_rw = UIO_READ; 5778185029Spjd auio.uio_offset = 0; 5779185029Spjd 5780185029Spjd do { 5781185029Spjd u_char nlen; 5782185029Spjd 5783185029Spjd aiov.iov_base = (void *)dirbuf; 5784185029Spjd aiov.iov_len = sizeof(dirbuf); 5785185029Spjd auio.uio_resid = sizeof(dirbuf); 5786185029Spjd error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 5787185029Spjd done = sizeof(dirbuf) - auio.uio_resid; 5788185029Spjd if (error != 0) 5789185029Spjd break; 5790185029Spjd for (pos = 0; pos < done;) { 5791185029Spjd dp = (struct dirent *)(dirbuf + pos); 5792185029Spjd pos += dp->d_reclen; 5793185029Spjd /* 5794185029Spjd * XXX: Temporarily we also accept DT_UNKNOWN, as this 5795185029Spjd * is what we get when attribute was created on Solaris. 
5796185029Spjd */ 5797185029Spjd if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 5798185029Spjd continue; 5799185029Spjd if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) 5800185029Spjd continue; 5801185029Spjd else if (strncmp(dp->d_name, attrprefix, plen) != 0) 5802185029Spjd continue; 5803185029Spjd nlen = dp->d_namlen - plen; 5804185029Spjd if (sizep != NULL) 5805185029Spjd *sizep += 1 + nlen; 5806185029Spjd else if (uio != NULL) { 5807185029Spjd /* 5808185029Spjd * Format of extattr name entry is one byte for 5809185029Spjd * length and the rest for name. 5810185029Spjd */ 5811185029Spjd error = uiomove(&nlen, 1, uio->uio_rw, uio); 5812185029Spjd if (error == 0) { 5813185029Spjd error = uiomove(dp->d_name + plen, nlen, 5814185029Spjd uio->uio_rw, uio); 5815185029Spjd } 5816185029Spjd if (error != 0) 5817185029Spjd break; 5818185029Spjd } 5819185029Spjd } 5820185029Spjd } while (!eof && error == 0); 5821185029Spjd 5822185029Spjd vput(vp); 5823185029Spjd ZFS_EXIT(zfsvfs); 5824185029Spjd 5825185029Spjd return (error); 5826185029Spjd} 5827185029Spjd 5828192800Straszint 5829192800Straszzfs_freebsd_getacl(ap) 5830192800Strasz struct vop_getacl_args /* { 5831192800Strasz struct vnode *vp; 5832192800Strasz acl_type_t type; 5833192800Strasz struct acl *aclp; 5834192800Strasz struct ucred *cred; 5835192800Strasz struct thread *td; 5836192800Strasz } */ *ap; 5837192800Strasz{ 5838192800Strasz int error; 5839192800Strasz vsecattr_t vsecattr; 5840192800Strasz 5841192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 5842197435Strasz return (EINVAL); 5843192800Strasz 5844192800Strasz vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 5845192800Strasz if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)) 5846192800Strasz return (error); 5847192800Strasz 5848192800Strasz error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt); 5849196303Spjd if (vsecattr.vsa_aclentp != NULL) 5850196303Spjd kmem_free(vsecattr.vsa_aclentp, 
vsecattr.vsa_aclentsz); 5851192800Strasz 5852196303Spjd return (error); 5853192800Strasz} 5854192800Strasz 5855192800Straszint 5856192800Straszzfs_freebsd_setacl(ap) 5857192800Strasz struct vop_setacl_args /* { 5858192800Strasz struct vnode *vp; 5859192800Strasz acl_type_t type; 5860192800Strasz struct acl *aclp; 5861192800Strasz struct ucred *cred; 5862192800Strasz struct thread *td; 5863192800Strasz } */ *ap; 5864192800Strasz{ 5865192800Strasz int error; 5866192800Strasz vsecattr_t vsecattr; 5867192800Strasz int aclbsize; /* size of acl list in bytes */ 5868192800Strasz aclent_t *aaclp; 5869192800Strasz 5870192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 5871197435Strasz return (EINVAL); 5872192800Strasz 5873314710Smm if (ap->a_aclp == NULL) 5874314710Smm return (EINVAL); 5875314710Smm 5876192800Strasz if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 5877192800Strasz return (EINVAL); 5878192800Strasz 5879192800Strasz /* 5880196949Strasz * With NFSv4 ACLs, chmod(2) may need to add additional entries, 5881192800Strasz * splitting every entry into two and appending "canonical six" 5882192800Strasz * entries at the end. Don't allow for setting an ACL that would 5883192800Strasz * cause chmod(2) to run out of ACL entries. 
5884192800Strasz */ 5885192800Strasz if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 5886192800Strasz return (ENOSPC); 5887192800Strasz 5888208030Strasz error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 5889208030Strasz if (error != 0) 5890208030Strasz return (error); 5891208030Strasz 5892192800Strasz vsecattr.vsa_mask = VSA_ACE; 5893192800Strasz aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t); 5894192800Strasz vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 5895192800Strasz aaclp = vsecattr.vsa_aclentp; 5896192800Strasz vsecattr.vsa_aclentsz = aclbsize; 5897192800Strasz 5898192800Strasz aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 5899192800Strasz error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL); 5900192800Strasz kmem_free(aaclp, aclbsize); 5901192800Strasz 5902192800Strasz return (error); 5903192800Strasz} 5904192800Strasz 5905192800Straszint 5906192800Straszzfs_freebsd_aclcheck(ap) 5907192800Strasz struct vop_aclcheck_args /* { 5908192800Strasz struct vnode *vp; 5909192800Strasz acl_type_t type; 5910192800Strasz struct acl *aclp; 5911192800Strasz struct ucred *cred; 5912192800Strasz struct thread *td; 5913192800Strasz } */ *ap; 5914192800Strasz{ 5915192800Strasz 5916192800Strasz return (EOPNOTSUPP); 5917192800Strasz} 5918192800Strasz 5919299906Savgstatic int 5920299906Savgzfs_vptocnp(struct vop_vptocnp_args *ap) 5921299906Savg{ 5922299906Savg vnode_t *covered_vp; 5923299906Savg vnode_t *vp = ap->a_vp;; 5924299906Savg zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 5925299906Savg znode_t *zp = VTOZ(vp); 5926299906Savg int ltype; 5927299906Savg int error; 5928299906Savg 5929301870Savg ZFS_ENTER(zfsvfs); 5930301870Savg ZFS_VERIFY_ZP(zp); 5931301870Savg 5932299906Savg /* 5933299906Savg * If we are a snapshot mounted under .zfs, run the operation 5934299906Savg * on the covered vnode. 
5935299906Savg */ 5936324158Savg if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) { 5937307995Savg char name[MAXNAMLEN + 1]; 5938307995Savg znode_t *dzp; 5939307995Savg size_t len; 5940307995Savg 5941307995Savg error = zfs_znode_parent_and_name(zp, &dzp, name); 5942307995Savg if (error == 0) { 5943307995Savg len = strlen(name); 5944314030Savg if (*ap->a_buflen < len) 5945314030Savg error = SET_ERROR(ENOMEM); 5946314030Savg } 5947314030Savg if (error == 0) { 5948307995Savg *ap->a_buflen -= len; 5949307995Savg bcopy(name, ap->a_buf + *ap->a_buflen, len); 5950307995Savg *ap->a_vpp = ZTOV(dzp); 5951307995Savg } 5952301870Savg ZFS_EXIT(zfsvfs); 5953307995Savg return (error); 5954301870Savg } 5955301870Savg ZFS_EXIT(zfsvfs); 5956299906Savg 5957299906Savg covered_vp = vp->v_mount->mnt_vnodecovered; 5958299906Savg vhold(covered_vp); 5959299906Savg ltype = VOP_ISLOCKED(vp); 5960299906Savg VOP_UNLOCK(vp, 0); 5961315842Savg error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread); 5962299906Savg if (error == 0) { 5963299906Savg error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred, 5964299906Savg ap->a_buf, ap->a_buflen); 5965299906Savg vput(covered_vp); 5966299906Savg } 5967299906Savg vn_lock(vp, ltype | LK_RETRY); 5968299906Savg if ((vp->v_iflag & VI_DOOMED) != 0) 5969299906Savg error = SET_ERROR(ENOENT); 5970299906Savg return (error); 5971299906Savg} 5972299906Savg 5973303970Savg#ifdef DIAGNOSTIC 5974303970Savgstatic int 5975303970Savgzfs_lock(ap) 5976303970Savg struct vop_lock1_args /* { 5977303970Savg struct vnode *a_vp; 5978303970Savg int a_flags; 5979303970Savg char *file; 5980303970Savg int line; 5981303970Savg } */ *ap; 5982303970Savg{ 5983310066Savg vnode_t *vp; 5984303970Savg znode_t *zp; 5985303970Savg int err; 5986303970Savg 5987303970Savg err = vop_stdlock(ap); 5988310066Savg if (err == 0 && (ap->a_flags & LK_NOWAIT) == 0) { 5989310066Savg vp = ap->a_vp; 5990310066Savg zp = vp->v_data; 5991310066Savg if (vp->v_mount != NULL && (vp->v_iflag & 
VI_DOOMED) == 0 && 5992310066Savg zp != NULL && (zp->z_pflags & ZFS_XATTR) == 0) 5993310066Savg VERIFY(!RRM_LOCK_HELD(&zp->z_zfsvfs->z_teardown_lock)); 5994303970Savg } 5995303970Savg return (err); 5996303970Savg} 5997303970Savg#endif 5998303970Savg 5999168404Spjdstruct vop_vector zfs_vnodeops; 6000168404Spjdstruct vop_vector zfs_fifoops; 6001209962Smmstruct vop_vector zfs_shareops; 6002168404Spjd 6003168404Spjdstruct vop_vector zfs_vnodeops = { 6004185029Spjd .vop_default = &default_vnodeops, 6005185029Spjd .vop_inactive = zfs_freebsd_inactive, 6006185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 6007185029Spjd .vop_access = zfs_freebsd_access, 6008303970Savg .vop_lookup = zfs_cache_lookup, 6009185029Spjd .vop_cachedlookup = zfs_freebsd_lookup, 6010185029Spjd .vop_getattr = zfs_freebsd_getattr, 6011185029Spjd .vop_setattr = zfs_freebsd_setattr, 6012185029Spjd .vop_create = zfs_freebsd_create, 6013185029Spjd .vop_mknod = zfs_freebsd_create, 6014185029Spjd .vop_mkdir = zfs_freebsd_mkdir, 6015185029Spjd .vop_readdir = zfs_freebsd_readdir, 6016185029Spjd .vop_fsync = zfs_freebsd_fsync, 6017185029Spjd .vop_open = zfs_freebsd_open, 6018185029Spjd .vop_close = zfs_freebsd_close, 6019185029Spjd .vop_rmdir = zfs_freebsd_rmdir, 6020185029Spjd .vop_ioctl = zfs_freebsd_ioctl, 6021185029Spjd .vop_link = zfs_freebsd_link, 6022185029Spjd .vop_symlink = zfs_freebsd_symlink, 6023185029Spjd .vop_readlink = zfs_freebsd_readlink, 6024185029Spjd .vop_read = zfs_freebsd_read, 6025185029Spjd .vop_write = zfs_freebsd_write, 6026185029Spjd .vop_remove = zfs_freebsd_remove, 6027185029Spjd .vop_rename = zfs_freebsd_rename, 6028185029Spjd .vop_pathconf = zfs_freebsd_pathconf, 6029243518Savg .vop_bmap = zfs_freebsd_bmap, 6030185029Spjd .vop_fid = zfs_freebsd_fid, 6031185029Spjd .vop_getextattr = zfs_getextattr, 6032185029Spjd .vop_deleteextattr = zfs_deleteextattr, 6033185029Spjd .vop_setextattr = zfs_setextattr, 6034185029Spjd .vop_listextattr = zfs_listextattr, 6035192800Strasz .vop_getacl = 
zfs_freebsd_getacl, 6036192800Strasz .vop_setacl = zfs_freebsd_setacl, 6037192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6038213937Savg .vop_getpages = zfs_freebsd_getpages, 6039258746Savg .vop_putpages = zfs_freebsd_putpages, 6040299906Savg .vop_vptocnp = zfs_vptocnp, 6041303970Savg#ifdef DIAGNOSTIC 6042303970Savg .vop_lock1 = zfs_lock, 6043303970Savg#endif 6044168404Spjd}; 6045168404Spjd 6046169170Spjdstruct vop_vector zfs_fifoops = { 6047185029Spjd .vop_default = &fifo_specops, 6048200162Skib .vop_fsync = zfs_freebsd_fsync, 6049185029Spjd .vop_access = zfs_freebsd_access, 6050185029Spjd .vop_getattr = zfs_freebsd_getattr, 6051185029Spjd .vop_inactive = zfs_freebsd_inactive, 6052185029Spjd .vop_read = VOP_PANIC, 6053185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 6054185029Spjd .vop_setattr = zfs_freebsd_setattr, 6055185029Spjd .vop_write = VOP_PANIC, 6056328298Sjhb .vop_pathconf = zfs_freebsd_pathconf, 6057185029Spjd .vop_fid = zfs_freebsd_fid, 6058192800Strasz .vop_getacl = zfs_freebsd_getacl, 6059192800Strasz .vop_setacl = zfs_freebsd_setacl, 6060192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6061168404Spjd}; 6062209962Smm 6063209962Smm/* 6064209962Smm * special share hidden files vnode operations template 6065209962Smm */ 6066209962Smmstruct vop_vector zfs_shareops = { 6067209962Smm .vop_default = &default_vnodeops, 6068209962Smm .vop_access = zfs_freebsd_access, 6069209962Smm .vop_inactive = zfs_freebsd_inactive, 6070209962Smm .vop_reclaim = zfs_freebsd_reclaim, 6071209962Smm .vop_fid = zfs_freebsd_fid, 6072209962Smm .vop_pathconf = zfs_freebsd_pathconf, 6073209962Smm}; 6074