zfs_vnops.c revision 331017
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21321545Smav 22168404Spjd/* 23212694Smm * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24289562Smav * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 25296519Smav * Copyright (c) 2014 Integros [integros.com] 26321545Smav * Copyright 2017 Nexenta Systems, Inc. 27168404Spjd */ 28168404Spjd 29169195Spjd/* Portions Copyright 2007 Jeremy Teo */ 30219089Spjd/* Portions Copyright 2010 Robert Milkowski */ 31169195Spjd 32168404Spjd#include <sys/types.h> 33168404Spjd#include <sys/param.h> 34168404Spjd#include <sys/time.h> 35168404Spjd#include <sys/systm.h> 36168404Spjd#include <sys/sysmacros.h> 37168404Spjd#include <sys/resource.h> 38168404Spjd#include <sys/vfs.h> 39248084Sattilio#include <sys/vm.h> 40168404Spjd#include <sys/vnode.h> 41168404Spjd#include <sys/file.h> 42168404Spjd#include <sys/stat.h> 43168404Spjd#include <sys/kmem.h> 44168404Spjd#include <sys/taskq.h> 45168404Spjd#include <sys/uio.h> 46168404Spjd#include <sys/atomic.h> 47168404Spjd#include <sys/namei.h> 48168404Spjd#include <sys/mman.h> 49168404Spjd#include <sys/cmn_err.h> 50168404Spjd#include <sys/errno.h> 51168404Spjd#include <sys/unistd.h> 52168404Spjd#include <sys/zfs_dir.h> 53168404Spjd#include <sys/zfs_ioctl.h> 54168404Spjd#include <sys/fs/zfs.h> 55168404Spjd#include <sys/dmu.h> 56219089Spjd#include <sys/dmu_objset.h> 57168404Spjd#include <sys/spa.h> 58168404Spjd#include <sys/txg.h> 59168404Spjd#include <sys/dbuf.h> 60168404Spjd#include <sys/zap.h> 61219089Spjd#include <sys/sa.h> 62168404Spjd#include <sys/dirent.h> 63168962Spjd#include <sys/policy.h> 64168962Spjd#include <sys/sunddi.h> 65168404Spjd#include <sys/filio.h> 66209962Smm#include <sys/sid.h> 67168404Spjd#include <sys/zfs_ctldir.h> 68185029Spjd#include <sys/zfs_fuid.h> 69219089Spjd#include <sys/zfs_sa.h> 70168404Spjd#include <sys/zfs_rlock.h> 71185029Spjd#include <sys/extdirent.h> 72185029Spjd#include <sys/kidmap.h> 73168404Spjd#include <sys/bio.h> 74168404Spjd#include <sys/buf.h> 75168404Spjd#include <sys/sched.h> 76192800Strasz#include <sys/acl.h> 77331017Skevans#include <sys/vmmeter.h> 78239077Smarius#include <vm/vm_param.h> 79325132Savg#include <sys/zil.h> 80168404Spjd 81168404Spjd/* 82168404Spjd * Programming rules. 83168404Spjd * 84168404Spjd * Each vnode op performs some logical unit of work. To do this, the ZPL must 85168404Spjd * properly lock its in-core state, create a DMU transaction, do the work, 86168404Spjd * record this work in the intent log (ZIL), commit the DMU transaction, 87185029Spjd * and wait for the intent log to commit if it is a synchronous operation. 88185029Spjd * Moreover, the vnode ops must work in both normal and log replay context. 89168404Spjd * The ordering of events is important to avoid deadlocks and references 90168404Spjd * to freed memory. The example below illustrates the following Big Rules: 91168404Spjd * 92251631Sdelphij * (1) A check must be made in each zfs thread for a mounted file system. 93168404Spjd * This is done avoiding races using ZFS_ENTER(zfsvfs). 94251631Sdelphij * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 95251631Sdelphij * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 96251631Sdelphij * can return EIO from the calling function. 97168404Spjd * 98168404Spjd * (2) VN_RELE() should always be the last thing except for zil_commit() 99168404Spjd * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 100168404Spjd * First, if it's the last reference, the vnode/znode 101168404Spjd * can be freed, so the zp may point to freed memory. Second, the last 102168404Spjd * reference will call zfs_zinactive(), which may induce a lot of work -- 103168404Spjd * pushing cached pages (which acquires range locks) and syncing out 104168404Spjd * cached atime changes. Third, zfs_zinactive() may require a new tx, 105168404Spjd * which could deadlock the system if you were already holding one. 106191900Skmacy * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 107168404Spjd * 108168404Spjd * (3) All range locks must be grabbed before calling dmu_tx_assign(), 109168404Spjd * as they can span dmu_tx_assign() calls. 110168404Spjd * 111258720Savg * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to 112258720Savg * dmu_tx_assign(). This is critical because we don't want to block 113258720Savg * while holding locks. 114168404Spjd * 115258720Savg * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This 116258720Savg * reduces lock contention and CPU usage when we must wait (note that if 117258720Savg * throughput is constrained by the storage, nearly every transaction 118258720Savg * must wait). 119258720Savg * 120258720Savg * Note, in particular, that if a lock is sometimes acquired before 121258720Savg * the tx assigns, and sometimes after (e.g. z_lock), then failing 122258720Savg * to use a non-blocking assign can deadlock the system. The scenario: 123258720Savg * 124168404Spjd * Thread A has grabbed a lock before calling dmu_tx_assign(). 125168404Spjd * Thread B is in an already-assigned tx, and blocks for this lock. 126168404Spjd * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 127168404Spjd * forever, because the previous txg can't quiesce until B's tx commits. 128168404Spjd * 129168404Spjd * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 130258632Savg * then drop all locks, call dmu_tx_wait(), and try again. On subsequent 131330986Savg * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT, 132258632Savg * to indicate that this operation has already called dmu_tx_wait(). 133258632Savg * This will ensure that we don't retry forever, waiting a short bit 134258632Savg * each time. 135168404Spjd * 136168404Spjd * (5) If the operation succeeded, generate the intent log entry for it 137168404Spjd * before dropping locks. This ensures that the ordering of events 138168404Spjd * in the intent log matches the order in which they actually occurred. 139251631Sdelphij * During ZIL replay the zfs_log_* functions will update the sequence 140209962Smm * number to indicate the zil transaction has replayed. 141168404Spjd * 142168404Spjd * (6) At the end of each vnode op, the DMU tx must always commit, 143168404Spjd * regardless of whether there were any errors. 144168404Spjd * 145219089Spjd * (7) After dropping all locks, invoke zil_commit(zilog, foid) 146168404Spjd * to ensure that synchronous semantics are provided when necessary. 147168404Spjd * 148168404Spjd * In general, this is how things should be ordered in each vnode op: 149168404Spjd * 150168404Spjd * ZFS_ENTER(zfsvfs); // exit if unmounted 151168404Spjd * top: 152303970Savg * zfs_dirent_lookup(&dl, ...) // lock directory entry (may VN_HOLD()) 153168404Spjd * rw_enter(...); // grab any other locks you need 154168404Spjd * tx = dmu_tx_create(...); // get DMU tx 155168404Spjd * dmu_tx_hold_*(); // hold each object you might modify 156330986Savg * error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); 157168404Spjd * if (error) { 158168404Spjd * rw_exit(...); // drop locks 159168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 160168404Spjd * VN_RELE(...); // release held vnodes 161209962Smm * if (error == ERESTART) { 162258632Savg * waited = B_TRUE; 163168404Spjd * dmu_tx_wait(tx); 164168404Spjd * dmu_tx_abort(tx); 165168404Spjd * goto top; 166168404Spjd * } 167168404Spjd * dmu_tx_abort(tx); // abort DMU tx 168168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 169168404Spjd * return (error); // really out of space 170168404Spjd * } 171168404Spjd * error = do_real_work(); // do whatever this VOP does 172168404Spjd * if (error == 0) 173168404Spjd * zfs_log_*(...); // on success, make ZIL entry 174168404Spjd * dmu_tx_commit(tx); // commit DMU tx -- error or not 175168404Spjd * rw_exit(...); // drop locks 176168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 177168404Spjd * VN_RELE(...); // release held vnodes 178219089Spjd * zil_commit(zilog, foid); // synchronous when necessary 179168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 180168404Spjd * return (error); // done, report error 181168404Spjd */ 182185029Spjd 183168404Spjd/* ARGSUSED */ 184168404Spjdstatic int 185185029Spjdzfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 186168404Spjd{ 187168962Spjd znode_t *zp = VTOZ(*vpp); 188209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 189168404Spjd 190209962Smm ZFS_ENTER(zfsvfs); 191209962Smm ZFS_VERIFY_ZP(zp); 192209962Smm 193219089Spjd if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 194185029Spjd ((flag & FAPPEND) == 0)) { 195209962Smm ZFS_EXIT(zfsvfs); 196249195Smm return (SET_ERROR(EPERM)); 197185029Spjd } 198185029Spjd 199185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 200185029Spjd ZTOV(zp)->v_type == VREG && 201219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 202209962Smm if (fs_vscan(*vpp, cr, 0) != 0) { 203209962Smm ZFS_EXIT(zfsvfs); 204249195Smm return (SET_ERROR(EACCES)); 205209962Smm } 206209962Smm } 207185029Spjd 208168404Spjd /* Keep a count of the synchronous opens in the znode */ 209168962Spjd if (flag & (FSYNC | FDSYNC)) 210168404Spjd atomic_inc_32(&zp->z_sync_cnt); 211185029Spjd 212209962Smm ZFS_EXIT(zfsvfs); 213168404Spjd return (0); 214168404Spjd} 215168404Spjd 216168404Spjd/* ARGSUSED */ 217168404Spjdstatic int 218185029Spjdzfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 219185029Spjd caller_context_t *ct) 220168404Spjd{ 221168962Spjd znode_t *zp = VTOZ(vp); 222209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 223168404Spjd 224210470Smm /* 225210470Smm * Clean up any locks held by this process on the vp. 226210470Smm */ 227210470Smm cleanlocks(vp, ddi_get_pid(), 0); 228210470Smm cleanshares(vp, ddi_get_pid()); 229210470Smm 230209962Smm ZFS_ENTER(zfsvfs); 231209962Smm ZFS_VERIFY_ZP(zp); 232209962Smm 233168404Spjd /* Decrement the synchronous opens in the znode */ 234185029Spjd if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 235168404Spjd atomic_dec_32(&zp->z_sync_cnt); 236168404Spjd 237185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 238185029Spjd ZTOV(zp)->v_type == VREG && 239219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 240185029Spjd VERIFY(fs_vscan(vp, cr, 1) == 0); 241185029Spjd 242209962Smm ZFS_EXIT(zfsvfs); 243168404Spjd return (0); 244168404Spjd} 245168404Spjd 246168404Spjd/* 247168404Spjd * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 248168404Spjd * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 249168404Spjd */ 250168404Spjdstatic int 251168978Spjdzfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 252168404Spjd{ 253168404Spjd znode_t *zp = VTOZ(vp); 254168404Spjd uint64_t noff = (uint64_t)*off; /* new offset */ 255168404Spjd uint64_t file_sz; 256168404Spjd int error; 257168404Spjd boolean_t hole; 258168404Spjd 259219089Spjd file_sz = zp->z_size; 260168404Spjd if (noff >= file_sz) { 261249195Smm return (SET_ERROR(ENXIO)); 262168404Spjd } 263168404Spjd 264168962Spjd if (cmd == _FIO_SEEK_HOLE) 265168404Spjd hole = B_TRUE; 266168404Spjd else 267168404Spjd hole = B_FALSE; 268168404Spjd 269168404Spjd error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 270168404Spjd 271271536Sdelphij if (error == ESRCH) 272249195Smm return (SET_ERROR(ENXIO)); 273271536Sdelphij 274271536Sdelphij /* 275271536Sdelphij * We could find a hole that begins after the logical end-of-file, 276271536Sdelphij * because dmu_offset_next() only works on whole blocks. If the 277271536Sdelphij * EOF falls mid-block, then indicate that the "virtual hole" 278271536Sdelphij * at the end of the file begins at the logical EOF, rather than 279271536Sdelphij * at the end of the last block. 280271536Sdelphij */ 281271536Sdelphij if (noff > file_sz) { 282271536Sdelphij ASSERT(hole); 283271536Sdelphij noff = file_sz; 284168404Spjd } 285168404Spjd 286168404Spjd if (noff < *off) 287168404Spjd return (error); 288168404Spjd *off = noff; 289168404Spjd return (error); 290168404Spjd} 291168404Spjd 292168404Spjd/* ARGSUSED */ 293168404Spjdstatic int 294168978Spjdzfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 295185029Spjd int *rvalp, caller_context_t *ct) 296168404Spjd{ 297168962Spjd offset_t off; 298287103Savg offset_t ndata; 299287103Savg dmu_object_info_t doi; 300168962Spjd int error; 301168962Spjd zfsvfs_t *zfsvfs; 302185029Spjd znode_t *zp; 303168404Spjd 304168404Spjd switch (com) { 305185029Spjd case _FIOFFS: 306287103Savg { 307168962Spjd return (0); 308168404Spjd 309168962Spjd /* 310168962Spjd * The following two ioctls are used by bfu. Faking out, 311168962Spjd * necessary to avoid bfu errors. 312168962Spjd */ 313287103Savg } 314185029Spjd case _FIOGDIO: 315185029Spjd case _FIOSDIO: 316287103Savg { 317168962Spjd return (0); 318287103Savg } 319168962Spjd 320185029Spjd case _FIO_SEEK_DATA: 321185029Spjd case _FIO_SEEK_HOLE: 322287103Savg { 323277300Ssmh#ifdef illumos 324168962Spjd if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 325249195Smm return (SET_ERROR(EFAULT)); 326233918Savg#else 327233918Savg off = *(offset_t *)data; 328233918Savg#endif 329185029Spjd zp = VTOZ(vp); 330185029Spjd zfsvfs = zp->z_zfsvfs; 331168404Spjd ZFS_ENTER(zfsvfs); 332185029Spjd ZFS_VERIFY_ZP(zp); 333168404Spjd 334168404Spjd /* offset parameter is in/out */ 335168404Spjd error = zfs_holey(vp, com, &off); 336168404Spjd ZFS_EXIT(zfsvfs); 337168404Spjd if (error) 338168404Spjd return (error); 339277300Ssmh#ifdef illumos 340168962Spjd if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 341249195Smm return (SET_ERROR(EFAULT)); 342233918Savg#else 343233918Savg *(offset_t *)data = off; 344233918Savg#endif 345168404Spjd return (0); 346168404Spjd } 347287103Savg#ifdef illumos 348287103Savg case _FIO_COUNT_FILLED: 349287103Savg { 350287103Savg /* 351287103Savg * _FIO_COUNT_FILLED adds a new ioctl command which 352287103Savg * exposes the number of filled blocks in a 353287103Savg * ZFS object. 354287103Savg */ 355287103Savg zp = VTOZ(vp); 356287103Savg zfsvfs = zp->z_zfsvfs; 357287103Savg ZFS_ENTER(zfsvfs); 358287103Savg ZFS_VERIFY_ZP(zp); 359287103Savg 360287103Savg /* 361287103Savg * Wait for all dirty blocks for this object 362287103Savg * to get synced out to disk, and the DMU info 363287103Savg * updated. 364287103Savg */ 365287103Savg error = dmu_object_wait_synced(zfsvfs->z_os, zp->z_id); 366287103Savg if (error) { 367287103Savg ZFS_EXIT(zfsvfs); 368287103Savg return (error); 369287103Savg } 370287103Savg 371287103Savg /* 372287103Savg * Retrieve fill count from DMU object. 373287103Savg */ 374287103Savg error = dmu_object_info(zfsvfs->z_os, zp->z_id, &doi); 375287103Savg if (error) { 376287103Savg ZFS_EXIT(zfsvfs); 377287103Savg return (error); 378287103Savg } 379287103Savg 380287103Savg ndata = doi.doi_fill_count; 381287103Savg 382287103Savg ZFS_EXIT(zfsvfs); 383287103Savg if (ddi_copyout(&ndata, (void *)data, sizeof (ndata), flag)) 384287103Savg return (SET_ERROR(EFAULT)); 385287103Savg return (0); 386287103Savg } 387287103Savg#endif 388287103Savg } 389249195Smm return (SET_ERROR(ENOTTY)); 390168404Spjd} 391168404Spjd 392209962Smmstatic vm_page_t 393253953Sattiliopage_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 394209962Smm{ 395209962Smm vm_object_t obj; 396209962Smm vm_page_t pp; 397258353Savg int64_t end; 398209962Smm 399258353Savg /* 400258353Savg * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE 401258353Savg * aligned boundaries, if the range is not aligned. As a result a 402258353Savg * DEV_BSIZE subrange with partially dirty data may get marked as clean. 403258353Savg * It may happen that all DEV_BSIZE subranges are marked clean and thus 404258353Savg * the whole page would be considred clean despite have some dirty data. 405258353Savg * For this reason we should shrink the range to DEV_BSIZE aligned 406258353Savg * boundaries before calling vm_page_clear_dirty. 407258353Savg */ 408258353Savg end = rounddown2(off + nbytes, DEV_BSIZE); 409258353Savg off = roundup2(off, DEV_BSIZE); 410258353Savg nbytes = end - off; 411258353Savg 412209962Smm obj = vp->v_object; 413248084Sattilio zfs_vmobject_assert_wlocked(obj); 414209962Smm 415209962Smm for (;;) { 416209962Smm if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 417246293Savg pp->valid) { 418254138Sattilio if (vm_page_xbusied(pp)) { 419212652Savg /* 420212652Savg * Reference the page before unlocking and 421212652Savg * sleeping so that the page daemon is less 422212652Savg * likely to reclaim it. 423212652Savg */ 424225418Skib vm_page_reference(pp); 425254138Sattilio vm_page_lock(pp); 426254138Sattilio zfs_vmobject_wunlock(obj); 427307671Skib vm_page_busy_sleep(pp, "zfsmwb", true); 428254138Sattilio zfs_vmobject_wlock(obj); 429209962Smm continue; 430212652Savg } 431254138Sattilio vm_page_sbusy(pp); 432319091Savg } else if (pp != NULL) { 433319091Savg ASSERT(!pp->valid); 434252337Sgavin pp = NULL; 435209962Smm } 436246293Savg 437246293Savg if (pp != NULL) { 438246293Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 439253953Sattilio vm_object_pip_add(obj, 1); 440246293Savg pmap_remove_write(pp); 441258353Savg if (nbytes != 0) 442258353Savg vm_page_clear_dirty(pp, off, nbytes); 443246293Savg } 444209962Smm break; 445209962Smm } 446209962Smm return (pp); 447209962Smm} 448209962Smm 449209962Smmstatic void 450253953Sattiliopage_unbusy(vm_page_t pp) 451209962Smm{ 452209962Smm 453254138Sattilio vm_page_sunbusy(pp); 454253953Sattilio vm_object_pip_subtract(pp->object, 1); 455209962Smm} 456209962Smm 457253953Sattiliostatic vm_page_t 458253953Sattiliopage_hold(vnode_t *vp, int64_t start) 459253953Sattilio{ 460253953Sattilio vm_object_t obj; 461253953Sattilio vm_page_t pp; 462253953Sattilio 463253953Sattilio obj = vp->v_object; 464253953Sattilio zfs_vmobject_assert_wlocked(obj); 465253953Sattilio 466253953Sattilio for (;;) { 467253953Sattilio if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 468253953Sattilio pp->valid) { 469254138Sattilio if (vm_page_xbusied(pp)) { 470253953Sattilio /* 471253953Sattilio * Reference the page before unlocking and 472253953Sattilio * sleeping so that the page daemon is less 473253953Sattilio * likely to reclaim it. 474253953Sattilio */ 475253953Sattilio vm_page_reference(pp); 476254138Sattilio vm_page_lock(pp); 477254138Sattilio zfs_vmobject_wunlock(obj); 478307671Skib vm_page_busy_sleep(pp, "zfsmwb", true); 479254138Sattilio zfs_vmobject_wlock(obj); 480253953Sattilio continue; 481253953Sattilio } 482253953Sattilio 483253953Sattilio ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 484253953Sattilio vm_page_lock(pp); 485253953Sattilio vm_page_hold(pp); 486253953Sattilio vm_page_unlock(pp); 487253953Sattilio 488253953Sattilio } else 489253953Sattilio pp = NULL; 490253953Sattilio break; 491253953Sattilio } 492253953Sattilio return (pp); 493253953Sattilio} 494253953Sattilio 495253953Sattiliostatic void 496253953Sattiliopage_unhold(vm_page_t pp) 497253953Sattilio{ 498253953Sattilio 499253953Sattilio vm_page_lock(pp); 500253953Sattilio vm_page_unhold(pp); 501253953Sattilio vm_page_unlock(pp); 502253953Sattilio} 503253953Sattilio 504168404Spjd/* 505168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 506168404Spjd * between the DMU cache and the memory mapped pages. What this means: 507168404Spjd * 508168404Spjd * On Write: If we find a memory mapped page, we write to *both* 509168404Spjd * the page and the dmu buffer. 510168404Spjd */ 511209962Smmstatic void 512209962Smmupdate_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 513209962Smm int segflg, dmu_tx_t *tx) 514168404Spjd{ 515168404Spjd vm_object_t obj; 516168404Spjd struct sf_buf *sf; 517246293Savg caddr_t va; 518212655Savg int off; 519168404Spjd 520258746Savg ASSERT(segflg != UIO_NOCOPY); 521168404Spjd ASSERT(vp->v_mount != NULL); 522168404Spjd obj = vp->v_object; 523168404Spjd ASSERT(obj != NULL); 524168404Spjd 525168404Spjd off = start & PAGEOFFSET; 526248084Sattilio zfs_vmobject_wlock(obj); 527168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 528209962Smm vm_page_t pp; 529246293Savg int nbytes = imin(PAGESIZE - off, len); 530168404Spjd 531258746Savg if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { 532248084Sattilio zfs_vmobject_wunlock(obj); 533168404Spjd 534246293Savg va = zfs_map_page(pp, &sf); 535246293Savg (void) dmu_read(os, oid, start+off, nbytes, 536246293Savg va+off, DMU_READ_PREFETCH);; 537209962Smm zfs_unmap_page(sf); 538246293Savg 539248084Sattilio zfs_vmobject_wlock(obj); 540253953Sattilio page_unbusy(pp); 541168404Spjd } 542209962Smm len -= nbytes; 543168404Spjd off = 0; 544168404Spjd } 545258746Savg vm_object_pip_wakeupn(obj, 0); 546248084Sattilio zfs_vmobject_wunlock(obj); 547168404Spjd} 548168404Spjd 549168404Spjd/* 550219089Spjd * Read with UIO_NOCOPY flag means that sendfile(2) requests 551219089Spjd * ZFS to populate a range of page cache pages with data. 552219089Spjd * 553219089Spjd * NOTE: this function could be optimized to pre-allocate 554254138Sattilio * all pages in advance, drain exclusive busy on all of them, 555219089Spjd * map them into contiguous KVA region and populate them 556219089Spjd * in one single dmu_read() call. 557219089Spjd */ 558219089Spjdstatic int 559219089Spjdmappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) 560219089Spjd{ 561219089Spjd znode_t *zp = VTOZ(vp); 562219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 563219089Spjd struct sf_buf *sf; 564219089Spjd vm_object_t obj; 565219089Spjd vm_page_t pp; 566219089Spjd int64_t start; 567219089Spjd caddr_t va; 568219089Spjd int len = nbytes; 569219089Spjd int off; 570219089Spjd int error = 0; 571219089Spjd 572219089Spjd ASSERT(uio->uio_segflg == UIO_NOCOPY); 573219089Spjd ASSERT(vp->v_mount != NULL); 574219089Spjd obj = vp->v_object; 575219089Spjd ASSERT(obj != NULL); 576219089Spjd ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); 577219089Spjd 578248084Sattilio zfs_vmobject_wlock(obj); 579219089Spjd for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { 580219089Spjd int bytes = MIN(PAGESIZE, len); 581219089Spjd 582254138Sattilio pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY | 583254649Skib VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); 584219089Spjd if (pp->valid == 0) { 585248084Sattilio zfs_vmobject_wunlock(obj); 586219089Spjd va = zfs_map_page(pp, &sf); 587219089Spjd error = dmu_read(os, zp->z_id, start, bytes, va, 588219089Spjd DMU_READ_PREFETCH); 589219089Spjd if (bytes != PAGESIZE && error == 0) 590219089Spjd bzero(va + bytes, PAGESIZE - bytes); 591219089Spjd zfs_unmap_page(sf); 592248084Sattilio zfs_vmobject_wlock(obj); 593254138Sattilio vm_page_sunbusy(pp); 594219089Spjd vm_page_lock(pp); 595219089Spjd if (error) { 596253073Savg if (pp->wire_count == 0 && pp->valid == 0 && 597254138Sattilio !vm_page_busied(pp)) 598253073Savg vm_page_free(pp); 599219089Spjd } else { 600219089Spjd pp->valid = VM_PAGE_BITS_ALL; 601219089Spjd vm_page_activate(pp); 602219089Spjd } 603219089Spjd vm_page_unlock(pp); 604258739Savg } else { 605258739Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 606254138Sattilio vm_page_sunbusy(pp); 607258739Savg } 608219089Spjd if (error) 609219089Spjd break; 610219089Spjd uio->uio_resid -= bytes; 611219089Spjd uio->uio_offset += bytes; 612219089Spjd len -= bytes; 613219089Spjd } 614248084Sattilio zfs_vmobject_wunlock(obj); 615219089Spjd return (error); 616219089Spjd} 617219089Spjd 618219089Spjd/* 619168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 620168404Spjd * between the DMU cache and the memory mapped pages. What this means: 621168404Spjd * 622168404Spjd * On Read: We "read" preferentially from memory mapped pages, 623168404Spjd * else we default from the dmu buffer. 624168404Spjd * 625168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 626251631Sdelphij * the file is memory mapped. 627168404Spjd */ 628168404Spjdstatic int 629168404Spjdmappedread(vnode_t *vp, int nbytes, uio_t *uio) 630168404Spjd{ 631168404Spjd znode_t *zp = VTOZ(vp); 632168404Spjd vm_object_t obj; 633212655Savg int64_t start; 634168926Spjd caddr_t va; 635168404Spjd int len = nbytes; 636212655Savg int off; 637168404Spjd int error = 0; 638168404Spjd 639168404Spjd ASSERT(vp->v_mount != NULL); 640168404Spjd obj = vp->v_object; 641168404Spjd ASSERT(obj != NULL); 642168404Spjd 643168404Spjd start = uio->uio_loffset; 644168404Spjd off = start & PAGEOFFSET; 645248084Sattilio zfs_vmobject_wlock(obj); 646168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 647219089Spjd vm_page_t pp; 648219089Spjd uint64_t bytes = MIN(PAGESIZE - off, len); 649168404Spjd 650253953Sattilio if (pp = page_hold(vp, start)) { 651219089Spjd struct sf_buf *sf; 652219089Spjd caddr_t va; 653212652Savg 654248084Sattilio zfs_vmobject_wunlock(obj); 655219089Spjd va = zfs_map_page(pp, &sf); 656298105Savg#ifdef illumos 657219089Spjd error = uiomove(va + off, bytes, UIO_READ, uio); 658298105Savg#else 659298105Savg error = vn_io_fault_uiomove(va + off, bytes, uio); 660298105Savg#endif 661219089Spjd zfs_unmap_page(sf); 662248084Sattilio zfs_vmobject_wlock(obj); 663253953Sattilio page_unhold(pp); 664219089Spjd } else { 665248084Sattilio zfs_vmobject_wunlock(obj); 666272809Sdelphij error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 667272809Sdelphij uio, bytes); 668248084Sattilio zfs_vmobject_wlock(obj); 669168404Spjd } 670168404Spjd len -= bytes; 671168404Spjd off = 0; 672168404Spjd if (error) 673168404Spjd break; 674168404Spjd } 675248084Sattilio zfs_vmobject_wunlock(obj); 676168404Spjd return (error); 677168404Spjd} 678168404Spjd 679168404Spjdoffset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 680168404Spjd 681168404Spjd/* 682168404Spjd * Read bytes from specified file into supplied buffer. 683168404Spjd * 684168404Spjd * IN: vp - vnode of file to be read from. 685168404Spjd * uio - structure supplying read location, range info, 686168404Spjd * and return buffer. 687168404Spjd * ioflag - SYNC flags; used to provide FRSYNC semantics. 688168404Spjd * cr - credentials of caller. 689185029Spjd * ct - caller context 690168404Spjd * 691168404Spjd * OUT: uio - updated offset and range, buffer filled. 692168404Spjd * 693251631Sdelphij * RETURN: 0 on success, error code on failure. 694168404Spjd * 695168404Spjd * Side Effects: 696168404Spjd * vp - atime updated if byte count > 0 697168404Spjd */ 698168404Spjd/* ARGSUSED */ 699168404Spjdstatic int 700168962Spjdzfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 701168404Spjd{ 702168404Spjd znode_t *zp = VTOZ(vp); 703168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 704168404Spjd ssize_t n, nbytes; 705247187Smm int error = 0; 706168404Spjd rl_t *rl; 707219089Spjd xuio_t *xuio = NULL; 708168404Spjd 709168404Spjd ZFS_ENTER(zfsvfs); 710185029Spjd ZFS_VERIFY_ZP(zp); 711168404Spjd 712219089Spjd if (zp->z_pflags & ZFS_AV_QUARANTINED) { 713185029Spjd ZFS_EXIT(zfsvfs); 714249195Smm return (SET_ERROR(EACCES)); 715185029Spjd } 716185029Spjd 717168404Spjd /* 718168404Spjd * Validate file offset 719168404Spjd */ 720168404Spjd if (uio->uio_loffset < (offset_t)0) { 721168404Spjd ZFS_EXIT(zfsvfs); 722249195Smm return (SET_ERROR(EINVAL)); 723168404Spjd } 724168404Spjd 725168404Spjd /* 726168404Spjd * Fasttrack empty reads 727168404Spjd */ 728168404Spjd if (uio->uio_resid == 0) { 729168404Spjd ZFS_EXIT(zfsvfs); 730168404Spjd return (0); 731168404Spjd } 732168404Spjd 733168404Spjd /* 734168962Spjd * Check for mandatory locks 735168962Spjd */ 736219089Spjd if (MANDMODE(zp->z_mode)) { 737168962Spjd if (error = chklock(vp, FREAD, 738168962Spjd uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 739168962Spjd ZFS_EXIT(zfsvfs); 740168962Spjd return (error); 741168962Spjd } 742168962Spjd } 743168962Spjd 744168962Spjd /* 745168404Spjd * If we're in FRSYNC mode, sync out this znode before reading it. 746168404Spjd */ 747224605Smm if (zfsvfs->z_log && 748224605Smm (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) 749219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 750168404Spjd 751168404Spjd /* 752168404Spjd * Lock the range against changes. 753168404Spjd */ 754168404Spjd rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 755168404Spjd 756168404Spjd /* 757168404Spjd * If we are reading past end-of-file we can skip 758168404Spjd * to the end; but we might still need to set atime. 759168404Spjd */ 760219089Spjd if (uio->uio_loffset >= zp->z_size) { 761168404Spjd error = 0; 762168404Spjd goto out; 763168404Spjd } 764168404Spjd 765219089Spjd ASSERT(uio->uio_loffset < zp->z_size); 766219089Spjd n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 767168404Spjd 768277300Ssmh#ifdef illumos 769219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 770219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 771219089Spjd int nblk; 772219089Spjd int blksz = zp->z_blksz; 773219089Spjd uint64_t offset = uio->uio_loffset; 774219089Spjd 775219089Spjd xuio = (xuio_t *)uio; 776219089Spjd if ((ISP2(blksz))) { 777219089Spjd nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 778219089Spjd blksz)) / blksz; 779219089Spjd } else { 780219089Spjd ASSERT(offset + n <= blksz); 781219089Spjd nblk = 1; 782219089Spjd } 783219089Spjd (void) dmu_xuio_init(xuio, nblk); 784219089Spjd 785219089Spjd if (vn_has_cached_data(vp)) { 786219089Spjd /* 787219089Spjd * For simplicity, we always allocate a full buffer 788219089Spjd * even if we only expect to read a portion of a block. 789219089Spjd */ 790219089Spjd while (--nblk >= 0) { 791219089Spjd (void) dmu_xuio_add(xuio, 792219089Spjd dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 793219089Spjd blksz), 0, blksz); 794219089Spjd } 795219089Spjd } 796219089Spjd } 797277300Ssmh#endif /* illumos */ 798219089Spjd 799168404Spjd while (n > 0) { 800168404Spjd nbytes = MIN(n, zfs_read_chunk_size - 801168404Spjd P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 802168404Spjd 803219089Spjd#ifdef __FreeBSD__ 804219089Spjd if (uio->uio_segflg == UIO_NOCOPY) 805219089Spjd error = mappedread_sf(vp, nbytes, uio); 806219089Spjd else 807219089Spjd#endif /* __FreeBSD__ */ 808272809Sdelphij if (vn_has_cached_data(vp)) { 809168404Spjd error = mappedread(vp, nbytes, uio); 810272809Sdelphij } else { 811272809Sdelphij error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 812272809Sdelphij uio, nbytes); 813272809Sdelphij } 814185029Spjd if (error) { 815185029Spjd /* convert checksum errors into IO errors */ 816185029Spjd if (error == ECKSUM) 817249195Smm error = SET_ERROR(EIO); 818168404Spjd break; 819185029Spjd } 820168962Spjd 821168404Spjd n -= nbytes; 822168404Spjd } 823168404Spjdout: 824168404Spjd zfs_range_unlock(rl); 825168404Spjd 826168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 827168404Spjd ZFS_EXIT(zfsvfs); 828168404Spjd return (error); 829168404Spjd} 830168404Spjd 831168404Spjd/* 832168404Spjd * Write the bytes to a file. 833168404Spjd * 834168404Spjd * IN: vp - vnode of file to be written to. 835168404Spjd * uio - structure supplying write location, range info, 836168404Spjd * and data buffer. 837251631Sdelphij * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is 838251631Sdelphij * set if in append mode. 839168404Spjd * cr - credentials of caller. 840185029Spjd * ct - caller context (NFS/CIFS fem monitor only) 841168404Spjd * 842168404Spjd * OUT: uio - updated offset and range. 843168404Spjd * 844251631Sdelphij * RETURN: 0 on success, error code on failure. 845168404Spjd * 846168404Spjd * Timestamps: 847168404Spjd * vp - ctime|mtime updated if byte count > 0 848168404Spjd */ 849219089Spjd 850168404Spjd/* ARGSUSED */ 851168404Spjdstatic int 852168962Spjdzfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 853168404Spjd{ 854168404Spjd znode_t *zp = VTOZ(vp); 855168962Spjd rlim64_t limit = MAXOFFSET_T; 856168404Spjd ssize_t start_resid = uio->uio_resid; 857168404Spjd ssize_t tx_bytes; 858168404Spjd uint64_t end_size; 859168404Spjd dmu_tx_t *tx; 860168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 861185029Spjd zilog_t *zilog; 862168404Spjd offset_t woff; 863168404Spjd ssize_t n, nbytes; 864168404Spjd rl_t *rl; 865168404Spjd int max_blksz = zfsvfs->z_max_blksz; 866247187Smm int error = 0; 867209962Smm arc_buf_t *abuf; 868247187Smm iovec_t *aiov = NULL; 869219089Spjd xuio_t *xuio = NULL; 870219089Spjd int i_iov = 0; 871219089Spjd int iovcnt = uio->uio_iovcnt; 872219089Spjd iovec_t *iovp = uio->uio_iov; 873219089Spjd int write_eof; 874219089Spjd int count = 0; 875219089Spjd sa_bulk_attr_t bulk[4]; 876219089Spjd uint64_t mtime[2], ctime[2]; 877168404Spjd 878168404Spjd /* 879168404Spjd * Fasttrack empty write 880168404Spjd */ 881168404Spjd n = start_resid; 882168404Spjd if (n == 0) 883168404Spjd return (0); 884168404Spjd 885168962Spjd if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 886168962Spjd limit = MAXOFFSET_T; 887168962Spjd 888168404Spjd ZFS_ENTER(zfsvfs); 889185029Spjd ZFS_VERIFY_ZP(zp); 890168404Spjd 891219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 892219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 893219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 894219089Spjd &zp->z_size, 8); 895219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 896219089Spjd &zp->z_pflags, 8); 897219089Spjd 898168404Spjd /* 899262990Sdelphij * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our 900262990Sdelphij * callers might not be able to detect properly that we are read-only, 901262990Sdelphij * so check it explicitly here. 902262990Sdelphij */ 903262990Sdelphij if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 904262990Sdelphij ZFS_EXIT(zfsvfs); 905262990Sdelphij return (SET_ERROR(EROFS)); 906262990Sdelphij } 907262990Sdelphij 908262990Sdelphij /* 909321579Smav * If immutable or not appending then return EPERM. 910321579Smav * Intentionally allow ZFS_READONLY through here. 911321579Smav * See zfs_zaccess_common() 912185029Spjd */ 913321579Smav if ((zp->z_pflags & ZFS_IMMUTABLE) || 914219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 915219089Spjd (uio->uio_loffset < zp->z_size))) { 916185029Spjd ZFS_EXIT(zfsvfs); 917249195Smm return (SET_ERROR(EPERM)); 918185029Spjd } 919185029Spjd 920185029Spjd zilog = zfsvfs->z_log; 921185029Spjd 922185029Spjd /* 923219089Spjd * Validate file offset 924219089Spjd */ 925219089Spjd woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 926219089Spjd if (woff < 0) { 927219089Spjd ZFS_EXIT(zfsvfs); 928249195Smm return (SET_ERROR(EINVAL)); 929219089Spjd } 930219089Spjd 931219089Spjd /* 932219089Spjd * Check for mandatory locks before calling zfs_range_lock() 933219089Spjd * in order to prevent a deadlock with locks set via fcntl(). 934219089Spjd */ 935219089Spjd if (MANDMODE((mode_t)zp->z_mode) && 936219089Spjd (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 937219089Spjd ZFS_EXIT(zfsvfs); 938219089Spjd return (error); 939219089Spjd } 940219089Spjd 941277300Ssmh#ifdef illumos 942219089Spjd /* 943168404Spjd * Pre-fault the pages to ensure slow (eg NFS) pages 944168404Spjd * don't hold up txg. 945219089Spjd * Skip this if uio contains loaned arc_buf. 946168404Spjd */ 947219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 948219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 949219089Spjd xuio = (xuio_t *)uio; 950219089Spjd else 951219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 952277300Ssmh#endif 953168404Spjd 954168404Spjd /* 955168404Spjd * If in append mode, set the io offset pointer to eof. 956168404Spjd */ 957213673Spjd if (ioflag & FAPPEND) { 958168404Spjd /* 959219089Spjd * Obtain an appending range lock to guarantee file append 960219089Spjd * semantics. We reset the write offset once we have the lock. 961168404Spjd */ 962168404Spjd rl = zfs_range_lock(zp, 0, n, RL_APPEND); 963219089Spjd woff = rl->r_off; 964168404Spjd if (rl->r_len == UINT64_MAX) { 965219089Spjd /* 966219089Spjd * We overlocked the file because this write will cause 967219089Spjd * the file block size to increase. 968219089Spjd * Note that zp_size cannot change with this lock held. 969219089Spjd */ 970219089Spjd woff = zp->z_size; 971168404Spjd } 972219089Spjd uio->uio_loffset = woff; 973168404Spjd } else { 974168404Spjd /* 975219089Spjd * Note that if the file block size will change as a result of 976219089Spjd * this write, then this range lock will lock the entire file 977219089Spjd * so that we can re-write the block safely. 978168404Spjd */ 979168404Spjd rl = zfs_range_lock(zp, woff, n, RL_WRITER); 980168404Spjd } 981168404Spjd 982235781Strasz if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { 983235781Strasz zfs_range_unlock(rl); 984235781Strasz ZFS_EXIT(zfsvfs); 985235781Strasz return (EFBIG); 986235781Strasz } 987235781Strasz 988168962Spjd if (woff >= limit) { 989168962Spjd zfs_range_unlock(rl); 990168962Spjd ZFS_EXIT(zfsvfs); 991249195Smm return (SET_ERROR(EFBIG)); 992168962Spjd } 993168962Spjd 994168962Spjd if ((woff + n) > limit || woff > (limit - n)) 995168962Spjd n = limit - woff; 996168962Spjd 997219089Spjd /* Will this write extend the file length? */ 998219089Spjd write_eof = (woff + n > zp->z_size); 999168404Spjd 1000219089Spjd end_size = MAX(zp->z_size, woff + n); 1001219089Spjd 1002168404Spjd /* 1003168404Spjd * Write the file in reasonable size chunks. Each chunk is written 1004168404Spjd * in a separate transaction; this keeps the intent log records small 1005168404Spjd * and allows us to do more fine-grained space accounting. 1006168404Spjd */ 1007168404Spjd while (n > 0) { 1008209962Smm abuf = NULL; 1009209962Smm woff = uio->uio_loffset; 1010219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 1011219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 1012209962Smm if (abuf != NULL) 1013209962Smm dmu_return_arcbuf(abuf); 1014249195Smm error = SET_ERROR(EDQUOT); 1015209962Smm break; 1016209962Smm } 1017209962Smm 1018219089Spjd if (xuio && abuf == NULL) { 1019219089Spjd ASSERT(i_iov < iovcnt); 1020219089Spjd aiov = &iovp[i_iov]; 1021219089Spjd abuf = dmu_xuio_arcbuf(xuio, i_iov); 1022219089Spjd dmu_xuio_clear(xuio, i_iov); 1023219089Spjd DTRACE_PROBE3(zfs_cp_write, int, i_iov, 1024219089Spjd iovec_t *, aiov, arc_buf_t *, abuf); 1025219089Spjd ASSERT((aiov->iov_base == abuf->b_data) || 1026219089Spjd ((char *)aiov->iov_base - (char *)abuf->b_data + 1027219089Spjd aiov->iov_len == arc_buf_size(abuf))); 1028219089Spjd i_iov++; 1029219089Spjd } else if (abuf == NULL && n >= max_blksz && 1030219089Spjd woff >= zp->z_size && 1031209962Smm P2PHASE(woff, max_blksz) == 0 && 1032209962Smm zp->z_blksz == max_blksz) { 1033219089Spjd /* 1034219089Spjd * This write covers a full block. "Borrow" a buffer 1035219089Spjd * from the dmu so that we can fill it before we enter 1036219089Spjd * a transaction. This avoids the possibility of 1037219089Spjd * holding up the transaction if the data copy hangs 1038219089Spjd * up on a pagefault (e.g., from an NFS server mapping). 1039219089Spjd */ 1040209962Smm size_t cbytes; 1041209962Smm 1042219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 1043219089Spjd max_blksz); 1044209962Smm ASSERT(abuf != NULL); 1045209962Smm ASSERT(arc_buf_size(abuf) == max_blksz); 1046209962Smm if (error = uiocopy(abuf->b_data, max_blksz, 1047209962Smm UIO_WRITE, uio, &cbytes)) { 1048209962Smm dmu_return_arcbuf(abuf); 1049209962Smm break; 1050209962Smm } 1051209962Smm ASSERT(cbytes == max_blksz); 1052209962Smm } 1053209962Smm 1054209962Smm /* 1055168404Spjd * Start a transaction. 1056168404Spjd */ 1057168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1058219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1059168404Spjd dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 1060219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1061258720Savg error = dmu_tx_assign(tx, TXG_WAIT); 1062168404Spjd if (error) { 1063168404Spjd dmu_tx_abort(tx); 1064209962Smm if (abuf != NULL) 1065209962Smm dmu_return_arcbuf(abuf); 1066168404Spjd break; 1067168404Spjd } 1068168404Spjd 1069168404Spjd /* 1070168404Spjd * If zfs_range_lock() over-locked we grow the blocksize 1071168404Spjd * and then reduce the lock range. This will only happen 1072168404Spjd * on the first iteration since zfs_range_reduce() will 1073168404Spjd * shrink down r_len to the appropriate size. 1074168404Spjd */ 1075168404Spjd if (rl->r_len == UINT64_MAX) { 1076168404Spjd uint64_t new_blksz; 1077168404Spjd 1078168404Spjd if (zp->z_blksz > max_blksz) { 1079274337Sdelphij /* 1080274337Sdelphij * File's blocksize is already larger than the 1081274337Sdelphij * "recordsize" property. Only let it grow to 1082274337Sdelphij * the next power of 2. 1083274337Sdelphij */ 1084168404Spjd ASSERT(!ISP2(zp->z_blksz)); 1085274337Sdelphij new_blksz = MIN(end_size, 1086274337Sdelphij 1 << highbit64(zp->z_blksz)); 1087168404Spjd } else { 1088168404Spjd new_blksz = MIN(end_size, max_blksz); 1089168404Spjd } 1090168404Spjd zfs_grow_blocksize(zp, new_blksz, tx); 1091168404Spjd zfs_range_reduce(rl, woff, n); 1092168404Spjd } 1093168404Spjd 1094168404Spjd /* 1095168404Spjd * XXX - should we really limit each write to z_max_blksz? 1096168404Spjd * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 1097168404Spjd */ 1098168404Spjd nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 1099168404Spjd 1100219089Spjd if (woff + nbytes > zp->z_size) 1101168404Spjd vnode_pager_setsize(vp, woff + nbytes); 1102168404Spjd 1103209962Smm if (abuf == NULL) { 1104209962Smm tx_bytes = uio->uio_resid; 1105219089Spjd error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 1106219089Spjd uio, nbytes, tx); 1107209962Smm tx_bytes -= uio->uio_resid; 1108168404Spjd } else { 1109209962Smm tx_bytes = nbytes; 1110219089Spjd ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 1111219089Spjd /* 1112219089Spjd * If this is not a full block write, but we are 1113219089Spjd * extending the file past EOF and this data starts 1114219089Spjd * block-aligned, use assign_arcbuf(). Otherwise, 1115219089Spjd * write via dmu_write(). 1116219089Spjd */ 1117219089Spjd if (tx_bytes < max_blksz && (!write_eof || 1118219089Spjd aiov->iov_base != abuf->b_data)) { 1119219089Spjd ASSERT(xuio); 1120219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, woff, 1121219089Spjd aiov->iov_len, aiov->iov_base, tx); 1122219089Spjd dmu_return_arcbuf(abuf); 1123219089Spjd xuio_stat_wbuf_copied(); 1124219089Spjd } else { 1125219089Spjd ASSERT(xuio || tx_bytes == max_blksz); 1126219089Spjd dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 1127219089Spjd woff, abuf, tx); 1128219089Spjd } 1129209962Smm ASSERT(tx_bytes <= uio->uio_resid); 1130209962Smm uioskip(uio, tx_bytes); 1131168404Spjd } 1132212657Savg if (tx_bytes && vn_has_cached_data(vp)) { 1133209962Smm update_pages(vp, woff, tx_bytes, zfsvfs->z_os, 1134209962Smm zp->z_id, uio->uio_segflg, tx); 1135209962Smm } 1136209962Smm 1137209962Smm /* 1138168404Spjd * If we made no progress, we're done. If we made even 1139168404Spjd * partial progress, update the znode and ZIL accordingly. 1140168404Spjd */ 1141168404Spjd if (tx_bytes == 0) { 1142219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 1143219089Spjd (void *)&zp->z_size, sizeof (uint64_t), tx); 1144168404Spjd dmu_tx_commit(tx); 1145168404Spjd ASSERT(error != 0); 1146168404Spjd break; 1147168404Spjd } 1148168404Spjd 1149168404Spjd /* 1150168404Spjd * Clear Set-UID/Set-GID bits on successful write if not 1151168404Spjd * privileged and at least one of the excute bits is set. 1152168404Spjd * 1153168404Spjd * It would be nice to to this after all writes have 1154168404Spjd * been done, but that would still expose the ISUID/ISGID 1155168404Spjd * to another app after the partial write is committed. 1156185029Spjd * 1157185029Spjd * Note: we don't call zfs_fuid_map_id() here because 1158185029Spjd * user 0 is not an ephemeral uid. 1159168404Spjd */ 1160168404Spjd mutex_enter(&zp->z_acl_lock); 1161219089Spjd if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 1162168404Spjd (S_IXUSR >> 6))) != 0 && 1163219089Spjd (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 1164185029Spjd secpolicy_vnode_setid_retain(vp, cr, 1165219089Spjd (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 1166219089Spjd uint64_t newmode; 1167219089Spjd zp->z_mode &= ~(S_ISUID | S_ISGID); 1168219089Spjd newmode = zp->z_mode; 1169219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 1170219089Spjd (void *)&newmode, sizeof (uint64_t), tx); 1171168404Spjd } 1172168404Spjd mutex_exit(&zp->z_acl_lock); 1173168404Spjd 1174219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 1175219089Spjd B_TRUE); 1176168404Spjd 1177168404Spjd /* 1178168404Spjd * Update the file size (zp_size) if it has changed; 1179168404Spjd * account for possible concurrent updates. 1180168404Spjd */ 1181219089Spjd while ((end_size = zp->z_size) < uio->uio_loffset) { 1182219089Spjd (void) atomic_cas_64(&zp->z_size, end_size, 1183168404Spjd uio->uio_loffset); 1184298105Savg#ifdef illumos 1185219089Spjd ASSERT(error == 0); 1186298105Savg#else 1187298105Savg ASSERT(error == 0 || error == EFAULT); 1188298105Savg#endif 1189219089Spjd } 1190219089Spjd /* 1191219089Spjd * If we are replaying and eof is non zero then force 1192219089Spjd * the file size to the specified eof. Note, there's no 1193219089Spjd * concurrency during replay. 1194219089Spjd */ 1195219089Spjd if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 1196219089Spjd zp->z_size = zfsvfs->z_replay_eof; 1197219089Spjd 1198298105Savg if (error == 0) 1199298105Savg error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1200298105Savg else 1201298105Savg (void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1202219089Spjd 1203168404Spjd zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 1204168404Spjd dmu_tx_commit(tx); 1205168404Spjd 1206168404Spjd if (error != 0) 1207168404Spjd break; 1208168404Spjd ASSERT(tx_bytes == nbytes); 1209168404Spjd n -= nbytes; 1210219089Spjd 1211277300Ssmh#ifdef illumos 1212219089Spjd if (!xuio && n > 0) 1213219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 1214277300Ssmh#endif 1215168404Spjd } 1216168404Spjd 1217168404Spjd zfs_range_unlock(rl); 1218168404Spjd 1219168404Spjd /* 1220168404Spjd * If we're in replay mode, or we made no progress, return error. 1221168404Spjd * Otherwise, it's at least a partial write, so it's successful. 1222168404Spjd */ 1223209962Smm if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1224168404Spjd ZFS_EXIT(zfsvfs); 1225168404Spjd return (error); 1226168404Spjd } 1227168404Spjd 1228298105Savg#ifdef __FreeBSD__ 1229298105Savg /* 1230298105Savg * EFAULT means that at least one page of the source buffer was not 1231298105Savg * available. VFS will re-try remaining I/O upon this error. 1232298105Savg */ 1233298105Savg if (error == EFAULT) { 1234298105Savg ZFS_EXIT(zfsvfs); 1235298105Savg return (error); 1236298105Savg } 1237298105Savg#endif 1238298105Savg 1239219089Spjd if (ioflag & (FSYNC | FDSYNC) || 1240219089Spjd zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1241219089Spjd zil_commit(zilog, zp->z_id); 1242168404Spjd 1243168404Spjd ZFS_EXIT(zfsvfs); 1244168404Spjd return (0); 1245168404Spjd} 1246168404Spjd 1247168404Spjdvoid 1248219089Spjdzfs_get_done(zgd_t *zgd, int error) 1249168404Spjd{ 1250219089Spjd znode_t *zp = zgd->zgd_private; 1251219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 1252168404Spjd 1253219089Spjd if (zgd->zgd_db) 1254219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1255219089Spjd 1256219089Spjd zfs_range_unlock(zgd->zgd_rl); 1257219089Spjd 1258191900Skmacy /* 1259191900Skmacy * Release the vnode asynchronously as we currently have the 1260191900Skmacy * txg stopped from syncing. 1261191900Skmacy */ 1262219089Spjd VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1263219089Spjd 1264219089Spjd if (error == 0 && zgd->zgd_bp) 1265325132Savg zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp); 1266219089Spjd 1267168404Spjd kmem_free(zgd, sizeof (zgd_t)); 1268168404Spjd} 1269168404Spjd 1270214378Smm#ifdef DEBUG 1271214378Smmstatic int zil_fault_io = 0; 1272214378Smm#endif 1273214378Smm 1274168404Spjd/* 1275168404Spjd * Get data to generate a TX_WRITE intent log record. 1276168404Spjd */ 1277168404Spjdint 1278325132Savgzfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) 1279168404Spjd{ 1280168404Spjd zfsvfs_t *zfsvfs = arg; 1281168404Spjd objset_t *os = zfsvfs->z_os; 1282168404Spjd znode_t *zp; 1283219089Spjd uint64_t object = lr->lr_foid; 1284219089Spjd uint64_t offset = lr->lr_offset; 1285219089Spjd uint64_t size = lr->lr_length; 1286168404Spjd dmu_buf_t *db; 1287168404Spjd zgd_t *zgd; 1288168404Spjd int error = 0; 1289168404Spjd 1290325132Savg ASSERT3P(lwb, !=, NULL); 1291325132Savg ASSERT3P(zio, !=, NULL); 1292325132Savg ASSERT3U(size, !=, 0); 1293168404Spjd 1294168404Spjd /* 1295168404Spjd * Nothing to do if the file has been removed 1296168404Spjd */ 1297219089Spjd if (zfs_zget(zfsvfs, object, &zp) != 0) 1298249195Smm return (SET_ERROR(ENOENT)); 1299168404Spjd if (zp->z_unlinked) { 1300191900Skmacy /* 1301191900Skmacy * Release the vnode asynchronously as we currently have the 1302191900Skmacy * txg stopped from syncing. 1303191900Skmacy */ 1304196307Spjd VN_RELE_ASYNC(ZTOV(zp), 1305196307Spjd dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1306249195Smm return (SET_ERROR(ENOENT)); 1307168404Spjd } 1308168404Spjd 1309219089Spjd zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1310325132Savg zgd->zgd_lwb = lwb; 1311219089Spjd zgd->zgd_private = zp; 1312219089Spjd 1313168404Spjd /* 1314168404Spjd * Write records come in two flavors: immediate and indirect. 1315168404Spjd * For small writes it's cheaper to store the data with the 1316168404Spjd * log record (immediate); for large writes it's cheaper to 1317168404Spjd * sync the data and get a pointer to it (indirect) so that 1318168404Spjd * we don't have to write the data twice. 1319168404Spjd */ 1320168404Spjd if (buf != NULL) { /* immediate write */ 1321219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1322168404Spjd /* test for truncation needs to be done while range locked */ 1323219089Spjd if (offset >= zp->z_size) { 1324249195Smm error = SET_ERROR(ENOENT); 1325219089Spjd } else { 1326219089Spjd error = dmu_read(os, object, offset, size, buf, 1327219089Spjd DMU_READ_NO_PREFETCH); 1328168404Spjd } 1329219089Spjd ASSERT(error == 0 || error == ENOENT); 1330168404Spjd } else { /* indirect write */ 1331168404Spjd /* 1332168404Spjd * Have to lock the whole block to ensure when it's 1333324203Savg * written out and its checksum is being calculated 1334168404Spjd * that no one can change the data. We need to re-check 1335168404Spjd * blocksize after we get the lock in case it's changed! 1336168404Spjd */ 1337168404Spjd for (;;) { 1338219089Spjd uint64_t blkoff; 1339219089Spjd size = zp->z_blksz; 1340219089Spjd blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; 1341219089Spjd offset -= blkoff; 1342219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1343219089Spjd RL_READER); 1344219089Spjd if (zp->z_blksz == size) 1345168404Spjd break; 1346219089Spjd offset += blkoff; 1347219089Spjd zfs_range_unlock(zgd->zgd_rl); 1348168404Spjd } 1349168404Spjd /* test for truncation needs to be done while range locked */ 1350219089Spjd if (lr->lr_offset >= zp->z_size) 1351249195Smm error = SET_ERROR(ENOENT); 1352214378Smm#ifdef DEBUG 1353214378Smm if (zil_fault_io) { 1354249195Smm error = SET_ERROR(EIO); 1355214378Smm zil_fault_io = 0; 1356214378Smm } 1357214378Smm#endif 1358219089Spjd if (error == 0) 1359219089Spjd error = dmu_buf_hold(os, object, offset, zgd, &db, 1360219089Spjd DMU_READ_NO_PREFETCH); 1361214378Smm 1362209962Smm if (error == 0) { 1363323748Savg blkptr_t *bp = &lr->lr_blkptr; 1364243524Smm 1365219089Spjd zgd->zgd_db = db; 1366219089Spjd zgd->zgd_bp = bp; 1367219089Spjd 1368219089Spjd ASSERT(db->db_offset == offset); 1369219089Spjd ASSERT(db->db_size == size); 1370219089Spjd 1371219089Spjd error = dmu_sync(zio, lr->lr_common.lrc_txg, 1372219089Spjd zfs_get_done, zgd); 1373321559Smav ASSERT(error || lr->lr_length <= size); 1374219089Spjd 1375209962Smm /* 1376219089Spjd * On success, we need to wait for the write I/O 1377219089Spjd * initiated by dmu_sync() to complete before we can 1378219089Spjd * release this dbuf. We will finish everything up 1379219089Spjd * in the zfs_get_done() callback. 1380209962Smm */ 1381219089Spjd if (error == 0) 1382219089Spjd return (0); 1383209962Smm 1384219089Spjd if (error == EALREADY) { 1385219089Spjd lr->lr_common.lrc_txtype = TX_WRITE2; 1386219089Spjd error = 0; 1387219089Spjd } 1388209962Smm } 1389168404Spjd } 1390219089Spjd 1391219089Spjd zfs_get_done(zgd, error); 1392219089Spjd 1393168404Spjd return (error); 1394168404Spjd} 1395168404Spjd 1396168404Spjd/*ARGSUSED*/ 1397168404Spjdstatic int 1398185029Spjdzfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1399185029Spjd caller_context_t *ct) 1400168404Spjd{ 1401168404Spjd znode_t *zp = VTOZ(vp); 1402168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1403168404Spjd int error; 1404168404Spjd 1405168404Spjd ZFS_ENTER(zfsvfs); 1406185029Spjd ZFS_VERIFY_ZP(zp); 1407185029Spjd 1408185029Spjd if (flag & V_ACE_MASK) 1409185029Spjd error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1410185029Spjd else 1411185029Spjd error = zfs_zaccess_rwx(zp, mode, flag, cr); 1412185029Spjd 1413168404Spjd ZFS_EXIT(zfsvfs); 1414168404Spjd return (error); 1415168404Spjd} 1416168404Spjd 1417211932Smmstatic int 1418303970Savgzfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp) 1419211932Smm{ 1420303970Savg int error; 1421211932Smm 1422303970Savg *vpp = arg; 1423303970Savg error = vn_lock(*vpp, lkflags); 1424303970Savg if (error != 0) 1425303970Savg vrele(*vpp); 1426303970Savg return (error); 1427303970Savg} 1428211932Smm 1429303970Savgstatic int 1430303970Savgzfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags) 1431303970Savg{ 1432303970Savg znode_t *zdp = VTOZ(dvp); 1433303970Savg zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1434303970Savg int error; 1435303970Savg int ltype; 1436303970Savg 1437303970Savg ASSERT_VOP_LOCKED(dvp, __func__); 1438303970Savg#ifdef DIAGNOSTIC 1439307142Savg if ((zdp->z_pflags & ZFS_XATTR) == 0) 1440307142Savg VERIFY(!RRM_LOCK_HELD(&zfsvfs->z_teardown_lock)); 1441303970Savg#endif 1442303970Savg 1443303970Savg if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { 1444303970Savg ASSERT3P(dvp, ==, vp); 1445303970Savg vref(dvp); 1446303970Savg ltype = lkflags & LK_TYPE_MASK; 1447303970Savg if (ltype != VOP_ISLOCKED(dvp)) { 1448303970Savg if (ltype == LK_EXCLUSIVE) 1449303970Savg vn_lock(dvp, LK_UPGRADE | LK_RETRY); 1450303970Savg else /* if (ltype == LK_SHARED) */ 1451303970Savg vn_lock(dvp, LK_DOWNGRADE | LK_RETRY); 1452303970Savg 1453303970Savg /* 1454303970Savg * Relock for the "." case could leave us with 1455303970Savg * reclaimed vnode. 1456303970Savg */ 1457303970Savg if (dvp->v_iflag & VI_DOOMED) { 1458303970Savg vrele(dvp); 1459303970Savg return (SET_ERROR(ENOENT)); 1460303970Savg } 1461303970Savg } 1462303970Savg return (0); 1463303970Savg } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { 1464303970Savg /* 1465303970Savg * Note that in this case, dvp is the child vnode, and we 1466303970Savg * are looking up the parent vnode - exactly reverse from 1467303970Savg * normal operation. Unlocking dvp requires some rather 1468303970Savg * tricky unlock/relock dance to prevent mp from being freed; 1469303970Savg * use vn_vget_ino_gen() which takes care of all that. 1470303970Savg * 1471303970Savg * XXX Note that there is a time window when both vnodes are 1472303970Savg * unlocked. It is possible, although highly unlikely, that 1473303970Savg * during that window the parent-child relationship between 1474303970Savg * the vnodes may change, for example, get reversed. 1475303970Savg * In that case we would have a wrong lock order for the vnodes. 1476303970Savg * All other filesystems seem to ignore this problem, so we 1477303970Savg * do the same here. 1478303970Savg * A potential solution could be implemented as follows: 1479303970Savg * - using LK_NOWAIT when locking the second vnode and retrying 1480303970Savg * if necessary 1481303970Savg * - checking that the parent-child relationship still holds 1482303970Savg * after locking both vnodes and retrying if it doesn't 1483303970Savg */ 1484303970Savg error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp); 1485303970Savg return (error); 1486303970Savg } else { 1487303970Savg error = vn_lock(vp, lkflags); 1488303970Savg if (error != 0) 1489303970Savg vrele(vp); 1490303970Savg return (error); 1491211932Smm } 1492211932Smm} 1493211932Smm 1494211932Smm/* 1495168404Spjd * Lookup an entry in a directory, or an extended attribute directory. 1496168404Spjd * If it exists, return a held vnode reference for it. 1497168404Spjd * 1498168404Spjd * IN: dvp - vnode of directory to search. 1499168404Spjd * nm - name of entry to lookup. 1500168404Spjd * pnp - full pathname to lookup [UNUSED]. 1501168404Spjd * flags - LOOKUP_XATTR set if looking for an attribute. 1502168404Spjd * rdir - root directory vnode [UNUSED]. 1503168404Spjd * cr - credentials of caller. 1504185029Spjd * ct - caller context 1505168404Spjd * 1506168404Spjd * OUT: vpp - vnode of located entry, NULL if not found. 1507168404Spjd * 1508251631Sdelphij * RETURN: 0 on success, error code on failure. 1509168404Spjd * 1510168404Spjd * Timestamps: 1511168404Spjd * NA 1512168404Spjd */ 1513168404Spjd/* ARGSUSED */ 1514168962Spjdstatic int 1515168962Spjdzfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1516185029Spjd int nameiop, cred_t *cr, kthread_t *td, int flags) 1517168404Spjd{ 1518168962Spjd znode_t *zdp = VTOZ(dvp); 1519303970Savg znode_t *zp; 1520168962Spjd zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1521211932Smm int error = 0; 1522168404Spjd 1523321545Smav /* 1524321545Smav * Fast path lookup, however we must skip DNLC lookup 1525321545Smav * for case folding or normalizing lookups because the 1526321545Smav * DNLC code only stores the passed in name. This means 1527321545Smav * creating 'a' and removing 'A' on a case insensitive 1528321545Smav * file system would work, but DNLC still thinks 'a' 1529321545Smav * exists and won't let you create it again on the next 1530321545Smav * pass through fast path. 1531321545Smav */ 1532303970Savg if (!(flags & LOOKUP_XATTR)) { 1533211932Smm if (dvp->v_type != VDIR) { 1534249195Smm return (SET_ERROR(ENOTDIR)); 1535219089Spjd } else if (zdp->z_sa_hdl == NULL) { 1536249195Smm return (SET_ERROR(EIO)); 1537211932Smm } 1538211932Smm } 1539211932Smm 1540211932Smm DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1541211932Smm 1542168404Spjd ZFS_ENTER(zfsvfs); 1543185029Spjd ZFS_VERIFY_ZP(zdp); 1544168404Spjd 1545168404Spjd *vpp = NULL; 1546168404Spjd 1547185029Spjd if (flags & LOOKUP_XATTR) { 1548168404Spjd#ifdef TODO 1549168404Spjd /* 1550168404Spjd * If the xattr property is off, refuse the lookup request. 1551168404Spjd */ 1552168404Spjd if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1553168404Spjd ZFS_EXIT(zfsvfs); 1554249195Smm return (SET_ERROR(EINVAL)); 1555168404Spjd } 1556185029Spjd#endif 1557168404Spjd 1558168404Spjd /* 1559168404Spjd * We don't allow recursive attributes.. 1560168404Spjd * Maybe someday we will. 1561168404Spjd */ 1562219089Spjd if (zdp->z_pflags & ZFS_XATTR) { 1563168404Spjd ZFS_EXIT(zfsvfs); 1564249195Smm return (SET_ERROR(EINVAL)); 1565168404Spjd } 1566168404Spjd 1567168404Spjd if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1568168404Spjd ZFS_EXIT(zfsvfs); 1569168404Spjd return (error); 1570168404Spjd } 1571168404Spjd 1572168404Spjd /* 1573168404Spjd * Do we have permission to get into attribute directory? 1574168404Spjd */ 1575185029Spjd if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1576185029Spjd B_FALSE, cr)) { 1577303970Savg vrele(*vpp); 1578185029Spjd *vpp = NULL; 1579168404Spjd } 1580168404Spjd 1581168404Spjd ZFS_EXIT(zfsvfs); 1582168404Spjd return (error); 1583168404Spjd } 1584168404Spjd 1585168404Spjd /* 1586168404Spjd * Check accessibility of directory. 1587168404Spjd */ 1588185029Spjd if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1589168404Spjd ZFS_EXIT(zfsvfs); 1590168404Spjd return (error); 1591168404Spjd } 1592168404Spjd 1593185029Spjd if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1594185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1595185029Spjd ZFS_EXIT(zfsvfs); 1596249195Smm return (SET_ERROR(EILSEQ)); 1597185029Spjd } 1598168404Spjd 1599168962Spjd 1600303970Savg /* 1601303970Savg * First handle the special cases. 1602303970Savg */ 1603303970Savg if ((cnp->cn_flags & ISDOTDOT) != 0) { 1604303970Savg /* 1605303970Savg * If we are a snapshot mounted under .zfs, return 1606303970Savg * the vp for the snapshot directory. 1607303970Savg */ 1608303970Savg if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) { 1609315842Savg struct componentname cn; 1610315842Savg vnode_t *zfsctl_vp; 1611315842Savg int ltype; 1612315842Savg 1613303970Savg ZFS_EXIT(zfsvfs); 1614315842Savg ltype = VOP_ISLOCKED(dvp); 1615315842Savg VOP_UNLOCK(dvp, 0); 1616315842Savg error = zfsctl_root(zfsvfs->z_parent, LK_SHARED, 1617315842Savg &zfsctl_vp); 1618303970Savg if (error == 0) { 1619315842Savg cn.cn_nameptr = "snapshot"; 1620315842Savg cn.cn_namelen = strlen(cn.cn_nameptr); 1621315842Savg cn.cn_nameiop = cnp->cn_nameiop; 1622319415Savg cn.cn_flags = cnp->cn_flags & ~ISDOTDOT; 1623315842Savg cn.cn_lkflags = cnp->cn_lkflags; 1624315842Savg error = VOP_LOOKUP(zfsctl_vp, vpp, &cn); 1625315842Savg vput(zfsctl_vp); 1626303970Savg } 1627315842Savg vn_lock(dvp, ltype | LK_RETRY); 1628315842Savg return (error); 1629303970Savg } 1630303970Savg } 1631303970Savg if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) { 1632315842Savg ZFS_EXIT(zfsvfs); 1633303970Savg if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP) 1634315842Savg return (SET_ERROR(ENOTSUP)); 1635315842Savg error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp); 1636315842Savg return (error); 1637303970Savg } 1638303970Savg 1639303970Savg /* 1640303970Savg * The loop is retry the lookup if the parent-child relationship 1641303970Savg * changes during the dot-dot locking complexities. 1642303970Savg */ 1643303970Savg for (;;) { 1644303970Savg uint64_t parent; 1645303970Savg 1646303970Savg error = zfs_dirlook(zdp, nm, &zp); 1647303970Savg if (error == 0) 1648303970Savg *vpp = ZTOV(zp); 1649303970Savg 1650303970Savg ZFS_EXIT(zfsvfs); 1651303970Savg if (error != 0) 1652303970Savg break; 1653303970Savg 1654303970Savg error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags); 1655303970Savg if (error != 0) { 1656303970Savg /* 1657303970Savg * If we've got a locking error, then the vnode 1658303970Savg * got reclaimed because of a force unmount. 1659303970Savg * We never enter doomed vnodes into the name cache. 1660303970Savg */ 1661303970Savg *vpp = NULL; 1662303970Savg return (error); 1663303970Savg } 1664303970Savg 1665303970Savg if ((cnp->cn_flags & ISDOTDOT) == 0) 1666303970Savg break; 1667303970Savg 1668303970Savg ZFS_ENTER(zfsvfs); 1669303970Savg if (zdp->z_sa_hdl == NULL) { 1670303970Savg error = SET_ERROR(EIO); 1671303970Savg } else { 1672303970Savg error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 1673303970Savg &parent, sizeof (parent)); 1674303970Savg } 1675303970Savg if (error != 0) { 1676303970Savg ZFS_EXIT(zfsvfs); 1677303970Savg vput(ZTOV(zp)); 1678303970Savg break; 1679303970Savg } 1680303970Savg if (zp->z_id == parent) { 1681303970Savg ZFS_EXIT(zfsvfs); 1682303970Savg break; 1683303970Savg } 1684303970Savg vput(ZTOV(zp)); 1685303970Savg } 1686303970Savg 1687303970Savgout: 1688303970Savg if (error != 0) 1689303970Savg *vpp = NULL; 1690303970Savg 1691168404Spjd /* Translate errors and add SAVENAME when needed. */ 1692168404Spjd if (cnp->cn_flags & ISLASTCN) { 1693168404Spjd switch (nameiop) { 1694168404Spjd case CREATE: 1695168404Spjd case RENAME: 1696168404Spjd if (error == ENOENT) { 1697168404Spjd error = EJUSTRETURN; 1698168404Spjd cnp->cn_flags |= SAVENAME; 1699168404Spjd break; 1700168404Spjd } 1701168404Spjd /* FALLTHROUGH */ 1702168404Spjd case DELETE: 1703168404Spjd if (error == 0) 1704168404Spjd cnp->cn_flags |= SAVENAME; 1705168404Spjd break; 1706168404Spjd } 1707168404Spjd } 1708169198Spjd 1709303970Savg /* Insert name into cache (as non-existent) if appropriate. */ 1710303970Savg if (zfsvfs->z_use_namecache && 1711303970Savg error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0) 1712303970Savg cache_enter(dvp, NULL, cnp); 1713168404Spjd 1714303970Savg /* Insert name into cache if appropriate. */ 1715303970Savg if (zfsvfs->z_use_namecache && 1716303970Savg error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1717168404Spjd if (!(cnp->cn_flags & ISLASTCN) || 1718168404Spjd (nameiop != DELETE && nameiop != RENAME)) { 1719168404Spjd cache_enter(dvp, *vpp, cnp); 1720168404Spjd } 1721168404Spjd } 1722168404Spjd 1723168404Spjd return (error); 1724168404Spjd} 1725168404Spjd 1726168404Spjd/* 1727168404Spjd * Attempt to create a new entry in a directory. If the entry 1728168404Spjd * already exists, truncate the file if permissible, else return 1729168404Spjd * an error. Return the vp of the created or trunc'd file. 1730168404Spjd * 1731168404Spjd * IN: dvp - vnode of directory to put new file entry in. 1732168404Spjd * name - name of new file entry. 1733168404Spjd * vap - attributes of new file. 1734168404Spjd * excl - flag indicating exclusive or non-exclusive mode. 1735168404Spjd * mode - mode to open file with. 1736168404Spjd * cr - credentials of caller. 1737168404Spjd * flag - large file flag [UNUSED]. 1738185029Spjd * ct - caller context 1739268464Sdelphij * vsecp - ACL to be set 1740168404Spjd * 1741168404Spjd * OUT: vpp - vnode of created or trunc'd entry. 1742168404Spjd * 1743251631Sdelphij * RETURN: 0 on success, error code on failure. 1744168404Spjd * 1745168404Spjd * Timestamps: 1746168404Spjd * dvp - ctime|mtime updated if new entry created 1747168404Spjd * vp - ctime|mtime always, atime if new 1748168404Spjd */ 1749185029Spjd 1750168404Spjd/* ARGSUSED */ 1751168404Spjdstatic int 1752168962Spjdzfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 1753185029Spjd vnode_t **vpp, cred_t *cr, kthread_t *td) 1754168404Spjd{ 1755168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1756168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1757185029Spjd zilog_t *zilog; 1758185029Spjd objset_t *os; 1759168404Spjd dmu_tx_t *tx; 1760168404Spjd int error; 1761209962Smm ksid_t *ksid; 1762209962Smm uid_t uid; 1763209962Smm gid_t gid = crgetgid(cr); 1764219089Spjd zfs_acl_ids_t acl_ids; 1765209962Smm boolean_t fuid_dirtied; 1766185029Spjd void *vsecp = NULL; 1767185029Spjd int flag = 0; 1768303970Savg uint64_t txtype; 1769168404Spjd 1770185029Spjd /* 1771185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 1772185029Spjd * make sure file system is at proper version 1773185029Spjd */ 1774185029Spjd 1775209962Smm ksid = crgetsid(cr, KSID_OWNER); 1776209962Smm if (ksid) 1777209962Smm uid = ksid_getid(ksid); 1778209962Smm else 1779209962Smm uid = crgetuid(cr); 1780219089Spjd 1781185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 1782185029Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 1783219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1784249195Smm return (SET_ERROR(EINVAL)); 1785185029Spjd 1786168404Spjd ZFS_ENTER(zfsvfs); 1787185029Spjd ZFS_VERIFY_ZP(dzp); 1788185029Spjd os = zfsvfs->z_os; 1789185029Spjd zilog = zfsvfs->z_log; 1790168404Spjd 1791185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1792185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1793185029Spjd ZFS_EXIT(zfsvfs); 1794249195Smm return (SET_ERROR(EILSEQ)); 1795185029Spjd } 1796185029Spjd 1797185029Spjd if (vap->va_mask & AT_XVATTR) { 1798197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 1799185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 1800185029Spjd ZFS_EXIT(zfsvfs); 1801185029Spjd return (error); 1802185029Spjd } 1803185029Spjd } 1804260704Savg 1805168404Spjd *vpp = NULL; 1806168404Spjd 1807182905Strasz if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 1808182905Strasz vap->va_mode &= ~S_ISVTX; 1809168404Spjd 1810303970Savg error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 1811303970Savg if (error) { 1812303970Savg ZFS_EXIT(zfsvfs); 1813303970Savg return (error); 1814303970Savg } 1815303970Savg ASSERT3P(zp, ==, NULL); 1816185029Spjd 1817303970Savg /* 1818303970Savg * Create a new file object and update the directory 1819303970Savg * to reference it. 1820303970Savg */ 1821303970Savg if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1822303970Savg goto out; 1823168404Spjd } 1824219089Spjd 1825303970Savg /* 1826303970Savg * We only support the creation of regular files in 1827303970Savg * extended attribute directories. 1828303970Savg */ 1829168404Spjd 1830303970Savg if ((dzp->z_pflags & ZFS_XATTR) && 1831303970Savg (vap->va_type != VREG)) { 1832303970Savg error = SET_ERROR(EINVAL); 1833303970Savg goto out; 1834303970Savg } 1835168404Spjd 1836303970Savg if ((error = zfs_acl_ids_create(dzp, 0, vap, 1837303970Savg cr, vsecp, &acl_ids)) != 0) 1838303970Savg goto out; 1839219089Spjd 1840303970Savg if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 1841303970Savg zfs_acl_ids_free(&acl_ids); 1842303970Savg error = SET_ERROR(EDQUOT); 1843303970Savg goto out; 1844303970Savg } 1845168404Spjd 1846303970Savg getnewvnode_reserve(1); 1847209962Smm 1848303970Savg tx = dmu_tx_create(os); 1849209962Smm 1850303970Savg dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 1851303970Savg ZFS_SA_BASE_ATTR_SIZE); 1852219089Spjd 1853303970Savg fuid_dirtied = zfsvfs->z_fuid_dirty; 1854303970Savg if (fuid_dirtied) 1855303970Savg zfs_fuid_txhold(zfsvfs, tx); 1856303970Savg dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 1857303970Savg dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 1858303970Savg if (!zfsvfs->z_use_sa && 1859303970Savg acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1860303970Savg dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1861303970Savg 0, acl_ids.z_aclp->z_acl_bytes); 1862303970Savg } 1863303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 1864303970Savg if (error) { 1865209962Smm zfs_acl_ids_free(&acl_ids); 1866303970Savg dmu_tx_abort(tx); 1867303970Savg getnewvnode_drop_reserve(); 1868303970Savg ZFS_EXIT(zfsvfs); 1869303970Savg return (error); 1870303970Savg } 1871303970Savg zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1872185029Spjd 1873303970Savg if (fuid_dirtied) 1874303970Savg zfs_fuid_sync(zfsvfs, tx); 1875219089Spjd 1876303970Savg (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 1877303970Savg txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1878303970Savg zfs_log_create(zilog, tx, txtype, dzp, zp, name, 1879303970Savg vsecp, acl_ids.z_fuidp, vap); 1880303970Savg zfs_acl_ids_free(&acl_ids); 1881303970Savg dmu_tx_commit(tx); 1882168404Spjd 1883303970Savg getnewvnode_drop_reserve(); 1884168404Spjd 1885168404Spjdout: 1886303970Savg if (error == 0) { 1887168962Spjd *vpp = ZTOV(zp); 1888168404Spjd } 1889168404Spjd 1890219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1891219089Spjd zil_commit(zilog, 0); 1892219089Spjd 1893168404Spjd ZFS_EXIT(zfsvfs); 1894168404Spjd return (error); 1895168404Spjd} 1896168404Spjd 1897168404Spjd/* 1898168404Spjd * Remove an entry from a directory. 1899168404Spjd * 1900168404Spjd * IN: dvp - vnode of directory to remove entry from. 1901168404Spjd * name - name of entry to remove. 1902168404Spjd * cr - credentials of caller. 1903185029Spjd * ct - caller context 1904185029Spjd * flags - case flags 1905168404Spjd * 1906251631Sdelphij * RETURN: 0 on success, error code on failure. 1907168404Spjd * 1908168404Spjd * Timestamps: 1909168404Spjd * dvp - ctime|mtime 1910168404Spjd * vp - ctime (if nlink > 0) 1911168404Spjd */ 1912219089Spjd 1913185029Spjd/*ARGSUSED*/ 1914168404Spjdstatic int 1915303970Savgzfs_remove(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) 1916168404Spjd{ 1917303970Savg znode_t *dzp = VTOZ(dvp); 1918303970Savg znode_t *zp = VTOZ(vp); 1919219089Spjd znode_t *xzp; 1920168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1921185029Spjd zilog_t *zilog; 1922168962Spjd uint64_t acl_obj, xattr_obj; 1923219089Spjd uint64_t obj = 0; 1924168404Spjd dmu_tx_t *tx; 1925185029Spjd boolean_t unlinked, toobig = FALSE; 1926185029Spjd uint64_t txtype; 1927168404Spjd int error; 1928168404Spjd 1929168404Spjd ZFS_ENTER(zfsvfs); 1930185029Spjd ZFS_VERIFY_ZP(dzp); 1931303970Savg ZFS_VERIFY_ZP(zp); 1932185029Spjd zilog = zfsvfs->z_log; 1933303970Savg zp = VTOZ(vp); 1934168404Spjd 1935219089Spjd xattr_obj = 0; 1936219089Spjd xzp = NULL; 1937168404Spjd 1938168962Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1939168404Spjd goto out; 1940168962Spjd } 1941168404Spjd 1942168962Spjd /* 1943168962Spjd * Need to use rmdir for removing directories. 1944168962Spjd */ 1945168962Spjd if (vp->v_type == VDIR) { 1946249195Smm error = SET_ERROR(EPERM); 1947168962Spjd goto out; 1948168962Spjd } 1949168962Spjd 1950185029Spjd vnevent_remove(vp, dvp, name, ct); 1951168962Spjd 1952303970Savg obj = zp->z_id; 1953168404Spjd 1954303970Savg /* are there any extended attributes? */ 1955303970Savg error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1956303970Savg &xattr_obj, sizeof (xattr_obj)); 1957303970Savg if (error == 0 && xattr_obj) { 1958303970Savg error = zfs_zget(zfsvfs, xattr_obj, &xzp); 1959303970Savg ASSERT0(error); 1960303970Savg } 1961168962Spjd 1962168404Spjd /* 1963168404Spjd * We may delete the znode now, or we may put it in the unlinked set; 1964168404Spjd * it depends on whether we're the last link, and on whether there are 1965168404Spjd * other holds on the vnode. So we dmu_tx_hold() the right things to 1966168404Spjd * allow for either case. 1967168404Spjd */ 1968168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1969168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1970219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1971219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1972219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 1973168404Spjd 1974303970Savg if (xzp) { 1975219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 1976219089Spjd dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1977168404Spjd } 1978168404Spjd 1979168404Spjd /* charge as an update -- would be nice not to charge at all */ 1980168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1981168404Spjd 1982268464Sdelphij /* 1983294803Smav * Mark this transaction as typically resulting in a net free of space 1984268464Sdelphij */ 1985294803Smav dmu_tx_mark_netfree(tx); 1986268464Sdelphij 1987303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 1988168404Spjd if (error) { 1989168404Spjd dmu_tx_abort(tx); 1990168404Spjd ZFS_EXIT(zfsvfs); 1991168404Spjd return (error); 1992168404Spjd } 1993168404Spjd 1994168404Spjd /* 1995168404Spjd * Remove the directory entry. 1996168404Spjd */ 1997303970Savg error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked); 1998168404Spjd 1999168404Spjd if (error) { 2000168404Spjd dmu_tx_commit(tx); 2001168404Spjd goto out; 2002168404Spjd } 2003168404Spjd 2004219089Spjd if (unlinked) { 2005168404Spjd zfs_unlinked_add(zp, tx); 2006243268Savg vp->v_vflag |= VV_NOSYNC; 2007168962Spjd } 2008168404Spjd 2009185029Spjd txtype = TX_REMOVE; 2010219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 2011168404Spjd 2012168404Spjd dmu_tx_commit(tx); 2013168404Spjdout: 2014185029Spjd 2015219089Spjd if (xzp) 2016303970Savg vrele(ZTOV(xzp)); 2017168962Spjd 2018219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2019219089Spjd zil_commit(zilog, 0); 2020219089Spjd 2021168404Spjd ZFS_EXIT(zfsvfs); 2022168404Spjd return (error); 2023168404Spjd} 2024168404Spjd 2025168404Spjd/* 2026168404Spjd * Create a new directory and insert it into dvp using the name 2027168404Spjd * provided. Return a pointer to the inserted directory. 2028168404Spjd * 2029168404Spjd * IN: dvp - vnode of directory to add subdir to. 2030168404Spjd * dirname - name of new directory. 2031168404Spjd * vap - attributes of new directory. 2032168404Spjd * cr - credentials of caller. 2033185029Spjd * ct - caller context 2034251631Sdelphij * flags - case flags 2035185029Spjd * vsecp - ACL to be set 2036168404Spjd * 2037168404Spjd * OUT: vpp - vnode of created directory. 2038168404Spjd * 2039251631Sdelphij * RETURN: 0 on success, error code on failure. 2040168404Spjd * 2041168404Spjd * Timestamps: 2042168404Spjd * dvp - ctime|mtime updated 2043168404Spjd * vp - ctime|mtime|atime updated 2044168404Spjd */ 2045185029Spjd/*ARGSUSED*/ 2046168404Spjdstatic int 2047303970Savgzfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr) 2048168404Spjd{ 2049168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 2050168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2051185029Spjd zilog_t *zilog; 2052185029Spjd uint64_t txtype; 2053168404Spjd dmu_tx_t *tx; 2054168404Spjd int error; 2055209962Smm ksid_t *ksid; 2056209962Smm uid_t uid; 2057209962Smm gid_t gid = crgetgid(cr); 2058219089Spjd zfs_acl_ids_t acl_ids; 2059209962Smm boolean_t fuid_dirtied; 2060168404Spjd 2061168404Spjd ASSERT(vap->va_type == VDIR); 2062168404Spjd 2063185029Spjd /* 2064185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 2065185029Spjd * make sure file system is at proper version 2066185029Spjd */ 2067185029Spjd 2068209962Smm ksid = crgetsid(cr, KSID_OWNER); 2069209962Smm if (ksid) 2070209962Smm uid = ksid_getid(ksid); 2071209962Smm else 2072209962Smm uid = crgetuid(cr); 2073185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2074303970Savg ((vap->va_mask & AT_XVATTR) || 2075219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2076249195Smm return (SET_ERROR(EINVAL)); 2077185029Spjd 2078168404Spjd ZFS_ENTER(zfsvfs); 2079185029Spjd ZFS_VERIFY_ZP(dzp); 2080185029Spjd zilog = zfsvfs->z_log; 2081168404Spjd 2082219089Spjd if (dzp->z_pflags & ZFS_XATTR) { 2083168404Spjd ZFS_EXIT(zfsvfs); 2084249195Smm return (SET_ERROR(EINVAL)); 2085168404Spjd } 2086168404Spjd 2087185029Spjd if (zfsvfs->z_utf8 && u8_validate(dirname, 2088185029Spjd strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2089185029Spjd ZFS_EXIT(zfsvfs); 2090249195Smm return (SET_ERROR(EILSEQ)); 2091185029Spjd } 2092185029Spjd 2093219089Spjd if (vap->va_mask & AT_XVATTR) { 2094197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2095185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 2096185029Spjd ZFS_EXIT(zfsvfs); 2097185029Spjd return (error); 2098185029Spjd } 2099219089Spjd } 2100185029Spjd 2101219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 2102303970Savg NULL, &acl_ids)) != 0) { 2103219089Spjd ZFS_EXIT(zfsvfs); 2104219089Spjd return (error); 2105219089Spjd } 2106260704Savg 2107168404Spjd /* 2108168404Spjd * First make sure the new directory doesn't exist. 2109219089Spjd * 2110219089Spjd * Existence is checked first to make sure we don't return 2111219089Spjd * EACCES instead of EEXIST which can cause some applications 2112219089Spjd * to fail. 2113168404Spjd */ 2114185029Spjd *vpp = NULL; 2115185029Spjd 2116303970Savg if (error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW)) { 2117219089Spjd zfs_acl_ids_free(&acl_ids); 2118168404Spjd ZFS_EXIT(zfsvfs); 2119168404Spjd return (error); 2120168404Spjd } 2121303970Savg ASSERT3P(zp, ==, NULL); 2122168404Spjd 2123185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 2124219089Spjd zfs_acl_ids_free(&acl_ids); 2125168404Spjd ZFS_EXIT(zfsvfs); 2126168404Spjd return (error); 2127168404Spjd } 2128168404Spjd 2129209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2130211932Smm zfs_acl_ids_free(&acl_ids); 2131209962Smm ZFS_EXIT(zfsvfs); 2132249195Smm return (SET_ERROR(EDQUOT)); 2133209962Smm } 2134209962Smm 2135168404Spjd /* 2136168404Spjd * Add a new entry to the directory. 2137168404Spjd */ 2138303970Savg getnewvnode_reserve(1); 2139168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2140168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 2141168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2142209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 2143209962Smm if (fuid_dirtied) 2144209962Smm zfs_fuid_txhold(zfsvfs, tx); 2145219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2146219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2147219089Spjd acl_ids.z_aclp->z_acl_bytes); 2148219089Spjd } 2149219089Spjd 2150219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2151219089Spjd ZFS_SA_BASE_ATTR_SIZE); 2152219089Spjd 2153303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 2154168404Spjd if (error) { 2155219089Spjd zfs_acl_ids_free(&acl_ids); 2156168404Spjd dmu_tx_abort(tx); 2157260704Savg getnewvnode_drop_reserve(); 2158168404Spjd ZFS_EXIT(zfsvfs); 2159168404Spjd return (error); 2160168404Spjd } 2161168404Spjd 2162168404Spjd /* 2163168404Spjd * Create new node. 2164168404Spjd */ 2165219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2166168404Spjd 2167209962Smm if (fuid_dirtied) 2168209962Smm zfs_fuid_sync(zfsvfs, tx); 2169219089Spjd 2170168404Spjd /* 2171168404Spjd * Now put new name in parent dir. 2172168404Spjd */ 2173303970Savg (void) zfs_link_create(dzp, dirname, zp, tx, ZNEW); 2174168404Spjd 2175168404Spjd *vpp = ZTOV(zp); 2176168404Spjd 2177303970Savg txtype = zfs_log_create_txtype(Z_DIR, NULL, vap); 2178303970Savg zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL, 2179209962Smm acl_ids.z_fuidp, vap); 2180185029Spjd 2181209962Smm zfs_acl_ids_free(&acl_ids); 2182219089Spjd 2183168404Spjd dmu_tx_commit(tx); 2184168404Spjd 2185260704Savg getnewvnode_drop_reserve(); 2186260704Savg 2187219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2188219089Spjd zil_commit(zilog, 0); 2189219089Spjd 2190168404Spjd ZFS_EXIT(zfsvfs); 2191168404Spjd return (0); 2192168404Spjd} 2193168404Spjd 2194168404Spjd/* 2195168404Spjd * Remove a directory subdir entry. If the current working 2196168404Spjd * directory is the same as the subdir to be removed, the 2197168404Spjd * remove will fail. 2198168404Spjd * 2199168404Spjd * IN: dvp - vnode of directory to remove from. 2200168404Spjd * name - name of directory to be removed. 2201168404Spjd * cwd - vnode of current working directory. 2202168404Spjd * cr - credentials of caller. 2203185029Spjd * ct - caller context 2204185029Spjd * flags - case flags 2205168404Spjd * 2206251631Sdelphij * RETURN: 0 on success, error code on failure. 2207168404Spjd * 2208168404Spjd * Timestamps: 2209168404Spjd * dvp - ctime|mtime updated 2210168404Spjd */ 2211185029Spjd/*ARGSUSED*/ 2212168404Spjdstatic int 2213303970Savgzfs_rmdir(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) 2214168404Spjd{ 2215168404Spjd znode_t *dzp = VTOZ(dvp); 2216303970Savg znode_t *zp = VTOZ(vp); 2217168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2218185029Spjd zilog_t *zilog; 2219168404Spjd dmu_tx_t *tx; 2220168404Spjd int error; 2221168404Spjd 2222168962Spjd ZFS_ENTER(zfsvfs); 2223185029Spjd ZFS_VERIFY_ZP(dzp); 2224303970Savg ZFS_VERIFY_ZP(zp); 2225185029Spjd zilog = zfsvfs->z_log; 2226168404Spjd 2227168404Spjd 2228168404Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2229168404Spjd goto out; 2230168404Spjd } 2231168404Spjd 2232168962Spjd if (vp->v_type != VDIR) { 2233249195Smm error = SET_ERROR(ENOTDIR); 2234168962Spjd goto out; 2235168962Spjd } 2236168962Spjd 2237185029Spjd vnevent_rmdir(vp, dvp, name, ct); 2238168962Spjd 2239168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2240168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2241219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2242168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2243219089Spjd zfs_sa_upgrade_txholds(tx, zp); 2244219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 2245304122Savg dmu_tx_mark_netfree(tx); 2246303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 2247168404Spjd if (error) { 2248168404Spjd dmu_tx_abort(tx); 2249168404Spjd ZFS_EXIT(zfsvfs); 2250168404Spjd return (error); 2251168404Spjd } 2252168404Spjd 2253168404Spjd cache_purge(dvp); 2254168404Spjd 2255303970Savg error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL); 2256168404Spjd 2257185029Spjd if (error == 0) { 2258185029Spjd uint64_t txtype = TX_RMDIR; 2259219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2260185029Spjd } 2261168404Spjd 2262168404Spjd dmu_tx_commit(tx); 2263168404Spjd 2264168404Spjd cache_purge(vp); 2265168404Spjdout: 2266219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2267219089Spjd zil_commit(zilog, 0); 2268219089Spjd 2269168404Spjd ZFS_EXIT(zfsvfs); 2270168404Spjd return (error); 2271168404Spjd} 2272168404Spjd 2273168404Spjd/* 2274168404Spjd * Read as many directory entries as will fit into the provided 2275168404Spjd * buffer from the given directory cursor position (specified in 2276251631Sdelphij * the uio structure). 2277168404Spjd * 2278168404Spjd * IN: vp - vnode of directory to read. 2279168404Spjd * uio - structure supplying read location, range info, 2280168404Spjd * and return buffer. 2281168404Spjd * cr - credentials of caller. 2282185029Spjd * ct - caller context 2283185029Spjd * flags - case flags 2284168404Spjd * 2285168404Spjd * OUT: uio - updated offset and range, buffer filled. 2286168404Spjd * eofp - set to true if end-of-file detected. 2287168404Spjd * 2288251631Sdelphij * RETURN: 0 on success, error code on failure. 2289168404Spjd * 2290168404Spjd * Timestamps: 2291168404Spjd * vp - atime updated 2292168404Spjd * 2293168404Spjd * Note that the low 4 bits of the cookie returned by zap is always zero. 2294168404Spjd * This allows us to use the low range for "special" directory entries: 2295168404Spjd * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2296168404Spjd * we use the offset 2 for the '.zfs' directory. 2297168404Spjd */ 2298168404Spjd/* ARGSUSED */ 2299168404Spjdstatic int 2300168962Spjdzfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) 2301168404Spjd{ 2302168404Spjd znode_t *zp = VTOZ(vp); 2303168404Spjd iovec_t *iovp; 2304185029Spjd edirent_t *eodp; 2305168404Spjd dirent64_t *odp; 2306168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2307168404Spjd objset_t *os; 2308168404Spjd caddr_t outbuf; 2309168404Spjd size_t bufsize; 2310168404Spjd zap_cursor_t zc; 2311168404Spjd zap_attribute_t zap; 2312168404Spjd uint_t bytes_wanted; 2313168404Spjd uint64_t offset; /* must be unsigned; checks for < 1 */ 2314219089Spjd uint64_t parent; 2315168404Spjd int local_eof; 2316168404Spjd int outcount; 2317168404Spjd int error; 2318168404Spjd uint8_t prefetch; 2319185029Spjd boolean_t check_sysattrs; 2320168404Spjd uint8_t type; 2321168962Spjd int ncooks; 2322168962Spjd u_long *cooks = NULL; 2323185029Spjd int flags = 0; 2324168404Spjd 2325168404Spjd ZFS_ENTER(zfsvfs); 2326185029Spjd ZFS_VERIFY_ZP(zp); 2327168404Spjd 2328219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 2329219089Spjd &parent, sizeof (parent))) != 0) { 2330219089Spjd ZFS_EXIT(zfsvfs); 2331219089Spjd return (error); 2332219089Spjd } 2333219089Spjd 2334168404Spjd /* 2335168404Spjd * If we are not given an eof variable, 2336168404Spjd * use a local one. 2337168404Spjd */ 2338168404Spjd if (eofp == NULL) 2339168404Spjd eofp = &local_eof; 2340168404Spjd 2341168404Spjd /* 2342168404Spjd * Check for valid iov_len. 2343168404Spjd */ 2344168404Spjd if (uio->uio_iov->iov_len <= 0) { 2345168404Spjd ZFS_EXIT(zfsvfs); 2346249195Smm return (SET_ERROR(EINVAL)); 2347168404Spjd } 2348168404Spjd 2349168404Spjd /* 2350168404Spjd * Quit if directory has been removed (posix) 2351168404Spjd */ 2352168404Spjd if ((*eofp = zp->z_unlinked) != 0) { 2353168404Spjd ZFS_EXIT(zfsvfs); 2354168404Spjd return (0); 2355168404Spjd } 2356168404Spjd 2357168404Spjd error = 0; 2358168404Spjd os = zfsvfs->z_os; 2359168404Spjd offset = uio->uio_loffset; 2360168404Spjd prefetch = zp->z_zn_prefetch; 2361168404Spjd 2362168404Spjd /* 2363168404Spjd * Initialize the iterator cursor. 2364168404Spjd */ 2365168404Spjd if (offset <= 3) { 2366168404Spjd /* 2367168404Spjd * Start iteration from the beginning of the directory. 2368168404Spjd */ 2369168404Spjd zap_cursor_init(&zc, os, zp->z_id); 2370168404Spjd } else { 2371168404Spjd /* 2372168404Spjd * The offset is a serialized cursor. 2373168404Spjd */ 2374168404Spjd zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2375168404Spjd } 2376168404Spjd 2377168404Spjd /* 2378168404Spjd * Get space to change directory entries into fs independent format. 2379168404Spjd */ 2380168404Spjd iovp = uio->uio_iov; 2381168404Spjd bytes_wanted = iovp->iov_len; 2382168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2383168404Spjd bufsize = bytes_wanted; 2384168404Spjd outbuf = kmem_alloc(bufsize, KM_SLEEP); 2385168404Spjd odp = (struct dirent64 *)outbuf; 2386168404Spjd } else { 2387168404Spjd bufsize = bytes_wanted; 2388247187Smm outbuf = NULL; 2389168404Spjd odp = (struct dirent64 *)iovp->iov_base; 2390168404Spjd } 2391185029Spjd eodp = (struct edirent *)odp; 2392168404Spjd 2393169170Spjd if (ncookies != NULL) { 2394168404Spjd /* 2395168404Spjd * Minimum entry size is dirent size and 1 byte for a file name. 2396168404Spjd */ 2397168962Spjd ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2398219404Spjd cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2399219404Spjd *cookies = cooks; 2400168962Spjd *ncookies = ncooks; 2401168404Spjd } 2402185029Spjd /* 2403185029Spjd * If this VFS supports the system attribute view interface; and 2404185029Spjd * we're looking at an extended attribute directory; and we care 2405185029Spjd * about normalization conflicts on this vfs; then we must check 2406185029Spjd * for normalization conflicts with the sysattr name space. 2407185029Spjd */ 2408185029Spjd#ifdef TODO 2409185029Spjd check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2410185029Spjd (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2411185029Spjd (flags & V_RDDIR_ENTFLAGS); 2412185029Spjd#else 2413185029Spjd check_sysattrs = 0; 2414185029Spjd#endif 2415168404Spjd 2416168404Spjd /* 2417168404Spjd * Transform to file-system independent format 2418168404Spjd */ 2419168404Spjd outcount = 0; 2420168404Spjd while (outcount < bytes_wanted) { 2421168404Spjd ino64_t objnum; 2422168404Spjd ushort_t reclen; 2423219089Spjd off64_t *next = NULL; 2424168404Spjd 2425168404Spjd /* 2426168404Spjd * Special case `.', `..', and `.zfs'. 2427168404Spjd */ 2428168404Spjd if (offset == 0) { 2429168404Spjd (void) strcpy(zap.za_name, "."); 2430185029Spjd zap.za_normalization_conflict = 0; 2431168404Spjd objnum = zp->z_id; 2432169108Spjd type = DT_DIR; 2433168404Spjd } else if (offset == 1) { 2434168404Spjd (void) strcpy(zap.za_name, ".."); 2435185029Spjd zap.za_normalization_conflict = 0; 2436219089Spjd objnum = parent; 2437169108Spjd type = DT_DIR; 2438168404Spjd } else if (offset == 2 && zfs_show_ctldir(zp)) { 2439168404Spjd (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2440185029Spjd zap.za_normalization_conflict = 0; 2441168404Spjd objnum = ZFSCTL_INO_ROOT; 2442169108Spjd type = DT_DIR; 2443168404Spjd } else { 2444168404Spjd /* 2445168404Spjd * Grab next entry. 2446168404Spjd */ 2447168404Spjd if (error = zap_cursor_retrieve(&zc, &zap)) { 2448168404Spjd if ((*eofp = (error == ENOENT)) != 0) 2449168404Spjd break; 2450168404Spjd else 2451168404Spjd goto update; 2452168404Spjd } 2453168404Spjd 2454168404Spjd if (zap.za_integer_length != 8 || 2455168404Spjd zap.za_num_integers != 1) { 2456168404Spjd cmn_err(CE_WARN, "zap_readdir: bad directory " 2457168404Spjd "entry, obj = %lld, offset = %lld\n", 2458168404Spjd (u_longlong_t)zp->z_id, 2459168404Spjd (u_longlong_t)offset); 2460249195Smm error = SET_ERROR(ENXIO); 2461168404Spjd goto update; 2462168404Spjd } 2463168404Spjd 2464168404Spjd objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2465168404Spjd /* 2466168404Spjd * MacOS X can extract the object type here such as: 2467168404Spjd * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2468168404Spjd */ 2469168404Spjd type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2470185029Spjd 2471185029Spjd if (check_sysattrs && !zap.za_normalization_conflict) { 2472185029Spjd#ifdef TODO 2473185029Spjd zap.za_normalization_conflict = 2474185029Spjd xattr_sysattr_casechk(zap.za_name); 2475185029Spjd#else 2476185029Spjd panic("%s:%u: TODO", __func__, __LINE__); 2477185029Spjd#endif 2478185029Spjd } 2479168404Spjd } 2480168404Spjd 2481211932Smm if (flags & V_RDDIR_ACCFILTER) { 2482211932Smm /* 2483211932Smm * If we have no access at all, don't include 2484211932Smm * this entry in the returned information 2485211932Smm */ 2486211932Smm znode_t *ezp; 2487211932Smm if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2488211932Smm goto skip_entry; 2489211932Smm if (!zfs_has_access(ezp, cr)) { 2490303970Savg vrele(ZTOV(ezp)); 2491211932Smm goto skip_entry; 2492211932Smm } 2493303970Savg vrele(ZTOV(ezp)); 2494211932Smm } 2495211932Smm 2496185029Spjd if (flags & V_RDDIR_ENTFLAGS) 2497185029Spjd reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2498185029Spjd else 2499185029Spjd reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2500185029Spjd 2501168404Spjd /* 2502168404Spjd * Will this entry fit in the buffer? 2503168404Spjd */ 2504168404Spjd if (outcount + reclen > bufsize) { 2505168404Spjd /* 2506168404Spjd * Did we manage to fit anything in the buffer? 2507168404Spjd */ 2508168404Spjd if (!outcount) { 2509249195Smm error = SET_ERROR(EINVAL); 2510168404Spjd goto update; 2511168404Spjd } 2512168404Spjd break; 2513168404Spjd } 2514185029Spjd if (flags & V_RDDIR_ENTFLAGS) { 2515185029Spjd /* 2516185029Spjd * Add extended flag entry: 2517185029Spjd */ 2518185029Spjd eodp->ed_ino = objnum; 2519185029Spjd eodp->ed_reclen = reclen; 2520185029Spjd /* NOTE: ed_off is the offset for the *next* entry */ 2521185029Spjd next = &(eodp->ed_off); 2522185029Spjd eodp->ed_eflags = zap.za_normalization_conflict ? 2523185029Spjd ED_CASE_CONFLICT : 0; 2524185029Spjd (void) strncpy(eodp->ed_name, zap.za_name, 2525185029Spjd EDIRENT_NAMELEN(reclen)); 2526185029Spjd eodp = (edirent_t *)((intptr_t)eodp + reclen); 2527185029Spjd } else { 2528185029Spjd /* 2529185029Spjd * Add normal entry: 2530185029Spjd */ 2531185029Spjd odp->d_ino = objnum; 2532185029Spjd odp->d_reclen = reclen; 2533185029Spjd odp->d_namlen = strlen(zap.za_name); 2534185029Spjd (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2535185029Spjd odp->d_type = type; 2536185029Spjd odp = (dirent64_t *)((intptr_t)odp + reclen); 2537185029Spjd } 2538168404Spjd outcount += reclen; 2539168404Spjd 2540168404Spjd ASSERT(outcount <= bufsize); 2541168404Spjd 2542168404Spjd /* Prefetch znode */ 2543168404Spjd if (prefetch) 2544286705Smav dmu_prefetch(os, objnum, 0, 0, 0, 2545286705Smav ZIO_PRIORITY_SYNC_READ); 2546168404Spjd 2547211932Smm skip_entry: 2548168404Spjd /* 2549168404Spjd * Move to the next entry, fill in the previous offset. 2550168404Spjd */ 2551168404Spjd if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2552168404Spjd zap_cursor_advance(&zc); 2553168404Spjd offset = zap_cursor_serialize(&zc); 2554168404Spjd } else { 2555168404Spjd offset += 1; 2556168404Spjd } 2557219404Spjd 2558219404Spjd if (cooks != NULL) { 2559219404Spjd *cooks++ = offset; 2560219404Spjd ncooks--; 2561219404Spjd KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2562219404Spjd } 2563168404Spjd } 2564168404Spjd zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2565168404Spjd 2566168404Spjd /* Subtract unused cookies */ 2567168962Spjd if (ncookies != NULL) 2568168962Spjd *ncookies -= ncooks; 2569168404Spjd 2570168404Spjd if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2571168404Spjd iovp->iov_base += outcount; 2572168404Spjd iovp->iov_len -= outcount; 2573168404Spjd uio->uio_resid -= outcount; 2574168404Spjd } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2575168404Spjd /* 2576168404Spjd * Reset the pointer. 2577168404Spjd */ 2578168404Spjd offset = uio->uio_loffset; 2579168404Spjd } 2580168404Spjd 2581168404Spjdupdate: 2582168404Spjd zap_cursor_fini(&zc); 2583168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2584168404Spjd kmem_free(outbuf, bufsize); 2585168404Spjd 2586168404Spjd if (error == ENOENT) 2587168404Spjd error = 0; 2588168404Spjd 2589168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2590168404Spjd 2591168404Spjd uio->uio_loffset = offset; 2592168404Spjd ZFS_EXIT(zfsvfs); 2593169107Spjd if (error != 0 && cookies != NULL) { 2594168962Spjd free(*cookies, M_TEMP); 2595168962Spjd *cookies = NULL; 2596168962Spjd *ncookies = 0; 2597168404Spjd } 2598168404Spjd return (error); 2599168404Spjd} 2600168404Spjd 2601185029Spjdulong_t zfs_fsync_sync_cnt = 4; 2602185029Spjd 2603168404Spjdstatic int 2604185029Spjdzfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2605168404Spjd{ 2606168962Spjd znode_t *zp = VTOZ(vp); 2607168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2608168404Spjd 2609185029Spjd (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2610185029Spjd 2611219089Spjd if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 2612219089Spjd ZFS_ENTER(zfsvfs); 2613219089Spjd ZFS_VERIFY_ZP(zp); 2614219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 2615219089Spjd ZFS_EXIT(zfsvfs); 2616219089Spjd } 2617168404Spjd return (0); 2618168404Spjd} 2619168404Spjd 2620185029Spjd 2621168404Spjd/* 2622168404Spjd * Get the requested file attributes and place them in the provided 2623168404Spjd * vattr structure. 2624168404Spjd * 2625168404Spjd * IN: vp - vnode of file. 2626168404Spjd * vap - va_mask identifies requested attributes. 2627185029Spjd * If AT_XVATTR set, then optional attrs are requested 2628185029Spjd * flags - ATTR_NOACLCHECK (CIFS server context) 2629168404Spjd * cr - credentials of caller. 2630185029Spjd * ct - caller context 2631168404Spjd * 2632168404Spjd * OUT: vap - attribute values. 2633168404Spjd * 2634251631Sdelphij * RETURN: 0 (always succeeds). 2635168404Spjd */ 2636168404Spjd/* ARGSUSED */ 2637168404Spjdstatic int 2638185029Spjdzfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2639185029Spjd caller_context_t *ct) 2640168404Spjd{ 2641168962Spjd znode_t *zp = VTOZ(vp); 2642168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2643185029Spjd int error = 0; 2644168962Spjd uint32_t blksize; 2645168962Spjd u_longlong_t nblocks; 2646185029Spjd uint64_t links; 2647224251Sdelphij uint64_t mtime[2], ctime[2], crtime[2], rdev; 2648185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2649185029Spjd xoptattr_t *xoap = NULL; 2650185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2651224251Sdelphij sa_bulk_attr_t bulk[4]; 2652219089Spjd int count = 0; 2653168404Spjd 2654168404Spjd ZFS_ENTER(zfsvfs); 2655185029Spjd ZFS_VERIFY_ZP(zp); 2656168404Spjd 2657219089Spjd zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2658219089Spjd 2659219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 2660219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 2661243807Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 2662224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2663224251Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 2664224251Sdelphij &rdev, 8); 2665219089Spjd 2666219089Spjd if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 2667219089Spjd ZFS_EXIT(zfsvfs); 2668219089Spjd return (error); 2669219089Spjd } 2670219089Spjd 2671168404Spjd /* 2672185029Spjd * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2673185029Spjd * Also, if we are the owner don't bother, since owner should 2674185029Spjd * always be allowed to read basic attributes of file. 2675185029Spjd */ 2676219089Spjd if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2677219089Spjd (vap->va_uid != crgetuid(cr))) { 2678185029Spjd if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2679185029Spjd skipaclchk, cr)) { 2680185029Spjd ZFS_EXIT(zfsvfs); 2681185029Spjd return (error); 2682185029Spjd } 2683185029Spjd } 2684185029Spjd 2685185029Spjd /* 2686168404Spjd * Return all attributes. It's cheaper to provide the answer 2687168404Spjd * than to determine whether we were asked the question. 2688168404Spjd */ 2689168404Spjd 2690219089Spjd vap->va_type = IFTOVT(zp->z_mode); 2691219089Spjd vap->va_mode = zp->z_mode & ~S_IFMT; 2692277300Ssmh#ifdef illumos 2693224252Sdelphij vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2694224252Sdelphij#else 2695224252Sdelphij vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 2696224252Sdelphij#endif 2697168404Spjd vap->va_nodeid = zp->z_id; 2698185029Spjd if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2699219089Spjd links = zp->z_links + 1; 2700185029Spjd else 2701219089Spjd links = zp->z_links; 2702229425Sdim vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! */ 2703219089Spjd vap->va_size = zp->z_size; 2704277300Ssmh#ifdef illumos 2705224252Sdelphij vap->va_rdev = vp->v_rdev; 2706224252Sdelphij#else 2707224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2708224251Sdelphij vap->va_rdev = zfs_cmpldev(rdev); 2709224252Sdelphij#endif 2710168404Spjd vap->va_seq = zp->z_seq; 2711168404Spjd vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2712272467Saraujo vap->va_filerev = zp->z_seq; 2713168404Spjd 2714185029Spjd /* 2715185029Spjd * Add in any requested optional attributes and the create time. 2716185029Spjd * Also set the corresponding bits in the returned attribute bitmap. 2717185029Spjd */ 2718185029Spjd if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2719185029Spjd if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2720185029Spjd xoap->xoa_archive = 2721219089Spjd ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2722185029Spjd XVA_SET_RTN(xvap, XAT_ARCHIVE); 2723185029Spjd } 2724185029Spjd 2725185029Spjd if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2726185029Spjd xoap->xoa_readonly = 2727219089Spjd ((zp->z_pflags & ZFS_READONLY) != 0); 2728185029Spjd XVA_SET_RTN(xvap, XAT_READONLY); 2729185029Spjd } 2730185029Spjd 2731185029Spjd if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2732185029Spjd xoap->xoa_system = 2733219089Spjd ((zp->z_pflags & ZFS_SYSTEM) != 0); 2734185029Spjd XVA_SET_RTN(xvap, XAT_SYSTEM); 2735185029Spjd } 2736185029Spjd 2737185029Spjd if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2738185029Spjd xoap->xoa_hidden = 2739219089Spjd ((zp->z_pflags & ZFS_HIDDEN) != 0); 2740185029Spjd XVA_SET_RTN(xvap, XAT_HIDDEN); 2741185029Spjd } 2742185029Spjd 2743185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2744185029Spjd xoap->xoa_nounlink = 2745219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2746185029Spjd XVA_SET_RTN(xvap, XAT_NOUNLINK); 2747185029Spjd } 2748185029Spjd 2749185029Spjd if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2750185029Spjd xoap->xoa_immutable = 2751219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2752185029Spjd XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2753185029Spjd } 2754185029Spjd 2755185029Spjd if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2756185029Spjd xoap->xoa_appendonly = 2757219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2758185029Spjd XVA_SET_RTN(xvap, XAT_APPENDONLY); 2759185029Spjd } 2760185029Spjd 2761185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2762185029Spjd xoap->xoa_nodump = 2763219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0); 2764185029Spjd XVA_SET_RTN(xvap, XAT_NODUMP); 2765185029Spjd } 2766185029Spjd 2767185029Spjd if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2768185029Spjd xoap->xoa_opaque = 2769219089Spjd ((zp->z_pflags & ZFS_OPAQUE) != 0); 2770185029Spjd XVA_SET_RTN(xvap, XAT_OPAQUE); 2771185029Spjd } 2772185029Spjd 2773185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2774185029Spjd xoap->xoa_av_quarantined = 2775219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2776185029Spjd XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2777185029Spjd } 2778185029Spjd 2779185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2780185029Spjd xoap->xoa_av_modified = 2781219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2782185029Spjd XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2783185029Spjd } 2784185029Spjd 2785185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2786219089Spjd vp->v_type == VREG) { 2787219089Spjd zfs_sa_get_scanstamp(zp, xvap); 2788185029Spjd } 2789185029Spjd 2790219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2791219089Spjd xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 2792219089Spjd XVA_SET_RTN(xvap, XAT_REPARSE); 2793219089Spjd } 2794219089Spjd if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 2795219089Spjd xoap->xoa_generation = zp->z_gen; 2796219089Spjd XVA_SET_RTN(xvap, XAT_GEN); 2797219089Spjd } 2798219089Spjd 2799219089Spjd if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2800219089Spjd xoap->xoa_offline = 2801219089Spjd ((zp->z_pflags & ZFS_OFFLINE) != 0); 2802219089Spjd XVA_SET_RTN(xvap, XAT_OFFLINE); 2803219089Spjd } 2804219089Spjd 2805219089Spjd if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2806219089Spjd xoap->xoa_sparse = 2807219089Spjd ((zp->z_pflags & ZFS_SPARSE) != 0); 2808219089Spjd XVA_SET_RTN(xvap, XAT_SPARSE); 2809219089Spjd } 2810185029Spjd } 2811185029Spjd 2812219089Spjd ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 2813219089Spjd ZFS_TIME_DECODE(&vap->va_mtime, mtime); 2814219089Spjd ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2815219089Spjd ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 2816168404Spjd 2817168404Spjd 2818219089Spjd sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 2819168404Spjd vap->va_blksize = blksize; 2820168404Spjd vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 2821168404Spjd 2822168404Spjd if (zp->z_blksz == 0) { 2823168404Spjd /* 2824168404Spjd * Block size hasn't been set; suggest maximal I/O transfers. 2825168404Spjd */ 2826168404Spjd vap->va_blksize = zfsvfs->z_max_blksz; 2827168404Spjd } 2828168404Spjd 2829168404Spjd ZFS_EXIT(zfsvfs); 2830168404Spjd return (0); 2831168404Spjd} 2832168404Spjd 2833168404Spjd/* 2834168404Spjd * Set the file attributes to the values contained in the 2835168404Spjd * vattr structure. 2836168404Spjd * 2837168404Spjd * IN: vp - vnode of file to be modified. 2838168404Spjd * vap - new attribute values. 2839185029Spjd * If AT_XVATTR set, then optional attrs are being set 2840168404Spjd * flags - ATTR_UTIME set if non-default time values provided. 2841185029Spjd * - ATTR_NOACLCHECK (CIFS context only). 2842168404Spjd * cr - credentials of caller. 2843185029Spjd * ct - caller context 2844168404Spjd * 2845251631Sdelphij * RETURN: 0 on success, error code on failure. 2846168404Spjd * 2847168404Spjd * Timestamps: 2848168404Spjd * vp - ctime updated, mtime updated if size changed. 2849168404Spjd */ 2850168404Spjd/* ARGSUSED */ 2851168404Spjdstatic int 2852168962Spjdzfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2853251631Sdelphij caller_context_t *ct) 2854168404Spjd{ 2855185029Spjd znode_t *zp = VTOZ(vp); 2856168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2857185029Spjd zilog_t *zilog; 2858168404Spjd dmu_tx_t *tx; 2859168404Spjd vattr_t oldva; 2860209962Smm xvattr_t tmpxvattr; 2861168962Spjd uint_t mask = vap->va_mask; 2862247187Smm uint_t saved_mask = 0; 2863197831Spjd uint64_t saved_mode; 2864168404Spjd int trim_mask = 0; 2865168404Spjd uint64_t new_mode; 2866209962Smm uint64_t new_uid, new_gid; 2867219089Spjd uint64_t xattr_obj; 2868219089Spjd uint64_t mtime[2], ctime[2]; 2869168404Spjd znode_t *attrzp; 2870168404Spjd int need_policy = FALSE; 2871219089Spjd int err, err2; 2872185029Spjd zfs_fuid_info_t *fuidp = NULL; 2873185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2874185029Spjd xoptattr_t *xoap; 2875219089Spjd zfs_acl_t *aclp; 2876185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2877219089Spjd boolean_t fuid_dirtied = B_FALSE; 2878219089Spjd sa_bulk_attr_t bulk[7], xattr_bulk[7]; 2879219089Spjd int count = 0, xattr_count = 0; 2880168404Spjd 2881168404Spjd if (mask == 0) 2882168404Spjd return (0); 2883168404Spjd 2884168962Spjd if (mask & AT_NOSET) 2885249195Smm return (SET_ERROR(EINVAL)); 2886168962Spjd 2887185029Spjd ZFS_ENTER(zfsvfs); 2888185029Spjd ZFS_VERIFY_ZP(zp); 2889185029Spjd 2890185029Spjd zilog = zfsvfs->z_log; 2891185029Spjd 2892185029Spjd /* 2893185029Spjd * Make sure that if we have ephemeral uid/gid or xvattr specified 2894185029Spjd * that file system is at proper version level 2895185029Spjd */ 2896185029Spjd 2897185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2898185029Spjd (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2899185029Spjd ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 2900185029Spjd (mask & AT_XVATTR))) { 2901185029Spjd ZFS_EXIT(zfsvfs); 2902249195Smm return (SET_ERROR(EINVAL)); 2903185029Spjd } 2904185029Spjd 2905185029Spjd if (mask & AT_SIZE && vp->v_type == VDIR) { 2906185029Spjd ZFS_EXIT(zfsvfs); 2907249195Smm return (SET_ERROR(EISDIR)); 2908185029Spjd } 2909168404Spjd 2910185029Spjd if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 2911185029Spjd ZFS_EXIT(zfsvfs); 2912249195Smm return (SET_ERROR(EINVAL)); 2913185029Spjd } 2914168404Spjd 2915185029Spjd /* 2916185029Spjd * If this is an xvattr_t, then get a pointer to the structure of 2917185029Spjd * optional attributes. If this is NULL, then we have a vattr_t. 2918185029Spjd */ 2919185029Spjd xoap = xva_getxoptattr(xvap); 2920168404Spjd 2921209962Smm xva_init(&tmpxvattr); 2922209962Smm 2923185029Spjd /* 2924185029Spjd * Immutable files can only alter immutable bit and atime 2925185029Spjd */ 2926219089Spjd if ((zp->z_pflags & ZFS_IMMUTABLE) && 2927185029Spjd ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 2928185029Spjd ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 2929185029Spjd ZFS_EXIT(zfsvfs); 2930249195Smm return (SET_ERROR(EPERM)); 2931185029Spjd } 2932185029Spjd 2933321579Smav /* 2934321579Smav * Note: ZFS_READONLY is handled in zfs_zaccess_common. 2935321579Smav */ 2936185029Spjd 2937185029Spjd /* 2938185029Spjd * Verify timestamps doesn't overflow 32 bits. 2939185029Spjd * ZFS can handle large timestamps, but 32bit syscalls can't 2940185029Spjd * handle times greater than 2039. This check should be removed 2941185029Spjd * once large timestamps are fully supported. 2942185029Spjd */ 2943185029Spjd if (mask & (AT_ATIME | AT_MTIME)) { 2944185029Spjd if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 2945185029Spjd ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 2946185029Spjd ZFS_EXIT(zfsvfs); 2947249195Smm return (SET_ERROR(EOVERFLOW)); 2948185029Spjd } 2949185029Spjd } 2950316391Sasomers if (xoap && (mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME) && 2951316391Sasomers TIMESPEC_OVERFLOW(&vap->va_birthtime)) { 2952316391Sasomers ZFS_EXIT(zfsvfs); 2953316391Sasomers return (SET_ERROR(EOVERFLOW)); 2954316391Sasomers } 2955185029Spjd 2956168404Spjd attrzp = NULL; 2957219089Spjd aclp = NULL; 2958168404Spjd 2959211932Smm /* Can this be moved to before the top label? */ 2960168404Spjd if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2961168404Spjd ZFS_EXIT(zfsvfs); 2962249195Smm return (SET_ERROR(EROFS)); 2963168404Spjd } 2964168404Spjd 2965168404Spjd /* 2966168404Spjd * First validate permissions 2967168404Spjd */ 2968168404Spjd 2969168404Spjd if (mask & AT_SIZE) { 2970168404Spjd /* 2971168404Spjd * XXX - Note, we are not providing any open 2972168404Spjd * mode flags here (like FNDELAY), so we may 2973168404Spjd * block if there are locks present... this 2974168404Spjd * should be addressed in openat(). 2975168404Spjd */ 2976185029Spjd /* XXX - would it be OK to generate a log record here? */ 2977185029Spjd err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 2978168404Spjd if (err) { 2979168404Spjd ZFS_EXIT(zfsvfs); 2980168404Spjd return (err); 2981168404Spjd } 2982168404Spjd } 2983168404Spjd 2984185029Spjd if (mask & (AT_ATIME|AT_MTIME) || 2985185029Spjd ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2986185029Spjd XVA_ISSET_REQ(xvap, XAT_READONLY) || 2987185029Spjd XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 2988219089Spjd XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 2989219089Spjd XVA_ISSET_REQ(xvap, XAT_SPARSE) || 2990185029Spjd XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 2991219089Spjd XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 2992185029Spjd need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 2993185029Spjd skipaclchk, cr); 2994219089Spjd } 2995168404Spjd 2996168404Spjd if (mask & (AT_UID|AT_GID)) { 2997168404Spjd int idmask = (mask & (AT_UID|AT_GID)); 2998168404Spjd int take_owner; 2999168404Spjd int take_group; 3000168404Spjd 3001168404Spjd /* 3002168404Spjd * NOTE: even if a new mode is being set, 3003168404Spjd * we may clear S_ISUID/S_ISGID bits. 3004168404Spjd */ 3005168404Spjd 3006168404Spjd if (!(mask & AT_MODE)) 3007219089Spjd vap->va_mode = zp->z_mode; 3008168404Spjd 3009168404Spjd /* 3010168404Spjd * Take ownership or chgrp to group we are a member of 3011168404Spjd */ 3012168404Spjd 3013168404Spjd take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 3014185029Spjd take_group = (mask & AT_GID) && 3015185029Spjd zfs_groupmember(zfsvfs, vap->va_gid, cr); 3016168404Spjd 3017168404Spjd /* 3018168404Spjd * If both AT_UID and AT_GID are set then take_owner and 3019168404Spjd * take_group must both be set in order to allow taking 3020168404Spjd * ownership. 3021168404Spjd * 3022168404Spjd * Otherwise, send the check through secpolicy_vnode_setattr() 3023168404Spjd * 3024168404Spjd */ 3025168404Spjd 3026168404Spjd if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 3027168404Spjd ((idmask == AT_UID) && take_owner) || 3028168404Spjd ((idmask == AT_GID) && take_group)) { 3029185029Spjd if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 3030185029Spjd skipaclchk, cr) == 0) { 3031168404Spjd /* 3032168404Spjd * Remove setuid/setgid for non-privileged users 3033168404Spjd */ 3034185029Spjd secpolicy_setid_clear(vap, vp, cr); 3035168404Spjd trim_mask = (mask & (AT_UID|AT_GID)); 3036168404Spjd } else { 3037168404Spjd need_policy = TRUE; 3038168404Spjd } 3039168404Spjd } else { 3040168404Spjd need_policy = TRUE; 3041168404Spjd } 3042168404Spjd } 3043168404Spjd 3044219089Spjd oldva.va_mode = zp->z_mode; 3045185029Spjd zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 3046185029Spjd if (mask & AT_XVATTR) { 3047209962Smm /* 3048209962Smm * Update xvattr mask to include only those attributes 3049209962Smm * that are actually changing. 3050209962Smm * 3051209962Smm * the bits will be restored prior to actually setting 3052209962Smm * the attributes so the caller thinks they were set. 3053209962Smm */ 3054209962Smm if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3055209962Smm if (xoap->xoa_appendonly != 3056219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 3057209962Smm need_policy = TRUE; 3058209962Smm } else { 3059209962Smm XVA_CLR_REQ(xvap, XAT_APPENDONLY); 3060209962Smm XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 3061209962Smm } 3062209962Smm } 3063209962Smm 3064209962Smm if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3065209962Smm if (xoap->xoa_nounlink != 3066219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 3067209962Smm need_policy = TRUE; 3068209962Smm } else { 3069209962Smm XVA_CLR_REQ(xvap, XAT_NOUNLINK); 3070209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 3071209962Smm } 3072209962Smm } 3073209962Smm 3074209962Smm if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3075209962Smm if (xoap->xoa_immutable != 3076219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 3077209962Smm need_policy = TRUE; 3078209962Smm } else { 3079209962Smm XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 3080209962Smm XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 3081209962Smm } 3082209962Smm } 3083209962Smm 3084209962Smm if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3085209962Smm if (xoap->xoa_nodump != 3086219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0)) { 3087209962Smm need_policy = TRUE; 3088209962Smm } else { 3089209962Smm XVA_CLR_REQ(xvap, XAT_NODUMP); 3090209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 3091209962Smm } 3092209962Smm } 3093209962Smm 3094209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3095209962Smm if (xoap->xoa_av_modified != 3096219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 3097209962Smm need_policy = TRUE; 3098209962Smm } else { 3099209962Smm XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 3100209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 3101209962Smm } 3102209962Smm } 3103209962Smm 3104209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3105209962Smm if ((vp->v_type != VREG && 3106209962Smm xoap->xoa_av_quarantined) || 3107209962Smm xoap->xoa_av_quarantined != 3108219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 3109209962Smm need_policy = TRUE; 3110209962Smm } else { 3111209962Smm XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 3112209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 3113209962Smm } 3114209962Smm } 3115209962Smm 3116219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3117219089Spjd ZFS_EXIT(zfsvfs); 3118249195Smm return (SET_ERROR(EPERM)); 3119219089Spjd } 3120219089Spjd 3121209962Smm if (need_policy == FALSE && 3122209962Smm (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3123209962Smm XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3124185029Spjd need_policy = TRUE; 3125185029Spjd } 3126185029Spjd } 3127185029Spjd 3128168404Spjd if (mask & AT_MODE) { 3129185029Spjd if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 3130168962Spjd err = secpolicy_setid_setsticky_clear(vp, vap, 3131168962Spjd &oldva, cr); 3132168962Spjd if (err) { 3133168962Spjd ZFS_EXIT(zfsvfs); 3134168962Spjd return (err); 3135168962Spjd } 3136168404Spjd trim_mask |= AT_MODE; 3137168404Spjd } else { 3138168404Spjd need_policy = TRUE; 3139168404Spjd } 3140168404Spjd } 3141168404Spjd 3142168404Spjd if (need_policy) { 3143168404Spjd /* 3144168404Spjd * If trim_mask is set then take ownership 3145168404Spjd * has been granted or write_acl is present and user 3146168404Spjd * has the ability to modify mode. In that case remove 3147168404Spjd * UID|GID and or MODE from mask so that 3148168404Spjd * secpolicy_vnode_setattr() doesn't revoke it. 3149168404Spjd */ 3150168404Spjd 3151168404Spjd if (trim_mask) { 3152168404Spjd saved_mask = vap->va_mask; 3153168404Spjd vap->va_mask &= ~trim_mask; 3154197831Spjd if (trim_mask & AT_MODE) { 3155197831Spjd /* 3156197831Spjd * Save the mode, as secpolicy_vnode_setattr() 3157197831Spjd * will overwrite it with ova.va_mode. 3158197831Spjd */ 3159197831Spjd saved_mode = vap->va_mode; 3160197831Spjd } 3161168404Spjd } 3162168404Spjd err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3163185029Spjd (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3164168404Spjd if (err) { 3165168404Spjd ZFS_EXIT(zfsvfs); 3166168404Spjd return (err); 3167168404Spjd } 3168168404Spjd 3169197831Spjd if (trim_mask) { 3170168404Spjd vap->va_mask |= saved_mask; 3171197831Spjd if (trim_mask & AT_MODE) { 3172197831Spjd /* 3173197831Spjd * Recover the mode after 3174197831Spjd * secpolicy_vnode_setattr(). 3175197831Spjd */ 3176197831Spjd vap->va_mode = saved_mode; 3177197831Spjd } 3178197831Spjd } 3179168404Spjd } 3180168404Spjd 3181168404Spjd /* 3182168404Spjd * secpolicy_vnode_setattr, or take ownership may have 3183168404Spjd * changed va_mask 3184168404Spjd */ 3185168404Spjd mask = vap->va_mask; 3186168404Spjd 3187219089Spjd if ((mask & (AT_UID | AT_GID))) { 3188219089Spjd err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 3189219089Spjd &xattr_obj, sizeof (xattr_obj)); 3190168404Spjd 3191219089Spjd if (err == 0 && xattr_obj) { 3192219089Spjd err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 3193306818Savg if (err == 0) { 3194306818Savg err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE); 3195306818Savg if (err != 0) 3196306818Savg vrele(ZTOV(attrzp)); 3197306818Savg } 3198209962Smm if (err) 3199219089Spjd goto out2; 3200168404Spjd } 3201209962Smm if (mask & AT_UID) { 3202209962Smm new_uid = zfs_fuid_create(zfsvfs, 3203209962Smm (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3204219089Spjd if (new_uid != zp->z_uid && 3205219089Spjd zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 3206219089Spjd if (attrzp) 3207306818Savg vput(ZTOV(attrzp)); 3208249195Smm err = SET_ERROR(EDQUOT); 3209219089Spjd goto out2; 3210209962Smm } 3211209962Smm } 3212209962Smm 3213209962Smm if (mask & AT_GID) { 3214209962Smm new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 3215209962Smm cr, ZFS_GROUP, &fuidp); 3216219089Spjd if (new_gid != zp->z_gid && 3217219089Spjd zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 3218219089Spjd if (attrzp) 3219306818Savg vput(ZTOV(attrzp)); 3220249195Smm err = SET_ERROR(EDQUOT); 3221219089Spjd goto out2; 3222209962Smm } 3223209962Smm } 3224219089Spjd } 3225219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 3226219089Spjd 3227219089Spjd if (mask & AT_MODE) { 3228219089Spjd uint64_t pmode = zp->z_mode; 3229219089Spjd uint64_t acl_obj; 3230219089Spjd new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3231219089Spjd 3232243560Smm if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 3233243560Smm !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3234249195Smm err = SET_ERROR(EPERM); 3235243560Smm goto out; 3236243560Smm } 3237243560Smm 3238224174Smm if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3239224174Smm goto out; 3240219089Spjd 3241219089Spjd if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 3242219089Spjd /* 3243219089Spjd * Are we upgrading ACL from old V0 format 3244219089Spjd * to V1 format? 3245219089Spjd */ 3246219089Spjd if (zfsvfs->z_version >= ZPL_VERSION_FUID && 3247219089Spjd zfs_znode_acl_version(zp) == 3248219089Spjd ZFS_ACL_VERSION_INITIAL) { 3249219089Spjd dmu_tx_hold_free(tx, acl_obj, 0, 3250219089Spjd DMU_OBJECT_END); 3251219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3252219089Spjd 0, aclp->z_acl_bytes); 3253209962Smm } else { 3254219089Spjd dmu_tx_hold_write(tx, acl_obj, 0, 3255219089Spjd aclp->z_acl_bytes); 3256209962Smm } 3257219089Spjd } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 3258219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3259219089Spjd 0, aclp->z_acl_bytes); 3260209962Smm } 3261219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3262219089Spjd } else { 3263219089Spjd if ((mask & AT_XVATTR) && 3264219089Spjd XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3265219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3266219089Spjd else 3267219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3268168404Spjd } 3269168404Spjd 3270219089Spjd if (attrzp) { 3271219089Spjd dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 3272219089Spjd } 3273219089Spjd 3274219089Spjd fuid_dirtied = zfsvfs->z_fuid_dirty; 3275219089Spjd if (fuid_dirtied) 3276219089Spjd zfs_fuid_txhold(zfsvfs, tx); 3277219089Spjd 3278219089Spjd zfs_sa_upgrade_txholds(tx, zp); 3279219089Spjd 3280258720Savg err = dmu_tx_assign(tx, TXG_WAIT); 3281258720Savg if (err) 3282209962Smm goto out; 3283168404Spjd 3284219089Spjd count = 0; 3285168404Spjd /* 3286168404Spjd * Set each attribute requested. 3287168404Spjd * We group settings according to the locks they need to acquire. 3288168404Spjd * 3289168404Spjd * Note: you cannot set ctime directly, although it will be 3290168404Spjd * updated as a side-effect of calling this function. 3291168404Spjd */ 3292168404Spjd 3293219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3294219089Spjd mutex_enter(&zp->z_acl_lock); 3295168404Spjd 3296219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3297219089Spjd &zp->z_pflags, sizeof (zp->z_pflags)); 3298219089Spjd 3299219089Spjd if (attrzp) { 3300219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3301219089Spjd mutex_enter(&attrzp->z_acl_lock); 3302219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3303219089Spjd SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3304219089Spjd sizeof (attrzp->z_pflags)); 3305219089Spjd } 3306219089Spjd 3307219089Spjd if (mask & (AT_UID|AT_GID)) { 3308219089Spjd 3309219089Spjd if (mask & AT_UID) { 3310219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 3311219089Spjd &new_uid, sizeof (new_uid)); 3312219089Spjd zp->z_uid = new_uid; 3313219089Spjd if (attrzp) { 3314219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3315219089Spjd SA_ZPL_UID(zfsvfs), NULL, &new_uid, 3316219089Spjd sizeof (new_uid)); 3317219089Spjd attrzp->z_uid = new_uid; 3318219089Spjd } 3319219089Spjd } 3320219089Spjd 3321219089Spjd if (mask & AT_GID) { 3322219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 3323219089Spjd NULL, &new_gid, sizeof (new_gid)); 3324219089Spjd zp->z_gid = new_gid; 3325219089Spjd if (attrzp) { 3326219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3327219089Spjd SA_ZPL_GID(zfsvfs), NULL, &new_gid, 3328219089Spjd sizeof (new_gid)); 3329219089Spjd attrzp->z_gid = new_gid; 3330219089Spjd } 3331219089Spjd } 3332219089Spjd if (!(mask & AT_MODE)) { 3333219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 3334219089Spjd NULL, &new_mode, sizeof (new_mode)); 3335219089Spjd new_mode = zp->z_mode; 3336219089Spjd } 3337219089Spjd err = zfs_acl_chown_setattr(zp); 3338219089Spjd ASSERT(err == 0); 3339219089Spjd if (attrzp) { 3340219089Spjd err = zfs_acl_chown_setattr(attrzp); 3341219089Spjd ASSERT(err == 0); 3342219089Spjd } 3343219089Spjd } 3344219089Spjd 3345168404Spjd if (mask & AT_MODE) { 3346219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 3347219089Spjd &new_mode, sizeof (new_mode)); 3348219089Spjd zp->z_mode = new_mode; 3349219089Spjd ASSERT3U((uintptr_t)aclp, !=, 0); 3350209962Smm err = zfs_aclset_common(zp, aclp, cr, tx); 3351240415Smm ASSERT0(err); 3352219089Spjd if (zp->z_acl_cached) 3353219089Spjd zfs_acl_free(zp->z_acl_cached); 3354211932Smm zp->z_acl_cached = aclp; 3355211932Smm aclp = NULL; 3356168404Spjd } 3357168404Spjd 3358168404Spjd 3359219089Spjd if (mask & AT_ATIME) { 3360219089Spjd ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 3361219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 3362219089Spjd &zp->z_atime, sizeof (zp->z_atime)); 3363168404Spjd } 3364168404Spjd 3365219089Spjd if (mask & AT_MTIME) { 3366219089Spjd ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 3367219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 3368219089Spjd mtime, sizeof (mtime)); 3369168404Spjd } 3370168404Spjd 3371185029Spjd /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */ 3372219089Spjd if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3373219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3374219089Spjd NULL, mtime, sizeof (mtime)); 3375219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3376219089Spjd &ctime, sizeof (ctime)); 3377219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 3378219089Spjd B_TRUE); 3379219089Spjd } else if (mask != 0) { 3380219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3381219089Spjd &ctime, sizeof (ctime)); 3382219089Spjd zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 3383219089Spjd B_TRUE); 3384219089Spjd if (attrzp) { 3385219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3386219089Spjd SA_ZPL_CTIME(zfsvfs), NULL, 3387219089Spjd &ctime, sizeof (ctime)); 3388219089Spjd zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 3389219089Spjd mtime, ctime, B_TRUE); 3390219089Spjd } 3391219089Spjd } 3392185029Spjd /* 3393185029Spjd * Do this after setting timestamps to prevent timestamp 3394185029Spjd * update from toggling bit 3395185029Spjd */ 3396168404Spjd 3397185029Spjd if (xoap && (mask & AT_XVATTR)) { 3398209962Smm 3399316391Sasomers if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) 3400316391Sasomers xoap->xoa_createtime = vap->va_birthtime; 3401209962Smm /* 3402209962Smm * restore trimmed off masks 3403209962Smm * so that return masks can be set for caller. 3404209962Smm */ 3405209962Smm 3406209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3407209962Smm XVA_SET_REQ(xvap, XAT_APPENDONLY); 3408209962Smm } 3409209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3410209962Smm XVA_SET_REQ(xvap, XAT_NOUNLINK); 3411209962Smm } 3412209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3413209962Smm XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3414209962Smm } 3415209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3416209962Smm XVA_SET_REQ(xvap, XAT_NODUMP); 3417209962Smm } 3418209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3419209962Smm XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3420209962Smm } 3421209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3422209962Smm XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3423209962Smm } 3424209962Smm 3425219089Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3426185029Spjd ASSERT(vp->v_type == VREG); 3427185029Spjd 3428219089Spjd zfs_xvattr_set(zp, xvap, tx); 3429185029Spjd } 3430185029Spjd 3431209962Smm if (fuid_dirtied) 3432209962Smm zfs_fuid_sync(zfsvfs, tx); 3433209962Smm 3434168404Spjd if (mask != 0) 3435185029Spjd zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3436168404Spjd 3437219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3438219089Spjd mutex_exit(&zp->z_acl_lock); 3439168404Spjd 3440219089Spjd if (attrzp) { 3441219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3442219089Spjd mutex_exit(&attrzp->z_acl_lock); 3443219089Spjd } 3444209962Smmout: 3445219089Spjd if (err == 0 && attrzp) { 3446219089Spjd err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 3447219089Spjd xattr_count, tx); 3448219089Spjd ASSERT(err2 == 0); 3449219089Spjd } 3450219089Spjd 3451168404Spjd if (attrzp) 3452306818Savg vput(ZTOV(attrzp)); 3453251631Sdelphij 3454211932Smm if (aclp) 3455209962Smm zfs_acl_free(aclp); 3456168404Spjd 3457209962Smm if (fuidp) { 3458209962Smm zfs_fuid_info_free(fuidp); 3459209962Smm fuidp = NULL; 3460209962Smm } 3461209962Smm 3462219089Spjd if (err) { 3463209962Smm dmu_tx_abort(tx); 3464219089Spjd } else { 3465219089Spjd err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 3466209962Smm dmu_tx_commit(tx); 3467219089Spjd } 3468209962Smm 3469219089Spjdout2: 3470219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3471219089Spjd zil_commit(zilog, 0); 3472209962Smm 3473168404Spjd ZFS_EXIT(zfsvfs); 3474168404Spjd return (err); 3475168404Spjd} 3476168404Spjd 3477168404Spjd/* 3478303970Savg * We acquire all but fdvp locks using non-blocking acquisitions. If we 3479303970Savg * fail to acquire any lock in the path we will drop all held locks, 3480303970Savg * acquire the new lock in a blocking fashion, and then release it and 3481303970Savg * restart the rename. This acquire/release step ensures that we do not 3482303970Savg * spin on a lock waiting for release. On error release all vnode locks 3483303970Savg * and decrement references the way tmpfs_rename() would do. 3484168404Spjd */ 3485303970Savgstatic int 3486303970Savgzfs_rename_relock(struct vnode *sdvp, struct vnode **svpp, 3487303970Savg struct vnode *tdvp, struct vnode **tvpp, 3488303970Savg const struct componentname *scnp, const struct componentname *tcnp) 3489168404Spjd{ 3490303970Savg zfsvfs_t *zfsvfs; 3491303970Savg struct vnode *nvp, *svp, *tvp; 3492303970Savg znode_t *sdzp, *tdzp, *szp, *tzp; 3493303970Savg const char *snm = scnp->cn_nameptr; 3494303970Savg const char *tnm = tcnp->cn_nameptr; 3495303970Savg int error; 3496168404Spjd 3497303970Savg VOP_UNLOCK(tdvp, 0); 3498303970Savg if (*tvpp != NULL && *tvpp != tdvp) 3499303970Savg VOP_UNLOCK(*tvpp, 0); 3500303970Savg 3501303970Savgrelock: 3502303970Savg error = vn_lock(sdvp, LK_EXCLUSIVE); 3503303970Savg if (error) 3504303970Savg goto out; 3505303970Savg sdzp = VTOZ(sdvp); 3506303970Savg 3507303970Savg error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT); 3508303970Savg if (error != 0) { 3509303970Savg VOP_UNLOCK(sdvp, 0); 3510303970Savg if (error != EBUSY) 3511303970Savg goto out; 3512303970Savg error = vn_lock(tdvp, LK_EXCLUSIVE); 3513303970Savg if (error) 3514303970Savg goto out; 3515303970Savg VOP_UNLOCK(tdvp, 0); 3516303970Savg goto relock; 3517168404Spjd } 3518303970Savg tdzp = VTOZ(tdvp); 3519168404Spjd 3520303970Savg /* 3521303970Savg * Before using sdzp and tdzp we must ensure that they are live. 3522303970Savg * As a porting legacy from illumos we have two things to worry 3523303970Savg * about. One is typical for FreeBSD and it is that the vnode is 3524303970Savg * not reclaimed (doomed). The other is that the znode is live. 3525303970Savg * The current code can invalidate the znode without acquiring the 3526303970Savg * corresponding vnode lock if the object represented by the znode 3527303970Savg * and vnode is no longer valid after a rollback or receive operation. 3528303970Savg * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock 3529303970Savg * that protects the znodes from the invalidation. 3530303970Savg */ 3531303970Savg zfsvfs = sdzp->z_zfsvfs; 3532303970Savg ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs); 3533303970Savg ZFS_ENTER(zfsvfs); 3534168404Spjd 3535168404Spjd /* 3536303970Savg * We can not use ZFS_VERIFY_ZP() here because it could directly return 3537303970Savg * bypassing the cleanup code in the case of an error. 3538168404Spjd */ 3539303970Savg if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 3540303970Savg ZFS_EXIT(zfsvfs); 3541303970Savg VOP_UNLOCK(sdvp, 0); 3542303970Savg VOP_UNLOCK(tdvp, 0); 3543303970Savg error = SET_ERROR(EIO); 3544303970Savg goto out; 3545303970Savg } 3546303970Savg 3547303970Savg /* 3548303970Savg * Re-resolve svp to be certain it still exists and fetch the 3549303970Savg * correct vnode. 3550303970Savg */ 3551303970Savg error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS); 3552303970Savg if (error != 0) { 3553303970Savg /* Source entry invalid or not there. */ 3554303970Savg ZFS_EXIT(zfsvfs); 3555303970Savg VOP_UNLOCK(sdvp, 0); 3556303970Savg VOP_UNLOCK(tdvp, 0); 3557303970Savg if ((scnp->cn_flags & ISDOTDOT) != 0 || 3558303970Savg (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.')) 3559303970Savg error = SET_ERROR(EINVAL); 3560303970Savg goto out; 3561303970Savg } 3562303970Savg svp = ZTOV(szp); 3563303970Savg 3564303970Savg /* 3565303970Savg * Re-resolve tvp, if it disappeared we just carry on. 3566303970Savg */ 3567303970Savg error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0); 3568303970Savg if (error != 0) { 3569303970Savg ZFS_EXIT(zfsvfs); 3570303970Savg VOP_UNLOCK(sdvp, 0); 3571303970Savg VOP_UNLOCK(tdvp, 0); 3572303970Savg vrele(svp); 3573303970Savg if ((tcnp->cn_flags & ISDOTDOT) != 0) 3574303970Savg error = SET_ERROR(EINVAL); 3575303970Savg goto out; 3576303970Savg } 3577303970Savg if (tzp != NULL) 3578303970Savg tvp = ZTOV(tzp); 3579303970Savg else 3580303970Savg tvp = NULL; 3581303970Savg 3582303970Savg /* 3583303970Savg * At present the vnode locks must be acquired before z_teardown_lock, 3584303970Savg * although it would be more logical to use the opposite order. 3585303970Savg */ 3586303970Savg ZFS_EXIT(zfsvfs); 3587303970Savg 3588303970Savg /* 3589303970Savg * Now try acquire locks on svp and tvp. 3590303970Savg */ 3591303970Savg nvp = svp; 3592303970Savg error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 3593303970Savg if (error != 0) { 3594303970Savg VOP_UNLOCK(sdvp, 0); 3595303970Savg VOP_UNLOCK(tdvp, 0); 3596303970Savg if (tvp != NULL) 3597303970Savg vrele(tvp); 3598303970Savg if (error != EBUSY) { 3599303970Savg vrele(nvp); 3600303970Savg goto out; 3601303970Savg } 3602303970Savg error = vn_lock(nvp, LK_EXCLUSIVE); 3603303970Savg if (error != 0) { 3604303970Savg vrele(nvp); 3605303970Savg goto out; 3606303970Savg } 3607303970Savg VOP_UNLOCK(nvp, 0); 3608303970Savg /* 3609303970Savg * Concurrent rename race. 3610303970Savg * XXX ? 3611303970Savg */ 3612303970Savg if (nvp == tdvp) { 3613303970Savg vrele(nvp); 3614303970Savg error = SET_ERROR(EINVAL); 3615303970Savg goto out; 3616303970Savg } 3617303970Savg vrele(*svpp); 3618303970Savg *svpp = nvp; 3619303970Savg goto relock; 3620303970Savg } 3621303970Savg vrele(*svpp); 3622303970Savg *svpp = nvp; 3623303970Savg 3624303970Savg if (*tvpp != NULL) 3625303970Savg vrele(*tvpp); 3626303970Savg *tvpp = NULL; 3627303970Savg if (tvp != NULL) { 3628303970Savg nvp = tvp; 3629303970Savg error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 3630303970Savg if (error != 0) { 3631303970Savg VOP_UNLOCK(sdvp, 0); 3632303970Savg VOP_UNLOCK(tdvp, 0); 3633303970Savg VOP_UNLOCK(*svpp, 0); 3634303970Savg if (error != EBUSY) { 3635303970Savg vrele(nvp); 3636303970Savg goto out; 3637168404Spjd } 3638303970Savg error = vn_lock(nvp, LK_EXCLUSIVE); 3639303970Savg if (error != 0) { 3640303970Savg vrele(nvp); 3641303970Savg goto out; 3642303970Savg } 3643303970Savg vput(nvp); 3644303970Savg goto relock; 3645168404Spjd } 3646303970Savg *tvpp = nvp; 3647303970Savg } 3648168404Spjd 3649303970Savg return (0); 3650168404Spjd 3651303970Savgout: 3652303970Savg return (error); 3653303970Savg} 3654168404Spjd 3655303970Savg/* 3656303970Savg * Note that we must use VRELE_ASYNC in this function as it walks 3657303970Savg * up the directory tree and vrele may need to acquire an exclusive 3658303970Savg * lock if a last reference to a vnode is dropped. 3659303970Savg */ 3660303970Savgstatic int 3661303970Savgzfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp) 3662303970Savg{ 3663303970Savg zfsvfs_t *zfsvfs; 3664303970Savg znode_t *zp, *zp1; 3665303970Savg uint64_t parent; 3666303970Savg int error; 3667168404Spjd 3668303970Savg zfsvfs = tdzp->z_zfsvfs; 3669303970Savg if (tdzp == szp) 3670303970Savg return (SET_ERROR(EINVAL)); 3671303970Savg if (tdzp == sdzp) 3672303970Savg return (0); 3673303970Savg if (tdzp->z_id == zfsvfs->z_root) 3674303970Savg return (0); 3675303970Savg zp = tdzp; 3676303970Savg for (;;) { 3677303970Savg ASSERT(!zp->z_unlinked); 3678303970Savg if ((error = sa_lookup(zp->z_sa_hdl, 3679303970Savg SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) 3680303970Savg break; 3681303970Savg 3682303970Savg if (parent == szp->z_id) { 3683303970Savg error = SET_ERROR(EINVAL); 3684303970Savg break; 3685168404Spjd } 3686303970Savg if (parent == zfsvfs->z_root) 3687303970Savg break; 3688303970Savg if (parent == sdzp->z_id) 3689303970Savg break; 3690168404Spjd 3691303970Savg error = zfs_zget(zfsvfs, parent, &zp1); 3692303970Savg if (error != 0) 3693303970Savg break; 3694168404Spjd 3695303970Savg if (zp != tdzp) 3696303970Savg VN_RELE_ASYNC(ZTOV(zp), 3697303970Savg dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 3698303970Savg zp = zp1; 3699303970Savg } 3700303970Savg 3701303970Savg if (error == ENOTDIR) 3702303970Savg panic("checkpath: .. not a directory\n"); 3703303970Savg if (zp != tdzp) 3704303970Savg VN_RELE_ASYNC(ZTOV(zp), 3705303970Savg dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 3706303970Savg return (error); 3707168404Spjd} 3708168404Spjd 3709168404Spjd/* 3710168404Spjd * Move an entry from the provided source directory to the target 3711168404Spjd * directory. Change the entry name as indicated. 3712168404Spjd * 3713168404Spjd * IN: sdvp - Source directory containing the "old entry". 3714168404Spjd * snm - Old entry name. 3715168404Spjd * tdvp - Target directory to contain the "new entry". 3716168404Spjd * tnm - New entry name. 3717168404Spjd * cr - credentials of caller. 3718185029Spjd * ct - caller context 3719185029Spjd * flags - case flags 3720168404Spjd * 3721251631Sdelphij * RETURN: 0 on success, error code on failure. 3722168404Spjd * 3723168404Spjd * Timestamps: 3724168404Spjd * sdvp,tdvp - ctime|mtime updated 3725168404Spjd */ 3726185029Spjd/*ARGSUSED*/ 3727168404Spjdstatic int 3728303970Savgzfs_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, 3729303970Savg vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp, 3730303970Savg cred_t *cr) 3731168404Spjd{ 3732303970Savg zfsvfs_t *zfsvfs; 3733303970Savg znode_t *sdzp, *tdzp, *szp, *tzp; 3734303970Savg zilog_t *zilog = NULL; 3735168404Spjd dmu_tx_t *tx; 3736303970Savg char *snm = scnp->cn_nameptr; 3737303970Savg char *tnm = tcnp->cn_nameptr; 3738185029Spjd int error = 0; 3739168404Spjd 3740303970Savg /* Reject renames across filesystems. */ 3741303970Savg if ((*svpp)->v_mount != tdvp->v_mount || 3742303970Savg ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) { 3743303970Savg error = SET_ERROR(EXDEV); 3744303970Savg goto out; 3745303970Savg } 3746168404Spjd 3747303970Savg if (zfsctl_is_node(tdvp)) { 3748303970Savg error = SET_ERROR(EXDEV); 3749303970Savg goto out; 3750303970Savg } 3751303970Savg 3752168962Spjd /* 3753303970Savg * Lock all four vnodes to ensure safety and semantics of renaming. 3754168962Spjd */ 3755303970Savg error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp); 3756303970Savg if (error != 0) { 3757303970Savg /* no vnodes are locked in the case of error here */ 3758303970Savg return (error); 3759264392Sdavide } 3760168962Spjd 3761303970Savg tdzp = VTOZ(tdvp); 3762303970Savg sdzp = VTOZ(sdvp); 3763303970Savg zfsvfs = tdzp->z_zfsvfs; 3764303970Savg zilog = zfsvfs->z_log; 3765303970Savg 3766254585Sdelphij /* 3767303970Savg * After we re-enter ZFS_ENTER() we will have to revalidate all 3768303970Savg * znodes involved. 3769254585Sdelphij */ 3770303970Savg ZFS_ENTER(zfsvfs); 3771168404Spjd 3772185029Spjd if (zfsvfs->z_utf8 && u8_validate(tnm, 3773185029Spjd strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3774303970Savg error = SET_ERROR(EILSEQ); 3775303970Savg goto unlockout; 3776185029Spjd } 3777185029Spjd 3778303970Savg /* If source and target are the same file, there is nothing to do. */ 3779303970Savg if ((*svpp) == (*tvpp)) { 3780303970Savg error = 0; 3781303970Savg goto unlockout; 3782303970Savg } 3783185029Spjd 3784303970Savg if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) || 3785303970Savg ((*tvpp) != NULL && (*tvpp)->v_type == VDIR && 3786303970Savg (*tvpp)->v_mountedhere != NULL)) { 3787303970Savg error = SET_ERROR(EXDEV); 3788303970Savg goto unlockout; 3789303970Savg } 3790168404Spjd 3791168404Spjd /* 3792303970Savg * We can not use ZFS_VERIFY_ZP() here because it could directly return 3793303970Savg * bypassing the cleanup code in the case of an error. 3794168404Spjd */ 3795303970Savg if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 3796303970Savg error = SET_ERROR(EIO); 3797303970Savg goto unlockout; 3798168404Spjd } 3799168404Spjd 3800303970Savg szp = VTOZ(*svpp); 3801303970Savg tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp); 3802303970Savg if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) { 3803303970Savg error = SET_ERROR(EIO); 3804303970Savg goto unlockout; 3805168962Spjd } 3806185029Spjd 3807208131Smm /* 3808303970Savg * This is to prevent the creation of links into attribute space 3809303970Savg * by renaming a linked file into/outof an attribute directory. 3810303970Savg * See the comment in zfs_link() for why this is considered bad. 3811208131Smm */ 3812303970Savg if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3813303970Savg error = SET_ERROR(EINVAL); 3814303970Savg goto unlockout; 3815208131Smm } 3816208131Smm 3817168404Spjd /* 3818168404Spjd * Must have write access at the source to remove the old entry 3819168404Spjd * and write access at the target to create the new entry. 3820168404Spjd * Note that if target and source are the same, this can be 3821168404Spjd * done in a single check. 3822168404Spjd */ 3823168404Spjd if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3824303970Savg goto unlockout; 3825168404Spjd 3826303970Savg if ((*svpp)->v_type == VDIR) { 3827168404Spjd /* 3828303970Savg * Avoid ".", "..", and aliases of "." for obvious reasons. 3829303970Savg */ 3830303970Savg if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') || 3831303970Savg sdzp == szp || 3832303970Savg (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { 3833303970Savg error = EINVAL; 3834303970Savg goto unlockout; 3835303970Savg } 3836303970Savg 3837303970Savg /* 3838168404Spjd * Check to make sure rename is valid. 3839168404Spjd * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3840168404Spjd */ 3841303970Savg if (error = zfs_rename_check(szp, sdzp, tdzp)) 3842303970Savg goto unlockout; 3843168404Spjd } 3844168404Spjd 3845168404Spjd /* 3846168404Spjd * Does target exist? 3847168404Spjd */ 3848168404Spjd if (tzp) { 3849168404Spjd /* 3850168404Spjd * Source and target must be the same type. 3851168404Spjd */ 3852303970Savg if ((*svpp)->v_type == VDIR) { 3853303970Savg if ((*tvpp)->v_type != VDIR) { 3854249195Smm error = SET_ERROR(ENOTDIR); 3855303970Savg goto unlockout; 3856303970Savg } else { 3857303970Savg cache_purge(tdvp); 3858303970Savg if (sdvp != tdvp) 3859303970Savg cache_purge(sdvp); 3860168404Spjd } 3861168404Spjd } else { 3862303970Savg if ((*tvpp)->v_type == VDIR) { 3863249195Smm error = SET_ERROR(EISDIR); 3864303970Savg goto unlockout; 3865168404Spjd } 3866168404Spjd } 3867168404Spjd } 3868168404Spjd 3869303970Savg vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct); 3870168962Spjd if (tzp) 3871303970Savg vnevent_rename_dest(*tvpp, tdvp, tnm, ct); 3872168962Spjd 3873185029Spjd /* 3874185029Spjd * notify the target directory if it is not the same 3875185029Spjd * as source directory. 3876185029Spjd */ 3877185029Spjd if (tdvp != sdvp) { 3878185029Spjd vnevent_rename_dest_dir(tdvp, ct); 3879185029Spjd } 3880185029Spjd 3881168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 3882219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 3883219089Spjd dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3884168404Spjd dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3885168404Spjd dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 3886219089Spjd if (sdzp != tdzp) { 3887219089Spjd dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 3888219089Spjd zfs_sa_upgrade_txholds(tx, tdzp); 3889219089Spjd } 3890219089Spjd if (tzp) { 3891219089Spjd dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 3892219089Spjd zfs_sa_upgrade_txholds(tx, tzp); 3893219089Spjd } 3894219089Spjd 3895219089Spjd zfs_sa_upgrade_txholds(tx, szp); 3896168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3897303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 3898168404Spjd if (error) { 3899168404Spjd dmu_tx_abort(tx); 3900303970Savg goto unlockout; 3901168404Spjd } 3902168404Spjd 3903303970Savg 3904168404Spjd if (tzp) /* Attempt to remove the existing target */ 3905303970Savg error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL); 3906168404Spjd 3907168404Spjd if (error == 0) { 3908303970Savg error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING); 3909168404Spjd if (error == 0) { 3910219089Spjd szp->z_pflags |= ZFS_AV_MODIFIED; 3911185029Spjd 3912219089Spjd error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 3913219089Spjd (void *)&szp->z_pflags, sizeof (uint64_t), tx); 3914240415Smm ASSERT0(error); 3915219089Spjd 3916303970Savg error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING, 3917303970Savg NULL); 3918219089Spjd if (error == 0) { 3919303970Savg zfs_log_rename(zilog, tx, TX_RENAME, sdzp, 3920303970Savg snm, tdzp, tnm, szp); 3921185029Spjd 3922219089Spjd /* 3923219089Spjd * Update path information for the target vnode 3924219089Spjd */ 3925303970Savg vn_renamepath(tdvp, *svpp, tnm, strlen(tnm)); 3926219089Spjd } else { 3927219089Spjd /* 3928219089Spjd * At this point, we have successfully created 3929219089Spjd * the target name, but have failed to remove 3930219089Spjd * the source name. Since the create was done 3931219089Spjd * with the ZRENAMING flag, there are 3932219089Spjd * complications; for one, the link count is 3933219089Spjd * wrong. The easiest way to deal with this 3934219089Spjd * is to remove the newly created target, and 3935219089Spjd * return the original error. This must 3936219089Spjd * succeed; fortunately, it is very unlikely to 3937219089Spjd * fail, since we just created it. 3938219089Spjd */ 3939303970Savg VERIFY3U(zfs_link_destroy(tdzp, tnm, szp, tx, 3940219089Spjd ZRENAMING, NULL), ==, 0); 3941219089Spjd } 3942168404Spjd } 3943168404Spjd if (error == 0) { 3944303970Savg cache_purge(*svpp); 3945303970Savg if (*tvpp != NULL) 3946303970Savg cache_purge(*tvpp); 3947303970Savg cache_purge_negative(tdvp); 3948168404Spjd } 3949168404Spjd } 3950168404Spjd 3951168404Spjd dmu_tx_commit(tx); 3952168404Spjd 3953303970Savgunlockout: /* all 4 vnodes are locked, ZFS_ENTER called */ 3954303970Savg ZFS_EXIT(zfsvfs); 3955303970Savg VOP_UNLOCK(*svpp, 0); 3956303970Savg VOP_UNLOCK(sdvp, 0); 3957168404Spjd 3958303970Savgout: /* original two vnodes are locked */ 3959303970Savg if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3960219089Spjd zil_commit(zilog, 0); 3961219089Spjd 3962303970Savg if (*tvpp != NULL) 3963303970Savg VOP_UNLOCK(*tvpp, 0); 3964303970Savg if (tdvp != *tvpp) 3965303970Savg VOP_UNLOCK(tdvp, 0); 3966168404Spjd return (error); 3967168404Spjd} 3968168404Spjd 3969168404Spjd/* 3970168404Spjd * Insert the indicated symbolic reference entry into the directory. 3971168404Spjd * 3972168404Spjd * IN: dvp - Directory to contain new symbolic link. 3973168404Spjd * link - Name for new symlink entry. 3974168404Spjd * vap - Attributes of new entry. 3975168404Spjd * cr - credentials of caller. 3976185029Spjd * ct - caller context 3977185029Spjd * flags - case flags 3978168404Spjd * 3979251631Sdelphij * RETURN: 0 on success, error code on failure. 3980168404Spjd * 3981168404Spjd * Timestamps: 3982168404Spjd * dvp - ctime|mtime updated 3983168404Spjd */ 3984185029Spjd/*ARGSUSED*/ 3985168404Spjdstatic int 3986185029Spjdzfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 3987185029Spjd cred_t *cr, kthread_t *td) 3988168404Spjd{ 3989168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 3990168404Spjd dmu_tx_t *tx; 3991168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3992185029Spjd zilog_t *zilog; 3993219089Spjd uint64_t len = strlen(link); 3994168404Spjd int error; 3995209962Smm zfs_acl_ids_t acl_ids; 3996209962Smm boolean_t fuid_dirtied; 3997219089Spjd uint64_t txtype = TX_SYMLINK; 3998185029Spjd int flags = 0; 3999168404Spjd 4000168962Spjd ASSERT(vap->va_type == VLNK); 4001168404Spjd 4002168404Spjd ZFS_ENTER(zfsvfs); 4003185029Spjd ZFS_VERIFY_ZP(dzp); 4004185029Spjd zilog = zfsvfs->z_log; 4005185029Spjd 4006185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 4007185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4008185029Spjd ZFS_EXIT(zfsvfs); 4009249195Smm return (SET_ERROR(EILSEQ)); 4010185029Spjd } 4011168404Spjd 4012168404Spjd if (len > MAXPATHLEN) { 4013168404Spjd ZFS_EXIT(zfsvfs); 4014249195Smm return (SET_ERROR(ENAMETOOLONG)); 4015168404Spjd } 4016168404Spjd 4017219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, 4018219089Spjd vap, cr, NULL, &acl_ids)) != 0) { 4019219089Spjd ZFS_EXIT(zfsvfs); 4020219089Spjd return (error); 4021219089Spjd } 4022260704Savg 4023168404Spjd /* 4024168404Spjd * Attempt to lock directory; fail if entry already exists. 4025168404Spjd */ 4026303970Savg error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 4027185029Spjd if (error) { 4028219089Spjd zfs_acl_ids_free(&acl_ids); 4029168404Spjd ZFS_EXIT(zfsvfs); 4030168404Spjd return (error); 4031168404Spjd } 4032168404Spjd 4033219089Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4034219089Spjd zfs_acl_ids_free(&acl_ids); 4035219089Spjd ZFS_EXIT(zfsvfs); 4036219089Spjd return (error); 4037219089Spjd } 4038219089Spjd 4039209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 4040209962Smm zfs_acl_ids_free(&acl_ids); 4041209962Smm ZFS_EXIT(zfsvfs); 4042249195Smm return (SET_ERROR(EDQUOT)); 4043209962Smm } 4044303970Savg 4045303970Savg getnewvnode_reserve(1); 4046168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4047209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 4048168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 4049168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4050219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 4051219089Spjd ZFS_SA_BASE_ATTR_SIZE + len); 4052219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 4053219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 4054219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 4055219089Spjd acl_ids.z_aclp->z_acl_bytes); 4056219089Spjd } 4057209962Smm if (fuid_dirtied) 4058209962Smm zfs_fuid_txhold(zfsvfs, tx); 4059303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 4060168404Spjd if (error) { 4061219089Spjd zfs_acl_ids_free(&acl_ids); 4062168404Spjd dmu_tx_abort(tx); 4063260704Savg getnewvnode_drop_reserve(); 4064168404Spjd ZFS_EXIT(zfsvfs); 4065168404Spjd return (error); 4066168404Spjd } 4067168404Spjd 4068168404Spjd /* 4069168404Spjd * Create a new object for the symlink. 4070219089Spjd * for version 4 ZPL datsets the symlink will be an SA attribute 4071168404Spjd */ 4072219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 4073168404Spjd 4074219089Spjd if (fuid_dirtied) 4075219089Spjd zfs_fuid_sync(zfsvfs, tx); 4076209962Smm 4077219089Spjd if (zp->z_is_sa) 4078219089Spjd error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 4079219089Spjd link, len, tx); 4080219089Spjd else 4081219089Spjd zfs_sa_symlink(zp, link, len, tx); 4082168404Spjd 4083219089Spjd zp->z_size = len; 4084219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 4085219089Spjd &zp->z_size, sizeof (zp->z_size), tx); 4086168404Spjd /* 4087168404Spjd * Insert the new object into the directory. 4088168404Spjd */ 4089303970Savg (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 4090168404Spjd 4091219089Spjd zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 4092219089Spjd *vpp = ZTOV(zp); 4093219089Spjd 4094209962Smm zfs_acl_ids_free(&acl_ids); 4095209962Smm 4096168404Spjd dmu_tx_commit(tx); 4097168404Spjd 4098260704Savg getnewvnode_drop_reserve(); 4099260704Savg 4100219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4101219089Spjd zil_commit(zilog, 0); 4102219089Spjd 4103168404Spjd ZFS_EXIT(zfsvfs); 4104168404Spjd return (error); 4105168404Spjd} 4106168404Spjd 4107168404Spjd/* 4108168404Spjd * Return, in the buffer contained in the provided uio structure, 4109168404Spjd * the symbolic path referred to by vp. 4110168404Spjd * 4111168404Spjd * IN: vp - vnode of symbolic link. 4112251631Sdelphij * uio - structure to contain the link path. 4113168404Spjd * cr - credentials of caller. 4114185029Spjd * ct - caller context 4115168404Spjd * 4116251631Sdelphij * OUT: uio - structure containing the link path. 4117168404Spjd * 4118251631Sdelphij * RETURN: 0 on success, error code on failure. 4119168404Spjd * 4120168404Spjd * Timestamps: 4121168404Spjd * vp - atime updated 4122168404Spjd */ 4123168404Spjd/* ARGSUSED */ 4124168404Spjdstatic int 4125185029Spjdzfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 4126168404Spjd{ 4127168404Spjd znode_t *zp = VTOZ(vp); 4128168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4129168404Spjd int error; 4130168404Spjd 4131168404Spjd ZFS_ENTER(zfsvfs); 4132185029Spjd ZFS_VERIFY_ZP(zp); 4133168404Spjd 4134219089Spjd if (zp->z_is_sa) 4135219089Spjd error = sa_lookup_uio(zp->z_sa_hdl, 4136219089Spjd SA_ZPL_SYMLINK(zfsvfs), uio); 4137219089Spjd else 4138219089Spjd error = zfs_sa_readlink(zp, uio); 4139168404Spjd 4140168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4141219089Spjd 4142168404Spjd ZFS_EXIT(zfsvfs); 4143168404Spjd return (error); 4144168404Spjd} 4145168404Spjd 4146168404Spjd/* 4147168404Spjd * Insert a new entry into directory tdvp referencing svp. 4148168404Spjd * 4149168404Spjd * IN: tdvp - Directory to contain new entry. 4150168404Spjd * svp - vnode of new entry. 4151168404Spjd * name - name of new entry. 4152168404Spjd * cr - credentials of caller. 4153185029Spjd * ct - caller context 4154168404Spjd * 4155251631Sdelphij * RETURN: 0 on success, error code on failure. 4156168404Spjd * 4157168404Spjd * Timestamps: 4158168404Spjd * tdvp - ctime|mtime updated 4159168404Spjd * svp - ctime updated 4160168404Spjd */ 4161168404Spjd/* ARGSUSED */ 4162168404Spjdstatic int 4163185029Spjdzfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 4164185029Spjd caller_context_t *ct, int flags) 4165168404Spjd{ 4166168404Spjd znode_t *dzp = VTOZ(tdvp); 4167168404Spjd znode_t *tzp, *szp; 4168168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4169185029Spjd zilog_t *zilog; 4170168404Spjd dmu_tx_t *tx; 4171168404Spjd int error; 4172212694Smm uint64_t parent; 4173185029Spjd uid_t owner; 4174168404Spjd 4175168404Spjd ASSERT(tdvp->v_type == VDIR); 4176168404Spjd 4177168404Spjd ZFS_ENTER(zfsvfs); 4178185029Spjd ZFS_VERIFY_ZP(dzp); 4179185029Spjd zilog = zfsvfs->z_log; 4180168404Spjd 4181212694Smm /* 4182212694Smm * POSIX dictates that we return EPERM here. 4183212694Smm * Better choices include ENOTSUP or EISDIR. 4184212694Smm */ 4185212694Smm if (svp->v_type == VDIR) { 4186168404Spjd ZFS_EXIT(zfsvfs); 4187249195Smm return (SET_ERROR(EPERM)); 4188212694Smm } 4189212694Smm 4190254585Sdelphij szp = VTOZ(svp); 4191254585Sdelphij ZFS_VERIFY_ZP(szp); 4192254585Sdelphij 4193258597Spjd if (szp->z_pflags & (ZFS_APPENDONLY | ZFS_IMMUTABLE | ZFS_READONLY)) { 4194258597Spjd ZFS_EXIT(zfsvfs); 4195258597Spjd return (SET_ERROR(EPERM)); 4196258597Spjd } 4197258597Spjd 4198212694Smm /* Prevent links to .zfs/shares files */ 4199212694Smm 4200219089Spjd if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 4201219089Spjd &parent, sizeof (uint64_t))) != 0) { 4202212694Smm ZFS_EXIT(zfsvfs); 4203219089Spjd return (error); 4204219089Spjd } 4205219089Spjd if (parent == zfsvfs->z_shares_dir) { 4206219089Spjd ZFS_EXIT(zfsvfs); 4207249195Smm return (SET_ERROR(EPERM)); 4208212694Smm } 4209212694Smm 4210185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, 4211185029Spjd strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4212185029Spjd ZFS_EXIT(zfsvfs); 4213249195Smm return (SET_ERROR(EILSEQ)); 4214185029Spjd } 4215185029Spjd 4216168404Spjd /* 4217168404Spjd * We do not support links between attributes and non-attributes 4218168404Spjd * because of the potential security risk of creating links 4219168404Spjd * into "normal" file space in order to circumvent restrictions 4220168404Spjd * imposed in attribute space. 4221168404Spjd */ 4222219089Spjd if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4223168404Spjd ZFS_EXIT(zfsvfs); 4224249195Smm return (SET_ERROR(EINVAL)); 4225168404Spjd } 4226168404Spjd 4227168404Spjd 4228219089Spjd owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4229219089Spjd if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { 4230168404Spjd ZFS_EXIT(zfsvfs); 4231249195Smm return (SET_ERROR(EPERM)); 4232168404Spjd } 4233168404Spjd 4234185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4235168404Spjd ZFS_EXIT(zfsvfs); 4236168404Spjd return (error); 4237168404Spjd } 4238168404Spjd 4239168404Spjd /* 4240168404Spjd * Attempt to lock directory; fail if entry already exists. 4241168404Spjd */ 4242303970Savg error = zfs_dirent_lookup(dzp, name, &tzp, ZNEW); 4243185029Spjd if (error) { 4244168404Spjd ZFS_EXIT(zfsvfs); 4245168404Spjd return (error); 4246168404Spjd } 4247168404Spjd 4248168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4249219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4250168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4251219089Spjd zfs_sa_upgrade_txholds(tx, szp); 4252219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 4253303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 4254168404Spjd if (error) { 4255168404Spjd dmu_tx_abort(tx); 4256168404Spjd ZFS_EXIT(zfsvfs); 4257168404Spjd return (error); 4258168404Spjd } 4259168404Spjd 4260303970Savg error = zfs_link_create(dzp, name, szp, tx, 0); 4261168404Spjd 4262185029Spjd if (error == 0) { 4263185029Spjd uint64_t txtype = TX_LINK; 4264185029Spjd zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4265185029Spjd } 4266168404Spjd 4267168404Spjd dmu_tx_commit(tx); 4268168404Spjd 4269185029Spjd if (error == 0) { 4270185029Spjd vnevent_link(svp, ct); 4271185029Spjd } 4272185029Spjd 4273219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4274219089Spjd zil_commit(zilog, 0); 4275219089Spjd 4276168404Spjd ZFS_EXIT(zfsvfs); 4277168404Spjd return (error); 4278168404Spjd} 4279168404Spjd 4280219089Spjd 4281185029Spjd/*ARGSUSED*/ 4282168962Spjdvoid 4283185029Spjdzfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4284168404Spjd{ 4285168962Spjd znode_t *zp = VTOZ(vp); 4286168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4287168962Spjd int error; 4288168404Spjd 4289185029Spjd rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4290219089Spjd if (zp->z_sa_hdl == NULL) { 4291185029Spjd /* 4292185029Spjd * The fs has been unmounted, or we did a 4293185029Spjd * suspend/resume and this file no longer exists. 4294185029Spjd */ 4295243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 4296234607Strasz vrecycle(vp); 4297243520Savg return; 4298243520Savg } 4299243520Savg 4300243520Savg if (zp->z_unlinked) { 4301243520Savg /* 4302243520Savg * Fast path to recycle a vnode of a removed file. 4303243520Savg */ 4304185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4305243520Savg vrecycle(vp); 4306168962Spjd return; 4307168404Spjd } 4308168404Spjd 4309168404Spjd if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4310168404Spjd dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4311168404Spjd 4312219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4313219089Spjd zfs_sa_upgrade_txholds(tx, zp); 4314168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 4315168404Spjd if (error) { 4316168404Spjd dmu_tx_abort(tx); 4317168404Spjd } else { 4318219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 4319219089Spjd (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4320168404Spjd zp->z_atime_dirty = 0; 4321168404Spjd dmu_tx_commit(tx); 4322168404Spjd } 4323168404Spjd } 4324185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4325168404Spjd} 4326168404Spjd 4327219089Spjd 4328168404SpjdCTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 4329168404SpjdCTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 4330168404Spjd 4331185029Spjd/*ARGSUSED*/ 4332168404Spjdstatic int 4333185029Spjdzfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 4334168404Spjd{ 4335168404Spjd znode_t *zp = VTOZ(vp); 4336168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4337185029Spjd uint32_t gen; 4338219089Spjd uint64_t gen64; 4339168404Spjd uint64_t object = zp->z_id; 4340168404Spjd zfid_short_t *zfid; 4341219089Spjd int size, i, error; 4342168404Spjd 4343168404Spjd ZFS_ENTER(zfsvfs); 4344185029Spjd ZFS_VERIFY_ZP(zp); 4345168404Spjd 4346219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 4347219089Spjd &gen64, sizeof (uint64_t))) != 0) { 4348219089Spjd ZFS_EXIT(zfsvfs); 4349219089Spjd return (error); 4350219089Spjd } 4351219089Spjd 4352219089Spjd gen = (uint32_t)gen64; 4353219089Spjd 4354168404Spjd size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 4355249195Smm 4356249195Smm#ifdef illumos 4357249195Smm if (fidp->fid_len < size) { 4358249195Smm fidp->fid_len = size; 4359249195Smm ZFS_EXIT(zfsvfs); 4360249195Smm return (SET_ERROR(ENOSPC)); 4361249195Smm } 4362249195Smm#else 4363168404Spjd fidp->fid_len = size; 4364249195Smm#endif 4365168404Spjd 4366168404Spjd zfid = (zfid_short_t *)fidp; 4367168404Spjd 4368168404Spjd zfid->zf_len = size; 4369168404Spjd 4370168404Spjd for (i = 0; i < sizeof (zfid->zf_object); i++) 4371168404Spjd zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 4372168404Spjd 4373168404Spjd /* Must have a non-zero generation number to distinguish from .zfs */ 4374168404Spjd if (gen == 0) 4375168404Spjd gen = 1; 4376168404Spjd for (i = 0; i < sizeof (zfid->zf_gen); i++) 4377168404Spjd zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 4378168404Spjd 4379168404Spjd if (size == LONG_FID_LEN) { 4380168404Spjd uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 4381169023Spjd zfid_long_t *zlfid; 4382168404Spjd 4383168404Spjd zlfid = (zfid_long_t *)fidp; 4384168404Spjd 4385168404Spjd for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4386168404Spjd zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4387168404Spjd 4388168404Spjd /* XXX - this should be the generation number for the objset */ 4389168404Spjd for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 4390168404Spjd zlfid->zf_setgen[i] = 0; 4391168404Spjd } 4392168404Spjd 4393168404Spjd ZFS_EXIT(zfsvfs); 4394168404Spjd return (0); 4395168404Spjd} 4396168404Spjd 4397168404Spjdstatic int 4398185029Spjdzfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 4399185029Spjd caller_context_t *ct) 4400168404Spjd{ 4401168404Spjd znode_t *zp, *xzp; 4402168404Spjd zfsvfs_t *zfsvfs; 4403168404Spjd int error; 4404168404Spjd 4405168404Spjd switch (cmd) { 4406168404Spjd case _PC_LINK_MAX: 4407168404Spjd *valp = INT_MAX; 4408168404Spjd return (0); 4409168404Spjd 4410168404Spjd case _PC_FILESIZEBITS: 4411168404Spjd *valp = 64; 4412168404Spjd return (0); 4413277300Ssmh#ifdef illumos 4414168404Spjd case _PC_XATTR_EXISTS: 4415168404Spjd zp = VTOZ(vp); 4416168404Spjd zfsvfs = zp->z_zfsvfs; 4417168404Spjd ZFS_ENTER(zfsvfs); 4418185029Spjd ZFS_VERIFY_ZP(zp); 4419168404Spjd *valp = 0; 4420303970Savg error = zfs_dirent_lookup(zp, "", &xzp, 4421303970Savg ZXATTR | ZEXISTS | ZSHARED); 4422168404Spjd if (error == 0) { 4423168404Spjd if (!zfs_dirempty(xzp)) 4424168404Spjd *valp = 1; 4425303970Savg vrele(ZTOV(xzp)); 4426168404Spjd } else if (error == ENOENT) { 4427168404Spjd /* 4428168404Spjd * If there aren't extended attributes, it's the 4429168404Spjd * same as having zero of them. 4430168404Spjd */ 4431168404Spjd error = 0; 4432168404Spjd } 4433168404Spjd ZFS_EXIT(zfsvfs); 4434168404Spjd return (error); 4435168404Spjd 4436219089Spjd case _PC_SATTR_ENABLED: 4437219089Spjd case _PC_SATTR_EXISTS: 4438219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 4439219089Spjd (vp->v_type == VREG || vp->v_type == VDIR); 4440219089Spjd return (0); 4441219089Spjd 4442219089Spjd case _PC_ACCESS_FILTERING: 4443219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 4444219089Spjd vp->v_type == VDIR; 4445219089Spjd return (0); 4446219089Spjd 4447219089Spjd case _PC_ACL_ENABLED: 4448219089Spjd *valp = _ACL_ACE_ENABLED; 4449219089Spjd return (0); 4450277300Ssmh#endif /* illumos */ 4451219089Spjd case _PC_MIN_HOLE_SIZE: 4452219089Spjd *valp = (int)SPA_MINBLOCKSIZE; 4453219089Spjd return (0); 4454277300Ssmh#ifdef illumos 4455219089Spjd case _PC_TIMESTAMP_RESOLUTION: 4456219089Spjd /* nanosecond timestamp resolution */ 4457219089Spjd *valp = 1L; 4458219089Spjd return (0); 4459277300Ssmh#endif 4460168404Spjd case _PC_ACL_EXTENDED: 4461196949Strasz *valp = 0; 4462168404Spjd return (0); 4463168404Spjd 4464196949Strasz case _PC_ACL_NFS4: 4465196949Strasz *valp = 1; 4466196949Strasz return (0); 4467196949Strasz 4468196949Strasz case _PC_ACL_PATH_MAX: 4469196949Strasz *valp = ACL_MAX_ENTRIES; 4470196949Strasz return (0); 4471196949Strasz 4472168404Spjd default: 4473168962Spjd return (EOPNOTSUPP); 4474168404Spjd } 4475168404Spjd} 4476168404Spjd 4477168404Spjd/*ARGSUSED*/ 4478168404Spjdstatic int 4479185029Spjdzfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4480185029Spjd caller_context_t *ct) 4481168404Spjd{ 4482168404Spjd znode_t *zp = VTOZ(vp); 4483168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4484168404Spjd int error; 4485185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 4486168404Spjd 4487168404Spjd ZFS_ENTER(zfsvfs); 4488185029Spjd ZFS_VERIFY_ZP(zp); 4489185029Spjd error = zfs_getacl(zp, vsecp, skipaclchk, cr); 4490168404Spjd ZFS_EXIT(zfsvfs); 4491168404Spjd 4492168404Spjd return (error); 4493168404Spjd} 4494168404Spjd 4495168404Spjd/*ARGSUSED*/ 4496228685Spjdint 4497185029Spjdzfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4498185029Spjd caller_context_t *ct) 4499168404Spjd{ 4500168404Spjd znode_t *zp = VTOZ(vp); 4501168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4502168404Spjd int error; 4503185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 4504219089Spjd zilog_t *zilog = zfsvfs->z_log; 4505168404Spjd 4506168404Spjd ZFS_ENTER(zfsvfs); 4507185029Spjd ZFS_VERIFY_ZP(zp); 4508219089Spjd 4509185029Spjd error = zfs_setacl(zp, vsecp, skipaclchk, cr); 4510219089Spjd 4511219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4512219089Spjd zil_commit(zilog, 0); 4513219089Spjd 4514168404Spjd ZFS_EXIT(zfsvfs); 4515168404Spjd return (error); 4516168404Spjd} 4517168404Spjd 4518168962Spjdstatic int 4519330991Savgzfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, 4520292373Sglebius int *rahead) 4521213937Savg{ 4522213937Savg znode_t *zp = VTOZ(vp); 4523213937Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4524213937Savg objset_t *os = zp->z_zfsvfs->z_os; 4525330991Savg rl_t *rl; 4526213937Savg vm_object_t object; 4527330991Savg off_t start, end, obj_size; 4528330991Savg uint_t blksz; 4529330991Savg int pgsin_b, pgsin_a; 4530330991Savg int error; 4531213937Savg 4532213937Savg ZFS_ENTER(zfsvfs); 4533213937Savg ZFS_VERIFY_ZP(zp); 4534213937Savg 4535330991Savg start = IDX_TO_OFF(ma[0]->pindex); 4536330991Savg end = IDX_TO_OFF(ma[count - 1]->pindex + 1); 4537330991Savg 4538330991Savg /* 4539330991Savg * Lock a range covering all required and optional pages. 4540330991Savg * Note that we need to handle the case of the block size growing. 4541330991Savg */ 4542330991Savg for (;;) { 4543330991Savg blksz = zp->z_blksz; 4544330991Savg rl = zfs_range_lock(zp, rounddown(start, blksz), 4545330991Savg roundup(end, blksz) - rounddown(start, blksz), RL_READER); 4546330991Savg if (blksz == zp->z_blksz) 4547330991Savg break; 4548330991Savg zfs_range_unlock(rl); 4549213937Savg } 4550213937Savg 4551330991Savg object = ma[0]->object; 4552330991Savg zfs_vmobject_wlock(object); 4553330991Savg obj_size = object->un_pager.vnp.vnp_size; 4554330991Savg zfs_vmobject_wunlock(object); 4555330991Savg if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) { 4556330991Savg zfs_range_unlock(rl); 4557213937Savg ZFS_EXIT(zfsvfs); 4558248084Sattilio return (zfs_vm_pagerret_bad); 4559213937Savg } 4560213937Savg 4561330991Savg pgsin_b = 0; 4562330991Savg if (rbehind != NULL) { 4563330991Savg pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz)); 4564330991Savg pgsin_b = MIN(*rbehind, pgsin_b); 4565330991Savg } 4566292373Sglebius 4567330991Savg pgsin_a = 0; 4568330991Savg if (rahead != NULL) { 4569330991Savg pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end); 4570330991Savg if (end + IDX_TO_OFF(pgsin_a) >= obj_size) 4571330991Savg pgsin_a = OFF_TO_IDX(round_page(obj_size) - end); 4572330991Savg pgsin_a = MIN(*rahead, pgsin_a); 4573243517Savg } 4574243517Savg 4575330991Savg /* 4576330991Savg * NB: we need to pass the exact byte size of the data that we expect 4577330991Savg * to read after accounting for the file size. This is required because 4578330991Savg * ZFS will panic if we request DMU to read beyond the end of the last 4579330991Savg * allocated block. 4580330991Savg */ 4581330991Savg error = dmu_read_pages(os, zp->z_id, ma, count, &pgsin_b, &pgsin_a, 4582330991Savg MIN(end, obj_size) - (end - PAGE_SIZE)); 4583213937Savg 4584330991Savg zfs_range_unlock(rl); 4585213937Savg ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4586213937Savg ZFS_EXIT(zfsvfs); 4587330991Savg 4588330991Savg if (error != 0) 4589292386Sglebius return (zfs_vm_pagerret_error); 4590330991Savg 4591330991Savg PCPU_INC(cnt.v_vnodein); 4592330991Savg PCPU_ADD(cnt.v_vnodepgsin, count + pgsin_b + pgsin_a); 4593330991Savg if (rbehind != NULL) 4594330991Savg *rbehind = pgsin_b; 4595330991Savg if (rahead != NULL) 4596330991Savg *rahead = pgsin_a; 4597330991Savg return (zfs_vm_pagerret_ok); 4598213937Savg} 4599213937Savg 4600213937Savgstatic int 4601213937Savgzfs_freebsd_getpages(ap) 4602213937Savg struct vop_getpages_args /* { 4603213937Savg struct vnode *a_vp; 4604213937Savg vm_page_t *a_m; 4605213937Savg int a_count; 4606292373Sglebius int *a_rbehind; 4607292373Sglebius int *a_rahead; 4608213937Savg } */ *ap; 4609213937Savg{ 4610213937Savg 4611292373Sglebius return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, 4612292373Sglebius ap->a_rahead)); 4613213937Savg} 4614213937Savg 4615213937Savgstatic int 4616258746Savgzfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, 4617258746Savg int *rtvals) 4618258746Savg{ 4619258746Savg znode_t *zp = VTOZ(vp); 4620258746Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4621258746Savg rl_t *rl; 4622258746Savg dmu_tx_t *tx; 4623258746Savg struct sf_buf *sf; 4624258746Savg vm_object_t object; 4625258746Savg vm_page_t m; 4626258746Savg caddr_t va; 4627258746Savg size_t tocopy; 4628258746Savg size_t lo_len; 4629258746Savg vm_ooffset_t lo_off; 4630258746Savg vm_ooffset_t off; 4631258746Savg uint_t blksz; 4632258746Savg int ncount; 4633258746Savg int pcount; 4634258746Savg int err; 4635258746Savg int i; 4636258746Savg 4637258746Savg ZFS_ENTER(zfsvfs); 4638258746Savg ZFS_VERIFY_ZP(zp); 4639258746Savg 4640258746Savg object = vp->v_object; 4641258746Savg pcount = btoc(len); 4642258746Savg ncount = pcount; 4643258746Savg 4644258746Savg KASSERT(ma[0]->object == object, ("mismatching object")); 4645258746Savg KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length")); 4646258746Savg 4647258746Savg for (i = 0; i < pcount; i++) 4648258746Savg rtvals[i] = zfs_vm_pagerret_error; 4649258746Savg 4650258746Savg off = IDX_TO_OFF(ma[0]->pindex); 4651258746Savg blksz = zp->z_blksz; 4652258746Savg lo_off = rounddown(off, blksz); 4653258746Savg lo_len = roundup(len + (off - lo_off), blksz); 4654258746Savg rl = zfs_range_lock(zp, lo_off, lo_len, RL_WRITER); 4655258746Savg 4656258746Savg zfs_vmobject_wlock(object); 4657258746Savg if (len + off > object->un_pager.vnp.vnp_size) { 4658258746Savg if (object->un_pager.vnp.vnp_size > off) { 4659258746Savg int pgoff; 4660258746Savg 4661258746Savg len = object->un_pager.vnp.vnp_size - off; 4662258746Savg ncount = btoc(len); 4663258746Savg if ((pgoff = (int)len & PAGE_MASK) != 0) { 4664258746Savg /* 4665258746Savg * If the object is locked and the following 4666258746Savg * conditions hold, then the page's dirty 4667258746Savg * field cannot be concurrently changed by a 4668258746Savg * pmap operation. 4669258746Savg */ 4670258746Savg m = ma[ncount - 1]; 4671258746Savg vm_page_assert_sbusied(m); 4672258746Savg KASSERT(!pmap_page_is_write_mapped(m), 4673258746Savg ("zfs_putpages: page %p is not read-only", m)); 4674258746Savg vm_page_clear_dirty(m, pgoff, PAGE_SIZE - 4675258746Savg pgoff); 4676258746Savg } 4677258746Savg } else { 4678258746Savg len = 0; 4679258746Savg ncount = 0; 4680258746Savg } 4681258746Savg if (ncount < pcount) { 4682258746Savg for (i = ncount; i < pcount; i++) { 4683258746Savg rtvals[i] = zfs_vm_pagerret_bad; 4684258746Savg } 4685258746Savg } 4686258746Savg } 4687258746Savg zfs_vmobject_wunlock(object); 4688258746Savg 4689258746Savg if (ncount == 0) 4690258746Savg goto out; 4691258746Savg 4692258746Savg if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 4693258746Savg zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4694258746Savg goto out; 4695258746Savg } 4696258746Savg 4697258746Savg tx = dmu_tx_create(zfsvfs->z_os); 4698258746Savg dmu_tx_hold_write(tx, zp->z_id, off, len); 4699258746Savg 4700258746Savg dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4701258746Savg zfs_sa_upgrade_txholds(tx, zp); 4702316847Savg err = dmu_tx_assign(tx, TXG_WAIT); 4703258746Savg if (err != 0) { 4704258746Savg dmu_tx_abort(tx); 4705258746Savg goto out; 4706258746Savg } 4707258746Savg 4708258746Savg if (zp->z_blksz < PAGE_SIZE) { 4709258746Savg for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) { 4710258746Savg tocopy = len > PAGE_SIZE ? PAGE_SIZE : len; 4711258746Savg va = zfs_map_page(ma[i], &sf); 4712258746Savg dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx); 4713258746Savg zfs_unmap_page(sf); 4714258746Savg } 4715258746Savg } else { 4716258746Savg err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx); 4717258746Savg } 4718258746Savg 4719258746Savg if (err == 0) { 4720258746Savg uint64_t mtime[2], ctime[2]; 4721258746Savg sa_bulk_attr_t bulk[3]; 4722258746Savg int count = 0; 4723258746Savg 4724258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 4725258746Savg &mtime, 16); 4726258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 4727258746Savg &ctime, 16); 4728258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4729258746Savg &zp->z_pflags, 8); 4730258746Savg zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 4731258746Savg B_TRUE); 4732321561Smav err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 4733321561Smav ASSERT0(err); 4734258746Savg zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 4735258746Savg 4736258746Savg zfs_vmobject_wlock(object); 4737258746Savg for (i = 0; i < ncount; i++) { 4738258746Savg rtvals[i] = zfs_vm_pagerret_ok; 4739258746Savg vm_page_undirty(ma[i]); 4740258746Savg } 4741258746Savg zfs_vmobject_wunlock(object); 4742258746Savg PCPU_INC(cnt.v_vnodeout); 4743258746Savg PCPU_ADD(cnt.v_vnodepgsout, ncount); 4744258746Savg } 4745258746Savg dmu_tx_commit(tx); 4746258746Savg 4747258746Savgout: 4748258746Savg zfs_range_unlock(rl); 4749258746Savg if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 || 4750258746Savg zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4751258746Savg zil_commit(zfsvfs->z_log, zp->z_id); 4752258746Savg ZFS_EXIT(zfsvfs); 4753258746Savg return (rtvals[0]); 4754258746Savg} 4755258746Savg 4756258746Savgint 4757258746Savgzfs_freebsd_putpages(ap) 4758258746Savg struct vop_putpages_args /* { 4759258746Savg struct vnode *a_vp; 4760258746Savg vm_page_t *a_m; 4761258746Savg int a_count; 4762258746Savg int a_sync; 4763258746Savg int *a_rtvals; 4764258746Savg } */ *ap; 4765258746Savg{ 4766258746Savg 4767258746Savg return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, 4768258746Savg ap->a_rtvals)); 4769258746Savg} 4770258746Savg 4771258746Savgstatic int 4772243518Savgzfs_freebsd_bmap(ap) 4773243518Savg struct vop_bmap_args /* { 4774243518Savg struct vnode *a_vp; 4775243518Savg daddr_t a_bn; 4776243518Savg struct bufobj **a_bop; 4777243518Savg daddr_t *a_bnp; 4778243518Savg int *a_runp; 4779243518Savg int *a_runb; 4780243518Savg } */ *ap; 4781243518Savg{ 4782243518Savg 4783243518Savg if (ap->a_bop != NULL) 4784243518Savg *ap->a_bop = &ap->a_vp->v_bufobj; 4785243518Savg if (ap->a_bnp != NULL) 4786243518Savg *ap->a_bnp = ap->a_bn; 4787243518Savg if (ap->a_runp != NULL) 4788243518Savg *ap->a_runp = 0; 4789243518Savg if (ap->a_runb != NULL) 4790243518Savg *ap->a_runb = 0; 4791243518Savg 4792243518Savg return (0); 4793243518Savg} 4794243518Savg 4795243518Savgstatic int 4796168962Spjdzfs_freebsd_open(ap) 4797168962Spjd struct vop_open_args /* { 4798168962Spjd struct vnode *a_vp; 4799168962Spjd int a_mode; 4800168962Spjd struct ucred *a_cred; 4801168962Spjd struct thread *a_td; 4802168962Spjd } */ *ap; 4803168962Spjd{ 4804168962Spjd vnode_t *vp = ap->a_vp; 4805168962Spjd znode_t *zp = VTOZ(vp); 4806168962Spjd int error; 4807168962Spjd 4808185029Spjd error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); 4809168962Spjd if (error == 0) 4810219089Spjd vnode_create_vobject(vp, zp->z_size, ap->a_td); 4811168962Spjd return (error); 4812168962Spjd} 4813168962Spjd 4814168962Spjdstatic int 4815168962Spjdzfs_freebsd_close(ap) 4816168962Spjd struct vop_close_args /* { 4817168962Spjd struct vnode *a_vp; 4818168962Spjd int a_fflag; 4819168962Spjd struct ucred *a_cred; 4820168962Spjd struct thread *a_td; 4821168962Spjd } */ *ap; 4822168962Spjd{ 4823168962Spjd 4824242566Savg return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred, NULL)); 4825168962Spjd} 4826168962Spjd 4827168962Spjdstatic int 4828168962Spjdzfs_freebsd_ioctl(ap) 4829168962Spjd struct vop_ioctl_args /* { 4830168962Spjd struct vnode *a_vp; 4831168962Spjd u_long a_command; 4832168962Spjd caddr_t a_data; 4833168962Spjd int a_fflag; 4834168962Spjd struct ucred *cred; 4835168962Spjd struct thread *td; 4836168962Spjd } */ *ap; 4837168962Spjd{ 4838168962Spjd 4839168978Spjd return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 4840185029Spjd ap->a_fflag, ap->a_cred, NULL, NULL)); 4841168962Spjd} 4842168962Spjd 4843168962Spjdstatic int 4844330062Savgioflags(int ioflags) 4845330062Savg{ 4846330062Savg int flags = 0; 4847330062Savg 4848330062Savg if (ioflags & IO_APPEND) 4849330062Savg flags |= FAPPEND; 4850330062Savg if (ioflags & IO_NDELAY) 4851330062Savg flags |= FNONBLOCK; 4852330062Savg if (ioflags & IO_SYNC) 4853330062Savg flags |= (FSYNC | FDSYNC | FRSYNC); 4854330062Savg 4855330062Savg return (flags); 4856330062Savg} 4857330062Savg 4858330062Savgstatic int 4859168962Spjdzfs_freebsd_read(ap) 4860168962Spjd struct vop_read_args /* { 4861168962Spjd struct vnode *a_vp; 4862168962Spjd struct uio *a_uio; 4863168962Spjd int a_ioflag; 4864168962Spjd struct ucred *a_cred; 4865168962Spjd } */ *ap; 4866168962Spjd{ 4867168962Spjd 4868213673Spjd return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 4869213673Spjd ap->a_cred, NULL)); 4870168962Spjd} 4871168962Spjd 4872168962Spjdstatic int 4873168962Spjdzfs_freebsd_write(ap) 4874168962Spjd struct vop_write_args /* { 4875168962Spjd struct vnode *a_vp; 4876168962Spjd struct uio *a_uio; 4877168962Spjd int a_ioflag; 4878168962Spjd struct ucred *a_cred; 4879168962Spjd } */ *ap; 4880168962Spjd{ 4881168962Spjd 4882213673Spjd return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 4883213673Spjd ap->a_cred, NULL)); 4884168962Spjd} 4885168962Spjd 4886168962Spjdstatic int 4887168962Spjdzfs_freebsd_access(ap) 4888168962Spjd struct vop_access_args /* { 4889168962Spjd struct vnode *a_vp; 4890192689Strasz accmode_t a_accmode; 4891168962Spjd struct ucred *a_cred; 4892168962Spjd struct thread *a_td; 4893168962Spjd } */ *ap; 4894168962Spjd{ 4895212002Sjh vnode_t *vp = ap->a_vp; 4896212002Sjh znode_t *zp = VTOZ(vp); 4897198703Spjd accmode_t accmode; 4898198703Spjd int error = 0; 4899168962Spjd 4900185172Spjd /* 4901198703Spjd * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND, 4902185172Spjd */ 4903198703Spjd accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 4904198703Spjd if (accmode != 0) 4905198703Spjd error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); 4906185172Spjd 4907198703Spjd /* 4908198703Spjd * VADMIN has to be handled by vaccess(). 4909198703Spjd */ 4910198703Spjd if (error == 0) { 4911198703Spjd accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 4912198703Spjd if (accmode != 0) { 4913219089Spjd error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 4914219089Spjd zp->z_gid, accmode, ap->a_cred, NULL); 4915198703Spjd } 4916185172Spjd } 4917185172Spjd 4918212002Sjh /* 4919212002Sjh * For VEXEC, ensure that at least one execute bit is set for 4920212002Sjh * non-directories. 4921212002Sjh */ 4922212002Sjh if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 4923219089Spjd (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 4924212002Sjh error = EACCES; 4925219089Spjd } 4926212002Sjh 4927198703Spjd return (error); 4928168962Spjd} 4929168962Spjd 4930168962Spjdstatic int 4931168962Spjdzfs_freebsd_lookup(ap) 4932168962Spjd struct vop_lookup_args /* { 4933168962Spjd struct vnode *a_dvp; 4934168962Spjd struct vnode **a_vpp; 4935168962Spjd struct componentname *a_cnp; 4936168962Spjd } */ *ap; 4937168962Spjd{ 4938168962Spjd struct componentname *cnp = ap->a_cnp; 4939168962Spjd char nm[NAME_MAX + 1]; 4940168962Spjd 4941168962Spjd ASSERT(cnp->cn_namelen < sizeof(nm)); 4942168962Spjd strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 4943168962Spjd 4944168962Spjd return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 4945185029Spjd cnp->cn_cred, cnp->cn_thread, 0)); 4946168962Spjd} 4947168962Spjd 4948168962Spjdstatic int 4949303970Savgzfs_cache_lookup(ap) 4950303970Savg struct vop_lookup_args /* { 4951303970Savg struct vnode *a_dvp; 4952303970Savg struct vnode **a_vpp; 4953303970Savg struct componentname *a_cnp; 4954303970Savg } */ *ap; 4955303970Savg{ 4956303970Savg zfsvfs_t *zfsvfs; 4957303970Savg 4958303970Savg zfsvfs = ap->a_dvp->v_mount->mnt_data; 4959303970Savg if (zfsvfs->z_use_namecache) 4960303970Savg return (vfs_cache_lookup(ap)); 4961303970Savg else 4962303970Savg return (zfs_freebsd_lookup(ap)); 4963303970Savg} 4964303970Savg 4965303970Savgstatic int 4966168962Spjdzfs_freebsd_create(ap) 4967168962Spjd struct vop_create_args /* { 4968168962Spjd struct vnode *a_dvp; 4969168962Spjd struct vnode **a_vpp; 4970168962Spjd struct componentname *a_cnp; 4971168962Spjd struct vattr *a_vap; 4972168962Spjd } */ *ap; 4973168962Spjd{ 4974303970Savg zfsvfs_t *zfsvfs; 4975168962Spjd struct componentname *cnp = ap->a_cnp; 4976168962Spjd vattr_t *vap = ap->a_vap; 4977276007Skib int error, mode; 4978168962Spjd 4979168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 4980168962Spjd 4981168962Spjd vattr_init_mask(vap); 4982168962Spjd mode = vap->va_mode & ALLPERMS; 4983303970Savg zfsvfs = ap->a_dvp->v_mount->mnt_data; 4984168962Spjd 4985276007Skib error = zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 4986276007Skib ap->a_vpp, cnp->cn_cred, cnp->cn_thread); 4987303970Savg if (zfsvfs->z_use_namecache && 4988303970Savg error == 0 && (cnp->cn_flags & MAKEENTRY) != 0) 4989276007Skib cache_enter(ap->a_dvp, *ap->a_vpp, cnp); 4990276007Skib return (error); 4991168962Spjd} 4992168962Spjd 4993168962Spjdstatic int 4994168962Spjdzfs_freebsd_remove(ap) 4995168962Spjd struct vop_remove_args /* { 4996168962Spjd struct vnode *a_dvp; 4997168962Spjd struct vnode *a_vp; 4998168962Spjd struct componentname *a_cnp; 4999168962Spjd } */ *ap; 5000168962Spjd{ 5001168962Spjd 5002168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 5003168962Spjd 5004303970Savg return (zfs_remove(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr, 5005303970Savg ap->a_cnp->cn_cred)); 5006168962Spjd} 5007168962Spjd 5008168962Spjdstatic int 5009168962Spjdzfs_freebsd_mkdir(ap) 5010168962Spjd struct vop_mkdir_args /* { 5011168962Spjd struct vnode *a_dvp; 5012168962Spjd struct vnode **a_vpp; 5013168962Spjd struct componentname *a_cnp; 5014168962Spjd struct vattr *a_vap; 5015168962Spjd } */ *ap; 5016168962Spjd{ 5017168962Spjd vattr_t *vap = ap->a_vap; 5018168962Spjd 5019168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 5020168962Spjd 5021168962Spjd vattr_init_mask(vap); 5022168962Spjd 5023168962Spjd return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 5024303970Savg ap->a_cnp->cn_cred)); 5025168962Spjd} 5026168962Spjd 5027168962Spjdstatic int 5028168962Spjdzfs_freebsd_rmdir(ap) 5029168962Spjd struct vop_rmdir_args /* { 5030168962Spjd struct vnode *a_dvp; 5031168962Spjd struct vnode *a_vp; 5032168962Spjd struct componentname *a_cnp; 5033168962Spjd } */ *ap; 5034168962Spjd{ 5035168962Spjd struct componentname *cnp = ap->a_cnp; 5036168962Spjd 5037168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5038168962Spjd 5039303970Savg return (zfs_rmdir(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred)); 5040168962Spjd} 5041168962Spjd 5042168962Spjdstatic int 5043168962Spjdzfs_freebsd_readdir(ap) 5044168962Spjd struct vop_readdir_args /* { 5045168962Spjd struct vnode *a_vp; 5046168962Spjd struct uio *a_uio; 5047168962Spjd struct ucred *a_cred; 5048168962Spjd int *a_eofflag; 5049168962Spjd int *a_ncookies; 5050168962Spjd u_long **a_cookies; 5051168962Spjd } */ *ap; 5052168962Spjd{ 5053168962Spjd 5054168962Spjd return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 5055168962Spjd ap->a_ncookies, ap->a_cookies)); 5056168962Spjd} 5057168962Spjd 5058168962Spjdstatic int 5059168962Spjdzfs_freebsd_fsync(ap) 5060168962Spjd struct vop_fsync_args /* { 5061168962Spjd struct vnode *a_vp; 5062168962Spjd int a_waitfor; 5063168962Spjd struct thread *a_td; 5064168962Spjd } */ *ap; 5065168962Spjd{ 5066168962Spjd 5067168962Spjd vop_stdfsync(ap); 5068185029Spjd return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 5069168962Spjd} 5070168962Spjd 5071168962Spjdstatic int 5072168962Spjdzfs_freebsd_getattr(ap) 5073168962Spjd struct vop_getattr_args /* { 5074168962Spjd struct vnode *a_vp; 5075168962Spjd struct vattr *a_vap; 5076168962Spjd struct ucred *a_cred; 5077168962Spjd } */ *ap; 5078168962Spjd{ 5079185029Spjd vattr_t *vap = ap->a_vap; 5080185029Spjd xvattr_t xvap; 5081185029Spjd u_long fflags = 0; 5082185029Spjd int error; 5083168962Spjd 5084185029Spjd xva_init(&xvap); 5085185029Spjd xvap.xva_vattr = *vap; 5086185029Spjd xvap.xva_vattr.va_mask |= AT_XVATTR; 5087185029Spjd 5088185029Spjd /* Convert chflags into ZFS-type flags. */ 5089185029Spjd /* XXX: what about SF_SETTABLE?. */ 5090185029Spjd XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 5091185029Spjd XVA_SET_REQ(&xvap, XAT_APPENDONLY); 5092185029Spjd XVA_SET_REQ(&xvap, XAT_NOUNLINK); 5093185029Spjd XVA_SET_REQ(&xvap, XAT_NODUMP); 5094254627Sken XVA_SET_REQ(&xvap, XAT_READONLY); 5095254627Sken XVA_SET_REQ(&xvap, XAT_ARCHIVE); 5096254627Sken XVA_SET_REQ(&xvap, XAT_SYSTEM); 5097254627Sken XVA_SET_REQ(&xvap, XAT_HIDDEN); 5098254627Sken XVA_SET_REQ(&xvap, XAT_REPARSE); 5099254627Sken XVA_SET_REQ(&xvap, XAT_OFFLINE); 5100254627Sken XVA_SET_REQ(&xvap, XAT_SPARSE); 5101254627Sken 5102185029Spjd error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 5103185029Spjd if (error != 0) 5104185029Spjd return (error); 5105185029Spjd 5106185029Spjd /* Convert ZFS xattr into chflags. */ 5107185029Spjd#define FLAG_CHECK(fflag, xflag, xfield) do { \ 5108185029Spjd if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 5109185029Spjd fflags |= (fflag); \ 5110185029Spjd} while (0) 5111185029Spjd FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 5112185029Spjd xvap.xva_xoptattrs.xoa_immutable); 5113185029Spjd FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 5114185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 5115185029Spjd FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 5116185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 5117254627Sken FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE, 5118254627Sken xvap.xva_xoptattrs.xoa_archive); 5119185029Spjd FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 5120185029Spjd xvap.xva_xoptattrs.xoa_nodump); 5121254627Sken FLAG_CHECK(UF_READONLY, XAT_READONLY, 5122254627Sken xvap.xva_xoptattrs.xoa_readonly); 5123254627Sken FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM, 5124254627Sken xvap.xva_xoptattrs.xoa_system); 5125254627Sken FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN, 5126254627Sken xvap.xva_xoptattrs.xoa_hidden); 5127254627Sken FLAG_CHECK(UF_REPARSE, XAT_REPARSE, 5128254627Sken xvap.xva_xoptattrs.xoa_reparse); 5129254627Sken FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE, 5130254627Sken xvap.xva_xoptattrs.xoa_offline); 5131254627Sken FLAG_CHECK(UF_SPARSE, XAT_SPARSE, 5132254627Sken xvap.xva_xoptattrs.xoa_sparse); 5133254627Sken 5134185029Spjd#undef FLAG_CHECK 5135185029Spjd *vap = xvap.xva_vattr; 5136185029Spjd vap->va_flags = fflags; 5137185029Spjd return (0); 5138168962Spjd} 5139168962Spjd 5140168962Spjdstatic int 5141168962Spjdzfs_freebsd_setattr(ap) 5142168962Spjd struct vop_setattr_args /* { 5143168962Spjd struct vnode *a_vp; 5144168962Spjd struct vattr *a_vap; 5145168962Spjd struct ucred *a_cred; 5146168962Spjd } */ *ap; 5147168962Spjd{ 5148185172Spjd vnode_t *vp = ap->a_vp; 5149168962Spjd vattr_t *vap = ap->a_vap; 5150185172Spjd cred_t *cred = ap->a_cred; 5151185029Spjd xvattr_t xvap; 5152185029Spjd u_long fflags; 5153185029Spjd uint64_t zflags; 5154168962Spjd 5155168962Spjd vattr_init_mask(vap); 5156170044Spjd vap->va_mask &= ~AT_NOSET; 5157168962Spjd 5158185029Spjd xva_init(&xvap); 5159185029Spjd xvap.xva_vattr = *vap; 5160185029Spjd 5161219089Spjd zflags = VTOZ(vp)->z_pflags; 5162185172Spjd 5163185029Spjd if (vap->va_flags != VNOVAL) { 5164197683Sdelphij zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 5165185172Spjd int error; 5166185172Spjd 5167197683Sdelphij if (zfsvfs->z_use_fuids == B_FALSE) 5168197683Sdelphij return (EOPNOTSUPP); 5169197683Sdelphij 5170185029Spjd fflags = vap->va_flags; 5171254627Sken /* 5172254627Sken * XXX KDM 5173254627Sken * We need to figure out whether it makes sense to allow 5174254627Sken * UF_REPARSE through, since we don't really have other 5175254627Sken * facilities to handle reparse points and zfs_setattr() 5176254627Sken * doesn't currently allow setting that attribute anyway. 5177254627Sken */ 5178254627Sken if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE| 5179254627Sken UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE| 5180254627Sken UF_OFFLINE|UF_SPARSE)) != 0) 5181185029Spjd return (EOPNOTSUPP); 5182185172Spjd /* 5183185172Spjd * Unprivileged processes are not permitted to unset system 5184185172Spjd * flags, or modify flags if any system flags are set. 5185185172Spjd * Privileged non-jail processes may not modify system flags 5186185172Spjd * if securelevel > 0 and any existing system flags are set. 5187185172Spjd * Privileged jail processes behave like privileged non-jail 5188185172Spjd * processes if the security.jail.chflags_allowed sysctl is 5189185172Spjd * is non-zero; otherwise, they behave like unprivileged 5190185172Spjd * processes. 5191185172Spjd */ 5192197861Spjd if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 5193197861Spjd priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) { 5194185172Spjd if (zflags & 5195185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 5196185172Spjd error = securelevel_gt(cred, 0); 5197197861Spjd if (error != 0) 5198185172Spjd return (error); 5199185172Spjd } 5200185172Spjd } else { 5201197861Spjd /* 5202197861Spjd * Callers may only modify the file flags on objects they 5203197861Spjd * have VADMIN rights for. 5204197861Spjd */ 5205197861Spjd if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0) 5206197861Spjd return (error); 5207185172Spjd if (zflags & 5208185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 5209185172Spjd return (EPERM); 5210185172Spjd } 5211185172Spjd if (fflags & 5212185172Spjd (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 5213185172Spjd return (EPERM); 5214185172Spjd } 5215185172Spjd } 5216185029Spjd 5217185029Spjd#define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 5218185029Spjd if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 5219185029Spjd ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 5220185029Spjd XVA_SET_REQ(&xvap, (xflag)); \ 5221185029Spjd (xfield) = ((fflags & (fflag)) != 0); \ 5222185029Spjd } \ 5223185029Spjd} while (0) 5224185029Spjd /* Convert chflags into ZFS-type flags. */ 5225185029Spjd /* XXX: what about SF_SETTABLE?. */ 5226185029Spjd FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 5227185029Spjd xvap.xva_xoptattrs.xoa_immutable); 5228185029Spjd FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 5229185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 5230185029Spjd FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 5231185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 5232254627Sken FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE, 5233254627Sken xvap.xva_xoptattrs.xoa_archive); 5234185029Spjd FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 5235185172Spjd xvap.xva_xoptattrs.xoa_nodump); 5236254627Sken FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY, 5237254627Sken xvap.xva_xoptattrs.xoa_readonly); 5238254627Sken FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM, 5239254627Sken xvap.xva_xoptattrs.xoa_system); 5240254627Sken FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN, 5241254627Sken xvap.xva_xoptattrs.xoa_hidden); 5242254627Sken FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE, 5243254627Sken xvap.xva_xoptattrs.xoa_hidden); 5244254627Sken FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE, 5245254627Sken xvap.xva_xoptattrs.xoa_offline); 5246254627Sken FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE, 5247254627Sken xvap.xva_xoptattrs.xoa_sparse); 5248185029Spjd#undef FLAG_CHANGE 5249185029Spjd } 5250316391Sasomers if (vap->va_birthtime.tv_sec != VNOVAL) { 5251316391Sasomers xvap.xva_vattr.va_mask |= AT_XVATTR; 5252316391Sasomers XVA_SET_REQ(&xvap, XAT_CREATETIME); 5253316391Sasomers } 5254185172Spjd return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL)); 5255168962Spjd} 5256168962Spjd 5257168962Spjdstatic int 5258168962Spjdzfs_freebsd_rename(ap) 5259168962Spjd struct vop_rename_args /* { 5260168962Spjd struct vnode *a_fdvp; 5261168962Spjd struct vnode *a_fvp; 5262168962Spjd struct componentname *a_fcnp; 5263168962Spjd struct vnode *a_tdvp; 5264168962Spjd struct vnode *a_tvp; 5265168962Spjd struct componentname *a_tcnp; 5266168962Spjd } */ *ap; 5267168962Spjd{ 5268168962Spjd vnode_t *fdvp = ap->a_fdvp; 5269168962Spjd vnode_t *fvp = ap->a_fvp; 5270168962Spjd vnode_t *tdvp = ap->a_tdvp; 5271168962Spjd vnode_t *tvp = ap->a_tvp; 5272168962Spjd int error; 5273168962Spjd 5274192237Skmacy ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); 5275192237Skmacy ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); 5276168962Spjd 5277303970Savg error = zfs_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp, 5278303970Savg ap->a_tcnp, ap->a_fcnp->cn_cred); 5279168962Spjd 5280303970Savg vrele(fdvp); 5281303970Savg vrele(fvp); 5282303970Savg vrele(tdvp); 5283303970Savg if (tvp != NULL) 5284303970Savg vrele(tvp); 5285303970Savg 5286168962Spjd return (error); 5287168962Spjd} 5288168962Spjd 5289168962Spjdstatic int 5290168962Spjdzfs_freebsd_symlink(ap) 5291168962Spjd struct vop_symlink_args /* { 5292168962Spjd struct vnode *a_dvp; 5293168962Spjd struct vnode **a_vpp; 5294168962Spjd struct componentname *a_cnp; 5295168962Spjd struct vattr *a_vap; 5296168962Spjd char *a_target; 5297168962Spjd } */ *ap; 5298168962Spjd{ 5299168962Spjd struct componentname *cnp = ap->a_cnp; 5300168962Spjd vattr_t *vap = ap->a_vap; 5301168962Spjd 5302168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5303168962Spjd 5304168962Spjd vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. */ 5305168962Spjd vattr_init_mask(vap); 5306168962Spjd 5307168962Spjd return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 5308168962Spjd ap->a_target, cnp->cn_cred, cnp->cn_thread)); 5309168962Spjd} 5310168962Spjd 5311168962Spjdstatic int 5312168962Spjdzfs_freebsd_readlink(ap) 5313168962Spjd struct vop_readlink_args /* { 5314168962Spjd struct vnode *a_vp; 5315168962Spjd struct uio *a_uio; 5316168962Spjd struct ucred *a_cred; 5317168962Spjd } */ *ap; 5318168962Spjd{ 5319168962Spjd 5320185029Spjd return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 5321168962Spjd} 5322168962Spjd 5323168962Spjdstatic int 5324168962Spjdzfs_freebsd_link(ap) 5325168962Spjd struct vop_link_args /* { 5326168962Spjd struct vnode *a_tdvp; 5327168962Spjd struct vnode *a_vp; 5328168962Spjd struct componentname *a_cnp; 5329168962Spjd } */ *ap; 5330168962Spjd{ 5331168962Spjd struct componentname *cnp = ap->a_cnp; 5332254982Sdelphij vnode_t *vp = ap->a_vp; 5333254982Sdelphij vnode_t *tdvp = ap->a_tdvp; 5334168962Spjd 5335254982Sdelphij if (tdvp->v_mount != vp->v_mount) 5336254982Sdelphij return (EXDEV); 5337254982Sdelphij 5338168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5339168962Spjd 5340254982Sdelphij return (zfs_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 5341168962Spjd} 5342168962Spjd 5343168962Spjdstatic int 5344168962Spjdzfs_freebsd_inactive(ap) 5345169170Spjd struct vop_inactive_args /* { 5346169170Spjd struct vnode *a_vp; 5347169170Spjd struct thread *a_td; 5348169170Spjd } */ *ap; 5349168962Spjd{ 5350168962Spjd vnode_t *vp = ap->a_vp; 5351168962Spjd 5352185029Spjd zfs_inactive(vp, ap->a_td->td_ucred, NULL); 5353168962Spjd return (0); 5354168962Spjd} 5355168962Spjd 5356168962Spjdstatic int 5357168962Spjdzfs_freebsd_reclaim(ap) 5358168962Spjd struct vop_reclaim_args /* { 5359168962Spjd struct vnode *a_vp; 5360168962Spjd struct thread *a_td; 5361168962Spjd } */ *ap; 5362168962Spjd{ 5363169170Spjd vnode_t *vp = ap->a_vp; 5364168962Spjd znode_t *zp = VTOZ(vp); 5365197133Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5366168962Spjd 5367169025Spjd ASSERT(zp != NULL); 5368169025Spjd 5369243520Savg /* Destroy the vm object and flush associated pages. */ 5370243520Savg vnode_destroy_vobject(vp); 5371243520Savg 5372168962Spjd /* 5373243520Savg * z_teardown_inactive_lock protects from a race with 5374243520Savg * zfs_znode_dmu_fini in zfsvfs_teardown during 5375243520Savg * force unmount. 5376168962Spjd */ 5377243520Savg rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 5378243520Savg if (zp->z_sa_hdl == NULL) 5379196301Spjd zfs_znode_free(zp); 5380243520Savg else 5381243520Savg zfs_zinactive(zp); 5382243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 5383185029Spjd 5384168962Spjd vp->v_data = NULL; 5385168962Spjd return (0); 5386168962Spjd} 5387168962Spjd 5388168962Spjdstatic int 5389168962Spjdzfs_freebsd_fid(ap) 5390168962Spjd struct vop_fid_args /* { 5391168962Spjd struct vnode *a_vp; 5392168962Spjd struct fid *a_fid; 5393168962Spjd } */ *ap; 5394168962Spjd{ 5395168962Spjd 5396185029Spjd return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 5397168962Spjd} 5398168962Spjd 5399168962Spjdstatic int 5400168962Spjdzfs_freebsd_pathconf(ap) 5401168962Spjd struct vop_pathconf_args /* { 5402168962Spjd struct vnode *a_vp; 5403168962Spjd int a_name; 5404168962Spjd register_t *a_retval; 5405168962Spjd } */ *ap; 5406168962Spjd{ 5407168962Spjd ulong_t val; 5408168962Spjd int error; 5409168962Spjd 5410185029Spjd error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 5411328298Sjhb if (error == 0) { 5412168962Spjd *ap->a_retval = val; 5413328298Sjhb return (error); 5414328298Sjhb } 5415328298Sjhb if (error != EOPNOTSUPP) 5416328298Sjhb return (error); 5417168962Spjd 5418196949Strasz switch (ap->a_name) { 5419328298Sjhb case _PC_NAME_MAX: 5420328298Sjhb *ap->a_retval = NAME_MAX; 5421328298Sjhb return (0); 5422328298Sjhb case _PC_PIPE_BUF: 5423328298Sjhb if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) { 5424328298Sjhb *ap->a_retval = PIPE_BUF; 5425328298Sjhb return (0); 5426328298Sjhb } 5427328298Sjhb return (EINVAL); 5428196949Strasz default: 5429328298Sjhb return (vop_stdpathconf(ap)); 5430196949Strasz } 5431196949Strasz} 5432196949Strasz 5433185029Spjd/* 5434185029Spjd * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 5435185029Spjd * extended attribute name: 5436185029Spjd * 5437185029Spjd * NAMESPACE PREFIX 5438185029Spjd * system freebsd:system: 5439185029Spjd * user (none, can be used to access ZFS fsattr(5) attributes 5440185029Spjd * created on Solaris) 5441185029Spjd */ 5442185029Spjdstatic int 5443185029Spjdzfs_create_attrname(int attrnamespace, const char *name, char *attrname, 5444185029Spjd size_t size) 5445185029Spjd{ 5446185029Spjd const char *namespace, *prefix, *suffix; 5447185029Spjd 5448185029Spjd /* We don't allow '/' character in attribute name. */ 5449185029Spjd if (strchr(name, '/') != NULL) 5450185029Spjd return (EINVAL); 5451185029Spjd /* We don't allow attribute names that start with "freebsd:" string. */ 5452185029Spjd if (strncmp(name, "freebsd:", 8) == 0) 5453185029Spjd return (EINVAL); 5454185029Spjd 5455185029Spjd bzero(attrname, size); 5456185029Spjd 5457185029Spjd switch (attrnamespace) { 5458185029Spjd case EXTATTR_NAMESPACE_USER: 5459185029Spjd#if 0 5460185029Spjd prefix = "freebsd:"; 5461185029Spjd namespace = EXTATTR_NAMESPACE_USER_STRING; 5462185029Spjd suffix = ":"; 5463185029Spjd#else 5464185029Spjd /* 5465185029Spjd * This is the default namespace by which we can access all 5466185029Spjd * attributes created on Solaris. 5467185029Spjd */ 5468185029Spjd prefix = namespace = suffix = ""; 5469185029Spjd#endif 5470185029Spjd break; 5471185029Spjd case EXTATTR_NAMESPACE_SYSTEM: 5472185029Spjd prefix = "freebsd:"; 5473185029Spjd namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 5474185029Spjd suffix = ":"; 5475185029Spjd break; 5476185029Spjd case EXTATTR_NAMESPACE_EMPTY: 5477185029Spjd default: 5478185029Spjd return (EINVAL); 5479185029Spjd } 5480185029Spjd if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 5481185029Spjd name) >= size) { 5482185029Spjd return (ENAMETOOLONG); 5483185029Spjd } 5484185029Spjd return (0); 5485185029Spjd} 5486185029Spjd 5487185029Spjd/* 5488185029Spjd * Vnode operating to retrieve a named extended attribute. 5489185029Spjd */ 5490185029Spjdstatic int 5491185029Spjdzfs_getextattr(struct vop_getextattr_args *ap) 5492185029Spjd/* 5493185029Spjdvop_getextattr { 5494185029Spjd IN struct vnode *a_vp; 5495185029Spjd IN int a_attrnamespace; 5496185029Spjd IN const char *a_name; 5497185029Spjd INOUT struct uio *a_uio; 5498185029Spjd OUT size_t *a_size; 5499185029Spjd IN struct ucred *a_cred; 5500185029Spjd IN struct thread *a_td; 5501185029Spjd}; 5502185029Spjd*/ 5503185029Spjd{ 5504185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5505185029Spjd struct thread *td = ap->a_td; 5506185029Spjd struct nameidata nd; 5507185029Spjd char attrname[255]; 5508185029Spjd struct vattr va; 5509185029Spjd vnode_t *xvp = NULL, *vp; 5510185029Spjd int error, flags; 5511185029Spjd 5512195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5513195785Strasz ap->a_cred, ap->a_td, VREAD); 5514195785Strasz if (error != 0) 5515195785Strasz return (error); 5516195785Strasz 5517185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5518185029Spjd sizeof(attrname)); 5519185029Spjd if (error != 0) 5520185029Spjd return (error); 5521185029Spjd 5522185029Spjd ZFS_ENTER(zfsvfs); 5523185029Spjd 5524185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5525185029Spjd LOOKUP_XATTR); 5526185029Spjd if (error != 0) { 5527185029Spjd ZFS_EXIT(zfsvfs); 5528185029Spjd return (error); 5529185029Spjd } 5530185029Spjd 5531185029Spjd flags = FREAD; 5532241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 5533185029Spjd xvp, td); 5534194586Skib error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL); 5535185029Spjd vp = nd.ni_vp; 5536185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 5537185029Spjd if (error != 0) { 5538196303Spjd ZFS_EXIT(zfsvfs); 5539195785Strasz if (error == ENOENT) 5540195785Strasz error = ENOATTR; 5541185029Spjd return (error); 5542185029Spjd } 5543185029Spjd 5544185029Spjd if (ap->a_size != NULL) { 5545185029Spjd error = VOP_GETATTR(vp, &va, ap->a_cred); 5546185029Spjd if (error == 0) 5547185029Spjd *ap->a_size = (size_t)va.va_size; 5548185029Spjd } else if (ap->a_uio != NULL) 5549224605Smm error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); 5550185029Spjd 5551185029Spjd VOP_UNLOCK(vp, 0); 5552185029Spjd vn_close(vp, flags, ap->a_cred, td); 5553185029Spjd ZFS_EXIT(zfsvfs); 5554185029Spjd 5555185029Spjd return (error); 5556185029Spjd} 5557185029Spjd 5558185029Spjd/* 5559185029Spjd * Vnode operation to remove a named attribute. 5560185029Spjd */ 5561185029Spjdint 5562185029Spjdzfs_deleteextattr(struct vop_deleteextattr_args *ap) 5563185029Spjd/* 5564185029Spjdvop_deleteextattr { 5565185029Spjd IN struct vnode *a_vp; 5566185029Spjd IN int a_attrnamespace; 5567185029Spjd IN const char *a_name; 5568185029Spjd IN struct ucred *a_cred; 5569185029Spjd IN struct thread *a_td; 5570185029Spjd}; 5571185029Spjd*/ 5572185029Spjd{ 5573185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5574185029Spjd struct thread *td = ap->a_td; 5575185029Spjd struct nameidata nd; 5576185029Spjd char attrname[255]; 5577185029Spjd struct vattr va; 5578185029Spjd vnode_t *xvp = NULL, *vp; 5579185029Spjd int error, flags; 5580185029Spjd 5581195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5582195785Strasz ap->a_cred, ap->a_td, VWRITE); 5583195785Strasz if (error != 0) 5584195785Strasz return (error); 5585195785Strasz 5586185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5587185029Spjd sizeof(attrname)); 5588185029Spjd if (error != 0) 5589185029Spjd return (error); 5590185029Spjd 5591185029Spjd ZFS_ENTER(zfsvfs); 5592185029Spjd 5593185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5594185029Spjd LOOKUP_XATTR); 5595185029Spjd if (error != 0) { 5596185029Spjd ZFS_EXIT(zfsvfs); 5597185029Spjd return (error); 5598185029Spjd } 5599185029Spjd 5600241896Skib NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 5601185029Spjd UIO_SYSSPACE, attrname, xvp, td); 5602185029Spjd error = namei(&nd); 5603185029Spjd vp = nd.ni_vp; 5604185029Spjd if (error != 0) { 5605196303Spjd ZFS_EXIT(zfsvfs); 5606260706Savg NDFREE(&nd, NDF_ONLY_PNBUF); 5607195785Strasz if (error == ENOENT) 5608195785Strasz error = ENOATTR; 5609185029Spjd return (error); 5610185029Spjd } 5611260706Savg 5612185029Spjd error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 5613260706Savg NDFREE(&nd, NDF_ONLY_PNBUF); 5614185029Spjd 5615185029Spjd vput(nd.ni_dvp); 5616185029Spjd if (vp == nd.ni_dvp) 5617185029Spjd vrele(vp); 5618185029Spjd else 5619185029Spjd vput(vp); 5620185029Spjd ZFS_EXIT(zfsvfs); 5621185029Spjd 5622185029Spjd return (error); 5623185029Spjd} 5624185029Spjd 5625185029Spjd/* 5626185029Spjd * Vnode operation to set a named attribute. 5627185029Spjd */ 5628185029Spjdstatic int 5629185029Spjdzfs_setextattr(struct vop_setextattr_args *ap) 5630185029Spjd/* 5631185029Spjdvop_setextattr { 5632185029Spjd IN struct vnode *a_vp; 5633185029Spjd IN int a_attrnamespace; 5634185029Spjd IN const char *a_name; 5635185029Spjd INOUT struct uio *a_uio; 5636185029Spjd IN struct ucred *a_cred; 5637185029Spjd IN struct thread *a_td; 5638185029Spjd}; 5639185029Spjd*/ 5640185029Spjd{ 5641185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5642185029Spjd struct thread *td = ap->a_td; 5643185029Spjd struct nameidata nd; 5644185029Spjd char attrname[255]; 5645185029Spjd struct vattr va; 5646185029Spjd vnode_t *xvp = NULL, *vp; 5647185029Spjd int error, flags; 5648185029Spjd 5649195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5650195785Strasz ap->a_cred, ap->a_td, VWRITE); 5651195785Strasz if (error != 0) 5652195785Strasz return (error); 5653195785Strasz 5654185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5655185029Spjd sizeof(attrname)); 5656185029Spjd if (error != 0) 5657185029Spjd return (error); 5658185029Spjd 5659185029Spjd ZFS_ENTER(zfsvfs); 5660185029Spjd 5661185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5662195785Strasz LOOKUP_XATTR | CREATE_XATTR_DIR); 5663185029Spjd if (error != 0) { 5664185029Spjd ZFS_EXIT(zfsvfs); 5665185029Spjd return (error); 5666185029Spjd } 5667185029Spjd 5668185029Spjd flags = FFLAGS(O_WRONLY | O_CREAT); 5669241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 5670185029Spjd xvp, td); 5671194586Skib error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); 5672185029Spjd vp = nd.ni_vp; 5673185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 5674185029Spjd if (error != 0) { 5675185029Spjd ZFS_EXIT(zfsvfs); 5676185029Spjd return (error); 5677185029Spjd } 5678185029Spjd 5679185029Spjd VATTR_NULL(&va); 5680185029Spjd va.va_size = 0; 5681185029Spjd error = VOP_SETATTR(vp, &va, ap->a_cred); 5682185029Spjd if (error == 0) 5683268420Smav VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred); 5684185029Spjd 5685185029Spjd VOP_UNLOCK(vp, 0); 5686185029Spjd vn_close(vp, flags, ap->a_cred, td); 5687185029Spjd ZFS_EXIT(zfsvfs); 5688185029Spjd 5689185029Spjd return (error); 5690185029Spjd} 5691185029Spjd 5692185029Spjd/* 5693185029Spjd * Vnode operation to retrieve extended attributes on a vnode. 5694185029Spjd */ 5695185029Spjdstatic int 5696185029Spjdzfs_listextattr(struct vop_listextattr_args *ap) 5697185029Spjd/* 5698185029Spjdvop_listextattr { 5699185029Spjd IN struct vnode *a_vp; 5700185029Spjd IN int a_attrnamespace; 5701185029Spjd INOUT struct uio *a_uio; 5702185029Spjd OUT size_t *a_size; 5703185029Spjd IN struct ucred *a_cred; 5704185029Spjd IN struct thread *a_td; 5705185029Spjd}; 5706185029Spjd*/ 5707185029Spjd{ 5708185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5709185029Spjd struct thread *td = ap->a_td; 5710185029Spjd struct nameidata nd; 5711185029Spjd char attrprefix[16]; 5712185029Spjd u_char dirbuf[sizeof(struct dirent)]; 5713185029Spjd struct dirent *dp; 5714185029Spjd struct iovec aiov; 5715185029Spjd struct uio auio, *uio = ap->a_uio; 5716185029Spjd size_t *sizep = ap->a_size; 5717185029Spjd size_t plen; 5718185029Spjd vnode_t *xvp = NULL, *vp; 5719185029Spjd int done, error, eof, pos; 5720185029Spjd 5721195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5722195785Strasz ap->a_cred, ap->a_td, VREAD); 5723196303Spjd if (error != 0) 5724195785Strasz return (error); 5725195785Strasz 5726185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 5727185029Spjd sizeof(attrprefix)); 5728185029Spjd if (error != 0) 5729185029Spjd return (error); 5730185029Spjd plen = strlen(attrprefix); 5731185029Spjd 5732185029Spjd ZFS_ENTER(zfsvfs); 5733185029Spjd 5734195822Strasz if (sizep != NULL) 5735195822Strasz *sizep = 0; 5736195822Strasz 5737185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5738185029Spjd LOOKUP_XATTR); 5739185029Spjd if (error != 0) { 5740196303Spjd ZFS_EXIT(zfsvfs); 5741195785Strasz /* 5742195785Strasz * ENOATTR means that the EA directory does not yet exist, 5743195785Strasz * i.e. there are no extended attributes there. 5744195785Strasz */ 5745195785Strasz if (error == ENOATTR) 5746195785Strasz error = 0; 5747185029Spjd return (error); 5748185029Spjd } 5749185029Spjd 5750241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 5751188588Sjhb UIO_SYSSPACE, ".", xvp, td); 5752185029Spjd error = namei(&nd); 5753185029Spjd vp = nd.ni_vp; 5754185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 5755185029Spjd if (error != 0) { 5756185029Spjd ZFS_EXIT(zfsvfs); 5757185029Spjd return (error); 5758185029Spjd } 5759185029Spjd 5760185029Spjd auio.uio_iov = &aiov; 5761185029Spjd auio.uio_iovcnt = 1; 5762185029Spjd auio.uio_segflg = UIO_SYSSPACE; 5763185029Spjd auio.uio_td = td; 5764185029Spjd auio.uio_rw = UIO_READ; 5765185029Spjd auio.uio_offset = 0; 5766185029Spjd 5767185029Spjd do { 5768185029Spjd u_char nlen; 5769185029Spjd 5770185029Spjd aiov.iov_base = (void *)dirbuf; 5771185029Spjd aiov.iov_len = sizeof(dirbuf); 5772185029Spjd auio.uio_resid = sizeof(dirbuf); 5773185029Spjd error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 5774185029Spjd done = sizeof(dirbuf) - auio.uio_resid; 5775185029Spjd if (error != 0) 5776185029Spjd break; 5777185029Spjd for (pos = 0; pos < done;) { 5778185029Spjd dp = (struct dirent *)(dirbuf + pos); 5779185029Spjd pos += dp->d_reclen; 5780185029Spjd /* 5781185029Spjd * XXX: Temporarily we also accept DT_UNKNOWN, as this 5782185029Spjd * is what we get when attribute was created on Solaris. 5783185029Spjd */ 5784185029Spjd if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 5785185029Spjd continue; 5786185029Spjd if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) 5787185029Spjd continue; 5788185029Spjd else if (strncmp(dp->d_name, attrprefix, plen) != 0) 5789185029Spjd continue; 5790185029Spjd nlen = dp->d_namlen - plen; 5791185029Spjd if (sizep != NULL) 5792185029Spjd *sizep += 1 + nlen; 5793185029Spjd else if (uio != NULL) { 5794185029Spjd /* 5795185029Spjd * Format of extattr name entry is one byte for 5796185029Spjd * length and the rest for name. 5797185029Spjd */ 5798185029Spjd error = uiomove(&nlen, 1, uio->uio_rw, uio); 5799185029Spjd if (error == 0) { 5800185029Spjd error = uiomove(dp->d_name + plen, nlen, 5801185029Spjd uio->uio_rw, uio); 5802185029Spjd } 5803185029Spjd if (error != 0) 5804185029Spjd break; 5805185029Spjd } 5806185029Spjd } 5807185029Spjd } while (!eof && error == 0); 5808185029Spjd 5809185029Spjd vput(vp); 5810185029Spjd ZFS_EXIT(zfsvfs); 5811185029Spjd 5812185029Spjd return (error); 5813185029Spjd} 5814185029Spjd 5815192800Straszint 5816192800Straszzfs_freebsd_getacl(ap) 5817192800Strasz struct vop_getacl_args /* { 5818192800Strasz struct vnode *vp; 5819192800Strasz acl_type_t type; 5820192800Strasz struct acl *aclp; 5821192800Strasz struct ucred *cred; 5822192800Strasz struct thread *td; 5823192800Strasz } */ *ap; 5824192800Strasz{ 5825192800Strasz int error; 5826192800Strasz vsecattr_t vsecattr; 5827192800Strasz 5828192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 5829197435Strasz return (EINVAL); 5830192800Strasz 5831192800Strasz vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 5832192800Strasz if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)) 5833192800Strasz return (error); 5834192800Strasz 5835192800Strasz error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt); 5836196303Spjd if (vsecattr.vsa_aclentp != NULL) 5837196303Spjd kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz); 5838192800Strasz 5839196303Spjd return (error); 5840192800Strasz} 5841192800Strasz 5842192800Straszint 5843192800Straszzfs_freebsd_setacl(ap) 5844192800Strasz struct vop_setacl_args /* { 5845192800Strasz struct vnode *vp; 5846192800Strasz acl_type_t type; 5847192800Strasz struct acl *aclp; 5848192800Strasz struct ucred *cred; 5849192800Strasz struct thread *td; 5850192800Strasz } */ *ap; 5851192800Strasz{ 5852192800Strasz int error; 5853192800Strasz vsecattr_t vsecattr; 5854192800Strasz int aclbsize; /* size of acl list in bytes */ 5855192800Strasz aclent_t *aaclp; 5856192800Strasz 5857192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 5858197435Strasz return (EINVAL); 5859192800Strasz 5860314710Smm if (ap->a_aclp == NULL) 5861314710Smm return (EINVAL); 5862314710Smm 5863192800Strasz if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 5864192800Strasz return (EINVAL); 5865192800Strasz 5866192800Strasz /* 5867196949Strasz * With NFSv4 ACLs, chmod(2) may need to add additional entries, 5868192800Strasz * splitting every entry into two and appending "canonical six" 5869192800Strasz * entries at the end. Don't allow for setting an ACL that would 5870192800Strasz * cause chmod(2) to run out of ACL entries. 5871192800Strasz */ 5872192800Strasz if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 5873192800Strasz return (ENOSPC); 5874192800Strasz 5875208030Strasz error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 5876208030Strasz if (error != 0) 5877208030Strasz return (error); 5878208030Strasz 5879192800Strasz vsecattr.vsa_mask = VSA_ACE; 5880192800Strasz aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t); 5881192800Strasz vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 5882192800Strasz aaclp = vsecattr.vsa_aclentp; 5883192800Strasz vsecattr.vsa_aclentsz = aclbsize; 5884192800Strasz 5885192800Strasz aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 5886192800Strasz error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL); 5887192800Strasz kmem_free(aaclp, aclbsize); 5888192800Strasz 5889192800Strasz return (error); 5890192800Strasz} 5891192800Strasz 5892192800Straszint 5893192800Straszzfs_freebsd_aclcheck(ap) 5894192800Strasz struct vop_aclcheck_args /* { 5895192800Strasz struct vnode *vp; 5896192800Strasz acl_type_t type; 5897192800Strasz struct acl *aclp; 5898192800Strasz struct ucred *cred; 5899192800Strasz struct thread *td; 5900192800Strasz } */ *ap; 5901192800Strasz{ 5902192800Strasz 5903192800Strasz return (EOPNOTSUPP); 5904192800Strasz} 5905192800Strasz 5906299906Savgstatic int 5907299906Savgzfs_vptocnp(struct vop_vptocnp_args *ap) 5908299906Savg{ 5909299906Savg vnode_t *covered_vp; 5910299906Savg vnode_t *vp = ap->a_vp;; 5911299906Savg zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 5912299906Savg znode_t *zp = VTOZ(vp); 5913299906Savg int ltype; 5914299906Savg int error; 5915299906Savg 5916301870Savg ZFS_ENTER(zfsvfs); 5917301870Savg ZFS_VERIFY_ZP(zp); 5918301870Savg 5919299906Savg /* 5920299906Savg * If we are a snapshot mounted under .zfs, run the operation 5921299906Savg * on the covered vnode. 5922299906Savg */ 5923324158Savg if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) { 5924307995Savg char name[MAXNAMLEN + 1]; 5925307995Savg znode_t *dzp; 5926307995Savg size_t len; 5927307995Savg 5928307995Savg error = zfs_znode_parent_and_name(zp, &dzp, name); 5929307995Savg if (error == 0) { 5930307995Savg len = strlen(name); 5931314030Savg if (*ap->a_buflen < len) 5932314030Savg error = SET_ERROR(ENOMEM); 5933314030Savg } 5934314030Savg if (error == 0) { 5935307995Savg *ap->a_buflen -= len; 5936307995Savg bcopy(name, ap->a_buf + *ap->a_buflen, len); 5937307995Savg *ap->a_vpp = ZTOV(dzp); 5938307995Savg } 5939301870Savg ZFS_EXIT(zfsvfs); 5940307995Savg return (error); 5941301870Savg } 5942301870Savg ZFS_EXIT(zfsvfs); 5943299906Savg 5944299906Savg covered_vp = vp->v_mount->mnt_vnodecovered; 5945299906Savg vhold(covered_vp); 5946299906Savg ltype = VOP_ISLOCKED(vp); 5947299906Savg VOP_UNLOCK(vp, 0); 5948315842Savg error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread); 5949299906Savg if (error == 0) { 5950299906Savg error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred, 5951299906Savg ap->a_buf, ap->a_buflen); 5952299906Savg vput(covered_vp); 5953299906Savg } 5954299906Savg vn_lock(vp, ltype | LK_RETRY); 5955299906Savg if ((vp->v_iflag & VI_DOOMED) != 0) 5956299906Savg error = SET_ERROR(ENOENT); 5957299906Savg return (error); 5958299906Savg} 5959299906Savg 5960303970Savg#ifdef DIAGNOSTIC 5961303970Savgstatic int 5962303970Savgzfs_lock(ap) 5963303970Savg struct vop_lock1_args /* { 5964303970Savg struct vnode *a_vp; 5965303970Savg int a_flags; 5966303970Savg char *file; 5967303970Savg int line; 5968303970Savg } */ *ap; 5969303970Savg{ 5970310066Savg vnode_t *vp; 5971303970Savg znode_t *zp; 5972303970Savg int err; 5973303970Savg 5974303970Savg err = vop_stdlock(ap); 5975310066Savg if (err == 0 && (ap->a_flags & LK_NOWAIT) == 0) { 5976310066Savg vp = ap->a_vp; 5977310066Savg zp = vp->v_data; 5978310066Savg if (vp->v_mount != NULL && (vp->v_iflag & VI_DOOMED) == 0 && 5979310066Savg zp != NULL && (zp->z_pflags & ZFS_XATTR) == 0) 5980310066Savg VERIFY(!RRM_LOCK_HELD(&zp->z_zfsvfs->z_teardown_lock)); 5981303970Savg } 5982303970Savg return (err); 5983303970Savg} 5984303970Savg#endif 5985303970Savg 5986168404Spjdstruct vop_vector zfs_vnodeops; 5987168404Spjdstruct vop_vector zfs_fifoops; 5988209962Smmstruct vop_vector zfs_shareops; 5989168404Spjd 5990168404Spjdstruct vop_vector zfs_vnodeops = { 5991185029Spjd .vop_default = &default_vnodeops, 5992185029Spjd .vop_inactive = zfs_freebsd_inactive, 5993185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 5994185029Spjd .vop_access = zfs_freebsd_access, 5995303970Savg .vop_lookup = zfs_cache_lookup, 5996185029Spjd .vop_cachedlookup = zfs_freebsd_lookup, 5997185029Spjd .vop_getattr = zfs_freebsd_getattr, 5998185029Spjd .vop_setattr = zfs_freebsd_setattr, 5999185029Spjd .vop_create = zfs_freebsd_create, 6000185029Spjd .vop_mknod = zfs_freebsd_create, 6001185029Spjd .vop_mkdir = zfs_freebsd_mkdir, 6002185029Spjd .vop_readdir = zfs_freebsd_readdir, 6003185029Spjd .vop_fsync = zfs_freebsd_fsync, 6004185029Spjd .vop_open = zfs_freebsd_open, 6005185029Spjd .vop_close = zfs_freebsd_close, 6006185029Spjd .vop_rmdir = zfs_freebsd_rmdir, 6007185029Spjd .vop_ioctl = zfs_freebsd_ioctl, 6008185029Spjd .vop_link = zfs_freebsd_link, 6009185029Spjd .vop_symlink = zfs_freebsd_symlink, 6010185029Spjd .vop_readlink = zfs_freebsd_readlink, 6011185029Spjd .vop_read = zfs_freebsd_read, 6012185029Spjd .vop_write = zfs_freebsd_write, 6013185029Spjd .vop_remove = zfs_freebsd_remove, 6014185029Spjd .vop_rename = zfs_freebsd_rename, 6015185029Spjd .vop_pathconf = zfs_freebsd_pathconf, 6016243518Savg .vop_bmap = zfs_freebsd_bmap, 6017185029Spjd .vop_fid = zfs_freebsd_fid, 6018185029Spjd .vop_getextattr = zfs_getextattr, 6019185029Spjd .vop_deleteextattr = zfs_deleteextattr, 6020185029Spjd .vop_setextattr = zfs_setextattr, 6021185029Spjd .vop_listextattr = zfs_listextattr, 6022192800Strasz .vop_getacl = zfs_freebsd_getacl, 6023192800Strasz .vop_setacl = zfs_freebsd_setacl, 6024192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6025213937Savg .vop_getpages = zfs_freebsd_getpages, 6026258746Savg .vop_putpages = zfs_freebsd_putpages, 6027299906Savg .vop_vptocnp = zfs_vptocnp, 6028303970Savg#ifdef DIAGNOSTIC 6029303970Savg .vop_lock1 = zfs_lock, 6030303970Savg#endif 6031168404Spjd}; 6032168404Spjd 6033169170Spjdstruct vop_vector zfs_fifoops = { 6034185029Spjd .vop_default = &fifo_specops, 6035200162Skib .vop_fsync = zfs_freebsd_fsync, 6036185029Spjd .vop_access = zfs_freebsd_access, 6037185029Spjd .vop_getattr = zfs_freebsd_getattr, 6038185029Spjd .vop_inactive = zfs_freebsd_inactive, 6039185029Spjd .vop_read = VOP_PANIC, 6040185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 6041185029Spjd .vop_setattr = zfs_freebsd_setattr, 6042185029Spjd .vop_write = VOP_PANIC, 6043328298Sjhb .vop_pathconf = zfs_freebsd_pathconf, 6044185029Spjd .vop_fid = zfs_freebsd_fid, 6045192800Strasz .vop_getacl = zfs_freebsd_getacl, 6046192800Strasz .vop_setacl = zfs_freebsd_setacl, 6047192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6048168404Spjd}; 6049209962Smm 6050209962Smm/* 6051209962Smm * special share hidden files vnode operations template 6052209962Smm */ 6053209962Smmstruct vop_vector zfs_shareops = { 6054209962Smm .vop_default = &default_vnodeops, 6055209962Smm .vop_access = zfs_freebsd_access, 6056209962Smm .vop_inactive = zfs_freebsd_inactive, 6057209962Smm .vop_reclaim = zfs_freebsd_reclaim, 6058209962Smm .vop_fid = zfs_freebsd_fid, 6059209962Smm .vop_pathconf = zfs_freebsd_pathconf, 6060209962Smm}; 6061