zfs_vnops.c revision 330062
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21321545Smav 22168404Spjd/* 23212694Smm * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24289562Smav * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 25296519Smav * Copyright (c) 2014 Integros [integros.com] 26321545Smav * Copyright 2017 Nexenta Systems, Inc. 
27168404Spjd */ 28168404Spjd 29169195Spjd/* Portions Copyright 2007 Jeremy Teo */ 30219089Spjd/* Portions Copyright 2010 Robert Milkowski */ 31169195Spjd 32168404Spjd#include <sys/types.h> 33168404Spjd#include <sys/param.h> 34168404Spjd#include <sys/time.h> 35168404Spjd#include <sys/systm.h> 36168404Spjd#include <sys/sysmacros.h> 37168404Spjd#include <sys/resource.h> 38168404Spjd#include <sys/vfs.h> 39248084Sattilio#include <sys/vm.h> 40168404Spjd#include <sys/vnode.h> 41168404Spjd#include <sys/file.h> 42168404Spjd#include <sys/stat.h> 43168404Spjd#include <sys/kmem.h> 44168404Spjd#include <sys/taskq.h> 45168404Spjd#include <sys/uio.h> 46168404Spjd#include <sys/atomic.h> 47168404Spjd#include <sys/namei.h> 48168404Spjd#include <sys/mman.h> 49168404Spjd#include <sys/cmn_err.h> 50168404Spjd#include <sys/errno.h> 51168404Spjd#include <sys/unistd.h> 52168404Spjd#include <sys/zfs_dir.h> 53168404Spjd#include <sys/zfs_ioctl.h> 54168404Spjd#include <sys/fs/zfs.h> 55168404Spjd#include <sys/dmu.h> 56219089Spjd#include <sys/dmu_objset.h> 57168404Spjd#include <sys/spa.h> 58168404Spjd#include <sys/txg.h> 59168404Spjd#include <sys/dbuf.h> 60168404Spjd#include <sys/zap.h> 61219089Spjd#include <sys/sa.h> 62168404Spjd#include <sys/dirent.h> 63168962Spjd#include <sys/policy.h> 64168962Spjd#include <sys/sunddi.h> 65168404Spjd#include <sys/filio.h> 66209962Smm#include <sys/sid.h> 67168404Spjd#include <sys/zfs_ctldir.h> 68185029Spjd#include <sys/zfs_fuid.h> 69219089Spjd#include <sys/zfs_sa.h> 70168404Spjd#include <sys/zfs_rlock.h> 71185029Spjd#include <sys/extdirent.h> 72185029Spjd#include <sys/kidmap.h> 73168404Spjd#include <sys/bio.h> 74168404Spjd#include <sys/buf.h> 75168404Spjd#include <sys/sched.h> 76192800Strasz#include <sys/acl.h> 77239077Smarius#include <vm/vm_param.h> 78325132Savg#include <sys/zil.h> 79168404Spjd 80168404Spjd/* 81168404Spjd * Programming rules. 82168404Spjd * 83168404Spjd * Each vnode op performs some logical unit of work. 
To do this, the ZPL must 84168404Spjd * properly lock its in-core state, create a DMU transaction, do the work, 85168404Spjd * record this work in the intent log (ZIL), commit the DMU transaction, 86185029Spjd * and wait for the intent log to commit if it is a synchronous operation. 87185029Spjd * Moreover, the vnode ops must work in both normal and log replay context. 88168404Spjd * The ordering of events is important to avoid deadlocks and references 89168404Spjd * to freed memory. The example below illustrates the following Big Rules: 90168404Spjd * 91251631Sdelphij * (1) A check must be made in each zfs thread for a mounted file system. 92168404Spjd * This is done avoiding races using ZFS_ENTER(zfsvfs). 93251631Sdelphij * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 94251631Sdelphij * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 95251631Sdelphij * can return EIO from the calling function. 96168404Spjd * 97168404Spjd * (2) VN_RELE() should always be the last thing except for zil_commit() 98168404Spjd * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 99168404Spjd * First, if it's the last reference, the vnode/znode 100168404Spjd * can be freed, so the zp may point to freed memory. Second, the last 101168404Spjd * reference will call zfs_zinactive(), which may induce a lot of work -- 102168404Spjd * pushing cached pages (which acquires range locks) and syncing out 103168404Spjd * cached atime changes. Third, zfs_zinactive() may require a new tx, 104168404Spjd * which could deadlock the system if you were already holding one. 105191900Skmacy * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 106168404Spjd * 107168404Spjd * (3) All range locks must be grabbed before calling dmu_tx_assign(), 108168404Spjd * as they can span dmu_tx_assign() calls. 109168404Spjd * 110258720Savg * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to 111258720Savg * dmu_tx_assign(). 
This is critical because we don't want to block 112258720Savg * while holding locks. 113168404Spjd * 114258720Savg * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This 115258720Savg * reduces lock contention and CPU usage when we must wait (note that if 116258720Savg * throughput is constrained by the storage, nearly every transaction 117258720Savg * must wait). 118258720Savg * 119258720Savg * Note, in particular, that if a lock is sometimes acquired before 120258720Savg * the tx assigns, and sometimes after (e.g. z_lock), then failing 121258720Savg * to use a non-blocking assign can deadlock the system. The scenario: 122258720Savg * 123168404Spjd * Thread A has grabbed a lock before calling dmu_tx_assign(). 124168404Spjd * Thread B is in an already-assigned tx, and blocks for this lock. 125168404Spjd * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 126168404Spjd * forever, because the previous txg can't quiesce until B's tx commits. 127168404Spjd * 128168404Spjd * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 129258632Savg * then drop all locks, call dmu_tx_wait(), and try again. On subsequent 130258632Savg * calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT, 131258632Savg * to indicate that this operation has already called dmu_tx_wait(). 132258632Savg * This will ensure that we don't retry forever, waiting a short bit 133258632Savg * each time. 134168404Spjd * 135168404Spjd * (5) If the operation succeeded, generate the intent log entry for it 136168404Spjd * before dropping locks. This ensures that the ordering of events 137168404Spjd * in the intent log matches the order in which they actually occurred. 138251631Sdelphij * During ZIL replay the zfs_log_* functions will update the sequence 139209962Smm * number to indicate the zil transaction has replayed. 
140168404Spjd * 141168404Spjd * (6) At the end of each vnode op, the DMU tx must always commit, 142168404Spjd * regardless of whether there were any errors. 143168404Spjd * 144219089Spjd * (7) After dropping all locks, invoke zil_commit(zilog, foid) 145168404Spjd * to ensure that synchronous semantics are provided when necessary. 146168404Spjd * 147168404Spjd * In general, this is how things should be ordered in each vnode op: 148168404Spjd * 149168404Spjd * ZFS_ENTER(zfsvfs); // exit if unmounted 150168404Spjd * top: 151303970Savg * zfs_dirent_lookup(&dl, ...) // lock directory entry (may VN_HOLD()) 152168404Spjd * rw_enter(...); // grab any other locks you need 153168404Spjd * tx = dmu_tx_create(...); // get DMU tx 154168404Spjd * dmu_tx_hold_*(); // hold each object you might modify 155258632Savg * error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 156168404Spjd * if (error) { 157168404Spjd * rw_exit(...); // drop locks 158168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 159168404Spjd * VN_RELE(...); // release held vnodes 160209962Smm * if (error == ERESTART) { 161258632Savg * waited = B_TRUE; 162168404Spjd * dmu_tx_wait(tx); 163168404Spjd * dmu_tx_abort(tx); 164168404Spjd * goto top; 165168404Spjd * } 166168404Spjd * dmu_tx_abort(tx); // abort DMU tx 167168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 168168404Spjd * return (error); // really out of space 169168404Spjd * } 170168404Spjd * error = do_real_work(); // do whatever this VOP does 171168404Spjd * if (error == 0) 172168404Spjd * zfs_log_*(...); // on success, make ZIL entry 173168404Spjd * dmu_tx_commit(tx); // commit DMU tx -- error or not 174168404Spjd * rw_exit(...); // drop locks 175168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 176168404Spjd * VN_RELE(...); // release held vnodes 177219089Spjd * zil_commit(zilog, foid); // synchronous when necessary 178168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 179168404Spjd * return (error); // done, report error 
180168404Spjd */ 181185029Spjd 182168404Spjd/* ARGSUSED */ 183168404Spjdstatic int 184185029Spjdzfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 185168404Spjd{ 186168962Spjd znode_t *zp = VTOZ(*vpp); 187209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 188168404Spjd 189209962Smm ZFS_ENTER(zfsvfs); 190209962Smm ZFS_VERIFY_ZP(zp); 191209962Smm 192219089Spjd if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 193185029Spjd ((flag & FAPPEND) == 0)) { 194209962Smm ZFS_EXIT(zfsvfs); 195249195Smm return (SET_ERROR(EPERM)); 196185029Spjd } 197185029Spjd 198185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 199185029Spjd ZTOV(zp)->v_type == VREG && 200219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 201209962Smm if (fs_vscan(*vpp, cr, 0) != 0) { 202209962Smm ZFS_EXIT(zfsvfs); 203249195Smm return (SET_ERROR(EACCES)); 204209962Smm } 205209962Smm } 206185029Spjd 207168404Spjd /* Keep a count of the synchronous opens in the znode */ 208168962Spjd if (flag & (FSYNC | FDSYNC)) 209168404Spjd atomic_inc_32(&zp->z_sync_cnt); 210185029Spjd 211209962Smm ZFS_EXIT(zfsvfs); 212168404Spjd return (0); 213168404Spjd} 214168404Spjd 215168404Spjd/* ARGSUSED */ 216168404Spjdstatic int 217185029Spjdzfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 218185029Spjd caller_context_t *ct) 219168404Spjd{ 220168962Spjd znode_t *zp = VTOZ(vp); 221209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 222168404Spjd 223210470Smm /* 224210470Smm * Clean up any locks held by this process on the vp. 
225210470Smm */ 226210470Smm cleanlocks(vp, ddi_get_pid(), 0); 227210470Smm cleanshares(vp, ddi_get_pid()); 228210470Smm 229209962Smm ZFS_ENTER(zfsvfs); 230209962Smm ZFS_VERIFY_ZP(zp); 231209962Smm 232168404Spjd /* Decrement the synchronous opens in the znode */ 233185029Spjd if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 234168404Spjd atomic_dec_32(&zp->z_sync_cnt); 235168404Spjd 236185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 237185029Spjd ZTOV(zp)->v_type == VREG && 238219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 239185029Spjd VERIFY(fs_vscan(vp, cr, 1) == 0); 240185029Spjd 241209962Smm ZFS_EXIT(zfsvfs); 242168404Spjd return (0); 243168404Spjd} 244168404Spjd 245168404Spjd/* 246168404Spjd * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 247168404Spjd * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 248168404Spjd */ 249168404Spjdstatic int 250168978Spjdzfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 251168404Spjd{ 252168404Spjd znode_t *zp = VTOZ(vp); 253168404Spjd uint64_t noff = (uint64_t)*off; /* new offset */ 254168404Spjd uint64_t file_sz; 255168404Spjd int error; 256168404Spjd boolean_t hole; 257168404Spjd 258219089Spjd file_sz = zp->z_size; 259168404Spjd if (noff >= file_sz) { 260249195Smm return (SET_ERROR(ENXIO)); 261168404Spjd } 262168404Spjd 263168962Spjd if (cmd == _FIO_SEEK_HOLE) 264168404Spjd hole = B_TRUE; 265168404Spjd else 266168404Spjd hole = B_FALSE; 267168404Spjd 268168404Spjd error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 269168404Spjd 270271536Sdelphij if (error == ESRCH) 271249195Smm return (SET_ERROR(ENXIO)); 272271536Sdelphij 273271536Sdelphij /* 274271536Sdelphij * We could find a hole that begins after the logical end-of-file, 275271536Sdelphij * because dmu_offset_next() only works on whole blocks. 
If the 276271536Sdelphij * EOF falls mid-block, then indicate that the "virtual hole" 277271536Sdelphij * at the end of the file begins at the logical EOF, rather than 278271536Sdelphij * at the end of the last block. 279271536Sdelphij */ 280271536Sdelphij if (noff > file_sz) { 281271536Sdelphij ASSERT(hole); 282271536Sdelphij noff = file_sz; 283168404Spjd } 284168404Spjd 285168404Spjd if (noff < *off) 286168404Spjd return (error); 287168404Spjd *off = noff; 288168404Spjd return (error); 289168404Spjd} 290168404Spjd 291168404Spjd/* ARGSUSED */ 292168404Spjdstatic int 293168978Spjdzfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 294185029Spjd int *rvalp, caller_context_t *ct) 295168404Spjd{ 296168962Spjd offset_t off; 297287103Savg offset_t ndata; 298287103Savg dmu_object_info_t doi; 299168962Spjd int error; 300168962Spjd zfsvfs_t *zfsvfs; 301185029Spjd znode_t *zp; 302168404Spjd 303168404Spjd switch (com) { 304185029Spjd case _FIOFFS: 305287103Savg { 306168962Spjd return (0); 307168404Spjd 308168962Spjd /* 309168962Spjd * The following two ioctls are used by bfu. Faking out, 310168962Spjd * necessary to avoid bfu errors. 
311168962Spjd */ 312287103Savg } 313185029Spjd case _FIOGDIO: 314185029Spjd case _FIOSDIO: 315287103Savg { 316168962Spjd return (0); 317287103Savg } 318168962Spjd 319185029Spjd case _FIO_SEEK_DATA: 320185029Spjd case _FIO_SEEK_HOLE: 321287103Savg { 322277300Ssmh#ifdef illumos 323168962Spjd if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 324249195Smm return (SET_ERROR(EFAULT)); 325233918Savg#else 326233918Savg off = *(offset_t *)data; 327233918Savg#endif 328185029Spjd zp = VTOZ(vp); 329185029Spjd zfsvfs = zp->z_zfsvfs; 330168404Spjd ZFS_ENTER(zfsvfs); 331185029Spjd ZFS_VERIFY_ZP(zp); 332168404Spjd 333168404Spjd /* offset parameter is in/out */ 334168404Spjd error = zfs_holey(vp, com, &off); 335168404Spjd ZFS_EXIT(zfsvfs); 336168404Spjd if (error) 337168404Spjd return (error); 338277300Ssmh#ifdef illumos 339168962Spjd if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 340249195Smm return (SET_ERROR(EFAULT)); 341233918Savg#else 342233918Savg *(offset_t *)data = off; 343233918Savg#endif 344168404Spjd return (0); 345168404Spjd } 346287103Savg#ifdef illumos 347287103Savg case _FIO_COUNT_FILLED: 348287103Savg { 349287103Savg /* 350287103Savg * _FIO_COUNT_FILLED adds a new ioctl command which 351287103Savg * exposes the number of filled blocks in a 352287103Savg * ZFS object. 353287103Savg */ 354287103Savg zp = VTOZ(vp); 355287103Savg zfsvfs = zp->z_zfsvfs; 356287103Savg ZFS_ENTER(zfsvfs); 357287103Savg ZFS_VERIFY_ZP(zp); 358287103Savg 359287103Savg /* 360287103Savg * Wait for all dirty blocks for this object 361287103Savg * to get synced out to disk, and the DMU info 362287103Savg * updated. 363287103Savg */ 364287103Savg error = dmu_object_wait_synced(zfsvfs->z_os, zp->z_id); 365287103Savg if (error) { 366287103Savg ZFS_EXIT(zfsvfs); 367287103Savg return (error); 368287103Savg } 369287103Savg 370287103Savg /* 371287103Savg * Retrieve fill count from DMU object. 
372287103Savg */ 373287103Savg error = dmu_object_info(zfsvfs->z_os, zp->z_id, &doi); 374287103Savg if (error) { 375287103Savg ZFS_EXIT(zfsvfs); 376287103Savg return (error); 377287103Savg } 378287103Savg 379287103Savg ndata = doi.doi_fill_count; 380287103Savg 381287103Savg ZFS_EXIT(zfsvfs); 382287103Savg if (ddi_copyout(&ndata, (void *)data, sizeof (ndata), flag)) 383287103Savg return (SET_ERROR(EFAULT)); 384287103Savg return (0); 385287103Savg } 386287103Savg#endif 387287103Savg } 388249195Smm return (SET_ERROR(ENOTTY)); 389168404Spjd} 390168404Spjd 391209962Smmstatic vm_page_t 392253953Sattiliopage_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 393209962Smm{ 394209962Smm vm_object_t obj; 395209962Smm vm_page_t pp; 396258353Savg int64_t end; 397209962Smm 398258353Savg /* 399258353Savg * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE 400258353Savg * aligned boundaries, if the range is not aligned. As a result a 401258353Savg * DEV_BSIZE subrange with partially dirty data may get marked as clean. 402258353Savg * It may happen that all DEV_BSIZE subranges are marked clean and thus 403258353Savg * the whole page would be considred clean despite have some dirty data. 404258353Savg * For this reason we should shrink the range to DEV_BSIZE aligned 405258353Savg * boundaries before calling vm_page_clear_dirty. 406258353Savg */ 407258353Savg end = rounddown2(off + nbytes, DEV_BSIZE); 408258353Savg off = roundup2(off, DEV_BSIZE); 409258353Savg nbytes = end - off; 410258353Savg 411209962Smm obj = vp->v_object; 412248084Sattilio zfs_vmobject_assert_wlocked(obj); 413209962Smm 414209962Smm for (;;) { 415209962Smm if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 416246293Savg pp->valid) { 417254138Sattilio if (vm_page_xbusied(pp)) { 418212652Savg /* 419212652Savg * Reference the page before unlocking and 420212652Savg * sleeping so that the page daemon is less 421212652Savg * likely to reclaim it. 
422212652Savg */ 423225418Skib vm_page_reference(pp); 424254138Sattilio vm_page_lock(pp); 425254138Sattilio zfs_vmobject_wunlock(obj); 426307671Skib vm_page_busy_sleep(pp, "zfsmwb", true); 427254138Sattilio zfs_vmobject_wlock(obj); 428209962Smm continue; 429212652Savg } 430254138Sattilio vm_page_sbusy(pp); 431319091Savg } else if (pp != NULL) { 432319091Savg ASSERT(!pp->valid); 433252337Sgavin pp = NULL; 434209962Smm } 435246293Savg 436246293Savg if (pp != NULL) { 437246293Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 438253953Sattilio vm_object_pip_add(obj, 1); 439246293Savg pmap_remove_write(pp); 440258353Savg if (nbytes != 0) 441258353Savg vm_page_clear_dirty(pp, off, nbytes); 442246293Savg } 443209962Smm break; 444209962Smm } 445209962Smm return (pp); 446209962Smm} 447209962Smm 448209962Smmstatic void 449253953Sattiliopage_unbusy(vm_page_t pp) 450209962Smm{ 451209962Smm 452254138Sattilio vm_page_sunbusy(pp); 453253953Sattilio vm_object_pip_subtract(pp->object, 1); 454209962Smm} 455209962Smm 456253953Sattiliostatic vm_page_t 457253953Sattiliopage_hold(vnode_t *vp, int64_t start) 458253953Sattilio{ 459253953Sattilio vm_object_t obj; 460253953Sattilio vm_page_t pp; 461253953Sattilio 462253953Sattilio obj = vp->v_object; 463253953Sattilio zfs_vmobject_assert_wlocked(obj); 464253953Sattilio 465253953Sattilio for (;;) { 466253953Sattilio if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 467253953Sattilio pp->valid) { 468254138Sattilio if (vm_page_xbusied(pp)) { 469253953Sattilio /* 470253953Sattilio * Reference the page before unlocking and 471253953Sattilio * sleeping so that the page daemon is less 472253953Sattilio * likely to reclaim it. 
473253953Sattilio */ 474253953Sattilio vm_page_reference(pp); 475254138Sattilio vm_page_lock(pp); 476254138Sattilio zfs_vmobject_wunlock(obj); 477307671Skib vm_page_busy_sleep(pp, "zfsmwb", true); 478254138Sattilio zfs_vmobject_wlock(obj); 479253953Sattilio continue; 480253953Sattilio } 481253953Sattilio 482253953Sattilio ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 483253953Sattilio vm_page_lock(pp); 484253953Sattilio vm_page_hold(pp); 485253953Sattilio vm_page_unlock(pp); 486253953Sattilio 487253953Sattilio } else 488253953Sattilio pp = NULL; 489253953Sattilio break; 490253953Sattilio } 491253953Sattilio return (pp); 492253953Sattilio} 493253953Sattilio 494253953Sattiliostatic void 495253953Sattiliopage_unhold(vm_page_t pp) 496253953Sattilio{ 497253953Sattilio 498253953Sattilio vm_page_lock(pp); 499253953Sattilio vm_page_unhold(pp); 500253953Sattilio vm_page_unlock(pp); 501253953Sattilio} 502253953Sattilio 503168404Spjd/* 504168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 505168404Spjd * between the DMU cache and the memory mapped pages. What this means: 506168404Spjd * 507168404Spjd * On Write: If we find a memory mapped page, we write to *both* 508168404Spjd * the page and the dmu buffer. 
509168404Spjd */ 510209962Smmstatic void 511209962Smmupdate_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 512209962Smm int segflg, dmu_tx_t *tx) 513168404Spjd{ 514168404Spjd vm_object_t obj; 515168404Spjd struct sf_buf *sf; 516246293Savg caddr_t va; 517212655Savg int off; 518168404Spjd 519258746Savg ASSERT(segflg != UIO_NOCOPY); 520168404Spjd ASSERT(vp->v_mount != NULL); 521168404Spjd obj = vp->v_object; 522168404Spjd ASSERT(obj != NULL); 523168404Spjd 524168404Spjd off = start & PAGEOFFSET; 525248084Sattilio zfs_vmobject_wlock(obj); 526168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 527209962Smm vm_page_t pp; 528246293Savg int nbytes = imin(PAGESIZE - off, len); 529168404Spjd 530258746Savg if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { 531248084Sattilio zfs_vmobject_wunlock(obj); 532168404Spjd 533246293Savg va = zfs_map_page(pp, &sf); 534246293Savg (void) dmu_read(os, oid, start+off, nbytes, 535246293Savg va+off, DMU_READ_PREFETCH);; 536209962Smm zfs_unmap_page(sf); 537246293Savg 538248084Sattilio zfs_vmobject_wlock(obj); 539253953Sattilio page_unbusy(pp); 540168404Spjd } 541209962Smm len -= nbytes; 542168404Spjd off = 0; 543168404Spjd } 544258746Savg vm_object_pip_wakeupn(obj, 0); 545248084Sattilio zfs_vmobject_wunlock(obj); 546168404Spjd} 547168404Spjd 548168404Spjd/* 549219089Spjd * Read with UIO_NOCOPY flag means that sendfile(2) requests 550219089Spjd * ZFS to populate a range of page cache pages with data. 551219089Spjd * 552219089Spjd * NOTE: this function could be optimized to pre-allocate 553254138Sattilio * all pages in advance, drain exclusive busy on all of them, 554219089Spjd * map them into contiguous KVA region and populate them 555219089Spjd * in one single dmu_read() call. 
556219089Spjd */ 557219089Spjdstatic int 558219089Spjdmappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) 559219089Spjd{ 560219089Spjd znode_t *zp = VTOZ(vp); 561219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 562219089Spjd struct sf_buf *sf; 563219089Spjd vm_object_t obj; 564219089Spjd vm_page_t pp; 565219089Spjd int64_t start; 566219089Spjd caddr_t va; 567219089Spjd int len = nbytes; 568219089Spjd int off; 569219089Spjd int error = 0; 570219089Spjd 571219089Spjd ASSERT(uio->uio_segflg == UIO_NOCOPY); 572219089Spjd ASSERT(vp->v_mount != NULL); 573219089Spjd obj = vp->v_object; 574219089Spjd ASSERT(obj != NULL); 575219089Spjd ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); 576219089Spjd 577248084Sattilio zfs_vmobject_wlock(obj); 578219089Spjd for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { 579219089Spjd int bytes = MIN(PAGESIZE, len); 580219089Spjd 581254138Sattilio pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY | 582254649Skib VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); 583219089Spjd if (pp->valid == 0) { 584248084Sattilio zfs_vmobject_wunlock(obj); 585219089Spjd va = zfs_map_page(pp, &sf); 586219089Spjd error = dmu_read(os, zp->z_id, start, bytes, va, 587219089Spjd DMU_READ_PREFETCH); 588219089Spjd if (bytes != PAGESIZE && error == 0) 589219089Spjd bzero(va + bytes, PAGESIZE - bytes); 590219089Spjd zfs_unmap_page(sf); 591248084Sattilio zfs_vmobject_wlock(obj); 592254138Sattilio vm_page_sunbusy(pp); 593219089Spjd vm_page_lock(pp); 594219089Spjd if (error) { 595253073Savg if (pp->wire_count == 0 && pp->valid == 0 && 596254138Sattilio !vm_page_busied(pp)) 597253073Savg vm_page_free(pp); 598219089Spjd } else { 599219089Spjd pp->valid = VM_PAGE_BITS_ALL; 600219089Spjd vm_page_activate(pp); 601219089Spjd } 602219089Spjd vm_page_unlock(pp); 603258739Savg } else { 604258739Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 605254138Sattilio vm_page_sunbusy(pp); 606258739Savg } 607219089Spjd if (error) 608219089Spjd break; 609219089Spjd uio->uio_resid -= bytes; 
610219089Spjd uio->uio_offset += bytes; 611219089Spjd len -= bytes; 612219089Spjd } 613248084Sattilio zfs_vmobject_wunlock(obj); 614219089Spjd return (error); 615219089Spjd} 616219089Spjd 617219089Spjd/* 618168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 619168404Spjd * between the DMU cache and the memory mapped pages. What this means: 620168404Spjd * 621168404Spjd * On Read: We "read" preferentially from memory mapped pages, 622168404Spjd * else we default from the dmu buffer. 623168404Spjd * 624168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 625251631Sdelphij * the file is memory mapped. 626168404Spjd */ 627168404Spjdstatic int 628168404Spjdmappedread(vnode_t *vp, int nbytes, uio_t *uio) 629168404Spjd{ 630168404Spjd znode_t *zp = VTOZ(vp); 631168404Spjd vm_object_t obj; 632212655Savg int64_t start; 633168926Spjd caddr_t va; 634168404Spjd int len = nbytes; 635212655Savg int off; 636168404Spjd int error = 0; 637168404Spjd 638168404Spjd ASSERT(vp->v_mount != NULL); 639168404Spjd obj = vp->v_object; 640168404Spjd ASSERT(obj != NULL); 641168404Spjd 642168404Spjd start = uio->uio_loffset; 643168404Spjd off = start & PAGEOFFSET; 644248084Sattilio zfs_vmobject_wlock(obj); 645168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 646219089Spjd vm_page_t pp; 647219089Spjd uint64_t bytes = MIN(PAGESIZE - off, len); 648168404Spjd 649253953Sattilio if (pp = page_hold(vp, start)) { 650219089Spjd struct sf_buf *sf; 651219089Spjd caddr_t va; 652212652Savg 653248084Sattilio zfs_vmobject_wunlock(obj); 654219089Spjd va = zfs_map_page(pp, &sf); 655298105Savg#ifdef illumos 656219089Spjd error = uiomove(va + off, bytes, UIO_READ, uio); 657298105Savg#else 658298105Savg error = vn_io_fault_uiomove(va + off, bytes, uio); 659298105Savg#endif 660219089Spjd zfs_unmap_page(sf); 661248084Sattilio zfs_vmobject_wlock(obj); 662253953Sattilio page_unhold(pp); 663219089Spjd } else { 664248084Sattilio 
zfs_vmobject_wunlock(obj); 665272809Sdelphij error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 666272809Sdelphij uio, bytes); 667248084Sattilio zfs_vmobject_wlock(obj); 668168404Spjd } 669168404Spjd len -= bytes; 670168404Spjd off = 0; 671168404Spjd if (error) 672168404Spjd break; 673168404Spjd } 674248084Sattilio zfs_vmobject_wunlock(obj); 675168404Spjd return (error); 676168404Spjd} 677168404Spjd 678168404Spjdoffset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 679168404Spjd 680168404Spjd/* 681168404Spjd * Read bytes from specified file into supplied buffer. 682168404Spjd * 683168404Spjd * IN: vp - vnode of file to be read from. 684168404Spjd * uio - structure supplying read location, range info, 685168404Spjd * and return buffer. 686168404Spjd * ioflag - SYNC flags; used to provide FRSYNC semantics. 687168404Spjd * cr - credentials of caller. 688185029Spjd * ct - caller context 689168404Spjd * 690168404Spjd * OUT: uio - updated offset and range, buffer filled. 691168404Spjd * 692251631Sdelphij * RETURN: 0 on success, error code on failure. 
693168404Spjd * 694168404Spjd * Side Effects: 695168404Spjd * vp - atime updated if byte count > 0 696168404Spjd */ 697168404Spjd/* ARGSUSED */ 698168404Spjdstatic int 699168962Spjdzfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 700168404Spjd{ 701168404Spjd znode_t *zp = VTOZ(vp); 702168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 703168404Spjd ssize_t n, nbytes; 704247187Smm int error = 0; 705168404Spjd rl_t *rl; 706219089Spjd xuio_t *xuio = NULL; 707168404Spjd 708168404Spjd ZFS_ENTER(zfsvfs); 709185029Spjd ZFS_VERIFY_ZP(zp); 710168404Spjd 711219089Spjd if (zp->z_pflags & ZFS_AV_QUARANTINED) { 712185029Spjd ZFS_EXIT(zfsvfs); 713249195Smm return (SET_ERROR(EACCES)); 714185029Spjd } 715185029Spjd 716168404Spjd /* 717168404Spjd * Validate file offset 718168404Spjd */ 719168404Spjd if (uio->uio_loffset < (offset_t)0) { 720168404Spjd ZFS_EXIT(zfsvfs); 721249195Smm return (SET_ERROR(EINVAL)); 722168404Spjd } 723168404Spjd 724168404Spjd /* 725168404Spjd * Fasttrack empty reads 726168404Spjd */ 727168404Spjd if (uio->uio_resid == 0) { 728168404Spjd ZFS_EXIT(zfsvfs); 729168404Spjd return (0); 730168404Spjd } 731168404Spjd 732168404Spjd /* 733168962Spjd * Check for mandatory locks 734168962Spjd */ 735219089Spjd if (MANDMODE(zp->z_mode)) { 736168962Spjd if (error = chklock(vp, FREAD, 737168962Spjd uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 738168962Spjd ZFS_EXIT(zfsvfs); 739168962Spjd return (error); 740168962Spjd } 741168962Spjd } 742168962Spjd 743168962Spjd /* 744168404Spjd * If we're in FRSYNC mode, sync out this znode before reading it. 745168404Spjd */ 746224605Smm if (zfsvfs->z_log && 747224605Smm (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) 748219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 749168404Spjd 750168404Spjd /* 751168404Spjd * Lock the range against changes. 
752168404Spjd */ 753168404Spjd rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 754168404Spjd 755168404Spjd /* 756168404Spjd * If we are reading past end-of-file we can skip 757168404Spjd * to the end; but we might still need to set atime. 758168404Spjd */ 759219089Spjd if (uio->uio_loffset >= zp->z_size) { 760168404Spjd error = 0; 761168404Spjd goto out; 762168404Spjd } 763168404Spjd 764219089Spjd ASSERT(uio->uio_loffset < zp->z_size); 765219089Spjd n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 766168404Spjd 767277300Ssmh#ifdef illumos 768219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 769219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 770219089Spjd int nblk; 771219089Spjd int blksz = zp->z_blksz; 772219089Spjd uint64_t offset = uio->uio_loffset; 773219089Spjd 774219089Spjd xuio = (xuio_t *)uio; 775219089Spjd if ((ISP2(blksz))) { 776219089Spjd nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 777219089Spjd blksz)) / blksz; 778219089Spjd } else { 779219089Spjd ASSERT(offset + n <= blksz); 780219089Spjd nblk = 1; 781219089Spjd } 782219089Spjd (void) dmu_xuio_init(xuio, nblk); 783219089Spjd 784219089Spjd if (vn_has_cached_data(vp)) { 785219089Spjd /* 786219089Spjd * For simplicity, we always allocate a full buffer 787219089Spjd * even if we only expect to read a portion of a block. 
788219089Spjd */ 789219089Spjd while (--nblk >= 0) { 790219089Spjd (void) dmu_xuio_add(xuio, 791219089Spjd dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 792219089Spjd blksz), 0, blksz); 793219089Spjd } 794219089Spjd } 795219089Spjd } 796277300Ssmh#endif /* illumos */ 797219089Spjd 798168404Spjd while (n > 0) { 799168404Spjd nbytes = MIN(n, zfs_read_chunk_size - 800168404Spjd P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 801168404Spjd 802219089Spjd#ifdef __FreeBSD__ 803219089Spjd if (uio->uio_segflg == UIO_NOCOPY) 804219089Spjd error = mappedread_sf(vp, nbytes, uio); 805219089Spjd else 806219089Spjd#endif /* __FreeBSD__ */ 807272809Sdelphij if (vn_has_cached_data(vp)) { 808168404Spjd error = mappedread(vp, nbytes, uio); 809272809Sdelphij } else { 810272809Sdelphij error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 811272809Sdelphij uio, nbytes); 812272809Sdelphij } 813185029Spjd if (error) { 814185029Spjd /* convert checksum errors into IO errors */ 815185029Spjd if (error == ECKSUM) 816249195Smm error = SET_ERROR(EIO); 817168404Spjd break; 818185029Spjd } 819168962Spjd 820168404Spjd n -= nbytes; 821168404Spjd } 822168404Spjdout: 823168404Spjd zfs_range_unlock(rl); 824168404Spjd 825168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 826168404Spjd ZFS_EXIT(zfsvfs); 827168404Spjd return (error); 828168404Spjd} 829168404Spjd 830168404Spjd/* 831168404Spjd * Write the bytes to a file. 832168404Spjd * 833168404Spjd * IN: vp - vnode of file to be written to. 834168404Spjd * uio - structure supplying write location, range info, 835168404Spjd * and data buffer. 836251631Sdelphij * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is 837251631Sdelphij * set if in append mode. 838168404Spjd * cr - credentials of caller. 839185029Spjd * ct - caller context (NFS/CIFS fem monitor only) 840168404Spjd * 841168404Spjd * OUT: uio - updated offset and range. 842168404Spjd * 843251631Sdelphij * RETURN: 0 on success, error code on failure. 
844168404Spjd * 845168404Spjd * Timestamps: 846168404Spjd * vp - ctime|mtime updated if byte count > 0 847168404Spjd */ 848219089Spjd 849168404Spjd/* ARGSUSED */ 850168404Spjdstatic int 851168962Spjdzfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 852168404Spjd{ 853168404Spjd znode_t *zp = VTOZ(vp); 854168962Spjd rlim64_t limit = MAXOFFSET_T; 855168404Spjd ssize_t start_resid = uio->uio_resid; 856168404Spjd ssize_t tx_bytes; 857168404Spjd uint64_t end_size; 858168404Spjd dmu_tx_t *tx; 859168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 860185029Spjd zilog_t *zilog; 861168404Spjd offset_t woff; 862168404Spjd ssize_t n, nbytes; 863168404Spjd rl_t *rl; 864168404Spjd int max_blksz = zfsvfs->z_max_blksz; 865247187Smm int error = 0; 866209962Smm arc_buf_t *abuf; 867247187Smm iovec_t *aiov = NULL; 868219089Spjd xuio_t *xuio = NULL; 869219089Spjd int i_iov = 0; 870219089Spjd int iovcnt = uio->uio_iovcnt; 871219089Spjd iovec_t *iovp = uio->uio_iov; 872219089Spjd int write_eof; 873219089Spjd int count = 0; 874219089Spjd sa_bulk_attr_t bulk[4]; 875219089Spjd uint64_t mtime[2], ctime[2]; 876168404Spjd 877168404Spjd /* 878168404Spjd * Fasttrack empty write 879168404Spjd */ 880168404Spjd n = start_resid; 881168404Spjd if (n == 0) 882168404Spjd return (0); 883168404Spjd 884168962Spjd if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 885168962Spjd limit = MAXOFFSET_T; 886168962Spjd 887168404Spjd ZFS_ENTER(zfsvfs); 888185029Spjd ZFS_VERIFY_ZP(zp); 889168404Spjd 890219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 891219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 892219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 893219089Spjd &zp->z_size, 8); 894219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 895219089Spjd &zp->z_pflags, 8); 896219089Spjd 897168404Spjd /* 898262990Sdelphij * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. 
snapshots) our 899262990Sdelphij * callers might not be able to detect properly that we are read-only, 900262990Sdelphij * so check it explicitly here. 901262990Sdelphij */ 902262990Sdelphij if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 903262990Sdelphij ZFS_EXIT(zfsvfs); 904262990Sdelphij return (SET_ERROR(EROFS)); 905262990Sdelphij } 906262990Sdelphij 907262990Sdelphij /* 908321579Smav * If immutable or not appending then return EPERM. 909321579Smav * Intentionally allow ZFS_READONLY through here. 910321579Smav * See zfs_zaccess_common() 911185029Spjd */ 912321579Smav if ((zp->z_pflags & ZFS_IMMUTABLE) || 913219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 914219089Spjd (uio->uio_loffset < zp->z_size))) { 915185029Spjd ZFS_EXIT(zfsvfs); 916249195Smm return (SET_ERROR(EPERM)); 917185029Spjd } 918185029Spjd 919185029Spjd zilog = zfsvfs->z_log; 920185029Spjd 921185029Spjd /* 922219089Spjd * Validate file offset 923219089Spjd */ 924219089Spjd woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 925219089Spjd if (woff < 0) { 926219089Spjd ZFS_EXIT(zfsvfs); 927249195Smm return (SET_ERROR(EINVAL)); 928219089Spjd } 929219089Spjd 930219089Spjd /* 931219089Spjd * Check for mandatory locks before calling zfs_range_lock() 932219089Spjd * in order to prevent a deadlock with locks set via fcntl(). 933219089Spjd */ 934219089Spjd if (MANDMODE((mode_t)zp->z_mode) && 935219089Spjd (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 936219089Spjd ZFS_EXIT(zfsvfs); 937219089Spjd return (error); 938219089Spjd } 939219089Spjd 940277300Ssmh#ifdef illumos 941219089Spjd /* 942168404Spjd * Pre-fault the pages to ensure slow (eg NFS) pages 943168404Spjd * don't hold up txg. 944219089Spjd * Skip this if uio contains loaned arc_buf. 
945168404Spjd */ 946219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 947219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 948219089Spjd xuio = (xuio_t *)uio; 949219089Spjd else 950219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 951277300Ssmh#endif 952168404Spjd 953168404Spjd /* 954168404Spjd * If in append mode, set the io offset pointer to eof. 955168404Spjd */ 956213673Spjd if (ioflag & FAPPEND) { 957168404Spjd /* 958219089Spjd * Obtain an appending range lock to guarantee file append 959219089Spjd * semantics. We reset the write offset once we have the lock. 960168404Spjd */ 961168404Spjd rl = zfs_range_lock(zp, 0, n, RL_APPEND); 962219089Spjd woff = rl->r_off; 963168404Spjd if (rl->r_len == UINT64_MAX) { 964219089Spjd /* 965219089Spjd * We overlocked the file because this write will cause 966219089Spjd * the file block size to increase. 967219089Spjd * Note that zp_size cannot change with this lock held. 968219089Spjd */ 969219089Spjd woff = zp->z_size; 970168404Spjd } 971219089Spjd uio->uio_loffset = woff; 972168404Spjd } else { 973168404Spjd /* 974219089Spjd * Note that if the file block size will change as a result of 975219089Spjd * this write, then this range lock will lock the entire file 976219089Spjd * so that we can re-write the block safely. 977168404Spjd */ 978168404Spjd rl = zfs_range_lock(zp, woff, n, RL_WRITER); 979168404Spjd } 980168404Spjd 981235781Strasz if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { 982235781Strasz zfs_range_unlock(rl); 983235781Strasz ZFS_EXIT(zfsvfs); 984235781Strasz return (EFBIG); 985235781Strasz } 986235781Strasz 987168962Spjd if (woff >= limit) { 988168962Spjd zfs_range_unlock(rl); 989168962Spjd ZFS_EXIT(zfsvfs); 990249195Smm return (SET_ERROR(EFBIG)); 991168962Spjd } 992168962Spjd 993168962Spjd if ((woff + n) > limit || woff > (limit - n)) 994168962Spjd n = limit - woff; 995168962Spjd 996219089Spjd /* Will this write extend the file length? 
*/ 997219089Spjd write_eof = (woff + n > zp->z_size); 998168404Spjd 999219089Spjd end_size = MAX(zp->z_size, woff + n); 1000219089Spjd 1001168404Spjd /* 1002168404Spjd * Write the file in reasonable size chunks. Each chunk is written 1003168404Spjd * in a separate transaction; this keeps the intent log records small 1004168404Spjd * and allows us to do more fine-grained space accounting. 1005168404Spjd */ 1006168404Spjd while (n > 0) { 1007209962Smm abuf = NULL; 1008209962Smm woff = uio->uio_loffset; 1009219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 1010219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 1011209962Smm if (abuf != NULL) 1012209962Smm dmu_return_arcbuf(abuf); 1013249195Smm error = SET_ERROR(EDQUOT); 1014209962Smm break; 1015209962Smm } 1016209962Smm 1017219089Spjd if (xuio && abuf == NULL) { 1018219089Spjd ASSERT(i_iov < iovcnt); 1019219089Spjd aiov = &iovp[i_iov]; 1020219089Spjd abuf = dmu_xuio_arcbuf(xuio, i_iov); 1021219089Spjd dmu_xuio_clear(xuio, i_iov); 1022219089Spjd DTRACE_PROBE3(zfs_cp_write, int, i_iov, 1023219089Spjd iovec_t *, aiov, arc_buf_t *, abuf); 1024219089Spjd ASSERT((aiov->iov_base == abuf->b_data) || 1025219089Spjd ((char *)aiov->iov_base - (char *)abuf->b_data + 1026219089Spjd aiov->iov_len == arc_buf_size(abuf))); 1027219089Spjd i_iov++; 1028219089Spjd } else if (abuf == NULL && n >= max_blksz && 1029219089Spjd woff >= zp->z_size && 1030209962Smm P2PHASE(woff, max_blksz) == 0 && 1031209962Smm zp->z_blksz == max_blksz) { 1032219089Spjd /* 1033219089Spjd * This write covers a full block. "Borrow" a buffer 1034219089Spjd * from the dmu so that we can fill it before we enter 1035219089Spjd * a transaction. This avoids the possibility of 1036219089Spjd * holding up the transaction if the data copy hangs 1037219089Spjd * up on a pagefault (e.g., from an NFS server mapping). 
1038219089Spjd */ 1039209962Smm size_t cbytes; 1040209962Smm 1041219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 1042219089Spjd max_blksz); 1043209962Smm ASSERT(abuf != NULL); 1044209962Smm ASSERT(arc_buf_size(abuf) == max_blksz); 1045209962Smm if (error = uiocopy(abuf->b_data, max_blksz, 1046209962Smm UIO_WRITE, uio, &cbytes)) { 1047209962Smm dmu_return_arcbuf(abuf); 1048209962Smm break; 1049209962Smm } 1050209962Smm ASSERT(cbytes == max_blksz); 1051209962Smm } 1052209962Smm 1053209962Smm /* 1054168404Spjd * Start a transaction. 1055168404Spjd */ 1056168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1057219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1058168404Spjd dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 1059219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1060258720Savg error = dmu_tx_assign(tx, TXG_WAIT); 1061168404Spjd if (error) { 1062168404Spjd dmu_tx_abort(tx); 1063209962Smm if (abuf != NULL) 1064209962Smm dmu_return_arcbuf(abuf); 1065168404Spjd break; 1066168404Spjd } 1067168404Spjd 1068168404Spjd /* 1069168404Spjd * If zfs_range_lock() over-locked we grow the blocksize 1070168404Spjd * and then reduce the lock range. This will only happen 1071168404Spjd * on the first iteration since zfs_range_reduce() will 1072168404Spjd * shrink down r_len to the appropriate size. 1073168404Spjd */ 1074168404Spjd if (rl->r_len == UINT64_MAX) { 1075168404Spjd uint64_t new_blksz; 1076168404Spjd 1077168404Spjd if (zp->z_blksz > max_blksz) { 1078274337Sdelphij /* 1079274337Sdelphij * File's blocksize is already larger than the 1080274337Sdelphij * "recordsize" property. Only let it grow to 1081274337Sdelphij * the next power of 2. 
1082274337Sdelphij */ 1083168404Spjd ASSERT(!ISP2(zp->z_blksz)); 1084274337Sdelphij new_blksz = MIN(end_size, 1085274337Sdelphij 1 << highbit64(zp->z_blksz)); 1086168404Spjd } else { 1087168404Spjd new_blksz = MIN(end_size, max_blksz); 1088168404Spjd } 1089168404Spjd zfs_grow_blocksize(zp, new_blksz, tx); 1090168404Spjd zfs_range_reduce(rl, woff, n); 1091168404Spjd } 1092168404Spjd 1093168404Spjd /* 1094168404Spjd * XXX - should we really limit each write to z_max_blksz? 1095168404Spjd * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 1096168404Spjd */ 1097168404Spjd nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 1098168404Spjd 1099219089Spjd if (woff + nbytes > zp->z_size) 1100168404Spjd vnode_pager_setsize(vp, woff + nbytes); 1101168404Spjd 1102209962Smm if (abuf == NULL) { 1103209962Smm tx_bytes = uio->uio_resid; 1104219089Spjd error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 1105219089Spjd uio, nbytes, tx); 1106209962Smm tx_bytes -= uio->uio_resid; 1107168404Spjd } else { 1108209962Smm tx_bytes = nbytes; 1109219089Spjd ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 1110219089Spjd /* 1111219089Spjd * If this is not a full block write, but we are 1112219089Spjd * extending the file past EOF and this data starts 1113219089Spjd * block-aligned, use assign_arcbuf(). Otherwise, 1114219089Spjd * write via dmu_write(). 
1115219089Spjd */ 1116219089Spjd if (tx_bytes < max_blksz && (!write_eof || 1117219089Spjd aiov->iov_base != abuf->b_data)) { 1118219089Spjd ASSERT(xuio); 1119219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, woff, 1120219089Spjd aiov->iov_len, aiov->iov_base, tx); 1121219089Spjd dmu_return_arcbuf(abuf); 1122219089Spjd xuio_stat_wbuf_copied(); 1123219089Spjd } else { 1124219089Spjd ASSERT(xuio || tx_bytes == max_blksz); 1125219089Spjd dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 1126219089Spjd woff, abuf, tx); 1127219089Spjd } 1128209962Smm ASSERT(tx_bytes <= uio->uio_resid); 1129209962Smm uioskip(uio, tx_bytes); 1130168404Spjd } 1131212657Savg if (tx_bytes && vn_has_cached_data(vp)) { 1132209962Smm update_pages(vp, woff, tx_bytes, zfsvfs->z_os, 1133209962Smm zp->z_id, uio->uio_segflg, tx); 1134209962Smm } 1135209962Smm 1136209962Smm /* 1137168404Spjd * If we made no progress, we're done. If we made even 1138168404Spjd * partial progress, update the znode and ZIL accordingly. 1139168404Spjd */ 1140168404Spjd if (tx_bytes == 0) { 1141219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 1142219089Spjd (void *)&zp->z_size, sizeof (uint64_t), tx); 1143168404Spjd dmu_tx_commit(tx); 1144168404Spjd ASSERT(error != 0); 1145168404Spjd break; 1146168404Spjd } 1147168404Spjd 1148168404Spjd /* 1149168404Spjd * Clear Set-UID/Set-GID bits on successful write if not 1150168404Spjd * privileged and at least one of the excute bits is set. 1151168404Spjd * 1152168404Spjd * It would be nice to to this after all writes have 1153168404Spjd * been done, but that would still expose the ISUID/ISGID 1154168404Spjd * to another app after the partial write is committed. 1155185029Spjd * 1156185029Spjd * Note: we don't call zfs_fuid_map_id() here because 1157185029Spjd * user 0 is not an ephemeral uid. 
1158168404Spjd */ 1159168404Spjd mutex_enter(&zp->z_acl_lock); 1160219089Spjd if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 1161168404Spjd (S_IXUSR >> 6))) != 0 && 1162219089Spjd (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 1163185029Spjd secpolicy_vnode_setid_retain(vp, cr, 1164219089Spjd (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 1165219089Spjd uint64_t newmode; 1166219089Spjd zp->z_mode &= ~(S_ISUID | S_ISGID); 1167219089Spjd newmode = zp->z_mode; 1168219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 1169219089Spjd (void *)&newmode, sizeof (uint64_t), tx); 1170168404Spjd } 1171168404Spjd mutex_exit(&zp->z_acl_lock); 1172168404Spjd 1173219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 1174219089Spjd B_TRUE); 1175168404Spjd 1176168404Spjd /* 1177168404Spjd * Update the file size (zp_size) if it has changed; 1178168404Spjd * account for possible concurrent updates. 1179168404Spjd */ 1180219089Spjd while ((end_size = zp->z_size) < uio->uio_loffset) { 1181219089Spjd (void) atomic_cas_64(&zp->z_size, end_size, 1182168404Spjd uio->uio_loffset); 1183298105Savg#ifdef illumos 1184219089Spjd ASSERT(error == 0); 1185298105Savg#else 1186298105Savg ASSERT(error == 0 || error == EFAULT); 1187298105Savg#endif 1188219089Spjd } 1189219089Spjd /* 1190219089Spjd * If we are replaying and eof is non zero then force 1191219089Spjd * the file size to the specified eof. Note, there's no 1192219089Spjd * concurrency during replay. 
1193219089Spjd */ 1194219089Spjd if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 1195219089Spjd zp->z_size = zfsvfs->z_replay_eof; 1196219089Spjd 1197298105Savg if (error == 0) 1198298105Savg error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1199298105Savg else 1200298105Savg (void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1201219089Spjd 1202168404Spjd zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 1203168404Spjd dmu_tx_commit(tx); 1204168404Spjd 1205168404Spjd if (error != 0) 1206168404Spjd break; 1207168404Spjd ASSERT(tx_bytes == nbytes); 1208168404Spjd n -= nbytes; 1209219089Spjd 1210277300Ssmh#ifdef illumos 1211219089Spjd if (!xuio && n > 0) 1212219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 1213277300Ssmh#endif 1214168404Spjd } 1215168404Spjd 1216168404Spjd zfs_range_unlock(rl); 1217168404Spjd 1218168404Spjd /* 1219168404Spjd * If we're in replay mode, or we made no progress, return error. 1220168404Spjd * Otherwise, it's at least a partial write, so it's successful. 1221168404Spjd */ 1222209962Smm if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1223168404Spjd ZFS_EXIT(zfsvfs); 1224168404Spjd return (error); 1225168404Spjd } 1226168404Spjd 1227298105Savg#ifdef __FreeBSD__ 1228298105Savg /* 1229298105Savg * EFAULT means that at least one page of the source buffer was not 1230298105Savg * available. VFS will re-try remaining I/O upon this error. 
1231298105Savg */ 1232298105Savg if (error == EFAULT) { 1233298105Savg ZFS_EXIT(zfsvfs); 1234298105Savg return (error); 1235298105Savg } 1236298105Savg#endif 1237298105Savg 1238219089Spjd if (ioflag & (FSYNC | FDSYNC) || 1239219089Spjd zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1240219089Spjd zil_commit(zilog, zp->z_id); 1241168404Spjd 1242168404Spjd ZFS_EXIT(zfsvfs); 1243168404Spjd return (0); 1244168404Spjd} 1245168404Spjd 1246168404Spjdvoid 1247219089Spjdzfs_get_done(zgd_t *zgd, int error) 1248168404Spjd{ 1249219089Spjd znode_t *zp = zgd->zgd_private; 1250219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 1251168404Spjd 1252219089Spjd if (zgd->zgd_db) 1253219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1254219089Spjd 1255219089Spjd zfs_range_unlock(zgd->zgd_rl); 1256219089Spjd 1257191900Skmacy /* 1258191900Skmacy * Release the vnode asynchronously as we currently have the 1259191900Skmacy * txg stopped from syncing. 1260191900Skmacy */ 1261219089Spjd VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1262219089Spjd 1263219089Spjd if (error == 0 && zgd->zgd_bp) 1264325132Savg zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp); 1265219089Spjd 1266168404Spjd kmem_free(zgd, sizeof (zgd_t)); 1267168404Spjd} 1268168404Spjd 1269214378Smm#ifdef DEBUG 1270214378Smmstatic int zil_fault_io = 0; 1271214378Smm#endif 1272214378Smm 1273168404Spjd/* 1274168404Spjd * Get data to generate a TX_WRITE intent log record. 
1275168404Spjd */ 1276168404Spjdint 1277325132Savgzfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) 1278168404Spjd{ 1279168404Spjd zfsvfs_t *zfsvfs = arg; 1280168404Spjd objset_t *os = zfsvfs->z_os; 1281168404Spjd znode_t *zp; 1282219089Spjd uint64_t object = lr->lr_foid; 1283219089Spjd uint64_t offset = lr->lr_offset; 1284219089Spjd uint64_t size = lr->lr_length; 1285168404Spjd dmu_buf_t *db; 1286168404Spjd zgd_t *zgd; 1287168404Spjd int error = 0; 1288168404Spjd 1289325132Savg ASSERT3P(lwb, !=, NULL); 1290325132Savg ASSERT3P(zio, !=, NULL); 1291325132Savg ASSERT3U(size, !=, 0); 1292168404Spjd 1293168404Spjd /* 1294168404Spjd * Nothing to do if the file has been removed 1295168404Spjd */ 1296219089Spjd if (zfs_zget(zfsvfs, object, &zp) != 0) 1297249195Smm return (SET_ERROR(ENOENT)); 1298168404Spjd if (zp->z_unlinked) { 1299191900Skmacy /* 1300191900Skmacy * Release the vnode asynchronously as we currently have the 1301191900Skmacy * txg stopped from syncing. 1302191900Skmacy */ 1303196307Spjd VN_RELE_ASYNC(ZTOV(zp), 1304196307Spjd dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1305249195Smm return (SET_ERROR(ENOENT)); 1306168404Spjd } 1307168404Spjd 1308219089Spjd zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1309325132Savg zgd->zgd_lwb = lwb; 1310219089Spjd zgd->zgd_private = zp; 1311219089Spjd 1312168404Spjd /* 1313168404Spjd * Write records come in two flavors: immediate and indirect. 1314168404Spjd * For small writes it's cheaper to store the data with the 1315168404Spjd * log record (immediate); for large writes it's cheaper to 1316168404Spjd * sync the data and get a pointer to it (indirect) so that 1317168404Spjd * we don't have to write the data twice. 
1318168404Spjd */ 1319168404Spjd if (buf != NULL) { /* immediate write */ 1320219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1321168404Spjd /* test for truncation needs to be done while range locked */ 1322219089Spjd if (offset >= zp->z_size) { 1323249195Smm error = SET_ERROR(ENOENT); 1324219089Spjd } else { 1325219089Spjd error = dmu_read(os, object, offset, size, buf, 1326219089Spjd DMU_READ_NO_PREFETCH); 1327168404Spjd } 1328219089Spjd ASSERT(error == 0 || error == ENOENT); 1329168404Spjd } else { /* indirect write */ 1330168404Spjd /* 1331168404Spjd * Have to lock the whole block to ensure when it's 1332324203Savg * written out and its checksum is being calculated 1333168404Spjd * that no one can change the data. We need to re-check 1334168404Spjd * blocksize after we get the lock in case it's changed! 1335168404Spjd */ 1336168404Spjd for (;;) { 1337219089Spjd uint64_t blkoff; 1338219089Spjd size = zp->z_blksz; 1339219089Spjd blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; 1340219089Spjd offset -= blkoff; 1341219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1342219089Spjd RL_READER); 1343219089Spjd if (zp->z_blksz == size) 1344168404Spjd break; 1345219089Spjd offset += blkoff; 1346219089Spjd zfs_range_unlock(zgd->zgd_rl); 1347168404Spjd } 1348168404Spjd /* test for truncation needs to be done while range locked */ 1349219089Spjd if (lr->lr_offset >= zp->z_size) 1350249195Smm error = SET_ERROR(ENOENT); 1351214378Smm#ifdef DEBUG 1352214378Smm if (zil_fault_io) { 1353249195Smm error = SET_ERROR(EIO); 1354214378Smm zil_fault_io = 0; 1355214378Smm } 1356214378Smm#endif 1357219089Spjd if (error == 0) 1358219089Spjd error = dmu_buf_hold(os, object, offset, zgd, &db, 1359219089Spjd DMU_READ_NO_PREFETCH); 1360214378Smm 1361209962Smm if (error == 0) { 1362323748Savg blkptr_t *bp = &lr->lr_blkptr; 1363243524Smm 1364219089Spjd zgd->zgd_db = db; 1365219089Spjd zgd->zgd_bp = bp; 1366219089Spjd 1367219089Spjd ASSERT(db->db_offset == 
offset); 1368219089Spjd ASSERT(db->db_size == size); 1369219089Spjd 1370219089Spjd error = dmu_sync(zio, lr->lr_common.lrc_txg, 1371219089Spjd zfs_get_done, zgd); 1372321559Smav ASSERT(error || lr->lr_length <= size); 1373219089Spjd 1374209962Smm /* 1375219089Spjd * On success, we need to wait for the write I/O 1376219089Spjd * initiated by dmu_sync() to complete before we can 1377219089Spjd * release this dbuf. We will finish everything up 1378219089Spjd * in the zfs_get_done() callback. 1379209962Smm */ 1380219089Spjd if (error == 0) 1381219089Spjd return (0); 1382209962Smm 1383219089Spjd if (error == EALREADY) { 1384219089Spjd lr->lr_common.lrc_txtype = TX_WRITE2; 1385219089Spjd error = 0; 1386219089Spjd } 1387209962Smm } 1388168404Spjd } 1389219089Spjd 1390219089Spjd zfs_get_done(zgd, error); 1391219089Spjd 1392168404Spjd return (error); 1393168404Spjd} 1394168404Spjd 1395168404Spjd/*ARGSUSED*/ 1396168404Spjdstatic int 1397185029Spjdzfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1398185029Spjd caller_context_t *ct) 1399168404Spjd{ 1400168404Spjd znode_t *zp = VTOZ(vp); 1401168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1402168404Spjd int error; 1403168404Spjd 1404168404Spjd ZFS_ENTER(zfsvfs); 1405185029Spjd ZFS_VERIFY_ZP(zp); 1406185029Spjd 1407185029Spjd if (flag & V_ACE_MASK) 1408185029Spjd error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1409185029Spjd else 1410185029Spjd error = zfs_zaccess_rwx(zp, mode, flag, cr); 1411185029Spjd 1412168404Spjd ZFS_EXIT(zfsvfs); 1413168404Spjd return (error); 1414168404Spjd} 1415168404Spjd 1416211932Smmstatic int 1417303970Savgzfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp) 1418211932Smm{ 1419303970Savg int error; 1420211932Smm 1421303970Savg *vpp = arg; 1422303970Savg error = vn_lock(*vpp, lkflags); 1423303970Savg if (error != 0) 1424303970Savg vrele(*vpp); 1425303970Savg return (error); 1426303970Savg} 1427211932Smm 1428303970Savgstatic int 1429303970Savgzfs_lookup_lock(vnode_t *dvp, 
vnode_t *vp, const char *name, int lkflags) 1430303970Savg{ 1431303970Savg znode_t *zdp = VTOZ(dvp); 1432303970Savg zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1433303970Savg int error; 1434303970Savg int ltype; 1435303970Savg 1436303970Savg ASSERT_VOP_LOCKED(dvp, __func__); 1437303970Savg#ifdef DIAGNOSTIC 1438307142Savg if ((zdp->z_pflags & ZFS_XATTR) == 0) 1439307142Savg VERIFY(!RRM_LOCK_HELD(&zfsvfs->z_teardown_lock)); 1440303970Savg#endif 1441303970Savg 1442303970Savg if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { 1443303970Savg ASSERT3P(dvp, ==, vp); 1444303970Savg vref(dvp); 1445303970Savg ltype = lkflags & LK_TYPE_MASK; 1446303970Savg if (ltype != VOP_ISLOCKED(dvp)) { 1447303970Savg if (ltype == LK_EXCLUSIVE) 1448303970Savg vn_lock(dvp, LK_UPGRADE | LK_RETRY); 1449303970Savg else /* if (ltype == LK_SHARED) */ 1450303970Savg vn_lock(dvp, LK_DOWNGRADE | LK_RETRY); 1451303970Savg 1452303970Savg /* 1453303970Savg * Relock for the "." case could leave us with 1454303970Savg * reclaimed vnode. 1455303970Savg */ 1456303970Savg if (dvp->v_iflag & VI_DOOMED) { 1457303970Savg vrele(dvp); 1458303970Savg return (SET_ERROR(ENOENT)); 1459303970Savg } 1460303970Savg } 1461303970Savg return (0); 1462303970Savg } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { 1463303970Savg /* 1464303970Savg * Note that in this case, dvp is the child vnode, and we 1465303970Savg * are looking up the parent vnode - exactly reverse from 1466303970Savg * normal operation. Unlocking dvp requires some rather 1467303970Savg * tricky unlock/relock dance to prevent mp from being freed; 1468303970Savg * use vn_vget_ino_gen() which takes care of all that. 1469303970Savg * 1470303970Savg * XXX Note that there is a time window when both vnodes are 1471303970Savg * unlocked. It is possible, although highly unlikely, that 1472303970Savg * during that window the parent-child relationship between 1473303970Savg * the vnodes may change, for example, get reversed. 
1474303970Savg * In that case we would have a wrong lock order for the vnodes. 1475303970Savg * All other filesystems seem to ignore this problem, so we 1476303970Savg * do the same here. 1477303970Savg * A potential solution could be implemented as follows: 1478303970Savg * - using LK_NOWAIT when locking the second vnode and retrying 1479303970Savg * if necessary 1480303970Savg * - checking that the parent-child relationship still holds 1481303970Savg * after locking both vnodes and retrying if it doesn't 1482303970Savg */ 1483303970Savg error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp); 1484303970Savg return (error); 1485303970Savg } else { 1486303970Savg error = vn_lock(vp, lkflags); 1487303970Savg if (error != 0) 1488303970Savg vrele(vp); 1489303970Savg return (error); 1490211932Smm } 1491211932Smm} 1492211932Smm 1493211932Smm/* 1494168404Spjd * Lookup an entry in a directory, or an extended attribute directory. 1495168404Spjd * If it exists, return a held vnode reference for it. 1496168404Spjd * 1497168404Spjd * IN: dvp - vnode of directory to search. 1498168404Spjd * nm - name of entry to lookup. 1499168404Spjd * pnp - full pathname to lookup [UNUSED]. 1500168404Spjd * flags - LOOKUP_XATTR set if looking for an attribute. 1501168404Spjd * rdir - root directory vnode [UNUSED]. 1502168404Spjd * cr - credentials of caller. 1503185029Spjd * ct - caller context 1504168404Spjd * 1505168404Spjd * OUT: vpp - vnode of located entry, NULL if not found. 1506168404Spjd * 1507251631Sdelphij * RETURN: 0 on success, error code on failure. 
1508168404Spjd * 1509168404Spjd * Timestamps: 1510168404Spjd * NA 1511168404Spjd */ 1512168404Spjd/* ARGSUSED */ 1513168962Spjdstatic int 1514168962Spjdzfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1515185029Spjd int nameiop, cred_t *cr, kthread_t *td, int flags) 1516168404Spjd{ 1517168962Spjd znode_t *zdp = VTOZ(dvp); 1518303970Savg znode_t *zp; 1519168962Spjd zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1520211932Smm int error = 0; 1521168404Spjd 1522321545Smav /* 1523321545Smav * Fast path lookup, however we must skip DNLC lookup 1524321545Smav * for case folding or normalizing lookups because the 1525321545Smav * DNLC code only stores the passed in name. This means 1526321545Smav * creating 'a' and removing 'A' on a case insensitive 1527321545Smav * file system would work, but DNLC still thinks 'a' 1528321545Smav * exists and won't let you create it again on the next 1529321545Smav * pass through fast path. 1530321545Smav */ 1531303970Savg if (!(flags & LOOKUP_XATTR)) { 1532211932Smm if (dvp->v_type != VDIR) { 1533249195Smm return (SET_ERROR(ENOTDIR)); 1534219089Spjd } else if (zdp->z_sa_hdl == NULL) { 1535249195Smm return (SET_ERROR(EIO)); 1536211932Smm } 1537211932Smm } 1538211932Smm 1539211932Smm DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1540211932Smm 1541168404Spjd ZFS_ENTER(zfsvfs); 1542185029Spjd ZFS_VERIFY_ZP(zdp); 1543168404Spjd 1544168404Spjd *vpp = NULL; 1545168404Spjd 1546185029Spjd if (flags & LOOKUP_XATTR) { 1547168404Spjd#ifdef TODO 1548168404Spjd /* 1549168404Spjd * If the xattr property is off, refuse the lookup request. 1550168404Spjd */ 1551168404Spjd if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1552168404Spjd ZFS_EXIT(zfsvfs); 1553249195Smm return (SET_ERROR(EINVAL)); 1554168404Spjd } 1555185029Spjd#endif 1556168404Spjd 1557168404Spjd /* 1558168404Spjd * We don't allow recursive attributes.. 1559168404Spjd * Maybe someday we will. 
1560168404Spjd */ 1561219089Spjd if (zdp->z_pflags & ZFS_XATTR) { 1562168404Spjd ZFS_EXIT(zfsvfs); 1563249195Smm return (SET_ERROR(EINVAL)); 1564168404Spjd } 1565168404Spjd 1566168404Spjd if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1567168404Spjd ZFS_EXIT(zfsvfs); 1568168404Spjd return (error); 1569168404Spjd } 1570168404Spjd 1571168404Spjd /* 1572168404Spjd * Do we have permission to get into attribute directory? 1573168404Spjd */ 1574185029Spjd if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1575185029Spjd B_FALSE, cr)) { 1576303970Savg vrele(*vpp); 1577185029Spjd *vpp = NULL; 1578168404Spjd } 1579168404Spjd 1580168404Spjd ZFS_EXIT(zfsvfs); 1581168404Spjd return (error); 1582168404Spjd } 1583168404Spjd 1584168404Spjd /* 1585168404Spjd * Check accessibility of directory. 1586168404Spjd */ 1587185029Spjd if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1588168404Spjd ZFS_EXIT(zfsvfs); 1589168404Spjd return (error); 1590168404Spjd } 1591168404Spjd 1592185029Spjd if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1593185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1594185029Spjd ZFS_EXIT(zfsvfs); 1595249195Smm return (SET_ERROR(EILSEQ)); 1596185029Spjd } 1597168404Spjd 1598168962Spjd 1599303970Savg /* 1600303970Savg * First handle the special cases. 1601303970Savg */ 1602303970Savg if ((cnp->cn_flags & ISDOTDOT) != 0) { 1603303970Savg /* 1604303970Savg * If we are a snapshot mounted under .zfs, return 1605303970Savg * the vp for the snapshot directory. 
1606303970Savg */ 1607303970Savg if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) { 1608315842Savg struct componentname cn; 1609315842Savg vnode_t *zfsctl_vp; 1610315842Savg int ltype; 1611315842Savg 1612303970Savg ZFS_EXIT(zfsvfs); 1613315842Savg ltype = VOP_ISLOCKED(dvp); 1614315842Savg VOP_UNLOCK(dvp, 0); 1615315842Savg error = zfsctl_root(zfsvfs->z_parent, LK_SHARED, 1616315842Savg &zfsctl_vp); 1617303970Savg if (error == 0) { 1618315842Savg cn.cn_nameptr = "snapshot"; 1619315842Savg cn.cn_namelen = strlen(cn.cn_nameptr); 1620315842Savg cn.cn_nameiop = cnp->cn_nameiop; 1621319415Savg cn.cn_flags = cnp->cn_flags & ~ISDOTDOT; 1622315842Savg cn.cn_lkflags = cnp->cn_lkflags; 1623315842Savg error = VOP_LOOKUP(zfsctl_vp, vpp, &cn); 1624315842Savg vput(zfsctl_vp); 1625303970Savg } 1626315842Savg vn_lock(dvp, ltype | LK_RETRY); 1627315842Savg return (error); 1628303970Savg } 1629303970Savg } 1630303970Savg if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) { 1631315842Savg ZFS_EXIT(zfsvfs); 1632303970Savg if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP) 1633315842Savg return (SET_ERROR(ENOTSUP)); 1634315842Savg error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp); 1635315842Savg return (error); 1636303970Savg } 1637303970Savg 1638303970Savg /* 1639303970Savg * The loop is retry the lookup if the parent-child relationship 1640303970Savg * changes during the dot-dot locking complexities. 1641303970Savg */ 1642303970Savg for (;;) { 1643303970Savg uint64_t parent; 1644303970Savg 1645303970Savg error = zfs_dirlook(zdp, nm, &zp); 1646303970Savg if (error == 0) 1647303970Savg *vpp = ZTOV(zp); 1648303970Savg 1649303970Savg ZFS_EXIT(zfsvfs); 1650303970Savg if (error != 0) 1651303970Savg break; 1652303970Savg 1653303970Savg error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags); 1654303970Savg if (error != 0) { 1655303970Savg /* 1656303970Savg * If we've got a locking error, then the vnode 1657303970Savg * got reclaimed because of a force unmount. 
1658303970Savg * We never enter doomed vnodes into the name cache. 1659303970Savg */ 1660303970Savg *vpp = NULL; 1661303970Savg return (error); 1662303970Savg } 1663303970Savg 1664303970Savg if ((cnp->cn_flags & ISDOTDOT) == 0) 1665303970Savg break; 1666303970Savg 1667303970Savg ZFS_ENTER(zfsvfs); 1668303970Savg if (zdp->z_sa_hdl == NULL) { 1669303970Savg error = SET_ERROR(EIO); 1670303970Savg } else { 1671303970Savg error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 1672303970Savg &parent, sizeof (parent)); 1673303970Savg } 1674303970Savg if (error != 0) { 1675303970Savg ZFS_EXIT(zfsvfs); 1676303970Savg vput(ZTOV(zp)); 1677303970Savg break; 1678303970Savg } 1679303970Savg if (zp->z_id == parent) { 1680303970Savg ZFS_EXIT(zfsvfs); 1681303970Savg break; 1682303970Savg } 1683303970Savg vput(ZTOV(zp)); 1684303970Savg } 1685303970Savg 1686303970Savgout: 1687303970Savg if (error != 0) 1688303970Savg *vpp = NULL; 1689303970Savg 1690168404Spjd /* Translate errors and add SAVENAME when needed. */ 1691168404Spjd if (cnp->cn_flags & ISLASTCN) { 1692168404Spjd switch (nameiop) { 1693168404Spjd case CREATE: 1694168404Spjd case RENAME: 1695168404Spjd if (error == ENOENT) { 1696168404Spjd error = EJUSTRETURN; 1697168404Spjd cnp->cn_flags |= SAVENAME; 1698168404Spjd break; 1699168404Spjd } 1700168404Spjd /* FALLTHROUGH */ 1701168404Spjd case DELETE: 1702168404Spjd if (error == 0) 1703168404Spjd cnp->cn_flags |= SAVENAME; 1704168404Spjd break; 1705168404Spjd } 1706168404Spjd } 1707169198Spjd 1708303970Savg /* Insert name into cache (as non-existent) if appropriate. */ 1709303970Savg if (zfsvfs->z_use_namecache && 1710303970Savg error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0) 1711303970Savg cache_enter(dvp, NULL, cnp); 1712168404Spjd 1713303970Savg /* Insert name into cache if appropriate. 
*/ 1714303970Savg if (zfsvfs->z_use_namecache && 1715303970Savg error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1716168404Spjd if (!(cnp->cn_flags & ISLASTCN) || 1717168404Spjd (nameiop != DELETE && nameiop != RENAME)) { 1718168404Spjd cache_enter(dvp, *vpp, cnp); 1719168404Spjd } 1720168404Spjd } 1721168404Spjd 1722168404Spjd return (error); 1723168404Spjd} 1724168404Spjd 1725168404Spjd/* 1726168404Spjd * Attempt to create a new entry in a directory. If the entry 1727168404Spjd * already exists, truncate the file if permissible, else return 1728168404Spjd * an error. Return the vp of the created or trunc'd file. 1729168404Spjd * 1730168404Spjd * IN: dvp - vnode of directory to put new file entry in. 1731168404Spjd * name - name of new file entry. 1732168404Spjd * vap - attributes of new file. 1733168404Spjd * excl - flag indicating exclusive or non-exclusive mode. 1734168404Spjd * mode - mode to open file with. 1735168404Spjd * cr - credentials of caller. 1736168404Spjd * flag - large file flag [UNUSED]. 1737185029Spjd * ct - caller context 1738268464Sdelphij * vsecp - ACL to be set 1739168404Spjd * 1740168404Spjd * OUT: vpp - vnode of created or trunc'd entry. 1741168404Spjd * 1742251631Sdelphij * RETURN: 0 on success, error code on failure. 
1743168404Spjd * 1744168404Spjd * Timestamps: 1745168404Spjd * dvp - ctime|mtime updated if new entry created 1746168404Spjd * vp - ctime|mtime always, atime if new 1747168404Spjd */ 1748185029Spjd 1749168404Spjd/* ARGSUSED */ 1750168404Spjdstatic int 1751168962Spjdzfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 1752185029Spjd vnode_t **vpp, cred_t *cr, kthread_t *td) 1753168404Spjd{ 1754168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1755168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1756185029Spjd zilog_t *zilog; 1757185029Spjd objset_t *os; 1758168404Spjd dmu_tx_t *tx; 1759168404Spjd int error; 1760209962Smm ksid_t *ksid; 1761209962Smm uid_t uid; 1762209962Smm gid_t gid = crgetgid(cr); 1763219089Spjd zfs_acl_ids_t acl_ids; 1764209962Smm boolean_t fuid_dirtied; 1765185029Spjd void *vsecp = NULL; 1766185029Spjd int flag = 0; 1767303970Savg uint64_t txtype; 1768168404Spjd 1769185029Spjd /* 1770185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 1771185029Spjd * make sure file system is at proper version 1772185029Spjd */ 1773185029Spjd 1774209962Smm ksid = crgetsid(cr, KSID_OWNER); 1775209962Smm if (ksid) 1776209962Smm uid = ksid_getid(ksid); 1777209962Smm else 1778209962Smm uid = crgetuid(cr); 1779219089Spjd 1780185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 1781185029Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 1782219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1783249195Smm return (SET_ERROR(EINVAL)); 1784185029Spjd 1785168404Spjd ZFS_ENTER(zfsvfs); 1786185029Spjd ZFS_VERIFY_ZP(dzp); 1787185029Spjd os = zfsvfs->z_os; 1788185029Spjd zilog = zfsvfs->z_log; 1789168404Spjd 1790185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1791185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1792185029Spjd ZFS_EXIT(zfsvfs); 1793249195Smm return (SET_ERROR(EILSEQ)); 1794185029Spjd } 1795185029Spjd 1796185029Spjd if (vap->va_mask & AT_XVATTR) { 1797197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 1798185029Spjd 
crgetuid(cr), cr, vap->va_type)) != 0) { 1799185029Spjd ZFS_EXIT(zfsvfs); 1800185029Spjd return (error); 1801185029Spjd } 1802185029Spjd } 1803260704Savg 1804168404Spjd *vpp = NULL; 1805168404Spjd 1806182905Strasz if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 1807182905Strasz vap->va_mode &= ~S_ISVTX; 1808168404Spjd 1809303970Savg error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 1810303970Savg if (error) { 1811303970Savg ZFS_EXIT(zfsvfs); 1812303970Savg return (error); 1813303970Savg } 1814303970Savg ASSERT3P(zp, ==, NULL); 1815185029Spjd 1816303970Savg /* 1817303970Savg * Create a new file object and update the directory 1818303970Savg * to reference it. 1819303970Savg */ 1820303970Savg if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1821303970Savg goto out; 1822168404Spjd } 1823219089Spjd 1824303970Savg /* 1825303970Savg * We only support the creation of regular files in 1826303970Savg * extended attribute directories. 1827303970Savg */ 1828168404Spjd 1829303970Savg if ((dzp->z_pflags & ZFS_XATTR) && 1830303970Savg (vap->va_type != VREG)) { 1831303970Savg error = SET_ERROR(EINVAL); 1832303970Savg goto out; 1833303970Savg } 1834168404Spjd 1835303970Savg if ((error = zfs_acl_ids_create(dzp, 0, vap, 1836303970Savg cr, vsecp, &acl_ids)) != 0) 1837303970Savg goto out; 1838219089Spjd 1839303970Savg if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 1840303970Savg zfs_acl_ids_free(&acl_ids); 1841303970Savg error = SET_ERROR(EDQUOT); 1842303970Savg goto out; 1843303970Savg } 1844168404Spjd 1845303970Savg getnewvnode_reserve(1); 1846209962Smm 1847303970Savg tx = dmu_tx_create(os); 1848209962Smm 1849303970Savg dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 1850303970Savg ZFS_SA_BASE_ATTR_SIZE); 1851219089Spjd 1852303970Savg fuid_dirtied = zfsvfs->z_fuid_dirty; 1853303970Savg if (fuid_dirtied) 1854303970Savg zfs_fuid_txhold(zfsvfs, tx); 1855303970Savg dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 1856303970Savg dmu_tx_hold_sa(tx, 
dzp->z_sa_hdl, B_FALSE); 1857303970Savg if (!zfsvfs->z_use_sa && 1858303970Savg acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1859303970Savg dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1860303970Savg 0, acl_ids.z_aclp->z_acl_bytes); 1861303970Savg } 1862303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 1863303970Savg if (error) { 1864209962Smm zfs_acl_ids_free(&acl_ids); 1865303970Savg dmu_tx_abort(tx); 1866303970Savg getnewvnode_drop_reserve(); 1867303970Savg ZFS_EXIT(zfsvfs); 1868303970Savg return (error); 1869303970Savg } 1870303970Savg zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1871185029Spjd 1872303970Savg if (fuid_dirtied) 1873303970Savg zfs_fuid_sync(zfsvfs, tx); 1874219089Spjd 1875303970Savg (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 1876303970Savg txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1877303970Savg zfs_log_create(zilog, tx, txtype, dzp, zp, name, 1878303970Savg vsecp, acl_ids.z_fuidp, vap); 1879303970Savg zfs_acl_ids_free(&acl_ids); 1880303970Savg dmu_tx_commit(tx); 1881168404Spjd 1882303970Savg getnewvnode_drop_reserve(); 1883168404Spjd 1884168404Spjdout: 1885303970Savg if (error == 0) { 1886168962Spjd *vpp = ZTOV(zp); 1887168404Spjd } 1888168404Spjd 1889219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1890219089Spjd zil_commit(zilog, 0); 1891219089Spjd 1892168404Spjd ZFS_EXIT(zfsvfs); 1893168404Spjd return (error); 1894168404Spjd} 1895168404Spjd 1896168404Spjd/* 1897168404Spjd * Remove an entry from a directory. 1898168404Spjd * 1899168404Spjd * IN: dvp - vnode of directory to remove entry from. 1900168404Spjd * name - name of entry to remove. 1901168404Spjd * cr - credentials of caller. 1902185029Spjd * ct - caller context 1903185029Spjd * flags - case flags 1904168404Spjd * 1905251631Sdelphij * RETURN: 0 on success, error code on failure. 
1906168404Spjd * 1907168404Spjd * Timestamps: 1908168404Spjd * dvp - ctime|mtime 1909168404Spjd * vp - ctime (if nlink > 0) 1910168404Spjd */ 1911219089Spjd 1912185029Spjd/*ARGSUSED*/ 1913168404Spjdstatic int 1914303970Savgzfs_remove(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) 1915168404Spjd{ 1916303970Savg znode_t *dzp = VTOZ(dvp); 1917303970Savg znode_t *zp = VTOZ(vp); 1918219089Spjd znode_t *xzp; 1919168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1920185029Spjd zilog_t *zilog; 1921168962Spjd uint64_t acl_obj, xattr_obj; 1922219089Spjd uint64_t obj = 0; 1923168404Spjd dmu_tx_t *tx; 1924185029Spjd boolean_t unlinked, toobig = FALSE; 1925185029Spjd uint64_t txtype; 1926168404Spjd int error; 1927168404Spjd 1928168404Spjd ZFS_ENTER(zfsvfs); 1929185029Spjd ZFS_VERIFY_ZP(dzp); 1930303970Savg ZFS_VERIFY_ZP(zp); 1931185029Spjd zilog = zfsvfs->z_log; 1932303970Savg zp = VTOZ(vp); 1933168404Spjd 1934219089Spjd xattr_obj = 0; 1935219089Spjd xzp = NULL; 1936168404Spjd 1937168962Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1938168404Spjd goto out; 1939168962Spjd } 1940168404Spjd 1941168962Spjd /* 1942168962Spjd * Need to use rmdir for removing directories. 1943168962Spjd */ 1944168962Spjd if (vp->v_type == VDIR) { 1945249195Smm error = SET_ERROR(EPERM); 1946168962Spjd goto out; 1947168962Spjd } 1948168962Spjd 1949185029Spjd vnevent_remove(vp, dvp, name, ct); 1950168962Spjd 1951303970Savg obj = zp->z_id; 1952168404Spjd 1953303970Savg /* are there any extended attributes? 
*/ 1954303970Savg error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1955303970Savg &xattr_obj, sizeof (xattr_obj)); 1956303970Savg if (error == 0 && xattr_obj) { 1957303970Savg error = zfs_zget(zfsvfs, xattr_obj, &xzp); 1958303970Savg ASSERT0(error); 1959303970Savg } 1960168962Spjd 1961168404Spjd /* 1962168404Spjd * We may delete the znode now, or we may put it in the unlinked set; 1963168404Spjd * it depends on whether we're the last link, and on whether there are 1964168404Spjd * other holds on the vnode. So we dmu_tx_hold() the right things to 1965168404Spjd * allow for either case. 1966168404Spjd */ 1967168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1968168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1969219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1970219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1971219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 1972168404Spjd 1973303970Savg if (xzp) { 1974219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 1975219089Spjd dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1976168404Spjd } 1977168404Spjd 1978168404Spjd /* charge as an update -- would be nice not to charge at all */ 1979168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1980168404Spjd 1981268464Sdelphij /* 1982294803Smav * Mark this transaction as typically resulting in a net free of space 1983268464Sdelphij */ 1984294803Smav dmu_tx_mark_netfree(tx); 1985268464Sdelphij 1986303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 1987168404Spjd if (error) { 1988168404Spjd dmu_tx_abort(tx); 1989168404Spjd ZFS_EXIT(zfsvfs); 1990168404Spjd return (error); 1991168404Spjd } 1992168404Spjd 1993168404Spjd /* 1994168404Spjd * Remove the directory entry. 
1995168404Spjd */ 1996303970Savg error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked); 1997168404Spjd 1998168404Spjd if (error) { 1999168404Spjd dmu_tx_commit(tx); 2000168404Spjd goto out; 2001168404Spjd } 2002168404Spjd 2003219089Spjd if (unlinked) { 2004168404Spjd zfs_unlinked_add(zp, tx); 2005243268Savg vp->v_vflag |= VV_NOSYNC; 2006168962Spjd } 2007168404Spjd 2008185029Spjd txtype = TX_REMOVE; 2009219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 2010168404Spjd 2011168404Spjd dmu_tx_commit(tx); 2012168404Spjdout: 2013185029Spjd 2014219089Spjd if (xzp) 2015303970Savg vrele(ZTOV(xzp)); 2016168962Spjd 2017219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2018219089Spjd zil_commit(zilog, 0); 2019219089Spjd 2020168404Spjd ZFS_EXIT(zfsvfs); 2021168404Spjd return (error); 2022168404Spjd} 2023168404Spjd 2024168404Spjd/* 2025168404Spjd * Create a new directory and insert it into dvp using the name 2026168404Spjd * provided. Return a pointer to the inserted directory. 2027168404Spjd * 2028168404Spjd * IN: dvp - vnode of directory to add subdir to. 2029168404Spjd * dirname - name of new directory. 2030168404Spjd * vap - attributes of new directory. 2031168404Spjd * cr - credentials of caller. 2032185029Spjd * ct - caller context 2033251631Sdelphij * flags - case flags 2034185029Spjd * vsecp - ACL to be set 2035168404Spjd * 2036168404Spjd * OUT: vpp - vnode of created directory. 2037168404Spjd * 2038251631Sdelphij * RETURN: 0 on success, error code on failure. 
2039168404Spjd * 2040168404Spjd * Timestamps: 2041168404Spjd * dvp - ctime|mtime updated 2042168404Spjd * vp - ctime|mtime|atime updated 2043168404Spjd */ 2044185029Spjd/*ARGSUSED*/ 2045168404Spjdstatic int 2046303970Savgzfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr) 2047168404Spjd{ 2048168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 2049168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2050185029Spjd zilog_t *zilog; 2051185029Spjd uint64_t txtype; 2052168404Spjd dmu_tx_t *tx; 2053168404Spjd int error; 2054209962Smm ksid_t *ksid; 2055209962Smm uid_t uid; 2056209962Smm gid_t gid = crgetgid(cr); 2057219089Spjd zfs_acl_ids_t acl_ids; 2058209962Smm boolean_t fuid_dirtied; 2059168404Spjd 2060168404Spjd ASSERT(vap->va_type == VDIR); 2061168404Spjd 2062185029Spjd /* 2063185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 2064185029Spjd * make sure file system is at proper version 2065185029Spjd */ 2066185029Spjd 2067209962Smm ksid = crgetsid(cr, KSID_OWNER); 2068209962Smm if (ksid) 2069209962Smm uid = ksid_getid(ksid); 2070209962Smm else 2071209962Smm uid = crgetuid(cr); 2072185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2073303970Savg ((vap->va_mask & AT_XVATTR) || 2074219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2075249195Smm return (SET_ERROR(EINVAL)); 2076185029Spjd 2077168404Spjd ZFS_ENTER(zfsvfs); 2078185029Spjd ZFS_VERIFY_ZP(dzp); 2079185029Spjd zilog = zfsvfs->z_log; 2080168404Spjd 2081219089Spjd if (dzp->z_pflags & ZFS_XATTR) { 2082168404Spjd ZFS_EXIT(zfsvfs); 2083249195Smm return (SET_ERROR(EINVAL)); 2084168404Spjd } 2085168404Spjd 2086185029Spjd if (zfsvfs->z_utf8 && u8_validate(dirname, 2087185029Spjd strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2088185029Spjd ZFS_EXIT(zfsvfs); 2089249195Smm return (SET_ERROR(EILSEQ)); 2090185029Spjd } 2091185029Spjd 2092219089Spjd if (vap->va_mask & AT_XVATTR) { 2093197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2094185029Spjd crgetuid(cr), cr, 
vap->va_type)) != 0) { 2095185029Spjd ZFS_EXIT(zfsvfs); 2096185029Spjd return (error); 2097185029Spjd } 2098219089Spjd } 2099185029Spjd 2100219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 2101303970Savg NULL, &acl_ids)) != 0) { 2102219089Spjd ZFS_EXIT(zfsvfs); 2103219089Spjd return (error); 2104219089Spjd } 2105260704Savg 2106168404Spjd /* 2107168404Spjd * First make sure the new directory doesn't exist. 2108219089Spjd * 2109219089Spjd * Existence is checked first to make sure we don't return 2110219089Spjd * EACCES instead of EEXIST which can cause some applications 2111219089Spjd * to fail. 2112168404Spjd */ 2113185029Spjd *vpp = NULL; 2114185029Spjd 2115303970Savg if (error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW)) { 2116219089Spjd zfs_acl_ids_free(&acl_ids); 2117168404Spjd ZFS_EXIT(zfsvfs); 2118168404Spjd return (error); 2119168404Spjd } 2120303970Savg ASSERT3P(zp, ==, NULL); 2121168404Spjd 2122185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 2123219089Spjd zfs_acl_ids_free(&acl_ids); 2124168404Spjd ZFS_EXIT(zfsvfs); 2125168404Spjd return (error); 2126168404Spjd } 2127168404Spjd 2128209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2129211932Smm zfs_acl_ids_free(&acl_ids); 2130209962Smm ZFS_EXIT(zfsvfs); 2131249195Smm return (SET_ERROR(EDQUOT)); 2132209962Smm } 2133209962Smm 2134168404Spjd /* 2135168404Spjd * Add a new entry to the directory. 
2136168404Spjd */ 2137303970Savg getnewvnode_reserve(1); 2138168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2139168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 2140168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2141209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 2142209962Smm if (fuid_dirtied) 2143209962Smm zfs_fuid_txhold(zfsvfs, tx); 2144219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2145219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2146219089Spjd acl_ids.z_aclp->z_acl_bytes); 2147219089Spjd } 2148219089Spjd 2149219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2150219089Spjd ZFS_SA_BASE_ATTR_SIZE); 2151219089Spjd 2152303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 2153168404Spjd if (error) { 2154219089Spjd zfs_acl_ids_free(&acl_ids); 2155168404Spjd dmu_tx_abort(tx); 2156260704Savg getnewvnode_drop_reserve(); 2157168404Spjd ZFS_EXIT(zfsvfs); 2158168404Spjd return (error); 2159168404Spjd } 2160168404Spjd 2161168404Spjd /* 2162168404Spjd * Create new node. 2163168404Spjd */ 2164219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2165168404Spjd 2166209962Smm if (fuid_dirtied) 2167209962Smm zfs_fuid_sync(zfsvfs, tx); 2168219089Spjd 2169168404Spjd /* 2170168404Spjd * Now put new name in parent dir. 
2171168404Spjd */ 2172303970Savg (void) zfs_link_create(dzp, dirname, zp, tx, ZNEW); 2173168404Spjd 2174168404Spjd *vpp = ZTOV(zp); 2175168404Spjd 2176303970Savg txtype = zfs_log_create_txtype(Z_DIR, NULL, vap); 2177303970Savg zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL, 2178209962Smm acl_ids.z_fuidp, vap); 2179185029Spjd 2180209962Smm zfs_acl_ids_free(&acl_ids); 2181219089Spjd 2182168404Spjd dmu_tx_commit(tx); 2183168404Spjd 2184260704Savg getnewvnode_drop_reserve(); 2185260704Savg 2186219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2187219089Spjd zil_commit(zilog, 0); 2188219089Spjd 2189168404Spjd ZFS_EXIT(zfsvfs); 2190168404Spjd return (0); 2191168404Spjd} 2192168404Spjd 2193168404Spjd/* 2194168404Spjd * Remove a directory subdir entry. If the current working 2195168404Spjd * directory is the same as the subdir to be removed, the 2196168404Spjd * remove will fail. 2197168404Spjd * 2198168404Spjd * IN: dvp - vnode of directory to remove from. 2199168404Spjd * name - name of directory to be removed. 2200168404Spjd * cwd - vnode of current working directory. 2201168404Spjd * cr - credentials of caller. 2202185029Spjd * ct - caller context 2203185029Spjd * flags - case flags 2204168404Spjd * 2205251631Sdelphij * RETURN: 0 on success, error code on failure. 
2206168404Spjd * 2207168404Spjd * Timestamps: 2208168404Spjd * dvp - ctime|mtime updated 2209168404Spjd */ 2210185029Spjd/*ARGSUSED*/ 2211168404Spjdstatic int 2212303970Savgzfs_rmdir(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) 2213168404Spjd{ 2214168404Spjd znode_t *dzp = VTOZ(dvp); 2215303970Savg znode_t *zp = VTOZ(vp); 2216168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2217185029Spjd zilog_t *zilog; 2218168404Spjd dmu_tx_t *tx; 2219168404Spjd int error; 2220168404Spjd 2221168962Spjd ZFS_ENTER(zfsvfs); 2222185029Spjd ZFS_VERIFY_ZP(dzp); 2223303970Savg ZFS_VERIFY_ZP(zp); 2224185029Spjd zilog = zfsvfs->z_log; 2225168404Spjd 2226168404Spjd 2227168404Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2228168404Spjd goto out; 2229168404Spjd } 2230168404Spjd 2231168962Spjd if (vp->v_type != VDIR) { 2232249195Smm error = SET_ERROR(ENOTDIR); 2233168962Spjd goto out; 2234168962Spjd } 2235168962Spjd 2236185029Spjd vnevent_rmdir(vp, dvp, name, ct); 2237168962Spjd 2238168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2239168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2240219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2241168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2242219089Spjd zfs_sa_upgrade_txholds(tx, zp); 2243219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 2244304122Savg dmu_tx_mark_netfree(tx); 2245303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 2246168404Spjd if (error) { 2247168404Spjd dmu_tx_abort(tx); 2248168404Spjd ZFS_EXIT(zfsvfs); 2249168404Spjd return (error); 2250168404Spjd } 2251168404Spjd 2252168404Spjd cache_purge(dvp); 2253168404Spjd 2254303970Savg error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL); 2255168404Spjd 2256185029Spjd if (error == 0) { 2257185029Spjd uint64_t txtype = TX_RMDIR; 2258219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2259185029Spjd } 2260168404Spjd 2261168404Spjd dmu_tx_commit(tx); 2262168404Spjd 2263168404Spjd cache_purge(vp); 2264168404Spjdout: 2265219089Spjd if 
(zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2266219089Spjd zil_commit(zilog, 0); 2267219089Spjd 2268168404Spjd ZFS_EXIT(zfsvfs); 2269168404Spjd return (error); 2270168404Spjd} 2271168404Spjd 2272168404Spjd/* 2273168404Spjd * Read as many directory entries as will fit into the provided 2274168404Spjd * buffer from the given directory cursor position (specified in 2275251631Sdelphij * the uio structure). 2276168404Spjd * 2277168404Spjd * IN: vp - vnode of directory to read. 2278168404Spjd * uio - structure supplying read location, range info, 2279168404Spjd * and return buffer. 2280168404Spjd * cr - credentials of caller. 2281185029Spjd * ct - caller context 2282185029Spjd * flags - case flags 2283168404Spjd * 2284168404Spjd * OUT: uio - updated offset and range, buffer filled. 2285168404Spjd * eofp - set to true if end-of-file detected. 2286168404Spjd * 2287251631Sdelphij * RETURN: 0 on success, error code on failure. 2288168404Spjd * 2289168404Spjd * Timestamps: 2290168404Spjd * vp - atime updated 2291168404Spjd * 2292168404Spjd * Note that the low 4 bits of the cookie returned by zap is always zero. 2293168404Spjd * This allows us to use the low range for "special" directory entries: 2294168404Spjd * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2295168404Spjd * we use the offset 2 for the '.zfs' directory. 
2296168404Spjd */ 2297168404Spjd/* ARGSUSED */ 2298168404Spjdstatic int 2299168962Spjdzfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) 2300168404Spjd{ 2301168404Spjd znode_t *zp = VTOZ(vp); 2302168404Spjd iovec_t *iovp; 2303185029Spjd edirent_t *eodp; 2304168404Spjd dirent64_t *odp; 2305168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2306168404Spjd objset_t *os; 2307168404Spjd caddr_t outbuf; 2308168404Spjd size_t bufsize; 2309168404Spjd zap_cursor_t zc; 2310168404Spjd zap_attribute_t zap; 2311168404Spjd uint_t bytes_wanted; 2312168404Spjd uint64_t offset; /* must be unsigned; checks for < 1 */ 2313219089Spjd uint64_t parent; 2314168404Spjd int local_eof; 2315168404Spjd int outcount; 2316168404Spjd int error; 2317168404Spjd uint8_t prefetch; 2318185029Spjd boolean_t check_sysattrs; 2319168404Spjd uint8_t type; 2320168962Spjd int ncooks; 2321168962Spjd u_long *cooks = NULL; 2322185029Spjd int flags = 0; 2323168404Spjd 2324168404Spjd ZFS_ENTER(zfsvfs); 2325185029Spjd ZFS_VERIFY_ZP(zp); 2326168404Spjd 2327219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 2328219089Spjd &parent, sizeof (parent))) != 0) { 2329219089Spjd ZFS_EXIT(zfsvfs); 2330219089Spjd return (error); 2331219089Spjd } 2332219089Spjd 2333168404Spjd /* 2334168404Spjd * If we are not given an eof variable, 2335168404Spjd * use a local one. 2336168404Spjd */ 2337168404Spjd if (eofp == NULL) 2338168404Spjd eofp = &local_eof; 2339168404Spjd 2340168404Spjd /* 2341168404Spjd * Check for valid iov_len. 
2342168404Spjd */ 2343168404Spjd if (uio->uio_iov->iov_len <= 0) { 2344168404Spjd ZFS_EXIT(zfsvfs); 2345249195Smm return (SET_ERROR(EINVAL)); 2346168404Spjd } 2347168404Spjd 2348168404Spjd /* 2349168404Spjd * Quit if directory has been removed (posix) 2350168404Spjd */ 2351168404Spjd if ((*eofp = zp->z_unlinked) != 0) { 2352168404Spjd ZFS_EXIT(zfsvfs); 2353168404Spjd return (0); 2354168404Spjd } 2355168404Spjd 2356168404Spjd error = 0; 2357168404Spjd os = zfsvfs->z_os; 2358168404Spjd offset = uio->uio_loffset; 2359168404Spjd prefetch = zp->z_zn_prefetch; 2360168404Spjd 2361168404Spjd /* 2362168404Spjd * Initialize the iterator cursor. 2363168404Spjd */ 2364168404Spjd if (offset <= 3) { 2365168404Spjd /* 2366168404Spjd * Start iteration from the beginning of the directory. 2367168404Spjd */ 2368168404Spjd zap_cursor_init(&zc, os, zp->z_id); 2369168404Spjd } else { 2370168404Spjd /* 2371168404Spjd * The offset is a serialized cursor. 2372168404Spjd */ 2373168404Spjd zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2374168404Spjd } 2375168404Spjd 2376168404Spjd /* 2377168404Spjd * Get space to change directory entries into fs independent format. 2378168404Spjd */ 2379168404Spjd iovp = uio->uio_iov; 2380168404Spjd bytes_wanted = iovp->iov_len; 2381168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2382168404Spjd bufsize = bytes_wanted; 2383168404Spjd outbuf = kmem_alloc(bufsize, KM_SLEEP); 2384168404Spjd odp = (struct dirent64 *)outbuf; 2385168404Spjd } else { 2386168404Spjd bufsize = bytes_wanted; 2387247187Smm outbuf = NULL; 2388168404Spjd odp = (struct dirent64 *)iovp->iov_base; 2389168404Spjd } 2390185029Spjd eodp = (struct edirent *)odp; 2391168404Spjd 2392169170Spjd if (ncookies != NULL) { 2393168404Spjd /* 2394168404Spjd * Minimum entry size is dirent size and 1 byte for a file name. 
2395168404Spjd */ 2396168962Spjd ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2397219404Spjd cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2398219404Spjd *cookies = cooks; 2399168962Spjd *ncookies = ncooks; 2400168404Spjd } 2401185029Spjd /* 2402185029Spjd * If this VFS supports the system attribute view interface; and 2403185029Spjd * we're looking at an extended attribute directory; and we care 2404185029Spjd * about normalization conflicts on this vfs; then we must check 2405185029Spjd * for normalization conflicts with the sysattr name space. 2406185029Spjd */ 2407185029Spjd#ifdef TODO 2408185029Spjd check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2409185029Spjd (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2410185029Spjd (flags & V_RDDIR_ENTFLAGS); 2411185029Spjd#else 2412185029Spjd check_sysattrs = 0; 2413185029Spjd#endif 2414168404Spjd 2415168404Spjd /* 2416168404Spjd * Transform to file-system independent format 2417168404Spjd */ 2418168404Spjd outcount = 0; 2419168404Spjd while (outcount < bytes_wanted) { 2420168404Spjd ino64_t objnum; 2421168404Spjd ushort_t reclen; 2422219089Spjd off64_t *next = NULL; 2423168404Spjd 2424168404Spjd /* 2425168404Spjd * Special case `.', `..', and `.zfs'. 
2426168404Spjd */ 2427168404Spjd if (offset == 0) { 2428168404Spjd (void) strcpy(zap.za_name, "."); 2429185029Spjd zap.za_normalization_conflict = 0; 2430168404Spjd objnum = zp->z_id; 2431169108Spjd type = DT_DIR; 2432168404Spjd } else if (offset == 1) { 2433168404Spjd (void) strcpy(zap.za_name, ".."); 2434185029Spjd zap.za_normalization_conflict = 0; 2435219089Spjd objnum = parent; 2436169108Spjd type = DT_DIR; 2437168404Spjd } else if (offset == 2 && zfs_show_ctldir(zp)) { 2438168404Spjd (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2439185029Spjd zap.za_normalization_conflict = 0; 2440168404Spjd objnum = ZFSCTL_INO_ROOT; 2441169108Spjd type = DT_DIR; 2442168404Spjd } else { 2443168404Spjd /* 2444168404Spjd * Grab next entry. 2445168404Spjd */ 2446168404Spjd if (error = zap_cursor_retrieve(&zc, &zap)) { 2447168404Spjd if ((*eofp = (error == ENOENT)) != 0) 2448168404Spjd break; 2449168404Spjd else 2450168404Spjd goto update; 2451168404Spjd } 2452168404Spjd 2453168404Spjd if (zap.za_integer_length != 8 || 2454168404Spjd zap.za_num_integers != 1) { 2455168404Spjd cmn_err(CE_WARN, "zap_readdir: bad directory " 2456168404Spjd "entry, obj = %lld, offset = %lld\n", 2457168404Spjd (u_longlong_t)zp->z_id, 2458168404Spjd (u_longlong_t)offset); 2459249195Smm error = SET_ERROR(ENXIO); 2460168404Spjd goto update; 2461168404Spjd } 2462168404Spjd 2463168404Spjd objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2464168404Spjd /* 2465168404Spjd * MacOS X can extract the object type here such as: 2466168404Spjd * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2467168404Spjd */ 2468168404Spjd type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2469185029Spjd 2470185029Spjd if (check_sysattrs && !zap.za_normalization_conflict) { 2471185029Spjd#ifdef TODO 2472185029Spjd zap.za_normalization_conflict = 2473185029Spjd xattr_sysattr_casechk(zap.za_name); 2474185029Spjd#else 2475185029Spjd panic("%s:%u: TODO", __func__, __LINE__); 2476185029Spjd#endif 2477185029Spjd } 2478168404Spjd } 
2479168404Spjd 2480211932Smm if (flags & V_RDDIR_ACCFILTER) { 2481211932Smm /* 2482211932Smm * If we have no access at all, don't include 2483211932Smm * this entry in the returned information 2484211932Smm */ 2485211932Smm znode_t *ezp; 2486211932Smm if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2487211932Smm goto skip_entry; 2488211932Smm if (!zfs_has_access(ezp, cr)) { 2489303970Savg vrele(ZTOV(ezp)); 2490211932Smm goto skip_entry; 2491211932Smm } 2492303970Savg vrele(ZTOV(ezp)); 2493211932Smm } 2494211932Smm 2495185029Spjd if (flags & V_RDDIR_ENTFLAGS) 2496185029Spjd reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2497185029Spjd else 2498185029Spjd reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2499185029Spjd 2500168404Spjd /* 2501168404Spjd * Will this entry fit in the buffer? 2502168404Spjd */ 2503168404Spjd if (outcount + reclen > bufsize) { 2504168404Spjd /* 2505168404Spjd * Did we manage to fit anything in the buffer? 2506168404Spjd */ 2507168404Spjd if (!outcount) { 2508249195Smm error = SET_ERROR(EINVAL); 2509168404Spjd goto update; 2510168404Spjd } 2511168404Spjd break; 2512168404Spjd } 2513185029Spjd if (flags & V_RDDIR_ENTFLAGS) { 2514185029Spjd /* 2515185029Spjd * Add extended flag entry: 2516185029Spjd */ 2517185029Spjd eodp->ed_ino = objnum; 2518185029Spjd eodp->ed_reclen = reclen; 2519185029Spjd /* NOTE: ed_off is the offset for the *next* entry */ 2520185029Spjd next = &(eodp->ed_off); 2521185029Spjd eodp->ed_eflags = zap.za_normalization_conflict ? 
2522185029Spjd ED_CASE_CONFLICT : 0; 2523185029Spjd (void) strncpy(eodp->ed_name, zap.za_name, 2524185029Spjd EDIRENT_NAMELEN(reclen)); 2525185029Spjd eodp = (edirent_t *)((intptr_t)eodp + reclen); 2526185029Spjd } else { 2527185029Spjd /* 2528185029Spjd * Add normal entry: 2529185029Spjd */ 2530185029Spjd odp->d_ino = objnum; 2531185029Spjd odp->d_reclen = reclen; 2532185029Spjd odp->d_namlen = strlen(zap.za_name); 2533185029Spjd (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2534185029Spjd odp->d_type = type; 2535185029Spjd odp = (dirent64_t *)((intptr_t)odp + reclen); 2536185029Spjd } 2537168404Spjd outcount += reclen; 2538168404Spjd 2539168404Spjd ASSERT(outcount <= bufsize); 2540168404Spjd 2541168404Spjd /* Prefetch znode */ 2542168404Spjd if (prefetch) 2543286705Smav dmu_prefetch(os, objnum, 0, 0, 0, 2544286705Smav ZIO_PRIORITY_SYNC_READ); 2545168404Spjd 2546211932Smm skip_entry: 2547168404Spjd /* 2548168404Spjd * Move to the next entry, fill in the previous offset. 2549168404Spjd */ 2550168404Spjd if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2551168404Spjd zap_cursor_advance(&zc); 2552168404Spjd offset = zap_cursor_serialize(&zc); 2553168404Spjd } else { 2554168404Spjd offset += 1; 2555168404Spjd } 2556219404Spjd 2557219404Spjd if (cooks != NULL) { 2558219404Spjd *cooks++ = offset; 2559219404Spjd ncooks--; 2560219404Spjd KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2561219404Spjd } 2562168404Spjd } 2563168404Spjd zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2564168404Spjd 2565168404Spjd /* Subtract unused cookies */ 2566168962Spjd if (ncookies != NULL) 2567168962Spjd *ncookies -= ncooks; 2568168404Spjd 2569168404Spjd if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2570168404Spjd iovp->iov_base += outcount; 2571168404Spjd iovp->iov_len -= outcount; 2572168404Spjd uio->uio_resid -= outcount; 2573168404Spjd } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2574168404Spjd 
/* 2575168404Spjd * Reset the pointer. 2576168404Spjd */ 2577168404Spjd offset = uio->uio_loffset; 2578168404Spjd } 2579168404Spjd 2580168404Spjdupdate: 2581168404Spjd zap_cursor_fini(&zc); 2582168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2583168404Spjd kmem_free(outbuf, bufsize); 2584168404Spjd 2585168404Spjd if (error == ENOENT) 2586168404Spjd error = 0; 2587168404Spjd 2588168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2589168404Spjd 2590168404Spjd uio->uio_loffset = offset; 2591168404Spjd ZFS_EXIT(zfsvfs); 2592169107Spjd if (error != 0 && cookies != NULL) { 2593168962Spjd free(*cookies, M_TEMP); 2594168962Spjd *cookies = NULL; 2595168962Spjd *ncookies = 0; 2596168404Spjd } 2597168404Spjd return (error); 2598168404Spjd} 2599168404Spjd 2600185029Spjdulong_t zfs_fsync_sync_cnt = 4; 2601185029Spjd 2602168404Spjdstatic int 2603185029Spjdzfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2604168404Spjd{ 2605168962Spjd znode_t *zp = VTOZ(vp); 2606168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2607168404Spjd 2608185029Spjd (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2609185029Spjd 2610219089Spjd if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 2611219089Spjd ZFS_ENTER(zfsvfs); 2612219089Spjd ZFS_VERIFY_ZP(zp); 2613219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 2614219089Spjd ZFS_EXIT(zfsvfs); 2615219089Spjd } 2616168404Spjd return (0); 2617168404Spjd} 2618168404Spjd 2619185029Spjd 2620168404Spjd/* 2621168404Spjd * Get the requested file attributes and place them in the provided 2622168404Spjd * vattr structure. 2623168404Spjd * 2624168404Spjd * IN: vp - vnode of file. 2625168404Spjd * vap - va_mask identifies requested attributes. 2626185029Spjd * If AT_XVATTR set, then optional attrs are requested 2627185029Spjd * flags - ATTR_NOACLCHECK (CIFS server context) 2628168404Spjd * cr - credentials of caller. 2629185029Spjd * ct - caller context 2630168404Spjd * 2631168404Spjd * OUT: vap - attribute values. 
2632168404Spjd * 2633251631Sdelphij * RETURN: 0 (always succeeds). 2634168404Spjd */ 2635168404Spjd/* ARGSUSED */ 2636168404Spjdstatic int 2637185029Spjdzfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2638185029Spjd caller_context_t *ct) 2639168404Spjd{ 2640168962Spjd znode_t *zp = VTOZ(vp); 2641168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2642185029Spjd int error = 0; 2643168962Spjd uint32_t blksize; 2644168962Spjd u_longlong_t nblocks; 2645185029Spjd uint64_t links; 2646224251Sdelphij uint64_t mtime[2], ctime[2], crtime[2], rdev; 2647185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2648185029Spjd xoptattr_t *xoap = NULL; 2649185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2650224251Sdelphij sa_bulk_attr_t bulk[4]; 2651219089Spjd int count = 0; 2652168404Spjd 2653168404Spjd ZFS_ENTER(zfsvfs); 2654185029Spjd ZFS_VERIFY_ZP(zp); 2655168404Spjd 2656219089Spjd zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2657219089Spjd 2658219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 2659219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 2660243807Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 2661224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2662224251Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 2663224251Sdelphij &rdev, 8); 2664219089Spjd 2665219089Spjd if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 2666219089Spjd ZFS_EXIT(zfsvfs); 2667219089Spjd return (error); 2668219089Spjd } 2669219089Spjd 2670168404Spjd /* 2671185029Spjd * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2672185029Spjd * Also, if we are the owner don't bother, since owner should 2673185029Spjd * always be allowed to read basic attributes of file. 
2674185029Spjd */ 2675219089Spjd if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2676219089Spjd (vap->va_uid != crgetuid(cr))) { 2677185029Spjd if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2678185029Spjd skipaclchk, cr)) { 2679185029Spjd ZFS_EXIT(zfsvfs); 2680185029Spjd return (error); 2681185029Spjd } 2682185029Spjd } 2683185029Spjd 2684185029Spjd /* 2685168404Spjd * Return all attributes. It's cheaper to provide the answer 2686168404Spjd * than to determine whether we were asked the question. 2687168404Spjd */ 2688168404Spjd 2689219089Spjd vap->va_type = IFTOVT(zp->z_mode); 2690219089Spjd vap->va_mode = zp->z_mode & ~S_IFMT; 2691277300Ssmh#ifdef illumos 2692224252Sdelphij vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2693224252Sdelphij#else 2694224252Sdelphij vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 2695224252Sdelphij#endif 2696168404Spjd vap->va_nodeid = zp->z_id; 2697185029Spjd if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2698219089Spjd links = zp->z_links + 1; 2699185029Spjd else 2700219089Spjd links = zp->z_links; 2701229425Sdim vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! */ 2702219089Spjd vap->va_size = zp->z_size; 2703277300Ssmh#ifdef illumos 2704224252Sdelphij vap->va_rdev = vp->v_rdev; 2705224252Sdelphij#else 2706224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2707224251Sdelphij vap->va_rdev = zfs_cmpldev(rdev); 2708224252Sdelphij#endif 2709168404Spjd vap->va_seq = zp->z_seq; 2710168404Spjd vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2711272467Saraujo vap->va_filerev = zp->z_seq; 2712168404Spjd 2713185029Spjd /* 2714185029Spjd * Add in any requested optional attributes and the create time. 2715185029Spjd * Also set the corresponding bits in the returned attribute bitmap. 
2716185029Spjd */ 2717185029Spjd if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2718185029Spjd if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2719185029Spjd xoap->xoa_archive = 2720219089Spjd ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2721185029Spjd XVA_SET_RTN(xvap, XAT_ARCHIVE); 2722185029Spjd } 2723185029Spjd 2724185029Spjd if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2725185029Spjd xoap->xoa_readonly = 2726219089Spjd ((zp->z_pflags & ZFS_READONLY) != 0); 2727185029Spjd XVA_SET_RTN(xvap, XAT_READONLY); 2728185029Spjd } 2729185029Spjd 2730185029Spjd if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2731185029Spjd xoap->xoa_system = 2732219089Spjd ((zp->z_pflags & ZFS_SYSTEM) != 0); 2733185029Spjd XVA_SET_RTN(xvap, XAT_SYSTEM); 2734185029Spjd } 2735185029Spjd 2736185029Spjd if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2737185029Spjd xoap->xoa_hidden = 2738219089Spjd ((zp->z_pflags & ZFS_HIDDEN) != 0); 2739185029Spjd XVA_SET_RTN(xvap, XAT_HIDDEN); 2740185029Spjd } 2741185029Spjd 2742185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2743185029Spjd xoap->xoa_nounlink = 2744219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2745185029Spjd XVA_SET_RTN(xvap, XAT_NOUNLINK); 2746185029Spjd } 2747185029Spjd 2748185029Spjd if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2749185029Spjd xoap->xoa_immutable = 2750219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2751185029Spjd XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2752185029Spjd } 2753185029Spjd 2754185029Spjd if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2755185029Spjd xoap->xoa_appendonly = 2756219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2757185029Spjd XVA_SET_RTN(xvap, XAT_APPENDONLY); 2758185029Spjd } 2759185029Spjd 2760185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2761185029Spjd xoap->xoa_nodump = 2762219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0); 2763185029Spjd XVA_SET_RTN(xvap, XAT_NODUMP); 2764185029Spjd } 2765185029Spjd 2766185029Spjd if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2767185029Spjd xoap->xoa_opaque = 2768219089Spjd 
((zp->z_pflags & ZFS_OPAQUE) != 0); 2769185029Spjd XVA_SET_RTN(xvap, XAT_OPAQUE); 2770185029Spjd } 2771185029Spjd 2772185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2773185029Spjd xoap->xoa_av_quarantined = 2774219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2775185029Spjd XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2776185029Spjd } 2777185029Spjd 2778185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2779185029Spjd xoap->xoa_av_modified = 2780219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2781185029Spjd XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2782185029Spjd } 2783185029Spjd 2784185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2785219089Spjd vp->v_type == VREG) { 2786219089Spjd zfs_sa_get_scanstamp(zp, xvap); 2787185029Spjd } 2788185029Spjd 2789219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2790219089Spjd xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 2791219089Spjd XVA_SET_RTN(xvap, XAT_REPARSE); 2792219089Spjd } 2793219089Spjd if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 2794219089Spjd xoap->xoa_generation = zp->z_gen; 2795219089Spjd XVA_SET_RTN(xvap, XAT_GEN); 2796219089Spjd } 2797219089Spjd 2798219089Spjd if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2799219089Spjd xoap->xoa_offline = 2800219089Spjd ((zp->z_pflags & ZFS_OFFLINE) != 0); 2801219089Spjd XVA_SET_RTN(xvap, XAT_OFFLINE); 2802219089Spjd } 2803219089Spjd 2804219089Spjd if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2805219089Spjd xoap->xoa_sparse = 2806219089Spjd ((zp->z_pflags & ZFS_SPARSE) != 0); 2807219089Spjd XVA_SET_RTN(xvap, XAT_SPARSE); 2808219089Spjd } 2809185029Spjd } 2810185029Spjd 2811219089Spjd ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 2812219089Spjd ZFS_TIME_DECODE(&vap->va_mtime, mtime); 2813219089Spjd ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2814219089Spjd ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 2815168404Spjd 2816168404Spjd 2817219089Spjd sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 2818168404Spjd vap->va_blksize = blksize; 2819168404Spjd vap->va_bytes = 
nblocks << 9; /* nblocks * 512 */ 2820168404Spjd 2821168404Spjd if (zp->z_blksz == 0) { 2822168404Spjd /* 2823168404Spjd * Block size hasn't been set; suggest maximal I/O transfers. 2824168404Spjd */ 2825168404Spjd vap->va_blksize = zfsvfs->z_max_blksz; 2826168404Spjd } 2827168404Spjd 2828168404Spjd ZFS_EXIT(zfsvfs); 2829168404Spjd return (0); 2830168404Spjd} 2831168404Spjd 2832168404Spjd/* 2833168404Spjd * Set the file attributes to the values contained in the 2834168404Spjd * vattr structure. 2835168404Spjd * 2836168404Spjd * IN: vp - vnode of file to be modified. 2837168404Spjd * vap - new attribute values. 2838185029Spjd * If AT_XVATTR set, then optional attrs are being set 2839168404Spjd * flags - ATTR_UTIME set if non-default time values provided. 2840185029Spjd * - ATTR_NOACLCHECK (CIFS context only). 2841168404Spjd * cr - credentials of caller. 2842185029Spjd * ct - caller context 2843168404Spjd * 2844251631Sdelphij * RETURN: 0 on success, error code on failure. 2845168404Spjd * 2846168404Spjd * Timestamps: 2847168404Spjd * vp - ctime updated, mtime updated if size changed. 
2848168404Spjd */ 2849168404Spjd/* ARGSUSED */ 2850168404Spjdstatic int 2851168962Spjdzfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2852251631Sdelphij caller_context_t *ct) 2853168404Spjd{ 2854185029Spjd znode_t *zp = VTOZ(vp); 2855168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2856185029Spjd zilog_t *zilog; 2857168404Spjd dmu_tx_t *tx; 2858168404Spjd vattr_t oldva; 2859209962Smm xvattr_t tmpxvattr; 2860168962Spjd uint_t mask = vap->va_mask; 2861247187Smm uint_t saved_mask = 0; 2862197831Spjd uint64_t saved_mode; 2863168404Spjd int trim_mask = 0; 2864168404Spjd uint64_t new_mode; 2865209962Smm uint64_t new_uid, new_gid; 2866219089Spjd uint64_t xattr_obj; 2867219089Spjd uint64_t mtime[2], ctime[2]; 2868168404Spjd znode_t *attrzp; 2869168404Spjd int need_policy = FALSE; 2870219089Spjd int err, err2; 2871185029Spjd zfs_fuid_info_t *fuidp = NULL; 2872185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2873185029Spjd xoptattr_t *xoap; 2874219089Spjd zfs_acl_t *aclp; 2875185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 2876219089Spjd boolean_t fuid_dirtied = B_FALSE; 2877219089Spjd sa_bulk_attr_t bulk[7], xattr_bulk[7]; 2878219089Spjd int count = 0, xattr_count = 0; 2879168404Spjd 2880168404Spjd if (mask == 0) 2881168404Spjd return (0); 2882168404Spjd 2883168962Spjd if (mask & AT_NOSET) 2884249195Smm return (SET_ERROR(EINVAL)); 2885168962Spjd 2886185029Spjd ZFS_ENTER(zfsvfs); 2887185029Spjd ZFS_VERIFY_ZP(zp); 2888185029Spjd 2889185029Spjd zilog = zfsvfs->z_log; 2890185029Spjd 2891185029Spjd /* 2892185029Spjd * Make sure that if we have ephemeral uid/gid or xvattr specified 2893185029Spjd * that file system is at proper version level 2894185029Spjd */ 2895185029Spjd 2896185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2897185029Spjd (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2898185029Spjd ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 2899185029Spjd (mask & AT_XVATTR))) { 2900185029Spjd ZFS_EXIT(zfsvfs); 2901249195Smm return (SET_ERROR(EINVAL)); 2902185029Spjd } 2903185029Spjd 2904185029Spjd if (mask & AT_SIZE && vp->v_type == VDIR) { 2905185029Spjd ZFS_EXIT(zfsvfs); 2906249195Smm return (SET_ERROR(EISDIR)); 2907185029Spjd } 2908168404Spjd 2909185029Spjd if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 2910185029Spjd ZFS_EXIT(zfsvfs); 2911249195Smm return (SET_ERROR(EINVAL)); 2912185029Spjd } 2913168404Spjd 2914185029Spjd /* 2915185029Spjd * If this is an xvattr_t, then get a pointer to the structure of 2916185029Spjd * optional attributes. If this is NULL, then we have a vattr_t. 
2917185029Spjd */ 2918185029Spjd xoap = xva_getxoptattr(xvap); 2919168404Spjd 2920209962Smm xva_init(&tmpxvattr); 2921209962Smm 2922185029Spjd /* 2923185029Spjd * Immutable files can only alter immutable bit and atime 2924185029Spjd */ 2925219089Spjd if ((zp->z_pflags & ZFS_IMMUTABLE) && 2926185029Spjd ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 2927185029Spjd ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 2928185029Spjd ZFS_EXIT(zfsvfs); 2929249195Smm return (SET_ERROR(EPERM)); 2930185029Spjd } 2931185029Spjd 2932321579Smav /* 2933321579Smav * Note: ZFS_READONLY is handled in zfs_zaccess_common. 2934321579Smav */ 2935185029Spjd 2936185029Spjd /* 2937185029Spjd * Verify timestamps doesn't overflow 32 bits. 2938185029Spjd * ZFS can handle large timestamps, but 32bit syscalls can't 2939185029Spjd * handle times greater than 2039. This check should be removed 2940185029Spjd * once large timestamps are fully supported. 2941185029Spjd */ 2942185029Spjd if (mask & (AT_ATIME | AT_MTIME)) { 2943185029Spjd if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 2944185029Spjd ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 2945185029Spjd ZFS_EXIT(zfsvfs); 2946249195Smm return (SET_ERROR(EOVERFLOW)); 2947185029Spjd } 2948185029Spjd } 2949316391Sasomers if (xoap && (mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME) && 2950316391Sasomers TIMESPEC_OVERFLOW(&vap->va_birthtime)) { 2951316391Sasomers ZFS_EXIT(zfsvfs); 2952316391Sasomers return (SET_ERROR(EOVERFLOW)); 2953316391Sasomers } 2954185029Spjd 2955168404Spjd attrzp = NULL; 2956219089Spjd aclp = NULL; 2957168404Spjd 2958211932Smm /* Can this be moved to before the top label? 
*/ 2959168404Spjd if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2960168404Spjd ZFS_EXIT(zfsvfs); 2961249195Smm return (SET_ERROR(EROFS)); 2962168404Spjd } 2963168404Spjd 2964168404Spjd /* 2965168404Spjd * First validate permissions 2966168404Spjd */ 2967168404Spjd 2968168404Spjd if (mask & AT_SIZE) { 2969168404Spjd /* 2970168404Spjd * XXX - Note, we are not providing any open 2971168404Spjd * mode flags here (like FNDELAY), so we may 2972168404Spjd * block if there are locks present... this 2973168404Spjd * should be addressed in openat(). 2974168404Spjd */ 2975185029Spjd /* XXX - would it be OK to generate a log record here? */ 2976185029Spjd err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 2977168404Spjd if (err) { 2978168404Spjd ZFS_EXIT(zfsvfs); 2979168404Spjd return (err); 2980168404Spjd } 2981168404Spjd } 2982168404Spjd 2983185029Spjd if (mask & (AT_ATIME|AT_MTIME) || 2984185029Spjd ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2985185029Spjd XVA_ISSET_REQ(xvap, XAT_READONLY) || 2986185029Spjd XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 2987219089Spjd XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 2988219089Spjd XVA_ISSET_REQ(xvap, XAT_SPARSE) || 2989185029Spjd XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 2990219089Spjd XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 2991185029Spjd need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 2992185029Spjd skipaclchk, cr); 2993219089Spjd } 2994168404Spjd 2995168404Spjd if (mask & (AT_UID|AT_GID)) { 2996168404Spjd int idmask = (mask & (AT_UID|AT_GID)); 2997168404Spjd int take_owner; 2998168404Spjd int take_group; 2999168404Spjd 3000168404Spjd /* 3001168404Spjd * NOTE: even if a new mode is being set, 3002168404Spjd * we may clear S_ISUID/S_ISGID bits. 
3003168404Spjd */ 3004168404Spjd 3005168404Spjd if (!(mask & AT_MODE)) 3006219089Spjd vap->va_mode = zp->z_mode; 3007168404Spjd 3008168404Spjd /* 3009168404Spjd * Take ownership or chgrp to group we are a member of 3010168404Spjd */ 3011168404Spjd 3012168404Spjd take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 3013185029Spjd take_group = (mask & AT_GID) && 3014185029Spjd zfs_groupmember(zfsvfs, vap->va_gid, cr); 3015168404Spjd 3016168404Spjd /* 3017168404Spjd * If both AT_UID and AT_GID are set then take_owner and 3018168404Spjd * take_group must both be set in order to allow taking 3019168404Spjd * ownership. 3020168404Spjd * 3021168404Spjd * Otherwise, send the check through secpolicy_vnode_setattr() 3022168404Spjd * 3023168404Spjd */ 3024168404Spjd 3025168404Spjd if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 3026168404Spjd ((idmask == AT_UID) && take_owner) || 3027168404Spjd ((idmask == AT_GID) && take_group)) { 3028185029Spjd if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 3029185029Spjd skipaclchk, cr) == 0) { 3030168404Spjd /* 3031168404Spjd * Remove setuid/setgid for non-privileged users 3032168404Spjd */ 3033185029Spjd secpolicy_setid_clear(vap, vp, cr); 3034168404Spjd trim_mask = (mask & (AT_UID|AT_GID)); 3035168404Spjd } else { 3036168404Spjd need_policy = TRUE; 3037168404Spjd } 3038168404Spjd } else { 3039168404Spjd need_policy = TRUE; 3040168404Spjd } 3041168404Spjd } 3042168404Spjd 3043219089Spjd oldva.va_mode = zp->z_mode; 3044185029Spjd zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 3045185029Spjd if (mask & AT_XVATTR) { 3046209962Smm /* 3047209962Smm * Update xvattr mask to include only those attributes 3048209962Smm * that are actually changing. 3049209962Smm * 3050209962Smm * the bits will be restored prior to actually setting 3051209962Smm * the attributes so the caller thinks they were set. 
3052209962Smm */ 3053209962Smm if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3054209962Smm if (xoap->xoa_appendonly != 3055219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 3056209962Smm need_policy = TRUE; 3057209962Smm } else { 3058209962Smm XVA_CLR_REQ(xvap, XAT_APPENDONLY); 3059209962Smm XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 3060209962Smm } 3061209962Smm } 3062209962Smm 3063209962Smm if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3064209962Smm if (xoap->xoa_nounlink != 3065219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 3066209962Smm need_policy = TRUE; 3067209962Smm } else { 3068209962Smm XVA_CLR_REQ(xvap, XAT_NOUNLINK); 3069209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 3070209962Smm } 3071209962Smm } 3072209962Smm 3073209962Smm if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3074209962Smm if (xoap->xoa_immutable != 3075219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 3076209962Smm need_policy = TRUE; 3077209962Smm } else { 3078209962Smm XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 3079209962Smm XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 3080209962Smm } 3081209962Smm } 3082209962Smm 3083209962Smm if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3084209962Smm if (xoap->xoa_nodump != 3085219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0)) { 3086209962Smm need_policy = TRUE; 3087209962Smm } else { 3088209962Smm XVA_CLR_REQ(xvap, XAT_NODUMP); 3089209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 3090209962Smm } 3091209962Smm } 3092209962Smm 3093209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3094209962Smm if (xoap->xoa_av_modified != 3095219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 3096209962Smm need_policy = TRUE; 3097209962Smm } else { 3098209962Smm XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 3099209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 3100209962Smm } 3101209962Smm } 3102209962Smm 3103209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3104209962Smm if ((vp->v_type != VREG && 3105209962Smm xoap->xoa_av_quarantined) || 3106209962Smm xoap->xoa_av_quarantined != 
3107219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 3108209962Smm need_policy = TRUE; 3109209962Smm } else { 3110209962Smm XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 3111209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 3112209962Smm } 3113209962Smm } 3114209962Smm 3115219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3116219089Spjd ZFS_EXIT(zfsvfs); 3117249195Smm return (SET_ERROR(EPERM)); 3118219089Spjd } 3119219089Spjd 3120209962Smm if (need_policy == FALSE && 3121209962Smm (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3122209962Smm XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3123185029Spjd need_policy = TRUE; 3124185029Spjd } 3125185029Spjd } 3126185029Spjd 3127168404Spjd if (mask & AT_MODE) { 3128185029Spjd if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 3129168962Spjd err = secpolicy_setid_setsticky_clear(vp, vap, 3130168962Spjd &oldva, cr); 3131168962Spjd if (err) { 3132168962Spjd ZFS_EXIT(zfsvfs); 3133168962Spjd return (err); 3134168962Spjd } 3135168404Spjd trim_mask |= AT_MODE; 3136168404Spjd } else { 3137168404Spjd need_policy = TRUE; 3138168404Spjd } 3139168404Spjd } 3140168404Spjd 3141168404Spjd if (need_policy) { 3142168404Spjd /* 3143168404Spjd * If trim_mask is set then take ownership 3144168404Spjd * has been granted or write_acl is present and user 3145168404Spjd * has the ability to modify mode. In that case remove 3146168404Spjd * UID|GID and or MODE from mask so that 3147168404Spjd * secpolicy_vnode_setattr() doesn't revoke it. 3148168404Spjd */ 3149168404Spjd 3150168404Spjd if (trim_mask) { 3151168404Spjd saved_mask = vap->va_mask; 3152168404Spjd vap->va_mask &= ~trim_mask; 3153197831Spjd if (trim_mask & AT_MODE) { 3154197831Spjd /* 3155197831Spjd * Save the mode, as secpolicy_vnode_setattr() 3156197831Spjd * will overwrite it with ova.va_mode. 
3157197831Spjd */ 3158197831Spjd saved_mode = vap->va_mode; 3159197831Spjd } 3160168404Spjd } 3161168404Spjd err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3162185029Spjd (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3163168404Spjd if (err) { 3164168404Spjd ZFS_EXIT(zfsvfs); 3165168404Spjd return (err); 3166168404Spjd } 3167168404Spjd 3168197831Spjd if (trim_mask) { 3169168404Spjd vap->va_mask |= saved_mask; 3170197831Spjd if (trim_mask & AT_MODE) { 3171197831Spjd /* 3172197831Spjd * Recover the mode after 3173197831Spjd * secpolicy_vnode_setattr(). 3174197831Spjd */ 3175197831Spjd vap->va_mode = saved_mode; 3176197831Spjd } 3177197831Spjd } 3178168404Spjd } 3179168404Spjd 3180168404Spjd /* 3181168404Spjd * secpolicy_vnode_setattr, or take ownership may have 3182168404Spjd * changed va_mask 3183168404Spjd */ 3184168404Spjd mask = vap->va_mask; 3185168404Spjd 3186219089Spjd if ((mask & (AT_UID | AT_GID))) { 3187219089Spjd err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 3188219089Spjd &xattr_obj, sizeof (xattr_obj)); 3189168404Spjd 3190219089Spjd if (err == 0 && xattr_obj) { 3191219089Spjd err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 3192306818Savg if (err == 0) { 3193306818Savg err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE); 3194306818Savg if (err != 0) 3195306818Savg vrele(ZTOV(attrzp)); 3196306818Savg } 3197209962Smm if (err) 3198219089Spjd goto out2; 3199168404Spjd } 3200209962Smm if (mask & AT_UID) { 3201209962Smm new_uid = zfs_fuid_create(zfsvfs, 3202209962Smm (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3203219089Spjd if (new_uid != zp->z_uid && 3204219089Spjd zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 3205219089Spjd if (attrzp) 3206306818Savg vput(ZTOV(attrzp)); 3207249195Smm err = SET_ERROR(EDQUOT); 3208219089Spjd goto out2; 3209209962Smm } 3210209962Smm } 3211209962Smm 3212209962Smm if (mask & AT_GID) { 3213209962Smm new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 3214209962Smm cr, ZFS_GROUP, &fuidp); 3215219089Spjd 
if (new_gid != zp->z_gid && 3216219089Spjd zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 3217219089Spjd if (attrzp) 3218306818Savg vput(ZTOV(attrzp)); 3219249195Smm err = SET_ERROR(EDQUOT); 3220219089Spjd goto out2; 3221209962Smm } 3222209962Smm } 3223219089Spjd } 3224219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 3225219089Spjd 3226219089Spjd if (mask & AT_MODE) { 3227219089Spjd uint64_t pmode = zp->z_mode; 3228219089Spjd uint64_t acl_obj; 3229219089Spjd new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3230219089Spjd 3231243560Smm if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 3232243560Smm !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3233249195Smm err = SET_ERROR(EPERM); 3234243560Smm goto out; 3235243560Smm } 3236243560Smm 3237224174Smm if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3238224174Smm goto out; 3239219089Spjd 3240219089Spjd if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 3241219089Spjd /* 3242219089Spjd * Are we upgrading ACL from old V0 format 3243219089Spjd * to V1 format? 
3244219089Spjd */ 3245219089Spjd if (zfsvfs->z_version >= ZPL_VERSION_FUID && 3246219089Spjd zfs_znode_acl_version(zp) == 3247219089Spjd ZFS_ACL_VERSION_INITIAL) { 3248219089Spjd dmu_tx_hold_free(tx, acl_obj, 0, 3249219089Spjd DMU_OBJECT_END); 3250219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3251219089Spjd 0, aclp->z_acl_bytes); 3252209962Smm } else { 3253219089Spjd dmu_tx_hold_write(tx, acl_obj, 0, 3254219089Spjd aclp->z_acl_bytes); 3255209962Smm } 3256219089Spjd } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 3257219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3258219089Spjd 0, aclp->z_acl_bytes); 3259209962Smm } 3260219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3261219089Spjd } else { 3262219089Spjd if ((mask & AT_XVATTR) && 3263219089Spjd XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3264219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3265219089Spjd else 3266219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3267168404Spjd } 3268168404Spjd 3269219089Spjd if (attrzp) { 3270219089Spjd dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 3271219089Spjd } 3272219089Spjd 3273219089Spjd fuid_dirtied = zfsvfs->z_fuid_dirty; 3274219089Spjd if (fuid_dirtied) 3275219089Spjd zfs_fuid_txhold(zfsvfs, tx); 3276219089Spjd 3277219089Spjd zfs_sa_upgrade_txholds(tx, zp); 3278219089Spjd 3279258720Savg err = dmu_tx_assign(tx, TXG_WAIT); 3280258720Savg if (err) 3281209962Smm goto out; 3282168404Spjd 3283219089Spjd count = 0; 3284168404Spjd /* 3285168404Spjd * Set each attribute requested. 3286168404Spjd * We group settings according to the locks they need to acquire. 3287168404Spjd * 3288168404Spjd * Note: you cannot set ctime directly, although it will be 3289168404Spjd * updated as a side-effect of calling this function. 
3290168404Spjd */ 3291168404Spjd 3292219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3293219089Spjd mutex_enter(&zp->z_acl_lock); 3294168404Spjd 3295219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3296219089Spjd &zp->z_pflags, sizeof (zp->z_pflags)); 3297219089Spjd 3298219089Spjd if (attrzp) { 3299219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3300219089Spjd mutex_enter(&attrzp->z_acl_lock); 3301219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3302219089Spjd SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3303219089Spjd sizeof (attrzp->z_pflags)); 3304219089Spjd } 3305219089Spjd 3306219089Spjd if (mask & (AT_UID|AT_GID)) { 3307219089Spjd 3308219089Spjd if (mask & AT_UID) { 3309219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 3310219089Spjd &new_uid, sizeof (new_uid)); 3311219089Spjd zp->z_uid = new_uid; 3312219089Spjd if (attrzp) { 3313219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3314219089Spjd SA_ZPL_UID(zfsvfs), NULL, &new_uid, 3315219089Spjd sizeof (new_uid)); 3316219089Spjd attrzp->z_uid = new_uid; 3317219089Spjd } 3318219089Spjd } 3319219089Spjd 3320219089Spjd if (mask & AT_GID) { 3321219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 3322219089Spjd NULL, &new_gid, sizeof (new_gid)); 3323219089Spjd zp->z_gid = new_gid; 3324219089Spjd if (attrzp) { 3325219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3326219089Spjd SA_ZPL_GID(zfsvfs), NULL, &new_gid, 3327219089Spjd sizeof (new_gid)); 3328219089Spjd attrzp->z_gid = new_gid; 3329219089Spjd } 3330219089Spjd } 3331219089Spjd if (!(mask & AT_MODE)) { 3332219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 3333219089Spjd NULL, &new_mode, sizeof (new_mode)); 3334219089Spjd new_mode = zp->z_mode; 3335219089Spjd } 3336219089Spjd err = zfs_acl_chown_setattr(zp); 3337219089Spjd ASSERT(err == 0); 3338219089Spjd if (attrzp) { 3339219089Spjd err = zfs_acl_chown_setattr(attrzp); 3340219089Spjd ASSERT(err == 0); 3341219089Spjd } 3342219089Spjd } 
3343219089Spjd 3344168404Spjd if (mask & AT_MODE) { 3345219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 3346219089Spjd &new_mode, sizeof (new_mode)); 3347219089Spjd zp->z_mode = new_mode; 3348219089Spjd ASSERT3U((uintptr_t)aclp, !=, 0); 3349209962Smm err = zfs_aclset_common(zp, aclp, cr, tx); 3350240415Smm ASSERT0(err); 3351219089Spjd if (zp->z_acl_cached) 3352219089Spjd zfs_acl_free(zp->z_acl_cached); 3353211932Smm zp->z_acl_cached = aclp; 3354211932Smm aclp = NULL; 3355168404Spjd } 3356168404Spjd 3357168404Spjd 3358219089Spjd if (mask & AT_ATIME) { 3359219089Spjd ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 3360219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 3361219089Spjd &zp->z_atime, sizeof (zp->z_atime)); 3362168404Spjd } 3363168404Spjd 3364219089Spjd if (mask & AT_MTIME) { 3365219089Spjd ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 3366219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 3367219089Spjd mtime, sizeof (mtime)); 3368168404Spjd } 3369168404Spjd 3370185029Spjd /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 3371219089Spjd if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3372219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3373219089Spjd NULL, mtime, sizeof (mtime)); 3374219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3375219089Spjd &ctime, sizeof (ctime)); 3376219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 3377219089Spjd B_TRUE); 3378219089Spjd } else if (mask != 0) { 3379219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3380219089Spjd &ctime, sizeof (ctime)); 3381219089Spjd zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 3382219089Spjd B_TRUE); 3383219089Spjd if (attrzp) { 3384219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3385219089Spjd SA_ZPL_CTIME(zfsvfs), NULL, 3386219089Spjd &ctime, sizeof (ctime)); 3387219089Spjd zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 3388219089Spjd mtime, ctime, B_TRUE); 3389219089Spjd } 3390219089Spjd } 3391185029Spjd /* 3392185029Spjd * Do this after setting timestamps to prevent timestamp 3393185029Spjd * update from toggling bit 3394185029Spjd */ 3395168404Spjd 3396185029Spjd if (xoap && (mask & AT_XVATTR)) { 3397209962Smm 3398316391Sasomers if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) 3399316391Sasomers xoap->xoa_createtime = vap->va_birthtime; 3400209962Smm /* 3401209962Smm * restore trimmed off masks 3402209962Smm * so that return masks can be set for caller. 
3403209962Smm */ 3404209962Smm 3405209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3406209962Smm XVA_SET_REQ(xvap, XAT_APPENDONLY); 3407209962Smm } 3408209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3409209962Smm XVA_SET_REQ(xvap, XAT_NOUNLINK); 3410209962Smm } 3411209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3412209962Smm XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3413209962Smm } 3414209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3415209962Smm XVA_SET_REQ(xvap, XAT_NODUMP); 3416209962Smm } 3417209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3418209962Smm XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3419209962Smm } 3420209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3421209962Smm XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3422209962Smm } 3423209962Smm 3424219089Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3425185029Spjd ASSERT(vp->v_type == VREG); 3426185029Spjd 3427219089Spjd zfs_xvattr_set(zp, xvap, tx); 3428185029Spjd } 3429185029Spjd 3430209962Smm if (fuid_dirtied) 3431209962Smm zfs_fuid_sync(zfsvfs, tx); 3432209962Smm 3433168404Spjd if (mask != 0) 3434185029Spjd zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3435168404Spjd 3436219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3437219089Spjd mutex_exit(&zp->z_acl_lock); 3438168404Spjd 3439219089Spjd if (attrzp) { 3440219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3441219089Spjd mutex_exit(&attrzp->z_acl_lock); 3442219089Spjd } 3443209962Smmout: 3444219089Spjd if (err == 0 && attrzp) { 3445219089Spjd err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 3446219089Spjd xattr_count, tx); 3447219089Spjd ASSERT(err2 == 0); 3448219089Spjd } 3449219089Spjd 3450168404Spjd if (attrzp) 3451306818Savg vput(ZTOV(attrzp)); 3452251631Sdelphij 3453211932Smm if (aclp) 3454209962Smm zfs_acl_free(aclp); 3455168404Spjd 3456209962Smm if (fuidp) { 3457209962Smm zfs_fuid_info_free(fuidp); 3458209962Smm fuidp = NULL; 3459209962Smm } 3460209962Smm 3461219089Spjd if (err) { 
3462209962Smm dmu_tx_abort(tx); 3463219089Spjd } else { 3464219089Spjd err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 3465209962Smm dmu_tx_commit(tx); 3466219089Spjd } 3467209962Smm 3468219089Spjdout2: 3469219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3470219089Spjd zil_commit(zilog, 0); 3471209962Smm 3472168404Spjd ZFS_EXIT(zfsvfs); 3473168404Spjd return (err); 3474168404Spjd} 3475168404Spjd 3476168404Spjd/* 3477303970Savg * We acquire all but fdvp locks using non-blocking acquisitions. If we 3478303970Savg * fail to acquire any lock in the path we will drop all held locks, 3479303970Savg * acquire the new lock in a blocking fashion, and then release it and 3480303970Savg * restart the rename. This acquire/release step ensures that we do not 3481303970Savg * spin on a lock waiting for release. On error release all vnode locks 3482303970Savg * and decrement references the way tmpfs_rename() would do. 3483168404Spjd */ 3484303970Savgstatic int 3485303970Savgzfs_rename_relock(struct vnode *sdvp, struct vnode **svpp, 3486303970Savg struct vnode *tdvp, struct vnode **tvpp, 3487303970Savg const struct componentname *scnp, const struct componentname *tcnp) 3488168404Spjd{ 3489303970Savg zfsvfs_t *zfsvfs; 3490303970Savg struct vnode *nvp, *svp, *tvp; 3491303970Savg znode_t *sdzp, *tdzp, *szp, *tzp; 3492303970Savg const char *snm = scnp->cn_nameptr; 3493303970Savg const char *tnm = tcnp->cn_nameptr; 3494303970Savg int error; 3495168404Spjd 3496303970Savg VOP_UNLOCK(tdvp, 0); 3497303970Savg if (*tvpp != NULL && *tvpp != tdvp) 3498303970Savg VOP_UNLOCK(*tvpp, 0); 3499303970Savg 3500303970Savgrelock: 3501303970Savg error = vn_lock(sdvp, LK_EXCLUSIVE); 3502303970Savg if (error) 3503303970Savg goto out; 3504303970Savg sdzp = VTOZ(sdvp); 3505303970Savg 3506303970Savg error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT); 3507303970Savg if (error != 0) { 3508303970Savg VOP_UNLOCK(sdvp, 0); 3509303970Savg if (error != EBUSY) 3510303970Savg goto out; 3511303970Savg 
error = vn_lock(tdvp, LK_EXCLUSIVE); 3512303970Savg if (error) 3513303970Savg goto out; 3514303970Savg VOP_UNLOCK(tdvp, 0); 3515303970Savg goto relock; 3516168404Spjd } 3517303970Savg tdzp = VTOZ(tdvp); 3518168404Spjd 3519303970Savg /* 3520303970Savg * Before using sdzp and tdzp we must ensure that they are live. 3521303970Savg * As a porting legacy from illumos we have two things to worry 3522303970Savg * about. One is typical for FreeBSD and it is that the vnode is 3523303970Savg * not reclaimed (doomed). The other is that the znode is live. 3524303970Savg * The current code can invalidate the znode without acquiring the 3525303970Savg * corresponding vnode lock if the object represented by the znode 3526303970Savg * and vnode is no longer valid after a rollback or receive operation. 3527303970Savg * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock 3528303970Savg * that protects the znodes from the invalidation. 3529303970Savg */ 3530303970Savg zfsvfs = sdzp->z_zfsvfs; 3531303970Savg ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs); 3532303970Savg ZFS_ENTER(zfsvfs); 3533168404Spjd 3534168404Spjd /* 3535303970Savg * We can not use ZFS_VERIFY_ZP() here because it could directly return 3536303970Savg * bypassing the cleanup code in the case of an error. 3537168404Spjd */ 3538303970Savg if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 3539303970Savg ZFS_EXIT(zfsvfs); 3540303970Savg VOP_UNLOCK(sdvp, 0); 3541303970Savg VOP_UNLOCK(tdvp, 0); 3542303970Savg error = SET_ERROR(EIO); 3543303970Savg goto out; 3544303970Savg } 3545303970Savg 3546303970Savg /* 3547303970Savg * Re-resolve svp to be certain it still exists and fetch the 3548303970Savg * correct vnode. 3549303970Savg */ 3550303970Savg error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS); 3551303970Savg if (error != 0) { 3552303970Savg /* Source entry invalid or not there. 
*/ 3553303970Savg ZFS_EXIT(zfsvfs); 3554303970Savg VOP_UNLOCK(sdvp, 0); 3555303970Savg VOP_UNLOCK(tdvp, 0); 3556303970Savg if ((scnp->cn_flags & ISDOTDOT) != 0 || 3557303970Savg (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.')) 3558303970Savg error = SET_ERROR(EINVAL); 3559303970Savg goto out; 3560303970Savg } 3561303970Savg svp = ZTOV(szp); 3562303970Savg 3563303970Savg /* 3564303970Savg * Re-resolve tvp, if it disappeared we just carry on. 3565303970Savg */ 3566303970Savg error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0); 3567303970Savg if (error != 0) { 3568303970Savg ZFS_EXIT(zfsvfs); 3569303970Savg VOP_UNLOCK(sdvp, 0); 3570303970Savg VOP_UNLOCK(tdvp, 0); 3571303970Savg vrele(svp); 3572303970Savg if ((tcnp->cn_flags & ISDOTDOT) != 0) 3573303970Savg error = SET_ERROR(EINVAL); 3574303970Savg goto out; 3575303970Savg } 3576303970Savg if (tzp != NULL) 3577303970Savg tvp = ZTOV(tzp); 3578303970Savg else 3579303970Savg tvp = NULL; 3580303970Savg 3581303970Savg /* 3582303970Savg * At present the vnode locks must be acquired before z_teardown_lock, 3583303970Savg * although it would be more logical to use the opposite order. 3584303970Savg */ 3585303970Savg ZFS_EXIT(zfsvfs); 3586303970Savg 3587303970Savg /* 3588303970Savg * Now try acquire locks on svp and tvp. 3589303970Savg */ 3590303970Savg nvp = svp; 3591303970Savg error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 3592303970Savg if (error != 0) { 3593303970Savg VOP_UNLOCK(sdvp, 0); 3594303970Savg VOP_UNLOCK(tdvp, 0); 3595303970Savg if (tvp != NULL) 3596303970Savg vrele(tvp); 3597303970Savg if (error != EBUSY) { 3598303970Savg vrele(nvp); 3599303970Savg goto out; 3600303970Savg } 3601303970Savg error = vn_lock(nvp, LK_EXCLUSIVE); 3602303970Savg if (error != 0) { 3603303970Savg vrele(nvp); 3604303970Savg goto out; 3605303970Savg } 3606303970Savg VOP_UNLOCK(nvp, 0); 3607303970Savg /* 3608303970Savg * Concurrent rename race. 3609303970Savg * XXX ? 
3610303970Savg */ 3611303970Savg if (nvp == tdvp) { 3612303970Savg vrele(nvp); 3613303970Savg error = SET_ERROR(EINVAL); 3614303970Savg goto out; 3615303970Savg } 3616303970Savg vrele(*svpp); 3617303970Savg *svpp = nvp; 3618303970Savg goto relock; 3619303970Savg } 3620303970Savg vrele(*svpp); 3621303970Savg *svpp = nvp; 3622303970Savg 3623303970Savg if (*tvpp != NULL) 3624303970Savg vrele(*tvpp); 3625303970Savg *tvpp = NULL; 3626303970Savg if (tvp != NULL) { 3627303970Savg nvp = tvp; 3628303970Savg error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 3629303970Savg if (error != 0) { 3630303970Savg VOP_UNLOCK(sdvp, 0); 3631303970Savg VOP_UNLOCK(tdvp, 0); 3632303970Savg VOP_UNLOCK(*svpp, 0); 3633303970Savg if (error != EBUSY) { 3634303970Savg vrele(nvp); 3635303970Savg goto out; 3636168404Spjd } 3637303970Savg error = vn_lock(nvp, LK_EXCLUSIVE); 3638303970Savg if (error != 0) { 3639303970Savg vrele(nvp); 3640303970Savg goto out; 3641303970Savg } 3642303970Savg vput(nvp); 3643303970Savg goto relock; 3644168404Spjd } 3645303970Savg *tvpp = nvp; 3646303970Savg } 3647168404Spjd 3648303970Savg return (0); 3649168404Spjd 3650303970Savgout: 3651303970Savg return (error); 3652303970Savg} 3653168404Spjd 3654303970Savg/* 3655303970Savg * Note that we must use VRELE_ASYNC in this function as it walks 3656303970Savg * up the directory tree and vrele may need to acquire an exclusive 3657303970Savg * lock if a last reference to a vnode is dropped. 
3658303970Savg */ 3659303970Savgstatic int 3660303970Savgzfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp) 3661303970Savg{ 3662303970Savg zfsvfs_t *zfsvfs; 3663303970Savg znode_t *zp, *zp1; 3664303970Savg uint64_t parent; 3665303970Savg int error; 3666168404Spjd 3667303970Savg zfsvfs = tdzp->z_zfsvfs; 3668303970Savg if (tdzp == szp) 3669303970Savg return (SET_ERROR(EINVAL)); 3670303970Savg if (tdzp == sdzp) 3671303970Savg return (0); 3672303970Savg if (tdzp->z_id == zfsvfs->z_root) 3673303970Savg return (0); 3674303970Savg zp = tdzp; 3675303970Savg for (;;) { 3676303970Savg ASSERT(!zp->z_unlinked); 3677303970Savg if ((error = sa_lookup(zp->z_sa_hdl, 3678303970Savg SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) 3679303970Savg break; 3680303970Savg 3681303970Savg if (parent == szp->z_id) { 3682303970Savg error = SET_ERROR(EINVAL); 3683303970Savg break; 3684168404Spjd } 3685303970Savg if (parent == zfsvfs->z_root) 3686303970Savg break; 3687303970Savg if (parent == sdzp->z_id) 3688303970Savg break; 3689168404Spjd 3690303970Savg error = zfs_zget(zfsvfs, parent, &zp1); 3691303970Savg if (error != 0) 3692303970Savg break; 3693168404Spjd 3694303970Savg if (zp != tdzp) 3695303970Savg VN_RELE_ASYNC(ZTOV(zp), 3696303970Savg dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 3697303970Savg zp = zp1; 3698303970Savg } 3699303970Savg 3700303970Savg if (error == ENOTDIR) 3701303970Savg panic("checkpath: .. not a directory\n"); 3702303970Savg if (zp != tdzp) 3703303970Savg VN_RELE_ASYNC(ZTOV(zp), 3704303970Savg dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 3705303970Savg return (error); 3706168404Spjd} 3707168404Spjd 3708168404Spjd/* 3709168404Spjd * Move an entry from the provided source directory to the target 3710168404Spjd * directory. Change the entry name as indicated. 3711168404Spjd * 3712168404Spjd * IN: sdvp - Source directory containing the "old entry". 3713168404Spjd * snm - Old entry name. 
3714168404Spjd * tdvp - Target directory to contain the "new entry". 3715168404Spjd * tnm - New entry name. 3716168404Spjd * cr - credentials of caller. 3717185029Spjd * ct - caller context 3718185029Spjd * flags - case flags 3719168404Spjd * 3720251631Sdelphij * RETURN: 0 on success, error code on failure. 3721168404Spjd * 3722168404Spjd * Timestamps: 3723168404Spjd * sdvp,tdvp - ctime|mtime updated 3724168404Spjd */ 3725185029Spjd/*ARGSUSED*/ 3726168404Spjdstatic int 3727303970Savgzfs_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, 3728303970Savg vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp, 3729303970Savg cred_t *cr) 3730168404Spjd{ 3731303970Savg zfsvfs_t *zfsvfs; 3732303970Savg znode_t *sdzp, *tdzp, *szp, *tzp; 3733303970Savg zilog_t *zilog = NULL; 3734168404Spjd dmu_tx_t *tx; 3735303970Savg char *snm = scnp->cn_nameptr; 3736303970Savg char *tnm = tcnp->cn_nameptr; 3737185029Spjd int error = 0; 3738168404Spjd 3739303970Savg /* Reject renames across filesystems. */ 3740303970Savg if ((*svpp)->v_mount != tdvp->v_mount || 3741303970Savg ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) { 3742303970Savg error = SET_ERROR(EXDEV); 3743303970Savg goto out; 3744303970Savg } 3745168404Spjd 3746303970Savg if (zfsctl_is_node(tdvp)) { 3747303970Savg error = SET_ERROR(EXDEV); 3748303970Savg goto out; 3749303970Savg } 3750303970Savg 3751168962Spjd /* 3752303970Savg * Lock all four vnodes to ensure safety and semantics of renaming. 
3753168962Spjd */ 3754303970Savg error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp); 3755303970Savg if (error != 0) { 3756303970Savg /* no vnodes are locked in the case of error here */ 3757303970Savg return (error); 3758264392Sdavide } 3759168962Spjd 3760303970Savg tdzp = VTOZ(tdvp); 3761303970Savg sdzp = VTOZ(sdvp); 3762303970Savg zfsvfs = tdzp->z_zfsvfs; 3763303970Savg zilog = zfsvfs->z_log; 3764303970Savg 3765254585Sdelphij /* 3766303970Savg * After we re-enter ZFS_ENTER() we will have to revalidate all 3767303970Savg * znodes involved. 3768254585Sdelphij */ 3769303970Savg ZFS_ENTER(zfsvfs); 3770168404Spjd 3771185029Spjd if (zfsvfs->z_utf8 && u8_validate(tnm, 3772185029Spjd strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3773303970Savg error = SET_ERROR(EILSEQ); 3774303970Savg goto unlockout; 3775185029Spjd } 3776185029Spjd 3777303970Savg /* If source and target are the same file, there is nothing to do. */ 3778303970Savg if ((*svpp) == (*tvpp)) { 3779303970Savg error = 0; 3780303970Savg goto unlockout; 3781303970Savg } 3782185029Spjd 3783303970Savg if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) || 3784303970Savg ((*tvpp) != NULL && (*tvpp)->v_type == VDIR && 3785303970Savg (*tvpp)->v_mountedhere != NULL)) { 3786303970Savg error = SET_ERROR(EXDEV); 3787303970Savg goto unlockout; 3788303970Savg } 3789168404Spjd 3790168404Spjd /* 3791303970Savg * We can not use ZFS_VERIFY_ZP() here because it could directly return 3792303970Savg * bypassing the cleanup code in the case of an error. 3793168404Spjd */ 3794303970Savg if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 3795303970Savg error = SET_ERROR(EIO); 3796303970Savg goto unlockout; 3797168404Spjd } 3798168404Spjd 3799303970Savg szp = VTOZ(*svpp); 3800303970Savg tzp = *tvpp == NULL ? 
NULL : VTOZ(*tvpp); 3801303970Savg if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) { 3802303970Savg error = SET_ERROR(EIO); 3803303970Savg goto unlockout; 3804168962Spjd } 3805185029Spjd 3806208131Smm /* 3807303970Savg * This is to prevent the creation of links into attribute space 3808303970Savg * by renaming a linked file into/outof an attribute directory. 3809303970Savg * See the comment in zfs_link() for why this is considered bad. 3810208131Smm */ 3811303970Savg if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3812303970Savg error = SET_ERROR(EINVAL); 3813303970Savg goto unlockout; 3814208131Smm } 3815208131Smm 3816168404Spjd /* 3817168404Spjd * Must have write access at the source to remove the old entry 3818168404Spjd * and write access at the target to create the new entry. 3819168404Spjd * Note that if target and source are the same, this can be 3820168404Spjd * done in a single check. 3821168404Spjd */ 3822168404Spjd if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3823303970Savg goto unlockout; 3824168404Spjd 3825303970Savg if ((*svpp)->v_type == VDIR) { 3826168404Spjd /* 3827303970Savg * Avoid ".", "..", and aliases of "." for obvious reasons. 3828303970Savg */ 3829303970Savg if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') || 3830303970Savg sdzp == szp || 3831303970Savg (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { 3832303970Savg error = EINVAL; 3833303970Savg goto unlockout; 3834303970Savg } 3835303970Savg 3836303970Savg /* 3837168404Spjd * Check to make sure rename is valid. 3838168404Spjd * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3839168404Spjd */ 3840303970Savg if (error = zfs_rename_check(szp, sdzp, tdzp)) 3841303970Savg goto unlockout; 3842168404Spjd } 3843168404Spjd 3844168404Spjd /* 3845168404Spjd * Does target exist? 3846168404Spjd */ 3847168404Spjd if (tzp) { 3848168404Spjd /* 3849168404Spjd * Source and target must be the same type. 
3850168404Spjd */ 3851303970Savg if ((*svpp)->v_type == VDIR) { 3852303970Savg if ((*tvpp)->v_type != VDIR) { 3853249195Smm error = SET_ERROR(ENOTDIR); 3854303970Savg goto unlockout; 3855303970Savg } else { 3856303970Savg cache_purge(tdvp); 3857303970Savg if (sdvp != tdvp) 3858303970Savg cache_purge(sdvp); 3859168404Spjd } 3860168404Spjd } else { 3861303970Savg if ((*tvpp)->v_type == VDIR) { 3862249195Smm error = SET_ERROR(EISDIR); 3863303970Savg goto unlockout; 3864168404Spjd } 3865168404Spjd } 3866168404Spjd } 3867168404Spjd 3868303970Savg vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct); 3869168962Spjd if (tzp) 3870303970Savg vnevent_rename_dest(*tvpp, tdvp, tnm, ct); 3871168962Spjd 3872185029Spjd /* 3873185029Spjd * notify the target directory if it is not the same 3874185029Spjd * as source directory. 3875185029Spjd */ 3876185029Spjd if (tdvp != sdvp) { 3877185029Spjd vnevent_rename_dest_dir(tdvp, ct); 3878185029Spjd } 3879185029Spjd 3880168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 3881219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 3882219089Spjd dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3883168404Spjd dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3884168404Spjd dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 3885219089Spjd if (sdzp != tdzp) { 3886219089Spjd dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 3887219089Spjd zfs_sa_upgrade_txholds(tx, tdzp); 3888219089Spjd } 3889219089Spjd if (tzp) { 3890219089Spjd dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 3891219089Spjd zfs_sa_upgrade_txholds(tx, tzp); 3892219089Spjd } 3893219089Spjd 3894219089Spjd zfs_sa_upgrade_txholds(tx, szp); 3895168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3896303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 3897168404Spjd if (error) { 3898168404Spjd dmu_tx_abort(tx); 3899303970Savg goto unlockout; 3900168404Spjd } 3901168404Spjd 3902303970Savg 3903168404Spjd if (tzp) /* Attempt to remove the existing target */ 3904303970Savg error = zfs_link_destroy(tdzp, 
tnm, tzp, tx, 0, NULL); 3905168404Spjd 3906168404Spjd if (error == 0) { 3907303970Savg error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING); 3908168404Spjd if (error == 0) { 3909219089Spjd szp->z_pflags |= ZFS_AV_MODIFIED; 3910185029Spjd 3911219089Spjd error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 3912219089Spjd (void *)&szp->z_pflags, sizeof (uint64_t), tx); 3913240415Smm ASSERT0(error); 3914219089Spjd 3915303970Savg error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING, 3916303970Savg NULL); 3917219089Spjd if (error == 0) { 3918303970Savg zfs_log_rename(zilog, tx, TX_RENAME, sdzp, 3919303970Savg snm, tdzp, tnm, szp); 3920185029Spjd 3921219089Spjd /* 3922219089Spjd * Update path information for the target vnode 3923219089Spjd */ 3924303970Savg vn_renamepath(tdvp, *svpp, tnm, strlen(tnm)); 3925219089Spjd } else { 3926219089Spjd /* 3927219089Spjd * At this point, we have successfully created 3928219089Spjd * the target name, but have failed to remove 3929219089Spjd * the source name. Since the create was done 3930219089Spjd * with the ZRENAMING flag, there are 3931219089Spjd * complications; for one, the link count is 3932219089Spjd * wrong. The easiest way to deal with this 3933219089Spjd * is to remove the newly created target, and 3934219089Spjd * return the original error. This must 3935219089Spjd * succeed; fortunately, it is very unlikely to 3936219089Spjd * fail, since we just created it. 
3937219089Spjd */ 3938303970Savg VERIFY3U(zfs_link_destroy(tdzp, tnm, szp, tx, 3939219089Spjd ZRENAMING, NULL), ==, 0); 3940219089Spjd } 3941168404Spjd } 3942168404Spjd if (error == 0) { 3943303970Savg cache_purge(*svpp); 3944303970Savg if (*tvpp != NULL) 3945303970Savg cache_purge(*tvpp); 3946303970Savg cache_purge_negative(tdvp); 3947168404Spjd } 3948168404Spjd } 3949168404Spjd 3950168404Spjd dmu_tx_commit(tx); 3951168404Spjd 3952303970Savgunlockout: /* all 4 vnodes are locked, ZFS_ENTER called */ 3953303970Savg ZFS_EXIT(zfsvfs); 3954303970Savg VOP_UNLOCK(*svpp, 0); 3955303970Savg VOP_UNLOCK(sdvp, 0); 3956168404Spjd 3957303970Savgout: /* original two vnodes are locked */ 3958303970Savg if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3959219089Spjd zil_commit(zilog, 0); 3960219089Spjd 3961303970Savg if (*tvpp != NULL) 3962303970Savg VOP_UNLOCK(*tvpp, 0); 3963303970Savg if (tdvp != *tvpp) 3964303970Savg VOP_UNLOCK(tdvp, 0); 3965168404Spjd return (error); 3966168404Spjd} 3967168404Spjd 3968168404Spjd/* 3969168404Spjd * Insert the indicated symbolic reference entry into the directory. 3970168404Spjd * 3971168404Spjd * IN: dvp - Directory to contain new symbolic link. 3972168404Spjd * link - Name for new symlink entry. 3973168404Spjd * vap - Attributes of new entry. 3974168404Spjd * cr - credentials of caller. 3975185029Spjd * ct - caller context 3976185029Spjd * flags - case flags 3977168404Spjd * 3978251631Sdelphij * RETURN: 0 on success, error code on failure. 
3979168404Spjd * 3980168404Spjd * Timestamps: 3981168404Spjd * dvp - ctime|mtime updated 3982168404Spjd */ 3983185029Spjd/*ARGSUSED*/ 3984168404Spjdstatic int 3985185029Spjdzfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 3986185029Spjd cred_t *cr, kthread_t *td) 3987168404Spjd{ 3988168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 3989168404Spjd dmu_tx_t *tx; 3990168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3991185029Spjd zilog_t *zilog; 3992219089Spjd uint64_t len = strlen(link); 3993168404Spjd int error; 3994209962Smm zfs_acl_ids_t acl_ids; 3995209962Smm boolean_t fuid_dirtied; 3996219089Spjd uint64_t txtype = TX_SYMLINK; 3997185029Spjd int flags = 0; 3998168404Spjd 3999168962Spjd ASSERT(vap->va_type == VLNK); 4000168404Spjd 4001168404Spjd ZFS_ENTER(zfsvfs); 4002185029Spjd ZFS_VERIFY_ZP(dzp); 4003185029Spjd zilog = zfsvfs->z_log; 4004185029Spjd 4005185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 4006185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4007185029Spjd ZFS_EXIT(zfsvfs); 4008249195Smm return (SET_ERROR(EILSEQ)); 4009185029Spjd } 4010168404Spjd 4011168404Spjd if (len > MAXPATHLEN) { 4012168404Spjd ZFS_EXIT(zfsvfs); 4013249195Smm return (SET_ERROR(ENAMETOOLONG)); 4014168404Spjd } 4015168404Spjd 4016219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, 4017219089Spjd vap, cr, NULL, &acl_ids)) != 0) { 4018219089Spjd ZFS_EXIT(zfsvfs); 4019219089Spjd return (error); 4020219089Spjd } 4021260704Savg 4022168404Spjd /* 4023168404Spjd * Attempt to lock directory; fail if entry already exists. 
4024168404Spjd */ 4025303970Savg error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 4026185029Spjd if (error) { 4027219089Spjd zfs_acl_ids_free(&acl_ids); 4028168404Spjd ZFS_EXIT(zfsvfs); 4029168404Spjd return (error); 4030168404Spjd } 4031168404Spjd 4032219089Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4033219089Spjd zfs_acl_ids_free(&acl_ids); 4034219089Spjd ZFS_EXIT(zfsvfs); 4035219089Spjd return (error); 4036219089Spjd } 4037219089Spjd 4038209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 4039209962Smm zfs_acl_ids_free(&acl_ids); 4040209962Smm ZFS_EXIT(zfsvfs); 4041249195Smm return (SET_ERROR(EDQUOT)); 4042209962Smm } 4043303970Savg 4044303970Savg getnewvnode_reserve(1); 4045168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4046209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 4047168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 4048168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4049219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 4050219089Spjd ZFS_SA_BASE_ATTR_SIZE + len); 4051219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 4052219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 4053219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 4054219089Spjd acl_ids.z_aclp->z_acl_bytes); 4055219089Spjd } 4056209962Smm if (fuid_dirtied) 4057209962Smm zfs_fuid_txhold(zfsvfs, tx); 4058303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 4059168404Spjd if (error) { 4060219089Spjd zfs_acl_ids_free(&acl_ids); 4061168404Spjd dmu_tx_abort(tx); 4062260704Savg getnewvnode_drop_reserve(); 4063168404Spjd ZFS_EXIT(zfsvfs); 4064168404Spjd return (error); 4065168404Spjd } 4066168404Spjd 4067168404Spjd /* 4068168404Spjd * Create a new object for the symlink. 
4069219089Spjd * for version 4 ZPL datsets the symlink will be an SA attribute 4070168404Spjd */ 4071219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 4072168404Spjd 4073219089Spjd if (fuid_dirtied) 4074219089Spjd zfs_fuid_sync(zfsvfs, tx); 4075209962Smm 4076219089Spjd if (zp->z_is_sa) 4077219089Spjd error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 4078219089Spjd link, len, tx); 4079219089Spjd else 4080219089Spjd zfs_sa_symlink(zp, link, len, tx); 4081168404Spjd 4082219089Spjd zp->z_size = len; 4083219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 4084219089Spjd &zp->z_size, sizeof (zp->z_size), tx); 4085168404Spjd /* 4086168404Spjd * Insert the new object into the directory. 4087168404Spjd */ 4088303970Savg (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 4089168404Spjd 4090219089Spjd zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 4091219089Spjd *vpp = ZTOV(zp); 4092219089Spjd 4093209962Smm zfs_acl_ids_free(&acl_ids); 4094209962Smm 4095168404Spjd dmu_tx_commit(tx); 4096168404Spjd 4097260704Savg getnewvnode_drop_reserve(); 4098260704Savg 4099219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4100219089Spjd zil_commit(zilog, 0); 4101219089Spjd 4102168404Spjd ZFS_EXIT(zfsvfs); 4103168404Spjd return (error); 4104168404Spjd} 4105168404Spjd 4106168404Spjd/* 4107168404Spjd * Return, in the buffer contained in the provided uio structure, 4108168404Spjd * the symbolic path referred to by vp. 4109168404Spjd * 4110168404Spjd * IN: vp - vnode of symbolic link. 4111251631Sdelphij * uio - structure to contain the link path. 4112168404Spjd * cr - credentials of caller. 4113185029Spjd * ct - caller context 4114168404Spjd * 4115251631Sdelphij * OUT: uio - structure containing the link path. 4116168404Spjd * 4117251631Sdelphij * RETURN: 0 on success, error code on failure. 
4118168404Spjd * 4119168404Spjd * Timestamps: 4120168404Spjd * vp - atime updated 4121168404Spjd */ 4122168404Spjd/* ARGSUSED */ 4123168404Spjdstatic int 4124185029Spjdzfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 4125168404Spjd{ 4126168404Spjd znode_t *zp = VTOZ(vp); 4127168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4128168404Spjd int error; 4129168404Spjd 4130168404Spjd ZFS_ENTER(zfsvfs); 4131185029Spjd ZFS_VERIFY_ZP(zp); 4132168404Spjd 4133219089Spjd if (zp->z_is_sa) 4134219089Spjd error = sa_lookup_uio(zp->z_sa_hdl, 4135219089Spjd SA_ZPL_SYMLINK(zfsvfs), uio); 4136219089Spjd else 4137219089Spjd error = zfs_sa_readlink(zp, uio); 4138168404Spjd 4139168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4140219089Spjd 4141168404Spjd ZFS_EXIT(zfsvfs); 4142168404Spjd return (error); 4143168404Spjd} 4144168404Spjd 4145168404Spjd/* 4146168404Spjd * Insert a new entry into directory tdvp referencing svp. 4147168404Spjd * 4148168404Spjd * IN: tdvp - Directory to contain new entry. 4149168404Spjd * svp - vnode of new entry. 4150168404Spjd * name - name of new entry. 4151168404Spjd * cr - credentials of caller. 4152185029Spjd * ct - caller context 4153168404Spjd * 4154251631Sdelphij * RETURN: 0 on success, error code on failure. 
4155168404Spjd * 4156168404Spjd * Timestamps: 4157168404Spjd * tdvp - ctime|mtime updated 4158168404Spjd * svp - ctime updated 4159168404Spjd */ 4160168404Spjd/* ARGSUSED */ 4161168404Spjdstatic int 4162185029Spjdzfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 4163185029Spjd caller_context_t *ct, int flags) 4164168404Spjd{ 4165168404Spjd znode_t *dzp = VTOZ(tdvp); 4166168404Spjd znode_t *tzp, *szp; 4167168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4168185029Spjd zilog_t *zilog; 4169168404Spjd dmu_tx_t *tx; 4170168404Spjd int error; 4171212694Smm uint64_t parent; 4172185029Spjd uid_t owner; 4173168404Spjd 4174168404Spjd ASSERT(tdvp->v_type == VDIR); 4175168404Spjd 4176168404Spjd ZFS_ENTER(zfsvfs); 4177185029Spjd ZFS_VERIFY_ZP(dzp); 4178185029Spjd zilog = zfsvfs->z_log; 4179168404Spjd 4180212694Smm /* 4181212694Smm * POSIX dictates that we return EPERM here. 4182212694Smm * Better choices include ENOTSUP or EISDIR. 4183212694Smm */ 4184212694Smm if (svp->v_type == VDIR) { 4185168404Spjd ZFS_EXIT(zfsvfs); 4186249195Smm return (SET_ERROR(EPERM)); 4187212694Smm } 4188212694Smm 4189254585Sdelphij szp = VTOZ(svp); 4190254585Sdelphij ZFS_VERIFY_ZP(szp); 4191254585Sdelphij 4192258597Spjd if (szp->z_pflags & (ZFS_APPENDONLY | ZFS_IMMUTABLE | ZFS_READONLY)) { 4193258597Spjd ZFS_EXIT(zfsvfs); 4194258597Spjd return (SET_ERROR(EPERM)); 4195258597Spjd } 4196258597Spjd 4197212694Smm /* Prevent links to .zfs/shares files */ 4198212694Smm 4199219089Spjd if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 4200219089Spjd &parent, sizeof (uint64_t))) != 0) { 4201212694Smm ZFS_EXIT(zfsvfs); 4202219089Spjd return (error); 4203219089Spjd } 4204219089Spjd if (parent == zfsvfs->z_shares_dir) { 4205219089Spjd ZFS_EXIT(zfsvfs); 4206249195Smm return (SET_ERROR(EPERM)); 4207212694Smm } 4208212694Smm 4209185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, 4210185029Spjd strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4211185029Spjd ZFS_EXIT(zfsvfs); 4212249195Smm 
return (SET_ERROR(EILSEQ)); 4213185029Spjd } 4214185029Spjd 4215168404Spjd /* 4216168404Spjd * We do not support links between attributes and non-attributes 4217168404Spjd * because of the potential security risk of creating links 4218168404Spjd * into "normal" file space in order to circumvent restrictions 4219168404Spjd * imposed in attribute space. 4220168404Spjd */ 4221219089Spjd if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4222168404Spjd ZFS_EXIT(zfsvfs); 4223249195Smm return (SET_ERROR(EINVAL)); 4224168404Spjd } 4225168404Spjd 4226168404Spjd 4227219089Spjd owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4228219089Spjd if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { 4229168404Spjd ZFS_EXIT(zfsvfs); 4230249195Smm return (SET_ERROR(EPERM)); 4231168404Spjd } 4232168404Spjd 4233185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4234168404Spjd ZFS_EXIT(zfsvfs); 4235168404Spjd return (error); 4236168404Spjd } 4237168404Spjd 4238168404Spjd /* 4239168404Spjd * Attempt to lock directory; fail if entry already exists. 
4240168404Spjd */ 4241303970Savg error = zfs_dirent_lookup(dzp, name, &tzp, ZNEW); 4242185029Spjd if (error) { 4243168404Spjd ZFS_EXIT(zfsvfs); 4244168404Spjd return (error); 4245168404Spjd } 4246168404Spjd 4247168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4248219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4249168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4250219089Spjd zfs_sa_upgrade_txholds(tx, szp); 4251219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 4252303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 4253168404Spjd if (error) { 4254168404Spjd dmu_tx_abort(tx); 4255168404Spjd ZFS_EXIT(zfsvfs); 4256168404Spjd return (error); 4257168404Spjd } 4258168404Spjd 4259303970Savg error = zfs_link_create(dzp, name, szp, tx, 0); 4260168404Spjd 4261185029Spjd if (error == 0) { 4262185029Spjd uint64_t txtype = TX_LINK; 4263185029Spjd zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4264185029Spjd } 4265168404Spjd 4266168404Spjd dmu_tx_commit(tx); 4267168404Spjd 4268185029Spjd if (error == 0) { 4269185029Spjd vnevent_link(svp, ct); 4270185029Spjd } 4271185029Spjd 4272219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4273219089Spjd zil_commit(zilog, 0); 4274219089Spjd 4275168404Spjd ZFS_EXIT(zfsvfs); 4276168404Spjd return (error); 4277168404Spjd} 4278168404Spjd 4279219089Spjd 4280185029Spjd/*ARGSUSED*/ 4281168962Spjdvoid 4282185029Spjdzfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4283168404Spjd{ 4284168962Spjd znode_t *zp = VTOZ(vp); 4285168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4286168962Spjd int error; 4287168404Spjd 4288185029Spjd rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4289219089Spjd if (zp->z_sa_hdl == NULL) { 4290185029Spjd /* 4291185029Spjd * The fs has been unmounted, or we did a 4292185029Spjd * suspend/resume and this file no longer exists. 
4293185029Spjd */ 4294243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 4295234607Strasz vrecycle(vp); 4296243520Savg return; 4297243520Savg } 4298243520Savg 4299243520Savg if (zp->z_unlinked) { 4300243520Savg /* 4301243520Savg * Fast path to recycle a vnode of a removed file. 4302243520Savg */ 4303185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4304243520Savg vrecycle(vp); 4305168962Spjd return; 4306168404Spjd } 4307168404Spjd 4308168404Spjd if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4309168404Spjd dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4310168404Spjd 4311219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4312219089Spjd zfs_sa_upgrade_txholds(tx, zp); 4313168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 4314168404Spjd if (error) { 4315168404Spjd dmu_tx_abort(tx); 4316168404Spjd } else { 4317219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 4318219089Spjd (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4319168404Spjd zp->z_atime_dirty = 0; 4320168404Spjd dmu_tx_commit(tx); 4321168404Spjd } 4322168404Spjd } 4323185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4324168404Spjd} 4325168404Spjd 4326219089Spjd 4327168404SpjdCTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 4328168404SpjdCTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 4329168404Spjd 4330185029Spjd/*ARGSUSED*/ 4331168404Spjdstatic int 4332185029Spjdzfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 4333168404Spjd{ 4334168404Spjd znode_t *zp = VTOZ(vp); 4335168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4336185029Spjd uint32_t gen; 4337219089Spjd uint64_t gen64; 4338168404Spjd uint64_t object = zp->z_id; 4339168404Spjd zfid_short_t *zfid; 4340219089Spjd int size, i, error; 4341168404Spjd 4342168404Spjd ZFS_ENTER(zfsvfs); 4343185029Spjd ZFS_VERIFY_ZP(zp); 4344168404Spjd 4345219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 4346219089Spjd &gen64, sizeof (uint64_t))) != 0) { 4347219089Spjd ZFS_EXIT(zfsvfs); 4348219089Spjd return 
(error); 4349219089Spjd } 4350219089Spjd 4351219089Spjd gen = (uint32_t)gen64; 4352219089Spjd 4353168404Spjd size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 4354249195Smm 4355249195Smm#ifdef illumos 4356249195Smm if (fidp->fid_len < size) { 4357249195Smm fidp->fid_len = size; 4358249195Smm ZFS_EXIT(zfsvfs); 4359249195Smm return (SET_ERROR(ENOSPC)); 4360249195Smm } 4361249195Smm#else 4362168404Spjd fidp->fid_len = size; 4363249195Smm#endif 4364168404Spjd 4365168404Spjd zfid = (zfid_short_t *)fidp; 4366168404Spjd 4367168404Spjd zfid->zf_len = size; 4368168404Spjd 4369168404Spjd for (i = 0; i < sizeof (zfid->zf_object); i++) 4370168404Spjd zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 4371168404Spjd 4372168404Spjd /* Must have a non-zero generation number to distinguish from .zfs */ 4373168404Spjd if (gen == 0) 4374168404Spjd gen = 1; 4375168404Spjd for (i = 0; i < sizeof (zfid->zf_gen); i++) 4376168404Spjd zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 4377168404Spjd 4378168404Spjd if (size == LONG_FID_LEN) { 4379168404Spjd uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 4380169023Spjd zfid_long_t *zlfid; 4381168404Spjd 4382168404Spjd zlfid = (zfid_long_t *)fidp; 4383168404Spjd 4384168404Spjd for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4385168404Spjd zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4386168404Spjd 4387168404Spjd /* XXX - this should be the generation number for the objset */ 4388168404Spjd for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 4389168404Spjd zlfid->zf_setgen[i] = 0; 4390168404Spjd } 4391168404Spjd 4392168404Spjd ZFS_EXIT(zfsvfs); 4393168404Spjd return (0); 4394168404Spjd} 4395168404Spjd 4396168404Spjdstatic int 4397185029Spjdzfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 4398185029Spjd caller_context_t *ct) 4399168404Spjd{ 4400168404Spjd znode_t *zp, *xzp; 4401168404Spjd zfsvfs_t *zfsvfs; 4402168404Spjd int error; 4403168404Spjd 4404168404Spjd switch (cmd) { 4405168404Spjd case _PC_LINK_MAX: 
4406168404Spjd *valp = INT_MAX; 4407168404Spjd return (0); 4408168404Spjd 4409168404Spjd case _PC_FILESIZEBITS: 4410168404Spjd *valp = 64; 4411168404Spjd return (0); 4412277300Ssmh#ifdef illumos 4413168404Spjd case _PC_XATTR_EXISTS: 4414168404Spjd zp = VTOZ(vp); 4415168404Spjd zfsvfs = zp->z_zfsvfs; 4416168404Spjd ZFS_ENTER(zfsvfs); 4417185029Spjd ZFS_VERIFY_ZP(zp); 4418168404Spjd *valp = 0; 4419303970Savg error = zfs_dirent_lookup(zp, "", &xzp, 4420303970Savg ZXATTR | ZEXISTS | ZSHARED); 4421168404Spjd if (error == 0) { 4422168404Spjd if (!zfs_dirempty(xzp)) 4423168404Spjd *valp = 1; 4424303970Savg vrele(ZTOV(xzp)); 4425168404Spjd } else if (error == ENOENT) { 4426168404Spjd /* 4427168404Spjd * If there aren't extended attributes, it's the 4428168404Spjd * same as having zero of them. 4429168404Spjd */ 4430168404Spjd error = 0; 4431168404Spjd } 4432168404Spjd ZFS_EXIT(zfsvfs); 4433168404Spjd return (error); 4434168404Spjd 4435219089Spjd case _PC_SATTR_ENABLED: 4436219089Spjd case _PC_SATTR_EXISTS: 4437219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 4438219089Spjd (vp->v_type == VREG || vp->v_type == VDIR); 4439219089Spjd return (0); 4440219089Spjd 4441219089Spjd case _PC_ACCESS_FILTERING: 4442219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 4443219089Spjd vp->v_type == VDIR; 4444219089Spjd return (0); 4445219089Spjd 4446219089Spjd case _PC_ACL_ENABLED: 4447219089Spjd *valp = _ACL_ACE_ENABLED; 4448219089Spjd return (0); 4449277300Ssmh#endif /* illumos */ 4450219089Spjd case _PC_MIN_HOLE_SIZE: 4451219089Spjd *valp = (int)SPA_MINBLOCKSIZE; 4452219089Spjd return (0); 4453277300Ssmh#ifdef illumos 4454219089Spjd case _PC_TIMESTAMP_RESOLUTION: 4455219089Spjd /* nanosecond timestamp resolution */ 4456219089Spjd *valp = 1L; 4457219089Spjd return (0); 4458277300Ssmh#endif 4459168404Spjd case _PC_ACL_EXTENDED: 4460196949Strasz *valp = 0; 4461168404Spjd return (0); 4462168404Spjd 4463196949Strasz case _PC_ACL_NFS4: 4464196949Strasz 
*valp = 1; 4465196949Strasz return (0); 4466196949Strasz 4467196949Strasz case _PC_ACL_PATH_MAX: 4468196949Strasz *valp = ACL_MAX_ENTRIES; 4469196949Strasz return (0); 4470196949Strasz 4471168404Spjd default: 4472168962Spjd return (EOPNOTSUPP); 4473168404Spjd } 4474168404Spjd} 4475168404Spjd 4476168404Spjd/*ARGSUSED*/ 4477168404Spjdstatic int 4478185029Spjdzfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4479185029Spjd caller_context_t *ct) 4480168404Spjd{ 4481168404Spjd znode_t *zp = VTOZ(vp); 4482168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4483168404Spjd int error; 4484185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 4485168404Spjd 4486168404Spjd ZFS_ENTER(zfsvfs); 4487185029Spjd ZFS_VERIFY_ZP(zp); 4488185029Spjd error = zfs_getacl(zp, vsecp, skipaclchk, cr); 4489168404Spjd ZFS_EXIT(zfsvfs); 4490168404Spjd 4491168404Spjd return (error); 4492168404Spjd} 4493168404Spjd 4494168404Spjd/*ARGSUSED*/ 4495228685Spjdint 4496185029Spjdzfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4497185029Spjd caller_context_t *ct) 4498168404Spjd{ 4499168404Spjd znode_t *zp = VTOZ(vp); 4500168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4501168404Spjd int error; 4502185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 4503219089Spjd zilog_t *zilog = zfsvfs->z_log; 4504168404Spjd 4505168404Spjd ZFS_ENTER(zfsvfs); 4506185029Spjd ZFS_VERIFY_ZP(zp); 4507219089Spjd 4508185029Spjd error = zfs_setacl(zp, vsecp, skipaclchk, cr); 4509219089Spjd 4510219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4511219089Spjd zil_commit(zilog, 0); 4512219089Spjd 4513168404Spjd ZFS_EXIT(zfsvfs); 4514168404Spjd return (error); 4515168404Spjd} 4516168404Spjd 4517168962Spjdstatic int 4518292373Sglebiuszfs_getpages(struct vnode *vp, vm_page_t *m, int count, int *rbehind, 4519292373Sglebius int *rahead) 4520213937Savg{ 4521213937Savg znode_t *zp = VTOZ(vp); 4522213937Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4523213937Savg objset_t *os = zp->z_zfsvfs->z_os; 4524292373Sglebius vm_page_t mlast; 4525213937Savg vm_object_t object; 4526213937Savg caddr_t va; 4527213937Savg struct sf_buf *sf; 4528243517Savg off_t startoff, endoff; 4529213937Savg int i, error; 4530243517Savg vm_pindex_t reqstart, reqend; 4531297473Sglebius int lsize, size; 4532213937Savg 4533292386Sglebius object = m[0]->object; 4534292386Sglebius error = 0; 4535292373Sglebius 4536213937Savg ZFS_ENTER(zfsvfs); 4537213937Savg ZFS_VERIFY_ZP(zp); 4538213937Savg 4539248084Sattilio zfs_vmobject_wlock(object); 4540292386Sglebius if (m[count - 1]->valid != 0 && --count == 0) { 4541248084Sattilio zfs_vmobject_wunlock(object); 4542292386Sglebius goto out; 4543213937Savg } 4544213937Savg 4545292386Sglebius mlast = m[count - 1]; 4546213937Savg 4547292373Sglebius if (IDX_TO_OFF(mlast->pindex) >= 4548292373Sglebius object->un_pager.vnp.vnp_size) { 4549248084Sattilio zfs_vmobject_wunlock(object); 4550213937Savg ZFS_EXIT(zfsvfs); 4551248084Sattilio return (zfs_vm_pagerret_bad); 4552213937Savg } 4553213937Savg 4554292373Sglebius PCPU_INC(cnt.v_vnodein); 4555297473Sglebius PCPU_ADD(cnt.v_vnodepgsin, count); 4556292373Sglebius 4557243517Savg lsize = PAGE_SIZE; 4558243517Savg if (IDX_TO_OFF(mlast->pindex) + lsize > object->un_pager.vnp.vnp_size) 
4559292373Sglebius lsize = object->un_pager.vnp.vnp_size - 4560292373Sglebius IDX_TO_OFF(mlast->pindex); 4561248084Sattilio zfs_vmobject_wunlock(object); 4562243517Savg 4563292386Sglebius for (i = 0; i < count; i++) { 4564243517Savg size = PAGE_SIZE; 4565292386Sglebius if (i == count - 1) 4566243517Savg size = lsize; 4567243517Savg va = zfs_map_page(m[i], &sf); 4568243517Savg error = dmu_read(os, zp->z_id, IDX_TO_OFF(m[i]->pindex), 4569243517Savg size, va, DMU_READ_PREFETCH); 4570243517Savg if (size != PAGE_SIZE) 4571243517Savg bzero(va + size, PAGE_SIZE - size); 4572243517Savg zfs_unmap_page(sf); 4573243517Savg if (error != 0) 4574292373Sglebius goto out; 4575243517Savg } 4576243517Savg 4577248084Sattilio zfs_vmobject_wlock(object); 4578292386Sglebius for (i = 0; i < count; i++) 4579292373Sglebius m[i]->valid = VM_PAGE_BITS_ALL; 4580248084Sattilio zfs_vmobject_wunlock(object); 4581213937Savg 4582292373Sglebiusout: 4583213937Savg ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4584213937Savg ZFS_EXIT(zfsvfs); 4585292386Sglebius if (error == 0) { 4586292386Sglebius if (rbehind) 4587292386Sglebius *rbehind = 0; 4588292386Sglebius if (rahead) 4589292386Sglebius *rahead = 0; 4590292386Sglebius return (zfs_vm_pagerret_ok); 4591292386Sglebius } else 4592292386Sglebius return (zfs_vm_pagerret_error); 4593213937Savg} 4594213937Savg 4595213937Savgstatic int 4596213937Savgzfs_freebsd_getpages(ap) 4597213937Savg struct vop_getpages_args /* { 4598213937Savg struct vnode *a_vp; 4599213937Savg vm_page_t *a_m; 4600213937Savg int a_count; 4601292373Sglebius int *a_rbehind; 4602292373Sglebius int *a_rahead; 4603213937Savg } */ *ap; 4604213937Savg{ 4605213937Savg 4606292373Sglebius return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, 4607292373Sglebius ap->a_rahead)); 4608213937Savg} 4609213937Savg 4610213937Savgstatic int 4611258746Savgzfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, 4612258746Savg int *rtvals) 4613258746Savg{ 4614258746Savg znode_t *zp = 
VTOZ(vp); 4615258746Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4616258746Savg rl_t *rl; 4617258746Savg dmu_tx_t *tx; 4618258746Savg struct sf_buf *sf; 4619258746Savg vm_object_t object; 4620258746Savg vm_page_t m; 4621258746Savg caddr_t va; 4622258746Savg size_t tocopy; 4623258746Savg size_t lo_len; 4624258746Savg vm_ooffset_t lo_off; 4625258746Savg vm_ooffset_t off; 4626258746Savg uint_t blksz; 4627258746Savg int ncount; 4628258746Savg int pcount; 4629258746Savg int err; 4630258746Savg int i; 4631258746Savg 4632258746Savg ZFS_ENTER(zfsvfs); 4633258746Savg ZFS_VERIFY_ZP(zp); 4634258746Savg 4635258746Savg object = vp->v_object; 4636258746Savg pcount = btoc(len); 4637258746Savg ncount = pcount; 4638258746Savg 4639258746Savg KASSERT(ma[0]->object == object, ("mismatching object")); 4640258746Savg KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length")); 4641258746Savg 4642258746Savg for (i = 0; i < pcount; i++) 4643258746Savg rtvals[i] = zfs_vm_pagerret_error; 4644258746Savg 4645258746Savg off = IDX_TO_OFF(ma[0]->pindex); 4646258746Savg blksz = zp->z_blksz; 4647258746Savg lo_off = rounddown(off, blksz); 4648258746Savg lo_len = roundup(len + (off - lo_off), blksz); 4649258746Savg rl = zfs_range_lock(zp, lo_off, lo_len, RL_WRITER); 4650258746Savg 4651258746Savg zfs_vmobject_wlock(object); 4652258746Savg if (len + off > object->un_pager.vnp.vnp_size) { 4653258746Savg if (object->un_pager.vnp.vnp_size > off) { 4654258746Savg int pgoff; 4655258746Savg 4656258746Savg len = object->un_pager.vnp.vnp_size - off; 4657258746Savg ncount = btoc(len); 4658258746Savg if ((pgoff = (int)len & PAGE_MASK) != 0) { 4659258746Savg /* 4660258746Savg * If the object is locked and the following 4661258746Savg * conditions hold, then the page's dirty 4662258746Savg * field cannot be concurrently changed by a 4663258746Savg * pmap operation. 
4664258746Savg */ 4665258746Savg m = ma[ncount - 1]; 4666258746Savg vm_page_assert_sbusied(m); 4667258746Savg KASSERT(!pmap_page_is_write_mapped(m), 4668258746Savg ("zfs_putpages: page %p is not read-only", m)); 4669258746Savg vm_page_clear_dirty(m, pgoff, PAGE_SIZE - 4670258746Savg pgoff); 4671258746Savg } 4672258746Savg } else { 4673258746Savg len = 0; 4674258746Savg ncount = 0; 4675258746Savg } 4676258746Savg if (ncount < pcount) { 4677258746Savg for (i = ncount; i < pcount; i++) { 4678258746Savg rtvals[i] = zfs_vm_pagerret_bad; 4679258746Savg } 4680258746Savg } 4681258746Savg } 4682258746Savg zfs_vmobject_wunlock(object); 4683258746Savg 4684258746Savg if (ncount == 0) 4685258746Savg goto out; 4686258746Savg 4687258746Savg if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 4688258746Savg zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4689258746Savg goto out; 4690258746Savg } 4691258746Savg 4692258746Savg tx = dmu_tx_create(zfsvfs->z_os); 4693258746Savg dmu_tx_hold_write(tx, zp->z_id, off, len); 4694258746Savg 4695258746Savg dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4696258746Savg zfs_sa_upgrade_txholds(tx, zp); 4697316847Savg err = dmu_tx_assign(tx, TXG_WAIT); 4698258746Savg if (err != 0) { 4699258746Savg dmu_tx_abort(tx); 4700258746Savg goto out; 4701258746Savg } 4702258746Savg 4703258746Savg if (zp->z_blksz < PAGE_SIZE) { 4704258746Savg i = 0; 4705258746Savg for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) { 4706258746Savg tocopy = len > PAGE_SIZE ? 
PAGE_SIZE : len; 4707258746Savg va = zfs_map_page(ma[i], &sf); 4708258746Savg dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx); 4709258746Savg zfs_unmap_page(sf); 4710258746Savg } 4711258746Savg } else { 4712258746Savg err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx); 4713258746Savg } 4714258746Savg 4715258746Savg if (err == 0) { 4716258746Savg uint64_t mtime[2], ctime[2]; 4717258746Savg sa_bulk_attr_t bulk[3]; 4718258746Savg int count = 0; 4719258746Savg 4720258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 4721258746Savg &mtime, 16); 4722258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 4723258746Savg &ctime, 16); 4724258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4725258746Savg &zp->z_pflags, 8); 4726258746Savg zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 4727258746Savg B_TRUE); 4728321561Smav err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 4729321561Smav ASSERT0(err); 4730258746Savg zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 4731258746Savg 4732258746Savg zfs_vmobject_wlock(object); 4733258746Savg for (i = 0; i < ncount; i++) { 4734258746Savg rtvals[i] = zfs_vm_pagerret_ok; 4735258746Savg vm_page_undirty(ma[i]); 4736258746Savg } 4737258746Savg zfs_vmobject_wunlock(object); 4738258746Savg PCPU_INC(cnt.v_vnodeout); 4739258746Savg PCPU_ADD(cnt.v_vnodepgsout, ncount); 4740258746Savg } 4741258746Savg dmu_tx_commit(tx); 4742258746Savg 4743258746Savgout: 4744258746Savg zfs_range_unlock(rl); 4745258746Savg if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 || 4746258746Savg zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4747258746Savg zil_commit(zfsvfs->z_log, zp->z_id); 4748258746Savg ZFS_EXIT(zfsvfs); 4749258746Savg return (rtvals[0]); 4750258746Savg} 4751258746Savg 4752258746Savgint 4753258746Savgzfs_freebsd_putpages(ap) 4754258746Savg struct vop_putpages_args /* { 4755258746Savg struct vnode *a_vp; 4756258746Savg vm_page_t *a_m; 
4757258746Savg int a_count; 4758258746Savg int a_sync; 4759258746Savg int *a_rtvals; 4760258746Savg } */ *ap; 4761258746Savg{ 4762258746Savg 4763258746Savg return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, 4764258746Savg ap->a_rtvals)); 4765258746Savg} 4766258746Savg 4767258746Savgstatic int 4768243518Savgzfs_freebsd_bmap(ap) 4769243518Savg struct vop_bmap_args /* { 4770243518Savg struct vnode *a_vp; 4771243518Savg daddr_t a_bn; 4772243518Savg struct bufobj **a_bop; 4773243518Savg daddr_t *a_bnp; 4774243518Savg int *a_runp; 4775243518Savg int *a_runb; 4776243518Savg } */ *ap; 4777243518Savg{ 4778243518Savg 4779243518Savg if (ap->a_bop != NULL) 4780243518Savg *ap->a_bop = &ap->a_vp->v_bufobj; 4781243518Savg if (ap->a_bnp != NULL) 4782243518Savg *ap->a_bnp = ap->a_bn; 4783243518Savg if (ap->a_runp != NULL) 4784243518Savg *ap->a_runp = 0; 4785243518Savg if (ap->a_runb != NULL) 4786243518Savg *ap->a_runb = 0; 4787243518Savg 4788243518Savg return (0); 4789243518Savg} 4790243518Savg 4791243518Savgstatic int 4792168962Spjdzfs_freebsd_open(ap) 4793168962Spjd struct vop_open_args /* { 4794168962Spjd struct vnode *a_vp; 4795168962Spjd int a_mode; 4796168962Spjd struct ucred *a_cred; 4797168962Spjd struct thread *a_td; 4798168962Spjd } */ *ap; 4799168962Spjd{ 4800168962Spjd vnode_t *vp = ap->a_vp; 4801168962Spjd znode_t *zp = VTOZ(vp); 4802168962Spjd int error; 4803168962Spjd 4804185029Spjd error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); 4805168962Spjd if (error == 0) 4806219089Spjd vnode_create_vobject(vp, zp->z_size, ap->a_td); 4807168962Spjd return (error); 4808168962Spjd} 4809168962Spjd 4810168962Spjdstatic int 4811168962Spjdzfs_freebsd_close(ap) 4812168962Spjd struct vop_close_args /* { 4813168962Spjd struct vnode *a_vp; 4814168962Spjd int a_fflag; 4815168962Spjd struct ucred *a_cred; 4816168962Spjd struct thread *a_td; 4817168962Spjd } */ *ap; 4818168962Spjd{ 4819168962Spjd 4820242566Savg return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred, 
NULL)); 4821168962Spjd} 4822168962Spjd 4823168962Spjdstatic int 4824168962Spjdzfs_freebsd_ioctl(ap) 4825168962Spjd struct vop_ioctl_args /* { 4826168962Spjd struct vnode *a_vp; 4827168962Spjd u_long a_command; 4828168962Spjd caddr_t a_data; 4829168962Spjd int a_fflag; 4830168962Spjd struct ucred *cred; 4831168962Spjd struct thread *td; 4832168962Spjd } */ *ap; 4833168962Spjd{ 4834168962Spjd 4835168978Spjd return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 4836185029Spjd ap->a_fflag, ap->a_cred, NULL, NULL)); 4837168962Spjd} 4838168962Spjd 4839168962Spjdstatic int 4840330062Savgioflags(int ioflags) 4841330062Savg{ 4842330062Savg int flags = 0; 4843330062Savg 4844330062Savg if (ioflags & IO_APPEND) 4845330062Savg flags |= FAPPEND; 4846330062Savg if (ioflags & IO_NDELAY) 4847330062Savg flags |= FNONBLOCK; 4848330062Savg if (ioflags & IO_SYNC) 4849330062Savg flags |= (FSYNC | FDSYNC | FRSYNC); 4850330062Savg 4851330062Savg return (flags); 4852330062Savg} 4853330062Savg 4854330062Savgstatic int 4855168962Spjdzfs_freebsd_read(ap) 4856168962Spjd struct vop_read_args /* { 4857168962Spjd struct vnode *a_vp; 4858168962Spjd struct uio *a_uio; 4859168962Spjd int a_ioflag; 4860168962Spjd struct ucred *a_cred; 4861168962Spjd } */ *ap; 4862168962Spjd{ 4863168962Spjd 4864213673Spjd return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 4865213673Spjd ap->a_cred, NULL)); 4866168962Spjd} 4867168962Spjd 4868168962Spjdstatic int 4869168962Spjdzfs_freebsd_write(ap) 4870168962Spjd struct vop_write_args /* { 4871168962Spjd struct vnode *a_vp; 4872168962Spjd struct uio *a_uio; 4873168962Spjd int a_ioflag; 4874168962Spjd struct ucred *a_cred; 4875168962Spjd } */ *ap; 4876168962Spjd{ 4877168962Spjd 4878213673Spjd return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 4879213673Spjd ap->a_cred, NULL)); 4880168962Spjd} 4881168962Spjd 4882168962Spjdstatic int 4883168962Spjdzfs_freebsd_access(ap) 4884168962Spjd struct vop_access_args /* { 4885168962Spjd struct vnode 
*a_vp; 4886192689Strasz accmode_t a_accmode; 4887168962Spjd struct ucred *a_cred; 4888168962Spjd struct thread *a_td; 4889168962Spjd } */ *ap; 4890168962Spjd{ 4891212002Sjh vnode_t *vp = ap->a_vp; 4892212002Sjh znode_t *zp = VTOZ(vp); 4893198703Spjd accmode_t accmode; 4894198703Spjd int error = 0; 4895168962Spjd 4896185172Spjd /* 4897198703Spjd * ZFS itself only knows about VREAD, VWRITE, VEXEC and VAPPEND. 4898185172Spjd */ 4899198703Spjd accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 4900198703Spjd if (accmode != 0) 4901198703Spjd error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); 4902185172Spjd 4903198703Spjd /* 4904198703Spjd * VADMIN has to be handled by vaccess(). 4905198703Spjd */ 4906198703Spjd if (error == 0) { 4907198703Spjd accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 4908198703Spjd if (accmode != 0) { 4909219089Spjd error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 4910219089Spjd zp->z_gid, accmode, ap->a_cred, NULL); 4911198703Spjd } 4912185172Spjd } 4913185172Spjd 4914212002Sjh /* 4915212002Sjh * For VEXEC, ensure that at least one execute bit is set for 4916212002Sjh * non-directories. 
4917212002Sjh */ 4918212002Sjh if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 4919219089Spjd (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 4920212002Sjh error = EACCES; 4921219089Spjd } 4922212002Sjh 4923198703Spjd return (error); 4924168962Spjd} 4925168962Spjd 4926168962Spjdstatic int 4927168962Spjdzfs_freebsd_lookup(ap) 4928168962Spjd struct vop_lookup_args /* { 4929168962Spjd struct vnode *a_dvp; 4930168962Spjd struct vnode **a_vpp; 4931168962Spjd struct componentname *a_cnp; 4932168962Spjd } */ *ap; 4933168962Spjd{ 4934168962Spjd struct componentname *cnp = ap->a_cnp; 4935168962Spjd char nm[NAME_MAX + 1]; 4936168962Spjd 4937168962Spjd ASSERT(cnp->cn_namelen < sizeof(nm)); 4938168962Spjd strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 4939168962Spjd 4940168962Spjd return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 4941185029Spjd cnp->cn_cred, cnp->cn_thread, 0)); 4942168962Spjd} 4943168962Spjd 4944168962Spjdstatic int 4945303970Savgzfs_cache_lookup(ap) 4946303970Savg struct vop_lookup_args /* { 4947303970Savg struct vnode *a_dvp; 4948303970Savg struct vnode **a_vpp; 4949303970Savg struct componentname *a_cnp; 4950303970Savg } */ *ap; 4951303970Savg{ 4952303970Savg zfsvfs_t *zfsvfs; 4953303970Savg 4954303970Savg zfsvfs = ap->a_dvp->v_mount->mnt_data; 4955303970Savg if (zfsvfs->z_use_namecache) 4956303970Savg return (vfs_cache_lookup(ap)); 4957303970Savg else 4958303970Savg return (zfs_freebsd_lookup(ap)); 4959303970Savg} 4960303970Savg 4961303970Savgstatic int 4962168962Spjdzfs_freebsd_create(ap) 4963168962Spjd struct vop_create_args /* { 4964168962Spjd struct vnode *a_dvp; 4965168962Spjd struct vnode **a_vpp; 4966168962Spjd struct componentname *a_cnp; 4967168962Spjd struct vattr *a_vap; 4968168962Spjd } */ *ap; 4969168962Spjd{ 4970303970Savg zfsvfs_t *zfsvfs; 4971168962Spjd struct componentname *cnp = ap->a_cnp; 4972168962Spjd vattr_t *vap = ap->a_vap; 4973276007Skib int error, mode; 
4974168962Spjd 4975168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 4976168962Spjd 4977168962Spjd vattr_init_mask(vap); 4978168962Spjd mode = vap->va_mode & ALLPERMS; 4979303970Savg zfsvfs = ap->a_dvp->v_mount->mnt_data; 4980168962Spjd 4981276007Skib error = zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 4982276007Skib ap->a_vpp, cnp->cn_cred, cnp->cn_thread); 4983303970Savg if (zfsvfs->z_use_namecache && 4984303970Savg error == 0 && (cnp->cn_flags & MAKEENTRY) != 0) 4985276007Skib cache_enter(ap->a_dvp, *ap->a_vpp, cnp); 4986276007Skib return (error); 4987168962Spjd} 4988168962Spjd 4989168962Spjdstatic int 4990168962Spjdzfs_freebsd_remove(ap) 4991168962Spjd struct vop_remove_args /* { 4992168962Spjd struct vnode *a_dvp; 4993168962Spjd struct vnode *a_vp; 4994168962Spjd struct componentname *a_cnp; 4995168962Spjd } */ *ap; 4996168962Spjd{ 4997168962Spjd 4998168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 4999168962Spjd 5000303970Savg return (zfs_remove(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr, 5001303970Savg ap->a_cnp->cn_cred)); 5002168962Spjd} 5003168962Spjd 5004168962Spjdstatic int 5005168962Spjdzfs_freebsd_mkdir(ap) 5006168962Spjd struct vop_mkdir_args /* { 5007168962Spjd struct vnode *a_dvp; 5008168962Spjd struct vnode **a_vpp; 5009168962Spjd struct componentname *a_cnp; 5010168962Spjd struct vattr *a_vap; 5011168962Spjd } */ *ap; 5012168962Spjd{ 5013168962Spjd vattr_t *vap = ap->a_vap; 5014168962Spjd 5015168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 5016168962Spjd 5017168962Spjd vattr_init_mask(vap); 5018168962Spjd 5019168962Spjd return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 5020303970Savg ap->a_cnp->cn_cred)); 5021168962Spjd} 5022168962Spjd 5023168962Spjdstatic int 5024168962Spjdzfs_freebsd_rmdir(ap) 5025168962Spjd struct vop_rmdir_args /* { 5026168962Spjd struct vnode *a_dvp; 5027168962Spjd struct vnode *a_vp; 5028168962Spjd struct componentname *a_cnp; 5029168962Spjd } */ *ap; 5030168962Spjd{ 5031168962Spjd struct 
componentname *cnp = ap->a_cnp; 5032168962Spjd 5033168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5034168962Spjd 5035303970Savg return (zfs_rmdir(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred)); 5036168962Spjd} 5037168962Spjd 5038168962Spjdstatic int 5039168962Spjdzfs_freebsd_readdir(ap) 5040168962Spjd struct vop_readdir_args /* { 5041168962Spjd struct vnode *a_vp; 5042168962Spjd struct uio *a_uio; 5043168962Spjd struct ucred *a_cred; 5044168962Spjd int *a_eofflag; 5045168962Spjd int *a_ncookies; 5046168962Spjd u_long **a_cookies; 5047168962Spjd } */ *ap; 5048168962Spjd{ 5049168962Spjd 5050168962Spjd return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 5051168962Spjd ap->a_ncookies, ap->a_cookies)); 5052168962Spjd} 5053168962Spjd 5054168962Spjdstatic int 5055168962Spjdzfs_freebsd_fsync(ap) 5056168962Spjd struct vop_fsync_args /* { 5057168962Spjd struct vnode *a_vp; 5058168962Spjd int a_waitfor; 5059168962Spjd struct thread *a_td; 5060168962Spjd } */ *ap; 5061168962Spjd{ 5062168962Spjd 5063168962Spjd vop_stdfsync(ap); 5064185029Spjd return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 5065168962Spjd} 5066168962Spjd 5067168962Spjdstatic int 5068168962Spjdzfs_freebsd_getattr(ap) 5069168962Spjd struct vop_getattr_args /* { 5070168962Spjd struct vnode *a_vp; 5071168962Spjd struct vattr *a_vap; 5072168962Spjd struct ucred *a_cred; 5073168962Spjd } */ *ap; 5074168962Spjd{ 5075185029Spjd vattr_t *vap = ap->a_vap; 5076185029Spjd xvattr_t xvap; 5077185029Spjd u_long fflags = 0; 5078185029Spjd int error; 5079168962Spjd 5080185029Spjd xva_init(&xvap); 5081185029Spjd xvap.xva_vattr = *vap; 5082185029Spjd xvap.xva_vattr.va_mask |= AT_XVATTR; 5083185029Spjd 5084185029Spjd /* Convert chflags into ZFS-type flags. */ 5085185029Spjd /* XXX: what about SF_SETTABLE?. 
*/ 5086185029Spjd XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 5087185029Spjd XVA_SET_REQ(&xvap, XAT_APPENDONLY); 5088185029Spjd XVA_SET_REQ(&xvap, XAT_NOUNLINK); 5089185029Spjd XVA_SET_REQ(&xvap, XAT_NODUMP); 5090254627Sken XVA_SET_REQ(&xvap, XAT_READONLY); 5091254627Sken XVA_SET_REQ(&xvap, XAT_ARCHIVE); 5092254627Sken XVA_SET_REQ(&xvap, XAT_SYSTEM); 5093254627Sken XVA_SET_REQ(&xvap, XAT_HIDDEN); 5094254627Sken XVA_SET_REQ(&xvap, XAT_REPARSE); 5095254627Sken XVA_SET_REQ(&xvap, XAT_OFFLINE); 5096254627Sken XVA_SET_REQ(&xvap, XAT_SPARSE); 5097254627Sken 5098185029Spjd error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 5099185029Spjd if (error != 0) 5100185029Spjd return (error); 5101185029Spjd 5102185029Spjd /* Convert ZFS xattr into chflags. */ 5103185029Spjd#define FLAG_CHECK(fflag, xflag, xfield) do { \ 5104185029Spjd if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 5105185029Spjd fflags |= (fflag); \ 5106185029Spjd} while (0) 5107185029Spjd FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 5108185029Spjd xvap.xva_xoptattrs.xoa_immutable); 5109185029Spjd FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 5110185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 5111185029Spjd FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 5112185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 5113254627Sken FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE, 5114254627Sken xvap.xva_xoptattrs.xoa_archive); 5115185029Spjd FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 5116185029Spjd xvap.xva_xoptattrs.xoa_nodump); 5117254627Sken FLAG_CHECK(UF_READONLY, XAT_READONLY, 5118254627Sken xvap.xva_xoptattrs.xoa_readonly); 5119254627Sken FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM, 5120254627Sken xvap.xva_xoptattrs.xoa_system); 5121254627Sken FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN, 5122254627Sken xvap.xva_xoptattrs.xoa_hidden); 5123254627Sken FLAG_CHECK(UF_REPARSE, XAT_REPARSE, 5124254627Sken xvap.xva_xoptattrs.xoa_reparse); 5125254627Sken FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE, 5126254627Sken xvap.xva_xoptattrs.xoa_offline); 5127254627Sken FLAG_CHECK(UF_SPARSE, 
XAT_SPARSE, 5128254627Sken xvap.xva_xoptattrs.xoa_sparse); 5129254627Sken 5130185029Spjd#undef FLAG_CHECK 5131185029Spjd *vap = xvap.xva_vattr; 5132185029Spjd vap->va_flags = fflags; 5133185029Spjd return (0); 5134168962Spjd} 5135168962Spjd 5136168962Spjdstatic int 5137168962Spjdzfs_freebsd_setattr(ap) 5138168962Spjd struct vop_setattr_args /* { 5139168962Spjd struct vnode *a_vp; 5140168962Spjd struct vattr *a_vap; 5141168962Spjd struct ucred *a_cred; 5142168962Spjd } */ *ap; 5143168962Spjd{ 5144185172Spjd vnode_t *vp = ap->a_vp; 5145168962Spjd vattr_t *vap = ap->a_vap; 5146185172Spjd cred_t *cred = ap->a_cred; 5147185029Spjd xvattr_t xvap; 5148185029Spjd u_long fflags; 5149185029Spjd uint64_t zflags; 5150168962Spjd 5151168962Spjd vattr_init_mask(vap); 5152170044Spjd vap->va_mask &= ~AT_NOSET; 5153168962Spjd 5154185029Spjd xva_init(&xvap); 5155185029Spjd xvap.xva_vattr = *vap; 5156185029Spjd 5157219089Spjd zflags = VTOZ(vp)->z_pflags; 5158185172Spjd 5159185029Spjd if (vap->va_flags != VNOVAL) { 5160197683Sdelphij zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 5161185172Spjd int error; 5162185172Spjd 5163197683Sdelphij if (zfsvfs->z_use_fuids == B_FALSE) 5164197683Sdelphij return (EOPNOTSUPP); 5165197683Sdelphij 5166185029Spjd fflags = vap->va_flags; 5167254627Sken /* 5168254627Sken * XXX KDM 5169254627Sken * We need to figure out whether it makes sense to allow 5170254627Sken * UF_REPARSE through, since we don't really have other 5171254627Sken * facilities to handle reparse points and zfs_setattr() 5172254627Sken * doesn't currently allow setting that attribute anyway. 5173254627Sken */ 5174254627Sken if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE| 5175254627Sken UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE| 5176254627Sken UF_OFFLINE|UF_SPARSE)) != 0) 5177185029Spjd return (EOPNOTSUPP); 5178185172Spjd /* 5179185172Spjd * Unprivileged processes are not permitted to unset system 5180185172Spjd * flags, or modify flags if any system flags are set. 
5181185172Spjd * Privileged non-jail processes may not modify system flags 5182185172Spjd * if securelevel > 0 and any existing system flags are set. 5183185172Spjd * Privileged jail processes behave like privileged non-jail 5184185172Spjd * processes if the security.jail.chflags_allowed sysctl is 5185185172Spjd * non-zero; otherwise, they behave like unprivileged 5186185172Spjd * processes. 5187185172Spjd */ 5188197861Spjd if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 5189197861Spjd priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) { 5190185172Spjd if (zflags & 5191185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 5192185172Spjd error = securelevel_gt(cred, 0); 5193197861Spjd if (error != 0) 5194185172Spjd return (error); 5195185172Spjd } 5196185172Spjd } else { 5197197861Spjd /* 5198197861Spjd * Callers may only modify the file flags on objects they 5199197861Spjd * have VADMIN rights for. 5200197861Spjd */ 5201197861Spjd if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0) 5202197861Spjd return (error); 5203185172Spjd if (zflags & 5204185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 5205185172Spjd return (EPERM); 5206185172Spjd } 5207185172Spjd if (fflags & 5208185172Spjd (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 5209185172Spjd return (EPERM); 5210185172Spjd } 5211185172Spjd } 5212185029Spjd 5213185029Spjd#define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 5214185029Spjd if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 5215185029Spjd ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 5216185029Spjd XVA_SET_REQ(&xvap, (xflag)); \ 5217185029Spjd (xfield) = ((fflags & (fflag)) != 0); \ 5218185029Spjd } \ 5219185029Spjd} while (0) 5220185029Spjd /* Convert chflags into ZFS-type flags. */ 5221185029Spjd /* XXX: what about SF_SETTABLE?. 
*/ 5222185029Spjd FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 5223185029Spjd xvap.xva_xoptattrs.xoa_immutable); 5224185029Spjd FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 5225185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 5226185029Spjd FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 5227185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 5228254627Sken FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE, 5229254627Sken xvap.xva_xoptattrs.xoa_archive); 5230185029Spjd FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 5231185172Spjd xvap.xva_xoptattrs.xoa_nodump); 5232254627Sken FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY, 5233254627Sken xvap.xva_xoptattrs.xoa_readonly); 5234254627Sken FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM, 5235254627Sken xvap.xva_xoptattrs.xoa_system); 5236254627Sken FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN, 5237254627Sken xvap.xva_xoptattrs.xoa_hidden); /* The xfield must match the requested XAT_* attribute: XAT_REPARSE pairs with xoa_reparse. */ 5238254627Sken FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE, 5239254627Sken xvap.xva_xoptattrs.xoa_reparse); 5240254627Sken FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE, 5241254627Sken xvap.xva_xoptattrs.xoa_offline); 5242254627Sken FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE, 5243254627Sken xvap.xva_xoptattrs.xoa_sparse); 5244185029Spjd#undef FLAG_CHANGE 5245185029Spjd } 5246316391Sasomers if (vap->va_birthtime.tv_sec != VNOVAL) { 5247316391Sasomers xvap.xva_vattr.va_mask |= AT_XVATTR; 5248316391Sasomers XVA_SET_REQ(&xvap, XAT_CREATETIME); 5249316391Sasomers } 5250185172Spjd return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL)); 5251168962Spjd} 5252168962Spjd 5253168962Spjdstatic int 5254168962Spjdzfs_freebsd_rename(ap) 5255168962Spjd struct vop_rename_args /* { 5256168962Spjd struct vnode *a_fdvp; 5257168962Spjd struct vnode *a_fvp; 5258168962Spjd struct componentname *a_fcnp; 5259168962Spjd struct vnode *a_tdvp; 5260168962Spjd struct vnode *a_tvp; 5261168962Spjd struct componentname *a_tcnp; 5262168962Spjd } */ *ap; 5263168962Spjd{ 5264168962Spjd 
vnode_t *fdvp = ap->a_fdvp; 5265168962Spjd vnode_t *fvp = ap->a_fvp; 5266168962Spjd vnode_t *tdvp = ap->a_tdvp; 5267168962Spjd vnode_t *tvp = ap->a_tvp; 5268168962Spjd int error; 5269168962Spjd 5270192237Skmacy ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); 5271192237Skmacy ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); 5272168962Spjd 5273303970Savg error = zfs_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp, 5274303970Savg ap->a_tcnp, ap->a_fcnp->cn_cred); 5275168962Spjd 5276303970Savg vrele(fdvp); 5277303970Savg vrele(fvp); 5278303970Savg vrele(tdvp); 5279303970Savg if (tvp != NULL) 5280303970Savg vrele(tvp); 5281303970Savg 5282168962Spjd return (error); 5283168962Spjd} 5284168962Spjd 5285168962Spjdstatic int 5286168962Spjdzfs_freebsd_symlink(ap) 5287168962Spjd struct vop_symlink_args /* { 5288168962Spjd struct vnode *a_dvp; 5289168962Spjd struct vnode **a_vpp; 5290168962Spjd struct componentname *a_cnp; 5291168962Spjd struct vattr *a_vap; 5292168962Spjd char *a_target; 5293168962Spjd } */ *ap; 5294168962Spjd{ 5295168962Spjd struct componentname *cnp = ap->a_cnp; 5296168962Spjd vattr_t *vap = ap->a_vap; 5297168962Spjd 5298168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5299168962Spjd 5300168962Spjd vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. 
*/ 5301168962Spjd vattr_init_mask(vap); 5302168962Spjd 5303168962Spjd return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 5304168962Spjd ap->a_target, cnp->cn_cred, cnp->cn_thread)); 5305168962Spjd} 5306168962Spjd 5307168962Spjdstatic int 5308168962Spjdzfs_freebsd_readlink(ap) 5309168962Spjd struct vop_readlink_args /* { 5310168962Spjd struct vnode *a_vp; 5311168962Spjd struct uio *a_uio; 5312168962Spjd struct ucred *a_cred; 5313168962Spjd } */ *ap; 5314168962Spjd{ 5315168962Spjd 5316185029Spjd return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 5317168962Spjd} 5318168962Spjd 5319168962Spjdstatic int 5320168962Spjdzfs_freebsd_link(ap) 5321168962Spjd struct vop_link_args /* { 5322168962Spjd struct vnode *a_tdvp; 5323168962Spjd struct vnode *a_vp; 5324168962Spjd struct componentname *a_cnp; 5325168962Spjd } */ *ap; 5326168962Spjd{ 5327168962Spjd struct componentname *cnp = ap->a_cnp; 5328254982Sdelphij vnode_t *vp = ap->a_vp; 5329254982Sdelphij vnode_t *tdvp = ap->a_tdvp; 5330168962Spjd 5331254982Sdelphij if (tdvp->v_mount != vp->v_mount) 5332254982Sdelphij return (EXDEV); 5333254982Sdelphij 5334168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5335168962Spjd 5336254982Sdelphij return (zfs_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 5337168962Spjd} 5338168962Spjd 5339168962Spjdstatic int 5340168962Spjdzfs_freebsd_inactive(ap) 5341169170Spjd struct vop_inactive_args /* { 5342169170Spjd struct vnode *a_vp; 5343169170Spjd struct thread *a_td; 5344169170Spjd } */ *ap; 5345168962Spjd{ 5346168962Spjd vnode_t *vp = ap->a_vp; 5347168962Spjd 5348185029Spjd zfs_inactive(vp, ap->a_td->td_ucred, NULL); 5349168962Spjd return (0); 5350168962Spjd} 5351168962Spjd 5352168962Spjdstatic int 5353168962Spjdzfs_freebsd_reclaim(ap) 5354168962Spjd struct vop_reclaim_args /* { 5355168962Spjd struct vnode *a_vp; 5356168962Spjd struct thread *a_td; 5357168962Spjd } */ *ap; 5358168962Spjd{ 5359169170Spjd vnode_t *vp = ap->a_vp; 5360168962Spjd znode_t *zp = 
VTOZ(vp); 5361197133Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5362168962Spjd 5363169025Spjd ASSERT(zp != NULL); 5364169025Spjd 5365243520Savg /* Destroy the vm object and flush associated pages. */ 5366243520Savg vnode_destroy_vobject(vp); 5367243520Savg 5368168962Spjd /* 5369243520Savg * z_teardown_inactive_lock protects from a race with 5370243520Savg * zfs_znode_dmu_fini in zfsvfs_teardown during 5371243520Savg * force unmount. 5372168962Spjd */ 5373243520Savg rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 5374243520Savg if (zp->z_sa_hdl == NULL) 5375196301Spjd zfs_znode_free(zp); 5376243520Savg else 5377243520Savg zfs_zinactive(zp); 5378243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 5379185029Spjd 5380168962Spjd vp->v_data = NULL; 5381168962Spjd return (0); 5382168962Spjd} 5383168962Spjd 5384168962Spjdstatic int 5385168962Spjdzfs_freebsd_fid(ap) 5386168962Spjd struct vop_fid_args /* { 5387168962Spjd struct vnode *a_vp; 5388168962Spjd struct fid *a_fid; 5389168962Spjd } */ *ap; 5390168962Spjd{ 5391168962Spjd 5392185029Spjd return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 5393168962Spjd} 5394168962Spjd 5395168962Spjdstatic int 5396168962Spjdzfs_freebsd_pathconf(ap) 5397168962Spjd struct vop_pathconf_args /* { 5398168962Spjd struct vnode *a_vp; 5399168962Spjd int a_name; 5400168962Spjd register_t *a_retval; 5401168962Spjd } */ *ap; 5402168962Spjd{ 5403168962Spjd ulong_t val; 5404168962Spjd int error; 5405168962Spjd 5406185029Spjd error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 5407328298Sjhb if (error == 0) { 5408168962Spjd *ap->a_retval = val; 5409328298Sjhb return (error); 5410328298Sjhb } 5411328298Sjhb if (error != EOPNOTSUPP) 5412328298Sjhb return (error); 5413168962Spjd 5414196949Strasz switch (ap->a_name) { 5415328298Sjhb case _PC_NAME_MAX: 5416328298Sjhb *ap->a_retval = NAME_MAX; 5417328298Sjhb return (0); 5418328298Sjhb case _PC_PIPE_BUF: 5419328298Sjhb if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == 
VFIFO) { 5420328298Sjhb *ap->a_retval = PIPE_BUF; 5421328298Sjhb return (0); 5422328298Sjhb } 5423328298Sjhb return (EINVAL); 5424196949Strasz default: 5425328298Sjhb return (vop_stdpathconf(ap)); 5426196949Strasz } 5427196949Strasz} 5428196949Strasz 5429185029Spjd/* 5430185029Spjd * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 5431185029Spjd * extended attribute name: 5432185029Spjd * 5433185029Spjd * NAMESPACE PREFIX 5434185029Spjd * system freebsd:system: 5435185029Spjd * user (none, can be used to access ZFS fsattr(5) attributes 5436185029Spjd * created on Solaris) 5437185029Spjd */ 5438185029Spjdstatic int 5439185029Spjdzfs_create_attrname(int attrnamespace, const char *name, char *attrname, 5440185029Spjd size_t size) 5441185029Spjd{ 5442185029Spjd const char *namespace, *prefix, *suffix; 5443185029Spjd 5444185029Spjd /* We don't allow '/' character in attribute name. */ 5445185029Spjd if (strchr(name, '/') != NULL) 5446185029Spjd return (EINVAL); 5447185029Spjd /* We don't allow attribute names that start with "freebsd:" string. */ 5448185029Spjd if (strncmp(name, "freebsd:", 8) == 0) 5449185029Spjd return (EINVAL); 5450185029Spjd 5451185029Spjd bzero(attrname, size); 5452185029Spjd 5453185029Spjd switch (attrnamespace) { 5454185029Spjd case EXTATTR_NAMESPACE_USER: 5455185029Spjd#if 0 5456185029Spjd prefix = "freebsd:"; 5457185029Spjd namespace = EXTATTR_NAMESPACE_USER_STRING; 5458185029Spjd suffix = ":"; 5459185029Spjd#else 5460185029Spjd /* 5461185029Spjd * This is the default namespace by which we can access all 5462185029Spjd * attributes created on Solaris. 
5463185029Spjd */ 5464185029Spjd prefix = namespace = suffix = ""; 5465185029Spjd#endif 5466185029Spjd break; 5467185029Spjd case EXTATTR_NAMESPACE_SYSTEM: 5468185029Spjd prefix = "freebsd:"; 5469185029Spjd namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 5470185029Spjd suffix = ":"; 5471185029Spjd break; 5472185029Spjd case EXTATTR_NAMESPACE_EMPTY: 5473185029Spjd default: 5474185029Spjd return (EINVAL); 5475185029Spjd } 5476185029Spjd if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 5477185029Spjd name) >= size) { 5478185029Spjd return (ENAMETOOLONG); 5479185029Spjd } 5480185029Spjd return (0); 5481185029Spjd} 5482185029Spjd 5483185029Spjd/* 5484185029Spjd * Vnode operating to retrieve a named extended attribute. 5485185029Spjd */ 5486185029Spjdstatic int 5487185029Spjdzfs_getextattr(struct vop_getextattr_args *ap) 5488185029Spjd/* 5489185029Spjdvop_getextattr { 5490185029Spjd IN struct vnode *a_vp; 5491185029Spjd IN int a_attrnamespace; 5492185029Spjd IN const char *a_name; 5493185029Spjd INOUT struct uio *a_uio; 5494185029Spjd OUT size_t *a_size; 5495185029Spjd IN struct ucred *a_cred; 5496185029Spjd IN struct thread *a_td; 5497185029Spjd}; 5498185029Spjd*/ 5499185029Spjd{ 5500185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5501185029Spjd struct thread *td = ap->a_td; 5502185029Spjd struct nameidata nd; 5503185029Spjd char attrname[255]; 5504185029Spjd struct vattr va; 5505185029Spjd vnode_t *xvp = NULL, *vp; 5506185029Spjd int error, flags; 5507185029Spjd 5508195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5509195785Strasz ap->a_cred, ap->a_td, VREAD); 5510195785Strasz if (error != 0) 5511195785Strasz return (error); 5512195785Strasz 5513185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5514185029Spjd sizeof(attrname)); 5515185029Spjd if (error != 0) 5516185029Spjd return (error); 5517185029Spjd 5518185029Spjd ZFS_ENTER(zfsvfs); 5519185029Spjd 5520185029Spjd error = 
zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5521185029Spjd LOOKUP_XATTR); 5522185029Spjd if (error != 0) { 5523185029Spjd ZFS_EXIT(zfsvfs); 5524185029Spjd return (error); 5525185029Spjd } 5526185029Spjd 5527185029Spjd flags = FREAD; 5528241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 5529185029Spjd xvp, td); 5530194586Skib error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL); 5531185029Spjd vp = nd.ni_vp; 5532185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 5533185029Spjd if (error != 0) { 5534196303Spjd ZFS_EXIT(zfsvfs); 5535195785Strasz if (error == ENOENT) 5536195785Strasz error = ENOATTR; 5537185029Spjd return (error); 5538185029Spjd } 5539185029Spjd 5540185029Spjd if (ap->a_size != NULL) { 5541185029Spjd error = VOP_GETATTR(vp, &va, ap->a_cred); 5542185029Spjd if (error == 0) 5543185029Spjd *ap->a_size = (size_t)va.va_size; 5544185029Spjd } else if (ap->a_uio != NULL) 5545224605Smm error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); 5546185029Spjd 5547185029Spjd VOP_UNLOCK(vp, 0); 5548185029Spjd vn_close(vp, flags, ap->a_cred, td); 5549185029Spjd ZFS_EXIT(zfsvfs); 5550185029Spjd 5551185029Spjd return (error); 5552185029Spjd} 5553185029Spjd 5554185029Spjd/* 5555185029Spjd * Vnode operation to remove a named attribute. 
5556185029Spjd */ 5557185029Spjdint 5558185029Spjdzfs_deleteextattr(struct vop_deleteextattr_args *ap) 5559185029Spjd/* 5560185029Spjdvop_deleteextattr { 5561185029Spjd IN struct vnode *a_vp; 5562185029Spjd IN int a_attrnamespace; 5563185029Spjd IN const char *a_name; 5564185029Spjd IN struct ucred *a_cred; 5565185029Spjd IN struct thread *a_td; 5566185029Spjd}; 5567185029Spjd*/ 5568185029Spjd{ 5569185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5570185029Spjd struct thread *td = ap->a_td; 5571185029Spjd struct nameidata nd; 5572185029Spjd char attrname[255]; 5573185029Spjd struct vattr va; 5574185029Spjd vnode_t *xvp = NULL, *vp; 5575185029Spjd int error, flags; 5576185029Spjd 5577195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5578195785Strasz ap->a_cred, ap->a_td, VWRITE); 5579195785Strasz if (error != 0) 5580195785Strasz return (error); 5581195785Strasz 5582185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5583185029Spjd sizeof(attrname)); 5584185029Spjd if (error != 0) 5585185029Spjd return (error); 5586185029Spjd 5587185029Spjd ZFS_ENTER(zfsvfs); 5588185029Spjd 5589185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5590185029Spjd LOOKUP_XATTR); 5591185029Spjd if (error != 0) { 5592185029Spjd ZFS_EXIT(zfsvfs); 5593185029Spjd return (error); 5594185029Spjd } 5595185029Spjd 5596241896Skib NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 5597185029Spjd UIO_SYSSPACE, attrname, xvp, td); 5598185029Spjd error = namei(&nd); 5599185029Spjd vp = nd.ni_vp; 5600185029Spjd if (error != 0) { 5601196303Spjd ZFS_EXIT(zfsvfs); 5602260706Savg NDFREE(&nd, NDF_ONLY_PNBUF); 5603195785Strasz if (error == ENOENT) 5604195785Strasz error = ENOATTR; 5605185029Spjd return (error); 5606185029Spjd } 5607260706Savg 5608185029Spjd error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 5609260706Savg NDFREE(&nd, NDF_ONLY_PNBUF); 5610185029Spjd 5611185029Spjd vput(nd.ni_dvp); 5612185029Spjd if (vp == 
nd.ni_dvp) 5613185029Spjd vrele(vp); 5614185029Spjd else 5615185029Spjd vput(vp); 5616185029Spjd ZFS_EXIT(zfsvfs); 5617185029Spjd 5618185029Spjd return (error); 5619185029Spjd} 5620185029Spjd 5621185029Spjd/* 5622185029Spjd * Vnode operation to set a named attribute. 5623185029Spjd */ 5624185029Spjdstatic int 5625185029Spjdzfs_setextattr(struct vop_setextattr_args *ap) 5626185029Spjd/* 5627185029Spjdvop_setextattr { 5628185029Spjd IN struct vnode *a_vp; 5629185029Spjd IN int a_attrnamespace; 5630185029Spjd IN const char *a_name; 5631185029Spjd INOUT struct uio *a_uio; 5632185029Spjd IN struct ucred *a_cred; 5633185029Spjd IN struct thread *a_td; 5634185029Spjd}; 5635185029Spjd*/ 5636185029Spjd{ 5637185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5638185029Spjd struct thread *td = ap->a_td; 5639185029Spjd struct nameidata nd; 5640185029Spjd char attrname[255]; 5641185029Spjd struct vattr va; 5642185029Spjd vnode_t *xvp = NULL, *vp; 5643185029Spjd int error, flags; 5644185029Spjd 5645195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5646195785Strasz ap->a_cred, ap->a_td, VWRITE); 5647195785Strasz if (error != 0) 5648195785Strasz return (error); 5649195785Strasz 5650185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5651185029Spjd sizeof(attrname)); 5652185029Spjd if (error != 0) 5653185029Spjd return (error); 5654185029Spjd 5655185029Spjd ZFS_ENTER(zfsvfs); 5656185029Spjd 5657185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5658195785Strasz LOOKUP_XATTR | CREATE_XATTR_DIR); 5659185029Spjd if (error != 0) { 5660185029Spjd ZFS_EXIT(zfsvfs); 5661185029Spjd return (error); 5662185029Spjd } 5663185029Spjd 5664185029Spjd flags = FFLAGS(O_WRONLY | O_CREAT); 5665241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 5666185029Spjd xvp, td); 5667194586Skib error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); 5668185029Spjd vp = nd.ni_vp; 5669185029Spjd NDFREE(&nd, 
NDF_ONLY_PNBUF); 5670185029Spjd if (error != 0) { 5671185029Spjd ZFS_EXIT(zfsvfs); 5672185029Spjd return (error); 5673185029Spjd } 5674185029Spjd 5675185029Spjd VATTR_NULL(&va); 5676185029Spjd va.va_size = 0; 5677185029Spjd error = VOP_SETATTR(vp, &va, ap->a_cred); 5678185029Spjd if (error == 0) 5679268420Smav VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred); 5680185029Spjd 5681185029Spjd VOP_UNLOCK(vp, 0); 5682185029Spjd vn_close(vp, flags, ap->a_cred, td); 5683185029Spjd ZFS_EXIT(zfsvfs); 5684185029Spjd 5685185029Spjd return (error); 5686185029Spjd} 5687185029Spjd 5688185029Spjd/* 5689185029Spjd * Vnode operation to retrieve extended attributes on a vnode. 5690185029Spjd */ 5691185029Spjdstatic int 5692185029Spjdzfs_listextattr(struct vop_listextattr_args *ap) 5693185029Spjd/* 5694185029Spjdvop_listextattr { 5695185029Spjd IN struct vnode *a_vp; 5696185029Spjd IN int a_attrnamespace; 5697185029Spjd INOUT struct uio *a_uio; 5698185029Spjd OUT size_t *a_size; 5699185029Spjd IN struct ucred *a_cred; 5700185029Spjd IN struct thread *a_td; 5701185029Spjd}; 5702185029Spjd*/ 5703185029Spjd{ 5704185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5705185029Spjd struct thread *td = ap->a_td; 5706185029Spjd struct nameidata nd; 5707185029Spjd char attrprefix[16]; 5708185029Spjd u_char dirbuf[sizeof(struct dirent)]; 5709185029Spjd struct dirent *dp; 5710185029Spjd struct iovec aiov; 5711185029Spjd struct uio auio, *uio = ap->a_uio; 5712185029Spjd size_t *sizep = ap->a_size; 5713185029Spjd size_t plen; 5714185029Spjd vnode_t *xvp = NULL, *vp; 5715185029Spjd int done, error, eof, pos; 5716185029Spjd 5717195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5718195785Strasz ap->a_cred, ap->a_td, VREAD); 5719196303Spjd if (error != 0) 5720195785Strasz return (error); 5721195785Strasz 5722185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 5723185029Spjd sizeof(attrprefix)); 5724185029Spjd if (error != 0) 5725185029Spjd return (error); 
5726185029Spjd plen = strlen(attrprefix); 5727185029Spjd 5728185029Spjd ZFS_ENTER(zfsvfs); 5729185029Spjd 5730195822Strasz if (sizep != NULL) 5731195822Strasz *sizep = 0; 5732195822Strasz 5733185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5734185029Spjd LOOKUP_XATTR); 5735185029Spjd if (error != 0) { 5736196303Spjd ZFS_EXIT(zfsvfs); 5737195785Strasz /* 5738195785Strasz * ENOATTR means that the EA directory does not yet exist, 5739195785Strasz * i.e. there are no extended attributes there. 5740195785Strasz */ 5741195785Strasz if (error == ENOATTR) 5742195785Strasz error = 0; 5743185029Spjd return (error); 5744185029Spjd } 5745185029Spjd 5746241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 5747188588Sjhb UIO_SYSSPACE, ".", xvp, td); 5748185029Spjd error = namei(&nd); 5749185029Spjd vp = nd.ni_vp; 5750185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 5751185029Spjd if (error != 0) { 5752185029Spjd ZFS_EXIT(zfsvfs); 5753185029Spjd return (error); 5754185029Spjd } 5755185029Spjd 5756185029Spjd auio.uio_iov = &aiov; 5757185029Spjd auio.uio_iovcnt = 1; 5758185029Spjd auio.uio_segflg = UIO_SYSSPACE; 5759185029Spjd auio.uio_td = td; 5760185029Spjd auio.uio_rw = UIO_READ; 5761185029Spjd auio.uio_offset = 0; 5762185029Spjd 5763185029Spjd do { 5764185029Spjd u_char nlen; 5765185029Spjd 5766185029Spjd aiov.iov_base = (void *)dirbuf; 5767185029Spjd aiov.iov_len = sizeof(dirbuf); 5768185029Spjd auio.uio_resid = sizeof(dirbuf); 5769185029Spjd error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 5770185029Spjd done = sizeof(dirbuf) - auio.uio_resid; 5771185029Spjd if (error != 0) 5772185029Spjd break; 5773185029Spjd for (pos = 0; pos < done;) { 5774185029Spjd dp = (struct dirent *)(dirbuf + pos); 5775185029Spjd pos += dp->d_reclen; 5776185029Spjd /* 5777185029Spjd * XXX: Temporarily we also accept DT_UNKNOWN, as this 5778185029Spjd * is what we get when attribute was created on Solaris. 
5779185029Spjd */ 5780185029Spjd if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 5781185029Spjd continue; 5782185029Spjd if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) 5783185029Spjd continue; 5784185029Spjd else if (strncmp(dp->d_name, attrprefix, plen) != 0) 5785185029Spjd continue; 5786185029Spjd nlen = dp->d_namlen - plen; 5787185029Spjd if (sizep != NULL) 5788185029Spjd *sizep += 1 + nlen; 5789185029Spjd else if (uio != NULL) { 5790185029Spjd /* 5791185029Spjd * Format of extattr name entry is one byte for 5792185029Spjd * length and the rest for name. 5793185029Spjd */ 5794185029Spjd error = uiomove(&nlen, 1, uio->uio_rw, uio); 5795185029Spjd if (error == 0) { 5796185029Spjd error = uiomove(dp->d_name + plen, nlen, 5797185029Spjd uio->uio_rw, uio); 5798185029Spjd } 5799185029Spjd if (error != 0) 5800185029Spjd break; 5801185029Spjd } 5802185029Spjd } 5803185029Spjd } while (!eof && error == 0); 5804185029Spjd 5805185029Spjd vput(vp); 5806185029Spjd ZFS_EXIT(zfsvfs); 5807185029Spjd 5808185029Spjd return (error); 5809185029Spjd} 5810185029Spjd 5811192800Straszint 5812192800Straszzfs_freebsd_getacl(ap) 5813192800Strasz struct vop_getacl_args /* { 5814192800Strasz struct vnode *vp; 5815192800Strasz acl_type_t type; 5816192800Strasz struct acl *aclp; 5817192800Strasz struct ucred *cred; 5818192800Strasz struct thread *td; 5819192800Strasz } */ *ap; 5820192800Strasz{ 5821192800Strasz int error; 5822192800Strasz vsecattr_t vsecattr; 5823192800Strasz 5824192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 5825197435Strasz return (EINVAL); 5826192800Strasz 5827192800Strasz vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 5828192800Strasz if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)) 5829192800Strasz return (error); 5830192800Strasz 5831192800Strasz error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt); 5832196303Spjd if (vsecattr.vsa_aclentp != NULL) 5833196303Spjd kmem_free(vsecattr.vsa_aclentp, 
vsecattr.vsa_aclentsz); 5834192800Strasz 5835196303Spjd return (error); 5836192800Strasz} 5837192800Strasz 5838192800Straszint 5839192800Straszzfs_freebsd_setacl(ap) 5840192800Strasz struct vop_setacl_args /* { 5841192800Strasz struct vnode *vp; 5842192800Strasz acl_type_t type; 5843192800Strasz struct acl *aclp; 5844192800Strasz struct ucred *cred; 5845192800Strasz struct thread *td; 5846192800Strasz } */ *ap; 5847192800Strasz{ 5848192800Strasz int error; 5849192800Strasz vsecattr_t vsecattr; 5850192800Strasz int aclbsize; /* size of acl list in bytes */ 5851192800Strasz aclent_t *aaclp; 5852192800Strasz 5853192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 5854197435Strasz return (EINVAL); 5855192800Strasz 5856314710Smm if (ap->a_aclp == NULL) 5857314710Smm return (EINVAL); 5858314710Smm 5859192800Strasz if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 5860192800Strasz return (EINVAL); 5861192800Strasz 5862192800Strasz /* 5863196949Strasz * With NFSv4 ACLs, chmod(2) may need to add additional entries, 5864192800Strasz * splitting every entry into two and appending "canonical six" 5865192800Strasz * entries at the end. Don't allow for setting an ACL that would 5866192800Strasz * cause chmod(2) to run out of ACL entries. 
5867192800Strasz */ 5868192800Strasz if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 5869192800Strasz return (ENOSPC); 5870192800Strasz 5871208030Strasz error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 5872208030Strasz if (error != 0) 5873208030Strasz return (error); 5874208030Strasz 5875192800Strasz vsecattr.vsa_mask = VSA_ACE; 5876192800Strasz aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t); 5877192800Strasz vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 5878192800Strasz aaclp = vsecattr.vsa_aclentp; 5879192800Strasz vsecattr.vsa_aclentsz = aclbsize; 5880192800Strasz 5881192800Strasz aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 5882192800Strasz error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL); 5883192800Strasz kmem_free(aaclp, aclbsize); 5884192800Strasz 5885192800Strasz return (error); 5886192800Strasz} 5887192800Strasz 5888192800Straszint 5889192800Straszzfs_freebsd_aclcheck(ap) 5890192800Strasz struct vop_aclcheck_args /* { 5891192800Strasz struct vnode *vp; 5892192800Strasz acl_type_t type; 5893192800Strasz struct acl *aclp; 5894192800Strasz struct ucred *cred; 5895192800Strasz struct thread *td; 5896192800Strasz } */ *ap; 5897192800Strasz{ 5898192800Strasz 5899192800Strasz return (EOPNOTSUPP); 5900192800Strasz} 5901192800Strasz 5902299906Savgstatic int 5903299906Savgzfs_vptocnp(struct vop_vptocnp_args *ap) 5904299906Savg{ 5905299906Savg vnode_t *covered_vp; 5906299906Savg vnode_t *vp = ap->a_vp;; 5907299906Savg zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 5908299906Savg znode_t *zp = VTOZ(vp); 5909299906Savg int ltype; 5910299906Savg int error; 5911299906Savg 5912301870Savg ZFS_ENTER(zfsvfs); 5913301870Savg ZFS_VERIFY_ZP(zp); 5914301870Savg 5915299906Savg /* 5916299906Savg * If we are a snapshot mounted under .zfs, run the operation 5917299906Savg * on the covered vnode. 
5918299906Savg */ 5919324158Savg if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) { 5920307995Savg char name[MAXNAMLEN + 1]; 5921307995Savg znode_t *dzp; 5922307995Savg size_t len; 5923307995Savg 5924307995Savg error = zfs_znode_parent_and_name(zp, &dzp, name); 5925307995Savg if (error == 0) { 5926307995Savg len = strlen(name); 5927314030Savg if (*ap->a_buflen < len) 5928314030Savg error = SET_ERROR(ENOMEM); 5929314030Savg } 5930314030Savg if (error == 0) { 5931307995Savg *ap->a_buflen -= len; 5932307995Savg bcopy(name, ap->a_buf + *ap->a_buflen, len); 5933307995Savg *ap->a_vpp = ZTOV(dzp); 5934307995Savg } 5935301870Savg ZFS_EXIT(zfsvfs); 5936307995Savg return (error); 5937301870Savg } 5938301870Savg ZFS_EXIT(zfsvfs); 5939299906Savg 5940299906Savg covered_vp = vp->v_mount->mnt_vnodecovered; 5941299906Savg vhold(covered_vp); 5942299906Savg ltype = VOP_ISLOCKED(vp); 5943299906Savg VOP_UNLOCK(vp, 0); 5944315842Savg error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread); 5945299906Savg if (error == 0) { 5946299906Savg error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred, 5947299906Savg ap->a_buf, ap->a_buflen); 5948299906Savg vput(covered_vp); 5949299906Savg } 5950299906Savg vn_lock(vp, ltype | LK_RETRY); 5951299906Savg if ((vp->v_iflag & VI_DOOMED) != 0) 5952299906Savg error = SET_ERROR(ENOENT); 5953299906Savg return (error); 5954299906Savg} 5955299906Savg 5956303970Savg#ifdef DIAGNOSTIC 5957303970Savgstatic int 5958303970Savgzfs_lock(ap) 5959303970Savg struct vop_lock1_args /* { 5960303970Savg struct vnode *a_vp; 5961303970Savg int a_flags; 5962303970Savg char *file; 5963303970Savg int line; 5964303970Savg } */ *ap; 5965303970Savg{ 5966310066Savg vnode_t *vp; 5967303970Savg znode_t *zp; 5968303970Savg int err; 5969303970Savg 5970303970Savg err = vop_stdlock(ap); 5971310066Savg if (err == 0 && (ap->a_flags & LK_NOWAIT) == 0) { 5972310066Savg vp = ap->a_vp; 5973310066Savg zp = vp->v_data; 5974310066Savg if (vp->v_mount != NULL && (vp->v_iflag & 
VI_DOOMED) == 0 && 5975310066Savg zp != NULL && (zp->z_pflags & ZFS_XATTR) == 0) 5976310066Savg VERIFY(!RRM_LOCK_HELD(&zp->z_zfsvfs->z_teardown_lock)); 5977303970Savg } 5978303970Savg return (err); 5979303970Savg} 5980303970Savg#endif 5981303970Savg 5982168404Spjdstruct vop_vector zfs_vnodeops; 5983168404Spjdstruct vop_vector zfs_fifoops; 5984209962Smmstruct vop_vector zfs_shareops; 5985168404Spjd 5986168404Spjdstruct vop_vector zfs_vnodeops = { 5987185029Spjd .vop_default = &default_vnodeops, 5988185029Spjd .vop_inactive = zfs_freebsd_inactive, 5989185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 5990185029Spjd .vop_access = zfs_freebsd_access, 5991303970Savg .vop_lookup = zfs_cache_lookup, 5992185029Spjd .vop_cachedlookup = zfs_freebsd_lookup, 5993185029Spjd .vop_getattr = zfs_freebsd_getattr, 5994185029Spjd .vop_setattr = zfs_freebsd_setattr, 5995185029Spjd .vop_create = zfs_freebsd_create, 5996185029Spjd .vop_mknod = zfs_freebsd_create, 5997185029Spjd .vop_mkdir = zfs_freebsd_mkdir, 5998185029Spjd .vop_readdir = zfs_freebsd_readdir, 5999185029Spjd .vop_fsync = zfs_freebsd_fsync, 6000185029Spjd .vop_open = zfs_freebsd_open, 6001185029Spjd .vop_close = zfs_freebsd_close, 6002185029Spjd .vop_rmdir = zfs_freebsd_rmdir, 6003185029Spjd .vop_ioctl = zfs_freebsd_ioctl, 6004185029Spjd .vop_link = zfs_freebsd_link, 6005185029Spjd .vop_symlink = zfs_freebsd_symlink, 6006185029Spjd .vop_readlink = zfs_freebsd_readlink, 6007185029Spjd .vop_read = zfs_freebsd_read, 6008185029Spjd .vop_write = zfs_freebsd_write, 6009185029Spjd .vop_remove = zfs_freebsd_remove, 6010185029Spjd .vop_rename = zfs_freebsd_rename, 6011185029Spjd .vop_pathconf = zfs_freebsd_pathconf, 6012243518Savg .vop_bmap = zfs_freebsd_bmap, 6013185029Spjd .vop_fid = zfs_freebsd_fid, 6014185029Spjd .vop_getextattr = zfs_getextattr, 6015185029Spjd .vop_deleteextattr = zfs_deleteextattr, 6016185029Spjd .vop_setextattr = zfs_setextattr, 6017185029Spjd .vop_listextattr = zfs_listextattr, 6018192800Strasz .vop_getacl = 
zfs_freebsd_getacl, 6019192800Strasz .vop_setacl = zfs_freebsd_setacl, 6020192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6021213937Savg .vop_getpages = zfs_freebsd_getpages, 6022258746Savg .vop_putpages = zfs_freebsd_putpages, 6023299906Savg .vop_vptocnp = zfs_vptocnp, 6024303970Savg#ifdef DIAGNOSTIC 6025303970Savg .vop_lock1 = zfs_lock, 6026303970Savg#endif 6027168404Spjd}; 6028168404Spjd 6029169170Spjdstruct vop_vector zfs_fifoops = { 6030185029Spjd .vop_default = &fifo_specops, 6031200162Skib .vop_fsync = zfs_freebsd_fsync, 6032185029Spjd .vop_access = zfs_freebsd_access, 6033185029Spjd .vop_getattr = zfs_freebsd_getattr, 6034185029Spjd .vop_inactive = zfs_freebsd_inactive, 6035185029Spjd .vop_read = VOP_PANIC, 6036185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 6037185029Spjd .vop_setattr = zfs_freebsd_setattr, 6038185029Spjd .vop_write = VOP_PANIC, 6039328298Sjhb .vop_pathconf = zfs_freebsd_pathconf, 6040185029Spjd .vop_fid = zfs_freebsd_fid, 6041192800Strasz .vop_getacl = zfs_freebsd_getacl, 6042192800Strasz .vop_setacl = zfs_freebsd_setacl, 6043192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6044168404Spjd}; 6045209962Smm 6046209962Smm/* 6047209962Smm * special share hidden files vnode operations template 6048209962Smm */ 6049209962Smmstruct vop_vector zfs_shareops = { 6050209962Smm .vop_default = &default_vnodeops, 6051209962Smm .vop_access = zfs_freebsd_access, 6052209962Smm .vop_inactive = zfs_freebsd_inactive, 6053209962Smm .vop_reclaim = zfs_freebsd_reclaim, 6054209962Smm .vop_fid = zfs_freebsd_fid, 6055209962Smm .vop_pathconf = zfs_freebsd_pathconf, 6056209962Smm}; 6057