zfs_vnops.c revision 321559
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21321545Smav 22168404Spjd/* 23212694Smm * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24289562Smav * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 25296519Smav * Copyright (c) 2014 Integros [integros.com] 26321545Smav * Copyright 2017 Nexenta Systems, Inc. 
27168404Spjd */ 28168404Spjd 29169195Spjd/* Portions Copyright 2007 Jeremy Teo */ 30219089Spjd/* Portions Copyright 2010 Robert Milkowski */ 31169195Spjd 32168404Spjd#include <sys/types.h> 33168404Spjd#include <sys/param.h> 34168404Spjd#include <sys/time.h> 35168404Spjd#include <sys/systm.h> 36168404Spjd#include <sys/sysmacros.h> 37168404Spjd#include <sys/resource.h> 38168404Spjd#include <sys/vfs.h> 39248084Sattilio#include <sys/vm.h> 40168404Spjd#include <sys/vnode.h> 41168404Spjd#include <sys/file.h> 42168404Spjd#include <sys/stat.h> 43168404Spjd#include <sys/kmem.h> 44168404Spjd#include <sys/taskq.h> 45168404Spjd#include <sys/uio.h> 46168404Spjd#include <sys/atomic.h> 47168404Spjd#include <sys/namei.h> 48168404Spjd#include <sys/mman.h> 49168404Spjd#include <sys/cmn_err.h> 50168404Spjd#include <sys/errno.h> 51168404Spjd#include <sys/unistd.h> 52168404Spjd#include <sys/zfs_dir.h> 53168404Spjd#include <sys/zfs_ioctl.h> 54168404Spjd#include <sys/fs/zfs.h> 55168404Spjd#include <sys/dmu.h> 56219089Spjd#include <sys/dmu_objset.h> 57168404Spjd#include <sys/spa.h> 58168404Spjd#include <sys/txg.h> 59168404Spjd#include <sys/dbuf.h> 60168404Spjd#include <sys/zap.h> 61219089Spjd#include <sys/sa.h> 62168404Spjd#include <sys/dirent.h> 63168962Spjd#include <sys/policy.h> 64168962Spjd#include <sys/sunddi.h> 65168404Spjd#include <sys/filio.h> 66209962Smm#include <sys/sid.h> 67168404Spjd#include <sys/zfs_ctldir.h> 68185029Spjd#include <sys/zfs_fuid.h> 69219089Spjd#include <sys/zfs_sa.h> 70168404Spjd#include <sys/zfs_rlock.h> 71185029Spjd#include <sys/extdirent.h> 72185029Spjd#include <sys/kidmap.h> 73168404Spjd#include <sys/bio.h> 74168404Spjd#include <sys/buf.h> 75168404Spjd#include <sys/sched.h> 76192800Strasz#include <sys/acl.h> 77239077Smarius#include <vm/vm_param.h> 78168404Spjd 79168404Spjd/* 80168404Spjd * Programming rules. 81168404Spjd * 82168404Spjd * Each vnode op performs some logical unit of work. 
To do this, the ZPL must 83168404Spjd * properly lock its in-core state, create a DMU transaction, do the work, 84168404Spjd * record this work in the intent log (ZIL), commit the DMU transaction, 85185029Spjd * and wait for the intent log to commit if it is a synchronous operation. 86185029Spjd * Moreover, the vnode ops must work in both normal and log replay context. 87168404Spjd * The ordering of events is important to avoid deadlocks and references 88168404Spjd * to freed memory. The example below illustrates the following Big Rules: 89168404Spjd * 90251631Sdelphij * (1) A check must be made in each zfs thread for a mounted file system. 91168404Spjd * This is done avoiding races using ZFS_ENTER(zfsvfs). 92251631Sdelphij * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 93251631Sdelphij * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 94251631Sdelphij * can return EIO from the calling function. 95168404Spjd * 96168404Spjd * (2) VN_RELE() should always be the last thing except for zil_commit() 97168404Spjd * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 98168404Spjd * First, if it's the last reference, the vnode/znode 99168404Spjd * can be freed, so the zp may point to freed memory. Second, the last 100168404Spjd * reference will call zfs_zinactive(), which may induce a lot of work -- 101168404Spjd * pushing cached pages (which acquires range locks) and syncing out 102168404Spjd * cached atime changes. Third, zfs_zinactive() may require a new tx, 103168404Spjd * which could deadlock the system if you were already holding one. 104191900Skmacy * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 105168404Spjd * 106168404Spjd * (3) All range locks must be grabbed before calling dmu_tx_assign(), 107168404Spjd * as they can span dmu_tx_assign() calls. 108168404Spjd * 109258720Savg * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to 110258720Savg * dmu_tx_assign(). 
This is critical because we don't want to block 111258720Savg * while holding locks. 112168404Spjd * 113258720Savg * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This 114258720Savg * reduces lock contention and CPU usage when we must wait (note that if 115258720Savg * throughput is constrained by the storage, nearly every transaction 116258720Savg * must wait). 117258720Savg * 118258720Savg * Note, in particular, that if a lock is sometimes acquired before 119258720Savg * the tx assigns, and sometimes after (e.g. z_lock), then failing 120258720Savg * to use a non-blocking assign can deadlock the system. The scenario: 121258720Savg * 122168404Spjd * Thread A has grabbed a lock before calling dmu_tx_assign(). 123168404Spjd * Thread B is in an already-assigned tx, and blocks for this lock. 124168404Spjd * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 125168404Spjd * forever, because the previous txg can't quiesce until B's tx commits. 126168404Spjd * 127168404Spjd * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 128258632Savg * then drop all locks, call dmu_tx_wait(), and try again. On subsequent 129258632Savg * calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT, 130258632Savg * to indicate that this operation has already called dmu_tx_wait(). 131258632Savg * This will ensure that we don't retry forever, waiting a short bit 132258632Savg * each time. 133168404Spjd * 134168404Spjd * (5) If the operation succeeded, generate the intent log entry for it 135168404Spjd * before dropping locks. This ensures that the ordering of events 136168404Spjd * in the intent log matches the order in which they actually occurred. 137251631Sdelphij * During ZIL replay the zfs_log_* functions will update the sequence 138209962Smm * number to indicate the zil transaction has replayed. 
139168404Spjd * 140168404Spjd * (6) At the end of each vnode op, the DMU tx must always commit, 141168404Spjd * regardless of whether there were any errors. 142168404Spjd * 143219089Spjd * (7) After dropping all locks, invoke zil_commit(zilog, foid) 144168404Spjd * to ensure that synchronous semantics are provided when necessary. 145168404Spjd * 146168404Spjd * In general, this is how things should be ordered in each vnode op: 147168404Spjd * 148168404Spjd * ZFS_ENTER(zfsvfs); // exit if unmounted 149168404Spjd * top: 150303970Savg * zfs_dirent_lookup(&dl, ...) // lock directory entry (may VN_HOLD()) 151168404Spjd * rw_enter(...); // grab any other locks you need 152168404Spjd * tx = dmu_tx_create(...); // get DMU tx 153168404Spjd * dmu_tx_hold_*(); // hold each object you might modify 154258632Savg * error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 155168404Spjd * if (error) { 156168404Spjd * rw_exit(...); // drop locks 157168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 158168404Spjd * VN_RELE(...); // release held vnodes 159209962Smm * if (error == ERESTART) { 160258632Savg * waited = B_TRUE; 161168404Spjd * dmu_tx_wait(tx); 162168404Spjd * dmu_tx_abort(tx); 163168404Spjd * goto top; 164168404Spjd * } 165168404Spjd * dmu_tx_abort(tx); // abort DMU tx 166168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 167168404Spjd * return (error); // really out of space 168168404Spjd * } 169168404Spjd * error = do_real_work(); // do whatever this VOP does 170168404Spjd * if (error == 0) 171168404Spjd * zfs_log_*(...); // on success, make ZIL entry 172168404Spjd * dmu_tx_commit(tx); // commit DMU tx -- error or not 173168404Spjd * rw_exit(...); // drop locks 174168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 175168404Spjd * VN_RELE(...); // release held vnodes 176219089Spjd * zil_commit(zilog, foid); // synchronous when necessary 177168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 178168404Spjd * return (error); // done, report error 
179168404Spjd */ 180185029Spjd 181168404Spjd/* ARGSUSED */ 182168404Spjdstatic int 183185029Spjdzfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 184168404Spjd{ 185168962Spjd znode_t *zp = VTOZ(*vpp); 186209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 187168404Spjd 188209962Smm ZFS_ENTER(zfsvfs); 189209962Smm ZFS_VERIFY_ZP(zp); 190209962Smm 191219089Spjd if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 192185029Spjd ((flag & FAPPEND) == 0)) { 193209962Smm ZFS_EXIT(zfsvfs); 194249195Smm return (SET_ERROR(EPERM)); 195185029Spjd } 196185029Spjd 197185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 198185029Spjd ZTOV(zp)->v_type == VREG && 199219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 200209962Smm if (fs_vscan(*vpp, cr, 0) != 0) { 201209962Smm ZFS_EXIT(zfsvfs); 202249195Smm return (SET_ERROR(EACCES)); 203209962Smm } 204209962Smm } 205185029Spjd 206168404Spjd /* Keep a count of the synchronous opens in the znode */ 207168962Spjd if (flag & (FSYNC | FDSYNC)) 208168404Spjd atomic_inc_32(&zp->z_sync_cnt); 209185029Spjd 210209962Smm ZFS_EXIT(zfsvfs); 211168404Spjd return (0); 212168404Spjd} 213168404Spjd 214168404Spjd/* ARGSUSED */ 215168404Spjdstatic int 216185029Spjdzfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 217185029Spjd caller_context_t *ct) 218168404Spjd{ 219168962Spjd znode_t *zp = VTOZ(vp); 220209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 221168404Spjd 222210470Smm /* 223210470Smm * Clean up any locks held by this process on the vp. 
224210470Smm */ 225210470Smm cleanlocks(vp, ddi_get_pid(), 0); 226210470Smm cleanshares(vp, ddi_get_pid()); 227210470Smm 228209962Smm ZFS_ENTER(zfsvfs); 229209962Smm ZFS_VERIFY_ZP(zp); 230209962Smm 231168404Spjd /* Decrement the synchronous opens in the znode */ 232185029Spjd if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 233168404Spjd atomic_dec_32(&zp->z_sync_cnt); 234168404Spjd 235185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 236185029Spjd ZTOV(zp)->v_type == VREG && 237219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 238185029Spjd VERIFY(fs_vscan(vp, cr, 1) == 0); 239185029Spjd 240209962Smm ZFS_EXIT(zfsvfs); 241168404Spjd return (0); 242168404Spjd} 243168404Spjd 244168404Spjd/* 245168404Spjd * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 246168404Spjd * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 247168404Spjd */ 248168404Spjdstatic int 249168978Spjdzfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 250168404Spjd{ 251168404Spjd znode_t *zp = VTOZ(vp); 252168404Spjd uint64_t noff = (uint64_t)*off; /* new offset */ 253168404Spjd uint64_t file_sz; 254168404Spjd int error; 255168404Spjd boolean_t hole; 256168404Spjd 257219089Spjd file_sz = zp->z_size; 258168404Spjd if (noff >= file_sz) { 259249195Smm return (SET_ERROR(ENXIO)); 260168404Spjd } 261168404Spjd 262168962Spjd if (cmd == _FIO_SEEK_HOLE) 263168404Spjd hole = B_TRUE; 264168404Spjd else 265168404Spjd hole = B_FALSE; 266168404Spjd 267168404Spjd error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 268168404Spjd 269271536Sdelphij if (error == ESRCH) 270249195Smm return (SET_ERROR(ENXIO)); 271271536Sdelphij 272271536Sdelphij /* 273271536Sdelphij * We could find a hole that begins after the logical end-of-file, 274271536Sdelphij * because dmu_offset_next() only works on whole blocks. 
If the 275271536Sdelphij * EOF falls mid-block, then indicate that the "virtual hole" 276271536Sdelphij * at the end of the file begins at the logical EOF, rather than 277271536Sdelphij * at the end of the last block. 278271536Sdelphij */ 279271536Sdelphij if (noff > file_sz) { 280271536Sdelphij ASSERT(hole); 281271536Sdelphij noff = file_sz; 282168404Spjd } 283168404Spjd 284168404Spjd if (noff < *off) 285168404Spjd return (error); 286168404Spjd *off = noff; 287168404Spjd return (error); 288168404Spjd} 289168404Spjd 290168404Spjd/* ARGSUSED */ 291168404Spjdstatic int 292168978Spjdzfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 293185029Spjd int *rvalp, caller_context_t *ct) 294168404Spjd{ 295168962Spjd offset_t off; 296287103Savg offset_t ndata; 297287103Savg dmu_object_info_t doi; 298168962Spjd int error; 299168962Spjd zfsvfs_t *zfsvfs; 300185029Spjd znode_t *zp; 301168404Spjd 302168404Spjd switch (com) { 303185029Spjd case _FIOFFS: 304287103Savg { 305168962Spjd return (0); 306168404Spjd 307168962Spjd /* 308168962Spjd * The following two ioctls are used by bfu. Faking out, 309168962Spjd * necessary to avoid bfu errors. 
310168962Spjd */ 311287103Savg } 312185029Spjd case _FIOGDIO: 313185029Spjd case _FIOSDIO: 314287103Savg { 315168962Spjd return (0); 316287103Savg } 317168962Spjd 318185029Spjd case _FIO_SEEK_DATA: 319185029Spjd case _FIO_SEEK_HOLE: 320287103Savg { 321277300Ssmh#ifdef illumos 322168962Spjd if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 323249195Smm return (SET_ERROR(EFAULT)); 324233918Savg#else 325233918Savg off = *(offset_t *)data; 326233918Savg#endif 327185029Spjd zp = VTOZ(vp); 328185029Spjd zfsvfs = zp->z_zfsvfs; 329168404Spjd ZFS_ENTER(zfsvfs); 330185029Spjd ZFS_VERIFY_ZP(zp); 331168404Spjd 332168404Spjd /* offset parameter is in/out */ 333168404Spjd error = zfs_holey(vp, com, &off); 334168404Spjd ZFS_EXIT(zfsvfs); 335168404Spjd if (error) 336168404Spjd return (error); 337277300Ssmh#ifdef illumos 338168962Spjd if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 339249195Smm return (SET_ERROR(EFAULT)); 340233918Savg#else 341233918Savg *(offset_t *)data = off; 342233918Savg#endif 343168404Spjd return (0); 344168404Spjd } 345287103Savg#ifdef illumos 346287103Savg case _FIO_COUNT_FILLED: 347287103Savg { 348287103Savg /* 349287103Savg * _FIO_COUNT_FILLED adds a new ioctl command which 350287103Savg * exposes the number of filled blocks in a 351287103Savg * ZFS object. 352287103Savg */ 353287103Savg zp = VTOZ(vp); 354287103Savg zfsvfs = zp->z_zfsvfs; 355287103Savg ZFS_ENTER(zfsvfs); 356287103Savg ZFS_VERIFY_ZP(zp); 357287103Savg 358287103Savg /* 359287103Savg * Wait for all dirty blocks for this object 360287103Savg * to get synced out to disk, and the DMU info 361287103Savg * updated. 362287103Savg */ 363287103Savg error = dmu_object_wait_synced(zfsvfs->z_os, zp->z_id); 364287103Savg if (error) { 365287103Savg ZFS_EXIT(zfsvfs); 366287103Savg return (error); 367287103Savg } 368287103Savg 369287103Savg /* 370287103Savg * Retrieve fill count from DMU object. 
371287103Savg */ 372287103Savg error = dmu_object_info(zfsvfs->z_os, zp->z_id, &doi); 373287103Savg if (error) { 374287103Savg ZFS_EXIT(zfsvfs); 375287103Savg return (error); 376287103Savg } 377287103Savg 378287103Savg ndata = doi.doi_fill_count; 379287103Savg 380287103Savg ZFS_EXIT(zfsvfs); 381287103Savg if (ddi_copyout(&ndata, (void *)data, sizeof (ndata), flag)) 382287103Savg return (SET_ERROR(EFAULT)); 383287103Savg return (0); 384287103Savg } 385287103Savg#endif 386287103Savg } 387249195Smm return (SET_ERROR(ENOTTY)); 388168404Spjd} 389168404Spjd 390209962Smmstatic vm_page_t 391253953Sattiliopage_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 392209962Smm{ 393209962Smm vm_object_t obj; 394209962Smm vm_page_t pp; 395258353Savg int64_t end; 396209962Smm 397258353Savg /* 398258353Savg * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE 399258353Savg * aligned boundaries, if the range is not aligned. As a result a 400258353Savg * DEV_BSIZE subrange with partially dirty data may get marked as clean. 401258353Savg * It may happen that all DEV_BSIZE subranges are marked clean and thus 402258353Savg * the whole page would be considred clean despite have some dirty data. 403258353Savg * For this reason we should shrink the range to DEV_BSIZE aligned 404258353Savg * boundaries before calling vm_page_clear_dirty. 405258353Savg */ 406258353Savg end = rounddown2(off + nbytes, DEV_BSIZE); 407258353Savg off = roundup2(off, DEV_BSIZE); 408258353Savg nbytes = end - off; 409258353Savg 410209962Smm obj = vp->v_object; 411248084Sattilio zfs_vmobject_assert_wlocked(obj); 412209962Smm 413209962Smm for (;;) { 414209962Smm if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 415246293Savg pp->valid) { 416254138Sattilio if (vm_page_xbusied(pp)) { 417212652Savg /* 418212652Savg * Reference the page before unlocking and 419212652Savg * sleeping so that the page daemon is less 420212652Savg * likely to reclaim it. 
421212652Savg */ 422225418Skib vm_page_reference(pp); 423254138Sattilio vm_page_lock(pp); 424254138Sattilio zfs_vmobject_wunlock(obj); 425307671Skib vm_page_busy_sleep(pp, "zfsmwb", true); 426254138Sattilio zfs_vmobject_wlock(obj); 427209962Smm continue; 428212652Savg } 429254138Sattilio vm_page_sbusy(pp); 430319091Savg } else if (pp != NULL) { 431319091Savg ASSERT(!pp->valid); 432252337Sgavin pp = NULL; 433209962Smm } 434246293Savg 435246293Savg if (pp != NULL) { 436246293Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 437253953Sattilio vm_object_pip_add(obj, 1); 438246293Savg pmap_remove_write(pp); 439258353Savg if (nbytes != 0) 440258353Savg vm_page_clear_dirty(pp, off, nbytes); 441246293Savg } 442209962Smm break; 443209962Smm } 444209962Smm return (pp); 445209962Smm} 446209962Smm 447209962Smmstatic void 448253953Sattiliopage_unbusy(vm_page_t pp) 449209962Smm{ 450209962Smm 451254138Sattilio vm_page_sunbusy(pp); 452253953Sattilio vm_object_pip_subtract(pp->object, 1); 453209962Smm} 454209962Smm 455253953Sattiliostatic vm_page_t 456253953Sattiliopage_hold(vnode_t *vp, int64_t start) 457253953Sattilio{ 458253953Sattilio vm_object_t obj; 459253953Sattilio vm_page_t pp; 460253953Sattilio 461253953Sattilio obj = vp->v_object; 462253953Sattilio zfs_vmobject_assert_wlocked(obj); 463253953Sattilio 464253953Sattilio for (;;) { 465253953Sattilio if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 466253953Sattilio pp->valid) { 467254138Sattilio if (vm_page_xbusied(pp)) { 468253953Sattilio /* 469253953Sattilio * Reference the page before unlocking and 470253953Sattilio * sleeping so that the page daemon is less 471253953Sattilio * likely to reclaim it. 
472253953Sattilio */ 473253953Sattilio vm_page_reference(pp); 474254138Sattilio vm_page_lock(pp); 475254138Sattilio zfs_vmobject_wunlock(obj); 476307671Skib vm_page_busy_sleep(pp, "zfsmwb", true); 477254138Sattilio zfs_vmobject_wlock(obj); 478253953Sattilio continue; 479253953Sattilio } 480253953Sattilio 481253953Sattilio ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 482253953Sattilio vm_page_lock(pp); 483253953Sattilio vm_page_hold(pp); 484253953Sattilio vm_page_unlock(pp); 485253953Sattilio 486253953Sattilio } else 487253953Sattilio pp = NULL; 488253953Sattilio break; 489253953Sattilio } 490253953Sattilio return (pp); 491253953Sattilio} 492253953Sattilio 493253953Sattiliostatic void 494253953Sattiliopage_unhold(vm_page_t pp) 495253953Sattilio{ 496253953Sattilio 497253953Sattilio vm_page_lock(pp); 498253953Sattilio vm_page_unhold(pp); 499253953Sattilio vm_page_unlock(pp); 500253953Sattilio} 501253953Sattilio 502168404Spjd/* 503168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 504168404Spjd * between the DMU cache and the memory mapped pages. What this means: 505168404Spjd * 506168404Spjd * On Write: If we find a memory mapped page, we write to *both* 507168404Spjd * the page and the dmu buffer. 
508168404Spjd */ 509209962Smmstatic void 510209962Smmupdate_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 511209962Smm int segflg, dmu_tx_t *tx) 512168404Spjd{ 513168404Spjd vm_object_t obj; 514168404Spjd struct sf_buf *sf; 515246293Savg caddr_t va; 516212655Savg int off; 517168404Spjd 518258746Savg ASSERT(segflg != UIO_NOCOPY); 519168404Spjd ASSERT(vp->v_mount != NULL); 520168404Spjd obj = vp->v_object; 521168404Spjd ASSERT(obj != NULL); 522168404Spjd 523168404Spjd off = start & PAGEOFFSET; 524248084Sattilio zfs_vmobject_wlock(obj); 525168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 526209962Smm vm_page_t pp; 527246293Savg int nbytes = imin(PAGESIZE - off, len); 528168404Spjd 529258746Savg if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { 530248084Sattilio zfs_vmobject_wunlock(obj); 531168404Spjd 532246293Savg va = zfs_map_page(pp, &sf); 533246293Savg (void) dmu_read(os, oid, start+off, nbytes, 534246293Savg va+off, DMU_READ_PREFETCH);; 535209962Smm zfs_unmap_page(sf); 536246293Savg 537248084Sattilio zfs_vmobject_wlock(obj); 538253953Sattilio page_unbusy(pp); 539168404Spjd } 540209962Smm len -= nbytes; 541168404Spjd off = 0; 542168404Spjd } 543258746Savg vm_object_pip_wakeupn(obj, 0); 544248084Sattilio zfs_vmobject_wunlock(obj); 545168404Spjd} 546168404Spjd 547168404Spjd/* 548219089Spjd * Read with UIO_NOCOPY flag means that sendfile(2) requests 549219089Spjd * ZFS to populate a range of page cache pages with data. 550219089Spjd * 551219089Spjd * NOTE: this function could be optimized to pre-allocate 552254138Sattilio * all pages in advance, drain exclusive busy on all of them, 553219089Spjd * map them into contiguous KVA region and populate them 554219089Spjd * in one single dmu_read() call. 
555219089Spjd */ 556219089Spjdstatic int 557219089Spjdmappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) 558219089Spjd{ 559219089Spjd znode_t *zp = VTOZ(vp); 560219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 561219089Spjd struct sf_buf *sf; 562219089Spjd vm_object_t obj; 563219089Spjd vm_page_t pp; 564219089Spjd int64_t start; 565219089Spjd caddr_t va; 566219089Spjd int len = nbytes; 567219089Spjd int off; 568219089Spjd int error = 0; 569219089Spjd 570219089Spjd ASSERT(uio->uio_segflg == UIO_NOCOPY); 571219089Spjd ASSERT(vp->v_mount != NULL); 572219089Spjd obj = vp->v_object; 573219089Spjd ASSERT(obj != NULL); 574219089Spjd ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); 575219089Spjd 576248084Sattilio zfs_vmobject_wlock(obj); 577219089Spjd for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { 578219089Spjd int bytes = MIN(PAGESIZE, len); 579219089Spjd 580254138Sattilio pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY | 581254649Skib VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); 582219089Spjd if (pp->valid == 0) { 583248084Sattilio zfs_vmobject_wunlock(obj); 584219089Spjd va = zfs_map_page(pp, &sf); 585219089Spjd error = dmu_read(os, zp->z_id, start, bytes, va, 586219089Spjd DMU_READ_PREFETCH); 587219089Spjd if (bytes != PAGESIZE && error == 0) 588219089Spjd bzero(va + bytes, PAGESIZE - bytes); 589219089Spjd zfs_unmap_page(sf); 590248084Sattilio zfs_vmobject_wlock(obj); 591254138Sattilio vm_page_sunbusy(pp); 592219089Spjd vm_page_lock(pp); 593219089Spjd if (error) { 594253073Savg if (pp->wire_count == 0 && pp->valid == 0 && 595254138Sattilio !vm_page_busied(pp)) 596253073Savg vm_page_free(pp); 597219089Spjd } else { 598219089Spjd pp->valid = VM_PAGE_BITS_ALL; 599219089Spjd vm_page_activate(pp); 600219089Spjd } 601219089Spjd vm_page_unlock(pp); 602258739Savg } else { 603258739Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 604254138Sattilio vm_page_sunbusy(pp); 605258739Savg } 606219089Spjd if (error) 607219089Spjd break; 608219089Spjd uio->uio_resid -= bytes; 
609219089Spjd uio->uio_offset += bytes; 610219089Spjd len -= bytes; 611219089Spjd } 612248084Sattilio zfs_vmobject_wunlock(obj); 613219089Spjd return (error); 614219089Spjd} 615219089Spjd 616219089Spjd/* 617168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 618168404Spjd * between the DMU cache and the memory mapped pages. What this means: 619168404Spjd * 620168404Spjd * On Read: We "read" preferentially from memory mapped pages, 621168404Spjd * else we default from the dmu buffer. 622168404Spjd * 623168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 624251631Sdelphij * the file is memory mapped. 625168404Spjd */ 626168404Spjdstatic int 627168404Spjdmappedread(vnode_t *vp, int nbytes, uio_t *uio) 628168404Spjd{ 629168404Spjd znode_t *zp = VTOZ(vp); 630168404Spjd vm_object_t obj; 631212655Savg int64_t start; 632168926Spjd caddr_t va; 633168404Spjd int len = nbytes; 634212655Savg int off; 635168404Spjd int error = 0; 636168404Spjd 637168404Spjd ASSERT(vp->v_mount != NULL); 638168404Spjd obj = vp->v_object; 639168404Spjd ASSERT(obj != NULL); 640168404Spjd 641168404Spjd start = uio->uio_loffset; 642168404Spjd off = start & PAGEOFFSET; 643248084Sattilio zfs_vmobject_wlock(obj); 644168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 645219089Spjd vm_page_t pp; 646219089Spjd uint64_t bytes = MIN(PAGESIZE - off, len); 647168404Spjd 648253953Sattilio if (pp = page_hold(vp, start)) { 649219089Spjd struct sf_buf *sf; 650219089Spjd caddr_t va; 651212652Savg 652248084Sattilio zfs_vmobject_wunlock(obj); 653219089Spjd va = zfs_map_page(pp, &sf); 654298105Savg#ifdef illumos 655219089Spjd error = uiomove(va + off, bytes, UIO_READ, uio); 656298105Savg#else 657298105Savg error = vn_io_fault_uiomove(va + off, bytes, uio); 658298105Savg#endif 659219089Spjd zfs_unmap_page(sf); 660248084Sattilio zfs_vmobject_wlock(obj); 661253953Sattilio page_unhold(pp); 662219089Spjd } else { 663248084Sattilio 
zfs_vmobject_wunlock(obj); 664272809Sdelphij error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 665272809Sdelphij uio, bytes); 666248084Sattilio zfs_vmobject_wlock(obj); 667168404Spjd } 668168404Spjd len -= bytes; 669168404Spjd off = 0; 670168404Spjd if (error) 671168404Spjd break; 672168404Spjd } 673248084Sattilio zfs_vmobject_wunlock(obj); 674168404Spjd return (error); 675168404Spjd} 676168404Spjd 677168404Spjdoffset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 678168404Spjd 679168404Spjd/* 680168404Spjd * Read bytes from specified file into supplied buffer. 681168404Spjd * 682168404Spjd * IN: vp - vnode of file to be read from. 683168404Spjd * uio - structure supplying read location, range info, 684168404Spjd * and return buffer. 685168404Spjd * ioflag - SYNC flags; used to provide FRSYNC semantics. 686168404Spjd * cr - credentials of caller. 687185029Spjd * ct - caller context 688168404Spjd * 689168404Spjd * OUT: uio - updated offset and range, buffer filled. 690168404Spjd * 691251631Sdelphij * RETURN: 0 on success, error code on failure. 
692168404Spjd * 693168404Spjd * Side Effects: 694168404Spjd * vp - atime updated if byte count > 0 695168404Spjd */ 696168404Spjd/* ARGSUSED */ 697168404Spjdstatic int 698168962Spjdzfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 699168404Spjd{ 700168404Spjd znode_t *zp = VTOZ(vp); 701168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 702168404Spjd ssize_t n, nbytes; 703247187Smm int error = 0; 704168404Spjd rl_t *rl; 705219089Spjd xuio_t *xuio = NULL; 706168404Spjd 707168404Spjd ZFS_ENTER(zfsvfs); 708185029Spjd ZFS_VERIFY_ZP(zp); 709168404Spjd 710219089Spjd if (zp->z_pflags & ZFS_AV_QUARANTINED) { 711185029Spjd ZFS_EXIT(zfsvfs); 712249195Smm return (SET_ERROR(EACCES)); 713185029Spjd } 714185029Spjd 715168404Spjd /* 716168404Spjd * Validate file offset 717168404Spjd */ 718168404Spjd if (uio->uio_loffset < (offset_t)0) { 719168404Spjd ZFS_EXIT(zfsvfs); 720249195Smm return (SET_ERROR(EINVAL)); 721168404Spjd } 722168404Spjd 723168404Spjd /* 724168404Spjd * Fasttrack empty reads 725168404Spjd */ 726168404Spjd if (uio->uio_resid == 0) { 727168404Spjd ZFS_EXIT(zfsvfs); 728168404Spjd return (0); 729168404Spjd } 730168404Spjd 731168404Spjd /* 732168962Spjd * Check for mandatory locks 733168962Spjd */ 734219089Spjd if (MANDMODE(zp->z_mode)) { 735168962Spjd if (error = chklock(vp, FREAD, 736168962Spjd uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 737168962Spjd ZFS_EXIT(zfsvfs); 738168962Spjd return (error); 739168962Spjd } 740168962Spjd } 741168962Spjd 742168962Spjd /* 743168404Spjd * If we're in FRSYNC mode, sync out this znode before reading it. 744168404Spjd */ 745224605Smm if (zfsvfs->z_log && 746224605Smm (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) 747219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 748168404Spjd 749168404Spjd /* 750168404Spjd * Lock the range against changes. 
751168404Spjd */ 752168404Spjd rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 753168404Spjd 754168404Spjd /* 755168404Spjd * If we are reading past end-of-file we can skip 756168404Spjd * to the end; but we might still need to set atime. 757168404Spjd */ 758219089Spjd if (uio->uio_loffset >= zp->z_size) { 759168404Spjd error = 0; 760168404Spjd goto out; 761168404Spjd } 762168404Spjd 763219089Spjd ASSERT(uio->uio_loffset < zp->z_size); 764219089Spjd n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 765168404Spjd 766277300Ssmh#ifdef illumos 767219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 768219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 769219089Spjd int nblk; 770219089Spjd int blksz = zp->z_blksz; 771219089Spjd uint64_t offset = uio->uio_loffset; 772219089Spjd 773219089Spjd xuio = (xuio_t *)uio; 774219089Spjd if ((ISP2(blksz))) { 775219089Spjd nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 776219089Spjd blksz)) / blksz; 777219089Spjd } else { 778219089Spjd ASSERT(offset + n <= blksz); 779219089Spjd nblk = 1; 780219089Spjd } 781219089Spjd (void) dmu_xuio_init(xuio, nblk); 782219089Spjd 783219089Spjd if (vn_has_cached_data(vp)) { 784219089Spjd /* 785219089Spjd * For simplicity, we always allocate a full buffer 786219089Spjd * even if we only expect to read a portion of a block. 
787219089Spjd */ 788219089Spjd while (--nblk >= 0) { 789219089Spjd (void) dmu_xuio_add(xuio, 790219089Spjd dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 791219089Spjd blksz), 0, blksz); 792219089Spjd } 793219089Spjd } 794219089Spjd } 795277300Ssmh#endif /* illumos */ 796219089Spjd 797168404Spjd while (n > 0) { 798168404Spjd nbytes = MIN(n, zfs_read_chunk_size - 799168404Spjd P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 800168404Spjd 801219089Spjd#ifdef __FreeBSD__ 802219089Spjd if (uio->uio_segflg == UIO_NOCOPY) 803219089Spjd error = mappedread_sf(vp, nbytes, uio); 804219089Spjd else 805219089Spjd#endif /* __FreeBSD__ */ 806272809Sdelphij if (vn_has_cached_data(vp)) { 807168404Spjd error = mappedread(vp, nbytes, uio); 808272809Sdelphij } else { 809272809Sdelphij error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 810272809Sdelphij uio, nbytes); 811272809Sdelphij } 812185029Spjd if (error) { 813185029Spjd /* convert checksum errors into IO errors */ 814185029Spjd if (error == ECKSUM) 815249195Smm error = SET_ERROR(EIO); 816168404Spjd break; 817185029Spjd } 818168962Spjd 819168404Spjd n -= nbytes; 820168404Spjd } 821168404Spjdout: 822168404Spjd zfs_range_unlock(rl); 823168404Spjd 824168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 825168404Spjd ZFS_EXIT(zfsvfs); 826168404Spjd return (error); 827168404Spjd} 828168404Spjd 829168404Spjd/* 830168404Spjd * Write the bytes to a file. 831168404Spjd * 832168404Spjd * IN: vp - vnode of file to be written to. 833168404Spjd * uio - structure supplying write location, range info, 834168404Spjd * and data buffer. 835251631Sdelphij * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is 836251631Sdelphij * set if in append mode. 837168404Spjd * cr - credentials of caller. 838185029Spjd * ct - caller context (NFS/CIFS fem monitor only) 839168404Spjd * 840168404Spjd * OUT: uio - updated offset and range. 841168404Spjd * 842251631Sdelphij * RETURN: 0 on success, error code on failure. 
843168404Spjd * 844168404Spjd * Timestamps: 845168404Spjd * vp - ctime|mtime updated if byte count > 0 846168404Spjd */ 847219089Spjd 848168404Spjd/* ARGSUSED */ 849168404Spjdstatic int 850168962Spjdzfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 851168404Spjd{ 852168404Spjd znode_t *zp = VTOZ(vp); 853168962Spjd rlim64_t limit = MAXOFFSET_T; 854168404Spjd ssize_t start_resid = uio->uio_resid; 855168404Spjd ssize_t tx_bytes; 856168404Spjd uint64_t end_size; 857168404Spjd dmu_tx_t *tx; 858168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 859185029Spjd zilog_t *zilog; 860168404Spjd offset_t woff; 861168404Spjd ssize_t n, nbytes; 862168404Spjd rl_t *rl; 863168404Spjd int max_blksz = zfsvfs->z_max_blksz; 864247187Smm int error = 0; 865209962Smm arc_buf_t *abuf; 866247187Smm iovec_t *aiov = NULL; 867219089Spjd xuio_t *xuio = NULL; 868219089Spjd int i_iov = 0; 869219089Spjd int iovcnt = uio->uio_iovcnt; 870219089Spjd iovec_t *iovp = uio->uio_iov; 871219089Spjd int write_eof; 872219089Spjd int count = 0; 873219089Spjd sa_bulk_attr_t bulk[4]; 874219089Spjd uint64_t mtime[2], ctime[2]; 875168404Spjd 876168404Spjd /* 877168404Spjd * Fasttrack empty write 878168404Spjd */ 879168404Spjd n = start_resid; 880168404Spjd if (n == 0) 881168404Spjd return (0); 882168404Spjd 883168962Spjd if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 884168962Spjd limit = MAXOFFSET_T; 885168962Spjd 886168404Spjd ZFS_ENTER(zfsvfs); 887185029Spjd ZFS_VERIFY_ZP(zp); 888168404Spjd 889219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 890219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 891219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 892219089Spjd &zp->z_size, 8); 893219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 894219089Spjd &zp->z_pflags, 8); 895219089Spjd 896168404Spjd /* 897262990Sdelphij * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. 
snapshots) our 898262990Sdelphij * callers might not be able to detect properly that we are read-only, 899262990Sdelphij * so check it explicitly here. 900262990Sdelphij */ 901262990Sdelphij if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 902262990Sdelphij ZFS_EXIT(zfsvfs); 903262990Sdelphij return (SET_ERROR(EROFS)); 904262990Sdelphij } 905262990Sdelphij 906262990Sdelphij /* 907185029Spjd * If immutable or not appending then return EPERM 908185029Spjd */ 909219089Spjd if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 910219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 911219089Spjd (uio->uio_loffset < zp->z_size))) { 912185029Spjd ZFS_EXIT(zfsvfs); 913249195Smm return (SET_ERROR(EPERM)); 914185029Spjd } 915185029Spjd 916185029Spjd zilog = zfsvfs->z_log; 917185029Spjd 918185029Spjd /* 919219089Spjd * Validate file offset 920219089Spjd */ 921219089Spjd woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 922219089Spjd if (woff < 0) { 923219089Spjd ZFS_EXIT(zfsvfs); 924249195Smm return (SET_ERROR(EINVAL)); 925219089Spjd } 926219089Spjd 927219089Spjd /* 928219089Spjd * Check for mandatory locks before calling zfs_range_lock() 929219089Spjd * in order to prevent a deadlock with locks set via fcntl(). 930219089Spjd */ 931219089Spjd if (MANDMODE((mode_t)zp->z_mode) && 932219089Spjd (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 933219089Spjd ZFS_EXIT(zfsvfs); 934219089Spjd return (error); 935219089Spjd } 936219089Spjd 937277300Ssmh#ifdef illumos 938219089Spjd /* 939168404Spjd * Pre-fault the pages to ensure slow (eg NFS) pages 940168404Spjd * don't hold up txg. 941219089Spjd * Skip this if uio contains loaned arc_buf. 
942168404Spjd */ 943219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 944219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 945219089Spjd xuio = (xuio_t *)uio; 946219089Spjd else 947219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 948277300Ssmh#endif 949168404Spjd 950168404Spjd /* 951168404Spjd * If in append mode, set the io offset pointer to eof. 952168404Spjd */ 953213673Spjd if (ioflag & FAPPEND) { 954168404Spjd /* 955219089Spjd * Obtain an appending range lock to guarantee file append 956219089Spjd * semantics. We reset the write offset once we have the lock. 957168404Spjd */ 958168404Spjd rl = zfs_range_lock(zp, 0, n, RL_APPEND); 959219089Spjd woff = rl->r_off; 960168404Spjd if (rl->r_len == UINT64_MAX) { 961219089Spjd /* 962219089Spjd * We overlocked the file because this write will cause 963219089Spjd * the file block size to increase. 964219089Spjd * Note that zp_size cannot change with this lock held. 965219089Spjd */ 966219089Spjd woff = zp->z_size; 967168404Spjd } 968219089Spjd uio->uio_loffset = woff; 969168404Spjd } else { 970168404Spjd /* 971219089Spjd * Note that if the file block size will change as a result of 972219089Spjd * this write, then this range lock will lock the entire file 973219089Spjd * so that we can re-write the block safely. 974168404Spjd */ 975168404Spjd rl = zfs_range_lock(zp, woff, n, RL_WRITER); 976168404Spjd } 977168404Spjd 978235781Strasz if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { 979235781Strasz zfs_range_unlock(rl); 980235781Strasz ZFS_EXIT(zfsvfs); 981235781Strasz return (EFBIG); 982235781Strasz } 983235781Strasz 984168962Spjd if (woff >= limit) { 985168962Spjd zfs_range_unlock(rl); 986168962Spjd ZFS_EXIT(zfsvfs); 987249195Smm return (SET_ERROR(EFBIG)); 988168962Spjd } 989168962Spjd 990168962Spjd if ((woff + n) > limit || woff > (limit - n)) 991168962Spjd n = limit - woff; 992168962Spjd 993219089Spjd /* Will this write extend the file length? 
*/ 994219089Spjd write_eof = (woff + n > zp->z_size); 995168404Spjd 996219089Spjd end_size = MAX(zp->z_size, woff + n); 997219089Spjd 998168404Spjd /* 999168404Spjd * Write the file in reasonable size chunks. Each chunk is written 1000168404Spjd * in a separate transaction; this keeps the intent log records small 1001168404Spjd * and allows us to do more fine-grained space accounting. 1002168404Spjd */ 1003168404Spjd while (n > 0) { 1004209962Smm abuf = NULL; 1005209962Smm woff = uio->uio_loffset; 1006219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 1007219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 1008209962Smm if (abuf != NULL) 1009209962Smm dmu_return_arcbuf(abuf); 1010249195Smm error = SET_ERROR(EDQUOT); 1011209962Smm break; 1012209962Smm } 1013209962Smm 1014219089Spjd if (xuio && abuf == NULL) { 1015219089Spjd ASSERT(i_iov < iovcnt); 1016219089Spjd aiov = &iovp[i_iov]; 1017219089Spjd abuf = dmu_xuio_arcbuf(xuio, i_iov); 1018219089Spjd dmu_xuio_clear(xuio, i_iov); 1019219089Spjd DTRACE_PROBE3(zfs_cp_write, int, i_iov, 1020219089Spjd iovec_t *, aiov, arc_buf_t *, abuf); 1021219089Spjd ASSERT((aiov->iov_base == abuf->b_data) || 1022219089Spjd ((char *)aiov->iov_base - (char *)abuf->b_data + 1023219089Spjd aiov->iov_len == arc_buf_size(abuf))); 1024219089Spjd i_iov++; 1025219089Spjd } else if (abuf == NULL && n >= max_blksz && 1026219089Spjd woff >= zp->z_size && 1027209962Smm P2PHASE(woff, max_blksz) == 0 && 1028209962Smm zp->z_blksz == max_blksz) { 1029219089Spjd /* 1030219089Spjd * This write covers a full block. "Borrow" a buffer 1031219089Spjd * from the dmu so that we can fill it before we enter 1032219089Spjd * a transaction. This avoids the possibility of 1033219089Spjd * holding up the transaction if the data copy hangs 1034219089Spjd * up on a pagefault (e.g., from an NFS server mapping). 
1035219089Spjd */ 1036298105Savg#ifdef illumos 1037209962Smm size_t cbytes; 1038298105Savg#endif 1039209962Smm 1040219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 1041219089Spjd max_blksz); 1042209962Smm ASSERT(abuf != NULL); 1043209962Smm ASSERT(arc_buf_size(abuf) == max_blksz); 1044298105Savg#ifdef illumos 1045209962Smm if (error = uiocopy(abuf->b_data, max_blksz, 1046209962Smm UIO_WRITE, uio, &cbytes)) { 1047209962Smm dmu_return_arcbuf(abuf); 1048209962Smm break; 1049209962Smm } 1050209962Smm ASSERT(cbytes == max_blksz); 1051298105Savg#else 1052298105Savg ssize_t resid = uio->uio_resid; 1053298105Savg error = vn_io_fault_uiomove(abuf->b_data, max_blksz, uio); 1054298105Savg if (error != 0) { 1055298105Savg uio->uio_offset -= resid - uio->uio_resid; 1056298105Savg uio->uio_resid = resid; 1057298105Savg dmu_return_arcbuf(abuf); 1058298105Savg break; 1059298105Savg } 1060298105Savg#endif 1061209962Smm } 1062209962Smm 1063209962Smm /* 1064168404Spjd * Start a transaction. 1065168404Spjd */ 1066168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1067219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1068168404Spjd dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 1069219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1070258720Savg error = dmu_tx_assign(tx, TXG_WAIT); 1071168404Spjd if (error) { 1072168404Spjd dmu_tx_abort(tx); 1073209962Smm if (abuf != NULL) 1074209962Smm dmu_return_arcbuf(abuf); 1075168404Spjd break; 1076168404Spjd } 1077168404Spjd 1078168404Spjd /* 1079168404Spjd * If zfs_range_lock() over-locked we grow the blocksize 1080168404Spjd * and then reduce the lock range. This will only happen 1081168404Spjd * on the first iteration since zfs_range_reduce() will 1082168404Spjd * shrink down r_len to the appropriate size. 
1083168404Spjd */ 1084168404Spjd if (rl->r_len == UINT64_MAX) { 1085168404Spjd uint64_t new_blksz; 1086168404Spjd 1087168404Spjd if (zp->z_blksz > max_blksz) { 1088274337Sdelphij /* 1089274337Sdelphij * File's blocksize is already larger than the 1090274337Sdelphij * "recordsize" property. Only let it grow to 1091274337Sdelphij * the next power of 2. 1092274337Sdelphij */ 1093168404Spjd ASSERT(!ISP2(zp->z_blksz)); 1094274337Sdelphij new_blksz = MIN(end_size, 1095274337Sdelphij 1 << highbit64(zp->z_blksz)); 1096168404Spjd } else { 1097168404Spjd new_blksz = MIN(end_size, max_blksz); 1098168404Spjd } 1099168404Spjd zfs_grow_blocksize(zp, new_blksz, tx); 1100168404Spjd zfs_range_reduce(rl, woff, n); 1101168404Spjd } 1102168404Spjd 1103168404Spjd /* 1104168404Spjd * XXX - should we really limit each write to z_max_blksz? 1105168404Spjd * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 1106168404Spjd */ 1107168404Spjd nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 1108168404Spjd 1109219089Spjd if (woff + nbytes > zp->z_size) 1110168404Spjd vnode_pager_setsize(vp, woff + nbytes); 1111168404Spjd 1112209962Smm if (abuf == NULL) { 1113209962Smm tx_bytes = uio->uio_resid; 1114219089Spjd error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 1115219089Spjd uio, nbytes, tx); 1116209962Smm tx_bytes -= uio->uio_resid; 1117168404Spjd } else { 1118209962Smm tx_bytes = nbytes; 1119219089Spjd ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 1120219089Spjd /* 1121219089Spjd * If this is not a full block write, but we are 1122219089Spjd * extending the file past EOF and this data starts 1123219089Spjd * block-aligned, use assign_arcbuf(). Otherwise, 1124219089Spjd * write via dmu_write(). 
1125219089Spjd */ 1126219089Spjd if (tx_bytes < max_blksz && (!write_eof || 1127219089Spjd aiov->iov_base != abuf->b_data)) { 1128219089Spjd ASSERT(xuio); 1129219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, woff, 1130219089Spjd aiov->iov_len, aiov->iov_base, tx); 1131219089Spjd dmu_return_arcbuf(abuf); 1132219089Spjd xuio_stat_wbuf_copied(); 1133219089Spjd } else { 1134219089Spjd ASSERT(xuio || tx_bytes == max_blksz); 1135219089Spjd dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 1136219089Spjd woff, abuf, tx); 1137219089Spjd } 1138298105Savg#ifdef illumos 1139209962Smm ASSERT(tx_bytes <= uio->uio_resid); 1140209962Smm uioskip(uio, tx_bytes); 1141298105Savg#endif 1142168404Spjd } 1143212657Savg if (tx_bytes && vn_has_cached_data(vp)) { 1144209962Smm update_pages(vp, woff, tx_bytes, zfsvfs->z_os, 1145209962Smm zp->z_id, uio->uio_segflg, tx); 1146209962Smm } 1147209962Smm 1148209962Smm /* 1149168404Spjd * If we made no progress, we're done. If we made even 1150168404Spjd * partial progress, update the znode and ZIL accordingly. 1151168404Spjd */ 1152168404Spjd if (tx_bytes == 0) { 1153219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 1154219089Spjd (void *)&zp->z_size, sizeof (uint64_t), tx); 1155168404Spjd dmu_tx_commit(tx); 1156168404Spjd ASSERT(error != 0); 1157168404Spjd break; 1158168404Spjd } 1159168404Spjd 1160168404Spjd /* 1161168404Spjd * Clear Set-UID/Set-GID bits on successful write if not 1162168404Spjd * privileged and at least one of the excute bits is set. 1163168404Spjd * 1164168404Spjd * It would be nice to to this after all writes have 1165168404Spjd * been done, but that would still expose the ISUID/ISGID 1166168404Spjd * to another app after the partial write is committed. 1167185029Spjd * 1168185029Spjd * Note: we don't call zfs_fuid_map_id() here because 1169185029Spjd * user 0 is not an ephemeral uid. 
1170168404Spjd */ 1171168404Spjd mutex_enter(&zp->z_acl_lock); 1172219089Spjd if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 1173168404Spjd (S_IXUSR >> 6))) != 0 && 1174219089Spjd (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 1175185029Spjd secpolicy_vnode_setid_retain(vp, cr, 1176219089Spjd (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 1177219089Spjd uint64_t newmode; 1178219089Spjd zp->z_mode &= ~(S_ISUID | S_ISGID); 1179219089Spjd newmode = zp->z_mode; 1180219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 1181219089Spjd (void *)&newmode, sizeof (uint64_t), tx); 1182168404Spjd } 1183168404Spjd mutex_exit(&zp->z_acl_lock); 1184168404Spjd 1185219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 1186219089Spjd B_TRUE); 1187168404Spjd 1188168404Spjd /* 1189168404Spjd * Update the file size (zp_size) if it has changed; 1190168404Spjd * account for possible concurrent updates. 1191168404Spjd */ 1192219089Spjd while ((end_size = zp->z_size) < uio->uio_loffset) { 1193219089Spjd (void) atomic_cas_64(&zp->z_size, end_size, 1194168404Spjd uio->uio_loffset); 1195298105Savg#ifdef illumos 1196219089Spjd ASSERT(error == 0); 1197298105Savg#else 1198298105Savg ASSERT(error == 0 || error == EFAULT); 1199298105Savg#endif 1200219089Spjd } 1201219089Spjd /* 1202219089Spjd * If we are replaying and eof is non zero then force 1203219089Spjd * the file size to the specified eof. Note, there's no 1204219089Spjd * concurrency during replay. 
1205219089Spjd */ 1206219089Spjd if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 1207219089Spjd zp->z_size = zfsvfs->z_replay_eof; 1208219089Spjd 1209298105Savg if (error == 0) 1210298105Savg error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1211298105Savg else 1212298105Savg (void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1213219089Spjd 1214168404Spjd zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 1215168404Spjd dmu_tx_commit(tx); 1216168404Spjd 1217168404Spjd if (error != 0) 1218168404Spjd break; 1219168404Spjd ASSERT(tx_bytes == nbytes); 1220168404Spjd n -= nbytes; 1221219089Spjd 1222277300Ssmh#ifdef illumos 1223219089Spjd if (!xuio && n > 0) 1224219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 1225277300Ssmh#endif 1226168404Spjd } 1227168404Spjd 1228168404Spjd zfs_range_unlock(rl); 1229168404Spjd 1230168404Spjd /* 1231168404Spjd * If we're in replay mode, or we made no progress, return error. 1232168404Spjd * Otherwise, it's at least a partial write, so it's successful. 1233168404Spjd */ 1234209962Smm if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1235168404Spjd ZFS_EXIT(zfsvfs); 1236168404Spjd return (error); 1237168404Spjd } 1238168404Spjd 1239298105Savg#ifdef __FreeBSD__ 1240298105Savg /* 1241298105Savg * EFAULT means that at least one page of the source buffer was not 1242298105Savg * available. VFS will re-try remaining I/O upon this error. 
1243298105Savg */ 1244298105Savg if (error == EFAULT) { 1245298105Savg ZFS_EXIT(zfsvfs); 1246298105Savg return (error); 1247298105Savg } 1248298105Savg#endif 1249298105Savg 1250219089Spjd if (ioflag & (FSYNC | FDSYNC) || 1251219089Spjd zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1252219089Spjd zil_commit(zilog, zp->z_id); 1253168404Spjd 1254168404Spjd ZFS_EXIT(zfsvfs); 1255168404Spjd return (0); 1256168404Spjd} 1257168404Spjd 1258168404Spjdvoid 1259219089Spjdzfs_get_done(zgd_t *zgd, int error) 1260168404Spjd{ 1261219089Spjd znode_t *zp = zgd->zgd_private; 1262219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 1263168404Spjd 1264219089Spjd if (zgd->zgd_db) 1265219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1266219089Spjd 1267219089Spjd zfs_range_unlock(zgd->zgd_rl); 1268219089Spjd 1269191900Skmacy /* 1270191900Skmacy * Release the vnode asynchronously as we currently have the 1271191900Skmacy * txg stopped from syncing. 1272191900Skmacy */ 1273219089Spjd VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1274219089Spjd 1275219089Spjd if (error == 0 && zgd->zgd_bp) 1276219089Spjd zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1277219089Spjd 1278168404Spjd kmem_free(zgd, sizeof (zgd_t)); 1279168404Spjd} 1280168404Spjd 1281214378Smm#ifdef DEBUG 1282214378Smmstatic int zil_fault_io = 0; 1283214378Smm#endif 1284214378Smm 1285168404Spjd/* 1286168404Spjd * Get data to generate a TX_WRITE intent log record. 
 */
int
zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
{
	zfsvfs_t *zfsvfs = arg;
	objset_t *os = zfsvfs->z_os;
	znode_t *zp;
	uint64_t object = lr->lr_foid;
	uint64_t offset = lr->lr_offset;
	uint64_t size = lr->lr_length;
	blkptr_t *bp = &lr->lr_blkptr;
	dmu_buf_t *db;
	zgd_t *zgd;
	int error = 0;

	ASSERT(zio != NULL);
	ASSERT(size != 0);

	/*
	 * Nothing to do if the file has been removed
	 */
	if (zfs_zget(zfsvfs, object, &zp) != 0)
		return (SET_ERROR(ENOENT));
	if (zp->z_unlinked) {
		/*
		 * Release the vnode asynchronously as we currently have the
		 * txg stopped from syncing.
		 */
		VN_RELE_ASYNC(ZTOV(zp),
		    dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
		return (SET_ERROR(ENOENT));
	}

	/*
	 * The zgd carries everything zfs_get_done() needs to clean up:
	 * the znode, the range lock and, for indirect writes, the dbuf
	 * and block pointer.  It is zero-filled, so fields that are never
	 * set are seen as NULL by zfs_get_done().
	 */
	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
	zgd->zgd_zilog = zfsvfs->z_log;
	zgd->zgd_private = zp;

	/*
	 * Write records come in two flavors: immediate and indirect.
	 * For small writes it's cheaper to store the data with the
	 * log record (immediate); for large writes it's cheaper to
	 * sync the data and get a pointer to it (indirect) so that
	 * we don't have to write the data twice.
	 */
	if (buf != NULL) { /* immediate write */
		zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER);
		/* test for truncation needs to be done while range locked */
		if (offset >= zp->z_size) {
			error = SET_ERROR(ENOENT);
		} else {
			error = dmu_read(os, object, offset, size, buf,
			    DMU_READ_NO_PREFETCH);
		}
		ASSERT(error == 0 || error == ENOENT);
	} else { /* indirect write */
		/*
		 * Have to lock the whole block to ensure when it's
		 * written out and its checksum is being calculated
		 * that no one can change the data. We need to re-check
		 * blocksize after we get the lock in case it's changed!
		 */
		for (;;) {
			uint64_t blkoff;
			size = zp->z_blksz;
			/*
			 * Move offset back to the start of the range to lock:
			 * the enclosing block boundary when the block size is
			 * a power of two, offset 0 otherwise.
			 */
			blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
			offset -= blkoff;
			zgd->zgd_rl = zfs_range_lock(zp, offset, size,
			    RL_READER);
			if (zp->z_blksz == size)
				break;
			/* Block size changed: restore offset and try again. */
			offset += blkoff;
			zfs_range_unlock(zgd->zgd_rl);
		}
		/* test for truncation needs to be done while range locked */
		if (lr->lr_offset >= zp->z_size)
			error = SET_ERROR(ENOENT);
#ifdef DEBUG
		if (zil_fault_io) {
			error = SET_ERROR(EIO);
			zil_fault_io = 0;
		}
#endif
		if (error == 0)
			error = dmu_buf_hold(os, object, offset, zgd, &db,
			    DMU_READ_NO_PREFETCH);

		if (error == 0) {
			blkptr_t *obp = dmu_buf_get_blkptr(db);
			if (obp) {
				ASSERT(BP_IS_HOLE(bp));
				*bp = *obp;
			}

			zgd->zgd_db = db;
			zgd->zgd_bp = bp;

			ASSERT(db->db_offset == offset);
			ASSERT(db->db_size == size);

			error = dmu_sync(zio, lr->lr_common.lrc_txg,
			    zfs_get_done, zgd);
			ASSERT(error || lr->lr_length <= size);

			/*
			 * On success, we need to wait for the write I/O
			 * initiated by dmu_sync() to complete before we can
			 * release this dbuf.  We will finish everything up
			 * in the zfs_get_done() callback.
			 */
			if (error == 0)
				return (0);

			/*
			 * EALREADY is not reported as a failure: the record
			 * is turned into a TX_WRITE2 and the cleanup below
			 * runs with error == 0.
			 */
			if (error == EALREADY) {
				lr->lr_common.lrc_txtype = TX_WRITE2;
				error = 0;
			}
		}
	}

	/*
	 * Every path that reaches this point cleans up synchronously;
	 * only the successful dmu_sync() case above leaves the cleanup
	 * to the callback.
	 */
	zfs_get_done(zgd, error);

	return (error);
}

/*
 * Check access to a file.  When flag contains V_ACE_MASK the request is
 * passed to zfs_zaccess(), otherwise to zfs_zaccess_rwx().
 *
 *	RETURN:	the result of the access check that was called.
 */
/*ARGSUSED*/
static int
zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if (flag & V_ACE_MASK)
		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
	else
		error = zfs_zaccess_rwx(zp, mode, flag, cr);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Callback handed to vn_vget_ino_gen() by zfs_lookup_lock() for the ".."
 * case.  The vnode to return is passed in through arg; it is stored in
 * *vpp and locked with lkflags.  If locking fails the reference on the
 * vnode is dropped and the error is returned.
 */
static int
zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
	int error;

	*vpp = arg;
	error = vn_lock(*vpp, lkflags);
	if (error != 0)
		vrele(*vpp);
	return (error);
}

/*
 * Lock the vnode vp that a lookup of name in directory dvp produced,
 * using the lock type requested in lkflags.  dvp must be locked.
 *
 *	- "" or ".":  vp is dvp itself.  An extra reference is taken and
 *	  dvp's lock is upgraded or downgraded when its current type
 *	  differs from the requested one.  ENOENT is returned (and the
 *	  extra reference dropped) if dvp is found doomed afterwards.
 *	- "..":  handled by vn_vget_ino_gen() with zfs_dd_callback().
 *	- anything else:  vp is locked directly; its reference is dropped
 *	  if that fails.
 *
 *	RETURN:	0 on success, error code on failure.
 */
static int
zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
{
	znode_t *zdp = VTOZ(dvp);
	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
	int error;
	int ltype;

	ASSERT_VOP_LOCKED(dvp, __func__);
#ifdef DIAGNOSTIC
	if ((zdp->z_pflags & ZFS_XATTR) == 0)
		VERIFY(!RRM_LOCK_HELD(&zfsvfs->z_teardown_lock));
#endif

	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
		ASSERT3P(dvp, ==, vp);
		vref(dvp);
		ltype = lkflags & LK_TYPE_MASK;
		if (ltype != VOP_ISLOCKED(dvp)) {
			/*
			 * The results of these vn_lock() calls are not
			 * examined; the VI_DOOMED test below is what catches
			 * a vnode that was reclaimed during the relock.
			 */
			if (ltype == LK_EXCLUSIVE)
				vn_lock(dvp, LK_UPGRADE | LK_RETRY);
			else /* if (ltype == LK_SHARED) */
				vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);

			/*
			 * Relock for the "." case could leave us with
			 * reclaimed vnode.
			 */
			if (dvp->v_iflag & VI_DOOMED) {
				vrele(dvp);
				return (SET_ERROR(ENOENT));
			}
		}
		return (0);
	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
		/*
		 * Note that in this case, dvp is the child vnode, and we
		 * are looking up the parent vnode - exactly reverse from
		 * normal operation.  Unlocking dvp requires some rather
		 * tricky unlock/relock dance to prevent mp from being freed;
		 * use vn_vget_ino_gen() which takes care of all that.
		 *
		 * XXX Note that there is a time window when both vnodes are
		 * unlocked.  It is possible, although highly unlikely, that
		 * during that window the parent-child relationship between
		 * the vnodes may change, for example, get reversed.
		 * In that case we would have a wrong lock order for the vnodes.
		 * All other filesystems seem to ignore this problem, so we
		 * do the same here.
		 * A potential solution could be implemented as follows:
		 * - using LK_NOWAIT when locking the second vnode and retrying
		 *   if necessary
		 * - checking that the parent-child relationship still holds
		 *   after locking both vnodes and retrying if it doesn't
		 */
		error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
		return (error);
	} else {
		error = vn_lock(vp, lkflags);
		if (error != 0)
			vrele(vp);
		return (error);
	}
}

/*
 * Lookup an entry in a directory, or an extended attribute directory.
 * If it exists, return a held vnode reference for it.
 *
 *	IN:	dvp	- vnode of directory to search.
 *		nm	- name of entry to lookup.
 *		pnp	- full pathname to lookup [UNUSED].
 *		flags	- LOOKUP_XATTR set if looking for an attribute.
 *		rdir	- root directory vnode [UNUSED].
 *		cr	- credentials of caller.
 *		ct	- caller context
 *
 *	OUT:	vpp	- vnode of located entry, NULL if not found.
 *
 *	RETURN:	0 on success, error code on failure.
1524168404Spjd * 1525168404Spjd * Timestamps: 1526168404Spjd * NA 1527168404Spjd */ 1528168404Spjd/* ARGSUSED */ 1529168962Spjdstatic int 1530168962Spjdzfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1531185029Spjd int nameiop, cred_t *cr, kthread_t *td, int flags) 1532168404Spjd{ 1533168962Spjd znode_t *zdp = VTOZ(dvp); 1534303970Savg znode_t *zp; 1535168962Spjd zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1536211932Smm int error = 0; 1537168404Spjd 1538321545Smav /* 1539321545Smav * Fast path lookup, however we must skip DNLC lookup 1540321545Smav * for case folding or normalizing lookups because the 1541321545Smav * DNLC code only stores the passed in name. This means 1542321545Smav * creating 'a' and removing 'A' on a case insensitive 1543321545Smav * file system would work, but DNLC still thinks 'a' 1544321545Smav * exists and won't let you create it again on the next 1545321545Smav * pass through fast path. 1546321545Smav */ 1547303970Savg if (!(flags & LOOKUP_XATTR)) { 1548211932Smm if (dvp->v_type != VDIR) { 1549249195Smm return (SET_ERROR(ENOTDIR)); 1550219089Spjd } else if (zdp->z_sa_hdl == NULL) { 1551249195Smm return (SET_ERROR(EIO)); 1552211932Smm } 1553211932Smm } 1554211932Smm 1555211932Smm DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1556211932Smm 1557168404Spjd ZFS_ENTER(zfsvfs); 1558185029Spjd ZFS_VERIFY_ZP(zdp); 1559168404Spjd 1560168404Spjd *vpp = NULL; 1561168404Spjd 1562185029Spjd if (flags & LOOKUP_XATTR) { 1563168404Spjd#ifdef TODO 1564168404Spjd /* 1565168404Spjd * If the xattr property is off, refuse the lookup request. 1566168404Spjd */ 1567168404Spjd if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1568168404Spjd ZFS_EXIT(zfsvfs); 1569249195Smm return (SET_ERROR(EINVAL)); 1570168404Spjd } 1571185029Spjd#endif 1572168404Spjd 1573168404Spjd /* 1574168404Spjd * We don't allow recursive attributes.. 1575168404Spjd * Maybe someday we will. 
1576168404Spjd */ 1577219089Spjd if (zdp->z_pflags & ZFS_XATTR) { 1578168404Spjd ZFS_EXIT(zfsvfs); 1579249195Smm return (SET_ERROR(EINVAL)); 1580168404Spjd } 1581168404Spjd 1582168404Spjd if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1583168404Spjd ZFS_EXIT(zfsvfs); 1584168404Spjd return (error); 1585168404Spjd } 1586168404Spjd 1587168404Spjd /* 1588168404Spjd * Do we have permission to get into attribute directory? 1589168404Spjd */ 1590185029Spjd if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1591185029Spjd B_FALSE, cr)) { 1592303970Savg vrele(*vpp); 1593185029Spjd *vpp = NULL; 1594168404Spjd } 1595168404Spjd 1596168404Spjd ZFS_EXIT(zfsvfs); 1597168404Spjd return (error); 1598168404Spjd } 1599168404Spjd 1600168404Spjd /* 1601168404Spjd * Check accessibility of directory. 1602168404Spjd */ 1603185029Spjd if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1604168404Spjd ZFS_EXIT(zfsvfs); 1605168404Spjd return (error); 1606168404Spjd } 1607168404Spjd 1608185029Spjd if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1609185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1610185029Spjd ZFS_EXIT(zfsvfs); 1611249195Smm return (SET_ERROR(EILSEQ)); 1612185029Spjd } 1613168404Spjd 1614168962Spjd 1615303970Savg /* 1616303970Savg * First handle the special cases. 1617303970Savg */ 1618303970Savg if ((cnp->cn_flags & ISDOTDOT) != 0) { 1619303970Savg /* 1620303970Savg * If we are a snapshot mounted under .zfs, return 1621303970Savg * the vp for the snapshot directory. 
1622303970Savg */ 1623303970Savg if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) { 1624315842Savg struct componentname cn; 1625315842Savg vnode_t *zfsctl_vp; 1626315842Savg int ltype; 1627315842Savg 1628303970Savg ZFS_EXIT(zfsvfs); 1629315842Savg ltype = VOP_ISLOCKED(dvp); 1630315842Savg VOP_UNLOCK(dvp, 0); 1631315842Savg error = zfsctl_root(zfsvfs->z_parent, LK_SHARED, 1632315842Savg &zfsctl_vp); 1633303970Savg if (error == 0) { 1634315842Savg cn.cn_nameptr = "snapshot"; 1635315842Savg cn.cn_namelen = strlen(cn.cn_nameptr); 1636315842Savg cn.cn_nameiop = cnp->cn_nameiop; 1637319415Savg cn.cn_flags = cnp->cn_flags & ~ISDOTDOT; 1638315842Savg cn.cn_lkflags = cnp->cn_lkflags; 1639315842Savg error = VOP_LOOKUP(zfsctl_vp, vpp, &cn); 1640315842Savg vput(zfsctl_vp); 1641303970Savg } 1642315842Savg vn_lock(dvp, ltype | LK_RETRY); 1643315842Savg return (error); 1644303970Savg } 1645303970Savg } 1646303970Savg if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) { 1647315842Savg ZFS_EXIT(zfsvfs); 1648303970Savg if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP) 1649315842Savg return (SET_ERROR(ENOTSUP)); 1650315842Savg error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp); 1651315842Savg return (error); 1652303970Savg } 1653303970Savg 1654303970Savg /* 1655303970Savg * The loop is retry the lookup if the parent-child relationship 1656303970Savg * changes during the dot-dot locking complexities. 1657303970Savg */ 1658303970Savg for (;;) { 1659303970Savg uint64_t parent; 1660303970Savg 1661303970Savg error = zfs_dirlook(zdp, nm, &zp); 1662303970Savg if (error == 0) 1663303970Savg *vpp = ZTOV(zp); 1664303970Savg 1665303970Savg ZFS_EXIT(zfsvfs); 1666303970Savg if (error != 0) 1667303970Savg break; 1668303970Savg 1669303970Savg error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags); 1670303970Savg if (error != 0) { 1671303970Savg /* 1672303970Savg * If we've got a locking error, then the vnode 1673303970Savg * got reclaimed because of a force unmount. 
1674303970Savg * We never enter doomed vnodes into the name cache. 1675303970Savg */ 1676303970Savg *vpp = NULL; 1677303970Savg return (error); 1678303970Savg } 1679303970Savg 1680303970Savg if ((cnp->cn_flags & ISDOTDOT) == 0) 1681303970Savg break; 1682303970Savg 1683303970Savg ZFS_ENTER(zfsvfs); 1684303970Savg if (zdp->z_sa_hdl == NULL) { 1685303970Savg error = SET_ERROR(EIO); 1686303970Savg } else { 1687303970Savg error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 1688303970Savg &parent, sizeof (parent)); 1689303970Savg } 1690303970Savg if (error != 0) { 1691303970Savg ZFS_EXIT(zfsvfs); 1692303970Savg vput(ZTOV(zp)); 1693303970Savg break; 1694303970Savg } 1695303970Savg if (zp->z_id == parent) { 1696303970Savg ZFS_EXIT(zfsvfs); 1697303970Savg break; 1698303970Savg } 1699303970Savg vput(ZTOV(zp)); 1700303970Savg } 1701303970Savg 1702303970Savgout: 1703303970Savg if (error != 0) 1704303970Savg *vpp = NULL; 1705303970Savg 1706168404Spjd /* Translate errors and add SAVENAME when needed. */ 1707168404Spjd if (cnp->cn_flags & ISLASTCN) { 1708168404Spjd switch (nameiop) { 1709168404Spjd case CREATE: 1710168404Spjd case RENAME: 1711168404Spjd if (error == ENOENT) { 1712168404Spjd error = EJUSTRETURN; 1713168404Spjd cnp->cn_flags |= SAVENAME; 1714168404Spjd break; 1715168404Spjd } 1716168404Spjd /* FALLTHROUGH */ 1717168404Spjd case DELETE: 1718168404Spjd if (error == 0) 1719168404Spjd cnp->cn_flags |= SAVENAME; 1720168404Spjd break; 1721168404Spjd } 1722168404Spjd } 1723169198Spjd 1724303970Savg /* Insert name into cache (as non-existent) if appropriate. */ 1725303970Savg if (zfsvfs->z_use_namecache && 1726303970Savg error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0) 1727303970Savg cache_enter(dvp, NULL, cnp); 1728168404Spjd 1729303970Savg /* Insert name into cache if appropriate. 
*/ 1730303970Savg if (zfsvfs->z_use_namecache && 1731303970Savg error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1732168404Spjd if (!(cnp->cn_flags & ISLASTCN) || 1733168404Spjd (nameiop != DELETE && nameiop != RENAME)) { 1734168404Spjd cache_enter(dvp, *vpp, cnp); 1735168404Spjd } 1736168404Spjd } 1737168404Spjd 1738168404Spjd return (error); 1739168404Spjd} 1740168404Spjd 1741168404Spjd/* 1742168404Spjd * Attempt to create a new entry in a directory. If the entry 1743168404Spjd * already exists, truncate the file if permissible, else return 1744168404Spjd * an error. Return the vp of the created or trunc'd file. 1745168404Spjd * 1746168404Spjd * IN: dvp - vnode of directory to put new file entry in. 1747168404Spjd * name - name of new file entry. 1748168404Spjd * vap - attributes of new file. 1749168404Spjd * excl - flag indicating exclusive or non-exclusive mode. 1750168404Spjd * mode - mode to open file with. 1751168404Spjd * cr - credentials of caller. 1752168404Spjd * flag - large file flag [UNUSED]. 1753185029Spjd * ct - caller context 1754268464Sdelphij * vsecp - ACL to be set 1755168404Spjd * 1756168404Spjd * OUT: vpp - vnode of created or trunc'd entry. 1757168404Spjd * 1758251631Sdelphij * RETURN: 0 on success, error code on failure. 
1759168404Spjd * 1760168404Spjd * Timestamps: 1761168404Spjd * dvp - ctime|mtime updated if new entry created 1762168404Spjd * vp - ctime|mtime always, atime if new 1763168404Spjd */ 1764185029Spjd 1765168404Spjd/* ARGSUSED */ 1766168404Spjdstatic int 1767168962Spjdzfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 1768185029Spjd vnode_t **vpp, cred_t *cr, kthread_t *td) 1769168404Spjd{ 1770168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1771168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1772185029Spjd zilog_t *zilog; 1773185029Spjd objset_t *os; 1774168404Spjd dmu_tx_t *tx; 1775168404Spjd int error; 1776209962Smm ksid_t *ksid; 1777209962Smm uid_t uid; 1778209962Smm gid_t gid = crgetgid(cr); 1779219089Spjd zfs_acl_ids_t acl_ids; 1780209962Smm boolean_t fuid_dirtied; 1781185029Spjd void *vsecp = NULL; 1782185029Spjd int flag = 0; 1783303970Savg uint64_t txtype; 1784168404Spjd 1785185029Spjd /* 1786185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 1787185029Spjd * make sure file system is at proper version 1788185029Spjd */ 1789185029Spjd 1790209962Smm ksid = crgetsid(cr, KSID_OWNER); 1791209962Smm if (ksid) 1792209962Smm uid = ksid_getid(ksid); 1793209962Smm else 1794209962Smm uid = crgetuid(cr); 1795219089Spjd 1796185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 1797185029Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 1798219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1799249195Smm return (SET_ERROR(EINVAL)); 1800185029Spjd 1801168404Spjd ZFS_ENTER(zfsvfs); 1802185029Spjd ZFS_VERIFY_ZP(dzp); 1803185029Spjd os = zfsvfs->z_os; 1804185029Spjd zilog = zfsvfs->z_log; 1805168404Spjd 1806185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1807185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1808185029Spjd ZFS_EXIT(zfsvfs); 1809249195Smm return (SET_ERROR(EILSEQ)); 1810185029Spjd } 1811185029Spjd 1812185029Spjd if (vap->va_mask & AT_XVATTR) { 1813197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 1814185029Spjd 
crgetuid(cr), cr, vap->va_type)) != 0) { 1815185029Spjd ZFS_EXIT(zfsvfs); 1816185029Spjd return (error); 1817185029Spjd } 1818185029Spjd } 1819260704Savg 1820168404Spjd *vpp = NULL; 1821168404Spjd 1822182905Strasz if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 1823182905Strasz vap->va_mode &= ~S_ISVTX; 1824168404Spjd 1825303970Savg error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 1826303970Savg if (error) { 1827303970Savg ZFS_EXIT(zfsvfs); 1828303970Savg return (error); 1829303970Savg } 1830303970Savg ASSERT3P(zp, ==, NULL); 1831185029Spjd 1832303970Savg /* 1833303970Savg * Create a new file object and update the directory 1834303970Savg * to reference it. 1835303970Savg */ 1836303970Savg if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1837303970Savg goto out; 1838168404Spjd } 1839219089Spjd 1840303970Savg /* 1841303970Savg * We only support the creation of regular files in 1842303970Savg * extended attribute directories. 1843303970Savg */ 1844168404Spjd 1845303970Savg if ((dzp->z_pflags & ZFS_XATTR) && 1846303970Savg (vap->va_type != VREG)) { 1847303970Savg error = SET_ERROR(EINVAL); 1848303970Savg goto out; 1849303970Savg } 1850168404Spjd 1851303970Savg if ((error = zfs_acl_ids_create(dzp, 0, vap, 1852303970Savg cr, vsecp, &acl_ids)) != 0) 1853303970Savg goto out; 1854219089Spjd 1855303970Savg if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 1856303970Savg zfs_acl_ids_free(&acl_ids); 1857303970Savg error = SET_ERROR(EDQUOT); 1858303970Savg goto out; 1859303970Savg } 1860168404Spjd 1861303970Savg getnewvnode_reserve(1); 1862209962Smm 1863303970Savg tx = dmu_tx_create(os); 1864209962Smm 1865303970Savg dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 1866303970Savg ZFS_SA_BASE_ATTR_SIZE); 1867219089Spjd 1868303970Savg fuid_dirtied = zfsvfs->z_fuid_dirty; 1869303970Savg if (fuid_dirtied) 1870303970Savg zfs_fuid_txhold(zfsvfs, tx); 1871303970Savg dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 1872303970Savg dmu_tx_hold_sa(tx, 
dzp->z_sa_hdl, B_FALSE); 1873303970Savg if (!zfsvfs->z_use_sa && 1874303970Savg acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1875303970Savg dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1876303970Savg 0, acl_ids.z_aclp->z_acl_bytes); 1877303970Savg } 1878303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 1879303970Savg if (error) { 1880209962Smm zfs_acl_ids_free(&acl_ids); 1881303970Savg dmu_tx_abort(tx); 1882303970Savg getnewvnode_drop_reserve(); 1883303970Savg ZFS_EXIT(zfsvfs); 1884303970Savg return (error); 1885303970Savg } 1886303970Savg zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1887185029Spjd 1888303970Savg if (fuid_dirtied) 1889303970Savg zfs_fuid_sync(zfsvfs, tx); 1890219089Spjd 1891303970Savg (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 1892303970Savg txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1893303970Savg zfs_log_create(zilog, tx, txtype, dzp, zp, name, 1894303970Savg vsecp, acl_ids.z_fuidp, vap); 1895303970Savg zfs_acl_ids_free(&acl_ids); 1896303970Savg dmu_tx_commit(tx); 1897168404Spjd 1898303970Savg getnewvnode_drop_reserve(); 1899168404Spjd 1900168404Spjdout: 1901303970Savg if (error == 0) { 1902168962Spjd *vpp = ZTOV(zp); 1903168404Spjd } 1904168404Spjd 1905219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1906219089Spjd zil_commit(zilog, 0); 1907219089Spjd 1908168404Spjd ZFS_EXIT(zfsvfs); 1909168404Spjd return (error); 1910168404Spjd} 1911168404Spjd 1912168404Spjd/* 1913168404Spjd * Remove an entry from a directory. 1914168404Spjd * 1915168404Spjd * IN: dvp - vnode of directory to remove entry from. 1916168404Spjd * name - name of entry to remove. 1917168404Spjd * cr - credentials of caller. 1918185029Spjd * ct - caller context 1919185029Spjd * flags - case flags 1920168404Spjd * 1921251631Sdelphij * RETURN: 0 on success, error code on failure. 
1922168404Spjd * 1923168404Spjd * Timestamps: 1924168404Spjd * dvp - ctime|mtime 1925168404Spjd * vp - ctime (if nlink > 0) 1926168404Spjd */ 1927219089Spjd 1928185029Spjd/*ARGSUSED*/ 1929168404Spjdstatic int 1930303970Savgzfs_remove(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) 1931168404Spjd{ 1932303970Savg znode_t *dzp = VTOZ(dvp); 1933303970Savg znode_t *zp = VTOZ(vp); 1934219089Spjd znode_t *xzp; 1935168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1936185029Spjd zilog_t *zilog; 1937168962Spjd uint64_t acl_obj, xattr_obj; 1938219089Spjd uint64_t obj = 0; 1939168404Spjd dmu_tx_t *tx; 1940185029Spjd boolean_t unlinked, toobig = FALSE; 1941185029Spjd uint64_t txtype; 1942168404Spjd int error; 1943168404Spjd 1944168404Spjd ZFS_ENTER(zfsvfs); 1945185029Spjd ZFS_VERIFY_ZP(dzp); 1946303970Savg ZFS_VERIFY_ZP(zp); 1947185029Spjd zilog = zfsvfs->z_log; 1948303970Savg zp = VTOZ(vp); 1949168404Spjd 1950219089Spjd xattr_obj = 0; 1951219089Spjd xzp = NULL; 1952168404Spjd 1953168962Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1954168404Spjd goto out; 1955168962Spjd } 1956168404Spjd 1957168962Spjd /* 1958168962Spjd * Need to use rmdir for removing directories. 1959168962Spjd */ 1960168962Spjd if (vp->v_type == VDIR) { 1961249195Smm error = SET_ERROR(EPERM); 1962168962Spjd goto out; 1963168962Spjd } 1964168962Spjd 1965185029Spjd vnevent_remove(vp, dvp, name, ct); 1966168962Spjd 1967303970Savg obj = zp->z_id; 1968168404Spjd 1969303970Savg /* are there any extended attributes? 
*/ 1970303970Savg error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1971303970Savg &xattr_obj, sizeof (xattr_obj)); 1972303970Savg if (error == 0 && xattr_obj) { 1973303970Savg error = zfs_zget(zfsvfs, xattr_obj, &xzp); 1974303970Savg ASSERT0(error); 1975303970Savg } 1976168962Spjd 1977168404Spjd /* 1978168404Spjd * We may delete the znode now, or we may put it in the unlinked set; 1979168404Spjd * it depends on whether we're the last link, and on whether there are 1980168404Spjd * other holds on the vnode. So we dmu_tx_hold() the right things to 1981168404Spjd * allow for either case. 1982168404Spjd */ 1983168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1984168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1985219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1986219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1987219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 1988168404Spjd 1989303970Savg if (xzp) { 1990219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 1991219089Spjd dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1992168404Spjd } 1993168404Spjd 1994168404Spjd /* charge as an update -- would be nice not to charge at all */ 1995168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1996168404Spjd 1997268464Sdelphij /* 1998294803Smav * Mark this transaction as typically resulting in a net free of space 1999268464Sdelphij */ 2000294803Smav dmu_tx_mark_netfree(tx); 2001268464Sdelphij 2002303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 2003168404Spjd if (error) { 2004168404Spjd dmu_tx_abort(tx); 2005168404Spjd ZFS_EXIT(zfsvfs); 2006168404Spjd return (error); 2007168404Spjd } 2008168404Spjd 2009168404Spjd /* 2010168404Spjd * Remove the directory entry. 
2011168404Spjd */ 2012303970Savg error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked); 2013168404Spjd 2014168404Spjd if (error) { 2015168404Spjd dmu_tx_commit(tx); 2016168404Spjd goto out; 2017168404Spjd } 2018168404Spjd 2019219089Spjd if (unlinked) { 2020168404Spjd zfs_unlinked_add(zp, tx); 2021243268Savg vp->v_vflag |= VV_NOSYNC; 2022168962Spjd } 2023168404Spjd 2024185029Spjd txtype = TX_REMOVE; 2025219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 2026168404Spjd 2027168404Spjd dmu_tx_commit(tx); 2028168404Spjdout: 2029185029Spjd 2030219089Spjd if (xzp) 2031303970Savg vrele(ZTOV(xzp)); 2032168962Spjd 2033219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2034219089Spjd zil_commit(zilog, 0); 2035219089Spjd 2036168404Spjd ZFS_EXIT(zfsvfs); 2037168404Spjd return (error); 2038168404Spjd} 2039168404Spjd 2040168404Spjd/* 2041168404Spjd * Create a new directory and insert it into dvp using the name 2042168404Spjd * provided. Return a pointer to the inserted directory. 2043168404Spjd * 2044168404Spjd * IN: dvp - vnode of directory to add subdir to. 2045168404Spjd * dirname - name of new directory. 2046168404Spjd * vap - attributes of new directory. 2047168404Spjd * cr - credentials of caller. 2048185029Spjd * ct - caller context 2049251631Sdelphij * flags - case flags 2050185029Spjd * vsecp - ACL to be set 2051168404Spjd * 2052168404Spjd * OUT: vpp - vnode of created directory. 2053168404Spjd * 2054251631Sdelphij * RETURN: 0 on success, error code on failure. 
2055168404Spjd * 2056168404Spjd * Timestamps: 2057168404Spjd * dvp - ctime|mtime updated 2058168404Spjd * vp - ctime|mtime|atime updated 2059168404Spjd */ 2060185029Spjd/*ARGSUSED*/ 2061168404Spjdstatic int 2062303970Savgzfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr) 2063168404Spjd{ 2064168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 2065168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2066185029Spjd zilog_t *zilog; 2067185029Spjd uint64_t txtype; 2068168404Spjd dmu_tx_t *tx; 2069168404Spjd int error; 2070209962Smm ksid_t *ksid; 2071209962Smm uid_t uid; 2072209962Smm gid_t gid = crgetgid(cr); 2073219089Spjd zfs_acl_ids_t acl_ids; 2074209962Smm boolean_t fuid_dirtied; 2075168404Spjd 2076168404Spjd ASSERT(vap->va_type == VDIR); 2077168404Spjd 2078185029Spjd /* 2079185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 2080185029Spjd * make sure file system is at proper version 2081185029Spjd */ 2082185029Spjd 2083209962Smm ksid = crgetsid(cr, KSID_OWNER); 2084209962Smm if (ksid) 2085209962Smm uid = ksid_getid(ksid); 2086209962Smm else 2087209962Smm uid = crgetuid(cr); 2088185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2089303970Savg ((vap->va_mask & AT_XVATTR) || 2090219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2091249195Smm return (SET_ERROR(EINVAL)); 2092185029Spjd 2093168404Spjd ZFS_ENTER(zfsvfs); 2094185029Spjd ZFS_VERIFY_ZP(dzp); 2095185029Spjd zilog = zfsvfs->z_log; 2096168404Spjd 2097219089Spjd if (dzp->z_pflags & ZFS_XATTR) { 2098168404Spjd ZFS_EXIT(zfsvfs); 2099249195Smm return (SET_ERROR(EINVAL)); 2100168404Spjd } 2101168404Spjd 2102185029Spjd if (zfsvfs->z_utf8 && u8_validate(dirname, 2103185029Spjd strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2104185029Spjd ZFS_EXIT(zfsvfs); 2105249195Smm return (SET_ERROR(EILSEQ)); 2106185029Spjd } 2107185029Spjd 2108219089Spjd if (vap->va_mask & AT_XVATTR) { 2109197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2110185029Spjd crgetuid(cr), cr, 
vap->va_type)) != 0) { 2111185029Spjd ZFS_EXIT(zfsvfs); 2112185029Spjd return (error); 2113185029Spjd } 2114219089Spjd } 2115185029Spjd 2116219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 2117303970Savg NULL, &acl_ids)) != 0) { 2118219089Spjd ZFS_EXIT(zfsvfs); 2119219089Spjd return (error); 2120219089Spjd } 2121260704Savg 2122168404Spjd /* 2123168404Spjd * First make sure the new directory doesn't exist. 2124219089Spjd * 2125219089Spjd * Existence is checked first to make sure we don't return 2126219089Spjd * EACCES instead of EEXIST which can cause some applications 2127219089Spjd * to fail. 2128168404Spjd */ 2129185029Spjd *vpp = NULL; 2130185029Spjd 2131303970Savg if (error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW)) { 2132219089Spjd zfs_acl_ids_free(&acl_ids); 2133168404Spjd ZFS_EXIT(zfsvfs); 2134168404Spjd return (error); 2135168404Spjd } 2136303970Savg ASSERT3P(zp, ==, NULL); 2137168404Spjd 2138185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 2139219089Spjd zfs_acl_ids_free(&acl_ids); 2140168404Spjd ZFS_EXIT(zfsvfs); 2141168404Spjd return (error); 2142168404Spjd } 2143168404Spjd 2144209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2145211932Smm zfs_acl_ids_free(&acl_ids); 2146209962Smm ZFS_EXIT(zfsvfs); 2147249195Smm return (SET_ERROR(EDQUOT)); 2148209962Smm } 2149209962Smm 2150168404Spjd /* 2151168404Spjd * Add a new entry to the directory. 
2152168404Spjd */ 2153303970Savg getnewvnode_reserve(1); 2154168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2155168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 2156168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2157209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 2158209962Smm if (fuid_dirtied) 2159209962Smm zfs_fuid_txhold(zfsvfs, tx); 2160219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2161219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2162219089Spjd acl_ids.z_aclp->z_acl_bytes); 2163219089Spjd } 2164219089Spjd 2165219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2166219089Spjd ZFS_SA_BASE_ATTR_SIZE); 2167219089Spjd 2168303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 2169168404Spjd if (error) { 2170219089Spjd zfs_acl_ids_free(&acl_ids); 2171168404Spjd dmu_tx_abort(tx); 2172260704Savg getnewvnode_drop_reserve(); 2173168404Spjd ZFS_EXIT(zfsvfs); 2174168404Spjd return (error); 2175168404Spjd } 2176168404Spjd 2177168404Spjd /* 2178168404Spjd * Create new node. 2179168404Spjd */ 2180219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2181168404Spjd 2182209962Smm if (fuid_dirtied) 2183209962Smm zfs_fuid_sync(zfsvfs, tx); 2184219089Spjd 2185168404Spjd /* 2186168404Spjd * Now put new name in parent dir. 
2187168404Spjd */ 2188303970Savg (void) zfs_link_create(dzp, dirname, zp, tx, ZNEW); 2189168404Spjd 2190168404Spjd *vpp = ZTOV(zp); 2191168404Spjd 2192303970Savg txtype = zfs_log_create_txtype(Z_DIR, NULL, vap); 2193303970Savg zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL, 2194209962Smm acl_ids.z_fuidp, vap); 2195185029Spjd 2196209962Smm zfs_acl_ids_free(&acl_ids); 2197219089Spjd 2198168404Spjd dmu_tx_commit(tx); 2199168404Spjd 2200260704Savg getnewvnode_drop_reserve(); 2201260704Savg 2202219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2203219089Spjd zil_commit(zilog, 0); 2204219089Spjd 2205168404Spjd ZFS_EXIT(zfsvfs); 2206168404Spjd return (0); 2207168404Spjd} 2208168404Spjd 2209168404Spjd/* 2210168404Spjd * Remove a directory subdir entry. If the current working 2211168404Spjd * directory is the same as the subdir to be removed, the 2212168404Spjd * remove will fail. 2213168404Spjd * 2214168404Spjd * IN: dvp - vnode of directory to remove from. 2215168404Spjd * name - name of directory to be removed. 2216168404Spjd * cwd - vnode of current working directory. 2217168404Spjd * cr - credentials of caller. 2218185029Spjd * ct - caller context 2219185029Spjd * flags - case flags 2220168404Spjd * 2221251631Sdelphij * RETURN: 0 on success, error code on failure. 
2222168404Spjd * 2223168404Spjd * Timestamps: 2224168404Spjd * dvp - ctime|mtime updated 2225168404Spjd */ 2226185029Spjd/*ARGSUSED*/ 2227168404Spjdstatic int 2228303970Savgzfs_rmdir(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) 2229168404Spjd{ 2230168404Spjd znode_t *dzp = VTOZ(dvp); 2231303970Savg znode_t *zp = VTOZ(vp); 2232168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2233185029Spjd zilog_t *zilog; 2234168404Spjd dmu_tx_t *tx; 2235168404Spjd int error; 2236168404Spjd 2237168962Spjd ZFS_ENTER(zfsvfs); 2238185029Spjd ZFS_VERIFY_ZP(dzp); 2239303970Savg ZFS_VERIFY_ZP(zp); 2240185029Spjd zilog = zfsvfs->z_log; 2241168404Spjd 2242168404Spjd 2243168404Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2244168404Spjd goto out; 2245168404Spjd } 2246168404Spjd 2247168962Spjd if (vp->v_type != VDIR) { 2248249195Smm error = SET_ERROR(ENOTDIR); 2249168962Spjd goto out; 2250168962Spjd } 2251168962Spjd 2252185029Spjd vnevent_rmdir(vp, dvp, name, ct); 2253168962Spjd 2254168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2255168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2256219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2257168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2258219089Spjd zfs_sa_upgrade_txholds(tx, zp); 2259219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 2260304122Savg dmu_tx_mark_netfree(tx); 2261303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 2262168404Spjd if (error) { 2263168404Spjd dmu_tx_abort(tx); 2264168404Spjd ZFS_EXIT(zfsvfs); 2265168404Spjd return (error); 2266168404Spjd } 2267168404Spjd 2268168404Spjd cache_purge(dvp); 2269168404Spjd 2270303970Savg error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL); 2271168404Spjd 2272185029Spjd if (error == 0) { 2273185029Spjd uint64_t txtype = TX_RMDIR; 2274219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2275185029Spjd } 2276168404Spjd 2277168404Spjd dmu_tx_commit(tx); 2278168404Spjd 2279168404Spjd cache_purge(vp); 2280168404Spjdout: 2281219089Spjd if 
(zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2282219089Spjd zil_commit(zilog, 0); 2283219089Spjd 2284168404Spjd ZFS_EXIT(zfsvfs); 2285168404Spjd return (error); 2286168404Spjd} 2287168404Spjd 2288168404Spjd/* 2289168404Spjd * Read as many directory entries as will fit into the provided 2290168404Spjd * buffer from the given directory cursor position (specified in 2291251631Sdelphij * the uio structure). 2292168404Spjd * 2293168404Spjd * IN: vp - vnode of directory to read. 2294168404Spjd * uio - structure supplying read location, range info, 2295168404Spjd * and return buffer. 2296168404Spjd * cr - credentials of caller. 2297185029Spjd * ct - caller context 2298185029Spjd * flags - case flags 2299168404Spjd * 2300168404Spjd * OUT: uio - updated offset and range, buffer filled. 2301168404Spjd * eofp - set to true if end-of-file detected. 2302168404Spjd * 2303251631Sdelphij * RETURN: 0 on success, error code on failure. 2304168404Spjd * 2305168404Spjd * Timestamps: 2306168404Spjd * vp - atime updated 2307168404Spjd * 2308168404Spjd * Note that the low 4 bits of the cookie returned by zap is always zero. 2309168404Spjd * This allows us to use the low range for "special" directory entries: 2310168404Spjd * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2311168404Spjd * we use the offset 2 for the '.zfs' directory. 
2312168404Spjd */ 2313168404Spjd/* ARGSUSED */ 2314168404Spjdstatic int 2315168962Spjdzfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) 2316168404Spjd{ 2317168404Spjd znode_t *zp = VTOZ(vp); 2318168404Spjd iovec_t *iovp; 2319185029Spjd edirent_t *eodp; 2320168404Spjd dirent64_t *odp; 2321168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2322168404Spjd objset_t *os; 2323168404Spjd caddr_t outbuf; 2324168404Spjd size_t bufsize; 2325168404Spjd zap_cursor_t zc; 2326168404Spjd zap_attribute_t zap; 2327168404Spjd uint_t bytes_wanted; 2328168404Spjd uint64_t offset; /* must be unsigned; checks for < 1 */ 2329219089Spjd uint64_t parent; 2330168404Spjd int local_eof; 2331168404Spjd int outcount; 2332168404Spjd int error; 2333168404Spjd uint8_t prefetch; 2334185029Spjd boolean_t check_sysattrs; 2335168404Spjd uint8_t type; 2336168962Spjd int ncooks; 2337168962Spjd u_long *cooks = NULL; 2338185029Spjd int flags = 0; 2339168404Spjd 2340168404Spjd ZFS_ENTER(zfsvfs); 2341185029Spjd ZFS_VERIFY_ZP(zp); 2342168404Spjd 2343219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 2344219089Spjd &parent, sizeof (parent))) != 0) { 2345219089Spjd ZFS_EXIT(zfsvfs); 2346219089Spjd return (error); 2347219089Spjd } 2348219089Spjd 2349168404Spjd /* 2350168404Spjd * If we are not given an eof variable, 2351168404Spjd * use a local one. 2352168404Spjd */ 2353168404Spjd if (eofp == NULL) 2354168404Spjd eofp = &local_eof; 2355168404Spjd 2356168404Spjd /* 2357168404Spjd * Check for valid iov_len. 
2358168404Spjd */ 2359168404Spjd if (uio->uio_iov->iov_len <= 0) { 2360168404Spjd ZFS_EXIT(zfsvfs); 2361249195Smm return (SET_ERROR(EINVAL)); 2362168404Spjd } 2363168404Spjd 2364168404Spjd /* 2365168404Spjd * Quit if directory has been removed (posix) 2366168404Spjd */ 2367168404Spjd if ((*eofp = zp->z_unlinked) != 0) { 2368168404Spjd ZFS_EXIT(zfsvfs); 2369168404Spjd return (0); 2370168404Spjd } 2371168404Spjd 2372168404Spjd error = 0; 2373168404Spjd os = zfsvfs->z_os; 2374168404Spjd offset = uio->uio_loffset; 2375168404Spjd prefetch = zp->z_zn_prefetch; 2376168404Spjd 2377168404Spjd /* 2378168404Spjd * Initialize the iterator cursor. 2379168404Spjd */ 2380168404Spjd if (offset <= 3) { 2381168404Spjd /* 2382168404Spjd * Start iteration from the beginning of the directory. 2383168404Spjd */ 2384168404Spjd zap_cursor_init(&zc, os, zp->z_id); 2385168404Spjd } else { 2386168404Spjd /* 2387168404Spjd * The offset is a serialized cursor. 2388168404Spjd */ 2389168404Spjd zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2390168404Spjd } 2391168404Spjd 2392168404Spjd /* 2393168404Spjd * Get space to change directory entries into fs independent format. 2394168404Spjd */ 2395168404Spjd iovp = uio->uio_iov; 2396168404Spjd bytes_wanted = iovp->iov_len; 2397168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2398168404Spjd bufsize = bytes_wanted; 2399168404Spjd outbuf = kmem_alloc(bufsize, KM_SLEEP); 2400168404Spjd odp = (struct dirent64 *)outbuf; 2401168404Spjd } else { 2402168404Spjd bufsize = bytes_wanted; 2403247187Smm outbuf = NULL; 2404168404Spjd odp = (struct dirent64 *)iovp->iov_base; 2405168404Spjd } 2406185029Spjd eodp = (struct edirent *)odp; 2407168404Spjd 2408169170Spjd if (ncookies != NULL) { 2409168404Spjd /* 2410168404Spjd * Minimum entry size is dirent size and 1 byte for a file name. 
2411168404Spjd */ 2412168962Spjd ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2413219404Spjd cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2414219404Spjd *cookies = cooks; 2415168962Spjd *ncookies = ncooks; 2416168404Spjd } 2417185029Spjd /* 2418185029Spjd * If this VFS supports the system attribute view interface; and 2419185029Spjd * we're looking at an extended attribute directory; and we care 2420185029Spjd * about normalization conflicts on this vfs; then we must check 2421185029Spjd * for normalization conflicts with the sysattr name space. 2422185029Spjd */ 2423185029Spjd#ifdef TODO 2424185029Spjd check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2425185029Spjd (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2426185029Spjd (flags & V_RDDIR_ENTFLAGS); 2427185029Spjd#else 2428185029Spjd check_sysattrs = 0; 2429185029Spjd#endif 2430168404Spjd 2431168404Spjd /* 2432168404Spjd * Transform to file-system independent format 2433168404Spjd */ 2434168404Spjd outcount = 0; 2435168404Spjd while (outcount < bytes_wanted) { 2436168404Spjd ino64_t objnum; 2437168404Spjd ushort_t reclen; 2438219089Spjd off64_t *next = NULL; 2439168404Spjd 2440168404Spjd /* 2441168404Spjd * Special case `.', `..', and `.zfs'. 
2442168404Spjd */ 2443168404Spjd if (offset == 0) { 2444168404Spjd (void) strcpy(zap.za_name, "."); 2445185029Spjd zap.za_normalization_conflict = 0; 2446168404Spjd objnum = zp->z_id; 2447169108Spjd type = DT_DIR; 2448168404Spjd } else if (offset == 1) { 2449168404Spjd (void) strcpy(zap.za_name, ".."); 2450185029Spjd zap.za_normalization_conflict = 0; 2451219089Spjd objnum = parent; 2452169108Spjd type = DT_DIR; 2453168404Spjd } else if (offset == 2 && zfs_show_ctldir(zp)) { 2454168404Spjd (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2455185029Spjd zap.za_normalization_conflict = 0; 2456168404Spjd objnum = ZFSCTL_INO_ROOT; 2457169108Spjd type = DT_DIR; 2458168404Spjd } else { 2459168404Spjd /* 2460168404Spjd * Grab next entry. 2461168404Spjd */ 2462168404Spjd if (error = zap_cursor_retrieve(&zc, &zap)) { 2463168404Spjd if ((*eofp = (error == ENOENT)) != 0) 2464168404Spjd break; 2465168404Spjd else 2466168404Spjd goto update; 2467168404Spjd } 2468168404Spjd 2469168404Spjd if (zap.za_integer_length != 8 || 2470168404Spjd zap.za_num_integers != 1) { 2471168404Spjd cmn_err(CE_WARN, "zap_readdir: bad directory " 2472168404Spjd "entry, obj = %lld, offset = %lld\n", 2473168404Spjd (u_longlong_t)zp->z_id, 2474168404Spjd (u_longlong_t)offset); 2475249195Smm error = SET_ERROR(ENXIO); 2476168404Spjd goto update; 2477168404Spjd } 2478168404Spjd 2479168404Spjd objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2480168404Spjd /* 2481168404Spjd * MacOS X can extract the object type here such as: 2482168404Spjd * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2483168404Spjd */ 2484168404Spjd type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2485185029Spjd 2486185029Spjd if (check_sysattrs && !zap.za_normalization_conflict) { 2487185029Spjd#ifdef TODO 2488185029Spjd zap.za_normalization_conflict = 2489185029Spjd xattr_sysattr_casechk(zap.za_name); 2490185029Spjd#else 2491185029Spjd panic("%s:%u: TODO", __func__, __LINE__); 2492185029Spjd#endif 2493185029Spjd } 2494168404Spjd } 
2495168404Spjd 2496211932Smm if (flags & V_RDDIR_ACCFILTER) { 2497211932Smm /* 2498211932Smm * If we have no access at all, don't include 2499211932Smm * this entry in the returned information 2500211932Smm */ 2501211932Smm znode_t *ezp; 2502211932Smm if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2503211932Smm goto skip_entry; 2504211932Smm if (!zfs_has_access(ezp, cr)) { 2505303970Savg vrele(ZTOV(ezp)); 2506211932Smm goto skip_entry; 2507211932Smm } 2508303970Savg vrele(ZTOV(ezp)); 2509211932Smm } 2510211932Smm 2511185029Spjd if (flags & V_RDDIR_ENTFLAGS) 2512185029Spjd reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2513185029Spjd else 2514185029Spjd reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2515185029Spjd 2516168404Spjd /* 2517168404Spjd * Will this entry fit in the buffer? 2518168404Spjd */ 2519168404Spjd if (outcount + reclen > bufsize) { 2520168404Spjd /* 2521168404Spjd * Did we manage to fit anything in the buffer? 2522168404Spjd */ 2523168404Spjd if (!outcount) { 2524249195Smm error = SET_ERROR(EINVAL); 2525168404Spjd goto update; 2526168404Spjd } 2527168404Spjd break; 2528168404Spjd } 2529185029Spjd if (flags & V_RDDIR_ENTFLAGS) { 2530185029Spjd /* 2531185029Spjd * Add extended flag entry: 2532185029Spjd */ 2533185029Spjd eodp->ed_ino = objnum; 2534185029Spjd eodp->ed_reclen = reclen; 2535185029Spjd /* NOTE: ed_off is the offset for the *next* entry */ 2536185029Spjd next = &(eodp->ed_off); 2537185029Spjd eodp->ed_eflags = zap.za_normalization_conflict ? 
2538185029Spjd ED_CASE_CONFLICT : 0; 2539185029Spjd (void) strncpy(eodp->ed_name, zap.za_name, 2540185029Spjd EDIRENT_NAMELEN(reclen)); 2541185029Spjd eodp = (edirent_t *)((intptr_t)eodp + reclen); 2542185029Spjd } else { 2543185029Spjd /* 2544185029Spjd * Add normal entry: 2545185029Spjd */ 2546185029Spjd odp->d_ino = objnum; 2547185029Spjd odp->d_reclen = reclen; 2548185029Spjd odp->d_namlen = strlen(zap.za_name); 2549185029Spjd (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2550185029Spjd odp->d_type = type; 2551185029Spjd odp = (dirent64_t *)((intptr_t)odp + reclen); 2552185029Spjd } 2553168404Spjd outcount += reclen; 2554168404Spjd 2555168404Spjd ASSERT(outcount <= bufsize); 2556168404Spjd 2557168404Spjd /* Prefetch znode */ 2558168404Spjd if (prefetch) 2559286705Smav dmu_prefetch(os, objnum, 0, 0, 0, 2560286705Smav ZIO_PRIORITY_SYNC_READ); 2561168404Spjd 2562211932Smm skip_entry: 2563168404Spjd /* 2564168404Spjd * Move to the next entry, fill in the previous offset. 2565168404Spjd */ 2566168404Spjd if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2567168404Spjd zap_cursor_advance(&zc); 2568168404Spjd offset = zap_cursor_serialize(&zc); 2569168404Spjd } else { 2570168404Spjd offset += 1; 2571168404Spjd } 2572219404Spjd 2573219404Spjd if (cooks != NULL) { 2574219404Spjd *cooks++ = offset; 2575219404Spjd ncooks--; 2576219404Spjd KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2577219404Spjd } 2578168404Spjd } 2579168404Spjd zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2580168404Spjd 2581168404Spjd /* Subtract unused cookies */ 2582168962Spjd if (ncookies != NULL) 2583168962Spjd *ncookies -= ncooks; 2584168404Spjd 2585168404Spjd if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2586168404Spjd iovp->iov_base += outcount; 2587168404Spjd iovp->iov_len -= outcount; 2588168404Spjd uio->uio_resid -= outcount; 2589168404Spjd } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2590168404Spjd 
/* 2591168404Spjd * Reset the pointer. 2592168404Spjd */ 2593168404Spjd offset = uio->uio_loffset; 2594168404Spjd } 2595168404Spjd 2596168404Spjdupdate: 2597168404Spjd zap_cursor_fini(&zc); 2598168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2599168404Spjd kmem_free(outbuf, bufsize); 2600168404Spjd 2601168404Spjd if (error == ENOENT) 2602168404Spjd error = 0; 2603168404Spjd 2604168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2605168404Spjd 2606168404Spjd uio->uio_loffset = offset; 2607168404Spjd ZFS_EXIT(zfsvfs); 2608169107Spjd if (error != 0 && cookies != NULL) { 2609168962Spjd free(*cookies, M_TEMP); 2610168962Spjd *cookies = NULL; 2611168962Spjd *ncookies = 0; 2612168404Spjd } 2613168404Spjd return (error); 2614168404Spjd} 2615168404Spjd 2616185029Spjdulong_t zfs_fsync_sync_cnt = 4; 2617185029Spjd 2618168404Spjdstatic int 2619185029Spjdzfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2620168404Spjd{ 2621168962Spjd znode_t *zp = VTOZ(vp); 2622168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2623168404Spjd 2624185029Spjd (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2625185029Spjd 2626219089Spjd if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 2627219089Spjd ZFS_ENTER(zfsvfs); 2628219089Spjd ZFS_VERIFY_ZP(zp); 2629219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 2630219089Spjd ZFS_EXIT(zfsvfs); 2631219089Spjd } 2632168404Spjd return (0); 2633168404Spjd} 2634168404Spjd 2635185029Spjd 2636168404Spjd/* 2637168404Spjd * Get the requested file attributes and place them in the provided 2638168404Spjd * vattr structure. 2639168404Spjd * 2640168404Spjd * IN: vp - vnode of file. 2641168404Spjd * vap - va_mask identifies requested attributes. 2642185029Spjd * If AT_XVATTR set, then optional attrs are requested 2643185029Spjd * flags - ATTR_NOACLCHECK (CIFS server context) 2644168404Spjd * cr - credentials of caller. 2645185029Spjd * ct - caller context 2646168404Spjd * 2647168404Spjd * OUT: vap - attribute values. 
2648168404Spjd * 2649251631Sdelphij * RETURN: 0 (always succeeds). 2650168404Spjd */ 2651168404Spjd/* ARGSUSED */ 2652168404Spjdstatic int 2653185029Spjdzfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2654185029Spjd caller_context_t *ct) 2655168404Spjd{ 2656168962Spjd znode_t *zp = VTOZ(vp); 2657168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2658185029Spjd int error = 0; 2659168962Spjd uint32_t blksize; 2660168962Spjd u_longlong_t nblocks; 2661185029Spjd uint64_t links; 2662224251Sdelphij uint64_t mtime[2], ctime[2], crtime[2], rdev; 2663185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2664185029Spjd xoptattr_t *xoap = NULL; 2665185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2666224251Sdelphij sa_bulk_attr_t bulk[4]; 2667219089Spjd int count = 0; 2668168404Spjd 2669168404Spjd ZFS_ENTER(zfsvfs); 2670185029Spjd ZFS_VERIFY_ZP(zp); 2671168404Spjd 2672219089Spjd zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2673219089Spjd 2674219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 2675219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 2676243807Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 2677224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2678224251Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 2679224251Sdelphij &rdev, 8); 2680219089Spjd 2681219089Spjd if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 2682219089Spjd ZFS_EXIT(zfsvfs); 2683219089Spjd return (error); 2684219089Spjd } 2685219089Spjd 2686168404Spjd /* 2687185029Spjd * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2688185029Spjd * Also, if we are the owner don't bother, since owner should 2689185029Spjd * always be allowed to read basic attributes of file. 
2690185029Spjd */ 2691219089Spjd if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2692219089Spjd (vap->va_uid != crgetuid(cr))) { 2693185029Spjd if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2694185029Spjd skipaclchk, cr)) { 2695185029Spjd ZFS_EXIT(zfsvfs); 2696185029Spjd return (error); 2697185029Spjd } 2698185029Spjd } 2699185029Spjd 2700185029Spjd /* 2701168404Spjd * Return all attributes. It's cheaper to provide the answer 2702168404Spjd * than to determine whether we were asked the question. 2703168404Spjd */ 2704168404Spjd 2705219089Spjd vap->va_type = IFTOVT(zp->z_mode); 2706219089Spjd vap->va_mode = zp->z_mode & ~S_IFMT; 2707277300Ssmh#ifdef illumos 2708224252Sdelphij vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2709224252Sdelphij#else 2710224252Sdelphij vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 2711224252Sdelphij#endif 2712168404Spjd vap->va_nodeid = zp->z_id; 2713185029Spjd if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2714219089Spjd links = zp->z_links + 1; 2715185029Spjd else 2716219089Spjd links = zp->z_links; 2717229425Sdim vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! */ 2718219089Spjd vap->va_size = zp->z_size; 2719277300Ssmh#ifdef illumos 2720224252Sdelphij vap->va_rdev = vp->v_rdev; 2721224252Sdelphij#else 2722224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2723224251Sdelphij vap->va_rdev = zfs_cmpldev(rdev); 2724224252Sdelphij#endif 2725168404Spjd vap->va_seq = zp->z_seq; 2726168404Spjd vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2727272467Saraujo vap->va_filerev = zp->z_seq; 2728168404Spjd 2729185029Spjd /* 2730185029Spjd * Add in any requested optional attributes and the create time. 2731185029Spjd * Also set the corresponding bits in the returned attribute bitmap. 
2732185029Spjd */ 2733185029Spjd if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2734185029Spjd if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2735185029Spjd xoap->xoa_archive = 2736219089Spjd ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2737185029Spjd XVA_SET_RTN(xvap, XAT_ARCHIVE); 2738185029Spjd } 2739185029Spjd 2740185029Spjd if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2741185029Spjd xoap->xoa_readonly = 2742219089Spjd ((zp->z_pflags & ZFS_READONLY) != 0); 2743185029Spjd XVA_SET_RTN(xvap, XAT_READONLY); 2744185029Spjd } 2745185029Spjd 2746185029Spjd if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2747185029Spjd xoap->xoa_system = 2748219089Spjd ((zp->z_pflags & ZFS_SYSTEM) != 0); 2749185029Spjd XVA_SET_RTN(xvap, XAT_SYSTEM); 2750185029Spjd } 2751185029Spjd 2752185029Spjd if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2753185029Spjd xoap->xoa_hidden = 2754219089Spjd ((zp->z_pflags & ZFS_HIDDEN) != 0); 2755185029Spjd XVA_SET_RTN(xvap, XAT_HIDDEN); 2756185029Spjd } 2757185029Spjd 2758185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2759185029Spjd xoap->xoa_nounlink = 2760219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2761185029Spjd XVA_SET_RTN(xvap, XAT_NOUNLINK); 2762185029Spjd } 2763185029Spjd 2764185029Spjd if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2765185029Spjd xoap->xoa_immutable = 2766219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2767185029Spjd XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2768185029Spjd } 2769185029Spjd 2770185029Spjd if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2771185029Spjd xoap->xoa_appendonly = 2772219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2773185029Spjd XVA_SET_RTN(xvap, XAT_APPENDONLY); 2774185029Spjd } 2775185029Spjd 2776185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2777185029Spjd xoap->xoa_nodump = 2778219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0); 2779185029Spjd XVA_SET_RTN(xvap, XAT_NODUMP); 2780185029Spjd } 2781185029Spjd 2782185029Spjd if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2783185029Spjd xoap->xoa_opaque = 2784219089Spjd 
((zp->z_pflags & ZFS_OPAQUE) != 0); 2785185029Spjd XVA_SET_RTN(xvap, XAT_OPAQUE); 2786185029Spjd } 2787185029Spjd 2788185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2789185029Spjd xoap->xoa_av_quarantined = 2790219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2791185029Spjd XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2792185029Spjd } 2793185029Spjd 2794185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2795185029Spjd xoap->xoa_av_modified = 2796219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2797185029Spjd XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2798185029Spjd } 2799185029Spjd 2800185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2801219089Spjd vp->v_type == VREG) { 2802219089Spjd zfs_sa_get_scanstamp(zp, xvap); 2803185029Spjd } 2804185029Spjd 2805219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2806219089Spjd xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 2807219089Spjd XVA_SET_RTN(xvap, XAT_REPARSE); 2808219089Spjd } 2809219089Spjd if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 2810219089Spjd xoap->xoa_generation = zp->z_gen; 2811219089Spjd XVA_SET_RTN(xvap, XAT_GEN); 2812219089Spjd } 2813219089Spjd 2814219089Spjd if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2815219089Spjd xoap->xoa_offline = 2816219089Spjd ((zp->z_pflags & ZFS_OFFLINE) != 0); 2817219089Spjd XVA_SET_RTN(xvap, XAT_OFFLINE); 2818219089Spjd } 2819219089Spjd 2820219089Spjd if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2821219089Spjd xoap->xoa_sparse = 2822219089Spjd ((zp->z_pflags & ZFS_SPARSE) != 0); 2823219089Spjd XVA_SET_RTN(xvap, XAT_SPARSE); 2824219089Spjd } 2825185029Spjd } 2826185029Spjd 2827219089Spjd ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 2828219089Spjd ZFS_TIME_DECODE(&vap->va_mtime, mtime); 2829219089Spjd ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2830219089Spjd ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 2831168404Spjd 2832168404Spjd 2833219089Spjd sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 2834168404Spjd vap->va_blksize = blksize; 2835168404Spjd vap->va_bytes = 
nblocks << 9; /* nblocks * 512 */ 2836168404Spjd 2837168404Spjd if (zp->z_blksz == 0) { 2838168404Spjd /* 2839168404Spjd * Block size hasn't been set; suggest maximal I/O transfers. 2840168404Spjd */ 2841168404Spjd vap->va_blksize = zfsvfs->z_max_blksz; 2842168404Spjd } 2843168404Spjd 2844168404Spjd ZFS_EXIT(zfsvfs); 2845168404Spjd return (0); 2846168404Spjd} 2847168404Spjd 2848168404Spjd/* 2849168404Spjd * Set the file attributes to the values contained in the 2850168404Spjd * vattr structure. 2851168404Spjd * 2852168404Spjd * IN: vp - vnode of file to be modified. 2853168404Spjd * vap - new attribute values. 2854185029Spjd * If AT_XVATTR set, then optional attrs are being set 2855168404Spjd * flags - ATTR_UTIME set if non-default time values provided. 2856185029Spjd * - ATTR_NOACLCHECK (CIFS context only). 2857168404Spjd * cr - credentials of caller. 2858185029Spjd * ct - caller context 2859168404Spjd * 2860251631Sdelphij * RETURN: 0 on success, error code on failure. 2861168404Spjd * 2862168404Spjd * Timestamps: 2863168404Spjd * vp - ctime updated, mtime updated if size changed. 
2864168404Spjd */ 2865168404Spjd/* ARGSUSED */ 2866168404Spjdstatic int 2867168962Spjdzfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2868251631Sdelphij caller_context_t *ct) 2869168404Spjd{ 2870185029Spjd znode_t *zp = VTOZ(vp); 2871168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2872185029Spjd zilog_t *zilog; 2873168404Spjd dmu_tx_t *tx; 2874168404Spjd vattr_t oldva; 2875209962Smm xvattr_t tmpxvattr; 2876168962Spjd uint_t mask = vap->va_mask; 2877247187Smm uint_t saved_mask = 0; 2878197831Spjd uint64_t saved_mode; 2879168404Spjd int trim_mask = 0; 2880168404Spjd uint64_t new_mode; 2881209962Smm uint64_t new_uid, new_gid; 2882219089Spjd uint64_t xattr_obj; 2883219089Spjd uint64_t mtime[2], ctime[2]; 2884168404Spjd znode_t *attrzp; 2885168404Spjd int need_policy = FALSE; 2886219089Spjd int err, err2; 2887185029Spjd zfs_fuid_info_t *fuidp = NULL; 2888185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2889185029Spjd xoptattr_t *xoap; 2890219089Spjd zfs_acl_t *aclp; 2891185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 2892219089Spjd boolean_t fuid_dirtied = B_FALSE; 2893219089Spjd sa_bulk_attr_t bulk[7], xattr_bulk[7]; 2894219089Spjd int count = 0, xattr_count = 0; 2895168404Spjd 2896168404Spjd if (mask == 0) 2897168404Spjd return (0); 2898168404Spjd 2899168962Spjd if (mask & AT_NOSET) 2900249195Smm return (SET_ERROR(EINVAL)); 2901168962Spjd 2902185029Spjd ZFS_ENTER(zfsvfs); 2903185029Spjd ZFS_VERIFY_ZP(zp); 2904185029Spjd 2905185029Spjd zilog = zfsvfs->z_log; 2906185029Spjd 2907185029Spjd /* 2908185029Spjd * Make sure that if we have ephemeral uid/gid or xvattr specified 2909185029Spjd * that file system is at proper version level 2910185029Spjd */ 2911185029Spjd 2912185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2913185029Spjd (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2914185029Spjd ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 2915185029Spjd (mask & AT_XVATTR))) { 2916185029Spjd ZFS_EXIT(zfsvfs); 2917249195Smm return (SET_ERROR(EINVAL)); 2918185029Spjd } 2919185029Spjd 2920185029Spjd if (mask & AT_SIZE && vp->v_type == VDIR) { 2921185029Spjd ZFS_EXIT(zfsvfs); 2922249195Smm return (SET_ERROR(EISDIR)); 2923185029Spjd } 2924168404Spjd 2925185029Spjd if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 2926185029Spjd ZFS_EXIT(zfsvfs); 2927249195Smm return (SET_ERROR(EINVAL)); 2928185029Spjd } 2929168404Spjd 2930185029Spjd /* 2931185029Spjd * If this is an xvattr_t, then get a pointer to the structure of 2932185029Spjd * optional attributes. If this is NULL, then we have a vattr_t. 
2933185029Spjd */ 2934185029Spjd xoap = xva_getxoptattr(xvap); 2935168404Spjd 2936209962Smm xva_init(&tmpxvattr); 2937209962Smm 2938185029Spjd /* 2939185029Spjd * Immutable files can only alter immutable bit and atime 2940185029Spjd */ 2941219089Spjd if ((zp->z_pflags & ZFS_IMMUTABLE) && 2942185029Spjd ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 2943185029Spjd ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 2944185029Spjd ZFS_EXIT(zfsvfs); 2945249195Smm return (SET_ERROR(EPERM)); 2946185029Spjd } 2947185029Spjd 2948219089Spjd if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { 2949185029Spjd ZFS_EXIT(zfsvfs); 2950249195Smm return (SET_ERROR(EPERM)); 2951185029Spjd } 2952185029Spjd 2953185029Spjd /* 2954185029Spjd * Verify timestamps doesn't overflow 32 bits. 2955185029Spjd * ZFS can handle large timestamps, but 32bit syscalls can't 2956185029Spjd * handle times greater than 2039. This check should be removed 2957185029Spjd * once large timestamps are fully supported. 2958185029Spjd */ 2959185029Spjd if (mask & (AT_ATIME | AT_MTIME)) { 2960185029Spjd if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 2961185029Spjd ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 2962185029Spjd ZFS_EXIT(zfsvfs); 2963249195Smm return (SET_ERROR(EOVERFLOW)); 2964185029Spjd } 2965185029Spjd } 2966316391Sasomers if (xoap && (mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME) && 2967316391Sasomers TIMESPEC_OVERFLOW(&vap->va_birthtime)) { 2968316391Sasomers ZFS_EXIT(zfsvfs); 2969316391Sasomers return (SET_ERROR(EOVERFLOW)); 2970316391Sasomers } 2971185029Spjd 2972168404Spjd attrzp = NULL; 2973219089Spjd aclp = NULL; 2974168404Spjd 2975211932Smm /* Can this be moved to before the top label? 
*/ 2976168404Spjd if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2977168404Spjd ZFS_EXIT(zfsvfs); 2978249195Smm return (SET_ERROR(EROFS)); 2979168404Spjd } 2980168404Spjd 2981168404Spjd /* 2982168404Spjd * First validate permissions 2983168404Spjd */ 2984168404Spjd 2985168404Spjd if (mask & AT_SIZE) { 2986168404Spjd /* 2987168404Spjd * XXX - Note, we are not providing any open 2988168404Spjd * mode flags here (like FNDELAY), so we may 2989168404Spjd * block if there are locks present... this 2990168404Spjd * should be addressed in openat(). 2991168404Spjd */ 2992185029Spjd /* XXX - would it be OK to generate a log record here? */ 2993185029Spjd err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 2994168404Spjd if (err) { 2995168404Spjd ZFS_EXIT(zfsvfs); 2996168404Spjd return (err); 2997168404Spjd } 2998168404Spjd } 2999168404Spjd 3000185029Spjd if (mask & (AT_ATIME|AT_MTIME) || 3001185029Spjd ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 3002185029Spjd XVA_ISSET_REQ(xvap, XAT_READONLY) || 3003185029Spjd XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 3004219089Spjd XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 3005219089Spjd XVA_ISSET_REQ(xvap, XAT_SPARSE) || 3006185029Spjd XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 3007219089Spjd XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 3008185029Spjd need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 3009185029Spjd skipaclchk, cr); 3010219089Spjd } 3011168404Spjd 3012168404Spjd if (mask & (AT_UID|AT_GID)) { 3013168404Spjd int idmask = (mask & (AT_UID|AT_GID)); 3014168404Spjd int take_owner; 3015168404Spjd int take_group; 3016168404Spjd 3017168404Spjd /* 3018168404Spjd * NOTE: even if a new mode is being set, 3019168404Spjd * we may clear S_ISUID/S_ISGID bits. 
3020168404Spjd */ 3021168404Spjd 3022168404Spjd if (!(mask & AT_MODE)) 3023219089Spjd vap->va_mode = zp->z_mode; 3024168404Spjd 3025168404Spjd /* 3026168404Spjd * Take ownership or chgrp to group we are a member of 3027168404Spjd */ 3028168404Spjd 3029168404Spjd take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 3030185029Spjd take_group = (mask & AT_GID) && 3031185029Spjd zfs_groupmember(zfsvfs, vap->va_gid, cr); 3032168404Spjd 3033168404Spjd /* 3034168404Spjd * If both AT_UID and AT_GID are set then take_owner and 3035168404Spjd * take_group must both be set in order to allow taking 3036168404Spjd * ownership. 3037168404Spjd * 3038168404Spjd * Otherwise, send the check through secpolicy_vnode_setattr() 3039168404Spjd * 3040168404Spjd */ 3041168404Spjd 3042168404Spjd if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 3043168404Spjd ((idmask == AT_UID) && take_owner) || 3044168404Spjd ((idmask == AT_GID) && take_group)) { 3045185029Spjd if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 3046185029Spjd skipaclchk, cr) == 0) { 3047168404Spjd /* 3048168404Spjd * Remove setuid/setgid for non-privileged users 3049168404Spjd */ 3050185029Spjd secpolicy_setid_clear(vap, vp, cr); 3051168404Spjd trim_mask = (mask & (AT_UID|AT_GID)); 3052168404Spjd } else { 3053168404Spjd need_policy = TRUE; 3054168404Spjd } 3055168404Spjd } else { 3056168404Spjd need_policy = TRUE; 3057168404Spjd } 3058168404Spjd } 3059168404Spjd 3060219089Spjd oldva.va_mode = zp->z_mode; 3061185029Spjd zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 3062185029Spjd if (mask & AT_XVATTR) { 3063209962Smm /* 3064209962Smm * Update xvattr mask to include only those attributes 3065209962Smm * that are actually changing. 3066209962Smm * 3067209962Smm * the bits will be restored prior to actually setting 3068209962Smm * the attributes so the caller thinks they were set. 
3069209962Smm */ 3070209962Smm if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3071209962Smm if (xoap->xoa_appendonly != 3072219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 3073209962Smm need_policy = TRUE; 3074209962Smm } else { 3075209962Smm XVA_CLR_REQ(xvap, XAT_APPENDONLY); 3076209962Smm XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 3077209962Smm } 3078209962Smm } 3079209962Smm 3080209962Smm if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3081209962Smm if (xoap->xoa_nounlink != 3082219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 3083209962Smm need_policy = TRUE; 3084209962Smm } else { 3085209962Smm XVA_CLR_REQ(xvap, XAT_NOUNLINK); 3086209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 3087209962Smm } 3088209962Smm } 3089209962Smm 3090209962Smm if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3091209962Smm if (xoap->xoa_immutable != 3092219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 3093209962Smm need_policy = TRUE; 3094209962Smm } else { 3095209962Smm XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 3096209962Smm XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 3097209962Smm } 3098209962Smm } 3099209962Smm 3100209962Smm if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3101209962Smm if (xoap->xoa_nodump != 3102219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0)) { 3103209962Smm need_policy = TRUE; 3104209962Smm } else { 3105209962Smm XVA_CLR_REQ(xvap, XAT_NODUMP); 3106209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 3107209962Smm } 3108209962Smm } 3109209962Smm 3110209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3111209962Smm if (xoap->xoa_av_modified != 3112219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 3113209962Smm need_policy = TRUE; 3114209962Smm } else { 3115209962Smm XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 3116209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 3117209962Smm } 3118209962Smm } 3119209962Smm 3120209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3121209962Smm if ((vp->v_type != VREG && 3122209962Smm xoap->xoa_av_quarantined) || 3123209962Smm xoap->xoa_av_quarantined != 
3124219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 3125209962Smm need_policy = TRUE; 3126209962Smm } else { 3127209962Smm XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 3128209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 3129209962Smm } 3130209962Smm } 3131209962Smm 3132219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3133219089Spjd ZFS_EXIT(zfsvfs); 3134249195Smm return (SET_ERROR(EPERM)); 3135219089Spjd } 3136219089Spjd 3137209962Smm if (need_policy == FALSE && 3138209962Smm (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3139209962Smm XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3140185029Spjd need_policy = TRUE; 3141185029Spjd } 3142185029Spjd } 3143185029Spjd 3144168404Spjd if (mask & AT_MODE) { 3145185029Spjd if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 3146168962Spjd err = secpolicy_setid_setsticky_clear(vp, vap, 3147168962Spjd &oldva, cr); 3148168962Spjd if (err) { 3149168962Spjd ZFS_EXIT(zfsvfs); 3150168962Spjd return (err); 3151168962Spjd } 3152168404Spjd trim_mask |= AT_MODE; 3153168404Spjd } else { 3154168404Spjd need_policy = TRUE; 3155168404Spjd } 3156168404Spjd } 3157168404Spjd 3158168404Spjd if (need_policy) { 3159168404Spjd /* 3160168404Spjd * If trim_mask is set then take ownership 3161168404Spjd * has been granted or write_acl is present and user 3162168404Spjd * has the ability to modify mode. In that case remove 3163168404Spjd * UID|GID and or MODE from mask so that 3164168404Spjd * secpolicy_vnode_setattr() doesn't revoke it. 3165168404Spjd */ 3166168404Spjd 3167168404Spjd if (trim_mask) { 3168168404Spjd saved_mask = vap->va_mask; 3169168404Spjd vap->va_mask &= ~trim_mask; 3170197831Spjd if (trim_mask & AT_MODE) { 3171197831Spjd /* 3172197831Spjd * Save the mode, as secpolicy_vnode_setattr() 3173197831Spjd * will overwrite it with ova.va_mode. 
3174197831Spjd */ 3175197831Spjd saved_mode = vap->va_mode; 3176197831Spjd } 3177168404Spjd } 3178168404Spjd err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3179185029Spjd (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3180168404Spjd if (err) { 3181168404Spjd ZFS_EXIT(zfsvfs); 3182168404Spjd return (err); 3183168404Spjd } 3184168404Spjd 3185197831Spjd if (trim_mask) { 3186168404Spjd vap->va_mask |= saved_mask; 3187197831Spjd if (trim_mask & AT_MODE) { 3188197831Spjd /* 3189197831Spjd * Recover the mode after 3190197831Spjd * secpolicy_vnode_setattr(). 3191197831Spjd */ 3192197831Spjd vap->va_mode = saved_mode; 3193197831Spjd } 3194197831Spjd } 3195168404Spjd } 3196168404Spjd 3197168404Spjd /* 3198168404Spjd * secpolicy_vnode_setattr, or take ownership may have 3199168404Spjd * changed va_mask 3200168404Spjd */ 3201168404Spjd mask = vap->va_mask; 3202168404Spjd 3203219089Spjd if ((mask & (AT_UID | AT_GID))) { 3204219089Spjd err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 3205219089Spjd &xattr_obj, sizeof (xattr_obj)); 3206168404Spjd 3207219089Spjd if (err == 0 && xattr_obj) { 3208219089Spjd err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 3209306818Savg if (err == 0) { 3210306818Savg err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE); 3211306818Savg if (err != 0) 3212306818Savg vrele(ZTOV(attrzp)); 3213306818Savg } 3214209962Smm if (err) 3215219089Spjd goto out2; 3216168404Spjd } 3217209962Smm if (mask & AT_UID) { 3218209962Smm new_uid = zfs_fuid_create(zfsvfs, 3219209962Smm (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3220219089Spjd if (new_uid != zp->z_uid && 3221219089Spjd zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 3222219089Spjd if (attrzp) 3223306818Savg vput(ZTOV(attrzp)); 3224249195Smm err = SET_ERROR(EDQUOT); 3225219089Spjd goto out2; 3226209962Smm } 3227209962Smm } 3228209962Smm 3229209962Smm if (mask & AT_GID) { 3230209962Smm new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 3231209962Smm cr, ZFS_GROUP, &fuidp); 3232219089Spjd 
if (new_gid != zp->z_gid && 3233219089Spjd zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 3234219089Spjd if (attrzp) 3235306818Savg vput(ZTOV(attrzp)); 3236249195Smm err = SET_ERROR(EDQUOT); 3237219089Spjd goto out2; 3238209962Smm } 3239209962Smm } 3240219089Spjd } 3241219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 3242219089Spjd 3243219089Spjd if (mask & AT_MODE) { 3244219089Spjd uint64_t pmode = zp->z_mode; 3245219089Spjd uint64_t acl_obj; 3246219089Spjd new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3247219089Spjd 3248243560Smm if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 3249243560Smm !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3250249195Smm err = SET_ERROR(EPERM); 3251243560Smm goto out; 3252243560Smm } 3253243560Smm 3254224174Smm if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3255224174Smm goto out; 3256219089Spjd 3257219089Spjd if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 3258219089Spjd /* 3259219089Spjd * Are we upgrading ACL from old V0 format 3260219089Spjd * to V1 format? 
3261219089Spjd */ 3262219089Spjd if (zfsvfs->z_version >= ZPL_VERSION_FUID && 3263219089Spjd zfs_znode_acl_version(zp) == 3264219089Spjd ZFS_ACL_VERSION_INITIAL) { 3265219089Spjd dmu_tx_hold_free(tx, acl_obj, 0, 3266219089Spjd DMU_OBJECT_END); 3267219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3268219089Spjd 0, aclp->z_acl_bytes); 3269209962Smm } else { 3270219089Spjd dmu_tx_hold_write(tx, acl_obj, 0, 3271219089Spjd aclp->z_acl_bytes); 3272209962Smm } 3273219089Spjd } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 3274219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3275219089Spjd 0, aclp->z_acl_bytes); 3276209962Smm } 3277219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3278219089Spjd } else { 3279219089Spjd if ((mask & AT_XVATTR) && 3280219089Spjd XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3281219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3282219089Spjd else 3283219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3284168404Spjd } 3285168404Spjd 3286219089Spjd if (attrzp) { 3287219089Spjd dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 3288219089Spjd } 3289219089Spjd 3290219089Spjd fuid_dirtied = zfsvfs->z_fuid_dirty; 3291219089Spjd if (fuid_dirtied) 3292219089Spjd zfs_fuid_txhold(zfsvfs, tx); 3293219089Spjd 3294219089Spjd zfs_sa_upgrade_txholds(tx, zp); 3295219089Spjd 3296258720Savg err = dmu_tx_assign(tx, TXG_WAIT); 3297258720Savg if (err) 3298209962Smm goto out; 3299168404Spjd 3300219089Spjd count = 0; 3301168404Spjd /* 3302168404Spjd * Set each attribute requested. 3303168404Spjd * We group settings according to the locks they need to acquire. 3304168404Spjd * 3305168404Spjd * Note: you cannot set ctime directly, although it will be 3306168404Spjd * updated as a side-effect of calling this function. 
3307168404Spjd */ 3308168404Spjd 3309219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3310219089Spjd mutex_enter(&zp->z_acl_lock); 3311168404Spjd 3312219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3313219089Spjd &zp->z_pflags, sizeof (zp->z_pflags)); 3314219089Spjd 3315219089Spjd if (attrzp) { 3316219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3317219089Spjd mutex_enter(&attrzp->z_acl_lock); 3318219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3319219089Spjd SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3320219089Spjd sizeof (attrzp->z_pflags)); 3321219089Spjd } 3322219089Spjd 3323219089Spjd if (mask & (AT_UID|AT_GID)) { 3324219089Spjd 3325219089Spjd if (mask & AT_UID) { 3326219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 3327219089Spjd &new_uid, sizeof (new_uid)); 3328219089Spjd zp->z_uid = new_uid; 3329219089Spjd if (attrzp) { 3330219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3331219089Spjd SA_ZPL_UID(zfsvfs), NULL, &new_uid, 3332219089Spjd sizeof (new_uid)); 3333219089Spjd attrzp->z_uid = new_uid; 3334219089Spjd } 3335219089Spjd } 3336219089Spjd 3337219089Spjd if (mask & AT_GID) { 3338219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 3339219089Spjd NULL, &new_gid, sizeof (new_gid)); 3340219089Spjd zp->z_gid = new_gid; 3341219089Spjd if (attrzp) { 3342219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3343219089Spjd SA_ZPL_GID(zfsvfs), NULL, &new_gid, 3344219089Spjd sizeof (new_gid)); 3345219089Spjd attrzp->z_gid = new_gid; 3346219089Spjd } 3347219089Spjd } 3348219089Spjd if (!(mask & AT_MODE)) { 3349219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 3350219089Spjd NULL, &new_mode, sizeof (new_mode)); 3351219089Spjd new_mode = zp->z_mode; 3352219089Spjd } 3353219089Spjd err = zfs_acl_chown_setattr(zp); 3354219089Spjd ASSERT(err == 0); 3355219089Spjd if (attrzp) { 3356219089Spjd err = zfs_acl_chown_setattr(attrzp); 3357219089Spjd ASSERT(err == 0); 3358219089Spjd } 3359219089Spjd } 
3360219089Spjd 3361168404Spjd if (mask & AT_MODE) { 3362219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 3363219089Spjd &new_mode, sizeof (new_mode)); 3364219089Spjd zp->z_mode = new_mode; 3365219089Spjd ASSERT3U((uintptr_t)aclp, !=, 0); 3366209962Smm err = zfs_aclset_common(zp, aclp, cr, tx); 3367240415Smm ASSERT0(err); 3368219089Spjd if (zp->z_acl_cached) 3369219089Spjd zfs_acl_free(zp->z_acl_cached); 3370211932Smm zp->z_acl_cached = aclp; 3371211932Smm aclp = NULL; 3372168404Spjd } 3373168404Spjd 3374168404Spjd 3375219089Spjd if (mask & AT_ATIME) { 3376219089Spjd ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 3377219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 3378219089Spjd &zp->z_atime, sizeof (zp->z_atime)); 3379168404Spjd } 3380168404Spjd 3381219089Spjd if (mask & AT_MTIME) { 3382219089Spjd ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 3383219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 3384219089Spjd mtime, sizeof (mtime)); 3385168404Spjd } 3386168404Spjd 3387185029Spjd /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 3388219089Spjd if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3389219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3390219089Spjd NULL, mtime, sizeof (mtime)); 3391219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3392219089Spjd &ctime, sizeof (ctime)); 3393219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 3394219089Spjd B_TRUE); 3395219089Spjd } else if (mask != 0) { 3396219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3397219089Spjd &ctime, sizeof (ctime)); 3398219089Spjd zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 3399219089Spjd B_TRUE); 3400219089Spjd if (attrzp) { 3401219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3402219089Spjd SA_ZPL_CTIME(zfsvfs), NULL, 3403219089Spjd &ctime, sizeof (ctime)); 3404219089Spjd zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 3405219089Spjd mtime, ctime, B_TRUE); 3406219089Spjd } 3407219089Spjd } 3408185029Spjd /* 3409185029Spjd * Do this after setting timestamps to prevent timestamp 3410185029Spjd * update from toggling bit 3411185029Spjd */ 3412168404Spjd 3413185029Spjd if (xoap && (mask & AT_XVATTR)) { 3414209962Smm 3415316391Sasomers if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) 3416316391Sasomers xoap->xoa_createtime = vap->va_birthtime; 3417209962Smm /* 3418209962Smm * restore trimmed off masks 3419209962Smm * so that return masks can be set for caller. 
3420209962Smm */ 3421209962Smm 3422209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3423209962Smm XVA_SET_REQ(xvap, XAT_APPENDONLY); 3424209962Smm } 3425209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3426209962Smm XVA_SET_REQ(xvap, XAT_NOUNLINK); 3427209962Smm } 3428209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3429209962Smm XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3430209962Smm } 3431209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3432209962Smm XVA_SET_REQ(xvap, XAT_NODUMP); 3433209962Smm } 3434209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3435209962Smm XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3436209962Smm } 3437209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3438209962Smm XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3439209962Smm } 3440209962Smm 3441219089Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3442185029Spjd ASSERT(vp->v_type == VREG); 3443185029Spjd 3444219089Spjd zfs_xvattr_set(zp, xvap, tx); 3445185029Spjd } 3446185029Spjd 3447209962Smm if (fuid_dirtied) 3448209962Smm zfs_fuid_sync(zfsvfs, tx); 3449209962Smm 3450168404Spjd if (mask != 0) 3451185029Spjd zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3452168404Spjd 3453219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3454219089Spjd mutex_exit(&zp->z_acl_lock); 3455168404Spjd 3456219089Spjd if (attrzp) { 3457219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3458219089Spjd mutex_exit(&attrzp->z_acl_lock); 3459219089Spjd } 3460209962Smmout: 3461219089Spjd if (err == 0 && attrzp) { 3462219089Spjd err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 3463219089Spjd xattr_count, tx); 3464219089Spjd ASSERT(err2 == 0); 3465219089Spjd } 3466219089Spjd 3467168404Spjd if (attrzp) 3468306818Savg vput(ZTOV(attrzp)); 3469251631Sdelphij 3470211932Smm if (aclp) 3471209962Smm zfs_acl_free(aclp); 3472168404Spjd 3473209962Smm if (fuidp) { 3474209962Smm zfs_fuid_info_free(fuidp); 3475209962Smm fuidp = NULL; 3476209962Smm } 3477209962Smm 3478219089Spjd if (err) { 
		dmu_tx_abort(tx);
	} else {
		err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
		dmu_tx_commit(tx);
	}

out2:
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (err);
}

/*
 * We acquire all but fdvp locks using non-blocking acquisitions. If we
 * fail to acquire any lock in the path we will drop all held locks,
 * acquire the new lock in a blocking fashion, and then release it and
 * restart the rename. This acquire/release step ensures that we do not
 * spin on a lock waiting for release. On error release all vnode locks
 * and decrement references the way tmpfs_rename() would do.
 *
 * The function begins by unlocking tdvp and *tvpp, then locks sdvp, tdvp,
 * the source vnode and the target vnode (when one exists) in that order.
 *
 *	IN:	sdvp	- source directory vnode.
 *		tdvp	- target directory vnode.
 *		scnp	- component name of the source entry.
 *		tcnp	- component name of the target entry.
 *
 *	IN/OUT:	svpp	- source vnode; replaced by the vnode found by
 *			  re-looking up the source name (the previous
 *			  reference is dropped with vrele()).
 *		tvpp	- target vnode or NULL; its previous reference is
 *			  dropped and it is replaced by the vnode found by
 *			  re-looking up the target name, or by NULL.
 *
 *	RETURN:	0 with sdvp, tdvp, *svpp and (if non-NULL) *tvpp locked
 *		exclusively; otherwise an error code, after every lock
 *		taken here has been released.
 */
static int
zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
    struct vnode *tdvp, struct vnode **tvpp,
    const struct componentname *scnp, const struct componentname *tcnp)
{
	zfsvfs_t	*zfsvfs;
	struct vnode	*nvp, *svp, *tvp;
	znode_t		*sdzp, *tdzp, *szp, *tzp;
	const char	*snm = scnp->cn_nameptr;
	const char	*tnm = tcnp->cn_nameptr;
	int error;

	/*
	 * Start from a state where none of the four vnodes is locked.
	 * If *tvpp is tdvp itself, the first unlock already covers it.
	 */
	VOP_UNLOCK(tdvp, 0);
	if (*tvpp != NULL && *tvpp != tdvp)
		VOP_UNLOCK(*tvpp, 0);

relock:
	/* The source directory is the only lock we block on up front. */
	error = vn_lock(sdvp, LK_EXCLUSIVE);
	if (error)
		goto out;
	sdzp = VTOZ(sdvp);

	error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		VOP_UNLOCK(sdvp, 0);
		if (error != EBUSY)
			goto out;
		/*
		 * Contended: wait for tdvp while holding nothing else, then
		 * drop it again and restart from the top.
		 */
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (error)
			goto out;
		VOP_UNLOCK(tdvp, 0);
		goto relock;
	}
	tdzp = VTOZ(tdvp);

	/*
	 * Before using sdzp and tdzp we must ensure that they are live.
	 * As a porting legacy from illumos we have two things to worry
	 * about. One is typical for FreeBSD and it is that the vnode is
	 * not reclaimed (doomed). The other is that the znode is live.
	 * The current code can invalidate the znode without acquiring the
	 * corresponding vnode lock if the object represented by the znode
	 * and vnode is no longer valid after a rollback or receive operation.
	 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock
	 * that protects the znodes from the invalidation.
	 *
	 * NOTE(review): sdvp and tdvp are locked at this point. If
	 * ZFS_ENTER() can return on its own (its definition is not visible
	 * here), those locks would not be released -- confirm.
	 */
	zfsvfs = sdzp->z_zfsvfs;
	ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
	ZFS_ENTER(zfsvfs);

	/*
	 * We can not use ZFS_VERIFY_ZP() here because it could directly return
	 * bypassing the cleanup code in the case of an error.
	 */
	if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) {
		ZFS_EXIT(zfsvfs);
		VOP_UNLOCK(sdvp, 0);
		VOP_UNLOCK(tdvp, 0);
		error = SET_ERROR(EIO);
		goto out;
	}

	/*
	 * Re-resolve svp to be certain it still exists and fetch the
	 * correct vnode.
	 */
	error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS);
	if (error != 0) {
		/* Source entry invalid or not there. */
		ZFS_EXIT(zfsvfs);
		VOP_UNLOCK(sdvp, 0);
		VOP_UNLOCK(tdvp, 0);
		/* A source name of "." or ".." is reported as EINVAL. */
		if ((scnp->cn_flags & ISDOTDOT) != 0 ||
		    (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
			error = SET_ERROR(EINVAL);
		goto out;
	}
	svp = ZTOV(szp);

	/*
	 * Re-resolve tvp, if it disappeared we just carry on.
	 */
	error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0);
	if (error != 0) {
		ZFS_EXIT(zfsvfs);
		VOP_UNLOCK(sdvp, 0);
		VOP_UNLOCK(tdvp, 0);
		/* Drop the reference obtained by the source lookup above. */
		vrele(svp);
		if ((tcnp->cn_flags & ISDOTDOT) != 0)
			error = SET_ERROR(EINVAL);
		goto out;
	}
	if (tzp != NULL)
		tvp = ZTOV(tzp);
	else
		tvp = NULL;

	/*
	 * At present the vnode locks must be acquired before z_teardown_lock,
	 * although it would be more logical to use the opposite order.
	 */
	ZFS_EXIT(zfsvfs);

	/*
	 * Now try acquire locks on svp and tvp.
	 */
	nvp = svp;
	error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		VOP_UNLOCK(sdvp, 0);
		VOP_UNLOCK(tdvp, 0);
		if (tvp != NULL)
			vrele(tvp);
		if (error != EBUSY) {
			vrele(nvp);
			goto out;
		}
		/* Wait for the source vnode with no other lock held. */
		error = vn_lock(nvp, LK_EXCLUSIVE);
		if (error != 0) {
			vrele(nvp);
			goto out;
		}
		VOP_UNLOCK(nvp, 0);
		/*
		 * Concurrent rename race.
		 * XXX ?
		 */
		if (nvp == tdvp) {
			vrele(nvp);
			error = SET_ERROR(EINVAL);
			goto out;
		}
		/*
		 * Hand the freshly looked-up source vnode to the caller in
		 * place of its old reference before retrying.
		 */
		vrele(*svpp);
		*svpp = nvp;
		goto relock;
	}
	/* Source vnode is locked: it replaces the caller's reference. */
	vrele(*svpp);
	*svpp = nvp;

	/* The caller's old target reference is always dropped here. */
	if (*tvpp != NULL)
		vrele(*tvpp);
	*tvpp = NULL;
	if (tvp != NULL) {
		nvp = tvp;
		error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
		if (error != 0) {
			VOP_UNLOCK(sdvp, 0);
			VOP_UNLOCK(tdvp, 0);
			VOP_UNLOCK(*svpp, 0);
			if (error != EBUSY) {
				vrele(nvp);
				goto out;
			}
			error = vn_lock(nvp, LK_EXCLUSIVE);
			if (error != 0) {
				vrele(nvp);
				goto out;
			}
			/*
			 * The blocking lock succeeded; release both the lock
			 * and the reference and start over.
			 */
			vput(nvp);
			goto relock;
		}
		*tvpp = nvp;
	}

	return (0);

out:
	return (error);
}

/*
 * Note that we must use VN_RELE_ASYNC in this function as it walks
 * up the directory tree and vrele may need to acquire an exclusive
 * lock if a last reference to a vnode is dropped.
3675303970Savg */ 3676303970Savgstatic int 3677303970Savgzfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp) 3678303970Savg{ 3679303970Savg zfsvfs_t *zfsvfs; 3680303970Savg znode_t *zp, *zp1; 3681303970Savg uint64_t parent; 3682303970Savg int error; 3683168404Spjd 3684303970Savg zfsvfs = tdzp->z_zfsvfs; 3685303970Savg if (tdzp == szp) 3686303970Savg return (SET_ERROR(EINVAL)); 3687303970Savg if (tdzp == sdzp) 3688303970Savg return (0); 3689303970Savg if (tdzp->z_id == zfsvfs->z_root) 3690303970Savg return (0); 3691303970Savg zp = tdzp; 3692303970Savg for (;;) { 3693303970Savg ASSERT(!zp->z_unlinked); 3694303970Savg if ((error = sa_lookup(zp->z_sa_hdl, 3695303970Savg SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) 3696303970Savg break; 3697303970Savg 3698303970Savg if (parent == szp->z_id) { 3699303970Savg error = SET_ERROR(EINVAL); 3700303970Savg break; 3701168404Spjd } 3702303970Savg if (parent == zfsvfs->z_root) 3703303970Savg break; 3704303970Savg if (parent == sdzp->z_id) 3705303970Savg break; 3706168404Spjd 3707303970Savg error = zfs_zget(zfsvfs, parent, &zp1); 3708303970Savg if (error != 0) 3709303970Savg break; 3710168404Spjd 3711303970Savg if (zp != tdzp) 3712303970Savg VN_RELE_ASYNC(ZTOV(zp), 3713303970Savg dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 3714303970Savg zp = zp1; 3715303970Savg } 3716303970Savg 3717303970Savg if (error == ENOTDIR) 3718303970Savg panic("checkpath: .. not a directory\n"); 3719303970Savg if (zp != tdzp) 3720303970Savg VN_RELE_ASYNC(ZTOV(zp), 3721303970Savg dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 3722303970Savg return (error); 3723168404Spjd} 3724168404Spjd 3725168404Spjd/* 3726168404Spjd * Move an entry from the provided source directory to the target 3727168404Spjd * directory. Change the entry name as indicated. 3728168404Spjd * 3729168404Spjd * IN: sdvp - Source directory containing the "old entry". 3730168404Spjd * snm - Old entry name. 
3731168404Spjd * tdvp - Target directory to contain the "new entry". 3732168404Spjd * tnm - New entry name. 3733168404Spjd * cr - credentials of caller. 3734185029Spjd * ct - caller context 3735185029Spjd * flags - case flags 3736168404Spjd * 3737251631Sdelphij * RETURN: 0 on success, error code on failure. 3738168404Spjd * 3739168404Spjd * Timestamps: 3740168404Spjd * sdvp,tdvp - ctime|mtime updated 3741168404Spjd */ 3742185029Spjd/*ARGSUSED*/ 3743168404Spjdstatic int 3744303970Savgzfs_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, 3745303970Savg vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp, 3746303970Savg cred_t *cr) 3747168404Spjd{ 3748303970Savg zfsvfs_t *zfsvfs; 3749303970Savg znode_t *sdzp, *tdzp, *szp, *tzp; 3750303970Savg zilog_t *zilog = NULL; 3751168404Spjd dmu_tx_t *tx; 3752303970Savg char *snm = scnp->cn_nameptr; 3753303970Savg char *tnm = tcnp->cn_nameptr; 3754185029Spjd int error = 0; 3755168404Spjd 3756303970Savg /* Reject renames across filesystems. */ 3757303970Savg if ((*svpp)->v_mount != tdvp->v_mount || 3758303970Savg ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) { 3759303970Savg error = SET_ERROR(EXDEV); 3760303970Savg goto out; 3761303970Savg } 3762168404Spjd 3763303970Savg if (zfsctl_is_node(tdvp)) { 3764303970Savg error = SET_ERROR(EXDEV); 3765303970Savg goto out; 3766303970Savg } 3767303970Savg 3768168962Spjd /* 3769303970Savg * Lock all four vnodes to ensure safety and semantics of renaming. 
3770168962Spjd */ 3771303970Savg error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp); 3772303970Savg if (error != 0) { 3773303970Savg /* no vnodes are locked in the case of error here */ 3774303970Savg return (error); 3775264392Sdavide } 3776168962Spjd 3777303970Savg tdzp = VTOZ(tdvp); 3778303970Savg sdzp = VTOZ(sdvp); 3779303970Savg zfsvfs = tdzp->z_zfsvfs; 3780303970Savg zilog = zfsvfs->z_log; 3781303970Savg 3782254585Sdelphij /* 3783303970Savg * After we re-enter ZFS_ENTER() we will have to revalidate all 3784303970Savg * znodes involved. 3785254585Sdelphij */ 3786303970Savg ZFS_ENTER(zfsvfs); 3787168404Spjd 3788185029Spjd if (zfsvfs->z_utf8 && u8_validate(tnm, 3789185029Spjd strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3790303970Savg error = SET_ERROR(EILSEQ); 3791303970Savg goto unlockout; 3792185029Spjd } 3793185029Spjd 3794303970Savg /* If source and target are the same file, there is nothing to do. */ 3795303970Savg if ((*svpp) == (*tvpp)) { 3796303970Savg error = 0; 3797303970Savg goto unlockout; 3798303970Savg } 3799185029Spjd 3800303970Savg if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) || 3801303970Savg ((*tvpp) != NULL && (*tvpp)->v_type == VDIR && 3802303970Savg (*tvpp)->v_mountedhere != NULL)) { 3803303970Savg error = SET_ERROR(EXDEV); 3804303970Savg goto unlockout; 3805303970Savg } 3806168404Spjd 3807168404Spjd /* 3808303970Savg * We can not use ZFS_VERIFY_ZP() here because it could directly return 3809303970Savg * bypassing the cleanup code in the case of an error. 3810168404Spjd */ 3811303970Savg if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 3812303970Savg error = SET_ERROR(EIO); 3813303970Savg goto unlockout; 3814168404Spjd } 3815168404Spjd 3816303970Savg szp = VTOZ(*svpp); 3817303970Savg tzp = *tvpp == NULL ? 
NULL : VTOZ(*tvpp); 3818303970Savg if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) { 3819303970Savg error = SET_ERROR(EIO); 3820303970Savg goto unlockout; 3821168962Spjd } 3822185029Spjd 3823208131Smm /* 3824303970Savg * This is to prevent the creation of links into attribute space 3825303970Savg * by renaming a linked file into/outof an attribute directory. 3826303970Savg * See the comment in zfs_link() for why this is considered bad. 3827208131Smm */ 3828303970Savg if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3829303970Savg error = SET_ERROR(EINVAL); 3830303970Savg goto unlockout; 3831208131Smm } 3832208131Smm 3833168404Spjd /* 3834168404Spjd * Must have write access at the source to remove the old entry 3835168404Spjd * and write access at the target to create the new entry. 3836168404Spjd * Note that if target and source are the same, this can be 3837168404Spjd * done in a single check. 3838168404Spjd */ 3839168404Spjd if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3840303970Savg goto unlockout; 3841168404Spjd 3842303970Savg if ((*svpp)->v_type == VDIR) { 3843168404Spjd /* 3844303970Savg * Avoid ".", "..", and aliases of "." for obvious reasons. 3845303970Savg */ 3846303970Savg if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') || 3847303970Savg sdzp == szp || 3848303970Savg (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { 3849303970Savg error = EINVAL; 3850303970Savg goto unlockout; 3851303970Savg } 3852303970Savg 3853303970Savg /* 3854168404Spjd * Check to make sure rename is valid. 3855168404Spjd * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3856168404Spjd */ 3857303970Savg if (error = zfs_rename_check(szp, sdzp, tdzp)) 3858303970Savg goto unlockout; 3859168404Spjd } 3860168404Spjd 3861168404Spjd /* 3862168404Spjd * Does target exist? 3863168404Spjd */ 3864168404Spjd if (tzp) { 3865168404Spjd /* 3866168404Spjd * Source and target must be the same type. 
3867168404Spjd */ 3868303970Savg if ((*svpp)->v_type == VDIR) { 3869303970Savg if ((*tvpp)->v_type != VDIR) { 3870249195Smm error = SET_ERROR(ENOTDIR); 3871303970Savg goto unlockout; 3872303970Savg } else { 3873303970Savg cache_purge(tdvp); 3874303970Savg if (sdvp != tdvp) 3875303970Savg cache_purge(sdvp); 3876168404Spjd } 3877168404Spjd } else { 3878303970Savg if ((*tvpp)->v_type == VDIR) { 3879249195Smm error = SET_ERROR(EISDIR); 3880303970Savg goto unlockout; 3881168404Spjd } 3882168404Spjd } 3883168404Spjd } 3884168404Spjd 3885303970Savg vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct); 3886168962Spjd if (tzp) 3887303970Savg vnevent_rename_dest(*tvpp, tdvp, tnm, ct); 3888168962Spjd 3889185029Spjd /* 3890185029Spjd * notify the target directory if it is not the same 3891185029Spjd * as source directory. 3892185029Spjd */ 3893185029Spjd if (tdvp != sdvp) { 3894185029Spjd vnevent_rename_dest_dir(tdvp, ct); 3895185029Spjd } 3896185029Spjd 3897168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 3898219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 3899219089Spjd dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3900168404Spjd dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3901168404Spjd dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 3902219089Spjd if (sdzp != tdzp) { 3903219089Spjd dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 3904219089Spjd zfs_sa_upgrade_txholds(tx, tdzp); 3905219089Spjd } 3906219089Spjd if (tzp) { 3907219089Spjd dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 3908219089Spjd zfs_sa_upgrade_txholds(tx, tzp); 3909219089Spjd } 3910219089Spjd 3911219089Spjd zfs_sa_upgrade_txholds(tx, szp); 3912168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3913303970Savg error = dmu_tx_assign(tx, TXG_WAIT); 3914168404Spjd if (error) { 3915168404Spjd dmu_tx_abort(tx); 3916303970Savg goto unlockout; 3917168404Spjd } 3918168404Spjd 3919303970Savg 3920168404Spjd if (tzp) /* Attempt to remove the existing target */ 3921303970Savg error = zfs_link_destroy(tdzp, 
tnm, tzp, tx, 0, NULL); 3922168404Spjd 3923168404Spjd if (error == 0) { 3924303970Savg error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING); 3925168404Spjd if (error == 0) { 3926219089Spjd szp->z_pflags |= ZFS_AV_MODIFIED; 3927185029Spjd 3928219089Spjd error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 3929219089Spjd (void *)&szp->z_pflags, sizeof (uint64_t), tx); 3930240415Smm ASSERT0(error); 3931219089Spjd 3932303970Savg error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING, 3933303970Savg NULL); 3934219089Spjd if (error == 0) { 3935303970Savg zfs_log_rename(zilog, tx, TX_RENAME, sdzp, 3936303970Savg snm, tdzp, tnm, szp); 3937185029Spjd 3938219089Spjd /* 3939219089Spjd * Update path information for the target vnode 3940219089Spjd */ 3941303970Savg vn_renamepath(tdvp, *svpp, tnm, strlen(tnm)); 3942219089Spjd } else { 3943219089Spjd /* 3944219089Spjd * At this point, we have successfully created 3945219089Spjd * the target name, but have failed to remove 3946219089Spjd * the source name. Since the create was done 3947219089Spjd * with the ZRENAMING flag, there are 3948219089Spjd * complications; for one, the link count is 3949219089Spjd * wrong. The easiest way to deal with this 3950219089Spjd * is to remove the newly created target, and 3951219089Spjd * return the original error. This must 3952219089Spjd * succeed; fortunately, it is very unlikely to 3953219089Spjd * fail, since we just created it. 
3954219089Spjd */ 3955303970Savg VERIFY3U(zfs_link_destroy(tdzp, tnm, szp, tx, 3956219089Spjd ZRENAMING, NULL), ==, 0); 3957219089Spjd } 3958168404Spjd } 3959168404Spjd if (error == 0) { 3960303970Savg cache_purge(*svpp); 3961303970Savg if (*tvpp != NULL) 3962303970Savg cache_purge(*tvpp); 3963303970Savg cache_purge_negative(tdvp); 3964168404Spjd } 3965168404Spjd } 3966168404Spjd 3967168404Spjd dmu_tx_commit(tx); 3968168404Spjd 3969303970Savgunlockout: /* all 4 vnodes are locked, ZFS_ENTER called */ 3970303970Savg ZFS_EXIT(zfsvfs); 3971303970Savg VOP_UNLOCK(*svpp, 0); 3972303970Savg VOP_UNLOCK(sdvp, 0); 3973168404Spjd 3974303970Savgout: /* original two vnodes are locked */ 3975303970Savg if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3976219089Spjd zil_commit(zilog, 0); 3977219089Spjd 3978303970Savg if (*tvpp != NULL) 3979303970Savg VOP_UNLOCK(*tvpp, 0); 3980303970Savg if (tdvp != *tvpp) 3981303970Savg VOP_UNLOCK(tdvp, 0); 3982168404Spjd return (error); 3983168404Spjd} 3984168404Spjd 3985168404Spjd/* 3986168404Spjd * Insert the indicated symbolic reference entry into the directory. 3987168404Spjd * 3988168404Spjd * IN: dvp - Directory to contain new symbolic link. 3989168404Spjd * link - Name for new symlink entry. 3990168404Spjd * vap - Attributes of new entry. 3991168404Spjd * cr - credentials of caller. 3992185029Spjd * ct - caller context 3993185029Spjd * flags - case flags 3994168404Spjd * 3995251631Sdelphij * RETURN: 0 on success, error code on failure. 
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link,
    cred_t *cr, kthread_t *td)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	dmu_tx_t	*tx;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	len = strlen(link);
	int		error;
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;
	uint64_t	txtype = TX_SYMLINK;
	int		flags = 0;	/* not referenced below */

	ASSERT(vap->va_type == VLNK);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/* On a UTF-8-only filesystem the new entry name must be valid UTF-8. */
	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	/* The link target text is limited to MAXPATHLEN bytes. */
	if (len > MAXPATHLEN) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENAMETOOLONG));
	}

	/* From here on every exit must release acl_ids. */
	if ((error = zfs_acl_ids_create(dzp, 0,
	    vap, cr, NULL, &acl_ids)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EDQUOT));
	}

	/*
	 * The vnode reservation taken here is dropped again on both the
	 * dmu_tx_assign() failure path and the normal exit below.
	 */
	getnewvnode_reserve(1);
	tx = dmu_tx_create(zfsvfs->z_os);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE + len);
	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
		    acl_ids.z_aclp->z_acl_bytes);
	}
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		getnewvnode_drop_reserve();
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Create a new object for the symlink.
	 * for version 4 ZPL datasets the symlink will be an SA attribute
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	/*
	 * error is 0 here (dmu_tx_assign() succeeded).  Only the SA branch
	 * can change it, and a failure there does not stop the remaining
	 * steps; it is simply what the function returns at the end.
	 */
	if (zp->z_is_sa)
		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
		    link, len, tx);
	else
		zfs_sa_symlink(zp, link, len, tx);

	zp->z_size = len;
	/* The result of the size update is deliberately ignored. */
	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
	    &zp->z_size, sizeof (zp->z_size), tx);
	/*
	 * Insert the new object into the directory.
	 * (The result of zfs_link_create() is ignored as well.)
	 */
	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);

	zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
	/* Hand the new vnode back to the caller. */
	*vpp = ZTOV(zp);

	zfs_acl_ids_free(&acl_ids);

	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Return, in the buffer contained in the provided uio structure,
 * the symbolic path referred to by vp.
 *
 *	IN:	vp	- vnode of symbolic link.
 *		uio	- structure to contain the link path.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *
 *	OUT:	uio	- structure containing the link path.
 *
 *	RETURN:	0 on success, error code on failure.
4135168404Spjd * 4136168404Spjd * Timestamps: 4137168404Spjd * vp - atime updated 4138168404Spjd */ 4139168404Spjd/* ARGSUSED */ 4140168404Spjdstatic int 4141185029Spjdzfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 4142168404Spjd{ 4143168404Spjd znode_t *zp = VTOZ(vp); 4144168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4145168404Spjd int error; 4146168404Spjd 4147168404Spjd ZFS_ENTER(zfsvfs); 4148185029Spjd ZFS_VERIFY_ZP(zp); 4149168404Spjd 4150219089Spjd if (zp->z_is_sa) 4151219089Spjd error = sa_lookup_uio(zp->z_sa_hdl, 4152219089Spjd SA_ZPL_SYMLINK(zfsvfs), uio); 4153219089Spjd else 4154219089Spjd error = zfs_sa_readlink(zp, uio); 4155168404Spjd 4156168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4157219089Spjd 4158168404Spjd ZFS_EXIT(zfsvfs); 4159168404Spjd return (error); 4160168404Spjd} 4161168404Spjd 4162168404Spjd/* 4163168404Spjd * Insert a new entry into directory tdvp referencing svp. 4164168404Spjd * 4165168404Spjd * IN: tdvp - Directory to contain new entry. 4166168404Spjd * svp - vnode of new entry. 4167168404Spjd * name - name of new entry. 4168168404Spjd * cr - credentials of caller. 4169185029Spjd * ct - caller context 4170168404Spjd * 4171251631Sdelphij * RETURN: 0 on success, error code on failure. 
 *
 * Timestamps:
 *	tdvp - ctime|mtime updated
 *	 svp - ctime updated
 */
/* ARGSUSED */
static int
zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t		*dzp = VTOZ(tdvp);
	znode_t		*tzp, *szp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	dmu_tx_t	*tx;
	int		error;
	uint64_t	parent;
	uid_t		owner;

	ASSERT(tdvp->v_type == VDIR);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/*
	 * POSIX dictates that we return EPERM here.
	 * Better choices include ENOTSUP or EISDIR.
	 */
	if (svp->v_type == VDIR) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	szp = VTOZ(svp);
	ZFS_VERIFY_ZP(szp);

	/* Append-only, immutable and read-only files cannot gain new links. */
	if (szp->z_pflags & (ZFS_APPENDONLY | ZFS_IMMUTABLE | ZFS_READONLY)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	/* Prevent links to .zfs/shares files */

	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
	    &parent, sizeof (uint64_t))) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	if (parent == zfsvfs->z_shares_dir) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	/* On a UTF-8-only filesystem the new entry name must be valid UTF-8. */
	if (zfsvfs->z_utf8 && u8_validate(name,
	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	/*
	 * We do not support links between attributes and non-attributes
	 * because of the potential security risk of creating links
	 * into "normal" file space in order to circumvent restrictions
	 * imposed in attribute space.
	 */
	if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}


	/*
	 * A caller that is not the mapped owner of the file needs
	 * secpolicy_basic_link() to approve the link.
	 */
	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
	if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lookup(dzp, name, &tzp, ZNEW);
	if (error) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	zfs_sa_upgrade_txholds(tx, szp);
	zfs_sa_upgrade_txholds(tx, dzp);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	error = zfs_link_create(dzp, name, szp, tx, 0);

	/* Only a successful link is logged. */
	if (error == 0) {
		uint64_t txtype = TX_LINK;
		zfs_log_link(zilog, tx, txtype, dzp, szp, name);
	}

	/* The transaction is committed even if the link creation failed. */
	dmu_tx_commit(tx);

	if (error == 0) {
		vnevent_link(svp, ct);
	}

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}


/*
 * Called when vp becomes inactive.  Recycles the vnode if its znode has no
 * SA handle or has been unlinked; otherwise writes back a dirty access
 * time.  Runs with z_teardown_inactive_lock held as reader.
 */
/*ARGSUSED*/
void
zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
	if (zp->z_sa_hdl == NULL) {
		/*
		 * The fs has been unmounted, or we did a
		 * suspend/resume and this file no longer exists.
		 */
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		vrecycle(vp);
		return;
	}

	if (zp->z_unlinked) {
		/*
		 * Fast path to recycle a vnode of a removed file.
		 */
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		vrecycle(vp);
		return;
	}

	/*
	 * z_unlinked was already found clear just above; it is tested a
	 * second time here.
	 */
	if (zp->z_atime_dirty && zp->z_unlinked == 0) {
		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);

		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, zp);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			/* Could not get a transaction: atime stays dirty. */
			dmu_tx_abort(tx);
		} else {
			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
			    (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
			zp->z_atime_dirty = 0;
			dmu_tx_commit(tx);
		}
	}
	rw_exit(&zfsvfs->z_teardown_inactive_lock);
}


/* Both ZFS fid layouts are written through a fid_t pointer in zfs_fid(). */
CTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid));
CTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid));

/*
 * Fill *fidp with a file identifier for vp: the object number and the low
 * 32 bits of the generation, each stored least-significant byte first.
 * When zfsvfs->z_parent differs from zfsvfs the long form is used, which
 * additionally carries the objset id and a zeroed objset generation.
 *
 *	RETURN:	0 on success, error code on failure.
 */
/*ARGSUSED*/
static int
zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	uint32_t	gen;
	uint64_t	gen64;
	uint64_t	object = zp->z_id;
	zfid_short_t	*zfid;
	int		size, i, error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
	    &gen64, sizeof (uint64_t))) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/* Only the low 32 bits of the generation are kept in the fid. */
	gen = (uint32_t)gen64;

	size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;

#ifdef illumos
	if (fidp->fid_len < size) {
		fidp->fid_len = size;
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENOSPC));
	}
#else
	/* No length check in this branch: fid_len is simply overwritten. */
	fidp->fid_len = size;
#endif

	zfid = (zfid_short_t *)fidp;

	zfid->zf_len = size;

	for (i = 0; i < sizeof (zfid->zf_object); i++)
		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));

	/* Must have a non-zero generation number to distinguish from .zfs */
	if (gen == 0)
		gen = 1;
	for (i = 0; i < sizeof (zfid->zf_gen); i++)
		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));

	if (size == LONG_FID_LEN) {
		uint64_t	objsetid = dmu_objset_id(zfsvfs->z_os);
		zfid_long_t	*zlfid;

		zlfid = (zfid_long_t *)fidp;

		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
			zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));

		/* XXX - this should be the generation number for the objset */
		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
			zlfid->zf_setgen[i] = 0;
	}

	ZFS_EXIT(zfsvfs);
	return (0);
}

static int
zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
    caller_context_t *ct)
{
	znode_t		*zp, *xzp;
	zfsvfs_t	*zfsvfs;
	int		error;

	switch (cmd) {
	case _PC_LINK_MAX:
4423168404Spjd *valp = INT_MAX; 4424168404Spjd return (0); 4425168404Spjd 4426168404Spjd case _PC_FILESIZEBITS: 4427168404Spjd *valp = 64; 4428168404Spjd return (0); 4429277300Ssmh#ifdef illumos 4430168404Spjd case _PC_XATTR_EXISTS: 4431168404Spjd zp = VTOZ(vp); 4432168404Spjd zfsvfs = zp->z_zfsvfs; 4433168404Spjd ZFS_ENTER(zfsvfs); 4434185029Spjd ZFS_VERIFY_ZP(zp); 4435168404Spjd *valp = 0; 4436303970Savg error = zfs_dirent_lookup(zp, "", &xzp, 4437303970Savg ZXATTR | ZEXISTS | ZSHARED); 4438168404Spjd if (error == 0) { 4439168404Spjd if (!zfs_dirempty(xzp)) 4440168404Spjd *valp = 1; 4441303970Savg vrele(ZTOV(xzp)); 4442168404Spjd } else if (error == ENOENT) { 4443168404Spjd /* 4444168404Spjd * If there aren't extended attributes, it's the 4445168404Spjd * same as having zero of them. 4446168404Spjd */ 4447168404Spjd error = 0; 4448168404Spjd } 4449168404Spjd ZFS_EXIT(zfsvfs); 4450168404Spjd return (error); 4451168404Spjd 4452219089Spjd case _PC_SATTR_ENABLED: 4453219089Spjd case _PC_SATTR_EXISTS: 4454219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 4455219089Spjd (vp->v_type == VREG || vp->v_type == VDIR); 4456219089Spjd return (0); 4457219089Spjd 4458219089Spjd case _PC_ACCESS_FILTERING: 4459219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 4460219089Spjd vp->v_type == VDIR; 4461219089Spjd return (0); 4462219089Spjd 4463219089Spjd case _PC_ACL_ENABLED: 4464219089Spjd *valp = _ACL_ACE_ENABLED; 4465219089Spjd return (0); 4466277300Ssmh#endif /* illumos */ 4467219089Spjd case _PC_MIN_HOLE_SIZE: 4468219089Spjd *valp = (int)SPA_MINBLOCKSIZE; 4469219089Spjd return (0); 4470277300Ssmh#ifdef illumos 4471219089Spjd case _PC_TIMESTAMP_RESOLUTION: 4472219089Spjd /* nanosecond timestamp resolution */ 4473219089Spjd *valp = 1L; 4474219089Spjd return (0); 4475277300Ssmh#endif 4476168404Spjd case _PC_ACL_EXTENDED: 4477196949Strasz *valp = 0; 4478168404Spjd return (0); 4479168404Spjd 4480196949Strasz case _PC_ACL_NFS4: 4481196949Strasz 
*valp = 1; 4482196949Strasz return (0); 4483196949Strasz 4484196949Strasz case _PC_ACL_PATH_MAX: 4485196949Strasz *valp = ACL_MAX_ENTRIES; 4486196949Strasz return (0); 4487196949Strasz 4488168404Spjd default: 4489168962Spjd return (EOPNOTSUPP); 4490168404Spjd } 4491168404Spjd} 4492168404Spjd 4493168404Spjd/*ARGSUSED*/ 4494168404Spjdstatic int 4495185029Spjdzfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4496185029Spjd caller_context_t *ct) 4497168404Spjd{ 4498168404Spjd znode_t *zp = VTOZ(vp); 4499168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4500168404Spjd int error; 4501185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 4502168404Spjd 4503168404Spjd ZFS_ENTER(zfsvfs); 4504185029Spjd ZFS_VERIFY_ZP(zp); 4505185029Spjd error = zfs_getacl(zp, vsecp, skipaclchk, cr); 4506168404Spjd ZFS_EXIT(zfsvfs); 4507168404Spjd 4508168404Spjd return (error); 4509168404Spjd} 4510168404Spjd 4511168404Spjd/*ARGSUSED*/ 4512228685Spjdint 4513185029Spjdzfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4514185029Spjd caller_context_t *ct) 4515168404Spjd{ 4516168404Spjd znode_t *zp = VTOZ(vp); 4517168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4518168404Spjd int error; 4519185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 4520219089Spjd zilog_t *zilog = zfsvfs->z_log; 4521168404Spjd 4522168404Spjd ZFS_ENTER(zfsvfs); 4523185029Spjd ZFS_VERIFY_ZP(zp); 4524219089Spjd 4525185029Spjd error = zfs_setacl(zp, vsecp, skipaclchk, cr); 4526219089Spjd 4527219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4528219089Spjd zil_commit(zilog, 0); 4529219089Spjd 4530168404Spjd ZFS_EXIT(zfsvfs); 4531168404Spjd return (error); 4532168404Spjd} 4533168404Spjd 4534168962Spjdstatic int 4535213673Spjdioflags(int ioflags) 4536213673Spjd{ 4537213673Spjd int flags = 0; 4538213673Spjd 4539213673Spjd if (ioflags & IO_APPEND) 4540213673Spjd flags |= FAPPEND; 4541213673Spjd if (ioflags & IO_NDELAY) 4542303970Savg flags |= FNONBLOCK; 4543213673Spjd if (ioflags & IO_SYNC) 4544213673Spjd flags |= (FSYNC | FDSYNC | FRSYNC); 4545213673Spjd 4546213673Spjd return (flags); 4547213673Spjd} 4548213673Spjd 4549213673Spjdstatic int 4550292373Sglebiuszfs_getpages(struct vnode *vp, vm_page_t *m, int count, int *rbehind, 4551292373Sglebius int *rahead) 4552213937Savg{ 4553213937Savg znode_t *zp = VTOZ(vp); 4554213937Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4555213937Savg objset_t *os = zp->z_zfsvfs->z_os; 4556292373Sglebius vm_page_t mlast; 4557213937Savg vm_object_t object; 4558213937Savg caddr_t va; 4559213937Savg struct sf_buf *sf; 4560243517Savg off_t startoff, endoff; 4561213937Savg int i, error; 4562243517Savg vm_pindex_t reqstart, reqend; 4563297473Sglebius int lsize, size; 4564213937Savg 4565292386Sglebius object = m[0]->object; 4566292386Sglebius error = 0; 4567292373Sglebius 4568213937Savg ZFS_ENTER(zfsvfs); 4569213937Savg ZFS_VERIFY_ZP(zp); 4570213937Savg 4571248084Sattilio zfs_vmobject_wlock(object); 4572292386Sglebius if (m[count - 1]->valid != 0 && --count == 0) { 4573248084Sattilio zfs_vmobject_wunlock(object); 4574292386Sglebius goto out; 4575213937Savg } 4576213937Savg 4577292386Sglebius mlast = m[count - 1]; 4578213937Savg 4579292373Sglebius if (IDX_TO_OFF(mlast->pindex) >= 
4580292373Sglebius object->un_pager.vnp.vnp_size) { 4581248084Sattilio zfs_vmobject_wunlock(object); 4582213937Savg ZFS_EXIT(zfsvfs); 4583248084Sattilio return (zfs_vm_pagerret_bad); 4584213937Savg } 4585213937Savg 4586292373Sglebius PCPU_INC(cnt.v_vnodein); 4587297473Sglebius PCPU_ADD(cnt.v_vnodepgsin, count); 4588292373Sglebius 4589243517Savg lsize = PAGE_SIZE; 4590243517Savg if (IDX_TO_OFF(mlast->pindex) + lsize > object->un_pager.vnp.vnp_size) 4591292373Sglebius lsize = object->un_pager.vnp.vnp_size - 4592292373Sglebius IDX_TO_OFF(mlast->pindex); 4593248084Sattilio zfs_vmobject_wunlock(object); 4594243517Savg 4595292386Sglebius for (i = 0; i < count; i++) { 4596243517Savg size = PAGE_SIZE; 4597292386Sglebius if (i == count - 1) 4598243517Savg size = lsize; 4599243517Savg va = zfs_map_page(m[i], &sf); 4600243517Savg error = dmu_read(os, zp->z_id, IDX_TO_OFF(m[i]->pindex), 4601243517Savg size, va, DMU_READ_PREFETCH); 4602243517Savg if (size != PAGE_SIZE) 4603243517Savg bzero(va + size, PAGE_SIZE - size); 4604243517Savg zfs_unmap_page(sf); 4605243517Savg if (error != 0) 4606292373Sglebius goto out; 4607243517Savg } 4608243517Savg 4609248084Sattilio zfs_vmobject_wlock(object); 4610292386Sglebius for (i = 0; i < count; i++) 4611292373Sglebius m[i]->valid = VM_PAGE_BITS_ALL; 4612248084Sattilio zfs_vmobject_wunlock(object); 4613213937Savg 4614292373Sglebiusout: 4615213937Savg ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4616213937Savg ZFS_EXIT(zfsvfs); 4617292386Sglebius if (error == 0) { 4618292386Sglebius if (rbehind) 4619292386Sglebius *rbehind = 0; 4620292386Sglebius if (rahead) 4621292386Sglebius *rahead = 0; 4622292386Sglebius return (zfs_vm_pagerret_ok); 4623292386Sglebius } else 4624292386Sglebius return (zfs_vm_pagerret_error); 4625213937Savg} 4626213937Savg 4627213937Savgstatic int 4628213937Savgzfs_freebsd_getpages(ap) 4629213937Savg struct vop_getpages_args /* { 4630213937Savg struct vnode *a_vp; 4631213937Savg vm_page_t *a_m; 4632213937Savg int a_count; 
4633292373Sglebius int *a_rbehind; 4634292373Sglebius int *a_rahead; 4635213937Savg } */ *ap; 4636213937Savg{ 4637213937Savg 4638292373Sglebius return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, 4639292373Sglebius ap->a_rahead)); 4640213937Savg} 4641213937Savg 4642213937Savgstatic int 4643258746Savgzfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, 4644258746Savg int *rtvals) 4645258746Savg{ 4646258746Savg znode_t *zp = VTOZ(vp); 4647258746Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4648258746Savg rl_t *rl; 4649258746Savg dmu_tx_t *tx; 4650258746Savg struct sf_buf *sf; 4651258746Savg vm_object_t object; 4652258746Savg vm_page_t m; 4653258746Savg caddr_t va; 4654258746Savg size_t tocopy; 4655258746Savg size_t lo_len; 4656258746Savg vm_ooffset_t lo_off; 4657258746Savg vm_ooffset_t off; 4658258746Savg uint_t blksz; 4659258746Savg int ncount; 4660258746Savg int pcount; 4661258746Savg int err; 4662258746Savg int i; 4663258746Savg 4664258746Savg ZFS_ENTER(zfsvfs); 4665258746Savg ZFS_VERIFY_ZP(zp); 4666258746Savg 4667258746Savg object = vp->v_object; 4668258746Savg pcount = btoc(len); 4669258746Savg ncount = pcount; 4670258746Savg 4671258746Savg KASSERT(ma[0]->object == object, ("mismatching object")); 4672258746Savg KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length")); 4673258746Savg 4674258746Savg for (i = 0; i < pcount; i++) 4675258746Savg rtvals[i] = zfs_vm_pagerret_error; 4676258746Savg 4677258746Savg off = IDX_TO_OFF(ma[0]->pindex); 4678258746Savg blksz = zp->z_blksz; 4679258746Savg lo_off = rounddown(off, blksz); 4680258746Savg lo_len = roundup(len + (off - lo_off), blksz); 4681258746Savg rl = zfs_range_lock(zp, lo_off, lo_len, RL_WRITER); 4682258746Savg 4683258746Savg zfs_vmobject_wlock(object); 4684258746Savg if (len + off > object->un_pager.vnp.vnp_size) { 4685258746Savg if (object->un_pager.vnp.vnp_size > off) { 4686258746Savg int pgoff; 4687258746Savg 4688258746Savg len = object->un_pager.vnp.vnp_size - off; 
4689258746Savg ncount = btoc(len); 4690258746Savg if ((pgoff = (int)len & PAGE_MASK) != 0) { 4691258746Savg /* 4692258746Savg * If the object is locked and the following 4693258746Savg * conditions hold, then the page's dirty 4694258746Savg * field cannot be concurrently changed by a 4695258746Savg * pmap operation. 4696258746Savg */ 4697258746Savg m = ma[ncount - 1]; 4698258746Savg vm_page_assert_sbusied(m); 4699258746Savg KASSERT(!pmap_page_is_write_mapped(m), 4700258746Savg ("zfs_putpages: page %p is not read-only", m)); 4701258746Savg vm_page_clear_dirty(m, pgoff, PAGE_SIZE - 4702258746Savg pgoff); 4703258746Savg } 4704258746Savg } else { 4705258746Savg len = 0; 4706258746Savg ncount = 0; 4707258746Savg } 4708258746Savg if (ncount < pcount) { 4709258746Savg for (i = ncount; i < pcount; i++) { 4710258746Savg rtvals[i] = zfs_vm_pagerret_bad; 4711258746Savg } 4712258746Savg } 4713258746Savg } 4714258746Savg zfs_vmobject_wunlock(object); 4715258746Savg 4716258746Savg if (ncount == 0) 4717258746Savg goto out; 4718258746Savg 4719258746Savg if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 4720258746Savg zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4721258746Savg goto out; 4722258746Savg } 4723258746Savg 4724258746Savg tx = dmu_tx_create(zfsvfs->z_os); 4725258746Savg dmu_tx_hold_write(tx, zp->z_id, off, len); 4726258746Savg 4727258746Savg dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4728258746Savg zfs_sa_upgrade_txholds(tx, zp); 4729316847Savg err = dmu_tx_assign(tx, TXG_WAIT); 4730258746Savg if (err != 0) { 4731258746Savg dmu_tx_abort(tx); 4732258746Savg goto out; 4733258746Savg } 4734258746Savg 4735258746Savg if (zp->z_blksz < PAGE_SIZE) { 4736258746Savg i = 0; 4737258746Savg for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) { 4738258746Savg tocopy = len > PAGE_SIZE ? 
PAGE_SIZE : len; 4739258746Savg va = zfs_map_page(ma[i], &sf); 4740258746Savg dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx); 4741258746Savg zfs_unmap_page(sf); 4742258746Savg } 4743258746Savg } else { 4744258746Savg err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx); 4745258746Savg } 4746258746Savg 4747258746Savg if (err == 0) { 4748258746Savg uint64_t mtime[2], ctime[2]; 4749258746Savg sa_bulk_attr_t bulk[3]; 4750258746Savg int count = 0; 4751258746Savg 4752258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 4753258746Savg &mtime, 16); 4754258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 4755258746Savg &ctime, 16); 4756258746Savg SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4757258746Savg &zp->z_pflags, 8); 4758258746Savg zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 4759258746Savg B_TRUE); 4760275401Savg (void)sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 4761258746Savg zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 4762258746Savg 4763258746Savg zfs_vmobject_wlock(object); 4764258746Savg for (i = 0; i < ncount; i++) { 4765258746Savg rtvals[i] = zfs_vm_pagerret_ok; 4766258746Savg vm_page_undirty(ma[i]); 4767258746Savg } 4768258746Savg zfs_vmobject_wunlock(object); 4769258746Savg PCPU_INC(cnt.v_vnodeout); 4770258746Savg PCPU_ADD(cnt.v_vnodepgsout, ncount); 4771258746Savg } 4772258746Savg dmu_tx_commit(tx); 4773258746Savg 4774258746Savgout: 4775258746Savg zfs_range_unlock(rl); 4776258746Savg if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 || 4777258746Savg zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4778258746Savg zil_commit(zfsvfs->z_log, zp->z_id); 4779258746Savg ZFS_EXIT(zfsvfs); 4780258746Savg return (rtvals[0]); 4781258746Savg} 4782258746Savg 4783258746Savgint 4784258746Savgzfs_freebsd_putpages(ap) 4785258746Savg struct vop_putpages_args /* { 4786258746Savg struct vnode *a_vp; 4787258746Savg vm_page_t *a_m; 4788258746Savg int a_count; 
4789258746Savg int a_sync; 4790258746Savg int *a_rtvals; 4791258746Savg } */ *ap; 4792258746Savg{ 4793258746Savg 4794258746Savg return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, 4795258746Savg ap->a_rtvals)); 4796258746Savg} 4797258746Savg 4798258746Savgstatic int 4799243518Savgzfs_freebsd_bmap(ap) 4800243518Savg struct vop_bmap_args /* { 4801243518Savg struct vnode *a_vp; 4802243518Savg daddr_t a_bn; 4803243518Savg struct bufobj **a_bop; 4804243518Savg daddr_t *a_bnp; 4805243518Savg int *a_runp; 4806243518Savg int *a_runb; 4807243518Savg } */ *ap; 4808243518Savg{ 4809243518Savg 4810243518Savg if (ap->a_bop != NULL) 4811243518Savg *ap->a_bop = &ap->a_vp->v_bufobj; 4812243518Savg if (ap->a_bnp != NULL) 4813243518Savg *ap->a_bnp = ap->a_bn; 4814243518Savg if (ap->a_runp != NULL) 4815243518Savg *ap->a_runp = 0; 4816243518Savg if (ap->a_runb != NULL) 4817243518Savg *ap->a_runb = 0; 4818243518Savg 4819243518Savg return (0); 4820243518Savg} 4821243518Savg 4822243518Savgstatic int 4823168962Spjdzfs_freebsd_open(ap) 4824168962Spjd struct vop_open_args /* { 4825168962Spjd struct vnode *a_vp; 4826168962Spjd int a_mode; 4827168962Spjd struct ucred *a_cred; 4828168962Spjd struct thread *a_td; 4829168962Spjd } */ *ap; 4830168962Spjd{ 4831168962Spjd vnode_t *vp = ap->a_vp; 4832168962Spjd znode_t *zp = VTOZ(vp); 4833168962Spjd int error; 4834168962Spjd 4835185029Spjd error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); 4836168962Spjd if (error == 0) 4837219089Spjd vnode_create_vobject(vp, zp->z_size, ap->a_td); 4838168962Spjd return (error); 4839168962Spjd} 4840168962Spjd 4841168962Spjdstatic int 4842168962Spjdzfs_freebsd_close(ap) 4843168962Spjd struct vop_close_args /* { 4844168962Spjd struct vnode *a_vp; 4845168962Spjd int a_fflag; 4846168962Spjd struct ucred *a_cred; 4847168962Spjd struct thread *a_td; 4848168962Spjd } */ *ap; 4849168962Spjd{ 4850168962Spjd 4851242566Savg return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred, NULL)); 4852168962Spjd} 
4853168962Spjd 4854168962Spjdstatic int 4855168962Spjdzfs_freebsd_ioctl(ap) 4856168962Spjd struct vop_ioctl_args /* { 4857168962Spjd struct vnode *a_vp; 4858168962Spjd u_long a_command; 4859168962Spjd caddr_t a_data; 4860168962Spjd int a_fflag; 4861168962Spjd struct ucred *cred; 4862168962Spjd struct thread *td; 4863168962Spjd } */ *ap; 4864168962Spjd{ 4865168962Spjd 4866168978Spjd return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 4867185029Spjd ap->a_fflag, ap->a_cred, NULL, NULL)); 4868168962Spjd} 4869168962Spjd 4870168962Spjdstatic int 4871168962Spjdzfs_freebsd_read(ap) 4872168962Spjd struct vop_read_args /* { 4873168962Spjd struct vnode *a_vp; 4874168962Spjd struct uio *a_uio; 4875168962Spjd int a_ioflag; 4876168962Spjd struct ucred *a_cred; 4877168962Spjd } */ *ap; 4878168962Spjd{ 4879168962Spjd 4880213673Spjd return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 4881213673Spjd ap->a_cred, NULL)); 4882168962Spjd} 4883168962Spjd 4884168962Spjdstatic int 4885168962Spjdzfs_freebsd_write(ap) 4886168962Spjd struct vop_write_args /* { 4887168962Spjd struct vnode *a_vp; 4888168962Spjd struct uio *a_uio; 4889168962Spjd int a_ioflag; 4890168962Spjd struct ucred *a_cred; 4891168962Spjd } */ *ap; 4892168962Spjd{ 4893168962Spjd 4894213673Spjd return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 4895213673Spjd ap->a_cred, NULL)); 4896168962Spjd} 4897168962Spjd 4898168962Spjdstatic int 4899168962Spjdzfs_freebsd_access(ap) 4900168962Spjd struct vop_access_args /* { 4901168962Spjd struct vnode *a_vp; 4902192689Strasz accmode_t a_accmode; 4903168962Spjd struct ucred *a_cred; 4904168962Spjd struct thread *a_td; 4905168962Spjd } */ *ap; 4906168962Spjd{ 4907212002Sjh vnode_t *vp = ap->a_vp; 4908212002Sjh znode_t *zp = VTOZ(vp); 4909198703Spjd accmode_t accmode; 4910198703Spjd int error = 0; 4911168962Spjd 4912185172Spjd /* 4913198703Spjd * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND, 4914185172Spjd */ 4915198703Spjd accmode = 
ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 4916198703Spjd if (accmode != 0) 4917198703Spjd error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); 4918185172Spjd 4919198703Spjd /* 4920198703Spjd * VADMIN has to be handled by vaccess(). 4921198703Spjd */ 4922198703Spjd if (error == 0) { 4923198703Spjd accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 4924198703Spjd if (accmode != 0) { 4925219089Spjd error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 4926219089Spjd zp->z_gid, accmode, ap->a_cred, NULL); 4927198703Spjd } 4928185172Spjd } 4929185172Spjd 4930212002Sjh /* 4931212002Sjh * For VEXEC, ensure that at least one execute bit is set for 4932212002Sjh * non-directories. 4933212002Sjh */ 4934212002Sjh if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 4935219089Spjd (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 4936212002Sjh error = EACCES; 4937219089Spjd } 4938212002Sjh 4939198703Spjd return (error); 4940168962Spjd} 4941168962Spjd 4942168962Spjdstatic int 4943168962Spjdzfs_freebsd_lookup(ap) 4944168962Spjd struct vop_lookup_args /* { 4945168962Spjd struct vnode *a_dvp; 4946168962Spjd struct vnode **a_vpp; 4947168962Spjd struct componentname *a_cnp; 4948168962Spjd } */ *ap; 4949168962Spjd{ 4950168962Spjd struct componentname *cnp = ap->a_cnp; 4951168962Spjd char nm[NAME_MAX + 1]; 4952168962Spjd 4953168962Spjd ASSERT(cnp->cn_namelen < sizeof(nm)); 4954168962Spjd strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 4955168962Spjd 4956168962Spjd return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 4957185029Spjd cnp->cn_cred, cnp->cn_thread, 0)); 4958168962Spjd} 4959168962Spjd 4960168962Spjdstatic int 4961303970Savgzfs_cache_lookup(ap) 4962303970Savg struct vop_lookup_args /* { 4963303970Savg struct vnode *a_dvp; 4964303970Savg struct vnode **a_vpp; 4965303970Savg struct componentname *a_cnp; 4966303970Savg } */ *ap; 4967303970Savg{ 4968303970Savg zfsvfs_t *zfsvfs; 4969303970Savg 4970303970Savg 
zfsvfs = ap->a_dvp->v_mount->mnt_data; 4971303970Savg if (zfsvfs->z_use_namecache) 4972303970Savg return (vfs_cache_lookup(ap)); 4973303970Savg else 4974303970Savg return (zfs_freebsd_lookup(ap)); 4975303970Savg} 4976303970Savg 4977303970Savgstatic int 4978168962Spjdzfs_freebsd_create(ap) 4979168962Spjd struct vop_create_args /* { 4980168962Spjd struct vnode *a_dvp; 4981168962Spjd struct vnode **a_vpp; 4982168962Spjd struct componentname *a_cnp; 4983168962Spjd struct vattr *a_vap; 4984168962Spjd } */ *ap; 4985168962Spjd{ 4986303970Savg zfsvfs_t *zfsvfs; 4987168962Spjd struct componentname *cnp = ap->a_cnp; 4988168962Spjd vattr_t *vap = ap->a_vap; 4989276007Skib int error, mode; 4990168962Spjd 4991168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 4992168962Spjd 4993168962Spjd vattr_init_mask(vap); 4994168962Spjd mode = vap->va_mode & ALLPERMS; 4995303970Savg zfsvfs = ap->a_dvp->v_mount->mnt_data; 4996168962Spjd 4997276007Skib error = zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 4998276007Skib ap->a_vpp, cnp->cn_cred, cnp->cn_thread); 4999303970Savg if (zfsvfs->z_use_namecache && 5000303970Savg error == 0 && (cnp->cn_flags & MAKEENTRY) != 0) 5001276007Skib cache_enter(ap->a_dvp, *ap->a_vpp, cnp); 5002276007Skib return (error); 5003168962Spjd} 5004168962Spjd 5005168962Spjdstatic int 5006168962Spjdzfs_freebsd_remove(ap) 5007168962Spjd struct vop_remove_args /* { 5008168962Spjd struct vnode *a_dvp; 5009168962Spjd struct vnode *a_vp; 5010168962Spjd struct componentname *a_cnp; 5011168962Spjd } */ *ap; 5012168962Spjd{ 5013168962Spjd 5014168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 5015168962Spjd 5016303970Savg return (zfs_remove(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr, 5017303970Savg ap->a_cnp->cn_cred)); 5018168962Spjd} 5019168962Spjd 5020168962Spjdstatic int 5021168962Spjdzfs_freebsd_mkdir(ap) 5022168962Spjd struct vop_mkdir_args /* { 5023168962Spjd struct vnode *a_dvp; 5024168962Spjd struct vnode **a_vpp; 5025168962Spjd struct componentname *a_cnp; 
5026168962Spjd struct vattr *a_vap; 5027168962Spjd } */ *ap; 5028168962Spjd{ 5029168962Spjd vattr_t *vap = ap->a_vap; 5030168962Spjd 5031168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 5032168962Spjd 5033168962Spjd vattr_init_mask(vap); 5034168962Spjd 5035168962Spjd return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 5036303970Savg ap->a_cnp->cn_cred)); 5037168962Spjd} 5038168962Spjd 5039168962Spjdstatic int 5040168962Spjdzfs_freebsd_rmdir(ap) 5041168962Spjd struct vop_rmdir_args /* { 5042168962Spjd struct vnode *a_dvp; 5043168962Spjd struct vnode *a_vp; 5044168962Spjd struct componentname *a_cnp; 5045168962Spjd } */ *ap; 5046168962Spjd{ 5047168962Spjd struct componentname *cnp = ap->a_cnp; 5048168962Spjd 5049168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5050168962Spjd 5051303970Savg return (zfs_rmdir(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred)); 5052168962Spjd} 5053168962Spjd 5054168962Spjdstatic int 5055168962Spjdzfs_freebsd_readdir(ap) 5056168962Spjd struct vop_readdir_args /* { 5057168962Spjd struct vnode *a_vp; 5058168962Spjd struct uio *a_uio; 5059168962Spjd struct ucred *a_cred; 5060168962Spjd int *a_eofflag; 5061168962Spjd int *a_ncookies; 5062168962Spjd u_long **a_cookies; 5063168962Spjd } */ *ap; 5064168962Spjd{ 5065168962Spjd 5066168962Spjd return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 5067168962Spjd ap->a_ncookies, ap->a_cookies)); 5068168962Spjd} 5069168962Spjd 5070168962Spjdstatic int 5071168962Spjdzfs_freebsd_fsync(ap) 5072168962Spjd struct vop_fsync_args /* { 5073168962Spjd struct vnode *a_vp; 5074168962Spjd int a_waitfor; 5075168962Spjd struct thread *a_td; 5076168962Spjd } */ *ap; 5077168962Spjd{ 5078168962Spjd 5079168962Spjd vop_stdfsync(ap); 5080185029Spjd return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 5081168962Spjd} 5082168962Spjd 5083168962Spjdstatic int 5084168962Spjdzfs_freebsd_getattr(ap) 5085168962Spjd struct vop_getattr_args /* { 5086168962Spjd struct vnode *a_vp; 5087168962Spjd 
struct vattr *a_vap; 5088168962Spjd struct ucred *a_cred; 5089168962Spjd } */ *ap; 5090168962Spjd{ 5091185029Spjd vattr_t *vap = ap->a_vap; 5092185029Spjd xvattr_t xvap; 5093185029Spjd u_long fflags = 0; 5094185029Spjd int error; 5095168962Spjd 5096185029Spjd xva_init(&xvap); 5097185029Spjd xvap.xva_vattr = *vap; 5098185029Spjd xvap.xva_vattr.va_mask |= AT_XVATTR; 5099185029Spjd 5100185029Spjd /* Convert chflags into ZFS-type flags. */ 5101185029Spjd /* XXX: what about SF_SETTABLE?. */ 5102185029Spjd XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 5103185029Spjd XVA_SET_REQ(&xvap, XAT_APPENDONLY); 5104185029Spjd XVA_SET_REQ(&xvap, XAT_NOUNLINK); 5105185029Spjd XVA_SET_REQ(&xvap, XAT_NODUMP); 5106254627Sken XVA_SET_REQ(&xvap, XAT_READONLY); 5107254627Sken XVA_SET_REQ(&xvap, XAT_ARCHIVE); 5108254627Sken XVA_SET_REQ(&xvap, XAT_SYSTEM); 5109254627Sken XVA_SET_REQ(&xvap, XAT_HIDDEN); 5110254627Sken XVA_SET_REQ(&xvap, XAT_REPARSE); 5111254627Sken XVA_SET_REQ(&xvap, XAT_OFFLINE); 5112254627Sken XVA_SET_REQ(&xvap, XAT_SPARSE); 5113254627Sken 5114185029Spjd error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 5115185029Spjd if (error != 0) 5116185029Spjd return (error); 5117185029Spjd 5118185029Spjd /* Convert ZFS xattr into chflags. 
*/ 5119185029Spjd#define FLAG_CHECK(fflag, xflag, xfield) do { \ 5120185029Spjd if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 5121185029Spjd fflags |= (fflag); \ 5122185029Spjd} while (0) 5123185029Spjd FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 5124185029Spjd xvap.xva_xoptattrs.xoa_immutable); 5125185029Spjd FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 5126185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 5127185029Spjd FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 5128185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 5129254627Sken FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE, 5130254627Sken xvap.xva_xoptattrs.xoa_archive); 5131185029Spjd FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 5132185029Spjd xvap.xva_xoptattrs.xoa_nodump); 5133254627Sken FLAG_CHECK(UF_READONLY, XAT_READONLY, 5134254627Sken xvap.xva_xoptattrs.xoa_readonly); 5135254627Sken FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM, 5136254627Sken xvap.xva_xoptattrs.xoa_system); 5137254627Sken FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN, 5138254627Sken xvap.xva_xoptattrs.xoa_hidden); 5139254627Sken FLAG_CHECK(UF_REPARSE, XAT_REPARSE, 5140254627Sken xvap.xva_xoptattrs.xoa_reparse); 5141254627Sken FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE, 5142254627Sken xvap.xva_xoptattrs.xoa_offline); 5143254627Sken FLAG_CHECK(UF_SPARSE, XAT_SPARSE, 5144254627Sken xvap.xva_xoptattrs.xoa_sparse); 5145254627Sken 5146185029Spjd#undef FLAG_CHECK 5147185029Spjd *vap = xvap.xva_vattr; 5148185029Spjd vap->va_flags = fflags; 5149185029Spjd return (0); 5150168962Spjd} 5151168962Spjd 5152168962Spjdstatic int 5153168962Spjdzfs_freebsd_setattr(ap) 5154168962Spjd struct vop_setattr_args /* { 5155168962Spjd struct vnode *a_vp; 5156168962Spjd struct vattr *a_vap; 5157168962Spjd struct ucred *a_cred; 5158168962Spjd } */ *ap; 5159168962Spjd{ 5160185172Spjd vnode_t *vp = ap->a_vp; 5161168962Spjd vattr_t *vap = ap->a_vap; 5162185172Spjd cred_t *cred = ap->a_cred; 5163185029Spjd xvattr_t xvap; 5164185029Spjd u_long fflags; 5165185029Spjd uint64_t zflags; 5166168962Spjd 5167168962Spjd vattr_init_mask(vap); 
5168170044Spjd vap->va_mask &= ~AT_NOSET; 5169168962Spjd 5170185029Spjd xva_init(&xvap); 5171185029Spjd xvap.xva_vattr = *vap; 5172185029Spjd 5173219089Spjd zflags = VTOZ(vp)->z_pflags; 5174185172Spjd 5175185029Spjd if (vap->va_flags != VNOVAL) { 5176197683Sdelphij zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 5177185172Spjd int error; 5178185172Spjd 5179197683Sdelphij if (zfsvfs->z_use_fuids == B_FALSE) 5180197683Sdelphij return (EOPNOTSUPP); 5181197683Sdelphij 5182185029Spjd fflags = vap->va_flags; 5183254627Sken /* 5184254627Sken * XXX KDM 5185254627Sken * We need to figure out whether it makes sense to allow 5186254627Sken * UF_REPARSE through, since we don't really have other 5187254627Sken * facilities to handle reparse points and zfs_setattr() 5188254627Sken * doesn't currently allow setting that attribute anyway. 5189254627Sken */ 5190254627Sken if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE| 5191254627Sken UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE| 5192254627Sken UF_OFFLINE|UF_SPARSE)) != 0) 5193185029Spjd return (EOPNOTSUPP); 5194185172Spjd /* 5195185172Spjd * Unprivileged processes are not permitted to unset system 5196185172Spjd * flags, or modify flags if any system flags are set. 5197185172Spjd * Privileged non-jail processes may not modify system flags 5198185172Spjd * if securelevel > 0 and any existing system flags are set. 5199185172Spjd * Privileged jail processes behave like privileged non-jail 5200185172Spjd * processes if the security.jail.chflags_allowed sysctl is 5201185172Spjd * is non-zero; otherwise, they behave like unprivileged 5202185172Spjd * processes. 
5203185172Spjd */ 5204197861Spjd if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 5205197861Spjd priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) { 5206185172Spjd if (zflags & 5207185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 5208185172Spjd error = securelevel_gt(cred, 0); 5209197861Spjd if (error != 0) 5210185172Spjd return (error); 5211185172Spjd } 5212185172Spjd } else { 5213197861Spjd /* 5214197861Spjd * Callers may only modify the file flags on objects they 5215197861Spjd * have VADMIN rights for. 5216197861Spjd */ 5217197861Spjd if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0) 5218197861Spjd return (error); 5219185172Spjd if (zflags & 5220185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 5221185172Spjd return (EPERM); 5222185172Spjd } 5223185172Spjd if (fflags & 5224185172Spjd (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 5225185172Spjd return (EPERM); 5226185172Spjd } 5227185172Spjd } 5228185029Spjd 5229185029Spjd#define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 5230185029Spjd if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 5231185029Spjd ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 5232185029Spjd XVA_SET_REQ(&xvap, (xflag)); \ 5233185029Spjd (xfield) = ((fflags & (fflag)) != 0); \ 5234185029Spjd } \ 5235185029Spjd} while (0) 5236185029Spjd /* Convert chflags into ZFS-type flags. */ 5237185029Spjd /* XXX: what about SF_SETTABLE?. 
*/ 5238185029Spjd FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 5239185029Spjd xvap.xva_xoptattrs.xoa_immutable); 5240185029Spjd FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 5241185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 5242185029Spjd FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 5243185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 5244254627Sken FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE, 5245254627Sken xvap.xva_xoptattrs.xoa_archive); 5246185029Spjd FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 5247185172Spjd xvap.xva_xoptattrs.xoa_nodump); 5248254627Sken FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY, 5249254627Sken xvap.xva_xoptattrs.xoa_readonly); 5250254627Sken FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM, 5251254627Sken xvap.xva_xoptattrs.xoa_system); 5252254627Sken FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN, 5253254627Sken xvap.xva_xoptattrs.xoa_hidden); 5254254627Sken FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE, 5255254627Sken xvap.xva_xoptattrs.xoa_hidden); 5256254627Sken FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE, 5257254627Sken xvap.xva_xoptattrs.xoa_offline); 5258254627Sken FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE, 5259254627Sken xvap.xva_xoptattrs.xoa_sparse); 5260185029Spjd#undef FLAG_CHANGE 5261185029Spjd } 5262316391Sasomers if (vap->va_birthtime.tv_sec != VNOVAL) { 5263316391Sasomers xvap.xva_vattr.va_mask |= AT_XVATTR; 5264316391Sasomers XVA_SET_REQ(&xvap, XAT_CREATETIME); 5265316391Sasomers } 5266185172Spjd return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL)); 5267168962Spjd} 5268168962Spjd 5269168962Spjdstatic int 5270168962Spjdzfs_freebsd_rename(ap) 5271168962Spjd struct vop_rename_args /* { 5272168962Spjd struct vnode *a_fdvp; 5273168962Spjd struct vnode *a_fvp; 5274168962Spjd struct componentname *a_fcnp; 5275168962Spjd struct vnode *a_tdvp; 5276168962Spjd struct vnode *a_tvp; 5277168962Spjd struct componentname *a_tcnp; 5278168962Spjd } */ *ap; 5279168962Spjd{ 5280168962Spjd 
vnode_t *fdvp = ap->a_fdvp; 5281168962Spjd vnode_t *fvp = ap->a_fvp; 5282168962Spjd vnode_t *tdvp = ap->a_tdvp; 5283168962Spjd vnode_t *tvp = ap->a_tvp; 5284168962Spjd int error; 5285168962Spjd 5286192237Skmacy ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); 5287192237Skmacy ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); 5288168962Spjd 5289303970Savg error = zfs_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp, 5290303970Savg ap->a_tcnp, ap->a_fcnp->cn_cred); 5291168962Spjd 5292303970Savg vrele(fdvp); 5293303970Savg vrele(fvp); 5294303970Savg vrele(tdvp); 5295303970Savg if (tvp != NULL) 5296303970Savg vrele(tvp); 5297303970Savg 5298168962Spjd return (error); 5299168962Spjd} 5300168962Spjd 5301168962Spjdstatic int 5302168962Spjdzfs_freebsd_symlink(ap) 5303168962Spjd struct vop_symlink_args /* { 5304168962Spjd struct vnode *a_dvp; 5305168962Spjd struct vnode **a_vpp; 5306168962Spjd struct componentname *a_cnp; 5307168962Spjd struct vattr *a_vap; 5308168962Spjd char *a_target; 5309168962Spjd } */ *ap; 5310168962Spjd{ 5311168962Spjd struct componentname *cnp = ap->a_cnp; 5312168962Spjd vattr_t *vap = ap->a_vap; 5313168962Spjd 5314168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5315168962Spjd 5316168962Spjd vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. 
*/ 5317168962Spjd vattr_init_mask(vap); 5318168962Spjd 5319168962Spjd return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 5320168962Spjd ap->a_target, cnp->cn_cred, cnp->cn_thread)); 5321168962Spjd} 5322168962Spjd 5323168962Spjdstatic int 5324168962Spjdzfs_freebsd_readlink(ap) 5325168962Spjd struct vop_readlink_args /* { 5326168962Spjd struct vnode *a_vp; 5327168962Spjd struct uio *a_uio; 5328168962Spjd struct ucred *a_cred; 5329168962Spjd } */ *ap; 5330168962Spjd{ 5331168962Spjd 5332185029Spjd return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 5333168962Spjd} 5334168962Spjd 5335168962Spjdstatic int 5336168962Spjdzfs_freebsd_link(ap) 5337168962Spjd struct vop_link_args /* { 5338168962Spjd struct vnode *a_tdvp; 5339168962Spjd struct vnode *a_vp; 5340168962Spjd struct componentname *a_cnp; 5341168962Spjd } */ *ap; 5342168962Spjd{ 5343168962Spjd struct componentname *cnp = ap->a_cnp; 5344254982Sdelphij vnode_t *vp = ap->a_vp; 5345254982Sdelphij vnode_t *tdvp = ap->a_tdvp; 5346168962Spjd 5347254982Sdelphij if (tdvp->v_mount != vp->v_mount) 5348254982Sdelphij return (EXDEV); 5349254982Sdelphij 5350168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5351168962Spjd 5352254982Sdelphij return (zfs_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 5353168962Spjd} 5354168962Spjd 5355168962Spjdstatic int 5356168962Spjdzfs_freebsd_inactive(ap) 5357169170Spjd struct vop_inactive_args /* { 5358169170Spjd struct vnode *a_vp; 5359169170Spjd struct thread *a_td; 5360169170Spjd } */ *ap; 5361168962Spjd{ 5362168962Spjd vnode_t *vp = ap->a_vp; 5363168962Spjd 5364185029Spjd zfs_inactive(vp, ap->a_td->td_ucred, NULL); 5365168962Spjd return (0); 5366168962Spjd} 5367168962Spjd 5368168962Spjdstatic int 5369168962Spjdzfs_freebsd_reclaim(ap) 5370168962Spjd struct vop_reclaim_args /* { 5371168962Spjd struct vnode *a_vp; 5372168962Spjd struct thread *a_td; 5373168962Spjd } */ *ap; 5374168962Spjd{ 5375169170Spjd vnode_t *vp = ap->a_vp; 5376168962Spjd znode_t *zp = 
VTOZ(vp); 5377197133Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5378168962Spjd 5379169025Spjd ASSERT(zp != NULL); 5380169025Spjd 5381243520Savg /* Destroy the vm object and flush associated pages. */ 5382243520Savg vnode_destroy_vobject(vp); 5383243520Savg 5384168962Spjd /* 5385243520Savg * z_teardown_inactive_lock protects from a race with 5386243520Savg * zfs_znode_dmu_fini in zfsvfs_teardown during 5387243520Savg * force unmount. 5388168962Spjd */ 5389243520Savg rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 5390243520Savg if (zp->z_sa_hdl == NULL) 5391196301Spjd zfs_znode_free(zp); 5392243520Savg else 5393243520Savg zfs_zinactive(zp); 5394243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 5395185029Spjd 5396168962Spjd vp->v_data = NULL; 5397168962Spjd return (0); 5398168962Spjd} 5399168962Spjd 5400168962Spjdstatic int 5401168962Spjdzfs_freebsd_fid(ap) 5402168962Spjd struct vop_fid_args /* { 5403168962Spjd struct vnode *a_vp; 5404168962Spjd struct fid *a_fid; 5405168962Spjd } */ *ap; 5406168962Spjd{ 5407168962Spjd 5408185029Spjd return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 5409168962Spjd} 5410168962Spjd 5411168962Spjdstatic int 5412168962Spjdzfs_freebsd_pathconf(ap) 5413168962Spjd struct vop_pathconf_args /* { 5414168962Spjd struct vnode *a_vp; 5415168962Spjd int a_name; 5416168962Spjd register_t *a_retval; 5417168962Spjd } */ *ap; 5418168962Spjd{ 5419168962Spjd ulong_t val; 5420168962Spjd int error; 5421168962Spjd 5422185029Spjd error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 5423168962Spjd if (error == 0) 5424168962Spjd *ap->a_retval = val; 5425168962Spjd else if (error == EOPNOTSUPP) 5426168962Spjd error = vop_stdpathconf(ap); 5427168962Spjd return (error); 5428168962Spjd} 5429168962Spjd 5430196949Straszstatic int 5431196949Straszzfs_freebsd_fifo_pathconf(ap) 5432196949Strasz struct vop_pathconf_args /* { 5433196949Strasz struct vnode *a_vp; 5434196949Strasz int a_name; 5435196949Strasz register_t *a_retval; 
5436196949Strasz } */ *ap; 5437196949Strasz{ 5438196949Strasz 5439196949Strasz switch (ap->a_name) { 5440196949Strasz case _PC_ACL_EXTENDED: 5441196949Strasz case _PC_ACL_NFS4: 5442196949Strasz case _PC_ACL_PATH_MAX: 5443196949Strasz case _PC_MAC_PRESENT: 5444196949Strasz return (zfs_freebsd_pathconf(ap)); 5445196949Strasz default: 5446196949Strasz return (fifo_specops.vop_pathconf(ap)); 5447196949Strasz } 5448196949Strasz} 5449196949Strasz 5450185029Spjd/* 5451185029Spjd * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 5452185029Spjd * extended attribute name: 5453185029Spjd * 5454185029Spjd * NAMESPACE PREFIX 5455185029Spjd * system freebsd:system: 5456185029Spjd * user (none, can be used to access ZFS fsattr(5) attributes 5457185029Spjd * created on Solaris) 5458185029Spjd */ 5459185029Spjdstatic int 5460185029Spjdzfs_create_attrname(int attrnamespace, const char *name, char *attrname, 5461185029Spjd size_t size) 5462185029Spjd{ 5463185029Spjd const char *namespace, *prefix, *suffix; 5464185029Spjd 5465185029Spjd /* We don't allow '/' character in attribute name. */ 5466185029Spjd if (strchr(name, '/') != NULL) 5467185029Spjd return (EINVAL); 5468185029Spjd /* We don't allow attribute names that start with "freebsd:" string. */ 5469185029Spjd if (strncmp(name, "freebsd:", 8) == 0) 5470185029Spjd return (EINVAL); 5471185029Spjd 5472185029Spjd bzero(attrname, size); 5473185029Spjd 5474185029Spjd switch (attrnamespace) { 5475185029Spjd case EXTATTR_NAMESPACE_USER: 5476185029Spjd#if 0 5477185029Spjd prefix = "freebsd:"; 5478185029Spjd namespace = EXTATTR_NAMESPACE_USER_STRING; 5479185029Spjd suffix = ":"; 5480185029Spjd#else 5481185029Spjd /* 5482185029Spjd * This is the default namespace by which we can access all 5483185029Spjd * attributes created on Solaris. 
5484185029Spjd */ 5485185029Spjd prefix = namespace = suffix = ""; 5486185029Spjd#endif 5487185029Spjd break; 5488185029Spjd case EXTATTR_NAMESPACE_SYSTEM: 5489185029Spjd prefix = "freebsd:"; 5490185029Spjd namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 5491185029Spjd suffix = ":"; 5492185029Spjd break; 5493185029Spjd case EXTATTR_NAMESPACE_EMPTY: 5494185029Spjd default: 5495185029Spjd return (EINVAL); 5496185029Spjd } 5497185029Spjd if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 5498185029Spjd name) >= size) { 5499185029Spjd return (ENAMETOOLONG); 5500185029Spjd } 5501185029Spjd return (0); 5502185029Spjd} 5503185029Spjd 5504185029Spjd/* 5505185029Spjd * Vnode operating to retrieve a named extended attribute. 5506185029Spjd */ 5507185029Spjdstatic int 5508185029Spjdzfs_getextattr(struct vop_getextattr_args *ap) 5509185029Spjd/* 5510185029Spjdvop_getextattr { 5511185029Spjd IN struct vnode *a_vp; 5512185029Spjd IN int a_attrnamespace; 5513185029Spjd IN const char *a_name; 5514185029Spjd INOUT struct uio *a_uio; 5515185029Spjd OUT size_t *a_size; 5516185029Spjd IN struct ucred *a_cred; 5517185029Spjd IN struct thread *a_td; 5518185029Spjd}; 5519185029Spjd*/ 5520185029Spjd{ 5521185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5522185029Spjd struct thread *td = ap->a_td; 5523185029Spjd struct nameidata nd; 5524185029Spjd char attrname[255]; 5525185029Spjd struct vattr va; 5526185029Spjd vnode_t *xvp = NULL, *vp; 5527185029Spjd int error, flags; 5528185029Spjd 5529195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5530195785Strasz ap->a_cred, ap->a_td, VREAD); 5531195785Strasz if (error != 0) 5532195785Strasz return (error); 5533195785Strasz 5534185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5535185029Spjd sizeof(attrname)); 5536185029Spjd if (error != 0) 5537185029Spjd return (error); 5538185029Spjd 5539185029Spjd ZFS_ENTER(zfsvfs); 5540185029Spjd 5541185029Spjd error = 
zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5542185029Spjd LOOKUP_XATTR); 5543185029Spjd if (error != 0) { 5544185029Spjd ZFS_EXIT(zfsvfs); 5545185029Spjd return (error); 5546185029Spjd } 5547185029Spjd 5548185029Spjd flags = FREAD; 5549241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 5550185029Spjd xvp, td); 5551194586Skib error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL); 5552185029Spjd vp = nd.ni_vp; 5553185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 5554185029Spjd if (error != 0) { 5555196303Spjd ZFS_EXIT(zfsvfs); 5556195785Strasz if (error == ENOENT) 5557195785Strasz error = ENOATTR; 5558185029Spjd return (error); 5559185029Spjd } 5560185029Spjd 5561185029Spjd if (ap->a_size != NULL) { 5562185029Spjd error = VOP_GETATTR(vp, &va, ap->a_cred); 5563185029Spjd if (error == 0) 5564185029Spjd *ap->a_size = (size_t)va.va_size; 5565185029Spjd } else if (ap->a_uio != NULL) 5566224605Smm error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); 5567185029Spjd 5568185029Spjd VOP_UNLOCK(vp, 0); 5569185029Spjd vn_close(vp, flags, ap->a_cred, td); 5570185029Spjd ZFS_EXIT(zfsvfs); 5571185029Spjd 5572185029Spjd return (error); 5573185029Spjd} 5574185029Spjd 5575185029Spjd/* 5576185029Spjd * Vnode operation to remove a named attribute. 
5577185029Spjd */ 5578185029Spjdint 5579185029Spjdzfs_deleteextattr(struct vop_deleteextattr_args *ap) 5580185029Spjd/* 5581185029Spjdvop_deleteextattr { 5582185029Spjd IN struct vnode *a_vp; 5583185029Spjd IN int a_attrnamespace; 5584185029Spjd IN const char *a_name; 5585185029Spjd IN struct ucred *a_cred; 5586185029Spjd IN struct thread *a_td; 5587185029Spjd}; 5588185029Spjd*/ 5589185029Spjd{ 5590185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5591185029Spjd struct thread *td = ap->a_td; 5592185029Spjd struct nameidata nd; 5593185029Spjd char attrname[255]; 5594185029Spjd struct vattr va; 5595185029Spjd vnode_t *xvp = NULL, *vp; 5596185029Spjd int error, flags; 5597185029Spjd 5598195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5599195785Strasz ap->a_cred, ap->a_td, VWRITE); 5600195785Strasz if (error != 0) 5601195785Strasz return (error); 5602195785Strasz 5603185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5604185029Spjd sizeof(attrname)); 5605185029Spjd if (error != 0) 5606185029Spjd return (error); 5607185029Spjd 5608185029Spjd ZFS_ENTER(zfsvfs); 5609185029Spjd 5610185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5611185029Spjd LOOKUP_XATTR); 5612185029Spjd if (error != 0) { 5613185029Spjd ZFS_EXIT(zfsvfs); 5614185029Spjd return (error); 5615185029Spjd } 5616185029Spjd 5617241896Skib NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 5618185029Spjd UIO_SYSSPACE, attrname, xvp, td); 5619185029Spjd error = namei(&nd); 5620185029Spjd vp = nd.ni_vp; 5621185029Spjd if (error != 0) { 5622196303Spjd ZFS_EXIT(zfsvfs); 5623260706Savg NDFREE(&nd, NDF_ONLY_PNBUF); 5624195785Strasz if (error == ENOENT) 5625195785Strasz error = ENOATTR; 5626185029Spjd return (error); 5627185029Spjd } 5628260706Savg 5629185029Spjd error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 5630260706Savg NDFREE(&nd, NDF_ONLY_PNBUF); 5631185029Spjd 5632185029Spjd vput(nd.ni_dvp); 5633185029Spjd if (vp == 
nd.ni_dvp) 5634185029Spjd vrele(vp); 5635185029Spjd else 5636185029Spjd vput(vp); 5637185029Spjd ZFS_EXIT(zfsvfs); 5638185029Spjd 5639185029Spjd return (error); 5640185029Spjd} 5641185029Spjd 5642185029Spjd/* 5643185029Spjd * Vnode operation to set a named attribute. 5644185029Spjd */ 5645185029Spjdstatic int 5646185029Spjdzfs_setextattr(struct vop_setextattr_args *ap) 5647185029Spjd/* 5648185029Spjdvop_setextattr { 5649185029Spjd IN struct vnode *a_vp; 5650185029Spjd IN int a_attrnamespace; 5651185029Spjd IN const char *a_name; 5652185029Spjd INOUT struct uio *a_uio; 5653185029Spjd IN struct ucred *a_cred; 5654185029Spjd IN struct thread *a_td; 5655185029Spjd}; 5656185029Spjd*/ 5657185029Spjd{ 5658185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5659185029Spjd struct thread *td = ap->a_td; 5660185029Spjd struct nameidata nd; 5661185029Spjd char attrname[255]; 5662185029Spjd struct vattr va; 5663185029Spjd vnode_t *xvp = NULL, *vp; 5664185029Spjd int error, flags; 5665185029Spjd 5666195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5667195785Strasz ap->a_cred, ap->a_td, VWRITE); 5668195785Strasz if (error != 0) 5669195785Strasz return (error); 5670195785Strasz 5671185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5672185029Spjd sizeof(attrname)); 5673185029Spjd if (error != 0) 5674185029Spjd return (error); 5675185029Spjd 5676185029Spjd ZFS_ENTER(zfsvfs); 5677185029Spjd 5678185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5679195785Strasz LOOKUP_XATTR | CREATE_XATTR_DIR); 5680185029Spjd if (error != 0) { 5681185029Spjd ZFS_EXIT(zfsvfs); 5682185029Spjd return (error); 5683185029Spjd } 5684185029Spjd 5685185029Spjd flags = FFLAGS(O_WRONLY | O_CREAT); 5686241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 5687185029Spjd xvp, td); 5688194586Skib error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); 5689185029Spjd vp = nd.ni_vp; 5690185029Spjd NDFREE(&nd, 
NDF_ONLY_PNBUF); 5691185029Spjd if (error != 0) { 5692185029Spjd ZFS_EXIT(zfsvfs); 5693185029Spjd return (error); 5694185029Spjd } 5695185029Spjd 5696185029Spjd VATTR_NULL(&va); 5697185029Spjd va.va_size = 0; 5698185029Spjd error = VOP_SETATTR(vp, &va, ap->a_cred); 5699185029Spjd if (error == 0) 5700268420Smav VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred); 5701185029Spjd 5702185029Spjd VOP_UNLOCK(vp, 0); 5703185029Spjd vn_close(vp, flags, ap->a_cred, td); 5704185029Spjd ZFS_EXIT(zfsvfs); 5705185029Spjd 5706185029Spjd return (error); 5707185029Spjd} 5708185029Spjd 5709185029Spjd/* 5710185029Spjd * Vnode operation to retrieve extended attributes on a vnode. 5711185029Spjd */ 5712185029Spjdstatic int 5713185029Spjdzfs_listextattr(struct vop_listextattr_args *ap) 5714185029Spjd/* 5715185029Spjdvop_listextattr { 5716185029Spjd IN struct vnode *a_vp; 5717185029Spjd IN int a_attrnamespace; 5718185029Spjd INOUT struct uio *a_uio; 5719185029Spjd OUT size_t *a_size; 5720185029Spjd IN struct ucred *a_cred; 5721185029Spjd IN struct thread *a_td; 5722185029Spjd}; 5723185029Spjd*/ 5724185029Spjd{ 5725185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 5726185029Spjd struct thread *td = ap->a_td; 5727185029Spjd struct nameidata nd; 5728185029Spjd char attrprefix[16]; 5729185029Spjd u_char dirbuf[sizeof(struct dirent)]; 5730185029Spjd struct dirent *dp; 5731185029Spjd struct iovec aiov; 5732185029Spjd struct uio auio, *uio = ap->a_uio; 5733185029Spjd size_t *sizep = ap->a_size; 5734185029Spjd size_t plen; 5735185029Spjd vnode_t *xvp = NULL, *vp; 5736185029Spjd int done, error, eof, pos; 5737185029Spjd 5738195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5739195785Strasz ap->a_cred, ap->a_td, VREAD); 5740196303Spjd if (error != 0) 5741195785Strasz return (error); 5742195785Strasz 5743185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 5744185029Spjd sizeof(attrprefix)); 5745185029Spjd if (error != 0) 5746185029Spjd return (error); 
5747185029Spjd plen = strlen(attrprefix); 5748185029Spjd 5749185029Spjd ZFS_ENTER(zfsvfs); 5750185029Spjd 5751195822Strasz if (sizep != NULL) 5752195822Strasz *sizep = 0; 5753195822Strasz 5754185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5755185029Spjd LOOKUP_XATTR); 5756185029Spjd if (error != 0) { 5757196303Spjd ZFS_EXIT(zfsvfs); 5758195785Strasz /* 5759195785Strasz * ENOATTR means that the EA directory does not yet exist, 5760195785Strasz * i.e. there are no extended attributes there. 5761195785Strasz */ 5762195785Strasz if (error == ENOATTR) 5763195785Strasz error = 0; 5764185029Spjd return (error); 5765185029Spjd } 5766185029Spjd 5767241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 5768188588Sjhb UIO_SYSSPACE, ".", xvp, td); 5769185029Spjd error = namei(&nd); 5770185029Spjd vp = nd.ni_vp; 5771185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 5772185029Spjd if (error != 0) { 5773185029Spjd ZFS_EXIT(zfsvfs); 5774185029Spjd return (error); 5775185029Spjd } 5776185029Spjd 5777185029Spjd auio.uio_iov = &aiov; 5778185029Spjd auio.uio_iovcnt = 1; 5779185029Spjd auio.uio_segflg = UIO_SYSSPACE; 5780185029Spjd auio.uio_td = td; 5781185029Spjd auio.uio_rw = UIO_READ; 5782185029Spjd auio.uio_offset = 0; 5783185029Spjd 5784185029Spjd do { 5785185029Spjd u_char nlen; 5786185029Spjd 5787185029Spjd aiov.iov_base = (void *)dirbuf; 5788185029Spjd aiov.iov_len = sizeof(dirbuf); 5789185029Spjd auio.uio_resid = sizeof(dirbuf); 5790185029Spjd error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 5791185029Spjd done = sizeof(dirbuf) - auio.uio_resid; 5792185029Spjd if (error != 0) 5793185029Spjd break; 5794185029Spjd for (pos = 0; pos < done;) { 5795185029Spjd dp = (struct dirent *)(dirbuf + pos); 5796185029Spjd pos += dp->d_reclen; 5797185029Spjd /* 5798185029Spjd * XXX: Temporarily we also accept DT_UNKNOWN, as this 5799185029Spjd * is what we get when attribute was created on Solaris. 
5800185029Spjd */ 5801185029Spjd if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 5802185029Spjd continue; 5803185029Spjd if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) 5804185029Spjd continue; 5805185029Spjd else if (strncmp(dp->d_name, attrprefix, plen) != 0) 5806185029Spjd continue; 5807185029Spjd nlen = dp->d_namlen - plen; 5808185029Spjd if (sizep != NULL) 5809185029Spjd *sizep += 1 + nlen; 5810185029Spjd else if (uio != NULL) { 5811185029Spjd /* 5812185029Spjd * Format of extattr name entry is one byte for 5813185029Spjd * length and the rest for name. 5814185029Spjd */ 5815185029Spjd error = uiomove(&nlen, 1, uio->uio_rw, uio); 5816185029Spjd if (error == 0) { 5817185029Spjd error = uiomove(dp->d_name + plen, nlen, 5818185029Spjd uio->uio_rw, uio); 5819185029Spjd } 5820185029Spjd if (error != 0) 5821185029Spjd break; 5822185029Spjd } 5823185029Spjd } 5824185029Spjd } while (!eof && error == 0); 5825185029Spjd 5826185029Spjd vput(vp); 5827185029Spjd ZFS_EXIT(zfsvfs); 5828185029Spjd 5829185029Spjd return (error); 5830185029Spjd} 5831185029Spjd 5832192800Straszint 5833192800Straszzfs_freebsd_getacl(ap) 5834192800Strasz struct vop_getacl_args /* { 5835192800Strasz struct vnode *vp; 5836192800Strasz acl_type_t type; 5837192800Strasz struct acl *aclp; 5838192800Strasz struct ucred *cred; 5839192800Strasz struct thread *td; 5840192800Strasz } */ *ap; 5841192800Strasz{ 5842192800Strasz int error; 5843192800Strasz vsecattr_t vsecattr; 5844192800Strasz 5845192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 5846197435Strasz return (EINVAL); 5847192800Strasz 5848192800Strasz vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 5849192800Strasz if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)) 5850192800Strasz return (error); 5851192800Strasz 5852192800Strasz error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt); 5853196303Spjd if (vsecattr.vsa_aclentp != NULL) 5854196303Spjd kmem_free(vsecattr.vsa_aclentp, 
vsecattr.vsa_aclentsz); 5855192800Strasz 5856196303Spjd return (error); 5857192800Strasz} 5858192800Strasz 5859192800Straszint 5860192800Straszzfs_freebsd_setacl(ap) 5861192800Strasz struct vop_setacl_args /* { 5862192800Strasz struct vnode *vp; 5863192800Strasz acl_type_t type; 5864192800Strasz struct acl *aclp; 5865192800Strasz struct ucred *cred; 5866192800Strasz struct thread *td; 5867192800Strasz } */ *ap; 5868192800Strasz{ 5869192800Strasz int error; 5870192800Strasz vsecattr_t vsecattr; 5871192800Strasz int aclbsize; /* size of acl list in bytes */ 5872192800Strasz aclent_t *aaclp; 5873192800Strasz 5874192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 5875197435Strasz return (EINVAL); 5876192800Strasz 5877314710Smm if (ap->a_aclp == NULL) 5878314710Smm return (EINVAL); 5879314710Smm 5880192800Strasz if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 5881192800Strasz return (EINVAL); 5882192800Strasz 5883192800Strasz /* 5884196949Strasz * With NFSv4 ACLs, chmod(2) may need to add additional entries, 5885192800Strasz * splitting every entry into two and appending "canonical six" 5886192800Strasz * entries at the end. Don't allow for setting an ACL that would 5887192800Strasz * cause chmod(2) to run out of ACL entries. 
5888192800Strasz */ 5889192800Strasz if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 5890192800Strasz return (ENOSPC); 5891192800Strasz 5892208030Strasz error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 5893208030Strasz if (error != 0) 5894208030Strasz return (error); 5895208030Strasz 5896192800Strasz vsecattr.vsa_mask = VSA_ACE; 5897192800Strasz aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t); 5898192800Strasz vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 5899192800Strasz aaclp = vsecattr.vsa_aclentp; 5900192800Strasz vsecattr.vsa_aclentsz = aclbsize; 5901192800Strasz 5902192800Strasz aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 5903192800Strasz error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL); 5904192800Strasz kmem_free(aaclp, aclbsize); 5905192800Strasz 5906192800Strasz return (error); 5907192800Strasz} 5908192800Strasz 5909192800Straszint 5910192800Straszzfs_freebsd_aclcheck(ap) 5911192800Strasz struct vop_aclcheck_args /* { 5912192800Strasz struct vnode *vp; 5913192800Strasz acl_type_t type; 5914192800Strasz struct acl *aclp; 5915192800Strasz struct ucred *cred; 5916192800Strasz struct thread *td; 5917192800Strasz } */ *ap; 5918192800Strasz{ 5919192800Strasz 5920192800Strasz return (EOPNOTSUPP); 5921192800Strasz} 5922192800Strasz 5923299906Savgstatic int 5924299906Savgzfs_vptocnp(struct vop_vptocnp_args *ap) 5925299906Savg{ 5926299906Savg vnode_t *covered_vp; 5927299906Savg vnode_t *vp = ap->a_vp;; 5928299906Savg zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 5929299906Savg znode_t *zp = VTOZ(vp); 5930299906Savg uint64_t parent; 5931299906Savg int ltype; 5932299906Savg int error; 5933299906Savg 5934301870Savg ZFS_ENTER(zfsvfs); 5935301870Savg ZFS_VERIFY_ZP(zp); 5936301870Savg 5937299906Savg /* 5938299906Savg * If we are a snapshot mounted under .zfs, run the operation 5939299906Savg * on the covered vnode. 
5940299906Savg */ 5941299906Savg if ((error = sa_lookup(zp->z_sa_hdl, 5942301870Savg SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) { 5943301870Savg ZFS_EXIT(zfsvfs); 5944299906Savg return (error); 5945301870Savg } 5946299906Savg 5947301870Savg if (zp->z_id != parent || zfsvfs->z_parent == zfsvfs) { 5948307995Savg char name[MAXNAMLEN + 1]; 5949307995Savg znode_t *dzp; 5950307995Savg size_t len; 5951307995Savg 5952307995Savg error = zfs_znode_parent_and_name(zp, &dzp, name); 5953307995Savg if (error == 0) { 5954307995Savg len = strlen(name); 5955314030Savg if (*ap->a_buflen < len) 5956314030Savg error = SET_ERROR(ENOMEM); 5957314030Savg } 5958314030Savg if (error == 0) { 5959307995Savg *ap->a_buflen -= len; 5960307995Savg bcopy(name, ap->a_buf + *ap->a_buflen, len); 5961307995Savg *ap->a_vpp = ZTOV(dzp); 5962307995Savg } 5963301870Savg ZFS_EXIT(zfsvfs); 5964307995Savg return (error); 5965301870Savg } 5966301870Savg ZFS_EXIT(zfsvfs); 5967299906Savg 5968299906Savg covered_vp = vp->v_mount->mnt_vnodecovered; 5969299906Savg vhold(covered_vp); 5970299906Savg ltype = VOP_ISLOCKED(vp); 5971299906Savg VOP_UNLOCK(vp, 0); 5972315842Savg error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread); 5973299906Savg if (error == 0) { 5974299906Savg error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred, 5975299906Savg ap->a_buf, ap->a_buflen); 5976299906Savg vput(covered_vp); 5977299906Savg } 5978299906Savg vn_lock(vp, ltype | LK_RETRY); 5979299906Savg if ((vp->v_iflag & VI_DOOMED) != 0) 5980299906Savg error = SET_ERROR(ENOENT); 5981299906Savg return (error); 5982299906Savg} 5983299906Savg 5984303970Savg#ifdef DIAGNOSTIC 5985303970Savgstatic int 5986303970Savgzfs_lock(ap) 5987303970Savg struct vop_lock1_args /* { 5988303970Savg struct vnode *a_vp; 5989303970Savg int a_flags; 5990303970Savg char *file; 5991303970Savg int line; 5992303970Savg } */ *ap; 5993303970Savg{ 5994310066Savg vnode_t *vp; 5995303970Savg znode_t *zp; 5996303970Savg int err; 5997303970Savg 5998303970Savg 
err = vop_stdlock(ap); 5999310066Savg if (err == 0 && (ap->a_flags & LK_NOWAIT) == 0) { 6000310066Savg vp = ap->a_vp; 6001310066Savg zp = vp->v_data; 6002310066Savg if (vp->v_mount != NULL && (vp->v_iflag & VI_DOOMED) == 0 && 6003310066Savg zp != NULL && (zp->z_pflags & ZFS_XATTR) == 0) 6004310066Savg VERIFY(!RRM_LOCK_HELD(&zp->z_zfsvfs->z_teardown_lock)); 6005303970Savg } 6006303970Savg return (err); 6007303970Savg} 6008303970Savg#endif 6009303970Savg 6010168404Spjdstruct vop_vector zfs_vnodeops; 6011168404Spjdstruct vop_vector zfs_fifoops; 6012209962Smmstruct vop_vector zfs_shareops; 6013168404Spjd 6014168404Spjdstruct vop_vector zfs_vnodeops = { 6015185029Spjd .vop_default = &default_vnodeops, 6016185029Spjd .vop_inactive = zfs_freebsd_inactive, 6017185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 6018185029Spjd .vop_access = zfs_freebsd_access, 6019303970Savg .vop_lookup = zfs_cache_lookup, 6020185029Spjd .vop_cachedlookup = zfs_freebsd_lookup, 6021185029Spjd .vop_getattr = zfs_freebsd_getattr, 6022185029Spjd .vop_setattr = zfs_freebsd_setattr, 6023185029Spjd .vop_create = zfs_freebsd_create, 6024185029Spjd .vop_mknod = zfs_freebsd_create, 6025185029Spjd .vop_mkdir = zfs_freebsd_mkdir, 6026185029Spjd .vop_readdir = zfs_freebsd_readdir, 6027185029Spjd .vop_fsync = zfs_freebsd_fsync, 6028185029Spjd .vop_open = zfs_freebsd_open, 6029185029Spjd .vop_close = zfs_freebsd_close, 6030185029Spjd .vop_rmdir = zfs_freebsd_rmdir, 6031185029Spjd .vop_ioctl = zfs_freebsd_ioctl, 6032185029Spjd .vop_link = zfs_freebsd_link, 6033185029Spjd .vop_symlink = zfs_freebsd_symlink, 6034185029Spjd .vop_readlink = zfs_freebsd_readlink, 6035185029Spjd .vop_read = zfs_freebsd_read, 6036185029Spjd .vop_write = zfs_freebsd_write, 6037185029Spjd .vop_remove = zfs_freebsd_remove, 6038185029Spjd .vop_rename = zfs_freebsd_rename, 6039185029Spjd .vop_pathconf = zfs_freebsd_pathconf, 6040243518Savg .vop_bmap = zfs_freebsd_bmap, 6041185029Spjd .vop_fid = zfs_freebsd_fid, 6042185029Spjd 
.vop_getextattr = zfs_getextattr, 6043185029Spjd .vop_deleteextattr = zfs_deleteextattr, 6044185029Spjd .vop_setextattr = zfs_setextattr, 6045185029Spjd .vop_listextattr = zfs_listextattr, 6046192800Strasz .vop_getacl = zfs_freebsd_getacl, 6047192800Strasz .vop_setacl = zfs_freebsd_setacl, 6048192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6049213937Savg .vop_getpages = zfs_freebsd_getpages, 6050258746Savg .vop_putpages = zfs_freebsd_putpages, 6051299906Savg .vop_vptocnp = zfs_vptocnp, 6052303970Savg#ifdef DIAGNOSTIC 6053303970Savg .vop_lock1 = zfs_lock, 6054303970Savg#endif 6055168404Spjd}; 6056168404Spjd 6057169170Spjdstruct vop_vector zfs_fifoops = { 6058185029Spjd .vop_default = &fifo_specops, 6059200162Skib .vop_fsync = zfs_freebsd_fsync, 6060185029Spjd .vop_access = zfs_freebsd_access, 6061185029Spjd .vop_getattr = zfs_freebsd_getattr, 6062185029Spjd .vop_inactive = zfs_freebsd_inactive, 6063185029Spjd .vop_read = VOP_PANIC, 6064185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 6065185029Spjd .vop_setattr = zfs_freebsd_setattr, 6066185029Spjd .vop_write = VOP_PANIC, 6067196949Strasz .vop_pathconf = zfs_freebsd_fifo_pathconf, 6068185029Spjd .vop_fid = zfs_freebsd_fid, 6069192800Strasz .vop_getacl = zfs_freebsd_getacl, 6070192800Strasz .vop_setacl = zfs_freebsd_setacl, 6071192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6072168404Spjd}; 6073209962Smm 6074209962Smm/* 6075209962Smm * special share hidden files vnode operations template 6076209962Smm */ 6077209962Smmstruct vop_vector zfs_shareops = { 6078209962Smm .vop_default = &default_vnodeops, 6079209962Smm .vop_access = zfs_freebsd_access, 6080209962Smm .vop_inactive = zfs_freebsd_inactive, 6081209962Smm .vop_reclaim = zfs_freebsd_reclaim, 6082209962Smm .vop_fid = zfs_freebsd_fid, 6083209962Smm .vop_pathconf = zfs_freebsd_pathconf, 6084209962Smm}; 6085