zfs_vnops.c revision 254982
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22212694Smm * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23249195Smm * Copyright (c) 2013 by Delphix. All rights reserved. 24254585Sdelphij * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 
25168404Spjd */ 26168404Spjd 27169195Spjd/* Portions Copyright 2007 Jeremy Teo */ 28219089Spjd/* Portions Copyright 2010 Robert Milkowski */ 29169195Spjd 30168404Spjd#include <sys/types.h> 31168404Spjd#include <sys/param.h> 32168404Spjd#include <sys/time.h> 33168404Spjd#include <sys/systm.h> 34168404Spjd#include <sys/sysmacros.h> 35168404Spjd#include <sys/resource.h> 36168404Spjd#include <sys/vfs.h> 37248084Sattilio#include <sys/vm.h> 38168404Spjd#include <sys/vnode.h> 39168404Spjd#include <sys/file.h> 40168404Spjd#include <sys/stat.h> 41168404Spjd#include <sys/kmem.h> 42168404Spjd#include <sys/taskq.h> 43168404Spjd#include <sys/uio.h> 44168404Spjd#include <sys/atomic.h> 45168404Spjd#include <sys/namei.h> 46168404Spjd#include <sys/mman.h> 47168404Spjd#include <sys/cmn_err.h> 48168404Spjd#include <sys/errno.h> 49168404Spjd#include <sys/unistd.h> 50168404Spjd#include <sys/zfs_dir.h> 51168404Spjd#include <sys/zfs_ioctl.h> 52168404Spjd#include <sys/fs/zfs.h> 53168404Spjd#include <sys/dmu.h> 54219089Spjd#include <sys/dmu_objset.h> 55168404Spjd#include <sys/spa.h> 56168404Spjd#include <sys/txg.h> 57168404Spjd#include <sys/dbuf.h> 58168404Spjd#include <sys/zap.h> 59219089Spjd#include <sys/sa.h> 60168404Spjd#include <sys/dirent.h> 61168962Spjd#include <sys/policy.h> 62168962Spjd#include <sys/sunddi.h> 63168404Spjd#include <sys/filio.h> 64209962Smm#include <sys/sid.h> 65168404Spjd#include <sys/zfs_ctldir.h> 66185029Spjd#include <sys/zfs_fuid.h> 67219089Spjd#include <sys/zfs_sa.h> 68168404Spjd#include <sys/dnlc.h> 69168404Spjd#include <sys/zfs_rlock.h> 70185029Spjd#include <sys/extdirent.h> 71185029Spjd#include <sys/kidmap.h> 72168404Spjd#include <sys/bio.h> 73168404Spjd#include <sys/buf.h> 74168404Spjd#include <sys/sf_buf.h> 75168404Spjd#include <sys/sched.h> 76192800Strasz#include <sys/acl.h> 77239077Smarius#include <vm/vm_param.h> 78215401Savg#include <vm/vm_pageout.h> 79168404Spjd 80168404Spjd/* 81168404Spjd * Programming rules. 
 *
 * Each vnode op performs some logical unit of work.  To do this, the ZPL must
 * properly lock its in-core state, create a DMU transaction, do the work,
 * record this work in the intent log (ZIL), commit the DMU transaction,
 * and wait for the intent log to commit if it is a synchronous operation.
 * Moreover, the vnode ops must work in both normal and log replay context.
 * The ordering of events is important to avoid deadlocks and references
 * to freed memory.  The example below illustrates the following Big Rules:
 *
 *  (1)	A check must be made in each zfs thread for a mounted file system.
 *	This is done avoiding races using ZFS_ENTER(zfsvfs).
 *	A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
 *	must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
 *	can return EIO from the calling function.
 *
 *  (2)	VN_RELE() should always be the last thing except for zil_commit()
 *	(if necessary) and ZFS_EXIT(). This is for 3 reasons:
 *	First, if it's the last reference, the vnode/znode
 *	can be freed, so the zp may point to freed memory.  Second, the last
 *	reference will call zfs_zinactive(), which may induce a lot of work --
 *	pushing cached pages (which acquires range locks) and syncing out
 *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
 *	which could deadlock the system if you were already holding one.
 *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
 *
 *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
 *	as they can span dmu_tx_assign() calls.
 *
 *  (4)	Always pass TXG_NOWAIT as the second argument to dmu_tx_assign().
 *	This is critical because we don't want to block while holding locks.
 *	Note, in particular, that if a lock is sometimes acquired before
 *	the tx assigns, and sometimes after (e.g. z_lock), then failing to
 *	use a non-blocking assign can deadlock the system.  The scenario:
 *
 *	Thread A has grabbed a lock before calling dmu_tx_assign().
 *	Thread B is in an already-assigned tx, and blocks for this lock.
 *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
 *	forever, because the previous txg can't quiesce until B's tx commits.
 *
 *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
 *	then drop all locks, call dmu_tx_wait(), and try again.
 *
 *  (5)	If the operation succeeded, generate the intent log entry for it
 *	before dropping locks.  This ensures that the ordering of events
 *	in the intent log matches the order in which they actually occurred.
 *	During ZIL replay the zfs_log_* functions will update the sequence
 *	number to indicate the zil transaction has replayed.
 *
 *  (6)	At the end of each vnode op, the DMU tx must always commit,
 *	regardless of whether there were any errors.
 *
 *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
 *	to ensure that synchronous semantics are provided when necessary.
 *
 * In general, this is how things should be ordered in each vnode op:
 *
 *	ZFS_ENTER(zfsvfs);		// exit if unmounted
 * top:
 *	zfs_dirent_lock(&dl, ...)
// lock directory entry (may VN_HOLD()) 141168404Spjd * rw_enter(...); // grab any other locks you need 142168404Spjd * tx = dmu_tx_create(...); // get DMU tx 143168404Spjd * dmu_tx_hold_*(); // hold each object you might modify 144209962Smm * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign 145168404Spjd * if (error) { 146168404Spjd * rw_exit(...); // drop locks 147168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 148168404Spjd * VN_RELE(...); // release held vnodes 149209962Smm * if (error == ERESTART) { 150168404Spjd * dmu_tx_wait(tx); 151168404Spjd * dmu_tx_abort(tx); 152168404Spjd * goto top; 153168404Spjd * } 154168404Spjd * dmu_tx_abort(tx); // abort DMU tx 155168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 156168404Spjd * return (error); // really out of space 157168404Spjd * } 158168404Spjd * error = do_real_work(); // do whatever this VOP does 159168404Spjd * if (error == 0) 160168404Spjd * zfs_log_*(...); // on success, make ZIL entry 161168404Spjd * dmu_tx_commit(tx); // commit DMU tx -- error or not 162168404Spjd * rw_exit(...); // drop locks 163168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 164168404Spjd * VN_RELE(...); // release held vnodes 165219089Spjd * zil_commit(zilog, foid); // synchronous when necessary 166168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 167168404Spjd * return (error); // done, report error 168168404Spjd */ 169185029Spjd 170168404Spjd/* ARGSUSED */ 171168404Spjdstatic int 172185029Spjdzfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 173168404Spjd{ 174168962Spjd znode_t *zp = VTOZ(*vpp); 175209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 176168404Spjd 177209962Smm ZFS_ENTER(zfsvfs); 178209962Smm ZFS_VERIFY_ZP(zp); 179209962Smm 180219089Spjd if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 181185029Spjd ((flag & FAPPEND) == 0)) { 182209962Smm ZFS_EXIT(zfsvfs); 183249195Smm return (SET_ERROR(EPERM)); 184185029Spjd } 185185029Spjd 186185029Spjd if (!zfs_has_ctldir(zp) && 
zp->z_zfsvfs->z_vscan && 187185029Spjd ZTOV(zp)->v_type == VREG && 188219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 189209962Smm if (fs_vscan(*vpp, cr, 0) != 0) { 190209962Smm ZFS_EXIT(zfsvfs); 191249195Smm return (SET_ERROR(EACCES)); 192209962Smm } 193209962Smm } 194185029Spjd 195168404Spjd /* Keep a count of the synchronous opens in the znode */ 196168962Spjd if (flag & (FSYNC | FDSYNC)) 197168404Spjd atomic_inc_32(&zp->z_sync_cnt); 198185029Spjd 199209962Smm ZFS_EXIT(zfsvfs); 200168404Spjd return (0); 201168404Spjd} 202168404Spjd 203168404Spjd/* ARGSUSED */ 204168404Spjdstatic int 205185029Spjdzfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 206185029Spjd caller_context_t *ct) 207168404Spjd{ 208168962Spjd znode_t *zp = VTOZ(vp); 209209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 210168404Spjd 211210470Smm /* 212210470Smm * Clean up any locks held by this process on the vp. 213210470Smm */ 214210470Smm cleanlocks(vp, ddi_get_pid(), 0); 215210470Smm cleanshares(vp, ddi_get_pid()); 216210470Smm 217209962Smm ZFS_ENTER(zfsvfs); 218209962Smm ZFS_VERIFY_ZP(zp); 219209962Smm 220168404Spjd /* Decrement the synchronous opens in the znode */ 221185029Spjd if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 222168404Spjd atomic_dec_32(&zp->z_sync_cnt); 223168404Spjd 224185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 225185029Spjd ZTOV(zp)->v_type == VREG && 226219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 227185029Spjd VERIFY(fs_vscan(vp, cr, 1) == 0); 228185029Spjd 229209962Smm ZFS_EXIT(zfsvfs); 230168404Spjd return (0); 231168404Spjd} 232168404Spjd 233168404Spjd/* 234168404Spjd * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 235168404Spjd * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 
236168404Spjd */ 237168404Spjdstatic int 238168978Spjdzfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 239168404Spjd{ 240168404Spjd znode_t *zp = VTOZ(vp); 241168404Spjd uint64_t noff = (uint64_t)*off; /* new offset */ 242168404Spjd uint64_t file_sz; 243168404Spjd int error; 244168404Spjd boolean_t hole; 245168404Spjd 246219089Spjd file_sz = zp->z_size; 247168404Spjd if (noff >= file_sz) { 248249195Smm return (SET_ERROR(ENXIO)); 249168404Spjd } 250168404Spjd 251168962Spjd if (cmd == _FIO_SEEK_HOLE) 252168404Spjd hole = B_TRUE; 253168404Spjd else 254168404Spjd hole = B_FALSE; 255168404Spjd 256168404Spjd error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 257168404Spjd 258168404Spjd /* end of file? */ 259168404Spjd if ((error == ESRCH) || (noff > file_sz)) { 260168404Spjd /* 261168404Spjd * Handle the virtual hole at the end of file. 262168404Spjd */ 263168404Spjd if (hole) { 264168404Spjd *off = file_sz; 265168404Spjd return (0); 266168404Spjd } 267249195Smm return (SET_ERROR(ENXIO)); 268168404Spjd } 269168404Spjd 270168404Spjd if (noff < *off) 271168404Spjd return (error); 272168404Spjd *off = noff; 273168404Spjd return (error); 274168404Spjd} 275168404Spjd 276168404Spjd/* ARGSUSED */ 277168404Spjdstatic int 278168978Spjdzfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 279185029Spjd int *rvalp, caller_context_t *ct) 280168404Spjd{ 281168962Spjd offset_t off; 282168962Spjd int error; 283168962Spjd zfsvfs_t *zfsvfs; 284185029Spjd znode_t *zp; 285168404Spjd 286168404Spjd switch (com) { 287185029Spjd case _FIOFFS: 288168962Spjd return (0); 289168404Spjd 290168962Spjd /* 291168962Spjd * The following two ioctls are used by bfu. Faking out, 292168962Spjd * necessary to avoid bfu errors. 
293168962Spjd */ 294185029Spjd case _FIOGDIO: 295185029Spjd case _FIOSDIO: 296168962Spjd return (0); 297168962Spjd 298185029Spjd case _FIO_SEEK_DATA: 299185029Spjd case _FIO_SEEK_HOLE: 300233918Savg#ifdef sun 301168962Spjd if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 302249195Smm return (SET_ERROR(EFAULT)); 303233918Savg#else 304233918Savg off = *(offset_t *)data; 305233918Savg#endif 306185029Spjd zp = VTOZ(vp); 307185029Spjd zfsvfs = zp->z_zfsvfs; 308168404Spjd ZFS_ENTER(zfsvfs); 309185029Spjd ZFS_VERIFY_ZP(zp); 310168404Spjd 311168404Spjd /* offset parameter is in/out */ 312168404Spjd error = zfs_holey(vp, com, &off); 313168404Spjd ZFS_EXIT(zfsvfs); 314168404Spjd if (error) 315168404Spjd return (error); 316233918Savg#ifdef sun 317168962Spjd if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 318249195Smm return (SET_ERROR(EFAULT)); 319233918Savg#else 320233918Savg *(offset_t *)data = off; 321233918Savg#endif 322168404Spjd return (0); 323168404Spjd } 324249195Smm return (SET_ERROR(ENOTTY)); 325168404Spjd} 326168404Spjd 327209962Smmstatic vm_page_t 328253953Sattiliopage_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 329209962Smm{ 330209962Smm vm_object_t obj; 331209962Smm vm_page_t pp; 332209962Smm 333209962Smm obj = vp->v_object; 334248084Sattilio zfs_vmobject_assert_wlocked(obj); 335209962Smm 336209962Smm for (;;) { 337209962Smm if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 338246293Savg pp->valid) { 339254138Sattilio if (vm_page_xbusied(pp)) { 340212652Savg /* 341212652Savg * Reference the page before unlocking and 342212652Savg * sleeping so that the page daemon is less 343212652Savg * likely to reclaim it. 
344212652Savg */ 345225418Skib vm_page_reference(pp); 346254138Sattilio vm_page_lock(pp); 347254138Sattilio zfs_vmobject_wunlock(obj); 348254138Sattilio vm_page_busy_sleep(pp, "zfsmwb"); 349254138Sattilio zfs_vmobject_wlock(obj); 350209962Smm continue; 351212652Savg } 352254138Sattilio vm_page_sbusy(pp); 353252337Sgavin } else if (pp == NULL) { 354246293Savg pp = vm_page_alloc(obj, OFF_TO_IDX(start), 355246293Savg VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED | 356254138Sattilio VM_ALLOC_SBUSY); 357252337Sgavin } else { 358252337Sgavin ASSERT(pp != NULL && !pp->valid); 359252337Sgavin pp = NULL; 360209962Smm } 361246293Savg 362246293Savg if (pp != NULL) { 363246293Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 364253953Sattilio vm_object_pip_add(obj, 1); 365246293Savg pmap_remove_write(pp); 366246293Savg vm_page_clear_dirty(pp, off, nbytes); 367246293Savg } 368209962Smm break; 369209962Smm } 370209962Smm return (pp); 371209962Smm} 372209962Smm 373209962Smmstatic void 374253953Sattiliopage_unbusy(vm_page_t pp) 375209962Smm{ 376209962Smm 377254138Sattilio vm_page_sunbusy(pp); 378253953Sattilio vm_object_pip_subtract(pp->object, 1); 379209962Smm} 380209962Smm 381253953Sattiliostatic vm_page_t 382253953Sattiliopage_hold(vnode_t *vp, int64_t start) 383253953Sattilio{ 384253953Sattilio vm_object_t obj; 385253953Sattilio vm_page_t pp; 386253953Sattilio 387253953Sattilio obj = vp->v_object; 388253953Sattilio zfs_vmobject_assert_wlocked(obj); 389253953Sattilio 390253953Sattilio for (;;) { 391253953Sattilio if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 392253953Sattilio pp->valid) { 393254138Sattilio if (vm_page_xbusied(pp)) { 394253953Sattilio /* 395253953Sattilio * Reference the page before unlocking and 396253953Sattilio * sleeping so that the page daemon is less 397253953Sattilio * likely to reclaim it. 
398253953Sattilio */ 399253953Sattilio vm_page_reference(pp); 400254138Sattilio vm_page_lock(pp); 401254138Sattilio zfs_vmobject_wunlock(obj); 402254138Sattilio vm_page_busy_sleep(pp, "zfsmwb"); 403254138Sattilio zfs_vmobject_wlock(obj); 404253953Sattilio continue; 405253953Sattilio } 406253953Sattilio 407253953Sattilio ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 408253953Sattilio vm_page_lock(pp); 409253953Sattilio vm_page_hold(pp); 410253953Sattilio vm_page_unlock(pp); 411253953Sattilio 412253953Sattilio } else 413253953Sattilio pp = NULL; 414253953Sattilio break; 415253953Sattilio } 416253953Sattilio return (pp); 417253953Sattilio} 418253953Sattilio 419253953Sattiliostatic void 420253953Sattiliopage_unhold(vm_page_t pp) 421253953Sattilio{ 422253953Sattilio 423253953Sattilio vm_page_lock(pp); 424253953Sattilio vm_page_unhold(pp); 425253953Sattilio vm_page_unlock(pp); 426253953Sattilio} 427253953Sattilio 428209962Smmstatic caddr_t 429209962Smmzfs_map_page(vm_page_t pp, struct sf_buf **sfp) 430209962Smm{ 431209962Smm 432212951Savg *sfp = sf_buf_alloc(pp, 0); 433209962Smm return ((caddr_t)sf_buf_kva(*sfp)); 434209962Smm} 435209962Smm 436209962Smmstatic void 437209962Smmzfs_unmap_page(struct sf_buf *sf) 438209962Smm{ 439209962Smm 440209962Smm sf_buf_free(sf); 441209962Smm} 442209962Smm 443168404Spjd/* 444168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 445168404Spjd * between the DMU cache and the memory mapped pages. What this means: 446168404Spjd * 447168404Spjd * On Write: If we find a memory mapped page, we write to *both* 448168404Spjd * the page and the dmu buffer. 
449168404Spjd */ 450209962Smmstatic void 451209962Smmupdate_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 452209962Smm int segflg, dmu_tx_t *tx) 453168404Spjd{ 454168404Spjd vm_object_t obj; 455168404Spjd struct sf_buf *sf; 456246293Savg caddr_t va; 457212655Savg int off; 458168404Spjd 459168404Spjd ASSERT(vp->v_mount != NULL); 460168404Spjd obj = vp->v_object; 461168404Spjd ASSERT(obj != NULL); 462168404Spjd 463168404Spjd off = start & PAGEOFFSET; 464248084Sattilio zfs_vmobject_wlock(obj); 465168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 466209962Smm vm_page_t pp; 467246293Savg int nbytes = imin(PAGESIZE - off, len); 468168404Spjd 469246293Savg if (segflg == UIO_NOCOPY) { 470246293Savg pp = vm_page_lookup(obj, OFF_TO_IDX(start)); 471246293Savg KASSERT(pp != NULL, 472246293Savg ("zfs update_pages: NULL page in putpages case")); 473246293Savg KASSERT(off == 0, 474246293Savg ("zfs update_pages: unaligned data in putpages case")); 475246293Savg KASSERT(pp->valid == VM_PAGE_BITS_ALL, 476246293Savg ("zfs update_pages: invalid page in putpages case")); 477254138Sattilio KASSERT(vm_page_sbusied(pp), 478246293Savg ("zfs update_pages: unbusy page in putpages case")); 479246293Savg KASSERT(!pmap_page_is_write_mapped(pp), 480246293Savg ("zfs update_pages: writable page in putpages case")); 481248084Sattilio zfs_vmobject_wunlock(obj); 482168404Spjd 483246293Savg va = zfs_map_page(pp, &sf); 484246293Savg (void) dmu_write(os, oid, start, nbytes, va, tx); 485246293Savg zfs_unmap_page(sf); 486246293Savg 487248084Sattilio zfs_vmobject_wlock(obj); 488246293Savg vm_page_undirty(pp); 489253953Sattilio } else if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { 490248084Sattilio zfs_vmobject_wunlock(obj); 491246293Savg 492209962Smm va = zfs_map_page(pp, &sf); 493246293Savg (void) dmu_read(os, oid, start+off, nbytes, 494246293Savg va+off, DMU_READ_PREFETCH);; 495209962Smm zfs_unmap_page(sf); 496246293Savg 497248084Sattilio 
zfs_vmobject_wlock(obj); 498253953Sattilio page_unbusy(pp); 499168404Spjd } 500209962Smm len -= nbytes; 501168404Spjd off = 0; 502168404Spjd } 503246293Savg if (segflg != UIO_NOCOPY) 504246293Savg vm_object_pip_wakeupn(obj, 0); 505248084Sattilio zfs_vmobject_wunlock(obj); 506168404Spjd} 507168404Spjd 508168404Spjd/* 509219089Spjd * Read with UIO_NOCOPY flag means that sendfile(2) requests 510219089Spjd * ZFS to populate a range of page cache pages with data. 511219089Spjd * 512219089Spjd * NOTE: this function could be optimized to pre-allocate 513254138Sattilio * all pages in advance, drain exclusive busy on all of them, 514219089Spjd * map them into contiguous KVA region and populate them 515219089Spjd * in one single dmu_read() call. 516219089Spjd */ 517219089Spjdstatic int 518219089Spjdmappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) 519219089Spjd{ 520219089Spjd znode_t *zp = VTOZ(vp); 521219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 522219089Spjd struct sf_buf *sf; 523219089Spjd vm_object_t obj; 524219089Spjd vm_page_t pp; 525219089Spjd int64_t start; 526219089Spjd caddr_t va; 527219089Spjd int len = nbytes; 528219089Spjd int off; 529219089Spjd int error = 0; 530219089Spjd 531219089Spjd ASSERT(uio->uio_segflg == UIO_NOCOPY); 532219089Spjd ASSERT(vp->v_mount != NULL); 533219089Spjd obj = vp->v_object; 534219089Spjd ASSERT(obj != NULL); 535219089Spjd ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); 536219089Spjd 537248084Sattilio zfs_vmobject_wlock(obj); 538219089Spjd for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { 539219089Spjd int bytes = MIN(PAGESIZE, len); 540219089Spjd 541254138Sattilio pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY | 542254649Skib VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); 543219089Spjd if (pp->valid == 0) { 544248084Sattilio zfs_vmobject_wunlock(obj); 545219089Spjd va = zfs_map_page(pp, &sf); 546219089Spjd error = dmu_read(os, zp->z_id, start, bytes, va, 547219089Spjd DMU_READ_PREFETCH); 548219089Spjd if (bytes != 
PAGESIZE && error == 0) 549219089Spjd bzero(va + bytes, PAGESIZE - bytes); 550219089Spjd zfs_unmap_page(sf); 551248084Sattilio zfs_vmobject_wlock(obj); 552254138Sattilio vm_page_sunbusy(pp); 553219089Spjd vm_page_lock(pp); 554219089Spjd if (error) { 555253073Savg if (pp->wire_count == 0 && pp->valid == 0 && 556254138Sattilio !vm_page_busied(pp)) 557253073Savg vm_page_free(pp); 558219089Spjd } else { 559219089Spjd pp->valid = VM_PAGE_BITS_ALL; 560219089Spjd vm_page_activate(pp); 561219089Spjd } 562219089Spjd vm_page_unlock(pp); 563254138Sattilio } else 564254138Sattilio vm_page_sunbusy(pp); 565219089Spjd if (error) 566219089Spjd break; 567219089Spjd uio->uio_resid -= bytes; 568219089Spjd uio->uio_offset += bytes; 569219089Spjd len -= bytes; 570219089Spjd } 571248084Sattilio zfs_vmobject_wunlock(obj); 572219089Spjd return (error); 573219089Spjd} 574219089Spjd 575219089Spjd/* 576168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 577168404Spjd * between the DMU cache and the memory mapped pages. What this means: 578168404Spjd * 579168404Spjd * On Read: We "read" preferentially from memory mapped pages, 580168404Spjd * else we default from the dmu buffer. 581168404Spjd * 582168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 583251631Sdelphij * the file is memory mapped. 
584168404Spjd */ 585168404Spjdstatic int 586168404Spjdmappedread(vnode_t *vp, int nbytes, uio_t *uio) 587168404Spjd{ 588168404Spjd znode_t *zp = VTOZ(vp); 589168404Spjd objset_t *os = zp->z_zfsvfs->z_os; 590168404Spjd vm_object_t obj; 591212655Savg int64_t start; 592168926Spjd caddr_t va; 593168404Spjd int len = nbytes; 594212655Savg int off; 595168404Spjd int error = 0; 596168404Spjd 597168404Spjd ASSERT(vp->v_mount != NULL); 598168404Spjd obj = vp->v_object; 599168404Spjd ASSERT(obj != NULL); 600168404Spjd 601168404Spjd start = uio->uio_loffset; 602168404Spjd off = start & PAGEOFFSET; 603248084Sattilio zfs_vmobject_wlock(obj); 604168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 605219089Spjd vm_page_t pp; 606219089Spjd uint64_t bytes = MIN(PAGESIZE - off, len); 607168404Spjd 608253953Sattilio if (pp = page_hold(vp, start)) { 609219089Spjd struct sf_buf *sf; 610219089Spjd caddr_t va; 611212652Savg 612248084Sattilio zfs_vmobject_wunlock(obj); 613219089Spjd va = zfs_map_page(pp, &sf); 614219089Spjd error = uiomove(va + off, bytes, UIO_READ, uio); 615219089Spjd zfs_unmap_page(sf); 616248084Sattilio zfs_vmobject_wlock(obj); 617253953Sattilio page_unhold(pp); 618219089Spjd } else { 619248084Sattilio zfs_vmobject_wunlock(obj); 620219089Spjd error = dmu_read_uio(os, zp->z_id, uio, bytes); 621248084Sattilio zfs_vmobject_wlock(obj); 622168404Spjd } 623168404Spjd len -= bytes; 624168404Spjd off = 0; 625168404Spjd if (error) 626168404Spjd break; 627168404Spjd } 628248084Sattilio zfs_vmobject_wunlock(obj); 629168404Spjd return (error); 630168404Spjd} 631168404Spjd 632168404Spjdoffset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 633168404Spjd 634168404Spjd/* 635168404Spjd * Read bytes from specified file into supplied buffer. 636168404Spjd * 637168404Spjd * IN: vp - vnode of file to be read from. 638168404Spjd * uio - structure supplying read location, range info, 639168404Spjd * and return buffer. 
640168404Spjd * ioflag - SYNC flags; used to provide FRSYNC semantics. 641168404Spjd * cr - credentials of caller. 642185029Spjd * ct - caller context 643168404Spjd * 644168404Spjd * OUT: uio - updated offset and range, buffer filled. 645168404Spjd * 646251631Sdelphij * RETURN: 0 on success, error code on failure. 647168404Spjd * 648168404Spjd * Side Effects: 649168404Spjd * vp - atime updated if byte count > 0 650168404Spjd */ 651168404Spjd/* ARGSUSED */ 652168404Spjdstatic int 653168962Spjdzfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 654168404Spjd{ 655168404Spjd znode_t *zp = VTOZ(vp); 656168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 657185029Spjd objset_t *os; 658168404Spjd ssize_t n, nbytes; 659247187Smm int error = 0; 660168404Spjd rl_t *rl; 661219089Spjd xuio_t *xuio = NULL; 662168404Spjd 663168404Spjd ZFS_ENTER(zfsvfs); 664185029Spjd ZFS_VERIFY_ZP(zp); 665185029Spjd os = zfsvfs->z_os; 666168404Spjd 667219089Spjd if (zp->z_pflags & ZFS_AV_QUARANTINED) { 668185029Spjd ZFS_EXIT(zfsvfs); 669249195Smm return (SET_ERROR(EACCES)); 670185029Spjd } 671185029Spjd 672168404Spjd /* 673168404Spjd * Validate file offset 674168404Spjd */ 675168404Spjd if (uio->uio_loffset < (offset_t)0) { 676168404Spjd ZFS_EXIT(zfsvfs); 677249195Smm return (SET_ERROR(EINVAL)); 678168404Spjd } 679168404Spjd 680168404Spjd /* 681168404Spjd * Fasttrack empty reads 682168404Spjd */ 683168404Spjd if (uio->uio_resid == 0) { 684168404Spjd ZFS_EXIT(zfsvfs); 685168404Spjd return (0); 686168404Spjd } 687168404Spjd 688168404Spjd /* 689168962Spjd * Check for mandatory locks 690168962Spjd */ 691219089Spjd if (MANDMODE(zp->z_mode)) { 692168962Spjd if (error = chklock(vp, FREAD, 693168962Spjd uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 694168962Spjd ZFS_EXIT(zfsvfs); 695168962Spjd return (error); 696168962Spjd } 697168962Spjd } 698168962Spjd 699168962Spjd /* 700168404Spjd * If we're in FRSYNC mode, sync out this znode before reading it. 
701168404Spjd */ 702224605Smm if (zfsvfs->z_log && 703224605Smm (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) 704219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 705168404Spjd 706168404Spjd /* 707168404Spjd * Lock the range against changes. 708168404Spjd */ 709168404Spjd rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 710168404Spjd 711168404Spjd /* 712168404Spjd * If we are reading past end-of-file we can skip 713168404Spjd * to the end; but we might still need to set atime. 714168404Spjd */ 715219089Spjd if (uio->uio_loffset >= zp->z_size) { 716168404Spjd error = 0; 717168404Spjd goto out; 718168404Spjd } 719168404Spjd 720219089Spjd ASSERT(uio->uio_loffset < zp->z_size); 721219089Spjd n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 722168404Spjd 723219089Spjd#ifdef sun 724219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 725219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 726219089Spjd int nblk; 727219089Spjd int blksz = zp->z_blksz; 728219089Spjd uint64_t offset = uio->uio_loffset; 729219089Spjd 730219089Spjd xuio = (xuio_t *)uio; 731219089Spjd if ((ISP2(blksz))) { 732219089Spjd nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 733219089Spjd blksz)) / blksz; 734219089Spjd } else { 735219089Spjd ASSERT(offset + n <= blksz); 736219089Spjd nblk = 1; 737219089Spjd } 738219089Spjd (void) dmu_xuio_init(xuio, nblk); 739219089Spjd 740219089Spjd if (vn_has_cached_data(vp)) { 741219089Spjd /* 742219089Spjd * For simplicity, we always allocate a full buffer 743219089Spjd * even if we only expect to read a portion of a block. 
744219089Spjd */ 745219089Spjd while (--nblk >= 0) { 746219089Spjd (void) dmu_xuio_add(xuio, 747219089Spjd dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 748219089Spjd blksz), 0, blksz); 749219089Spjd } 750219089Spjd } 751219089Spjd } 752219089Spjd#endif /* sun */ 753219089Spjd 754168404Spjd while (n > 0) { 755168404Spjd nbytes = MIN(n, zfs_read_chunk_size - 756168404Spjd P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 757168404Spjd 758219089Spjd#ifdef __FreeBSD__ 759219089Spjd if (uio->uio_segflg == UIO_NOCOPY) 760219089Spjd error = mappedread_sf(vp, nbytes, uio); 761219089Spjd else 762219089Spjd#endif /* __FreeBSD__ */ 763168404Spjd if (vn_has_cached_data(vp)) 764168404Spjd error = mappedread(vp, nbytes, uio); 765168404Spjd else 766168404Spjd error = dmu_read_uio(os, zp->z_id, uio, nbytes); 767185029Spjd if (error) { 768185029Spjd /* convert checksum errors into IO errors */ 769185029Spjd if (error == ECKSUM) 770249195Smm error = SET_ERROR(EIO); 771168404Spjd break; 772185029Spjd } 773168962Spjd 774168404Spjd n -= nbytes; 775168404Spjd } 776168404Spjdout: 777168404Spjd zfs_range_unlock(rl); 778168404Spjd 779168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 780168404Spjd ZFS_EXIT(zfsvfs); 781168404Spjd return (error); 782168404Spjd} 783168404Spjd 784168404Spjd/* 785168404Spjd * Write the bytes to a file. 786168404Spjd * 787168404Spjd * IN: vp - vnode of file to be written to. 788168404Spjd * uio - structure supplying write location, range info, 789168404Spjd * and data buffer. 790251631Sdelphij * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is 791251631Sdelphij * set if in append mode. 792168404Spjd * cr - credentials of caller. 793185029Spjd * ct - caller context (NFS/CIFS fem monitor only) 794168404Spjd * 795168404Spjd * OUT: uio - updated offset and range. 796168404Spjd * 797251631Sdelphij * RETURN: 0 on success, error code on failure. 
798168404Spjd * 799168404Spjd * Timestamps: 800168404Spjd * vp - ctime|mtime updated if byte count > 0 801168404Spjd */ 802219089Spjd 803168404Spjd/* ARGSUSED */ 804168404Spjdstatic int 805168962Spjdzfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 806168404Spjd{ 807168404Spjd znode_t *zp = VTOZ(vp); 808168962Spjd rlim64_t limit = MAXOFFSET_T; 809168404Spjd ssize_t start_resid = uio->uio_resid; 810168404Spjd ssize_t tx_bytes; 811168404Spjd uint64_t end_size; 812168404Spjd dmu_tx_t *tx; 813168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 814185029Spjd zilog_t *zilog; 815168404Spjd offset_t woff; 816168404Spjd ssize_t n, nbytes; 817168404Spjd rl_t *rl; 818168404Spjd int max_blksz = zfsvfs->z_max_blksz; 819247187Smm int error = 0; 820209962Smm arc_buf_t *abuf; 821247187Smm iovec_t *aiov = NULL; 822219089Spjd xuio_t *xuio = NULL; 823219089Spjd int i_iov = 0; 824219089Spjd int iovcnt = uio->uio_iovcnt; 825219089Spjd iovec_t *iovp = uio->uio_iov; 826219089Spjd int write_eof; 827219089Spjd int count = 0; 828219089Spjd sa_bulk_attr_t bulk[4]; 829219089Spjd uint64_t mtime[2], ctime[2]; 830168404Spjd 831168404Spjd /* 832168404Spjd * Fasttrack empty write 833168404Spjd */ 834168404Spjd n = start_resid; 835168404Spjd if (n == 0) 836168404Spjd return (0); 837168404Spjd 838168962Spjd if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 839168962Spjd limit = MAXOFFSET_T; 840168962Spjd 841168404Spjd ZFS_ENTER(zfsvfs); 842185029Spjd ZFS_VERIFY_ZP(zp); 843168404Spjd 844219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 845219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 846219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 847219089Spjd &zp->z_size, 8); 848219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 849219089Spjd &zp->z_pflags, 8); 850219089Spjd 851168404Spjd /* 852185029Spjd * If immutable or not appending then return EPERM 853185029Spjd */ 
854219089Spjd if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 855219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 856219089Spjd (uio->uio_loffset < zp->z_size))) { 857185029Spjd ZFS_EXIT(zfsvfs); 858249195Smm return (SET_ERROR(EPERM)); 859185029Spjd } 860185029Spjd 861185029Spjd zilog = zfsvfs->z_log; 862185029Spjd 863185029Spjd /* 864219089Spjd * Validate file offset 865219089Spjd */ 866219089Spjd woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 867219089Spjd if (woff < 0) { 868219089Spjd ZFS_EXIT(zfsvfs); 869249195Smm return (SET_ERROR(EINVAL)); 870219089Spjd } 871219089Spjd 872219089Spjd /* 873219089Spjd * Check for mandatory locks before calling zfs_range_lock() 874219089Spjd * in order to prevent a deadlock with locks set via fcntl(). 875219089Spjd */ 876219089Spjd if (MANDMODE((mode_t)zp->z_mode) && 877219089Spjd (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 878219089Spjd ZFS_EXIT(zfsvfs); 879219089Spjd return (error); 880219089Spjd } 881219089Spjd 882219089Spjd#ifdef sun 883219089Spjd /* 884168404Spjd * Pre-fault the pages to ensure slow (eg NFS) pages 885168404Spjd * don't hold up txg. 886219089Spjd * Skip this if uio contains loaned arc_buf. 887168404Spjd */ 888219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 889219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 890219089Spjd xuio = (xuio_t *)uio; 891219089Spjd else 892219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 893219089Spjd#endif /* sun */ 894168404Spjd 895168404Spjd /* 896168404Spjd * If in append mode, set the io offset pointer to eof. 897168404Spjd */ 898213673Spjd if (ioflag & FAPPEND) { 899168404Spjd /* 900219089Spjd * Obtain an appending range lock to guarantee file append 901219089Spjd * semantics. We reset the write offset once we have the lock. 
902168404Spjd */ 903168404Spjd rl = zfs_range_lock(zp, 0, n, RL_APPEND); 904219089Spjd woff = rl->r_off; 905168404Spjd if (rl->r_len == UINT64_MAX) { 906219089Spjd /* 907219089Spjd * We overlocked the file because this write will cause 908219089Spjd * the file block size to increase. 909219089Spjd * Note that zp_size cannot change with this lock held. 910219089Spjd */ 911219089Spjd woff = zp->z_size; 912168404Spjd } 913219089Spjd uio->uio_loffset = woff; 914168404Spjd } else { 915168404Spjd /* 916219089Spjd * Note that if the file block size will change as a result of 917219089Spjd * this write, then this range lock will lock the entire file 918219089Spjd * so that we can re-write the block safely. 919168404Spjd */ 920168404Spjd rl = zfs_range_lock(zp, woff, n, RL_WRITER); 921168404Spjd } 922168404Spjd 923235781Strasz if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { 924235781Strasz zfs_range_unlock(rl); 925235781Strasz ZFS_EXIT(zfsvfs); 926235781Strasz return (EFBIG); 927235781Strasz } 928235781Strasz 929168962Spjd if (woff >= limit) { 930168962Spjd zfs_range_unlock(rl); 931168962Spjd ZFS_EXIT(zfsvfs); 932249195Smm return (SET_ERROR(EFBIG)); 933168962Spjd } 934168962Spjd 935168962Spjd if ((woff + n) > limit || woff > (limit - n)) 936168962Spjd n = limit - woff; 937168962Spjd 938219089Spjd /* Will this write extend the file length? */ 939219089Spjd write_eof = (woff + n > zp->z_size); 940168404Spjd 941219089Spjd end_size = MAX(zp->z_size, woff + n); 942219089Spjd 943168404Spjd /* 944168404Spjd * Write the file in reasonable size chunks. Each chunk is written 945168404Spjd * in a separate transaction; this keeps the intent log records small 946168404Spjd * and allows us to do more fine-grained space accounting. 
947168404Spjd */ 948168404Spjd while (n > 0) { 949209962Smm abuf = NULL; 950209962Smm woff = uio->uio_loffset; 951209962Smmagain: 952219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 953219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 954209962Smm if (abuf != NULL) 955209962Smm dmu_return_arcbuf(abuf); 956249195Smm error = SET_ERROR(EDQUOT); 957209962Smm break; 958209962Smm } 959209962Smm 960219089Spjd if (xuio && abuf == NULL) { 961219089Spjd ASSERT(i_iov < iovcnt); 962219089Spjd aiov = &iovp[i_iov]; 963219089Spjd abuf = dmu_xuio_arcbuf(xuio, i_iov); 964219089Spjd dmu_xuio_clear(xuio, i_iov); 965219089Spjd DTRACE_PROBE3(zfs_cp_write, int, i_iov, 966219089Spjd iovec_t *, aiov, arc_buf_t *, abuf); 967219089Spjd ASSERT((aiov->iov_base == abuf->b_data) || 968219089Spjd ((char *)aiov->iov_base - (char *)abuf->b_data + 969219089Spjd aiov->iov_len == arc_buf_size(abuf))); 970219089Spjd i_iov++; 971219089Spjd } else if (abuf == NULL && n >= max_blksz && 972219089Spjd woff >= zp->z_size && 973209962Smm P2PHASE(woff, max_blksz) == 0 && 974209962Smm zp->z_blksz == max_blksz) { 975219089Spjd /* 976219089Spjd * This write covers a full block. "Borrow" a buffer 977219089Spjd * from the dmu so that we can fill it before we enter 978219089Spjd * a transaction. This avoids the possibility of 979219089Spjd * holding up the transaction if the data copy hangs 980219089Spjd * up on a pagefault (e.g., from an NFS server mapping). 981219089Spjd */ 982209962Smm size_t cbytes; 983209962Smm 984219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 985219089Spjd max_blksz); 986209962Smm ASSERT(abuf != NULL); 987209962Smm ASSERT(arc_buf_size(abuf) == max_blksz); 988209962Smm if (error = uiocopy(abuf->b_data, max_blksz, 989209962Smm UIO_WRITE, uio, &cbytes)) { 990209962Smm dmu_return_arcbuf(abuf); 991209962Smm break; 992209962Smm } 993209962Smm ASSERT(cbytes == max_blksz); 994209962Smm } 995209962Smm 996209962Smm /* 997168404Spjd * Start a transaction. 
998168404Spjd */ 999168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1000219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1001168404Spjd dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 1002219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1003209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 1004168404Spjd if (error) { 1005209962Smm if (error == ERESTART) { 1006168404Spjd dmu_tx_wait(tx); 1007168404Spjd dmu_tx_abort(tx); 1008209962Smm goto again; 1009168404Spjd } 1010168404Spjd dmu_tx_abort(tx); 1011209962Smm if (abuf != NULL) 1012209962Smm dmu_return_arcbuf(abuf); 1013168404Spjd break; 1014168404Spjd } 1015168404Spjd 1016168404Spjd /* 1017168404Spjd * If zfs_range_lock() over-locked we grow the blocksize 1018168404Spjd * and then reduce the lock range. This will only happen 1019168404Spjd * on the first iteration since zfs_range_reduce() will 1020168404Spjd * shrink down r_len to the appropriate size. 1021168404Spjd */ 1022168404Spjd if (rl->r_len == UINT64_MAX) { 1023168404Spjd uint64_t new_blksz; 1024168404Spjd 1025168404Spjd if (zp->z_blksz > max_blksz) { 1026168404Spjd ASSERT(!ISP2(zp->z_blksz)); 1027168404Spjd new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); 1028168404Spjd } else { 1029168404Spjd new_blksz = MIN(end_size, max_blksz); 1030168404Spjd } 1031168404Spjd zfs_grow_blocksize(zp, new_blksz, tx); 1032168404Spjd zfs_range_reduce(rl, woff, n); 1033168404Spjd } 1034168404Spjd 1035168404Spjd /* 1036168404Spjd * XXX - should we really limit each write to z_max_blksz? 1037168404Spjd * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 
1038168404Spjd */ 1039168404Spjd nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 1040168404Spjd 1041219089Spjd if (woff + nbytes > zp->z_size) 1042168404Spjd vnode_pager_setsize(vp, woff + nbytes); 1043168404Spjd 1044209962Smm if (abuf == NULL) { 1045209962Smm tx_bytes = uio->uio_resid; 1046219089Spjd error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 1047219089Spjd uio, nbytes, tx); 1048209962Smm tx_bytes -= uio->uio_resid; 1049168404Spjd } else { 1050209962Smm tx_bytes = nbytes; 1051219089Spjd ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 1052219089Spjd /* 1053219089Spjd * If this is not a full block write, but we are 1054219089Spjd * extending the file past EOF and this data starts 1055219089Spjd * block-aligned, use assign_arcbuf(). Otherwise, 1056219089Spjd * write via dmu_write(). 1057219089Spjd */ 1058219089Spjd if (tx_bytes < max_blksz && (!write_eof || 1059219089Spjd aiov->iov_base != abuf->b_data)) { 1060219089Spjd ASSERT(xuio); 1061219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, woff, 1062219089Spjd aiov->iov_len, aiov->iov_base, tx); 1063219089Spjd dmu_return_arcbuf(abuf); 1064219089Spjd xuio_stat_wbuf_copied(); 1065219089Spjd } else { 1066219089Spjd ASSERT(xuio || tx_bytes == max_blksz); 1067219089Spjd dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 1068219089Spjd woff, abuf, tx); 1069219089Spjd } 1070209962Smm ASSERT(tx_bytes <= uio->uio_resid); 1071209962Smm uioskip(uio, tx_bytes); 1072168404Spjd } 1073212657Savg if (tx_bytes && vn_has_cached_data(vp)) { 1074209962Smm update_pages(vp, woff, tx_bytes, zfsvfs->z_os, 1075209962Smm zp->z_id, uio->uio_segflg, tx); 1076209962Smm } 1077209962Smm 1078209962Smm /* 1079168404Spjd * If we made no progress, we're done. If we made even 1080168404Spjd * partial progress, update the znode and ZIL accordingly. 
1081168404Spjd */ 1082168404Spjd if (tx_bytes == 0) { 1083219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 1084219089Spjd (void *)&zp->z_size, sizeof (uint64_t), tx); 1085168404Spjd dmu_tx_commit(tx); 1086168404Spjd ASSERT(error != 0); 1087168404Spjd break; 1088168404Spjd } 1089168404Spjd 1090168404Spjd /* 1091168404Spjd * Clear Set-UID/Set-GID bits on successful write if not 1092168404Spjd * privileged and at least one of the excute bits is set. 1093168404Spjd * 1094168404Spjd * It would be nice to to this after all writes have 1095168404Spjd * been done, but that would still expose the ISUID/ISGID 1096168404Spjd * to another app after the partial write is committed. 1097185029Spjd * 1098185029Spjd * Note: we don't call zfs_fuid_map_id() here because 1099185029Spjd * user 0 is not an ephemeral uid. 1100168404Spjd */ 1101168404Spjd mutex_enter(&zp->z_acl_lock); 1102219089Spjd if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 1103168404Spjd (S_IXUSR >> 6))) != 0 && 1104219089Spjd (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 1105185029Spjd secpolicy_vnode_setid_retain(vp, cr, 1106219089Spjd (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 1107219089Spjd uint64_t newmode; 1108219089Spjd zp->z_mode &= ~(S_ISUID | S_ISGID); 1109219089Spjd newmode = zp->z_mode; 1110219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 1111219089Spjd (void *)&newmode, sizeof (uint64_t), tx); 1112168404Spjd } 1113168404Spjd mutex_exit(&zp->z_acl_lock); 1114168404Spjd 1115219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 1116219089Spjd B_TRUE); 1117168404Spjd 1118168404Spjd /* 1119168404Spjd * Update the file size (zp_size) if it has changed; 1120168404Spjd * account for possible concurrent updates. 
1121168404Spjd */ 1122219089Spjd while ((end_size = zp->z_size) < uio->uio_loffset) { 1123219089Spjd (void) atomic_cas_64(&zp->z_size, end_size, 1124168404Spjd uio->uio_loffset); 1125219089Spjd ASSERT(error == 0); 1126219089Spjd } 1127219089Spjd /* 1128219089Spjd * If we are replaying and eof is non zero then force 1129219089Spjd * the file size to the specified eof. Note, there's no 1130219089Spjd * concurrency during replay. 1131219089Spjd */ 1132219089Spjd if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 1133219089Spjd zp->z_size = zfsvfs->z_replay_eof; 1134219089Spjd 1135219089Spjd error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1136219089Spjd 1137168404Spjd zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 1138168404Spjd dmu_tx_commit(tx); 1139168404Spjd 1140168404Spjd if (error != 0) 1141168404Spjd break; 1142168404Spjd ASSERT(tx_bytes == nbytes); 1143168404Spjd n -= nbytes; 1144219089Spjd 1145219089Spjd#ifdef sun 1146219089Spjd if (!xuio && n > 0) 1147219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 1148219089Spjd#endif /* sun */ 1149168404Spjd } 1150168404Spjd 1151168404Spjd zfs_range_unlock(rl); 1152168404Spjd 1153168404Spjd /* 1154168404Spjd * If we're in replay mode, or we made no progress, return error. 1155168404Spjd * Otherwise, it's at least a partial write, so it's successful. 
1156168404Spjd */ 1157209962Smm if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1158168404Spjd ZFS_EXIT(zfsvfs); 1159168404Spjd return (error); 1160168404Spjd } 1161168404Spjd 1162219089Spjd if (ioflag & (FSYNC | FDSYNC) || 1163219089Spjd zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1164219089Spjd zil_commit(zilog, zp->z_id); 1165168404Spjd 1166168404Spjd ZFS_EXIT(zfsvfs); 1167168404Spjd return (0); 1168168404Spjd} 1169168404Spjd 1170168404Spjdvoid 1171219089Spjdzfs_get_done(zgd_t *zgd, int error) 1172168404Spjd{ 1173219089Spjd znode_t *zp = zgd->zgd_private; 1174219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 1175168404Spjd 1176219089Spjd if (zgd->zgd_db) 1177219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1178219089Spjd 1179219089Spjd zfs_range_unlock(zgd->zgd_rl); 1180219089Spjd 1181191900Skmacy /* 1182191900Skmacy * Release the vnode asynchronously as we currently have the 1183191900Skmacy * txg stopped from syncing. 1184191900Skmacy */ 1185219089Spjd VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1186219089Spjd 1187219089Spjd if (error == 0 && zgd->zgd_bp) 1188219089Spjd zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1189219089Spjd 1190168404Spjd kmem_free(zgd, sizeof (zgd_t)); 1191168404Spjd} 1192168404Spjd 1193214378Smm#ifdef DEBUG 1194214378Smmstatic int zil_fault_io = 0; 1195214378Smm#endif 1196214378Smm 1197168404Spjd/* 1198168404Spjd * Get data to generate a TX_WRITE intent log record. 
1199168404Spjd */ 1200168404Spjdint 1201168404Spjdzfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1202168404Spjd{ 1203168404Spjd zfsvfs_t *zfsvfs = arg; 1204168404Spjd objset_t *os = zfsvfs->z_os; 1205168404Spjd znode_t *zp; 1206219089Spjd uint64_t object = lr->lr_foid; 1207219089Spjd uint64_t offset = lr->lr_offset; 1208219089Spjd uint64_t size = lr->lr_length; 1209219089Spjd blkptr_t *bp = &lr->lr_blkptr; 1210168404Spjd dmu_buf_t *db; 1211168404Spjd zgd_t *zgd; 1212168404Spjd int error = 0; 1213168404Spjd 1214219089Spjd ASSERT(zio != NULL); 1215219089Spjd ASSERT(size != 0); 1216168404Spjd 1217168404Spjd /* 1218168404Spjd * Nothing to do if the file has been removed 1219168404Spjd */ 1220219089Spjd if (zfs_zget(zfsvfs, object, &zp) != 0) 1221249195Smm return (SET_ERROR(ENOENT)); 1222168404Spjd if (zp->z_unlinked) { 1223191900Skmacy /* 1224191900Skmacy * Release the vnode asynchronously as we currently have the 1225191900Skmacy * txg stopped from syncing. 1226191900Skmacy */ 1227196307Spjd VN_RELE_ASYNC(ZTOV(zp), 1228196307Spjd dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1229249195Smm return (SET_ERROR(ENOENT)); 1230168404Spjd } 1231168404Spjd 1232219089Spjd zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1233219089Spjd zgd->zgd_zilog = zfsvfs->z_log; 1234219089Spjd zgd->zgd_private = zp; 1235219089Spjd 1236168404Spjd /* 1237168404Spjd * Write records come in two flavors: immediate and indirect. 1238168404Spjd * For small writes it's cheaper to store the data with the 1239168404Spjd * log record (immediate); for large writes it's cheaper to 1240168404Spjd * sync the data and get a pointer to it (indirect) so that 1241168404Spjd * we don't have to write the data twice. 
1242168404Spjd */ 1243168404Spjd if (buf != NULL) { /* immediate write */ 1244219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1245168404Spjd /* test for truncation needs to be done while range locked */ 1246219089Spjd if (offset >= zp->z_size) { 1247249195Smm error = SET_ERROR(ENOENT); 1248219089Spjd } else { 1249219089Spjd error = dmu_read(os, object, offset, size, buf, 1250219089Spjd DMU_READ_NO_PREFETCH); 1251168404Spjd } 1252219089Spjd ASSERT(error == 0 || error == ENOENT); 1253168404Spjd } else { /* indirect write */ 1254168404Spjd /* 1255168404Spjd * Have to lock the whole block to ensure when it's 1256168404Spjd * written out and it's checksum is being calculated 1257168404Spjd * that no one can change the data. We need to re-check 1258168404Spjd * blocksize after we get the lock in case it's changed! 1259168404Spjd */ 1260168404Spjd for (;;) { 1261219089Spjd uint64_t blkoff; 1262219089Spjd size = zp->z_blksz; 1263219089Spjd blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; 1264219089Spjd offset -= blkoff; 1265219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1266219089Spjd RL_READER); 1267219089Spjd if (zp->z_blksz == size) 1268168404Spjd break; 1269219089Spjd offset += blkoff; 1270219089Spjd zfs_range_unlock(zgd->zgd_rl); 1271168404Spjd } 1272168404Spjd /* test for truncation needs to be done while range locked */ 1273219089Spjd if (lr->lr_offset >= zp->z_size) 1274249195Smm error = SET_ERROR(ENOENT); 1275214378Smm#ifdef DEBUG 1276214378Smm if (zil_fault_io) { 1277249195Smm error = SET_ERROR(EIO); 1278214378Smm zil_fault_io = 0; 1279214378Smm } 1280214378Smm#endif 1281219089Spjd if (error == 0) 1282219089Spjd error = dmu_buf_hold(os, object, offset, zgd, &db, 1283219089Spjd DMU_READ_NO_PREFETCH); 1284214378Smm 1285209962Smm if (error == 0) { 1286243524Smm blkptr_t *obp = dmu_buf_get_blkptr(db); 1287243524Smm if (obp) { 1288243524Smm ASSERT(BP_IS_HOLE(bp)); 1289243524Smm *bp = *obp; 1290243524Smm } 1291243524Smm 
1292219089Spjd zgd->zgd_db = db; 1293219089Spjd zgd->zgd_bp = bp; 1294219089Spjd 1295219089Spjd ASSERT(db->db_offset == offset); 1296219089Spjd ASSERT(db->db_size == size); 1297219089Spjd 1298219089Spjd error = dmu_sync(zio, lr->lr_common.lrc_txg, 1299219089Spjd zfs_get_done, zgd); 1300219089Spjd ASSERT(error || lr->lr_length <= zp->z_blksz); 1301219089Spjd 1302209962Smm /* 1303219089Spjd * On success, we need to wait for the write I/O 1304219089Spjd * initiated by dmu_sync() to complete before we can 1305219089Spjd * release this dbuf. We will finish everything up 1306219089Spjd * in the zfs_get_done() callback. 1307209962Smm */ 1308219089Spjd if (error == 0) 1309219089Spjd return (0); 1310209962Smm 1311219089Spjd if (error == EALREADY) { 1312219089Spjd lr->lr_common.lrc_txtype = TX_WRITE2; 1313219089Spjd error = 0; 1314219089Spjd } 1315209962Smm } 1316168404Spjd } 1317219089Spjd 1318219089Spjd zfs_get_done(zgd, error); 1319219089Spjd 1320168404Spjd return (error); 1321168404Spjd} 1322168404Spjd 1323168404Spjd/*ARGSUSED*/ 1324168404Spjdstatic int 1325185029Spjdzfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1326185029Spjd caller_context_t *ct) 1327168404Spjd{ 1328168404Spjd znode_t *zp = VTOZ(vp); 1329168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1330168404Spjd int error; 1331168404Spjd 1332168404Spjd ZFS_ENTER(zfsvfs); 1333185029Spjd ZFS_VERIFY_ZP(zp); 1334185029Spjd 1335185029Spjd if (flag & V_ACE_MASK) 1336185029Spjd error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1337185029Spjd else 1338185029Spjd error = zfs_zaccess_rwx(zp, mode, flag, cr); 1339185029Spjd 1340168404Spjd ZFS_EXIT(zfsvfs); 1341168404Spjd return (error); 1342168404Spjd} 1343168404Spjd 1344168404Spjd/* 1345211932Smm * If vnode is for a device return a specfs vnode instead. 
1346211932Smm */ 1347211932Smmstatic int 1348211932Smmspecvp_check(vnode_t **vpp, cred_t *cr) 1349211932Smm{ 1350211932Smm int error = 0; 1351211932Smm 1352211932Smm if (IS_DEVVP(*vpp)) { 1353211932Smm struct vnode *svp; 1354211932Smm 1355211932Smm svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1356211932Smm VN_RELE(*vpp); 1357211932Smm if (svp == NULL) 1358249195Smm error = SET_ERROR(ENOSYS); 1359211932Smm *vpp = svp; 1360211932Smm } 1361211932Smm return (error); 1362211932Smm} 1363211932Smm 1364211932Smm 1365211932Smm/* 1366168404Spjd * Lookup an entry in a directory, or an extended attribute directory. 1367168404Spjd * If it exists, return a held vnode reference for it. 1368168404Spjd * 1369168404Spjd * IN: dvp - vnode of directory to search. 1370168404Spjd * nm - name of entry to lookup. 1371168404Spjd * pnp - full pathname to lookup [UNUSED]. 1372168404Spjd * flags - LOOKUP_XATTR set if looking for an attribute. 1373168404Spjd * rdir - root directory vnode [UNUSED]. 1374168404Spjd * cr - credentials of caller. 1375185029Spjd * ct - caller context 1376185029Spjd * direntflags - directory lookup flags 1377185029Spjd * realpnp - returned pathname. 1378168404Spjd * 1379168404Spjd * OUT: vpp - vnode of located entry, NULL if not found. 1380168404Spjd * 1381251631Sdelphij * RETURN: 0 on success, error code on failure. 
1382168404Spjd * 1383168404Spjd * Timestamps: 1384168404Spjd * NA 1385168404Spjd */ 1386168404Spjd/* ARGSUSED */ 1387168962Spjdstatic int 1388168962Spjdzfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1389185029Spjd int nameiop, cred_t *cr, kthread_t *td, int flags) 1390168404Spjd{ 1391168962Spjd znode_t *zdp = VTOZ(dvp); 1392168962Spjd zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1393211932Smm int error = 0; 1394185029Spjd int *direntflags = NULL; 1395185029Spjd void *realpnp = NULL; 1396168404Spjd 1397211932Smm /* fast path */ 1398211932Smm if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { 1399211932Smm 1400211932Smm if (dvp->v_type != VDIR) { 1401249195Smm return (SET_ERROR(ENOTDIR)); 1402219089Spjd } else if (zdp->z_sa_hdl == NULL) { 1403249195Smm return (SET_ERROR(EIO)); 1404211932Smm } 1405211932Smm 1406211932Smm if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { 1407211932Smm error = zfs_fastaccesschk_execute(zdp, cr); 1408211932Smm if (!error) { 1409211932Smm *vpp = dvp; 1410211932Smm VN_HOLD(*vpp); 1411211932Smm return (0); 1412211932Smm } 1413211932Smm return (error); 1414211932Smm } else { 1415211932Smm vnode_t *tvp = dnlc_lookup(dvp, nm); 1416211932Smm 1417211932Smm if (tvp) { 1418211932Smm error = zfs_fastaccesschk_execute(zdp, cr); 1419211932Smm if (error) { 1420211932Smm VN_RELE(tvp); 1421211932Smm return (error); 1422211932Smm } 1423211932Smm if (tvp == DNLC_NO_VNODE) { 1424211932Smm VN_RELE(tvp); 1425249195Smm return (SET_ERROR(ENOENT)); 1426211932Smm } else { 1427211932Smm *vpp = tvp; 1428211932Smm return (specvp_check(vpp, cr)); 1429211932Smm } 1430211932Smm } 1431211932Smm } 1432211932Smm } 1433211932Smm 1434211932Smm DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1435211932Smm 1436168404Spjd ZFS_ENTER(zfsvfs); 1437185029Spjd ZFS_VERIFY_ZP(zdp); 1438168404Spjd 1439168404Spjd *vpp = NULL; 1440168404Spjd 1441185029Spjd if (flags & LOOKUP_XATTR) { 1442168404Spjd#ifdef TODO 1443168404Spjd /* 1444168404Spjd * 
If the xattr property is off, refuse the lookup request. 1445168404Spjd */ 1446168404Spjd if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1447168404Spjd ZFS_EXIT(zfsvfs); 1448249195Smm return (SET_ERROR(EINVAL)); 1449168404Spjd } 1450185029Spjd#endif 1451168404Spjd 1452168404Spjd /* 1453168404Spjd * We don't allow recursive attributes.. 1454168404Spjd * Maybe someday we will. 1455168404Spjd */ 1456219089Spjd if (zdp->z_pflags & ZFS_XATTR) { 1457168404Spjd ZFS_EXIT(zfsvfs); 1458249195Smm return (SET_ERROR(EINVAL)); 1459168404Spjd } 1460168404Spjd 1461168404Spjd if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1462168404Spjd ZFS_EXIT(zfsvfs); 1463168404Spjd return (error); 1464168404Spjd } 1465168404Spjd 1466168404Spjd /* 1467168404Spjd * Do we have permission to get into attribute directory? 1468168404Spjd */ 1469168404Spjd 1470185029Spjd if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1471185029Spjd B_FALSE, cr)) { 1472168404Spjd VN_RELE(*vpp); 1473185029Spjd *vpp = NULL; 1474168404Spjd } 1475168404Spjd 1476168404Spjd ZFS_EXIT(zfsvfs); 1477168404Spjd return (error); 1478168404Spjd } 1479168404Spjd 1480168404Spjd if (dvp->v_type != VDIR) { 1481168404Spjd ZFS_EXIT(zfsvfs); 1482249195Smm return (SET_ERROR(ENOTDIR)); 1483168404Spjd } 1484168404Spjd 1485168404Spjd /* 1486168404Spjd * Check accessibility of directory. 
1487168404Spjd */ 1488168404Spjd 1489185029Spjd if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1490168404Spjd ZFS_EXIT(zfsvfs); 1491168404Spjd return (error); 1492168404Spjd } 1493168404Spjd 1494185029Spjd if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1495185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1496185029Spjd ZFS_EXIT(zfsvfs); 1497249195Smm return (SET_ERROR(EILSEQ)); 1498185029Spjd } 1499168404Spjd 1500185029Spjd error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1501211932Smm if (error == 0) 1502211932Smm error = specvp_check(vpp, cr); 1503168962Spjd 1504168404Spjd /* Translate errors and add SAVENAME when needed. */ 1505168404Spjd if (cnp->cn_flags & ISLASTCN) { 1506168404Spjd switch (nameiop) { 1507168404Spjd case CREATE: 1508168404Spjd case RENAME: 1509168404Spjd if (error == ENOENT) { 1510168404Spjd error = EJUSTRETURN; 1511168404Spjd cnp->cn_flags |= SAVENAME; 1512168404Spjd break; 1513168404Spjd } 1514168404Spjd /* FALLTHROUGH */ 1515168404Spjd case DELETE: 1516168404Spjd if (error == 0) 1517168404Spjd cnp->cn_flags |= SAVENAME; 1518168404Spjd break; 1519168404Spjd } 1520168404Spjd } 1521168404Spjd if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) { 1522169198Spjd int ltype = 0; 1523169198Spjd 1524169198Spjd if (cnp->cn_flags & ISDOTDOT) { 1525176559Sattilio ltype = VOP_ISLOCKED(dvp); 1526175294Sattilio VOP_UNLOCK(dvp, 0); 1527169198Spjd } 1528206667Spjd ZFS_EXIT(zfsvfs); 1529254711Savg error = vn_lock(*vpp, cnp->cn_lkflags); 1530168962Spjd if (cnp->cn_flags & ISDOTDOT) 1531175202Sattilio vn_lock(dvp, ltype | LK_RETRY); 1532169172Spjd if (error != 0) { 1533169172Spjd VN_RELE(*vpp); 1534169172Spjd *vpp = NULL; 1535169172Spjd return (error); 1536169172Spjd } 1537206667Spjd } else { 1538206667Spjd ZFS_EXIT(zfsvfs); 1539168404Spjd } 1540168404Spjd 1541168404Spjd#ifdef FREEBSD_NAMECACHE 1542168404Spjd /* 1543168404Spjd * Insert name into cache (as non-existent) if appropriate. 
1544168404Spjd */ 1545168404Spjd if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) 1546168404Spjd cache_enter(dvp, *vpp, cnp); 1547169170Spjd /* 1548169170Spjd * Insert name into cache if appropriate. 1549169170Spjd */ 1550168404Spjd if (error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1551168404Spjd if (!(cnp->cn_flags & ISLASTCN) || 1552168404Spjd (nameiop != DELETE && nameiop != RENAME)) { 1553168404Spjd cache_enter(dvp, *vpp, cnp); 1554168404Spjd } 1555168404Spjd } 1556168404Spjd#endif 1557168404Spjd 1558168404Spjd return (error); 1559168404Spjd} 1560168404Spjd 1561168404Spjd/* 1562168404Spjd * Attempt to create a new entry in a directory. If the entry 1563168404Spjd * already exists, truncate the file if permissible, else return 1564168404Spjd * an error. Return the vp of the created or trunc'd file. 1565168404Spjd * 1566168404Spjd * IN: dvp - vnode of directory to put new file entry in. 1567168404Spjd * name - name of new file entry. 1568168404Spjd * vap - attributes of new file. 1569168404Spjd * excl - flag indicating exclusive or non-exclusive mode. 1570168404Spjd * mode - mode to open file with. 1571168404Spjd * cr - credentials of caller. 1572168404Spjd * flag - large file flag [UNUSED]. 1573185029Spjd * ct - caller context 1574185029Spjd * vsecp - ACL to be set 1575168404Spjd * 1576168404Spjd * OUT: vpp - vnode of created or trunc'd entry. 1577168404Spjd * 1578251631Sdelphij * RETURN: 0 on success, error code on failure. 
1579168404Spjd * 1580168404Spjd * Timestamps: 1581168404Spjd * dvp - ctime|mtime updated if new entry created 1582168404Spjd * vp - ctime|mtime always, atime if new 1583168404Spjd */ 1584185029Spjd 1585168404Spjd/* ARGSUSED */ 1586168404Spjdstatic int 1587168962Spjdzfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 1588185029Spjd vnode_t **vpp, cred_t *cr, kthread_t *td) 1589168404Spjd{ 1590168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1591168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1592185029Spjd zilog_t *zilog; 1593185029Spjd objset_t *os; 1594168404Spjd zfs_dirlock_t *dl; 1595168404Spjd dmu_tx_t *tx; 1596168404Spjd int error; 1597209962Smm ksid_t *ksid; 1598209962Smm uid_t uid; 1599209962Smm gid_t gid = crgetgid(cr); 1600219089Spjd zfs_acl_ids_t acl_ids; 1601209962Smm boolean_t fuid_dirtied; 1602219089Spjd boolean_t have_acl = B_FALSE; 1603185029Spjd void *vsecp = NULL; 1604185029Spjd int flag = 0; 1605168404Spjd 1606185029Spjd /* 1607185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 1608185029Spjd * make sure file system is at proper version 1609185029Spjd */ 1610185029Spjd 1611209962Smm ksid = crgetsid(cr, KSID_OWNER); 1612209962Smm if (ksid) 1613209962Smm uid = ksid_getid(ksid); 1614209962Smm else 1615209962Smm uid = crgetuid(cr); 1616219089Spjd 1617185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 1618185029Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 1619219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1620249195Smm return (SET_ERROR(EINVAL)); 1621185029Spjd 1622168404Spjd ZFS_ENTER(zfsvfs); 1623185029Spjd ZFS_VERIFY_ZP(dzp); 1624185029Spjd os = zfsvfs->z_os; 1625185029Spjd zilog = zfsvfs->z_log; 1626168404Spjd 1627185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1628185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1629185029Spjd ZFS_EXIT(zfsvfs); 1630249195Smm return (SET_ERROR(EILSEQ)); 1631185029Spjd } 1632185029Spjd 1633185029Spjd if (vap->va_mask & AT_XVATTR) { 1634197861Spjd if ((error = 
secpolicy_xvattr(dvp, (xvattr_t *)vap, 1635185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 1636185029Spjd ZFS_EXIT(zfsvfs); 1637185029Spjd return (error); 1638185029Spjd } 1639185029Spjd } 1640168404Spjdtop: 1641168404Spjd *vpp = NULL; 1642168404Spjd 1643182905Strasz if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 1644182905Strasz vap->va_mode &= ~S_ISVTX; 1645168404Spjd 1646168404Spjd if (*name == '\0') { 1647168404Spjd /* 1648168404Spjd * Null component name refers to the directory itself. 1649168404Spjd */ 1650168404Spjd VN_HOLD(dvp); 1651168404Spjd zp = dzp; 1652168404Spjd dl = NULL; 1653168404Spjd error = 0; 1654168404Spjd } else { 1655168404Spjd /* possible VN_HOLD(zp) */ 1656185029Spjd int zflg = 0; 1657185029Spjd 1658185029Spjd if (flag & FIGNORECASE) 1659185029Spjd zflg |= ZCILOOK; 1660185029Spjd 1661185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1662185029Spjd NULL, NULL); 1663185029Spjd if (error) { 1664219089Spjd if (have_acl) 1665219089Spjd zfs_acl_ids_free(&acl_ids); 1666168404Spjd if (strcmp(name, "..") == 0) 1667249195Smm error = SET_ERROR(EISDIR); 1668168404Spjd ZFS_EXIT(zfsvfs); 1669168404Spjd return (error); 1670168404Spjd } 1671168404Spjd } 1672219089Spjd 1673185029Spjd if (zp == NULL) { 1674185029Spjd uint64_t txtype; 1675168404Spjd 1676168404Spjd /* 1677168404Spjd * Create a new file object and update the directory 1678168404Spjd * to reference it. 1679168404Spjd */ 1680185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1681219089Spjd if (have_acl) 1682219089Spjd zfs_acl_ids_free(&acl_ids); 1683168404Spjd goto out; 1684168404Spjd } 1685168404Spjd 1686168404Spjd /* 1687168404Spjd * We only support the creation of regular files in 1688168404Spjd * extended attribute directories. 
1689168404Spjd */ 1690219089Spjd 1691219089Spjd if ((dzp->z_pflags & ZFS_XATTR) && 1692168404Spjd (vap->va_type != VREG)) { 1693219089Spjd if (have_acl) 1694219089Spjd zfs_acl_ids_free(&acl_ids); 1695249195Smm error = SET_ERROR(EINVAL); 1696168404Spjd goto out; 1697168404Spjd } 1698168404Spjd 1699219089Spjd if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, 1700219089Spjd cr, vsecp, &acl_ids)) != 0) 1701219089Spjd goto out; 1702219089Spjd have_acl = B_TRUE; 1703209962Smm 1704209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 1705211932Smm zfs_acl_ids_free(&acl_ids); 1706249195Smm error = SET_ERROR(EDQUOT); 1707209962Smm goto out; 1708209962Smm } 1709209962Smm 1710168404Spjd tx = dmu_tx_create(os); 1711219089Spjd 1712219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 1713219089Spjd ZFS_SA_BASE_ATTR_SIZE); 1714219089Spjd 1715209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 1716209962Smm if (fuid_dirtied) 1717209962Smm zfs_fuid_txhold(zfsvfs, tx); 1718168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 1719219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 1720219089Spjd if (!zfsvfs->z_use_sa && 1721219089Spjd acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1722168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1723219089Spjd 0, acl_ids.z_aclp->z_acl_bytes); 1724185029Spjd } 1725209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 1726168404Spjd if (error) { 1727168404Spjd zfs_dirent_unlock(dl); 1728209962Smm if (error == ERESTART) { 1729168404Spjd dmu_tx_wait(tx); 1730168404Spjd dmu_tx_abort(tx); 1731168404Spjd goto top; 1732168404Spjd } 1733219089Spjd zfs_acl_ids_free(&acl_ids); 1734168404Spjd dmu_tx_abort(tx); 1735168404Spjd ZFS_EXIT(zfsvfs); 1736168404Spjd return (error); 1737168404Spjd } 1738219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1739209962Smm 1740209962Smm if (fuid_dirtied) 1741209962Smm zfs_fuid_sync(zfsvfs, tx); 1742209962Smm 1743168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 1744185029Spjd txtype = 
zfs_log_create_txtype(Z_FILE, vsecp, vap); 1745185029Spjd if (flag & FIGNORECASE) 1746185029Spjd txtype |= TX_CI; 1747185029Spjd zfs_log_create(zilog, tx, txtype, dzp, zp, name, 1748209962Smm vsecp, acl_ids.z_fuidp, vap); 1749209962Smm zfs_acl_ids_free(&acl_ids); 1750168404Spjd dmu_tx_commit(tx); 1751168404Spjd } else { 1752185029Spjd int aflags = (flag & FAPPEND) ? V_APPEND : 0; 1753185029Spjd 1754219089Spjd if (have_acl) 1755219089Spjd zfs_acl_ids_free(&acl_ids); 1756219089Spjd have_acl = B_FALSE; 1757219089Spjd 1758168404Spjd /* 1759168404Spjd * A directory entry already exists for this name. 1760168404Spjd */ 1761168404Spjd /* 1762168962Spjd * Can't truncate an existing file if in exclusive mode. 1763168962Spjd */ 1764168962Spjd if (excl == EXCL) { 1765249195Smm error = SET_ERROR(EEXIST); 1766168962Spjd goto out; 1767168962Spjd } 1768168962Spjd /* 1769168404Spjd * Can't open a directory for writing. 1770168404Spjd */ 1771168404Spjd if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1772249195Smm error = SET_ERROR(EISDIR); 1773168404Spjd goto out; 1774168404Spjd } 1775168404Spjd /* 1776168404Spjd * Verify requested access to file. 1777168404Spjd */ 1778185029Spjd if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 1779168404Spjd goto out; 1780168404Spjd } 1781168404Spjd 1782168404Spjd mutex_enter(&dzp->z_lock); 1783168404Spjd dzp->z_seq++; 1784168404Spjd mutex_exit(&dzp->z_lock); 1785168404Spjd 1786168404Spjd /* 1787168404Spjd * Truncate regular files if requested. 
1788168404Spjd */ 1789168404Spjd if ((ZTOV(zp)->v_type == VREG) && 1790168404Spjd (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 1791185029Spjd /* we can't hold any locks when calling zfs_freesp() */ 1792185029Spjd zfs_dirent_unlock(dl); 1793185029Spjd dl = NULL; 1794168404Spjd error = zfs_freesp(zp, 0, 0, mode, TRUE); 1795185029Spjd if (error == 0) { 1796185029Spjd vnevent_create(ZTOV(zp), ct); 1797168404Spjd } 1798168404Spjd } 1799168404Spjd } 1800168404Spjdout: 1801168404Spjd if (dl) 1802168404Spjd zfs_dirent_unlock(dl); 1803168404Spjd 1804168404Spjd if (error) { 1805168404Spjd if (zp) 1806168404Spjd VN_RELE(ZTOV(zp)); 1807168962Spjd } else { 1808168962Spjd *vpp = ZTOV(zp); 1809211932Smm error = specvp_check(vpp, cr); 1810168404Spjd } 1811168404Spjd 1812219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1813219089Spjd zil_commit(zilog, 0); 1814219089Spjd 1815168404Spjd ZFS_EXIT(zfsvfs); 1816168404Spjd return (error); 1817168404Spjd} 1818168404Spjd 1819168404Spjd/* 1820168404Spjd * Remove an entry from a directory. 1821168404Spjd * 1822168404Spjd * IN: dvp - vnode of directory to remove entry from. 1823168404Spjd * name - name of entry to remove. 1824168404Spjd * cr - credentials of caller. 1825185029Spjd * ct - caller context 1826185029Spjd * flags - case flags 1827168404Spjd * 1828251631Sdelphij * RETURN: 0 on success, error code on failure. 
1829168404Spjd * 1830168404Spjd * Timestamps: 1831168404Spjd * dvp - ctime|mtime 1832168404Spjd * vp - ctime (if nlink > 0) 1833168404Spjd */ 1834219089Spjd 1835219089Spjduint64_t null_xattr = 0; 1836219089Spjd 1837185029Spjd/*ARGSUSED*/ 1838168404Spjdstatic int 1839185029Spjdzfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 1840185029Spjd int flags) 1841168404Spjd{ 1842168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1843219089Spjd znode_t *xzp; 1844168404Spjd vnode_t *vp; 1845168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1846185029Spjd zilog_t *zilog; 1847168962Spjd uint64_t acl_obj, xattr_obj; 1848219089Spjd uint64_t xattr_obj_unlinked = 0; 1849219089Spjd uint64_t obj = 0; 1850168404Spjd zfs_dirlock_t *dl; 1851168404Spjd dmu_tx_t *tx; 1852168962Spjd boolean_t may_delete_now, delete_now = FALSE; 1853185029Spjd boolean_t unlinked, toobig = FALSE; 1854185029Spjd uint64_t txtype; 1855185029Spjd pathname_t *realnmp = NULL; 1856185029Spjd pathname_t realnm; 1857168404Spjd int error; 1858185029Spjd int zflg = ZEXISTS; 1859168404Spjd 1860168404Spjd ZFS_ENTER(zfsvfs); 1861185029Spjd ZFS_VERIFY_ZP(dzp); 1862185029Spjd zilog = zfsvfs->z_log; 1863168404Spjd 1864185029Spjd if (flags & FIGNORECASE) { 1865185029Spjd zflg |= ZCILOOK; 1866185029Spjd pn_alloc(&realnm); 1867185029Spjd realnmp = &realnm; 1868185029Spjd } 1869185029Spjd 1870168404Spjdtop: 1871219089Spjd xattr_obj = 0; 1872219089Spjd xzp = NULL; 1873168404Spjd /* 1874168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 
1875168404Spjd */ 1876185029Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1877185029Spjd NULL, realnmp)) { 1878185029Spjd if (realnmp) 1879185029Spjd pn_free(realnmp); 1880168404Spjd ZFS_EXIT(zfsvfs); 1881168404Spjd return (error); 1882168404Spjd } 1883168404Spjd 1884168404Spjd vp = ZTOV(zp); 1885168404Spjd 1886168962Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1887168404Spjd goto out; 1888168962Spjd } 1889168404Spjd 1890168962Spjd /* 1891168962Spjd * Need to use rmdir for removing directories. 1892168962Spjd */ 1893168962Spjd if (vp->v_type == VDIR) { 1894249195Smm error = SET_ERROR(EPERM); 1895168962Spjd goto out; 1896168962Spjd } 1897168962Spjd 1898185029Spjd vnevent_remove(vp, dvp, name, ct); 1899168962Spjd 1900185029Spjd if (realnmp) 1901185029Spjd dnlc_remove(dvp, realnmp->pn_buf); 1902185029Spjd else 1903185029Spjd dnlc_remove(dvp, name); 1904168404Spjd 1905219089Spjd VI_LOCK(vp); 1906219089Spjd may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); 1907219089Spjd VI_UNLOCK(vp); 1908168962Spjd 1909168404Spjd /* 1910168404Spjd * We may delete the znode now, or we may put it in the unlinked set; 1911168404Spjd * it depends on whether we're the last link, and on whether there are 1912168404Spjd * other holds on the vnode. So we dmu_tx_hold() the right things to 1913168404Spjd * allow for either case. 1914168404Spjd */ 1915219089Spjd obj = zp->z_id; 1916168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1917168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1918219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1919219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1920219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 1921185029Spjd if (may_delete_now) { 1922185029Spjd toobig = 1923219089Spjd zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 1924185029Spjd /* if the file is too big, only hold_free a token amount */ 1925185029Spjd dmu_tx_hold_free(tx, zp->z_id, 0, 1926185029Spjd (toobig ? 
DMU_MAX_ACCESS : DMU_OBJECT_END)); 1927185029Spjd } 1928168404Spjd 1929168404Spjd /* are there any extended attributes? */ 1930219089Spjd error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1931219089Spjd &xattr_obj, sizeof (xattr_obj)); 1932219089Spjd if (error == 0 && xattr_obj) { 1933219089Spjd error = zfs_zget(zfsvfs, xattr_obj, &xzp); 1934240415Smm ASSERT0(error); 1935219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 1936219089Spjd dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1937168404Spjd } 1938168404Spjd 1939219089Spjd mutex_enter(&zp->z_lock); 1940219089Spjd if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) 1941168962Spjd dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 1942219089Spjd mutex_exit(&zp->z_lock); 1943168962Spjd 1944168404Spjd /* charge as an update -- would be nice not to charge at all */ 1945168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1946168404Spjd 1947209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 1948168404Spjd if (error) { 1949168404Spjd zfs_dirent_unlock(dl); 1950168962Spjd VN_RELE(vp); 1951219089Spjd if (xzp) 1952219089Spjd VN_RELE(ZTOV(xzp)); 1953209962Smm if (error == ERESTART) { 1954168404Spjd dmu_tx_wait(tx); 1955168404Spjd dmu_tx_abort(tx); 1956168404Spjd goto top; 1957168404Spjd } 1958185029Spjd if (realnmp) 1959185029Spjd pn_free(realnmp); 1960168404Spjd dmu_tx_abort(tx); 1961168404Spjd ZFS_EXIT(zfsvfs); 1962168404Spjd return (error); 1963168404Spjd } 1964168404Spjd 1965168404Spjd /* 1966168404Spjd * Remove the directory entry. 1967168404Spjd */ 1968185029Spjd error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 1969168404Spjd 1970168404Spjd if (error) { 1971168404Spjd dmu_tx_commit(tx); 1972168404Spjd goto out; 1973168404Spjd } 1974168404Spjd 1975219089Spjd if (unlinked) { 1976219089Spjd 1977219089Spjd /* 1978219089Spjd * Hold z_lock so that we can make sure that the ACL obj 1979219089Spjd * hasn't changed. Could have been deleted due to 1980219089Spjd * zfs_sa_upgrade(). 
1981219089Spjd */ 1982219089Spjd mutex_enter(&zp->z_lock); 1983168962Spjd VI_LOCK(vp); 1984219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1985219089Spjd &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); 1986185029Spjd delete_now = may_delete_now && !toobig && 1987168962Spjd vp->v_count == 1 && !vn_has_cached_data(vp) && 1988219089Spjd xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == 1989219089Spjd acl_obj; 1990168962Spjd VI_UNLOCK(vp); 1991168962Spjd } 1992168962Spjd 1993168962Spjd if (delete_now) { 1994243270Savg#ifdef __FreeBSD__ 1995243270Savg panic("zfs_remove: delete_now branch taken"); 1996243270Savg#endif 1997219089Spjd if (xattr_obj_unlinked) { 1998219089Spjd ASSERT3U(xzp->z_links, ==, 2); 1999168962Spjd mutex_enter(&xzp->z_lock); 2000168962Spjd xzp->z_unlinked = 1; 2001219089Spjd xzp->z_links = 0; 2002219089Spjd error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 2003219089Spjd &xzp->z_links, sizeof (xzp->z_links), tx); 2004219089Spjd ASSERT3U(error, ==, 0); 2005168962Spjd mutex_exit(&xzp->z_lock); 2006168962Spjd zfs_unlinked_add(xzp, tx); 2007219089Spjd 2008219089Spjd if (zp->z_is_sa) 2009219089Spjd error = sa_remove(zp->z_sa_hdl, 2010219089Spjd SA_ZPL_XATTR(zfsvfs), tx); 2011219089Spjd else 2012219089Spjd error = sa_update(zp->z_sa_hdl, 2013219089Spjd SA_ZPL_XATTR(zfsvfs), &null_xattr, 2014219089Spjd sizeof (uint64_t), tx); 2015240415Smm ASSERT0(error); 2016168962Spjd } 2017168962Spjd VI_LOCK(vp); 2018168962Spjd vp->v_count--; 2019240415Smm ASSERT0(vp->v_count); 2020168962Spjd VI_UNLOCK(vp); 2021168962Spjd mutex_exit(&zp->z_lock); 2022168962Spjd zfs_znode_delete(zp, tx); 2023168962Spjd } else if (unlinked) { 2024219089Spjd mutex_exit(&zp->z_lock); 2025168404Spjd zfs_unlinked_add(zp, tx); 2026243268Savg#ifdef __FreeBSD__ 2027243268Savg vp->v_vflag |= VV_NOSYNC; 2028243268Savg#endif 2029168962Spjd } 2030168404Spjd 2031185029Spjd txtype = TX_REMOVE; 2032185029Spjd if (flags & FIGNORECASE) 2033185029Spjd txtype |= TX_CI; 
2034219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 2035168404Spjd 2036168404Spjd dmu_tx_commit(tx); 2037168404Spjdout: 2038185029Spjd if (realnmp) 2039185029Spjd pn_free(realnmp); 2040185029Spjd 2041168404Spjd zfs_dirent_unlock(dl); 2042168404Spjd 2043219089Spjd if (!delete_now) 2044168962Spjd VN_RELE(vp); 2045219089Spjd if (xzp) 2046168962Spjd VN_RELE(ZTOV(xzp)); 2047168962Spjd 2048219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2049219089Spjd zil_commit(zilog, 0); 2050219089Spjd 2051168404Spjd ZFS_EXIT(zfsvfs); 2052168404Spjd return (error); 2053168404Spjd} 2054168404Spjd 2055168404Spjd/* 2056168404Spjd * Create a new directory and insert it into dvp using the name 2057168404Spjd * provided. Return a pointer to the inserted directory. 2058168404Spjd * 2059168404Spjd * IN: dvp - vnode of directory to add subdir to. 2060168404Spjd * dirname - name of new directory. 2061168404Spjd * vap - attributes of new directory. 2062168404Spjd * cr - credentials of caller. 2063185029Spjd * ct - caller context 2064251631Sdelphij * flags - case flags 2065185029Spjd * vsecp - ACL to be set 2066168404Spjd * 2067168404Spjd * OUT: vpp - vnode of created directory. 2068168404Spjd * 2069251631Sdelphij * RETURN: 0 on success, error code on failure. 
2070168404Spjd * 2071168404Spjd * Timestamps: 2072168404Spjd * dvp - ctime|mtime updated 2073168404Spjd * vp - ctime|mtime|atime updated 2074168404Spjd */ 2075185029Spjd/*ARGSUSED*/ 2076168404Spjdstatic int 2077185029Spjdzfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 2078185029Spjd caller_context_t *ct, int flags, vsecattr_t *vsecp) 2079168404Spjd{ 2080168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 2081168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2082185029Spjd zilog_t *zilog; 2083168404Spjd zfs_dirlock_t *dl; 2084185029Spjd uint64_t txtype; 2085168404Spjd dmu_tx_t *tx; 2086168404Spjd int error; 2087185029Spjd int zf = ZNEW; 2088209962Smm ksid_t *ksid; 2089209962Smm uid_t uid; 2090209962Smm gid_t gid = crgetgid(cr); 2091219089Spjd zfs_acl_ids_t acl_ids; 2092209962Smm boolean_t fuid_dirtied; 2093168404Spjd 2094168404Spjd ASSERT(vap->va_type == VDIR); 2095168404Spjd 2096185029Spjd /* 2097185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 2098185029Spjd * make sure file system is at proper version 2099185029Spjd */ 2100185029Spjd 2101209962Smm ksid = crgetsid(cr, KSID_OWNER); 2102209962Smm if (ksid) 2103209962Smm uid = ksid_getid(ksid); 2104209962Smm else 2105209962Smm uid = crgetuid(cr); 2106185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2107219089Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 2108219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2109249195Smm return (SET_ERROR(EINVAL)); 2110185029Spjd 2111168404Spjd ZFS_ENTER(zfsvfs); 2112185029Spjd ZFS_VERIFY_ZP(dzp); 2113185029Spjd zilog = zfsvfs->z_log; 2114168404Spjd 2115219089Spjd if (dzp->z_pflags & ZFS_XATTR) { 2116168404Spjd ZFS_EXIT(zfsvfs); 2117249195Smm return (SET_ERROR(EINVAL)); 2118168404Spjd } 2119168404Spjd 2120185029Spjd if (zfsvfs->z_utf8 && u8_validate(dirname, 2121185029Spjd strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2122185029Spjd ZFS_EXIT(zfsvfs); 2123249195Smm return (SET_ERROR(EILSEQ)); 2124185029Spjd } 2125185029Spjd if (flags & 
FIGNORECASE) 2126185029Spjd zf |= ZCILOOK; 2127185029Spjd 2128219089Spjd if (vap->va_mask & AT_XVATTR) { 2129197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2130185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 2131185029Spjd ZFS_EXIT(zfsvfs); 2132185029Spjd return (error); 2133185029Spjd } 2134219089Spjd } 2135185029Spjd 2136219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 2137219089Spjd vsecp, &acl_ids)) != 0) { 2138219089Spjd ZFS_EXIT(zfsvfs); 2139219089Spjd return (error); 2140219089Spjd } 2141168404Spjd /* 2142168404Spjd * First make sure the new directory doesn't exist. 2143219089Spjd * 2144219089Spjd * Existence is checked first to make sure we don't return 2145219089Spjd * EACCES instead of EEXIST which can cause some applications 2146219089Spjd * to fail. 2147168404Spjd */ 2148185029Spjdtop: 2149185029Spjd *vpp = NULL; 2150185029Spjd 2151185029Spjd if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 2152185029Spjd NULL, NULL)) { 2153219089Spjd zfs_acl_ids_free(&acl_ids); 2154168404Spjd ZFS_EXIT(zfsvfs); 2155168404Spjd return (error); 2156168404Spjd } 2157168404Spjd 2158185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 2159219089Spjd zfs_acl_ids_free(&acl_ids); 2160168404Spjd zfs_dirent_unlock(dl); 2161168404Spjd ZFS_EXIT(zfsvfs); 2162168404Spjd return (error); 2163168404Spjd } 2164168404Spjd 2165209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2166211932Smm zfs_acl_ids_free(&acl_ids); 2167209962Smm zfs_dirent_unlock(dl); 2168209962Smm ZFS_EXIT(zfsvfs); 2169249195Smm return (SET_ERROR(EDQUOT)); 2170209962Smm } 2171209962Smm 2172168404Spjd /* 2173168404Spjd * Add a new entry to the directory. 
2174168404Spjd */ 2175168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2176168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 2177168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2178209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 2179209962Smm if (fuid_dirtied) 2180209962Smm zfs_fuid_txhold(zfsvfs, tx); 2181219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2182219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2183219089Spjd acl_ids.z_aclp->z_acl_bytes); 2184219089Spjd } 2185219089Spjd 2186219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2187219089Spjd ZFS_SA_BASE_ATTR_SIZE); 2188219089Spjd 2189209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 2190168404Spjd if (error) { 2191168404Spjd zfs_dirent_unlock(dl); 2192209962Smm if (error == ERESTART) { 2193168404Spjd dmu_tx_wait(tx); 2194168404Spjd dmu_tx_abort(tx); 2195168404Spjd goto top; 2196168404Spjd } 2197219089Spjd zfs_acl_ids_free(&acl_ids); 2198168404Spjd dmu_tx_abort(tx); 2199168404Spjd ZFS_EXIT(zfsvfs); 2200168404Spjd return (error); 2201168404Spjd } 2202168404Spjd 2203168404Spjd /* 2204168404Spjd * Create new node. 2205168404Spjd */ 2206219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2207168404Spjd 2208209962Smm if (fuid_dirtied) 2209209962Smm zfs_fuid_sync(zfsvfs, tx); 2210219089Spjd 2211168404Spjd /* 2212168404Spjd * Now put new name in parent dir. 
2213168404Spjd */ 2214168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 2215168404Spjd 2216168404Spjd *vpp = ZTOV(zp); 2217168404Spjd 2218185029Spjd txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 2219185029Spjd if (flags & FIGNORECASE) 2220185029Spjd txtype |= TX_CI; 2221209962Smm zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 2222209962Smm acl_ids.z_fuidp, vap); 2223185029Spjd 2224209962Smm zfs_acl_ids_free(&acl_ids); 2225219089Spjd 2226168404Spjd dmu_tx_commit(tx); 2227168404Spjd 2228168404Spjd zfs_dirent_unlock(dl); 2229168404Spjd 2230219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2231219089Spjd zil_commit(zilog, 0); 2232219089Spjd 2233168404Spjd ZFS_EXIT(zfsvfs); 2234168404Spjd return (0); 2235168404Spjd} 2236168404Spjd 2237168404Spjd/* 2238168404Spjd * Remove a directory subdir entry. If the current working 2239168404Spjd * directory is the same as the subdir to be removed, the 2240168404Spjd * remove will fail. 2241168404Spjd * 2242168404Spjd * IN: dvp - vnode of directory to remove from. 2243168404Spjd * name - name of directory to be removed. 2244168404Spjd * cwd - vnode of current working directory. 2245168404Spjd * cr - credentials of caller. 2246185029Spjd * ct - caller context 2247185029Spjd * flags - case flags 2248168404Spjd * 2249251631Sdelphij * RETURN: 0 on success, error code on failure. 
2250168404Spjd * 2251168404Spjd * Timestamps: 2252168404Spjd * dvp - ctime|mtime updated 2253168404Spjd */ 2254185029Spjd/*ARGSUSED*/ 2255168404Spjdstatic int 2256185029Spjdzfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 2257185029Spjd caller_context_t *ct, int flags) 2258168404Spjd{ 2259168404Spjd znode_t *dzp = VTOZ(dvp); 2260168404Spjd znode_t *zp; 2261168404Spjd vnode_t *vp; 2262168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2263185029Spjd zilog_t *zilog; 2264168404Spjd zfs_dirlock_t *dl; 2265168404Spjd dmu_tx_t *tx; 2266168404Spjd int error; 2267185029Spjd int zflg = ZEXISTS; 2268168404Spjd 2269168962Spjd ZFS_ENTER(zfsvfs); 2270185029Spjd ZFS_VERIFY_ZP(dzp); 2271185029Spjd zilog = zfsvfs->z_log; 2272168404Spjd 2273185029Spjd if (flags & FIGNORECASE) 2274185029Spjd zflg |= ZCILOOK; 2275168404Spjdtop: 2276168404Spjd zp = NULL; 2277168404Spjd 2278168404Spjd /* 2279168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 2280168404Spjd */ 2281185029Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 2282185029Spjd NULL, NULL)) { 2283168404Spjd ZFS_EXIT(zfsvfs); 2284168404Spjd return (error); 2285168404Spjd } 2286168404Spjd 2287168404Spjd vp = ZTOV(zp); 2288168404Spjd 2289168404Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2290168404Spjd goto out; 2291168404Spjd } 2292168404Spjd 2293168962Spjd if (vp->v_type != VDIR) { 2294249195Smm error = SET_ERROR(ENOTDIR); 2295168962Spjd goto out; 2296168962Spjd } 2297168962Spjd 2298168962Spjd if (vp == cwd) { 2299249195Smm error = SET_ERROR(EINVAL); 2300168962Spjd goto out; 2301168962Spjd } 2302168962Spjd 2303185029Spjd vnevent_rmdir(vp, dvp, name, ct); 2304168962Spjd 2305168404Spjd /* 2306168404Spjd * Grab a lock on the directory to make sure that noone is 2307168404Spjd * trying to add (or lookup) entries while we are removing it. 
2308168404Spjd */ 2309168404Spjd rw_enter(&zp->z_name_lock, RW_WRITER); 2310168404Spjd 2311168404Spjd /* 2312168404Spjd * Grab a lock on the parent pointer to make sure we play well 2313168404Spjd * with the treewalk and directory rename code. 2314168404Spjd */ 2315168404Spjd rw_enter(&zp->z_parent_lock, RW_WRITER); 2316168404Spjd 2317168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2318168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2319219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2320168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2321219089Spjd zfs_sa_upgrade_txholds(tx, zp); 2322219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 2323209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 2324168404Spjd if (error) { 2325168404Spjd rw_exit(&zp->z_parent_lock); 2326168404Spjd rw_exit(&zp->z_name_lock); 2327168404Spjd zfs_dirent_unlock(dl); 2328168962Spjd VN_RELE(vp); 2329209962Smm if (error == ERESTART) { 2330168404Spjd dmu_tx_wait(tx); 2331168404Spjd dmu_tx_abort(tx); 2332168404Spjd goto top; 2333168404Spjd } 2334168404Spjd dmu_tx_abort(tx); 2335168404Spjd ZFS_EXIT(zfsvfs); 2336168404Spjd return (error); 2337168404Spjd } 2338168404Spjd 2339168404Spjd#ifdef FREEBSD_NAMECACHE 2340168404Spjd cache_purge(dvp); 2341168404Spjd#endif 2342168404Spjd 2343185029Spjd error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 2344168404Spjd 2345185029Spjd if (error == 0) { 2346185029Spjd uint64_t txtype = TX_RMDIR; 2347185029Spjd if (flags & FIGNORECASE) 2348185029Spjd txtype |= TX_CI; 2349219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2350185029Spjd } 2351168404Spjd 2352168404Spjd dmu_tx_commit(tx); 2353168404Spjd 2354168404Spjd rw_exit(&zp->z_parent_lock); 2355168404Spjd rw_exit(&zp->z_name_lock); 2356168404Spjd#ifdef FREEBSD_NAMECACHE 2357168404Spjd cache_purge(vp); 2358168404Spjd#endif 2359168404Spjdout: 2360168404Spjd zfs_dirent_unlock(dl); 2361168404Spjd 2362168962Spjd VN_RELE(vp); 2363168962Spjd 2364219089Spjd if 
(zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2365219089Spjd zil_commit(zilog, 0); 2366219089Spjd 2367168404Spjd ZFS_EXIT(zfsvfs); 2368168404Spjd return (error); 2369168404Spjd} 2370168404Spjd 2371168404Spjd/* 2372168404Spjd * Read as many directory entries as will fit into the provided 2373168404Spjd * buffer from the given directory cursor position (specified in 2374251631Sdelphij * the uio structure). 2375168404Spjd * 2376168404Spjd * IN: vp - vnode of directory to read. 2377168404Spjd * uio - structure supplying read location, range info, 2378168404Spjd * and return buffer. 2379168404Spjd * cr - credentials of caller. 2380185029Spjd * ct - caller context 2381185029Spjd * flags - case flags 2382168404Spjd * 2383168404Spjd * OUT: uio - updated offset and range, buffer filled. 2384168404Spjd * eofp - set to true if end-of-file detected. 2385168404Spjd * 2386251631Sdelphij * RETURN: 0 on success, error code on failure. 2387168404Spjd * 2388168404Spjd * Timestamps: 2389168404Spjd * vp - atime updated 2390168404Spjd * 2391168404Spjd * Note that the low 4 bits of the cookie returned by zap is always zero. 2392168404Spjd * This allows us to use the low range for "special" directory entries: 2393168404Spjd * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2394168404Spjd * we use the offset 2 for the '.zfs' directory. 
2395168404Spjd */ 2396168404Spjd/* ARGSUSED */ 2397168404Spjdstatic int 2398168962Spjdzfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) 2399168404Spjd{ 2400168404Spjd znode_t *zp = VTOZ(vp); 2401168404Spjd iovec_t *iovp; 2402185029Spjd edirent_t *eodp; 2403168404Spjd dirent64_t *odp; 2404168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2405168404Spjd objset_t *os; 2406168404Spjd caddr_t outbuf; 2407168404Spjd size_t bufsize; 2408168404Spjd zap_cursor_t zc; 2409168404Spjd zap_attribute_t zap; 2410168404Spjd uint_t bytes_wanted; 2411168404Spjd uint64_t offset; /* must be unsigned; checks for < 1 */ 2412219089Spjd uint64_t parent; 2413168404Spjd int local_eof; 2414168404Spjd int outcount; 2415168404Spjd int error; 2416168404Spjd uint8_t prefetch; 2417185029Spjd boolean_t check_sysattrs; 2418168404Spjd uint8_t type; 2419168962Spjd int ncooks; 2420168962Spjd u_long *cooks = NULL; 2421185029Spjd int flags = 0; 2422168404Spjd 2423168404Spjd ZFS_ENTER(zfsvfs); 2424185029Spjd ZFS_VERIFY_ZP(zp); 2425168404Spjd 2426219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 2427219089Spjd &parent, sizeof (parent))) != 0) { 2428219089Spjd ZFS_EXIT(zfsvfs); 2429219089Spjd return (error); 2430219089Spjd } 2431219089Spjd 2432168404Spjd /* 2433168404Spjd * If we are not given an eof variable, 2434168404Spjd * use a local one. 2435168404Spjd */ 2436168404Spjd if (eofp == NULL) 2437168404Spjd eofp = &local_eof; 2438168404Spjd 2439168404Spjd /* 2440168404Spjd * Check for valid iov_len. 
2441168404Spjd */ 2442168404Spjd if (uio->uio_iov->iov_len <= 0) { 2443168404Spjd ZFS_EXIT(zfsvfs); 2444249195Smm return (SET_ERROR(EINVAL)); 2445168404Spjd } 2446168404Spjd 2447168404Spjd /* 2448168404Spjd * Quit if directory has been removed (posix) 2449168404Spjd */ 2450168404Spjd if ((*eofp = zp->z_unlinked) != 0) { 2451168404Spjd ZFS_EXIT(zfsvfs); 2452168404Spjd return (0); 2453168404Spjd } 2454168404Spjd 2455168404Spjd error = 0; 2456168404Spjd os = zfsvfs->z_os; 2457168404Spjd offset = uio->uio_loffset; 2458168404Spjd prefetch = zp->z_zn_prefetch; 2459168404Spjd 2460168404Spjd /* 2461168404Spjd * Initialize the iterator cursor. 2462168404Spjd */ 2463168404Spjd if (offset <= 3) { 2464168404Spjd /* 2465168404Spjd * Start iteration from the beginning of the directory. 2466168404Spjd */ 2467168404Spjd zap_cursor_init(&zc, os, zp->z_id); 2468168404Spjd } else { 2469168404Spjd /* 2470168404Spjd * The offset is a serialized cursor. 2471168404Spjd */ 2472168404Spjd zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2473168404Spjd } 2474168404Spjd 2475168404Spjd /* 2476168404Spjd * Get space to change directory entries into fs independent format. 2477168404Spjd */ 2478168404Spjd iovp = uio->uio_iov; 2479168404Spjd bytes_wanted = iovp->iov_len; 2480168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2481168404Spjd bufsize = bytes_wanted; 2482168404Spjd outbuf = kmem_alloc(bufsize, KM_SLEEP); 2483168404Spjd odp = (struct dirent64 *)outbuf; 2484168404Spjd } else { 2485168404Spjd bufsize = bytes_wanted; 2486247187Smm outbuf = NULL; 2487168404Spjd odp = (struct dirent64 *)iovp->iov_base; 2488168404Spjd } 2489185029Spjd eodp = (struct edirent *)odp; 2490168404Spjd 2491169170Spjd if (ncookies != NULL) { 2492168404Spjd /* 2493168404Spjd * Minimum entry size is dirent size and 1 byte for a file name. 
2494168404Spjd */ 2495168962Spjd ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2496219404Spjd cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2497219404Spjd *cookies = cooks; 2498168962Spjd *ncookies = ncooks; 2499168404Spjd } 2500185029Spjd /* 2501185029Spjd * If this VFS supports the system attribute view interface; and 2502185029Spjd * we're looking at an extended attribute directory; and we care 2503185029Spjd * about normalization conflicts on this vfs; then we must check 2504185029Spjd * for normalization conflicts with the sysattr name space. 2505185029Spjd */ 2506185029Spjd#ifdef TODO 2507185029Spjd check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2508185029Spjd (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2509185029Spjd (flags & V_RDDIR_ENTFLAGS); 2510185029Spjd#else 2511185029Spjd check_sysattrs = 0; 2512185029Spjd#endif 2513168404Spjd 2514168404Spjd /* 2515168404Spjd * Transform to file-system independent format 2516168404Spjd */ 2517168404Spjd outcount = 0; 2518168404Spjd while (outcount < bytes_wanted) { 2519168404Spjd ino64_t objnum; 2520168404Spjd ushort_t reclen; 2521219089Spjd off64_t *next = NULL; 2522168404Spjd 2523168404Spjd /* 2524168404Spjd * Special case `.', `..', and `.zfs'. 
2525168404Spjd */ 2526168404Spjd if (offset == 0) { 2527168404Spjd (void) strcpy(zap.za_name, "."); 2528185029Spjd zap.za_normalization_conflict = 0; 2529168404Spjd objnum = zp->z_id; 2530169108Spjd type = DT_DIR; 2531168404Spjd } else if (offset == 1) { 2532168404Spjd (void) strcpy(zap.za_name, ".."); 2533185029Spjd zap.za_normalization_conflict = 0; 2534219089Spjd objnum = parent; 2535169108Spjd type = DT_DIR; 2536168404Spjd } else if (offset == 2 && zfs_show_ctldir(zp)) { 2537168404Spjd (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2538185029Spjd zap.za_normalization_conflict = 0; 2539168404Spjd objnum = ZFSCTL_INO_ROOT; 2540169108Spjd type = DT_DIR; 2541168404Spjd } else { 2542168404Spjd /* 2543168404Spjd * Grab next entry. 2544168404Spjd */ 2545168404Spjd if (error = zap_cursor_retrieve(&zc, &zap)) { 2546168404Spjd if ((*eofp = (error == ENOENT)) != 0) 2547168404Spjd break; 2548168404Spjd else 2549168404Spjd goto update; 2550168404Spjd } 2551168404Spjd 2552168404Spjd if (zap.za_integer_length != 8 || 2553168404Spjd zap.za_num_integers != 1) { 2554168404Spjd cmn_err(CE_WARN, "zap_readdir: bad directory " 2555168404Spjd "entry, obj = %lld, offset = %lld\n", 2556168404Spjd (u_longlong_t)zp->z_id, 2557168404Spjd (u_longlong_t)offset); 2558249195Smm error = SET_ERROR(ENXIO); 2559168404Spjd goto update; 2560168404Spjd } 2561168404Spjd 2562168404Spjd objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2563168404Spjd /* 2564168404Spjd * MacOS X can extract the object type here such as: 2565168404Spjd * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2566168404Spjd */ 2567168404Spjd type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2568185029Spjd 2569185029Spjd if (check_sysattrs && !zap.za_normalization_conflict) { 2570185029Spjd#ifdef TODO 2571185029Spjd zap.za_normalization_conflict = 2572185029Spjd xattr_sysattr_casechk(zap.za_name); 2573185029Spjd#else 2574185029Spjd panic("%s:%u: TODO", __func__, __LINE__); 2575185029Spjd#endif 2576185029Spjd } 2577168404Spjd } 
2578168404Spjd 2579211932Smm if (flags & V_RDDIR_ACCFILTER) { 2580211932Smm /* 2581211932Smm * If we have no access at all, don't include 2582211932Smm * this entry in the returned information 2583211932Smm */ 2584211932Smm znode_t *ezp; 2585211932Smm if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2586211932Smm goto skip_entry; 2587211932Smm if (!zfs_has_access(ezp, cr)) { 2588211932Smm VN_RELE(ZTOV(ezp)); 2589211932Smm goto skip_entry; 2590211932Smm } 2591211932Smm VN_RELE(ZTOV(ezp)); 2592211932Smm } 2593211932Smm 2594185029Spjd if (flags & V_RDDIR_ENTFLAGS) 2595185029Spjd reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2596185029Spjd else 2597185029Spjd reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2598185029Spjd 2599168404Spjd /* 2600168404Spjd * Will this entry fit in the buffer? 2601168404Spjd */ 2602168404Spjd if (outcount + reclen > bufsize) { 2603168404Spjd /* 2604168404Spjd * Did we manage to fit anything in the buffer? 2605168404Spjd */ 2606168404Spjd if (!outcount) { 2607249195Smm error = SET_ERROR(EINVAL); 2608168404Spjd goto update; 2609168404Spjd } 2610168404Spjd break; 2611168404Spjd } 2612185029Spjd if (flags & V_RDDIR_ENTFLAGS) { 2613185029Spjd /* 2614185029Spjd * Add extended flag entry: 2615185029Spjd */ 2616185029Spjd eodp->ed_ino = objnum; 2617185029Spjd eodp->ed_reclen = reclen; 2618185029Spjd /* NOTE: ed_off is the offset for the *next* entry */ 2619185029Spjd next = &(eodp->ed_off); 2620185029Spjd eodp->ed_eflags = zap.za_normalization_conflict ? 
2621185029Spjd ED_CASE_CONFLICT : 0; 2622185029Spjd (void) strncpy(eodp->ed_name, zap.za_name, 2623185029Spjd EDIRENT_NAMELEN(reclen)); 2624185029Spjd eodp = (edirent_t *)((intptr_t)eodp + reclen); 2625185029Spjd } else { 2626185029Spjd /* 2627185029Spjd * Add normal entry: 2628185029Spjd */ 2629185029Spjd odp->d_ino = objnum; 2630185029Spjd odp->d_reclen = reclen; 2631185029Spjd odp->d_namlen = strlen(zap.za_name); 2632185029Spjd (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2633185029Spjd odp->d_type = type; 2634185029Spjd odp = (dirent64_t *)((intptr_t)odp + reclen); 2635185029Spjd } 2636168404Spjd outcount += reclen; 2637168404Spjd 2638168404Spjd ASSERT(outcount <= bufsize); 2639168404Spjd 2640168404Spjd /* Prefetch znode */ 2641168404Spjd if (prefetch) 2642168404Spjd dmu_prefetch(os, objnum, 0, 0); 2643168404Spjd 2644211932Smm skip_entry: 2645168404Spjd /* 2646168404Spjd * Move to the next entry, fill in the previous offset. 2647168404Spjd */ 2648168404Spjd if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2649168404Spjd zap_cursor_advance(&zc); 2650168404Spjd offset = zap_cursor_serialize(&zc); 2651168404Spjd } else { 2652168404Spjd offset += 1; 2653168404Spjd } 2654219404Spjd 2655219404Spjd if (cooks != NULL) { 2656219404Spjd *cooks++ = offset; 2657219404Spjd ncooks--; 2658219404Spjd KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2659219404Spjd } 2660168404Spjd } 2661168404Spjd zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2662168404Spjd 2663168404Spjd /* Subtract unused cookies */ 2664168962Spjd if (ncookies != NULL) 2665168962Spjd *ncookies -= ncooks; 2666168404Spjd 2667168404Spjd if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2668168404Spjd iovp->iov_base += outcount; 2669168404Spjd iovp->iov_len -= outcount; 2670168404Spjd uio->uio_resid -= outcount; 2671168404Spjd } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2672168404Spjd /* 2673168404Spjd * Reset the pointer. 
2674168404Spjd */ 2675168404Spjd offset = uio->uio_loffset; 2676168404Spjd } 2677168404Spjd 2678168404Spjdupdate: 2679168404Spjd zap_cursor_fini(&zc); 2680168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2681168404Spjd kmem_free(outbuf, bufsize); 2682168404Spjd 2683168404Spjd if (error == ENOENT) 2684168404Spjd error = 0; 2685168404Spjd 2686168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2687168404Spjd 2688168404Spjd uio->uio_loffset = offset; 2689168404Spjd ZFS_EXIT(zfsvfs); 2690169107Spjd if (error != 0 && cookies != NULL) { 2691168962Spjd free(*cookies, M_TEMP); 2692168962Spjd *cookies = NULL; 2693168962Spjd *ncookies = 0; 2694168404Spjd } 2695168404Spjd return (error); 2696168404Spjd} 2697168404Spjd 2698185029Spjdulong_t zfs_fsync_sync_cnt = 4; 2699185029Spjd 2700168404Spjdstatic int 2701185029Spjdzfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2702168404Spjd{ 2703168962Spjd znode_t *zp = VTOZ(vp); 2704168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2705168404Spjd 2706185029Spjd (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2707185029Spjd 2708219089Spjd if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 2709219089Spjd ZFS_ENTER(zfsvfs); 2710219089Spjd ZFS_VERIFY_ZP(zp); 2711219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 2712219089Spjd ZFS_EXIT(zfsvfs); 2713219089Spjd } 2714168404Spjd return (0); 2715168404Spjd} 2716168404Spjd 2717185029Spjd 2718168404Spjd/* 2719168404Spjd * Get the requested file attributes and place them in the provided 2720168404Spjd * vattr structure. 2721168404Spjd * 2722168404Spjd * IN: vp - vnode of file. 2723168404Spjd * vap - va_mask identifies requested attributes. 2724185029Spjd * If AT_XVATTR set, then optional attrs are requested 2725185029Spjd * flags - ATTR_NOACLCHECK (CIFS server context) 2726168404Spjd * cr - credentials of caller. 2727185029Spjd * ct - caller context 2728168404Spjd * 2729168404Spjd * OUT: vap - attribute values. 
2730168404Spjd * 2731251631Sdelphij * RETURN: 0 (always succeeds). 2732168404Spjd */ 2733168404Spjd/* ARGSUSED */ 2734168404Spjdstatic int 2735185029Spjdzfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2736185029Spjd caller_context_t *ct) 2737168404Spjd{ 2738168962Spjd znode_t *zp = VTOZ(vp); 2739168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2740185029Spjd int error = 0; 2741168962Spjd uint32_t blksize; 2742168962Spjd u_longlong_t nblocks; 2743185029Spjd uint64_t links; 2744224251Sdelphij uint64_t mtime[2], ctime[2], crtime[2], rdev; 2745185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2746185029Spjd xoptattr_t *xoap = NULL; 2747185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2748224251Sdelphij sa_bulk_attr_t bulk[4]; 2749219089Spjd int count = 0; 2750168404Spjd 2751168404Spjd ZFS_ENTER(zfsvfs); 2752185029Spjd ZFS_VERIFY_ZP(zp); 2753168404Spjd 2754219089Spjd zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2755219089Spjd 2756219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 2757219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 2758243807Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 2759224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2760224251Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 2761224251Sdelphij &rdev, 8); 2762219089Spjd 2763219089Spjd if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 2764219089Spjd ZFS_EXIT(zfsvfs); 2765219089Spjd return (error); 2766219089Spjd } 2767219089Spjd 2768168404Spjd /* 2769185029Spjd * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2770185029Spjd * Also, if we are the owner don't bother, since owner should 2771185029Spjd * always be allowed to read basic attributes of file. 
2772185029Spjd */ 2773219089Spjd if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2774219089Spjd (vap->va_uid != crgetuid(cr))) { 2775185029Spjd if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2776185029Spjd skipaclchk, cr)) { 2777185029Spjd ZFS_EXIT(zfsvfs); 2778185029Spjd return (error); 2779185029Spjd } 2780185029Spjd } 2781185029Spjd 2782185029Spjd /* 2783168404Spjd * Return all attributes. It's cheaper to provide the answer 2784168404Spjd * than to determine whether we were asked the question. 2785168404Spjd */ 2786168404Spjd 2787209097Smm mutex_enter(&zp->z_lock); 2788219089Spjd vap->va_type = IFTOVT(zp->z_mode); 2789219089Spjd vap->va_mode = zp->z_mode & ~S_IFMT; 2790224252Sdelphij#ifdef sun 2791224252Sdelphij vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2792224252Sdelphij#else 2793224252Sdelphij vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 2794224252Sdelphij#endif 2795168404Spjd vap->va_nodeid = zp->z_id; 2796185029Spjd if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2797219089Spjd links = zp->z_links + 1; 2798185029Spjd else 2799219089Spjd links = zp->z_links; 2800229425Sdim vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! */ 2801219089Spjd vap->va_size = zp->z_size; 2802224252Sdelphij#ifdef sun 2803224252Sdelphij vap->va_rdev = vp->v_rdev; 2804224252Sdelphij#else 2805224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2806224251Sdelphij vap->va_rdev = zfs_cmpldev(rdev); 2807224252Sdelphij#endif 2808168404Spjd vap->va_seq = zp->z_seq; 2809168404Spjd vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2810168404Spjd 2811185029Spjd /* 2812185029Spjd * Add in any requested optional attributes and the create time. 2813185029Spjd * Also set the corresponding bits in the returned attribute bitmap. 
2814185029Spjd */ 2815185029Spjd if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2816185029Spjd if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2817185029Spjd xoap->xoa_archive = 2818219089Spjd ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2819185029Spjd XVA_SET_RTN(xvap, XAT_ARCHIVE); 2820185029Spjd } 2821185029Spjd 2822185029Spjd if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2823185029Spjd xoap->xoa_readonly = 2824219089Spjd ((zp->z_pflags & ZFS_READONLY) != 0); 2825185029Spjd XVA_SET_RTN(xvap, XAT_READONLY); 2826185029Spjd } 2827185029Spjd 2828185029Spjd if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2829185029Spjd xoap->xoa_system = 2830219089Spjd ((zp->z_pflags & ZFS_SYSTEM) != 0); 2831185029Spjd XVA_SET_RTN(xvap, XAT_SYSTEM); 2832185029Spjd } 2833185029Spjd 2834185029Spjd if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2835185029Spjd xoap->xoa_hidden = 2836219089Spjd ((zp->z_pflags & ZFS_HIDDEN) != 0); 2837185029Spjd XVA_SET_RTN(xvap, XAT_HIDDEN); 2838185029Spjd } 2839185029Spjd 2840185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2841185029Spjd xoap->xoa_nounlink = 2842219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2843185029Spjd XVA_SET_RTN(xvap, XAT_NOUNLINK); 2844185029Spjd } 2845185029Spjd 2846185029Spjd if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2847185029Spjd xoap->xoa_immutable = 2848219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2849185029Spjd XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2850185029Spjd } 2851185029Spjd 2852185029Spjd if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2853185029Spjd xoap->xoa_appendonly = 2854219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2855185029Spjd XVA_SET_RTN(xvap, XAT_APPENDONLY); 2856185029Spjd } 2857185029Spjd 2858185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2859185029Spjd xoap->xoa_nodump = 2860219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0); 2861185029Spjd XVA_SET_RTN(xvap, XAT_NODUMP); 2862185029Spjd } 2863185029Spjd 2864185029Spjd if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2865185029Spjd xoap->xoa_opaque = 2866219089Spjd 
((zp->z_pflags & ZFS_OPAQUE) != 0); 2867185029Spjd XVA_SET_RTN(xvap, XAT_OPAQUE); 2868185029Spjd } 2869185029Spjd 2870185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2871185029Spjd xoap->xoa_av_quarantined = 2872219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2873185029Spjd XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2874185029Spjd } 2875185029Spjd 2876185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2877185029Spjd xoap->xoa_av_modified = 2878219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2879185029Spjd XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2880185029Spjd } 2881185029Spjd 2882185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2883219089Spjd vp->v_type == VREG) { 2884219089Spjd zfs_sa_get_scanstamp(zp, xvap); 2885185029Spjd } 2886185029Spjd 2887185029Spjd if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 2888219089Spjd uint64_t times[2]; 2889219089Spjd 2890219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 2891219089Spjd times, sizeof (times)); 2892219089Spjd ZFS_TIME_DECODE(&xoap->xoa_createtime, times); 2893185029Spjd XVA_SET_RTN(xvap, XAT_CREATETIME); 2894185029Spjd } 2895219089Spjd 2896219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2897219089Spjd xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 2898219089Spjd XVA_SET_RTN(xvap, XAT_REPARSE); 2899219089Spjd } 2900219089Spjd if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 2901219089Spjd xoap->xoa_generation = zp->z_gen; 2902219089Spjd XVA_SET_RTN(xvap, XAT_GEN); 2903219089Spjd } 2904219089Spjd 2905219089Spjd if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2906219089Spjd xoap->xoa_offline = 2907219089Spjd ((zp->z_pflags & ZFS_OFFLINE) != 0); 2908219089Spjd XVA_SET_RTN(xvap, XAT_OFFLINE); 2909219089Spjd } 2910219089Spjd 2911219089Spjd if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2912219089Spjd xoap->xoa_sparse = 2913219089Spjd ((zp->z_pflags & ZFS_SPARSE) != 0); 2914219089Spjd XVA_SET_RTN(xvap, XAT_SPARSE); 2915219089Spjd } 2916185029Spjd } 2917185029Spjd 2918219089Spjd 
ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 2919219089Spjd ZFS_TIME_DECODE(&vap->va_mtime, mtime); 2920219089Spjd ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2921219089Spjd ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 2922168404Spjd 2923168404Spjd mutex_exit(&zp->z_lock); 2924168404Spjd 2925219089Spjd sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 2926168404Spjd vap->va_blksize = blksize; 2927168404Spjd vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 2928168404Spjd 2929168404Spjd if (zp->z_blksz == 0) { 2930168404Spjd /* 2931168404Spjd * Block size hasn't been set; suggest maximal I/O transfers. 2932168404Spjd */ 2933168404Spjd vap->va_blksize = zfsvfs->z_max_blksz; 2934168404Spjd } 2935168404Spjd 2936168404Spjd ZFS_EXIT(zfsvfs); 2937168404Spjd return (0); 2938168404Spjd} 2939168404Spjd 2940168404Spjd/* 2941168404Spjd * Set the file attributes to the values contained in the 2942168404Spjd * vattr structure. 2943168404Spjd * 2944168404Spjd * IN: vp - vnode of file to be modified. 2945168404Spjd * vap - new attribute values. 2946185029Spjd * If AT_XVATTR set, then optional attrs are being set 2947168404Spjd * flags - ATTR_UTIME set if non-default time values provided. 2948185029Spjd * - ATTR_NOACLCHECK (CIFS context only). 2949168404Spjd * cr - credentials of caller. 2950185029Spjd * ct - caller context 2951168404Spjd * 2952251631Sdelphij * RETURN: 0 on success, error code on failure. 2953168404Spjd * 2954168404Spjd * Timestamps: 2955168404Spjd * vp - ctime updated, mtime updated if size changed. 
2956168404Spjd */ 2957168404Spjd/* ARGSUSED */ 2958168404Spjdstatic int 2959168962Spjdzfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2960251631Sdelphij caller_context_t *ct) 2961168404Spjd{ 2962185029Spjd znode_t *zp = VTOZ(vp); 2963168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2964185029Spjd zilog_t *zilog; 2965168404Spjd dmu_tx_t *tx; 2966168404Spjd vattr_t oldva; 2967209962Smm xvattr_t tmpxvattr; 2968168962Spjd uint_t mask = vap->va_mask; 2969247187Smm uint_t saved_mask = 0; 2970197831Spjd uint64_t saved_mode; 2971168404Spjd int trim_mask = 0; 2972168404Spjd uint64_t new_mode; 2973209962Smm uint64_t new_uid, new_gid; 2974219089Spjd uint64_t xattr_obj; 2975219089Spjd uint64_t mtime[2], ctime[2]; 2976168404Spjd znode_t *attrzp; 2977168404Spjd int need_policy = FALSE; 2978219089Spjd int err, err2; 2979185029Spjd zfs_fuid_info_t *fuidp = NULL; 2980185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2981185029Spjd xoptattr_t *xoap; 2982219089Spjd zfs_acl_t *aclp; 2983185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 2984219089Spjd boolean_t fuid_dirtied = B_FALSE; 2985219089Spjd sa_bulk_attr_t bulk[7], xattr_bulk[7]; 2986219089Spjd int count = 0, xattr_count = 0; 2987168404Spjd 2988168404Spjd if (mask == 0) 2989168404Spjd return (0); 2990168404Spjd 2991168962Spjd if (mask & AT_NOSET) 2992249195Smm return (SET_ERROR(EINVAL)); 2993168962Spjd 2994185029Spjd ZFS_ENTER(zfsvfs); 2995185029Spjd ZFS_VERIFY_ZP(zp); 2996185029Spjd 2997185029Spjd zilog = zfsvfs->z_log; 2998185029Spjd 2999185029Spjd /* 3000185029Spjd * Make sure that if we have ephemeral uid/gid or xvattr specified 3001185029Spjd * that file system is at proper version level 3002185029Spjd */ 3003185029Spjd 3004185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 3005185029Spjd (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 3006185029Spjd ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 3007185029Spjd (mask & AT_XVATTR))) { 3008185029Spjd ZFS_EXIT(zfsvfs); 3009249195Smm return (SET_ERROR(EINVAL)); 3010185029Spjd } 3011185029Spjd 3012185029Spjd if (mask & AT_SIZE && vp->v_type == VDIR) { 3013185029Spjd ZFS_EXIT(zfsvfs); 3014249195Smm return (SET_ERROR(EISDIR)); 3015185029Spjd } 3016168404Spjd 3017185029Spjd if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 3018185029Spjd ZFS_EXIT(zfsvfs); 3019249195Smm return (SET_ERROR(EINVAL)); 3020185029Spjd } 3021168404Spjd 3022185029Spjd /* 3023185029Spjd * If this is an xvattr_t, then get a pointer to the structure of 3024185029Spjd * optional attributes. If this is NULL, then we have a vattr_t. 
3025185029Spjd */ 3026185029Spjd xoap = xva_getxoptattr(xvap); 3027168404Spjd 3028209962Smm xva_init(&tmpxvattr); 3029209962Smm 3030185029Spjd /* 3031185029Spjd * Immutable files can only alter immutable bit and atime 3032185029Spjd */ 3033219089Spjd if ((zp->z_pflags & ZFS_IMMUTABLE) && 3034185029Spjd ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 3035185029Spjd ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 3036185029Spjd ZFS_EXIT(zfsvfs); 3037249195Smm return (SET_ERROR(EPERM)); 3038185029Spjd } 3039185029Spjd 3040219089Spjd if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { 3041185029Spjd ZFS_EXIT(zfsvfs); 3042249195Smm return (SET_ERROR(EPERM)); 3043185029Spjd } 3044185029Spjd 3045185029Spjd /* 3046185029Spjd * Verify timestamps doesn't overflow 32 bits. 3047185029Spjd * ZFS can handle large timestamps, but 32bit syscalls can't 3048185029Spjd * handle times greater than 2039. This check should be removed 3049185029Spjd * once large timestamps are fully supported. 3050185029Spjd */ 3051185029Spjd if (mask & (AT_ATIME | AT_MTIME)) { 3052185029Spjd if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 3053185029Spjd ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 3054185029Spjd ZFS_EXIT(zfsvfs); 3055249195Smm return (SET_ERROR(EOVERFLOW)); 3056185029Spjd } 3057185029Spjd } 3058185029Spjd 3059168404Spjdtop: 3060168404Spjd attrzp = NULL; 3061219089Spjd aclp = NULL; 3062168404Spjd 3063211932Smm /* Can this be moved to before the top label? 
*/ 3064168404Spjd if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 3065168404Spjd ZFS_EXIT(zfsvfs); 3066249195Smm return (SET_ERROR(EROFS)); 3067168404Spjd } 3068168404Spjd 3069168404Spjd /* 3070168404Spjd * First validate permissions 3071168404Spjd */ 3072168404Spjd 3073168404Spjd if (mask & AT_SIZE) { 3074168404Spjd /* 3075168404Spjd * XXX - Note, we are not providing any open 3076168404Spjd * mode flags here (like FNDELAY), so we may 3077168404Spjd * block if there are locks present... this 3078168404Spjd * should be addressed in openat(). 3079168404Spjd */ 3080185029Spjd /* XXX - would it be OK to generate a log record here? */ 3081185029Spjd err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 3082168404Spjd if (err) { 3083168404Spjd ZFS_EXIT(zfsvfs); 3084168404Spjd return (err); 3085168404Spjd } 3086168404Spjd } 3087168404Spjd 3088185029Spjd if (mask & (AT_ATIME|AT_MTIME) || 3089185029Spjd ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 3090185029Spjd XVA_ISSET_REQ(xvap, XAT_READONLY) || 3091185029Spjd XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 3092219089Spjd XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 3093219089Spjd XVA_ISSET_REQ(xvap, XAT_SPARSE) || 3094185029Spjd XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 3095219089Spjd XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 3096185029Spjd need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 3097185029Spjd skipaclchk, cr); 3098219089Spjd } 3099168404Spjd 3100168404Spjd if (mask & (AT_UID|AT_GID)) { 3101168404Spjd int idmask = (mask & (AT_UID|AT_GID)); 3102168404Spjd int take_owner; 3103168404Spjd int take_group; 3104168404Spjd 3105168404Spjd /* 3106168404Spjd * NOTE: even if a new mode is being set, 3107168404Spjd * we may clear S_ISUID/S_ISGID bits. 
3108168404Spjd */ 3109168404Spjd 3110168404Spjd if (!(mask & AT_MODE)) 3111219089Spjd vap->va_mode = zp->z_mode; 3112168404Spjd 3113168404Spjd /* 3114168404Spjd * Take ownership or chgrp to group we are a member of 3115168404Spjd */ 3116168404Spjd 3117168404Spjd take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 3118185029Spjd take_group = (mask & AT_GID) && 3119185029Spjd zfs_groupmember(zfsvfs, vap->va_gid, cr); 3120168404Spjd 3121168404Spjd /* 3122168404Spjd * If both AT_UID and AT_GID are set then take_owner and 3123168404Spjd * take_group must both be set in order to allow taking 3124168404Spjd * ownership. 3125168404Spjd * 3126168404Spjd * Otherwise, send the check through secpolicy_vnode_setattr() 3127168404Spjd * 3128168404Spjd */ 3129168404Spjd 3130168404Spjd if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 3131168404Spjd ((idmask == AT_UID) && take_owner) || 3132168404Spjd ((idmask == AT_GID) && take_group)) { 3133185029Spjd if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 3134185029Spjd skipaclchk, cr) == 0) { 3135168404Spjd /* 3136168404Spjd * Remove setuid/setgid for non-privileged users 3137168404Spjd */ 3138185029Spjd secpolicy_setid_clear(vap, vp, cr); 3139168404Spjd trim_mask = (mask & (AT_UID|AT_GID)); 3140168404Spjd } else { 3141168404Spjd need_policy = TRUE; 3142168404Spjd } 3143168404Spjd } else { 3144168404Spjd need_policy = TRUE; 3145168404Spjd } 3146168404Spjd } 3147168404Spjd 3148168404Spjd mutex_enter(&zp->z_lock); 3149219089Spjd oldva.va_mode = zp->z_mode; 3150185029Spjd zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 3151185029Spjd if (mask & AT_XVATTR) { 3152209962Smm /* 3153209962Smm * Update xvattr mask to include only those attributes 3154209962Smm * that are actually changing. 3155209962Smm * 3156209962Smm * the bits will be restored prior to actually setting 3157209962Smm * the attributes so the caller thinks they were set. 
3158209962Smm */ 3159209962Smm if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3160209962Smm if (xoap->xoa_appendonly != 3161219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 3162209962Smm need_policy = TRUE; 3163209962Smm } else { 3164209962Smm XVA_CLR_REQ(xvap, XAT_APPENDONLY); 3165209962Smm XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 3166209962Smm } 3167209962Smm } 3168209962Smm 3169209962Smm if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3170209962Smm if (xoap->xoa_nounlink != 3171219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 3172209962Smm need_policy = TRUE; 3173209962Smm } else { 3174209962Smm XVA_CLR_REQ(xvap, XAT_NOUNLINK); 3175209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 3176209962Smm } 3177209962Smm } 3178209962Smm 3179209962Smm if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3180209962Smm if (xoap->xoa_immutable != 3181219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 3182209962Smm need_policy = TRUE; 3183209962Smm } else { 3184209962Smm XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 3185209962Smm XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 3186209962Smm } 3187209962Smm } 3188209962Smm 3189209962Smm if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3190209962Smm if (xoap->xoa_nodump != 3191219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0)) { 3192209962Smm need_policy = TRUE; 3193209962Smm } else { 3194209962Smm XVA_CLR_REQ(xvap, XAT_NODUMP); 3195209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 3196209962Smm } 3197209962Smm } 3198209962Smm 3199209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3200209962Smm if (xoap->xoa_av_modified != 3201219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 3202209962Smm need_policy = TRUE; 3203209962Smm } else { 3204209962Smm XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 3205209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 3206209962Smm } 3207209962Smm } 3208209962Smm 3209209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3210209962Smm if ((vp->v_type != VREG && 3211209962Smm xoap->xoa_av_quarantined) || 3212209962Smm xoap->xoa_av_quarantined != 
3213219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 3214209962Smm need_policy = TRUE; 3215209962Smm } else { 3216209962Smm XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 3217209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 3218209962Smm } 3219209962Smm } 3220209962Smm 3221219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3222219089Spjd mutex_exit(&zp->z_lock); 3223219089Spjd ZFS_EXIT(zfsvfs); 3224249195Smm return (SET_ERROR(EPERM)); 3225219089Spjd } 3226219089Spjd 3227209962Smm if (need_policy == FALSE && 3228209962Smm (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3229209962Smm XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3230185029Spjd need_policy = TRUE; 3231185029Spjd } 3232185029Spjd } 3233185029Spjd 3234168404Spjd mutex_exit(&zp->z_lock); 3235168404Spjd 3236168404Spjd if (mask & AT_MODE) { 3237185029Spjd if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 3238168962Spjd err = secpolicy_setid_setsticky_clear(vp, vap, 3239168962Spjd &oldva, cr); 3240168962Spjd if (err) { 3241168962Spjd ZFS_EXIT(zfsvfs); 3242168962Spjd return (err); 3243168962Spjd } 3244168404Spjd trim_mask |= AT_MODE; 3245168404Spjd } else { 3246168404Spjd need_policy = TRUE; 3247168404Spjd } 3248168404Spjd } 3249168404Spjd 3250168404Spjd if (need_policy) { 3251168404Spjd /* 3252168404Spjd * If trim_mask is set then take ownership 3253168404Spjd * has been granted or write_acl is present and user 3254168404Spjd * has the ability to modify mode. In that case remove 3255168404Spjd * UID|GID and or MODE from mask so that 3256168404Spjd * secpolicy_vnode_setattr() doesn't revoke it. 3257168404Spjd */ 3258168404Spjd 3259168404Spjd if (trim_mask) { 3260168404Spjd saved_mask = vap->va_mask; 3261168404Spjd vap->va_mask &= ~trim_mask; 3262197831Spjd if (trim_mask & AT_MODE) { 3263197831Spjd /* 3264197831Spjd * Save the mode, as secpolicy_vnode_setattr() 3265197831Spjd * will overwrite it with ova.va_mode. 
3266197831Spjd */ 3267197831Spjd saved_mode = vap->va_mode; 3268197831Spjd } 3269168404Spjd } 3270168404Spjd err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3271185029Spjd (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3272168404Spjd if (err) { 3273168404Spjd ZFS_EXIT(zfsvfs); 3274168404Spjd return (err); 3275168404Spjd } 3276168404Spjd 3277197831Spjd if (trim_mask) { 3278168404Spjd vap->va_mask |= saved_mask; 3279197831Spjd if (trim_mask & AT_MODE) { 3280197831Spjd /* 3281197831Spjd * Recover the mode after 3282197831Spjd * secpolicy_vnode_setattr(). 3283197831Spjd */ 3284197831Spjd vap->va_mode = saved_mode; 3285197831Spjd } 3286197831Spjd } 3287168404Spjd } 3288168404Spjd 3289168404Spjd /* 3290168404Spjd * secpolicy_vnode_setattr, or take ownership may have 3291168404Spjd * changed va_mask 3292168404Spjd */ 3293168404Spjd mask = vap->va_mask; 3294168404Spjd 3295219089Spjd if ((mask & (AT_UID | AT_GID))) { 3296219089Spjd err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 3297219089Spjd &xattr_obj, sizeof (xattr_obj)); 3298168404Spjd 3299219089Spjd if (err == 0 && xattr_obj) { 3300219089Spjd err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 3301209962Smm if (err) 3302219089Spjd goto out2; 3303168404Spjd } 3304209962Smm if (mask & AT_UID) { 3305209962Smm new_uid = zfs_fuid_create(zfsvfs, 3306209962Smm (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3307219089Spjd if (new_uid != zp->z_uid && 3308219089Spjd zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 3309219089Spjd if (attrzp) 3310219089Spjd VN_RELE(ZTOV(attrzp)); 3311249195Smm err = SET_ERROR(EDQUOT); 3312219089Spjd goto out2; 3313209962Smm } 3314209962Smm } 3315209962Smm 3316209962Smm if (mask & AT_GID) { 3317209962Smm new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 3318209962Smm cr, ZFS_GROUP, &fuidp); 3319219089Spjd if (new_gid != zp->z_gid && 3320219089Spjd zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 3321219089Spjd if (attrzp) 3322219089Spjd VN_RELE(ZTOV(attrzp)); 3323249195Smm 
err = SET_ERROR(EDQUOT); 3324219089Spjd goto out2; 3325209962Smm } 3326209962Smm } 3327219089Spjd } 3328219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 3329219089Spjd 3330219089Spjd if (mask & AT_MODE) { 3331219089Spjd uint64_t pmode = zp->z_mode; 3332219089Spjd uint64_t acl_obj; 3333219089Spjd new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3334219089Spjd 3335243560Smm if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 3336243560Smm !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3337249195Smm err = SET_ERROR(EPERM); 3338243560Smm goto out; 3339243560Smm } 3340243560Smm 3341224174Smm if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3342224174Smm goto out; 3343219089Spjd 3344219089Spjd mutex_enter(&zp->z_lock); 3345219089Spjd if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 3346219089Spjd /* 3347219089Spjd * Are we upgrading ACL from old V0 format 3348219089Spjd * to V1 format? 3349219089Spjd */ 3350219089Spjd if (zfsvfs->z_version >= ZPL_VERSION_FUID && 3351219089Spjd zfs_znode_acl_version(zp) == 3352219089Spjd ZFS_ACL_VERSION_INITIAL) { 3353219089Spjd dmu_tx_hold_free(tx, acl_obj, 0, 3354219089Spjd DMU_OBJECT_END); 3355219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3356219089Spjd 0, aclp->z_acl_bytes); 3357209962Smm } else { 3358219089Spjd dmu_tx_hold_write(tx, acl_obj, 0, 3359219089Spjd aclp->z_acl_bytes); 3360209962Smm } 3361219089Spjd } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 3362219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3363219089Spjd 0, aclp->z_acl_bytes); 3364209962Smm } 3365219089Spjd mutex_exit(&zp->z_lock); 3366219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3367219089Spjd } else { 3368219089Spjd if ((mask & AT_XVATTR) && 3369219089Spjd XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3370219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3371219089Spjd else 3372219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3373168404Spjd } 3374168404Spjd 3375219089Spjd if (attrzp) { 3376219089Spjd dmu_tx_hold_sa(tx, 
attrzp->z_sa_hdl, B_FALSE); 3377219089Spjd } 3378219089Spjd 3379219089Spjd fuid_dirtied = zfsvfs->z_fuid_dirty; 3380219089Spjd if (fuid_dirtied) 3381219089Spjd zfs_fuid_txhold(zfsvfs, tx); 3382219089Spjd 3383219089Spjd zfs_sa_upgrade_txholds(tx, zp); 3384219089Spjd 3385209962Smm err = dmu_tx_assign(tx, TXG_NOWAIT); 3386168404Spjd if (err) { 3387209962Smm if (err == ERESTART) 3388168404Spjd dmu_tx_wait(tx); 3389209962Smm goto out; 3390168404Spjd } 3391168404Spjd 3392219089Spjd count = 0; 3393168404Spjd /* 3394168404Spjd * Set each attribute requested. 3395168404Spjd * We group settings according to the locks they need to acquire. 3396168404Spjd * 3397168404Spjd * Note: you cannot set ctime directly, although it will be 3398168404Spjd * updated as a side-effect of calling this function. 3399168404Spjd */ 3400168404Spjd 3401219089Spjd 3402219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3403219089Spjd mutex_enter(&zp->z_acl_lock); 3404168404Spjd mutex_enter(&zp->z_lock); 3405168404Spjd 3406219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3407219089Spjd &zp->z_pflags, sizeof (zp->z_pflags)); 3408219089Spjd 3409219089Spjd if (attrzp) { 3410219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3411219089Spjd mutex_enter(&attrzp->z_acl_lock); 3412219089Spjd mutex_enter(&attrzp->z_lock); 3413219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3414219089Spjd SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3415219089Spjd sizeof (attrzp->z_pflags)); 3416219089Spjd } 3417219089Spjd 3418219089Spjd if (mask & (AT_UID|AT_GID)) { 3419219089Spjd 3420219089Spjd if (mask & AT_UID) { 3421219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 3422219089Spjd &new_uid, sizeof (new_uid)); 3423219089Spjd zp->z_uid = new_uid; 3424219089Spjd if (attrzp) { 3425219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3426219089Spjd SA_ZPL_UID(zfsvfs), NULL, &new_uid, 3427219089Spjd sizeof (new_uid)); 3428219089Spjd attrzp->z_uid = new_uid; 3429219089Spjd } 3430219089Spjd } 
3431219089Spjd 3432219089Spjd if (mask & AT_GID) { 3433219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 3434219089Spjd NULL, &new_gid, sizeof (new_gid)); 3435219089Spjd zp->z_gid = new_gid; 3436219089Spjd if (attrzp) { 3437219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3438219089Spjd SA_ZPL_GID(zfsvfs), NULL, &new_gid, 3439219089Spjd sizeof (new_gid)); 3440219089Spjd attrzp->z_gid = new_gid; 3441219089Spjd } 3442219089Spjd } 3443219089Spjd if (!(mask & AT_MODE)) { 3444219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 3445219089Spjd NULL, &new_mode, sizeof (new_mode)); 3446219089Spjd new_mode = zp->z_mode; 3447219089Spjd } 3448219089Spjd err = zfs_acl_chown_setattr(zp); 3449219089Spjd ASSERT(err == 0); 3450219089Spjd if (attrzp) { 3451219089Spjd err = zfs_acl_chown_setattr(attrzp); 3452219089Spjd ASSERT(err == 0); 3453219089Spjd } 3454219089Spjd } 3455219089Spjd 3456168404Spjd if (mask & AT_MODE) { 3457219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 3458219089Spjd &new_mode, sizeof (new_mode)); 3459219089Spjd zp->z_mode = new_mode; 3460219089Spjd ASSERT3U((uintptr_t)aclp, !=, 0); 3461209962Smm err = zfs_aclset_common(zp, aclp, cr, tx); 3462240415Smm ASSERT0(err); 3463219089Spjd if (zp->z_acl_cached) 3464219089Spjd zfs_acl_free(zp->z_acl_cached); 3465211932Smm zp->z_acl_cached = aclp; 3466211932Smm aclp = NULL; 3467168404Spjd } 3468168404Spjd 3469168404Spjd 3470219089Spjd if (mask & AT_ATIME) { 3471219089Spjd ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 3472219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 3473219089Spjd &zp->z_atime, sizeof (zp->z_atime)); 3474168404Spjd } 3475168404Spjd 3476219089Spjd if (mask & AT_MTIME) { 3477219089Spjd ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 3478219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 3479219089Spjd mtime, sizeof (mtime)); 3480168404Spjd } 3481168404Spjd 3482185029Spjd /* XXX - shouldn't this be done *before* the ATIME/MTIME 
checks? */ 3483219089Spjd if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3484219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3485219089Spjd NULL, mtime, sizeof (mtime)); 3486219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3487219089Spjd &ctime, sizeof (ctime)); 3488219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 3489219089Spjd B_TRUE); 3490219089Spjd } else if (mask != 0) { 3491219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3492219089Spjd &ctime, sizeof (ctime)); 3493219089Spjd zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 3494219089Spjd B_TRUE); 3495219089Spjd if (attrzp) { 3496219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3497219089Spjd SA_ZPL_CTIME(zfsvfs), NULL, 3498219089Spjd &ctime, sizeof (ctime)); 3499219089Spjd zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 3500219089Spjd mtime, ctime, B_TRUE); 3501219089Spjd } 3502219089Spjd } 3503185029Spjd /* 3504185029Spjd * Do this after setting timestamps to prevent timestamp 3505185029Spjd * update from toggling bit 3506185029Spjd */ 3507168404Spjd 3508185029Spjd if (xoap && (mask & AT_XVATTR)) { 3509209962Smm 3510209962Smm /* 3511209962Smm * restore trimmed off masks 3512209962Smm * so that return masks can be set for caller. 
3513209962Smm */ 3514209962Smm 3515209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3516209962Smm XVA_SET_REQ(xvap, XAT_APPENDONLY); 3517209962Smm } 3518209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3519209962Smm XVA_SET_REQ(xvap, XAT_NOUNLINK); 3520209962Smm } 3521209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3522209962Smm XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3523209962Smm } 3524209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3525209962Smm XVA_SET_REQ(xvap, XAT_NODUMP); 3526209962Smm } 3527209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3528209962Smm XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3529209962Smm } 3530209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3531209962Smm XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3532209962Smm } 3533209962Smm 3534219089Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3535185029Spjd ASSERT(vp->v_type == VREG); 3536185029Spjd 3537219089Spjd zfs_xvattr_set(zp, xvap, tx); 3538185029Spjd } 3539185029Spjd 3540209962Smm if (fuid_dirtied) 3541209962Smm zfs_fuid_sync(zfsvfs, tx); 3542209962Smm 3543168404Spjd if (mask != 0) 3544185029Spjd zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3545168404Spjd 3546168404Spjd mutex_exit(&zp->z_lock); 3547219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3548219089Spjd mutex_exit(&zp->z_acl_lock); 3549168404Spjd 3550219089Spjd if (attrzp) { 3551219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3552219089Spjd mutex_exit(&attrzp->z_acl_lock); 3553219089Spjd mutex_exit(&attrzp->z_lock); 3554219089Spjd } 3555209962Smmout: 3556219089Spjd if (err == 0 && attrzp) { 3557219089Spjd err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 3558219089Spjd xattr_count, tx); 3559219089Spjd ASSERT(err2 == 0); 3560219089Spjd } 3561219089Spjd 3562168404Spjd if (attrzp) 3563168404Spjd VN_RELE(ZTOV(attrzp)); 3564251631Sdelphij 3565211932Smm if (aclp) 3566209962Smm zfs_acl_free(aclp); 3567168404Spjd 3568209962Smm if (fuidp) { 3569209962Smm zfs_fuid_info_free(fuidp); 
3570209962Smm fuidp = NULL; 3571209962Smm } 3572209962Smm 3573219089Spjd if (err) { 3574209962Smm dmu_tx_abort(tx); 3575219089Spjd if (err == ERESTART) 3576219089Spjd goto top; 3577219089Spjd } else { 3578219089Spjd err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 3579209962Smm dmu_tx_commit(tx); 3580219089Spjd } 3581209962Smm 3582219089Spjdout2: 3583219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3584219089Spjd zil_commit(zilog, 0); 3585209962Smm 3586168404Spjd ZFS_EXIT(zfsvfs); 3587168404Spjd return (err); 3588168404Spjd} 3589168404Spjd 3590168404Spjdtypedef struct zfs_zlock { 3591168404Spjd krwlock_t *zl_rwlock; /* lock we acquired */ 3592168404Spjd znode_t *zl_znode; /* znode we held */ 3593168404Spjd struct zfs_zlock *zl_next; /* next in list */ 3594168404Spjd} zfs_zlock_t; 3595168404Spjd 3596168404Spjd/* 3597168404Spjd * Drop locks and release vnodes that were held by zfs_rename_lock(). 3598168404Spjd */ 3599168404Spjdstatic void 3600168404Spjdzfs_rename_unlock(zfs_zlock_t **zlpp) 3601168404Spjd{ 3602168404Spjd zfs_zlock_t *zl; 3603168404Spjd 3604168404Spjd while ((zl = *zlpp) != NULL) { 3605168404Spjd if (zl->zl_znode != NULL) 3606168404Spjd VN_RELE(ZTOV(zl->zl_znode)); 3607168404Spjd rw_exit(zl->zl_rwlock); 3608168404Spjd *zlpp = zl->zl_next; 3609168404Spjd kmem_free(zl, sizeof (*zl)); 3610168404Spjd } 3611168404Spjd} 3612168404Spjd 3613168404Spjd/* 3614168404Spjd * Search back through the directory tree, using the ".." entries. 3615168404Spjd * Lock each directory in the chain to prevent concurrent renames. 3616168404Spjd * Fail any attempt to move a directory into one of its own descendants. 
3617168404Spjd * XXX - z_parent_lock can overlap with map or grow locks 3618168404Spjd */ 3619168404Spjdstatic int 3620168404Spjdzfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 3621168404Spjd{ 3622168404Spjd zfs_zlock_t *zl; 3623168404Spjd znode_t *zp = tdzp; 3624168404Spjd uint64_t rootid = zp->z_zfsvfs->z_root; 3625219089Spjd uint64_t oidp = zp->z_id; 3626168404Spjd krwlock_t *rwlp = &szp->z_parent_lock; 3627168404Spjd krw_t rw = RW_WRITER; 3628168404Spjd 3629168404Spjd /* 3630168404Spjd * First pass write-locks szp and compares to zp->z_id. 3631168404Spjd * Later passes read-lock zp and compare to zp->z_parent. 3632168404Spjd */ 3633168404Spjd do { 3634168404Spjd if (!rw_tryenter(rwlp, rw)) { 3635168404Spjd /* 3636168404Spjd * Another thread is renaming in this path. 3637168404Spjd * Note that if we are a WRITER, we don't have any 3638168404Spjd * parent_locks held yet. 3639168404Spjd */ 3640168404Spjd if (rw == RW_READER && zp->z_id > szp->z_id) { 3641168404Spjd /* 3642168404Spjd * Drop our locks and restart 3643168404Spjd */ 3644168404Spjd zfs_rename_unlock(&zl); 3645168404Spjd *zlpp = NULL; 3646168404Spjd zp = tdzp; 3647219089Spjd oidp = zp->z_id; 3648168404Spjd rwlp = &szp->z_parent_lock; 3649168404Spjd rw = RW_WRITER; 3650168404Spjd continue; 3651168404Spjd } else { 3652168404Spjd /* 3653168404Spjd * Wait for other thread to drop its locks 3654168404Spjd */ 3655168404Spjd rw_enter(rwlp, rw); 3656168404Spjd } 3657168404Spjd } 3658168404Spjd 3659168404Spjd zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3660168404Spjd zl->zl_rwlock = rwlp; 3661168404Spjd zl->zl_znode = NULL; 3662168404Spjd zl->zl_next = *zlpp; 3663168404Spjd *zlpp = zl; 3664168404Spjd 3665219089Spjd if (oidp == szp->z_id) /* We're a descendant of szp */ 3666249195Smm return (SET_ERROR(EINVAL)); 3667168404Spjd 3668219089Spjd if (oidp == rootid) /* We've hit the top */ 3669168404Spjd return (0); 3670168404Spjd 3671168404Spjd if (rw == RW_READER) { /* i.e. 
not the first pass */ 3672219089Spjd int error = zfs_zget(zp->z_zfsvfs, oidp, &zp); 3673168404Spjd if (error) 3674168404Spjd return (error); 3675168404Spjd zl->zl_znode = zp; 3676168404Spjd } 3677219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs), 3678219089Spjd &oidp, sizeof (oidp)); 3679168404Spjd rwlp = &zp->z_parent_lock; 3680168404Spjd rw = RW_READER; 3681168404Spjd 3682168404Spjd } while (zp->z_id != sdzp->z_id); 3683168404Spjd 3684168404Spjd return (0); 3685168404Spjd} 3686168404Spjd 3687168404Spjd/* 3688168404Spjd * Move an entry from the provided source directory to the target 3689168404Spjd * directory. Change the entry name as indicated. 3690168404Spjd * 3691168404Spjd * IN: sdvp - Source directory containing the "old entry". 3692168404Spjd * snm - Old entry name. 3693168404Spjd * tdvp - Target directory to contain the "new entry". 3694168404Spjd * tnm - New entry name. 3695168404Spjd * cr - credentials of caller. 3696185029Spjd * ct - caller context 3697185029Spjd * flags - case flags 3698168404Spjd * 3699251631Sdelphij * RETURN: 0 on success, error code on failure. 
3700168404Spjd * 3701168404Spjd * Timestamps: 3702168404Spjd * sdvp,tdvp - ctime|mtime updated 3703168404Spjd */ 3704185029Spjd/*ARGSUSED*/ 3705168404Spjdstatic int 3706185029Spjdzfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, 3707185029Spjd caller_context_t *ct, int flags) 3708168404Spjd{ 3709168404Spjd znode_t *tdzp, *szp, *tzp; 3710168404Spjd znode_t *sdzp = VTOZ(sdvp); 3711168404Spjd zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; 3712185029Spjd zilog_t *zilog; 3713168962Spjd vnode_t *realvp; 3714168404Spjd zfs_dirlock_t *sdl, *tdl; 3715168404Spjd dmu_tx_t *tx; 3716168404Spjd zfs_zlock_t *zl; 3717185029Spjd int cmp, serr, terr; 3718185029Spjd int error = 0; 3719185029Spjd int zflg = 0; 3720168404Spjd 3721168404Spjd ZFS_ENTER(zfsvfs); 3722185029Spjd ZFS_VERIFY_ZP(sdzp); 3723185029Spjd zilog = zfsvfs->z_log; 3724168404Spjd 3725168962Spjd /* 3726168962Spjd * Make sure we have the real vp for the target directory. 3727168962Spjd */ 3728185029Spjd if (VOP_REALVP(tdvp, &realvp, ct) == 0) 3729168962Spjd tdvp = realvp; 3730168962Spjd 3731254585Sdelphij tdzp = VTOZ(tdvp); 3732254585Sdelphij ZFS_VERIFY_ZP(tdzp); 3733254585Sdelphij 3734254585Sdelphij /* 3735254585Sdelphij * We check z_zfsvfs rather than v_vfsp here, because snapshots and the 3736254585Sdelphij * ctldir appear to have the same v_vfsp. 
3737254585Sdelphij */ 3738254585Sdelphij if (tdzp->z_zfsvfs != zfsvfs || zfsctl_is_node(tdvp)) { 3739168404Spjd ZFS_EXIT(zfsvfs); 3740249195Smm return (SET_ERROR(EXDEV)); 3741168404Spjd } 3742168404Spjd 3743185029Spjd if (zfsvfs->z_utf8 && u8_validate(tnm, 3744185029Spjd strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3745185029Spjd ZFS_EXIT(zfsvfs); 3746249195Smm return (SET_ERROR(EILSEQ)); 3747185029Spjd } 3748185029Spjd 3749185029Spjd if (flags & FIGNORECASE) 3750185029Spjd zflg |= ZCILOOK; 3751185029Spjd 3752168404Spjdtop: 3753168404Spjd szp = NULL; 3754168404Spjd tzp = NULL; 3755168404Spjd zl = NULL; 3756168404Spjd 3757168404Spjd /* 3758168404Spjd * This is to prevent the creation of links into attribute space 3759168404Spjd * by renaming a linked file into/outof an attribute directory. 3760168404Spjd * See the comment in zfs_link() for why this is considered bad. 3761168404Spjd */ 3762219089Spjd if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3763168962Spjd ZFS_EXIT(zfsvfs); 3764249195Smm return (SET_ERROR(EINVAL)); 3765168404Spjd } 3766168404Spjd 3767168404Spjd /* 3768168404Spjd * Lock source and target directory entries. To prevent deadlock, 3769168404Spjd * a lock ordering must be defined. We lock the directory with 3770168404Spjd * the smallest object id first, or if it's a tie, the one with 3771168404Spjd * the lexically first name. 3772168404Spjd */ 3773168404Spjd if (sdzp->z_id < tdzp->z_id) { 3774168962Spjd cmp = -1; 3775168962Spjd } else if (sdzp->z_id > tdzp->z_id) { 3776168962Spjd cmp = 1; 3777168962Spjd } else { 3778185029Spjd /* 3779185029Spjd * First compare the two name arguments without 3780185029Spjd * considering any case folding. 
3781185029Spjd */ 3782185029Spjd int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); 3783185029Spjd 3784185029Spjd cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); 3785185029Spjd ASSERT(error == 0 || !zfsvfs->z_utf8); 3786168962Spjd if (cmp == 0) { 3787168962Spjd /* 3788168962Spjd * POSIX: "If the old argument and the new argument 3789168962Spjd * both refer to links to the same existing file, 3790168962Spjd * the rename() function shall return successfully 3791168962Spjd * and perform no other action." 3792168962Spjd */ 3793168962Spjd ZFS_EXIT(zfsvfs); 3794168962Spjd return (0); 3795168962Spjd } 3796185029Spjd /* 3797185029Spjd * If the file system is case-folding, then we may 3798185029Spjd * have some more checking to do. A case-folding file 3799185029Spjd * system is either supporting mixed case sensitivity 3800185029Spjd * access or is completely case-insensitive. Note 3801185029Spjd * that the file system is always case preserving. 3802185029Spjd * 3803185029Spjd * In mixed sensitivity mode case sensitive behavior 3804185029Spjd * is the default. FIGNORECASE must be used to 3805185029Spjd * explicitly request case insensitive behavior. 3806185029Spjd * 3807185029Spjd * If the source and target names provided differ only 3808185029Spjd * by case (e.g., a request to rename 'tim' to 'Tim'), 3809185029Spjd * we will treat this as a special case in the 3810185029Spjd * case-insensitive mode: as long as the source name 3811185029Spjd * is an exact match, we will allow this to proceed as 3812185029Spjd * a name-change request. 
3813185029Spjd */ 3814185029Spjd if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 3815185029Spjd (zfsvfs->z_case == ZFS_CASE_MIXED && 3816185029Spjd flags & FIGNORECASE)) && 3817185029Spjd u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, 3818185029Spjd &error) == 0) { 3819185029Spjd /* 3820185029Spjd * case preserving rename request, require exact 3821185029Spjd * name matches 3822185029Spjd */ 3823185029Spjd zflg |= ZCIEXACT; 3824185029Spjd zflg &= ~ZCILOOK; 3825185029Spjd } 3826168962Spjd } 3827185029Spjd 3828208131Smm /* 3829208131Smm * If the source and destination directories are the same, we should 3830208131Smm * grab the z_name_lock of that directory only once. 3831208131Smm */ 3832208131Smm if (sdzp == tdzp) { 3833208131Smm zflg |= ZHAVELOCK; 3834208131Smm rw_enter(&sdzp->z_name_lock, RW_READER); 3835208131Smm } 3836208131Smm 3837168962Spjd if (cmp < 0) { 3838185029Spjd serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, 3839185029Spjd ZEXISTS | zflg, NULL, NULL); 3840185029Spjd terr = zfs_dirent_lock(&tdl, 3841185029Spjd tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); 3842168962Spjd } else { 3843185029Spjd terr = zfs_dirent_lock(&tdl, 3844185029Spjd tdzp, tnm, &tzp, zflg, NULL, NULL); 3845185029Spjd serr = zfs_dirent_lock(&sdl, 3846185029Spjd sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, 3847185029Spjd NULL, NULL); 3848168404Spjd } 3849168404Spjd 3850168962Spjd if (serr) { 3851168404Spjd /* 3852168404Spjd * Source entry invalid or not there. 3853168404Spjd */ 3854168962Spjd if (!terr) { 3855168404Spjd zfs_dirent_unlock(tdl); 3856168962Spjd if (tzp) 3857168962Spjd VN_RELE(ZTOV(tzp)); 3858168962Spjd } 3859208131Smm 3860208131Smm if (sdzp == tdzp) 3861208131Smm rw_exit(&sdzp->z_name_lock); 3862208131Smm 3863219089Spjd /* 3864219089Spjd * FreeBSD: In OpenSolaris they only check if rename source is 3865219089Spjd * ".." here, because "." is handled in their lookup. This is 3866219089Spjd * not the case for FreeBSD, so we check for "." explicitly. 
3867219089Spjd */ 3868168404Spjd if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0) 3869249195Smm serr = SET_ERROR(EINVAL); 3870168962Spjd ZFS_EXIT(zfsvfs); 3871168962Spjd return (serr); 3872168404Spjd } 3873168404Spjd if (terr) { 3874168404Spjd zfs_dirent_unlock(sdl); 3875168962Spjd VN_RELE(ZTOV(szp)); 3876208131Smm 3877208131Smm if (sdzp == tdzp) 3878208131Smm rw_exit(&sdzp->z_name_lock); 3879208131Smm 3880168404Spjd if (strcmp(tnm, "..") == 0) 3881249195Smm terr = SET_ERROR(EINVAL); 3882168962Spjd ZFS_EXIT(zfsvfs); 3883168962Spjd return (terr); 3884168404Spjd } 3885168404Spjd 3886168404Spjd /* 3887168404Spjd * Must have write access at the source to remove the old entry 3888168404Spjd * and write access at the target to create the new entry. 3889168404Spjd * Note that if target and source are the same, this can be 3890168404Spjd * done in a single check. 3891168404Spjd */ 3892168404Spjd 3893168404Spjd if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3894168404Spjd goto out; 3895168404Spjd 3896168962Spjd if (ZTOV(szp)->v_type == VDIR) { 3897168404Spjd /* 3898168404Spjd * Check to make sure rename is valid. 3899168404Spjd * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3900168404Spjd */ 3901168404Spjd if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) 3902168404Spjd goto out; 3903168404Spjd } 3904168404Spjd 3905168404Spjd /* 3906168404Spjd * Does target exist? 3907168404Spjd */ 3908168404Spjd if (tzp) { 3909168404Spjd /* 3910168404Spjd * Source and target must be the same type. 
3911168404Spjd */ 3912168962Spjd if (ZTOV(szp)->v_type == VDIR) { 3913168962Spjd if (ZTOV(tzp)->v_type != VDIR) { 3914249195Smm error = SET_ERROR(ENOTDIR); 3915168404Spjd goto out; 3916168404Spjd } 3917168404Spjd } else { 3918168962Spjd if (ZTOV(tzp)->v_type == VDIR) { 3919249195Smm error = SET_ERROR(EISDIR); 3920168404Spjd goto out; 3921168404Spjd } 3922168404Spjd } 3923168404Spjd /* 3924168404Spjd * POSIX dictates that when the source and target 3925168404Spjd * entries refer to the same file object, rename 3926168404Spjd * must do nothing and exit without error. 3927168404Spjd */ 3928168404Spjd if (szp->z_id == tzp->z_id) { 3929168404Spjd error = 0; 3930168404Spjd goto out; 3931168404Spjd } 3932168404Spjd } 3933168404Spjd 3934185029Spjd vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); 3935168962Spjd if (tzp) 3936185029Spjd vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 3937168962Spjd 3938185029Spjd /* 3939185029Spjd * notify the target directory if it is not the same 3940185029Spjd * as source directory. 
3941185029Spjd */ 3942185029Spjd if (tdvp != sdvp) { 3943185029Spjd vnevent_rename_dest_dir(tdvp, ct); 3944185029Spjd } 3945185029Spjd 3946168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 3947219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 3948219089Spjd dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3949168404Spjd dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3950168404Spjd dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 3951219089Spjd if (sdzp != tdzp) { 3952219089Spjd dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 3953219089Spjd zfs_sa_upgrade_txholds(tx, tdzp); 3954219089Spjd } 3955219089Spjd if (tzp) { 3956219089Spjd dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 3957219089Spjd zfs_sa_upgrade_txholds(tx, tzp); 3958219089Spjd } 3959219089Spjd 3960219089Spjd zfs_sa_upgrade_txholds(tx, szp); 3961168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3962209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 3963168404Spjd if (error) { 3964168404Spjd if (zl != NULL) 3965168404Spjd zfs_rename_unlock(&zl); 3966168404Spjd zfs_dirent_unlock(sdl); 3967168404Spjd zfs_dirent_unlock(tdl); 3968208131Smm 3969208131Smm if (sdzp == tdzp) 3970208131Smm rw_exit(&sdzp->z_name_lock); 3971208131Smm 3972168962Spjd VN_RELE(ZTOV(szp)); 3973168962Spjd if (tzp) 3974168962Spjd VN_RELE(ZTOV(tzp)); 3975209962Smm if (error == ERESTART) { 3976168404Spjd dmu_tx_wait(tx); 3977168404Spjd dmu_tx_abort(tx); 3978168404Spjd goto top; 3979168404Spjd } 3980168404Spjd dmu_tx_abort(tx); 3981168962Spjd ZFS_EXIT(zfsvfs); 3982168962Spjd return (error); 3983168404Spjd } 3984168404Spjd 3985168404Spjd if (tzp) /* Attempt to remove the existing target */ 3986185029Spjd error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); 3987168404Spjd 3988168404Spjd if (error == 0) { 3989168404Spjd error = zfs_link_create(tdl, szp, tx, ZRENAMING); 3990168404Spjd if (error == 0) { 3991219089Spjd szp->z_pflags |= ZFS_AV_MODIFIED; 3992185029Spjd 3993219089Spjd error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 3994219089Spjd 
(void *)&szp->z_pflags, sizeof (uint64_t), tx); 3995240415Smm ASSERT0(error); 3996219089Spjd 3997168404Spjd error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); 3998219089Spjd if (error == 0) { 3999219089Spjd zfs_log_rename(zilog, tx, TX_RENAME | 4000219089Spjd (flags & FIGNORECASE ? TX_CI : 0), sdzp, 4001219089Spjd sdl->dl_name, tdzp, tdl->dl_name, szp); 4002185029Spjd 4003219089Spjd /* 4004219089Spjd * Update path information for the target vnode 4005219089Spjd */ 4006219089Spjd vn_renamepath(tdvp, ZTOV(szp), tnm, 4007219089Spjd strlen(tnm)); 4008219089Spjd } else { 4009219089Spjd /* 4010219089Spjd * At this point, we have successfully created 4011219089Spjd * the target name, but have failed to remove 4012219089Spjd * the source name. Since the create was done 4013219089Spjd * with the ZRENAMING flag, there are 4014219089Spjd * complications; for one, the link count is 4015219089Spjd * wrong. The easiest way to deal with this 4016219089Spjd * is to remove the newly created target, and 4017219089Spjd * return the original error. This must 4018219089Spjd * succeed; fortunately, it is very unlikely to 4019219089Spjd * fail, since we just created it. 
4020219089Spjd */ 4021219089Spjd VERIFY3U(zfs_link_destroy(tdl, szp, tx, 4022219089Spjd ZRENAMING, NULL), ==, 0); 4023219089Spjd } 4024168404Spjd } 4025168404Spjd#ifdef FREEBSD_NAMECACHE 4026168404Spjd if (error == 0) { 4027168404Spjd cache_purge(sdvp); 4028168404Spjd cache_purge(tdvp); 4029240829Spjd cache_purge(ZTOV(szp)); 4030240829Spjd if (tzp) 4031240829Spjd cache_purge(ZTOV(tzp)); 4032168404Spjd } 4033168404Spjd#endif 4034168404Spjd } 4035168404Spjd 4036168404Spjd dmu_tx_commit(tx); 4037168404Spjdout: 4038168404Spjd if (zl != NULL) 4039168404Spjd zfs_rename_unlock(&zl); 4040168404Spjd 4041168404Spjd zfs_dirent_unlock(sdl); 4042168404Spjd zfs_dirent_unlock(tdl); 4043168404Spjd 4044208131Smm if (sdzp == tdzp) 4045208131Smm rw_exit(&sdzp->z_name_lock); 4046208131Smm 4047219089Spjd 4048168962Spjd VN_RELE(ZTOV(szp)); 4049168404Spjd if (tzp) 4050168962Spjd VN_RELE(ZTOV(tzp)); 4051168404Spjd 4052219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4053219089Spjd zil_commit(zilog, 0); 4054219089Spjd 4055168404Spjd ZFS_EXIT(zfsvfs); 4056168404Spjd 4057168404Spjd return (error); 4058168404Spjd} 4059168404Spjd 4060168404Spjd/* 4061168404Spjd * Insert the indicated symbolic reference entry into the directory. 4062168404Spjd * 4063168404Spjd * IN: dvp - Directory to contain new symbolic link. 4064168404Spjd * link - Name for new symlink entry. 4065168404Spjd * vap - Attributes of new entry. 4066168404Spjd * cr - credentials of caller. 4067185029Spjd * ct - caller context 4068185029Spjd * flags - case flags 4069168404Spjd * 4070251631Sdelphij * RETURN: 0 on success, error code on failure. 
4071168404Spjd * 4072168404Spjd * Timestamps: 4073168404Spjd * dvp - ctime|mtime updated 4074168404Spjd */ 4075185029Spjd/*ARGSUSED*/ 4076168404Spjdstatic int 4077185029Spjdzfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 4078185029Spjd cred_t *cr, kthread_t *td) 4079168404Spjd{ 4080168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 4081168404Spjd zfs_dirlock_t *dl; 4082168404Spjd dmu_tx_t *tx; 4083168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4084185029Spjd zilog_t *zilog; 4085219089Spjd uint64_t len = strlen(link); 4086168404Spjd int error; 4087185029Spjd int zflg = ZNEW; 4088209962Smm zfs_acl_ids_t acl_ids; 4089209962Smm boolean_t fuid_dirtied; 4090219089Spjd uint64_t txtype = TX_SYMLINK; 4091185029Spjd int flags = 0; 4092168404Spjd 4093168962Spjd ASSERT(vap->va_type == VLNK); 4094168404Spjd 4095168404Spjd ZFS_ENTER(zfsvfs); 4096185029Spjd ZFS_VERIFY_ZP(dzp); 4097185029Spjd zilog = zfsvfs->z_log; 4098185029Spjd 4099185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 4100185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4101185029Spjd ZFS_EXIT(zfsvfs); 4102249195Smm return (SET_ERROR(EILSEQ)); 4103185029Spjd } 4104185029Spjd if (flags & FIGNORECASE) 4105185029Spjd zflg |= ZCILOOK; 4106168404Spjd 4107168404Spjd if (len > MAXPATHLEN) { 4108168404Spjd ZFS_EXIT(zfsvfs); 4109249195Smm return (SET_ERROR(ENAMETOOLONG)); 4110168404Spjd } 4111168404Spjd 4112219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, 4113219089Spjd vap, cr, NULL, &acl_ids)) != 0) { 4114219089Spjd ZFS_EXIT(zfsvfs); 4115219089Spjd return (error); 4116219089Spjd } 4117219089Spjdtop: 4118168404Spjd /* 4119168404Spjd * Attempt to lock directory; fail if entry already exists. 
4120168404Spjd */ 4121185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 4122185029Spjd if (error) { 4123219089Spjd zfs_acl_ids_free(&acl_ids); 4124168404Spjd ZFS_EXIT(zfsvfs); 4125168404Spjd return (error); 4126168404Spjd } 4127168404Spjd 4128219089Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4129219089Spjd zfs_acl_ids_free(&acl_ids); 4130219089Spjd zfs_dirent_unlock(dl); 4131219089Spjd ZFS_EXIT(zfsvfs); 4132219089Spjd return (error); 4133219089Spjd } 4134219089Spjd 4135209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 4136209962Smm zfs_acl_ids_free(&acl_ids); 4137209962Smm zfs_dirent_unlock(dl); 4138209962Smm ZFS_EXIT(zfsvfs); 4139249195Smm return (SET_ERROR(EDQUOT)); 4140209962Smm } 4141168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4142209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 4143168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 4144168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4145219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 4146219089Spjd ZFS_SA_BASE_ATTR_SIZE + len); 4147219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 4148219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 4149219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 4150219089Spjd acl_ids.z_aclp->z_acl_bytes); 4151219089Spjd } 4152209962Smm if (fuid_dirtied) 4153209962Smm zfs_fuid_txhold(zfsvfs, tx); 4154209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 4155168404Spjd if (error) { 4156168404Spjd zfs_dirent_unlock(dl); 4157209962Smm if (error == ERESTART) { 4158168404Spjd dmu_tx_wait(tx); 4159168404Spjd dmu_tx_abort(tx); 4160168404Spjd goto top; 4161168404Spjd } 4162219089Spjd zfs_acl_ids_free(&acl_ids); 4163168404Spjd dmu_tx_abort(tx); 4164168404Spjd ZFS_EXIT(zfsvfs); 4165168404Spjd return (error); 4166168404Spjd } 4167168404Spjd 4168168404Spjd /* 4169168404Spjd * Create a new object for the symlink. 
4170219089Spjd * for version 4 ZPL datsets the symlink will be an SA attribute 4171168404Spjd */ 4172219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 4173168404Spjd 4174219089Spjd if (fuid_dirtied) 4175219089Spjd zfs_fuid_sync(zfsvfs, tx); 4176209962Smm 4177219089Spjd mutex_enter(&zp->z_lock); 4178219089Spjd if (zp->z_is_sa) 4179219089Spjd error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 4180219089Spjd link, len, tx); 4181219089Spjd else 4182219089Spjd zfs_sa_symlink(zp, link, len, tx); 4183219089Spjd mutex_exit(&zp->z_lock); 4184168404Spjd 4185219089Spjd zp->z_size = len; 4186219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 4187219089Spjd &zp->z_size, sizeof (zp->z_size), tx); 4188168404Spjd /* 4189168404Spjd * Insert the new object into the directory. 4190168404Spjd */ 4191168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 4192168404Spjd 4193219089Spjd if (flags & FIGNORECASE) 4194219089Spjd txtype |= TX_CI; 4195219089Spjd zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 4196219089Spjd *vpp = ZTOV(zp); 4197219089Spjd 4198209962Smm zfs_acl_ids_free(&acl_ids); 4199209962Smm 4200168404Spjd dmu_tx_commit(tx); 4201168404Spjd 4202168404Spjd zfs_dirent_unlock(dl); 4203168404Spjd 4204219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4205219089Spjd zil_commit(zilog, 0); 4206219089Spjd 4207168404Spjd ZFS_EXIT(zfsvfs); 4208168404Spjd return (error); 4209168404Spjd} 4210168404Spjd 4211168404Spjd/* 4212168404Spjd * Return, in the buffer contained in the provided uio structure, 4213168404Spjd * the symbolic path referred to by vp. 4214168404Spjd * 4215168404Spjd * IN: vp - vnode of symbolic link. 4216251631Sdelphij * uio - structure to contain the link path. 4217168404Spjd * cr - credentials of caller. 4218185029Spjd * ct - caller context 4219168404Spjd * 4220251631Sdelphij * OUT: uio - structure containing the link path. 4221168404Spjd * 4222251631Sdelphij * RETURN: 0 on success, error code on failure. 
4223168404Spjd * 4224168404Spjd * Timestamps: 4225168404Spjd * vp - atime updated 4226168404Spjd */ 4227168404Spjd/* ARGSUSED */ 4228168404Spjdstatic int 4229185029Spjdzfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 4230168404Spjd{ 4231168404Spjd znode_t *zp = VTOZ(vp); 4232168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4233168404Spjd int error; 4234168404Spjd 4235168404Spjd ZFS_ENTER(zfsvfs); 4236185029Spjd ZFS_VERIFY_ZP(zp); 4237168404Spjd 4238219089Spjd mutex_enter(&zp->z_lock); 4239219089Spjd if (zp->z_is_sa) 4240219089Spjd error = sa_lookup_uio(zp->z_sa_hdl, 4241219089Spjd SA_ZPL_SYMLINK(zfsvfs), uio); 4242219089Spjd else 4243219089Spjd error = zfs_sa_readlink(zp, uio); 4244219089Spjd mutex_exit(&zp->z_lock); 4245168404Spjd 4246168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4247219089Spjd 4248168404Spjd ZFS_EXIT(zfsvfs); 4249168404Spjd return (error); 4250168404Spjd} 4251168404Spjd 4252168404Spjd/* 4253168404Spjd * Insert a new entry into directory tdvp referencing svp. 4254168404Spjd * 4255168404Spjd * IN: tdvp - Directory to contain new entry. 4256168404Spjd * svp - vnode of new entry. 4257168404Spjd * name - name of new entry. 4258168404Spjd * cr - credentials of caller. 4259185029Spjd * ct - caller context 4260168404Spjd * 4261251631Sdelphij * RETURN: 0 on success, error code on failure. 
4262168404Spjd * 4263168404Spjd * Timestamps: 4264168404Spjd * tdvp - ctime|mtime updated 4265168404Spjd * svp - ctime updated 4266168404Spjd */ 4267168404Spjd/* ARGSUSED */ 4268168404Spjdstatic int 4269185029Spjdzfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 4270185029Spjd caller_context_t *ct, int flags) 4271168404Spjd{ 4272168404Spjd znode_t *dzp = VTOZ(tdvp); 4273168404Spjd znode_t *tzp, *szp; 4274168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4275185029Spjd zilog_t *zilog; 4276168404Spjd zfs_dirlock_t *dl; 4277168404Spjd dmu_tx_t *tx; 4278168962Spjd vnode_t *realvp; 4279168404Spjd int error; 4280185029Spjd int zf = ZNEW; 4281212694Smm uint64_t parent; 4282185029Spjd uid_t owner; 4283168404Spjd 4284168404Spjd ASSERT(tdvp->v_type == VDIR); 4285168404Spjd 4286168404Spjd ZFS_ENTER(zfsvfs); 4287185029Spjd ZFS_VERIFY_ZP(dzp); 4288185029Spjd zilog = zfsvfs->z_log; 4289168404Spjd 4290185029Spjd if (VOP_REALVP(svp, &realvp, ct) == 0) 4291168962Spjd svp = realvp; 4292168962Spjd 4293212694Smm /* 4294212694Smm * POSIX dictates that we return EPERM here. 4295212694Smm * Better choices include ENOTSUP or EISDIR. 4296212694Smm */ 4297212694Smm if (svp->v_type == VDIR) { 4298168404Spjd ZFS_EXIT(zfsvfs); 4299249195Smm return (SET_ERROR(EPERM)); 4300212694Smm } 4301212694Smm 4302254585Sdelphij szp = VTOZ(svp); 4303254585Sdelphij ZFS_VERIFY_ZP(szp); 4304254585Sdelphij 4305254585Sdelphij /* 4306254585Sdelphij * We check z_zfsvfs rather than v_vfsp here, because snapshots and the 4307254585Sdelphij * ctldir appear to have the same v_vfsp. 
4308254585Sdelphij */ 4309254585Sdelphij if (szp->z_zfsvfs != zfsvfs || zfsctl_is_node(svp)) { 4310212694Smm ZFS_EXIT(zfsvfs); 4311249195Smm return (SET_ERROR(EXDEV)); 4312168404Spjd } 4313212694Smm 4314212694Smm /* Prevent links to .zfs/shares files */ 4315212694Smm 4316219089Spjd if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 4317219089Spjd &parent, sizeof (uint64_t))) != 0) { 4318212694Smm ZFS_EXIT(zfsvfs); 4319219089Spjd return (error); 4320219089Spjd } 4321219089Spjd if (parent == zfsvfs->z_shares_dir) { 4322219089Spjd ZFS_EXIT(zfsvfs); 4323249195Smm return (SET_ERROR(EPERM)); 4324212694Smm } 4325212694Smm 4326185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, 4327185029Spjd strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4328185029Spjd ZFS_EXIT(zfsvfs); 4329249195Smm return (SET_ERROR(EILSEQ)); 4330185029Spjd } 4331185029Spjd if (flags & FIGNORECASE) 4332185029Spjd zf |= ZCILOOK; 4333185029Spjd 4334168404Spjd /* 4335168404Spjd * We do not support links between attributes and non-attributes 4336168404Spjd * because of the potential security risk of creating links 4337168404Spjd * into "normal" file space in order to circumvent restrictions 4338168404Spjd * imposed in attribute space. 4339168404Spjd */ 4340219089Spjd if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4341168404Spjd ZFS_EXIT(zfsvfs); 4342249195Smm return (SET_ERROR(EINVAL)); 4343168404Spjd } 4344168404Spjd 4345168404Spjd 4346219089Spjd owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4347219089Spjd if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { 4348168404Spjd ZFS_EXIT(zfsvfs); 4349249195Smm return (SET_ERROR(EPERM)); 4350168404Spjd } 4351168404Spjd 4352185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4353168404Spjd ZFS_EXIT(zfsvfs); 4354168404Spjd return (error); 4355168404Spjd } 4356168404Spjd 4357212694Smmtop: 4358168404Spjd /* 4359168404Spjd * Attempt to lock directory; fail if entry already exists. 
4360168404Spjd */ 4361185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 4362185029Spjd if (error) { 4363168404Spjd ZFS_EXIT(zfsvfs); 4364168404Spjd return (error); 4365168404Spjd } 4366168404Spjd 4367168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4368219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4369168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4370219089Spjd zfs_sa_upgrade_txholds(tx, szp); 4371219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 4372209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 4373168404Spjd if (error) { 4374168404Spjd zfs_dirent_unlock(dl); 4375209962Smm if (error == ERESTART) { 4376168404Spjd dmu_tx_wait(tx); 4377168404Spjd dmu_tx_abort(tx); 4378168404Spjd goto top; 4379168404Spjd } 4380168404Spjd dmu_tx_abort(tx); 4381168404Spjd ZFS_EXIT(zfsvfs); 4382168404Spjd return (error); 4383168404Spjd } 4384168404Spjd 4385168404Spjd error = zfs_link_create(dl, szp, tx, 0); 4386168404Spjd 4387185029Spjd if (error == 0) { 4388185029Spjd uint64_t txtype = TX_LINK; 4389185029Spjd if (flags & FIGNORECASE) 4390185029Spjd txtype |= TX_CI; 4391185029Spjd zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4392185029Spjd } 4393168404Spjd 4394168404Spjd dmu_tx_commit(tx); 4395168404Spjd 4396168404Spjd zfs_dirent_unlock(dl); 4397168404Spjd 4398185029Spjd if (error == 0) { 4399185029Spjd vnevent_link(svp, ct); 4400185029Spjd } 4401185029Spjd 4402219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4403219089Spjd zil_commit(zilog, 0); 4404219089Spjd 4405168404Spjd ZFS_EXIT(zfsvfs); 4406168404Spjd return (error); 4407168404Spjd} 4408168404Spjd 4409219089Spjd#ifdef sun 4410219089Spjd/* 4411219089Spjd * zfs_null_putapage() is used when the file system has been force 4412219089Spjd * unmounted. It just drops the pages. 
4413219089Spjd */ 4414219089Spjd/* ARGSUSED */ 4415219089Spjdstatic int 4416219089Spjdzfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4417219089Spjd size_t *lenp, int flags, cred_t *cr) 4418219089Spjd{ 4419219089Spjd pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); 4420219089Spjd return (0); 4421219089Spjd} 4422219089Spjd 4423219089Spjd/* 4424219089Spjd * Push a page out to disk, klustering if possible. 4425219089Spjd * 4426219089Spjd * IN: vp - file to push page to. 4427219089Spjd * pp - page to push. 4428219089Spjd * flags - additional flags. 4429219089Spjd * cr - credentials of caller. 4430219089Spjd * 4431219089Spjd * OUT: offp - start of range pushed. 4432219089Spjd * lenp - len of range pushed. 4433219089Spjd * 4434251631Sdelphij * RETURN: 0 on success, error code on failure. 4435219089Spjd * 4436219089Spjd * NOTE: callers must have locked the page to be pushed. On 4437219089Spjd * exit, the page (and all other pages in the kluster) must be 4438219089Spjd * unlocked. 4439219089Spjd */ 4440219089Spjd/* ARGSUSED */ 4441219089Spjdstatic int 4442219089Spjdzfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4443219089Spjd size_t *lenp, int flags, cred_t *cr) 4444219089Spjd{ 4445219089Spjd znode_t *zp = VTOZ(vp); 4446219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4447219089Spjd dmu_tx_t *tx; 4448219089Spjd u_offset_t off, koff; 4449219089Spjd size_t len, klen; 4450219089Spjd int err; 4451219089Spjd 4452219089Spjd off = pp->p_offset; 4453219089Spjd len = PAGESIZE; 4454219089Spjd /* 4455219089Spjd * If our blocksize is bigger than the page size, try to kluster 4456219089Spjd * multiple pages so that we write a full block (thus avoiding 4457219089Spjd * a read-modify-write). 4458219089Spjd */ 4459219089Spjd if (off < zp->z_size && zp->z_blksz > PAGESIZE) { 4460219089Spjd klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); 4461219089Spjd koff = ISP2(klen) ? 
P2ALIGN(off, (u_offset_t)klen) : 0; 4462219089Spjd ASSERT(koff <= zp->z_size); 4463219089Spjd if (koff + klen > zp->z_size) 4464219089Spjd klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); 4465219089Spjd pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); 4466219089Spjd } 4467219089Spjd ASSERT3U(btop(len), ==, btopr(len)); 4468219089Spjd 4469219089Spjd /* 4470219089Spjd * Can't push pages past end-of-file. 4471219089Spjd */ 4472219089Spjd if (off >= zp->z_size) { 4473219089Spjd /* ignore all pages */ 4474219089Spjd err = 0; 4475219089Spjd goto out; 4476219089Spjd } else if (off + len > zp->z_size) { 4477219089Spjd int npages = btopr(zp->z_size - off); 4478219089Spjd page_t *trunc; 4479219089Spjd 4480219089Spjd page_list_break(&pp, &trunc, npages); 4481219089Spjd /* ignore pages past end of file */ 4482219089Spjd if (trunc) 4483219089Spjd pvn_write_done(trunc, flags); 4484219089Spjd len = zp->z_size - off; 4485219089Spjd } 4486219089Spjd 4487219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 4488219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4489249195Smm err = SET_ERROR(EDQUOT); 4490219089Spjd goto out; 4491219089Spjd } 4492219089Spjdtop: 4493219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 4494219089Spjd dmu_tx_hold_write(tx, zp->z_id, off, len); 4495219089Spjd 4496219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4497219089Spjd zfs_sa_upgrade_txholds(tx, zp); 4498219089Spjd err = dmu_tx_assign(tx, TXG_NOWAIT); 4499219089Spjd if (err != 0) { 4500219089Spjd if (err == ERESTART) { 4501219089Spjd dmu_tx_wait(tx); 4502219089Spjd dmu_tx_abort(tx); 4503219089Spjd goto top; 4504219089Spjd } 4505219089Spjd dmu_tx_abort(tx); 4506219089Spjd goto out; 4507219089Spjd } 4508219089Spjd 4509219089Spjd if (zp->z_blksz <= PAGESIZE) { 4510219089Spjd caddr_t va = zfs_map_page(pp, S_READ); 4511219089Spjd ASSERT3U(len, <=, PAGESIZE); 4512219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 4513219089Spjd zfs_unmap_page(pp, va); 
4514219089Spjd } else { 4515219089Spjd err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 4516219089Spjd } 4517219089Spjd 4518219089Spjd if (err == 0) { 4519219089Spjd uint64_t mtime[2], ctime[2]; 4520219089Spjd sa_bulk_attr_t bulk[3]; 4521219089Spjd int count = 0; 4522219089Spjd 4523219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 4524219089Spjd &mtime, 16); 4525219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 4526219089Spjd &ctime, 16); 4527219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4528219089Spjd &zp->z_pflags, 8); 4529219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 4530219089Spjd B_TRUE); 4531219089Spjd zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 4532219089Spjd } 4533219089Spjd dmu_tx_commit(tx); 4534219089Spjd 4535219089Spjdout: 4536219089Spjd pvn_write_done(pp, (err ? B_ERROR : 0) | flags); 4537219089Spjd if (offp) 4538219089Spjd *offp = off; 4539219089Spjd if (lenp) 4540219089Spjd *lenp = len; 4541219089Spjd 4542219089Spjd return (err); 4543219089Spjd} 4544219089Spjd 4545219089Spjd/* 4546219089Spjd * Copy the portion of the file indicated from pages into the file. 4547219089Spjd * The pages are stored in a page list attached to the file's vnode. 4548219089Spjd * 4549219089Spjd * IN: vp - vnode of file to push page data to. 4550219089Spjd * off - position in file to put data. 4551219089Spjd * len - amount of data to write. 4552219089Spjd * flags - flags to control the operation. 4553219089Spjd * cr - credentials of caller. 4554219089Spjd * ct - caller context. 4555219089Spjd * 4556251631Sdelphij * RETURN: 0 on success, error code on failure. 
4557219089Spjd * 4558219089Spjd * Timestamps: 4559219089Spjd * vp - ctime|mtime updated 4560219089Spjd */ 4561185029Spjd/*ARGSUSED*/ 4562219089Spjdstatic int 4563219089Spjdzfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, 4564219089Spjd caller_context_t *ct) 4565219089Spjd{ 4566219089Spjd znode_t *zp = VTOZ(vp); 4567219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4568219089Spjd page_t *pp; 4569219089Spjd size_t io_len; 4570219089Spjd u_offset_t io_off; 4571219089Spjd uint_t blksz; 4572219089Spjd rl_t *rl; 4573219089Spjd int error = 0; 4574219089Spjd 4575219089Spjd ZFS_ENTER(zfsvfs); 4576219089Spjd ZFS_VERIFY_ZP(zp); 4577219089Spjd 4578219089Spjd /* 4579219089Spjd * Align this request to the file block size in case we kluster. 4580219089Spjd * XXX - this can result in pretty aggressive locking, which can 4581219089Spjd * impact simultaneous read/write access. One option might be 4582219089Spjd * to break up long requests (len == 0) into block-by-block 4583219089Spjd * operations to get narrower locking. 4584219089Spjd */ 4585219089Spjd blksz = zp->z_blksz; 4586219089Spjd if (ISP2(blksz)) 4587219089Spjd io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); 4588219089Spjd else 4589219089Spjd io_off = 0; 4590219089Spjd if (len > 0 && ISP2(blksz)) 4591219089Spjd io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); 4592219089Spjd else 4593219089Spjd io_len = 0; 4594219089Spjd 4595219089Spjd if (io_len == 0) { 4596219089Spjd /* 4597219089Spjd * Search the entire vp list for pages >= io_off. 
4598219089Spjd */ 4599219089Spjd rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); 4600219089Spjd error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); 4601219089Spjd goto out; 4602219089Spjd } 4603219089Spjd rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); 4604219089Spjd 4605219089Spjd if (off > zp->z_size) { 4606219089Spjd /* past end of file */ 4607219089Spjd zfs_range_unlock(rl); 4608219089Spjd ZFS_EXIT(zfsvfs); 4609219089Spjd return (0); 4610219089Spjd } 4611219089Spjd 4612219089Spjd len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); 4613219089Spjd 4614219089Spjd for (off = io_off; io_off < off + len; io_off += io_len) { 4615219089Spjd if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 4616219089Spjd pp = page_lookup(vp, io_off, 4617219089Spjd (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); 4618219089Spjd } else { 4619219089Spjd pp = page_lookup_nowait(vp, io_off, 4620219089Spjd (flags & B_FREE) ? SE_EXCL : SE_SHARED); 4621219089Spjd } 4622219089Spjd 4623219089Spjd if (pp != NULL && pvn_getdirty(pp, flags)) { 4624219089Spjd int err; 4625219089Spjd 4626219089Spjd /* 4627219089Spjd * Found a dirty page to push 4628219089Spjd */ 4629219089Spjd err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); 4630219089Spjd if (err) 4631219089Spjd error = err; 4632219089Spjd } else { 4633219089Spjd io_len = PAGESIZE; 4634219089Spjd } 4635219089Spjd } 4636219089Spjdout: 4637219089Spjd zfs_range_unlock(rl); 4638219089Spjd if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4639219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 4640219089Spjd ZFS_EXIT(zfsvfs); 4641219089Spjd return (error); 4642219089Spjd} 4643219089Spjd#endif /* sun */ 4644219089Spjd 4645219089Spjd/*ARGSUSED*/ 4646168962Spjdvoid 4647185029Spjdzfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4648168404Spjd{ 4649168962Spjd znode_t *zp = VTOZ(vp); 4650168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4651168962Spjd int error; 4652168404Spjd 4653185029Spjd 
rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4654219089Spjd if (zp->z_sa_hdl == NULL) { 4655185029Spjd /* 4656185029Spjd * The fs has been unmounted, or we did a 4657185029Spjd * suspend/resume and this file no longer exists. 4658185029Spjd */ 4659243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 4660234607Strasz vrecycle(vp); 4661243520Savg return; 4662243520Savg } 4663243520Savg 4664243520Savg mutex_enter(&zp->z_lock); 4665243520Savg if (zp->z_unlinked) { 4666243520Savg /* 4667243520Savg * Fast path to recycle a vnode of a removed file. 4668243520Savg */ 4669243520Savg mutex_exit(&zp->z_lock); 4670185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4671243520Savg vrecycle(vp); 4672168962Spjd return; 4673168404Spjd } 4674243520Savg mutex_exit(&zp->z_lock); 4675168404Spjd 4676168404Spjd if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4677168404Spjd dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4678168404Spjd 4679219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4680219089Spjd zfs_sa_upgrade_txholds(tx, zp); 4681168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 4682168404Spjd if (error) { 4683168404Spjd dmu_tx_abort(tx); 4684168404Spjd } else { 4685168404Spjd mutex_enter(&zp->z_lock); 4686219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 4687219089Spjd (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4688168404Spjd zp->z_atime_dirty = 0; 4689168404Spjd mutex_exit(&zp->z_lock); 4690168404Spjd dmu_tx_commit(tx); 4691168404Spjd } 4692168404Spjd } 4693185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4694168404Spjd} 4695168404Spjd 4696219089Spjd#ifdef sun 4697219089Spjd/* 4698219089Spjd * Bounds-check the seek operation. 4699219089Spjd * 4700219089Spjd * IN: vp - vnode seeking within 4701219089Spjd * ooff - old file offset 4702219089Spjd * noffp - pointer to new file offset 4703219089Spjd * ct - caller context 4704219089Spjd * 4705251631Sdelphij * RETURN: 0 on success, EINVAL if new offset invalid. 
4706219089Spjd */ 4707219089Spjd/* ARGSUSED */ 4708219089Spjdstatic int 4709219089Spjdzfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 4710219089Spjd caller_context_t *ct) 4711219089Spjd{ 4712219089Spjd if (vp->v_type == VDIR) 4713219089Spjd return (0); 4714219089Spjd return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 4715219089Spjd} 4716219089Spjd 4717219089Spjd/* 4718219089Spjd * Pre-filter the generic locking function to trap attempts to place 4719219089Spjd * a mandatory lock on a memory mapped file. 4720219089Spjd */ 4721219089Spjdstatic int 4722219089Spjdzfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 4723219089Spjd flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 4724219089Spjd{ 4725219089Spjd znode_t *zp = VTOZ(vp); 4726219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4727219089Spjd 4728219089Spjd ZFS_ENTER(zfsvfs); 4729219089Spjd ZFS_VERIFY_ZP(zp); 4730219089Spjd 4731219089Spjd /* 4732219089Spjd * We are following the UFS semantics with respect to mapcnt 4733219089Spjd * here: If we see that the file is mapped already, then we will 4734219089Spjd * return an error, but we don't worry about races between this 4735219089Spjd * function and zfs_map(). 4736219089Spjd */ 4737219089Spjd if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { 4738219089Spjd ZFS_EXIT(zfsvfs); 4739249195Smm return (SET_ERROR(EAGAIN)); 4740219089Spjd } 4741219089Spjd ZFS_EXIT(zfsvfs); 4742219089Spjd return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 4743219089Spjd} 4744219089Spjd 4745219089Spjd/* 4746219089Spjd * If we can't find a page in the cache, we will create a new page 4747219089Spjd * and fill it with file data. For efficiency, we may try to fill 4748219089Spjd * multiple pages at once (klustering) to fill up the supplied page 4749219089Spjd * list. Note that the pages to be filled are held with an exclusive 4750219089Spjd * lock to prevent access by other threads while they are being filled. 
4751219089Spjd */ 4752219089Spjdstatic int 4753219089Spjdzfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, 4754219089Spjd caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) 4755219089Spjd{ 4756219089Spjd znode_t *zp = VTOZ(vp); 4757219089Spjd page_t *pp, *cur_pp; 4758219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 4759219089Spjd u_offset_t io_off, total; 4760219089Spjd size_t io_len; 4761219089Spjd int err; 4762219089Spjd 4763219089Spjd if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { 4764219089Spjd /* 4765219089Spjd * We only have a single page, don't bother klustering 4766219089Spjd */ 4767219089Spjd io_off = off; 4768219089Spjd io_len = PAGESIZE; 4769219089Spjd pp = page_create_va(vp, io_off, io_len, 4770219089Spjd PG_EXCL | PG_WAIT, seg, addr); 4771219089Spjd } else { 4772219089Spjd /* 4773219089Spjd * Try to find enough pages to fill the page list 4774219089Spjd */ 4775219089Spjd pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 4776219089Spjd &io_len, off, plsz, 0); 4777219089Spjd } 4778219089Spjd if (pp == NULL) { 4779219089Spjd /* 4780219089Spjd * The page already exists, nothing to do here. 4781219089Spjd */ 4782219089Spjd *pl = NULL; 4783219089Spjd return (0); 4784219089Spjd } 4785219089Spjd 4786219089Spjd /* 4787219089Spjd * Fill the pages in the kluster. 
4788219089Spjd */ 4789219089Spjd cur_pp = pp; 4790219089Spjd for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { 4791219089Spjd caddr_t va; 4792219089Spjd 4793219089Spjd ASSERT3U(io_off, ==, cur_pp->p_offset); 4794219089Spjd va = zfs_map_page(cur_pp, S_WRITE); 4795219089Spjd err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, 4796219089Spjd DMU_READ_PREFETCH); 4797219089Spjd zfs_unmap_page(cur_pp, va); 4798219089Spjd if (err) { 4799219089Spjd /* On error, toss the entire kluster */ 4800219089Spjd pvn_read_done(pp, B_ERROR); 4801219089Spjd /* convert checksum errors into IO errors */ 4802219089Spjd if (err == ECKSUM) 4803249195Smm err = SET_ERROR(EIO); 4804219089Spjd return (err); 4805219089Spjd } 4806219089Spjd cur_pp = cur_pp->p_next; 4807219089Spjd } 4808219089Spjd 4809219089Spjd /* 4810219089Spjd * Fill in the page list array from the kluster starting 4811219089Spjd * from the desired offset `off'. 4812219089Spjd * NOTE: the page list will always be null terminated. 4813219089Spjd */ 4814219089Spjd pvn_plist_init(pp, pl, plsz, off, io_len, rw); 4815219089Spjd ASSERT(pl == NULL || (*pl)->p_offset == off); 4816219089Spjd 4817219089Spjd return (0); 4818219089Spjd} 4819219089Spjd 4820219089Spjd/* 4821219089Spjd * Return pointers to the pages for the file region [off, off + len] 4822219089Spjd * in the pl array. If plsz is greater than len, this function may 4823219089Spjd * also return page pointers from after the specified region 4824219089Spjd * (i.e. the region [off, off + plsz]). These additional pages are 4825219089Spjd * only returned if they are already in the cache, or were created as 4826219089Spjd * part of a klustered read. 4827219089Spjd * 4828219089Spjd * IN: vp - vnode of file to get data from. 4829219089Spjd * off - position in file to get data from. 4830219089Spjd * len - amount of data to retrieve. 4831219089Spjd * plsz - length of provided page list. 4832219089Spjd * seg - segment to obtain pages for. 
4833219089Spjd * addr - virtual address of fault. 4834219089Spjd * rw - mode of created pages. 4835219089Spjd * cr - credentials of caller. 4836219089Spjd * ct - caller context. 4837219089Spjd * 4838219089Spjd * OUT: protp - protection mode of created pages. 4839219089Spjd * pl - list of pages created. 4840219089Spjd * 4841251631Sdelphij * RETURN: 0 on success, error code on failure. 4842219089Spjd * 4843219089Spjd * Timestamps: 4844219089Spjd * vp - atime updated 4845219089Spjd */ 4846219089Spjd/* ARGSUSED */ 4847219089Spjdstatic int 4848219089Spjdzfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 4849251631Sdelphij page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, 4850251631Sdelphij enum seg_rw rw, cred_t *cr, caller_context_t *ct) 4851219089Spjd{ 4852219089Spjd znode_t *zp = VTOZ(vp); 4853219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4854219089Spjd page_t **pl0 = pl; 4855219089Spjd int err = 0; 4856219089Spjd 4857219089Spjd /* we do our own caching, faultahead is unnecessary */ 4858219089Spjd if (pl == NULL) 4859219089Spjd return (0); 4860219089Spjd else if (len > plsz) 4861219089Spjd len = plsz; 4862219089Spjd else 4863219089Spjd len = P2ROUNDUP(len, PAGESIZE); 4864219089Spjd ASSERT(plsz >= len); 4865219089Spjd 4866219089Spjd ZFS_ENTER(zfsvfs); 4867219089Spjd ZFS_VERIFY_ZP(zp); 4868219089Spjd 4869219089Spjd if (protp) 4870219089Spjd *protp = PROT_ALL; 4871219089Spjd 4872219089Spjd /* 4873219089Spjd * Loop through the requested range [off, off + len) looking 4874219089Spjd * for pages. If we don't find a page, we will need to create 4875219089Spjd * a new page and fill it with data from the file. 
4876219089Spjd */ 4877219089Spjd while (len > 0) { 4878219089Spjd if (*pl = page_lookup(vp, off, SE_SHARED)) 4879219089Spjd *(pl+1) = NULL; 4880219089Spjd else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) 4881219089Spjd goto out; 4882219089Spjd while (*pl) { 4883219089Spjd ASSERT3U((*pl)->p_offset, ==, off); 4884219089Spjd off += PAGESIZE; 4885219089Spjd addr += PAGESIZE; 4886219089Spjd if (len > 0) { 4887219089Spjd ASSERT3U(len, >=, PAGESIZE); 4888219089Spjd len -= PAGESIZE; 4889219089Spjd } 4890219089Spjd ASSERT3U(plsz, >=, PAGESIZE); 4891219089Spjd plsz -= PAGESIZE; 4892219089Spjd pl++; 4893219089Spjd } 4894219089Spjd } 4895219089Spjd 4896219089Spjd /* 4897219089Spjd * Fill out the page array with any pages already in the cache. 4898219089Spjd */ 4899219089Spjd while (plsz > 0 && 4900219089Spjd (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { 4901219089Spjd off += PAGESIZE; 4902219089Spjd plsz -= PAGESIZE; 4903219089Spjd } 4904219089Spjdout: 4905219089Spjd if (err) { 4906219089Spjd /* 4907219089Spjd * Release any pages we have previously locked. 4908219089Spjd */ 4909219089Spjd while (pl > pl0) 4910219089Spjd page_unlock(*--pl); 4911219089Spjd } else { 4912219089Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4913219089Spjd } 4914219089Spjd 4915219089Spjd *pl = NULL; 4916219089Spjd 4917219089Spjd ZFS_EXIT(zfsvfs); 4918219089Spjd return (err); 4919219089Spjd} 4920219089Spjd 4921219089Spjd/* 4922219089Spjd * Request a memory map for a section of a file. 
This code interacts 4923219089Spjd * with common code and the VM system as follows: 4924219089Spjd * 4925251631Sdelphij * - common code calls mmap(), which ends up in smmap_common() 4926251631Sdelphij * - this calls VOP_MAP(), which takes you into (say) zfs 4927251631Sdelphij * - zfs_map() calls as_map(), passing segvn_create() as the callback 4928251631Sdelphij * - segvn_create() creates the new segment and calls VOP_ADDMAP() 4929251631Sdelphij * - zfs_addmap() updates z_mapcnt 4930219089Spjd */ 4931219089Spjd/*ARGSUSED*/ 4932219089Spjdstatic int 4933219089Spjdzfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 4934219089Spjd size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4935219089Spjd caller_context_t *ct) 4936219089Spjd{ 4937219089Spjd znode_t *zp = VTOZ(vp); 4938219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4939219089Spjd segvn_crargs_t vn_a; 4940219089Spjd int error; 4941219089Spjd 4942219089Spjd ZFS_ENTER(zfsvfs); 4943219089Spjd ZFS_VERIFY_ZP(zp); 4944219089Spjd 4945219089Spjd if ((prot & PROT_WRITE) && (zp->z_pflags & 4946219089Spjd (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { 4947219089Spjd ZFS_EXIT(zfsvfs); 4948249195Smm return (SET_ERROR(EPERM)); 4949219089Spjd } 4950219089Spjd 4951219089Spjd if ((prot & (PROT_READ | PROT_EXEC)) && 4952219089Spjd (zp->z_pflags & ZFS_AV_QUARANTINED)) { 4953219089Spjd ZFS_EXIT(zfsvfs); 4954249195Smm return (SET_ERROR(EACCES)); 4955219089Spjd } 4956219089Spjd 4957219089Spjd if (vp->v_flag & VNOMAP) { 4958219089Spjd ZFS_EXIT(zfsvfs); 4959249195Smm return (SET_ERROR(ENOSYS)); 4960219089Spjd } 4961219089Spjd 4962219089Spjd if (off < 0 || len > MAXOFFSET_T - off) { 4963219089Spjd ZFS_EXIT(zfsvfs); 4964249195Smm return (SET_ERROR(ENXIO)); 4965219089Spjd } 4966219089Spjd 4967219089Spjd if (vp->v_type != VREG) { 4968219089Spjd ZFS_EXIT(zfsvfs); 4969249195Smm return (SET_ERROR(ENODEV)); 4970219089Spjd } 4971219089Spjd 4972219089Spjd /* 4973219089Spjd * If file is locked, disallow mapping. 
4974219089Spjd */ 4975219089Spjd if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) { 4976219089Spjd ZFS_EXIT(zfsvfs); 4977249195Smm return (SET_ERROR(EAGAIN)); 4978219089Spjd } 4979219089Spjd 4980219089Spjd as_rangelock(as); 4981219089Spjd error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 4982219089Spjd if (error != 0) { 4983219089Spjd as_rangeunlock(as); 4984219089Spjd ZFS_EXIT(zfsvfs); 4985219089Spjd return (error); 4986219089Spjd } 4987219089Spjd 4988219089Spjd vn_a.vp = vp; 4989219089Spjd vn_a.offset = (u_offset_t)off; 4990219089Spjd vn_a.type = flags & MAP_TYPE; 4991219089Spjd vn_a.prot = prot; 4992219089Spjd vn_a.maxprot = maxprot; 4993219089Spjd vn_a.cred = cr; 4994219089Spjd vn_a.amp = NULL; 4995219089Spjd vn_a.flags = flags & ~MAP_TYPE; 4996219089Spjd vn_a.szc = 0; 4997219089Spjd vn_a.lgrp_mem_policy_flags = 0; 4998219089Spjd 4999219089Spjd error = as_map(as, *addrp, len, segvn_create, &vn_a); 5000219089Spjd 5001219089Spjd as_rangeunlock(as); 5002219089Spjd ZFS_EXIT(zfsvfs); 5003219089Spjd return (error); 5004219089Spjd} 5005219089Spjd 5006219089Spjd/* ARGSUSED */ 5007219089Spjdstatic int 5008219089Spjdzfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 5009219089Spjd size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 5010219089Spjd caller_context_t *ct) 5011219089Spjd{ 5012219089Spjd uint64_t pages = btopr(len); 5013219089Spjd 5014219089Spjd atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); 5015219089Spjd return (0); 5016219089Spjd} 5017219089Spjd 5018219089Spjd/* 5019219089Spjd * The reason we push dirty pages as part of zfs_delmap() is so that we get a 5020219089Spjd * more accurate mtime for the associated file. Since we don't have a way of 5021219089Spjd * detecting when the data was actually modified, we have to resort to 5022219089Spjd * heuristics. If an explicit msync() is done, then we mark the mtime when the 5023219089Spjd * last page is pushed. 
The problem occurs when the msync() call is omitted, 5024219089Spjd * which is by far the most common case: 5025219089Spjd * 5026219089Spjd * open() 5027219089Spjd * mmap() 5028219089Spjd * <modify memory> 5029219089Spjd * munmap() 5030219089Spjd * close() 5031219089Spjd * <time lapse> 5032219089Spjd * putpage() via fsflush 5033219089Spjd * 5034219089Spjd * If we wait for fsflush to come along, we can have a modification time that 5035219089Spjd * is some arbitrary point in the future. In order to prevent this in the 5036219089Spjd * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is 5037219089Spjd * torn down. 5038219089Spjd */ 5039219089Spjd/* ARGSUSED */ 5040219089Spjdstatic int 5041219089Spjdzfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 5042219089Spjd size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 5043219089Spjd caller_context_t *ct) 5044219089Spjd{ 5045219089Spjd uint64_t pages = btopr(len); 5046219089Spjd 5047219089Spjd ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 5048219089Spjd atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 5049219089Spjd 5050219089Spjd if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 5051219089Spjd vn_has_cached_data(vp)) 5052219089Spjd (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 5053219089Spjd 5054219089Spjd return (0); 5055219089Spjd} 5056219089Spjd 5057219089Spjd/* 5058219089Spjd * Free or allocate space in a file. Currently, this function only 5059219089Spjd * supports the `F_FREESP' command. However, this command is somewhat 5060219089Spjd * misnamed, as its functionality includes the ability to allocate as 5061219089Spjd * well as free space. 5062219089Spjd * 5063219089Spjd * IN: vp - vnode of file to free data in. 5064219089Spjd * cmd - action to take (only F_FREESP supported). 5065219089Spjd * bfp - section of file to free/alloc. 5066219089Spjd * flag - current file open mode flags. 5067219089Spjd * offset - current file offset. 
5068219089Spjd * cr - credentials of caller [UNUSED]. 5069219089Spjd * ct - caller context. 5070219089Spjd * 5071251631Sdelphij * RETURN: 0 on success, error code on failure. 5072219089Spjd * 5073219089Spjd * Timestamps: 5074219089Spjd * vp - ctime|mtime updated 5075219089Spjd */ 5076219089Spjd/* ARGSUSED */ 5077219089Spjdstatic int 5078219089Spjdzfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, 5079219089Spjd offset_t offset, cred_t *cr, caller_context_t *ct) 5080219089Spjd{ 5081219089Spjd znode_t *zp = VTOZ(vp); 5082219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5083219089Spjd uint64_t off, len; 5084219089Spjd int error; 5085219089Spjd 5086219089Spjd ZFS_ENTER(zfsvfs); 5087219089Spjd ZFS_VERIFY_ZP(zp); 5088219089Spjd 5089219089Spjd if (cmd != F_FREESP) { 5090219089Spjd ZFS_EXIT(zfsvfs); 5091249195Smm return (SET_ERROR(EINVAL)); 5092219089Spjd } 5093219089Spjd 5094219089Spjd if (error = convoff(vp, bfp, 0, offset)) { 5095219089Spjd ZFS_EXIT(zfsvfs); 5096219089Spjd return (error); 5097219089Spjd } 5098219089Spjd 5099219089Spjd if (bfp->l_len < 0) { 5100219089Spjd ZFS_EXIT(zfsvfs); 5101249195Smm return (SET_ERROR(EINVAL)); 5102219089Spjd } 5103219089Spjd 5104219089Spjd off = bfp->l_start; 5105219089Spjd len = bfp->l_len; /* 0 means from off to end of file */ 5106219089Spjd 5107219089Spjd error = zfs_freesp(zp, off, len, flag, TRUE); 5108219089Spjd 5109219089Spjd ZFS_EXIT(zfsvfs); 5110219089Spjd return (error); 5111219089Spjd} 5112219089Spjd#endif /* sun */ 5113219089Spjd 5114168404SpjdCTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 5115168404SpjdCTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 5116168404Spjd 5117185029Spjd/*ARGSUSED*/ 5118168404Spjdstatic int 5119185029Spjdzfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 5120168404Spjd{ 5121168404Spjd znode_t *zp = VTOZ(vp); 5122168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5123185029Spjd uint32_t gen; 5124219089Spjd uint64_t gen64; 5125168404Spjd uint64_t object = zp->z_id; 
5126168404Spjd zfid_short_t *zfid; 5127219089Spjd int size, i, error; 5128168404Spjd 5129168404Spjd ZFS_ENTER(zfsvfs); 5130185029Spjd ZFS_VERIFY_ZP(zp); 5131168404Spjd 5132219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 5133219089Spjd &gen64, sizeof (uint64_t))) != 0) { 5134219089Spjd ZFS_EXIT(zfsvfs); 5135219089Spjd return (error); 5136219089Spjd } 5137219089Spjd 5138219089Spjd gen = (uint32_t)gen64; 5139219089Spjd 5140168404Spjd size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 5141249195Smm 5142249195Smm#ifdef illumos 5143249195Smm if (fidp->fid_len < size) { 5144249195Smm fidp->fid_len = size; 5145249195Smm ZFS_EXIT(zfsvfs); 5146249195Smm return (SET_ERROR(ENOSPC)); 5147249195Smm } 5148249195Smm#else 5149168404Spjd fidp->fid_len = size; 5150249195Smm#endif 5151168404Spjd 5152168404Spjd zfid = (zfid_short_t *)fidp; 5153168404Spjd 5154168404Spjd zfid->zf_len = size; 5155168404Spjd 5156168404Spjd for (i = 0; i < sizeof (zfid->zf_object); i++) 5157168404Spjd zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 5158168404Spjd 5159168404Spjd /* Must have a non-zero generation number to distinguish from .zfs */ 5160168404Spjd if (gen == 0) 5161168404Spjd gen = 1; 5162168404Spjd for (i = 0; i < sizeof (zfid->zf_gen); i++) 5163168404Spjd zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 5164168404Spjd 5165168404Spjd if (size == LONG_FID_LEN) { 5166168404Spjd uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 5167169023Spjd zfid_long_t *zlfid; 5168168404Spjd 5169168404Spjd zlfid = (zfid_long_t *)fidp; 5170168404Spjd 5171168404Spjd for (i = 0; i < sizeof (zlfid->zf_setid); i++) 5172168404Spjd zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 5173168404Spjd 5174168404Spjd /* XXX - this should be the generation number for the objset */ 5175168404Spjd for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 5176168404Spjd zlfid->zf_setgen[i] = 0; 5177168404Spjd } 5178168404Spjd 5179168404Spjd ZFS_EXIT(zfsvfs); 5180168404Spjd return (0); 5181168404Spjd} 
5182168404Spjd 5183168404Spjdstatic int 5184185029Spjdzfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 5185185029Spjd caller_context_t *ct) 5186168404Spjd{ 5187168404Spjd znode_t *zp, *xzp; 5188168404Spjd zfsvfs_t *zfsvfs; 5189168404Spjd zfs_dirlock_t *dl; 5190168404Spjd int error; 5191168404Spjd 5192168404Spjd switch (cmd) { 5193168404Spjd case _PC_LINK_MAX: 5194168404Spjd *valp = INT_MAX; 5195168404Spjd return (0); 5196168404Spjd 5197168404Spjd case _PC_FILESIZEBITS: 5198168404Spjd *valp = 64; 5199168404Spjd return (0); 5200219089Spjd#ifdef sun 5201168404Spjd case _PC_XATTR_EXISTS: 5202168404Spjd zp = VTOZ(vp); 5203168404Spjd zfsvfs = zp->z_zfsvfs; 5204168404Spjd ZFS_ENTER(zfsvfs); 5205185029Spjd ZFS_VERIFY_ZP(zp); 5206168404Spjd *valp = 0; 5207168404Spjd error = zfs_dirent_lock(&dl, zp, "", &xzp, 5208185029Spjd ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 5209168404Spjd if (error == 0) { 5210168404Spjd zfs_dirent_unlock(dl); 5211168404Spjd if (!zfs_dirempty(xzp)) 5212168404Spjd *valp = 1; 5213168404Spjd VN_RELE(ZTOV(xzp)); 5214168404Spjd } else if (error == ENOENT) { 5215168404Spjd /* 5216168404Spjd * If there aren't extended attributes, it's the 5217168404Spjd * same as having zero of them. 
5218168404Spjd */ 5219168404Spjd error = 0; 5220168404Spjd } 5221168404Spjd ZFS_EXIT(zfsvfs); 5222168404Spjd return (error); 5223168404Spjd 5224219089Spjd case _PC_SATTR_ENABLED: 5225219089Spjd case _PC_SATTR_EXISTS: 5226219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 5227219089Spjd (vp->v_type == VREG || vp->v_type == VDIR); 5228219089Spjd return (0); 5229219089Spjd 5230219089Spjd case _PC_ACCESS_FILTERING: 5231219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 5232219089Spjd vp->v_type == VDIR; 5233219089Spjd return (0); 5234219089Spjd 5235219089Spjd case _PC_ACL_ENABLED: 5236219089Spjd *valp = _ACL_ACE_ENABLED; 5237219089Spjd return (0); 5238219089Spjd#endif /* sun */ 5239219089Spjd case _PC_MIN_HOLE_SIZE: 5240219089Spjd *valp = (int)SPA_MINBLOCKSIZE; 5241219089Spjd return (0); 5242219089Spjd#ifdef sun 5243219089Spjd case _PC_TIMESTAMP_RESOLUTION: 5244219089Spjd /* nanosecond timestamp resolution */ 5245219089Spjd *valp = 1L; 5246219089Spjd return (0); 5247219089Spjd#endif /* sun */ 5248168404Spjd case _PC_ACL_EXTENDED: 5249196949Strasz *valp = 0; 5250168404Spjd return (0); 5251168404Spjd 5252196949Strasz case _PC_ACL_NFS4: 5253196949Strasz *valp = 1; 5254196949Strasz return (0); 5255196949Strasz 5256196949Strasz case _PC_ACL_PATH_MAX: 5257196949Strasz *valp = ACL_MAX_ENTRIES; 5258196949Strasz return (0); 5259196949Strasz 5260168404Spjd default: 5261168962Spjd return (EOPNOTSUPP); 5262168404Spjd } 5263168404Spjd} 5264168404Spjd 5265168404Spjd/*ARGSUSED*/ 5266168404Spjdstatic int 5267185029Spjdzfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5268185029Spjd caller_context_t *ct) 5269168404Spjd{ 5270168404Spjd znode_t *zp = VTOZ(vp); 5271168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5272168404Spjd int error; 5273185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 5274168404Spjd 5275168404Spjd ZFS_ENTER(zfsvfs); 5276185029Spjd ZFS_VERIFY_ZP(zp); 5277185029Spjd error = zfs_getacl(zp, vsecp, skipaclchk, cr); 5278168404Spjd ZFS_EXIT(zfsvfs); 5279168404Spjd 5280168404Spjd return (error); 5281168404Spjd} 5282168404Spjd 5283168404Spjd/*ARGSUSED*/ 5284228685Spjdint 5285185029Spjdzfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5286185029Spjd caller_context_t *ct) 5287168404Spjd{ 5288168404Spjd znode_t *zp = VTOZ(vp); 5289168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5290168404Spjd int error; 5291185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 5292219089Spjd zilog_t *zilog = zfsvfs->z_log; 5293168404Spjd 5294168404Spjd ZFS_ENTER(zfsvfs); 5295185029Spjd ZFS_VERIFY_ZP(zp); 5296219089Spjd 5297185029Spjd error = zfs_setacl(zp, vsecp, skipaclchk, cr); 5298219089Spjd 5299219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5300219089Spjd zil_commit(zilog, 0); 5301219089Spjd 5302168404Spjd ZFS_EXIT(zfsvfs); 5303168404Spjd return (error); 5304168404Spjd} 5305168404Spjd 5306219089Spjd#ifdef sun 5307219089Spjd/* 5308251631Sdelphij * The smallest read we may consider to loan out an arcbuf. 5309251631Sdelphij * This must be a power of 2. 5310219089Spjd */ 5311219089Spjdint zcr_blksz_min = (1 << 10); /* 1K */ 5312251631Sdelphij/* 5313251631Sdelphij * If set to less than the file block size, allow loaning out of an 5314251631Sdelphij * arcbuf for a partial block read. This must be a power of 2. 
5315251631Sdelphij */ 5316219089Spjdint zcr_blksz_max = (1 << 17); /* 128K */ 5317219089Spjd 5318219089Spjd/*ARGSUSED*/ 5319168962Spjdstatic int 5320219089Spjdzfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 5321219089Spjd caller_context_t *ct) 5322219089Spjd{ 5323219089Spjd znode_t *zp = VTOZ(vp); 5324219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5325219089Spjd int max_blksz = zfsvfs->z_max_blksz; 5326219089Spjd uio_t *uio = &xuio->xu_uio; 5327219089Spjd ssize_t size = uio->uio_resid; 5328219089Spjd offset_t offset = uio->uio_loffset; 5329219089Spjd int blksz; 5330219089Spjd int fullblk, i; 5331219089Spjd arc_buf_t *abuf; 5332219089Spjd ssize_t maxsize; 5333219089Spjd int preamble, postamble; 5334219089Spjd 5335219089Spjd if (xuio->xu_type != UIOTYPE_ZEROCOPY) 5336249195Smm return (SET_ERROR(EINVAL)); 5337219089Spjd 5338219089Spjd ZFS_ENTER(zfsvfs); 5339219089Spjd ZFS_VERIFY_ZP(zp); 5340219089Spjd switch (ioflag) { 5341219089Spjd case UIO_WRITE: 5342219089Spjd /* 5343219089Spjd * Loan out an arc_buf for write if write size is bigger than 5344219089Spjd * max_blksz, and the file's block size is also max_blksz. 5345219089Spjd */ 5346219089Spjd blksz = max_blksz; 5347219089Spjd if (size < blksz || zp->z_blksz != blksz) { 5348219089Spjd ZFS_EXIT(zfsvfs); 5349249195Smm return (SET_ERROR(EINVAL)); 5350219089Spjd } 5351219089Spjd /* 5352219089Spjd * Caller requests buffers for write before knowing where the 5353219089Spjd * write offset might be (e.g. NFS TCP write). 
5354219089Spjd */ 5355219089Spjd if (offset == -1) { 5356219089Spjd preamble = 0; 5357219089Spjd } else { 5358219089Spjd preamble = P2PHASE(offset, blksz); 5359219089Spjd if (preamble) { 5360219089Spjd preamble = blksz - preamble; 5361219089Spjd size -= preamble; 5362219089Spjd } 5363219089Spjd } 5364219089Spjd 5365219089Spjd postamble = P2PHASE(size, blksz); 5366219089Spjd size -= postamble; 5367219089Spjd 5368219089Spjd fullblk = size / blksz; 5369219089Spjd (void) dmu_xuio_init(xuio, 5370219089Spjd (preamble != 0) + fullblk + (postamble != 0)); 5371219089Spjd DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble, 5372219089Spjd int, postamble, int, 5373219089Spjd (preamble != 0) + fullblk + (postamble != 0)); 5374219089Spjd 5375219089Spjd /* 5376219089Spjd * Have to fix iov base/len for partial buffers. They 5377219089Spjd * currently represent full arc_buf's. 5378219089Spjd */ 5379219089Spjd if (preamble) { 5380219089Spjd /* data begins in the middle of the arc_buf */ 5381219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5382219089Spjd blksz); 5383219089Spjd ASSERT(abuf); 5384219089Spjd (void) dmu_xuio_add(xuio, abuf, 5385219089Spjd blksz - preamble, preamble); 5386219089Spjd } 5387219089Spjd 5388219089Spjd for (i = 0; i < fullblk; i++) { 5389219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5390219089Spjd blksz); 5391219089Spjd ASSERT(abuf); 5392219089Spjd (void) dmu_xuio_add(xuio, abuf, 0, blksz); 5393219089Spjd } 5394219089Spjd 5395219089Spjd if (postamble) { 5396219089Spjd /* data ends in the middle of the arc_buf */ 5397219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5398219089Spjd blksz); 5399219089Spjd ASSERT(abuf); 5400219089Spjd (void) dmu_xuio_add(xuio, abuf, 0, postamble); 5401219089Spjd } 5402219089Spjd break; 5403219089Spjd case UIO_READ: 5404219089Spjd /* 5405219089Spjd * Loan out an arc_buf for read if the read size is larger than 5406219089Spjd * the current file block size. 
Block alignment is not 5407219089Spjd * considered. Partial arc_buf will be loaned out for read. 5408219089Spjd */ 5409219089Spjd blksz = zp->z_blksz; 5410219089Spjd if (blksz < zcr_blksz_min) 5411219089Spjd blksz = zcr_blksz_min; 5412219089Spjd if (blksz > zcr_blksz_max) 5413219089Spjd blksz = zcr_blksz_max; 5414219089Spjd /* avoid potential complexity of dealing with it */ 5415219089Spjd if (blksz > max_blksz) { 5416219089Spjd ZFS_EXIT(zfsvfs); 5417249195Smm return (SET_ERROR(EINVAL)); 5418219089Spjd } 5419219089Spjd 5420219089Spjd maxsize = zp->z_size - uio->uio_loffset; 5421219089Spjd if (size > maxsize) 5422219089Spjd size = maxsize; 5423219089Spjd 5424219089Spjd if (size < blksz || vn_has_cached_data(vp)) { 5425219089Spjd ZFS_EXIT(zfsvfs); 5426249195Smm return (SET_ERROR(EINVAL)); 5427219089Spjd } 5428219089Spjd break; 5429219089Spjd default: 5430219089Spjd ZFS_EXIT(zfsvfs); 5431249195Smm return (SET_ERROR(EINVAL)); 5432219089Spjd } 5433219089Spjd 5434219089Spjd uio->uio_extflg = UIO_XUIO; 5435219089Spjd XUIO_XUZC_RW(xuio) = ioflag; 5436219089Spjd ZFS_EXIT(zfsvfs); 5437219089Spjd return (0); 5438219089Spjd} 5439219089Spjd 5440219089Spjd/*ARGSUSED*/ 5441219089Spjdstatic int 5442219089Spjdzfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 5443219089Spjd{ 5444219089Spjd int i; 5445219089Spjd arc_buf_t *abuf; 5446219089Spjd int ioflag = XUIO_XUZC_RW(xuio); 5447219089Spjd 5448219089Spjd ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 5449219089Spjd 5450219089Spjd i = dmu_xuio_cnt(xuio); 5451219089Spjd while (i-- > 0) { 5452219089Spjd abuf = dmu_xuio_arcbuf(xuio, i); 5453219089Spjd /* 5454219089Spjd * if abuf == NULL, it must be a write buffer 5455219089Spjd * that has been returned in zfs_write(). 
5456219089Spjd */ 5457219089Spjd if (abuf) 5458219089Spjd dmu_return_arcbuf(abuf); 5459219089Spjd ASSERT(abuf || ioflag == UIO_WRITE); 5460219089Spjd } 5461219089Spjd 5462219089Spjd dmu_xuio_fini(xuio); 5463219089Spjd return (0); 5464219089Spjd} 5465219089Spjd 5466219089Spjd/* 5467219089Spjd * Predeclare these here so that the compiler assumes that 5468219089Spjd * this is an "old style" function declaration that does 5469219089Spjd * not include arguments => we won't get type mismatch errors 5470219089Spjd * in the initializations that follow. 5471219089Spjd */ 5472219089Spjdstatic int zfs_inval(); 5473219089Spjdstatic int zfs_isdir(); 5474219089Spjd 5475219089Spjdstatic int 5476219089Spjdzfs_inval() 5477219089Spjd{ 5478249195Smm return (SET_ERROR(EINVAL)); 5479219089Spjd} 5480219089Spjd 5481219089Spjdstatic int 5482219089Spjdzfs_isdir() 5483219089Spjd{ 5484249195Smm return (SET_ERROR(EISDIR)); 5485219089Spjd} 5486219089Spjd/* 5487219089Spjd * Directory vnode operations template 5488219089Spjd */ 5489219089Spjdvnodeops_t *zfs_dvnodeops; 5490219089Spjdconst fs_operation_def_t zfs_dvnodeops_template[] = { 5491219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5492219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5493219089Spjd VOPNAME_READ, { .error = zfs_isdir }, 5494219089Spjd VOPNAME_WRITE, { .error = zfs_isdir }, 5495219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5496219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5497219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5498219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5499219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5500219089Spjd VOPNAME_CREATE, { .vop_create = zfs_create }, 5501219089Spjd VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5502219089Spjd VOPNAME_LINK, { .vop_link = zfs_link }, 5503219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5504219089Spjd VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, 5505219089Spjd VOPNAME_RMDIR, { .vop_rmdir = 
zfs_rmdir }, 5506219089Spjd VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5507219089Spjd VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, 5508219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5509219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5510219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5511219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5512219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5513219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5514219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5515219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5516219089Spjd NULL, NULL 5517219089Spjd}; 5518219089Spjd 5519219089Spjd/* 5520219089Spjd * Regular file vnode operations template 5521219089Spjd */ 5522219089Spjdvnodeops_t *zfs_fvnodeops; 5523219089Spjdconst fs_operation_def_t zfs_fvnodeops_template[] = { 5524219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5525219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5526219089Spjd VOPNAME_READ, { .vop_read = zfs_read }, 5527219089Spjd VOPNAME_WRITE, { .vop_write = zfs_write }, 5528219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5529219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5530219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5531219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5532219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5533219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5534219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5535219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5536219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5537219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5538219089Spjd VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, 5539219089Spjd VOPNAME_SPACE, { .vop_space = zfs_space }, 5540219089Spjd VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, 5541219089Spjd VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, 
5542219089Spjd VOPNAME_MAP, { .vop_map = zfs_map }, 5543219089Spjd VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, 5544219089Spjd VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, 5545219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5546219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5547219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5548219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5549219089Spjd VOPNAME_REQZCBUF, { .vop_reqzcbuf = zfs_reqzcbuf }, 5550219089Spjd VOPNAME_RETZCBUF, { .vop_retzcbuf = zfs_retzcbuf }, 5551219089Spjd NULL, NULL 5552219089Spjd}; 5553219089Spjd 5554219089Spjd/* 5555219089Spjd * Symbolic link vnode operations template 5556219089Spjd */ 5557219089Spjdvnodeops_t *zfs_symvnodeops; 5558219089Spjdconst fs_operation_def_t zfs_symvnodeops_template[] = { 5559219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5560219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5561219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5562219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5563219089Spjd VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, 5564219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5565219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5566219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5567219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5568219089Spjd NULL, NULL 5569219089Spjd}; 5570219089Spjd 5571219089Spjd/* 5572219089Spjd * special share hidden files vnode operations template 5573219089Spjd */ 5574219089Spjdvnodeops_t *zfs_sharevnodeops; 5575219089Spjdconst fs_operation_def_t zfs_sharevnodeops_template[] = { 5576219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5577219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5578219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5579219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5580219089Spjd VOPNAME_PATHCONF, { 
.vop_pathconf = zfs_pathconf }, 5581219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5582219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5583219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5584219089Spjd NULL, NULL 5585219089Spjd}; 5586219089Spjd 5587219089Spjd/* 5588219089Spjd * Extended attribute directory vnode operations template 5589251631Sdelphij * 5590251631Sdelphij * This template is identical to the directory vnodes 5591251631Sdelphij * operation template except for restricted operations: 5592251631Sdelphij * VOP_MKDIR() 5593251631Sdelphij * VOP_SYMLINK() 5594251631Sdelphij * 5595219089Spjd * Note that there are other restrictions embedded in: 5596219089Spjd * zfs_create() - restrict type to VREG 5597219089Spjd * zfs_link() - no links into/out of attribute space 5598219089Spjd * zfs_rename() - no moves into/out of attribute space 5599219089Spjd */ 5600219089Spjdvnodeops_t *zfs_xdvnodeops; 5601219089Spjdconst fs_operation_def_t zfs_xdvnodeops_template[] = { 5602219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5603219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5604219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5605219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5606219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5607219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5608219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5609219089Spjd VOPNAME_CREATE, { .vop_create = zfs_create }, 5610219089Spjd VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5611219089Spjd VOPNAME_LINK, { .vop_link = zfs_link }, 5612219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5613219089Spjd VOPNAME_MKDIR, { .error = zfs_inval }, 5614219089Spjd VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5615219089Spjd VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5616219089Spjd VOPNAME_SYMLINK, { .error = zfs_inval }, 5617219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 
5618219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5619219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5620219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5621219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5622219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5623219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5624219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5625219089Spjd NULL, NULL 5626219089Spjd}; 5627219089Spjd 5628219089Spjd/* 5629219089Spjd * Error vnode operations template 5630219089Spjd */ 5631219089Spjdvnodeops_t *zfs_evnodeops; 5632219089Spjdconst fs_operation_def_t zfs_evnodeops_template[] = { 5633219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5634219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5635219089Spjd NULL, NULL 5636219089Spjd}; 5637219089Spjd#endif /* sun */ 5638219089Spjd 5639219089Spjdstatic int 5640213673Spjdioflags(int ioflags) 5641213673Spjd{ 5642213673Spjd int flags = 0; 5643213673Spjd 5644213673Spjd if (ioflags & IO_APPEND) 5645213673Spjd flags |= FAPPEND; 5646213673Spjd if (ioflags & IO_NDELAY) 5647213673Spjd flags |= FNONBLOCK; 5648213673Spjd if (ioflags & IO_SYNC) 5649213673Spjd flags |= (FSYNC | FDSYNC | FRSYNC); 5650213673Spjd 5651213673Spjd return (flags); 5652213673Spjd} 5653213673Spjd 5654213673Spjdstatic int 5655213937Savgzfs_getpages(struct vnode *vp, vm_page_t *m, int count, int reqpage) 5656213937Savg{ 5657213937Savg znode_t *zp = VTOZ(vp); 5658213937Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5659213937Savg objset_t *os = zp->z_zfsvfs->z_os; 5660243517Savg vm_page_t mfirst, mlast, mreq; 5661213937Savg vm_object_t object; 5662213937Savg caddr_t va; 5663213937Savg struct sf_buf *sf; 5664243517Savg off_t startoff, endoff; 5665213937Savg int i, error; 5666243517Savg vm_pindex_t reqstart, reqend; 5667243517Savg int pcount, lsize, reqsize, size; 5668213937Savg 5669213937Savg ZFS_ENTER(zfsvfs); 5670213937Savg 
ZFS_VERIFY_ZP(zp); 5671213937Savg 5672243517Savg pcount = OFF_TO_IDX(round_page(count)); 5673213937Savg mreq = m[reqpage]; 5674213937Savg object = mreq->object; 5675213937Savg error = 0; 5676213937Savg 5677213937Savg KASSERT(vp->v_object == object, ("mismatching object")); 5678213937Savg 5679243517Savg if (pcount > 1 && zp->z_blksz > PAGESIZE) { 5680243517Savg startoff = rounddown(IDX_TO_OFF(mreq->pindex), zp->z_blksz); 5681243517Savg reqstart = OFF_TO_IDX(round_page(startoff)); 5682243517Savg if (reqstart < m[0]->pindex) 5683243517Savg reqstart = 0; 5684243517Savg else 5685243517Savg reqstart = reqstart - m[0]->pindex; 5686243517Savg endoff = roundup(IDX_TO_OFF(mreq->pindex) + PAGE_SIZE, 5687243517Savg zp->z_blksz); 5688243517Savg reqend = OFF_TO_IDX(trunc_page(endoff)) - 1; 5689243517Savg if (reqend > m[pcount - 1]->pindex) 5690243517Savg reqend = m[pcount - 1]->pindex; 5691243517Savg reqsize = reqend - m[reqstart]->pindex + 1; 5692243517Savg KASSERT(reqstart <= reqpage && reqpage < reqstart + reqsize, 5693243517Savg ("reqpage beyond [reqstart, reqstart + reqsize[ bounds")); 5694243517Savg } else { 5695243517Savg reqstart = reqpage; 5696243517Savg reqsize = 1; 5697243517Savg } 5698243517Savg mfirst = m[reqstart]; 5699243517Savg mlast = m[reqstart + reqsize - 1]; 5700243517Savg 5701248084Sattilio zfs_vmobject_wlock(object); 5702213937Savg 5703243517Savg for (i = 0; i < reqstart; i++) { 5704243517Savg vm_page_lock(m[i]); 5705243517Savg vm_page_free(m[i]); 5706243517Savg vm_page_unlock(m[i]); 5707213937Savg } 5708243517Savg for (i = reqstart + reqsize; i < pcount; i++) { 5709243517Savg vm_page_lock(m[i]); 5710243517Savg vm_page_free(m[i]); 5711243517Savg vm_page_unlock(m[i]); 5712243517Savg } 5713213937Savg 5714243517Savg if (mreq->valid && reqsize == 1) { 5715213937Savg if (mreq->valid != VM_PAGE_BITS_ALL) 5716213937Savg vm_page_zero_invalid(mreq, TRUE); 5717248084Sattilio zfs_vmobject_wunlock(object); 5718213937Savg ZFS_EXIT(zfsvfs); 5719248084Sattilio return 
(zfs_vm_pagerret_ok); 5720213937Savg } 5721213937Savg 5722213937Savg PCPU_INC(cnt.v_vnodein); 5723243517Savg PCPU_ADD(cnt.v_vnodepgsin, reqsize); 5724213937Savg 5725213937Savg if (IDX_TO_OFF(mreq->pindex) >= object->un_pager.vnp.vnp_size) { 5726243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5727243517Savg if (i != reqpage) { 5728243517Savg vm_page_lock(m[i]); 5729243517Savg vm_page_free(m[i]); 5730243517Savg vm_page_unlock(m[i]); 5731243517Savg } 5732243517Savg } 5733248084Sattilio zfs_vmobject_wunlock(object); 5734213937Savg ZFS_EXIT(zfsvfs); 5735248084Sattilio return (zfs_vm_pagerret_bad); 5736213937Savg } 5737213937Savg 5738243517Savg lsize = PAGE_SIZE; 5739243517Savg if (IDX_TO_OFF(mlast->pindex) + lsize > object->un_pager.vnp.vnp_size) 5740243517Savg lsize = object->un_pager.vnp.vnp_size - IDX_TO_OFF(mlast->pindex); 5741213937Savg 5742248084Sattilio zfs_vmobject_wunlock(object); 5743243517Savg 5744243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5745243517Savg size = PAGE_SIZE; 5746243517Savg if (i == (reqstart + reqsize - 1)) 5747243517Savg size = lsize; 5748243517Savg va = zfs_map_page(m[i], &sf); 5749243517Savg error = dmu_read(os, zp->z_id, IDX_TO_OFF(m[i]->pindex), 5750243517Savg size, va, DMU_READ_PREFETCH); 5751243517Savg if (size != PAGE_SIZE) 5752243517Savg bzero(va + size, PAGE_SIZE - size); 5753243517Savg zfs_unmap_page(sf); 5754243517Savg if (error != 0) 5755243517Savg break; 5756243517Savg } 5757243517Savg 5758248084Sattilio zfs_vmobject_wlock(object); 5759213937Savg 5760243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5761243763Savg if (!error) 5762243763Savg m[i]->valid = VM_PAGE_BITS_ALL; 5763243517Savg KASSERT(m[i]->dirty == 0, ("zfs_getpages: page %p is dirty", m[i])); 5764243763Savg if (i != reqpage) 5765243763Savg vm_page_readahead_finish(m[i]); 5766243517Savg } 5767243517Savg 5768248084Sattilio zfs_vmobject_wunlock(object); 5769213937Savg 5770213937Savg ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 5771213937Savg 
ZFS_EXIT(zfsvfs); 5772248084Sattilio return (error ? zfs_vm_pagerret_error : zfs_vm_pagerret_ok); 5773213937Savg} 5774213937Savg 5775213937Savgstatic int 5776213937Savgzfs_freebsd_getpages(ap) 5777213937Savg struct vop_getpages_args /* { 5778213937Savg struct vnode *a_vp; 5779213937Savg vm_page_t *a_m; 5780213937Savg int a_count; 5781213937Savg int a_reqpage; 5782213937Savg vm_ooffset_t a_offset; 5783213937Savg } */ *ap; 5784213937Savg{ 5785213937Savg 5786213937Savg return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage)); 5787213937Savg} 5788213937Savg 5789213937Savgstatic int 5790243518Savgzfs_freebsd_bmap(ap) 5791243518Savg struct vop_bmap_args /* { 5792243518Savg struct vnode *a_vp; 5793243518Savg daddr_t a_bn; 5794243518Savg struct bufobj **a_bop; 5795243518Savg daddr_t *a_bnp; 5796243518Savg int *a_runp; 5797243518Savg int *a_runb; 5798243518Savg } */ *ap; 5799243518Savg{ 5800243518Savg 5801243518Savg if (ap->a_bop != NULL) 5802243518Savg *ap->a_bop = &ap->a_vp->v_bufobj; 5803243518Savg if (ap->a_bnp != NULL) 5804243518Savg *ap->a_bnp = ap->a_bn; 5805243518Savg if (ap->a_runp != NULL) 5806243518Savg *ap->a_runp = 0; 5807243518Savg if (ap->a_runb != NULL) 5808243518Savg *ap->a_runb = 0; 5809243518Savg 5810243518Savg return (0); 5811243518Savg} 5812243518Savg 5813243518Savgstatic int 5814168962Spjdzfs_freebsd_open(ap) 5815168962Spjd struct vop_open_args /* { 5816168962Spjd struct vnode *a_vp; 5817168962Spjd int a_mode; 5818168962Spjd struct ucred *a_cred; 5819168962Spjd struct thread *a_td; 5820168962Spjd } */ *ap; 5821168962Spjd{ 5822168962Spjd vnode_t *vp = ap->a_vp; 5823168962Spjd znode_t *zp = VTOZ(vp); 5824168962Spjd int error; 5825168962Spjd 5826185029Spjd error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); 5827168962Spjd if (error == 0) 5828219089Spjd vnode_create_vobject(vp, zp->z_size, ap->a_td); 5829168962Spjd return (error); 5830168962Spjd} 5831168962Spjd 5832168962Spjdstatic int 5833168962Spjdzfs_freebsd_close(ap) 5834168962Spjd struct 
vop_close_args /* { 5835168962Spjd struct vnode *a_vp; 5836168962Spjd int a_fflag; 5837168962Spjd struct ucred *a_cred; 5838168962Spjd struct thread *a_td; 5839168962Spjd } */ *ap; 5840168962Spjd{ 5841168962Spjd 5842242566Savg return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred, NULL)); 5843168962Spjd} 5844168962Spjd 5845168962Spjdstatic int 5846168962Spjdzfs_freebsd_ioctl(ap) 5847168962Spjd struct vop_ioctl_args /* { 5848168962Spjd struct vnode *a_vp; 5849168962Spjd u_long a_command; 5850168962Spjd caddr_t a_data; 5851168962Spjd int a_fflag; 5852168962Spjd struct ucred *cred; 5853168962Spjd struct thread *td; 5854168962Spjd } */ *ap; 5855168962Spjd{ 5856168962Spjd 5857168978Spjd return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 5858185029Spjd ap->a_fflag, ap->a_cred, NULL, NULL)); 5859168962Spjd} 5860168962Spjd 5861168962Spjdstatic int 5862168962Spjdzfs_freebsd_read(ap) 5863168962Spjd struct vop_read_args /* { 5864168962Spjd struct vnode *a_vp; 5865168962Spjd struct uio *a_uio; 5866168962Spjd int a_ioflag; 5867168962Spjd struct ucred *a_cred; 5868168962Spjd } */ *ap; 5869168962Spjd{ 5870168962Spjd 5871213673Spjd return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 5872213673Spjd ap->a_cred, NULL)); 5873168962Spjd} 5874168962Spjd 5875168962Spjdstatic int 5876168962Spjdzfs_freebsd_write(ap) 5877168962Spjd struct vop_write_args /* { 5878168962Spjd struct vnode *a_vp; 5879168962Spjd struct uio *a_uio; 5880168962Spjd int a_ioflag; 5881168962Spjd struct ucred *a_cred; 5882168962Spjd } */ *ap; 5883168962Spjd{ 5884168962Spjd 5885213673Spjd return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 5886213673Spjd ap->a_cred, NULL)); 5887168962Spjd} 5888168962Spjd 5889168962Spjdstatic int 5890168962Spjdzfs_freebsd_access(ap) 5891168962Spjd struct vop_access_args /* { 5892168962Spjd struct vnode *a_vp; 5893192689Strasz accmode_t a_accmode; 5894168962Spjd struct ucred *a_cred; 5895168962Spjd struct thread *a_td; 5896168962Spjd } */ *ap; 
5897168962Spjd{ 5898212002Sjh vnode_t *vp = ap->a_vp; 5899212002Sjh znode_t *zp = VTOZ(vp); 5900198703Spjd accmode_t accmode; 5901198703Spjd int error = 0; 5902168962Spjd 5903185172Spjd /* 5904198703Spjd * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND, 5905185172Spjd */ 5906198703Spjd accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 5907198703Spjd if (accmode != 0) 5908198703Spjd error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); 5909185172Spjd 5910198703Spjd /* 5911198703Spjd * VADMIN has to be handled by vaccess(). 5912198703Spjd */ 5913198703Spjd if (error == 0) { 5914198703Spjd accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 5915198703Spjd if (accmode != 0) { 5916219089Spjd error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 5917219089Spjd zp->z_gid, accmode, ap->a_cred, NULL); 5918198703Spjd } 5919185172Spjd } 5920185172Spjd 5921212002Sjh /* 5922212002Sjh * For VEXEC, ensure that at least one execute bit is set for 5923212002Sjh * non-directories. 
5924212002Sjh */ 5925212002Sjh if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 5926219089Spjd (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 5927212002Sjh error = EACCES; 5928219089Spjd } 5929212002Sjh 5930198703Spjd return (error); 5931168962Spjd} 5932168962Spjd 5933168962Spjdstatic int 5934168962Spjdzfs_freebsd_lookup(ap) 5935168962Spjd struct vop_lookup_args /* { 5936168962Spjd struct vnode *a_dvp; 5937168962Spjd struct vnode **a_vpp; 5938168962Spjd struct componentname *a_cnp; 5939168962Spjd } */ *ap; 5940168962Spjd{ 5941168962Spjd struct componentname *cnp = ap->a_cnp; 5942168962Spjd char nm[NAME_MAX + 1]; 5943168962Spjd 5944168962Spjd ASSERT(cnp->cn_namelen < sizeof(nm)); 5945168962Spjd strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 5946168962Spjd 5947168962Spjd return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 5948185029Spjd cnp->cn_cred, cnp->cn_thread, 0)); 5949168962Spjd} 5950168962Spjd 5951168962Spjdstatic int 5952168962Spjdzfs_freebsd_create(ap) 5953168962Spjd struct vop_create_args /* { 5954168962Spjd struct vnode *a_dvp; 5955168962Spjd struct vnode **a_vpp; 5956168962Spjd struct componentname *a_cnp; 5957168962Spjd struct vattr *a_vap; 5958168962Spjd } */ *ap; 5959168962Spjd{ 5960168962Spjd struct componentname *cnp = ap->a_cnp; 5961168962Spjd vattr_t *vap = ap->a_vap; 5962168962Spjd int mode; 5963168962Spjd 5964168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5965168962Spjd 5966168962Spjd vattr_init_mask(vap); 5967168962Spjd mode = vap->va_mode & ALLPERMS; 5968168962Spjd 5969168962Spjd return (zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 5970185029Spjd ap->a_vpp, cnp->cn_cred, cnp->cn_thread)); 5971168962Spjd} 5972168962Spjd 5973168962Spjdstatic int 5974168962Spjdzfs_freebsd_remove(ap) 5975168962Spjd struct vop_remove_args /* { 5976168962Spjd struct vnode *a_dvp; 5977168962Spjd struct vnode *a_vp; 5978168962Spjd struct componentname *a_cnp; 5979168962Spjd } */ *ap; 
5980168962Spjd{ 5981168962Spjd 5982168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 5983168962Spjd 5984168962Spjd return (zfs_remove(ap->a_dvp, ap->a_cnp->cn_nameptr, 5985185029Spjd ap->a_cnp->cn_cred, NULL, 0)); 5986168962Spjd} 5987168962Spjd 5988168962Spjdstatic int 5989168962Spjdzfs_freebsd_mkdir(ap) 5990168962Spjd struct vop_mkdir_args /* { 5991168962Spjd struct vnode *a_dvp; 5992168962Spjd struct vnode **a_vpp; 5993168962Spjd struct componentname *a_cnp; 5994168962Spjd struct vattr *a_vap; 5995168962Spjd } */ *ap; 5996168962Spjd{ 5997168962Spjd vattr_t *vap = ap->a_vap; 5998168962Spjd 5999168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 6000168962Spjd 6001168962Spjd vattr_init_mask(vap); 6002168962Spjd 6003168962Spjd return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 6004185029Spjd ap->a_cnp->cn_cred, NULL, 0, NULL)); 6005168962Spjd} 6006168962Spjd 6007168962Spjdstatic int 6008168962Spjdzfs_freebsd_rmdir(ap) 6009168962Spjd struct vop_rmdir_args /* { 6010168962Spjd struct vnode *a_dvp; 6011168962Spjd struct vnode *a_vp; 6012168962Spjd struct componentname *a_cnp; 6013168962Spjd } */ *ap; 6014168962Spjd{ 6015168962Spjd struct componentname *cnp = ap->a_cnp; 6016168962Spjd 6017168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6018168962Spjd 6019185029Spjd return (zfs_rmdir(ap->a_dvp, cnp->cn_nameptr, NULL, cnp->cn_cred, NULL, 0)); 6020168962Spjd} 6021168962Spjd 6022168962Spjdstatic int 6023168962Spjdzfs_freebsd_readdir(ap) 6024168962Spjd struct vop_readdir_args /* { 6025168962Spjd struct vnode *a_vp; 6026168962Spjd struct uio *a_uio; 6027168962Spjd struct ucred *a_cred; 6028168962Spjd int *a_eofflag; 6029168962Spjd int *a_ncookies; 6030168962Spjd u_long **a_cookies; 6031168962Spjd } */ *ap; 6032168962Spjd{ 6033168962Spjd 6034168962Spjd return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 6035168962Spjd ap->a_ncookies, ap->a_cookies)); 6036168962Spjd} 6037168962Spjd 6038168962Spjdstatic int 6039168962Spjdzfs_freebsd_fsync(ap) 
6040168962Spjd struct vop_fsync_args /* { 6041168962Spjd struct vnode *a_vp; 6042168962Spjd int a_waitfor; 6043168962Spjd struct thread *a_td; 6044168962Spjd } */ *ap; 6045168962Spjd{ 6046168962Spjd 6047168962Spjd vop_stdfsync(ap); 6048185029Spjd return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 6049168962Spjd} 6050168962Spjd 6051168962Spjdstatic int 6052168962Spjdzfs_freebsd_getattr(ap) 6053168962Spjd struct vop_getattr_args /* { 6054168962Spjd struct vnode *a_vp; 6055168962Spjd struct vattr *a_vap; 6056168962Spjd struct ucred *a_cred; 6057168962Spjd } */ *ap; 6058168962Spjd{ 6059185029Spjd vattr_t *vap = ap->a_vap; 6060185029Spjd xvattr_t xvap; 6061185029Spjd u_long fflags = 0; 6062185029Spjd int error; 6063168962Spjd 6064185029Spjd xva_init(&xvap); 6065185029Spjd xvap.xva_vattr = *vap; 6066185029Spjd xvap.xva_vattr.va_mask |= AT_XVATTR; 6067185029Spjd 6068185029Spjd /* Convert chflags into ZFS-type flags. */ 6069185029Spjd /* XXX: what about SF_SETTABLE?. */ 6070185029Spjd XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 6071185029Spjd XVA_SET_REQ(&xvap, XAT_APPENDONLY); 6072185029Spjd XVA_SET_REQ(&xvap, XAT_NOUNLINK); 6073185029Spjd XVA_SET_REQ(&xvap, XAT_NODUMP); 6074254627Sken XVA_SET_REQ(&xvap, XAT_READONLY); 6075254627Sken XVA_SET_REQ(&xvap, XAT_ARCHIVE); 6076254627Sken XVA_SET_REQ(&xvap, XAT_SYSTEM); 6077254627Sken XVA_SET_REQ(&xvap, XAT_HIDDEN); 6078254627Sken XVA_SET_REQ(&xvap, XAT_REPARSE); 6079254627Sken XVA_SET_REQ(&xvap, XAT_OFFLINE); 6080254627Sken XVA_SET_REQ(&xvap, XAT_SPARSE); 6081254627Sken 6082185029Spjd error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 6083185029Spjd if (error != 0) 6084185029Spjd return (error); 6085185029Spjd 6086185029Spjd /* Convert ZFS xattr into chflags. 
*/ 6087185029Spjd#define FLAG_CHECK(fflag, xflag, xfield) do { \ 6088185029Spjd if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 6089185029Spjd fflags |= (fflag); \ 6090185029Spjd} while (0) 6091185029Spjd FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 6092185029Spjd xvap.xva_xoptattrs.xoa_immutable); 6093185029Spjd FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 6094185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 6095185029Spjd FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 6096185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 6097254627Sken FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE, 6098254627Sken xvap.xva_xoptattrs.xoa_archive); 6099185029Spjd FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 6100185029Spjd xvap.xva_xoptattrs.xoa_nodump); 6101254627Sken FLAG_CHECK(UF_READONLY, XAT_READONLY, 6102254627Sken xvap.xva_xoptattrs.xoa_readonly); 6103254627Sken FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM, 6104254627Sken xvap.xva_xoptattrs.xoa_system); 6105254627Sken FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN, 6106254627Sken xvap.xva_xoptattrs.xoa_hidden); 6107254627Sken FLAG_CHECK(UF_REPARSE, XAT_REPARSE, 6108254627Sken xvap.xva_xoptattrs.xoa_reparse); 6109254627Sken FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE, 6110254627Sken xvap.xva_xoptattrs.xoa_offline); 6111254627Sken FLAG_CHECK(UF_SPARSE, XAT_SPARSE, 6112254627Sken xvap.xva_xoptattrs.xoa_sparse); 6113254627Sken 6114185029Spjd#undef FLAG_CHECK 6115185029Spjd *vap = xvap.xva_vattr; 6116185029Spjd vap->va_flags = fflags; 6117185029Spjd return (0); 6118168962Spjd} 6119168962Spjd 6120168962Spjdstatic int 6121168962Spjdzfs_freebsd_setattr(ap) 6122168962Spjd struct vop_setattr_args /* { 6123168962Spjd struct vnode *a_vp; 6124168962Spjd struct vattr *a_vap; 6125168962Spjd struct ucred *a_cred; 6126168962Spjd } */ *ap; 6127168962Spjd{ 6128185172Spjd vnode_t *vp = ap->a_vp; 6129168962Spjd vattr_t *vap = ap->a_vap; 6130185172Spjd cred_t *cred = ap->a_cred; 6131185029Spjd xvattr_t xvap; 6132185029Spjd u_long fflags; 6133185029Spjd uint64_t zflags; 6134168962Spjd 6135168962Spjd vattr_init_mask(vap); 
6136170044Spjd vap->va_mask &= ~AT_NOSET; 6137168962Spjd 6138185029Spjd xva_init(&xvap); 6139185029Spjd xvap.xva_vattr = *vap; 6140185029Spjd 6141219089Spjd zflags = VTOZ(vp)->z_pflags; 6142185172Spjd 6143185029Spjd if (vap->va_flags != VNOVAL) { 6144197683Sdelphij zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 6145185172Spjd int error; 6146185172Spjd 6147197683Sdelphij if (zfsvfs->z_use_fuids == B_FALSE) 6148197683Sdelphij return (EOPNOTSUPP); 6149197683Sdelphij 6150185029Spjd fflags = vap->va_flags; 6151254627Sken /* 6152254627Sken * XXX KDM 6153254627Sken * We need to figure out whether it makes sense to allow 6154254627Sken * UF_REPARSE through, since we don't really have other 6155254627Sken * facilities to handle reparse points and zfs_setattr() 6156254627Sken * doesn't currently allow setting that attribute anyway. 6157254627Sken */ 6158254627Sken if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE| 6159254627Sken UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE| 6160254627Sken UF_OFFLINE|UF_SPARSE)) != 0) 6161185029Spjd return (EOPNOTSUPP); 6162185172Spjd /* 6163185172Spjd * Unprivileged processes are not permitted to unset system 6164185172Spjd * flags, or modify flags if any system flags are set. 6165185172Spjd * Privileged non-jail processes may not modify system flags 6166185172Spjd * if securelevel > 0 and any existing system flags are set. 6167185172Spjd * Privileged jail processes behave like privileged non-jail 6168185172Spjd * processes if the security.jail.chflags_allowed sysctl is 6169185172Spjd * is non-zero; otherwise, they behave like unprivileged 6170185172Spjd * processes. 
6171185172Spjd */ 6172197861Spjd if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 6173197861Spjd priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) { 6174185172Spjd if (zflags & 6175185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 6176185172Spjd error = securelevel_gt(cred, 0); 6177197861Spjd if (error != 0) 6178185172Spjd return (error); 6179185172Spjd } 6180185172Spjd } else { 6181197861Spjd /* 6182197861Spjd * Callers may only modify the file flags on objects they 6183197861Spjd * have VADMIN rights for. 6184197861Spjd */ 6185197861Spjd if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0) 6186197861Spjd return (error); 6187185172Spjd if (zflags & 6188185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 6189185172Spjd return (EPERM); 6190185172Spjd } 6191185172Spjd if (fflags & 6192185172Spjd (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 6193185172Spjd return (EPERM); 6194185172Spjd } 6195185172Spjd } 6196185029Spjd 6197185029Spjd#define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 6198185029Spjd if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 6199185029Spjd ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 6200185029Spjd XVA_SET_REQ(&xvap, (xflag)); \ 6201185029Spjd (xfield) = ((fflags & (fflag)) != 0); \ 6202185029Spjd } \ 6203185029Spjd} while (0) 6204185029Spjd /* Convert chflags into ZFS-type flags. */ 6205185029Spjd /* XXX: what about SF_SETTABLE?. 
*/ 6206185029Spjd FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 6207185029Spjd xvap.xva_xoptattrs.xoa_immutable); 6208185029Spjd FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 6209185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 6210185029Spjd FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 6211185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 6212254627Sken FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE, 6213254627Sken xvap.xva_xoptattrs.xoa_archive); 6214185029Spjd FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 6215185172Spjd xvap.xva_xoptattrs.xoa_nodump); 6216254627Sken FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY, 6217254627Sken xvap.xva_xoptattrs.xoa_readonly); 6218254627Sken FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM, 6219254627Sken xvap.xva_xoptattrs.xoa_system); 6220254627Sken FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN, 6221254627Sken xvap.xva_xoptattrs.xoa_hidden); 6222254627Sken FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE, 6223254627Sken xvap.xva_xoptattrs.xoa_hidden); 6224254627Sken FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE, 6225254627Sken xvap.xva_xoptattrs.xoa_offline); 6226254627Sken FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE, 6227254627Sken xvap.xva_xoptattrs.xoa_sparse); 6228185029Spjd#undef FLAG_CHANGE 6229185029Spjd } 6230185172Spjd return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL)); 6231168962Spjd} 6232168962Spjd 6233168962Spjdstatic int 6234168962Spjdzfs_freebsd_rename(ap) 6235168962Spjd struct vop_rename_args /* { 6236168962Spjd struct vnode *a_fdvp; 6237168962Spjd struct vnode *a_fvp; 6238168962Spjd struct componentname *a_fcnp; 6239168962Spjd struct vnode *a_tdvp; 6240168962Spjd struct vnode *a_tvp; 6241168962Spjd struct componentname *a_tcnp; 6242168962Spjd } */ *ap; 6243168962Spjd{ 6244168962Spjd vnode_t *fdvp = ap->a_fdvp; 6245168962Spjd vnode_t *fvp = ap->a_fvp; 6246168962Spjd vnode_t *tdvp = ap->a_tdvp; 6247168962Spjd vnode_t *tvp = ap->a_tvp; 6248168962Spjd int error; 6249168962Spjd 
6250192237Skmacy ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); 6251192237Skmacy ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); 6252168962Spjd 6253254982Sdelphij if (fdvp->v_mount == tdvp->v_mount) 6254254982Sdelphij error = zfs_rename(fdvp, ap->a_fcnp->cn_nameptr, tdvp, 6255254982Sdelphij ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred, NULL, 0); 6256254982Sdelphij else 6257254982Sdelphij error = EXDEV; 6258168962Spjd 6259168962Spjd if (tdvp == tvp) 6260168962Spjd VN_RELE(tdvp); 6261168962Spjd else 6262168962Spjd VN_URELE(tdvp); 6263168962Spjd if (tvp) 6264168962Spjd VN_URELE(tvp); 6265168962Spjd VN_RELE(fdvp); 6266168962Spjd VN_RELE(fvp); 6267168962Spjd 6268168962Spjd return (error); 6269168962Spjd} 6270168962Spjd 6271168962Spjdstatic int 6272168962Spjdzfs_freebsd_symlink(ap) 6273168962Spjd struct vop_symlink_args /* { 6274168962Spjd struct vnode *a_dvp; 6275168962Spjd struct vnode **a_vpp; 6276168962Spjd struct componentname *a_cnp; 6277168962Spjd struct vattr *a_vap; 6278168962Spjd char *a_target; 6279168962Spjd } */ *ap; 6280168962Spjd{ 6281168962Spjd struct componentname *cnp = ap->a_cnp; 6282168962Spjd vattr_t *vap = ap->a_vap; 6283168962Spjd 6284168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6285168962Spjd 6286168962Spjd vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. 
*/ 6287168962Spjd vattr_init_mask(vap); 6288168962Spjd 6289168962Spjd return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 6290168962Spjd ap->a_target, cnp->cn_cred, cnp->cn_thread)); 6291168962Spjd} 6292168962Spjd 6293168962Spjdstatic int 6294168962Spjdzfs_freebsd_readlink(ap) 6295168962Spjd struct vop_readlink_args /* { 6296168962Spjd struct vnode *a_vp; 6297168962Spjd struct uio *a_uio; 6298168962Spjd struct ucred *a_cred; 6299168962Spjd } */ *ap; 6300168962Spjd{ 6301168962Spjd 6302185029Spjd return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 6303168962Spjd} 6304168962Spjd 6305168962Spjdstatic int 6306168962Spjdzfs_freebsd_link(ap) 6307168962Spjd struct vop_link_args /* { 6308168962Spjd struct vnode *a_tdvp; 6309168962Spjd struct vnode *a_vp; 6310168962Spjd struct componentname *a_cnp; 6311168962Spjd } */ *ap; 6312168962Spjd{ 6313168962Spjd struct componentname *cnp = ap->a_cnp; 6314254982Sdelphij vnode_t *vp = ap->a_vp; 6315254982Sdelphij vnode_t *tdvp = ap->a_tdvp; 6316168962Spjd 6317254982Sdelphij if (tdvp->v_mount != vp->v_mount) 6318254982Sdelphij return (EXDEV); 6319254982Sdelphij 6320168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6321168962Spjd 6322254982Sdelphij return (zfs_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 6323168962Spjd} 6324168962Spjd 6325168962Spjdstatic int 6326168962Spjdzfs_freebsd_inactive(ap) 6327169170Spjd struct vop_inactive_args /* { 6328169170Spjd struct vnode *a_vp; 6329169170Spjd struct thread *a_td; 6330169170Spjd } */ *ap; 6331168962Spjd{ 6332168962Spjd vnode_t *vp = ap->a_vp; 6333168962Spjd 6334185029Spjd zfs_inactive(vp, ap->a_td->td_ucred, NULL); 6335168962Spjd return (0); 6336168962Spjd} 6337168962Spjd 6338168962Spjdstatic int 6339168962Spjdzfs_freebsd_reclaim(ap) 6340168962Spjd struct vop_reclaim_args /* { 6341168962Spjd struct vnode *a_vp; 6342168962Spjd struct thread *a_td; 6343168962Spjd } */ *ap; 6344168962Spjd{ 6345169170Spjd vnode_t *vp = ap->a_vp; 6346168962Spjd znode_t *zp = 
VTOZ(vp); 6347197133Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6348168962Spjd 6349169025Spjd ASSERT(zp != NULL); 6350169025Spjd 6351243520Savg /* Destroy the vm object and flush associated pages. */ 6352243520Savg vnode_destroy_vobject(vp); 6353243520Savg 6354168962Spjd /* 6355243520Savg * z_teardown_inactive_lock protects from a race with 6356243520Savg * zfs_znode_dmu_fini in zfsvfs_teardown during 6357243520Savg * force unmount. 6358168962Spjd */ 6359243520Savg rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 6360243520Savg if (zp->z_sa_hdl == NULL) 6361196301Spjd zfs_znode_free(zp); 6362243520Savg else 6363243520Savg zfs_zinactive(zp); 6364243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 6365185029Spjd 6366168962Spjd vp->v_data = NULL; 6367168962Spjd return (0); 6368168962Spjd} 6369168962Spjd 6370168962Spjdstatic int 6371168962Spjdzfs_freebsd_fid(ap) 6372168962Spjd struct vop_fid_args /* { 6373168962Spjd struct vnode *a_vp; 6374168962Spjd struct fid *a_fid; 6375168962Spjd } */ *ap; 6376168962Spjd{ 6377168962Spjd 6378185029Spjd return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 6379168962Spjd} 6380168962Spjd 6381168962Spjdstatic int 6382168962Spjdzfs_freebsd_pathconf(ap) 6383168962Spjd struct vop_pathconf_args /* { 6384168962Spjd struct vnode *a_vp; 6385168962Spjd int a_name; 6386168962Spjd register_t *a_retval; 6387168962Spjd } */ *ap; 6388168962Spjd{ 6389168962Spjd ulong_t val; 6390168962Spjd int error; 6391168962Spjd 6392185029Spjd error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 6393168962Spjd if (error == 0) 6394168962Spjd *ap->a_retval = val; 6395168962Spjd else if (error == EOPNOTSUPP) 6396168962Spjd error = vop_stdpathconf(ap); 6397168962Spjd return (error); 6398168962Spjd} 6399168962Spjd 6400196949Straszstatic int 6401196949Straszzfs_freebsd_fifo_pathconf(ap) 6402196949Strasz struct vop_pathconf_args /* { 6403196949Strasz struct vnode *a_vp; 6404196949Strasz int a_name; 6405196949Strasz register_t *a_retval; 
6406196949Strasz } */ *ap; 6407196949Strasz{ 6408196949Strasz 6409196949Strasz switch (ap->a_name) { 6410196949Strasz case _PC_ACL_EXTENDED: 6411196949Strasz case _PC_ACL_NFS4: 6412196949Strasz case _PC_ACL_PATH_MAX: 6413196949Strasz case _PC_MAC_PRESENT: 6414196949Strasz return (zfs_freebsd_pathconf(ap)); 6415196949Strasz default: 6416196949Strasz return (fifo_specops.vop_pathconf(ap)); 6417196949Strasz } 6418196949Strasz} 6419196949Strasz 6420185029Spjd/* 6421185029Spjd * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 6422185029Spjd * extended attribute name: 6423185029Spjd * 6424185029Spjd * NAMESPACE PREFIX 6425185029Spjd * system freebsd:system: 6426185029Spjd * user (none, can be used to access ZFS fsattr(5) attributes 6427185029Spjd * created on Solaris) 6428185029Spjd */ 6429185029Spjdstatic int 6430185029Spjdzfs_create_attrname(int attrnamespace, const char *name, char *attrname, 6431185029Spjd size_t size) 6432185029Spjd{ 6433185029Spjd const char *namespace, *prefix, *suffix; 6434185029Spjd 6435185029Spjd /* We don't allow '/' character in attribute name. */ 6436185029Spjd if (strchr(name, '/') != NULL) 6437185029Spjd return (EINVAL); 6438185029Spjd /* We don't allow attribute names that start with "freebsd:" string. */ 6439185029Spjd if (strncmp(name, "freebsd:", 8) == 0) 6440185029Spjd return (EINVAL); 6441185029Spjd 6442185029Spjd bzero(attrname, size); 6443185029Spjd 6444185029Spjd switch (attrnamespace) { 6445185029Spjd case EXTATTR_NAMESPACE_USER: 6446185029Spjd#if 0 6447185029Spjd prefix = "freebsd:"; 6448185029Spjd namespace = EXTATTR_NAMESPACE_USER_STRING; 6449185029Spjd suffix = ":"; 6450185029Spjd#else 6451185029Spjd /* 6452185029Spjd * This is the default namespace by which we can access all 6453185029Spjd * attributes created on Solaris. 
6454185029Spjd */ 6455185029Spjd prefix = namespace = suffix = ""; 6456185029Spjd#endif 6457185029Spjd break; 6458185029Spjd case EXTATTR_NAMESPACE_SYSTEM: 6459185029Spjd prefix = "freebsd:"; 6460185029Spjd namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 6461185029Spjd suffix = ":"; 6462185029Spjd break; 6463185029Spjd case EXTATTR_NAMESPACE_EMPTY: 6464185029Spjd default: 6465185029Spjd return (EINVAL); 6466185029Spjd } 6467185029Spjd if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 6468185029Spjd name) >= size) { 6469185029Spjd return (ENAMETOOLONG); 6470185029Spjd } 6471185029Spjd return (0); 6472185029Spjd} 6473185029Spjd 6474185029Spjd/* 6475185029Spjd * Vnode operating to retrieve a named extended attribute. 6476185029Spjd */ 6477185029Spjdstatic int 6478185029Spjdzfs_getextattr(struct vop_getextattr_args *ap) 6479185029Spjd/* 6480185029Spjdvop_getextattr { 6481185029Spjd IN struct vnode *a_vp; 6482185029Spjd IN int a_attrnamespace; 6483185029Spjd IN const char *a_name; 6484185029Spjd INOUT struct uio *a_uio; 6485185029Spjd OUT size_t *a_size; 6486185029Spjd IN struct ucred *a_cred; 6487185029Spjd IN struct thread *a_td; 6488185029Spjd}; 6489185029Spjd*/ 6490185029Spjd{ 6491185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6492185029Spjd struct thread *td = ap->a_td; 6493185029Spjd struct nameidata nd; 6494185029Spjd char attrname[255]; 6495185029Spjd struct vattr va; 6496185029Spjd vnode_t *xvp = NULL, *vp; 6497185029Spjd int error, flags; 6498185029Spjd 6499195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6500195785Strasz ap->a_cred, ap->a_td, VREAD); 6501195785Strasz if (error != 0) 6502195785Strasz return (error); 6503195785Strasz 6504185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6505185029Spjd sizeof(attrname)); 6506185029Spjd if (error != 0) 6507185029Spjd return (error); 6508185029Spjd 6509185029Spjd ZFS_ENTER(zfsvfs); 6510185029Spjd 6511185029Spjd error = 
zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6512185029Spjd LOOKUP_XATTR); 6513185029Spjd if (error != 0) { 6514185029Spjd ZFS_EXIT(zfsvfs); 6515185029Spjd return (error); 6516185029Spjd } 6517185029Spjd 6518185029Spjd flags = FREAD; 6519241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 6520185029Spjd xvp, td); 6521194586Skib error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL); 6522185029Spjd vp = nd.ni_vp; 6523185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6524185029Spjd if (error != 0) { 6525196303Spjd ZFS_EXIT(zfsvfs); 6526195785Strasz if (error == ENOENT) 6527195785Strasz error = ENOATTR; 6528185029Spjd return (error); 6529185029Spjd } 6530185029Spjd 6531185029Spjd if (ap->a_size != NULL) { 6532185029Spjd error = VOP_GETATTR(vp, &va, ap->a_cred); 6533185029Spjd if (error == 0) 6534185029Spjd *ap->a_size = (size_t)va.va_size; 6535185029Spjd } else if (ap->a_uio != NULL) 6536224605Smm error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); 6537185029Spjd 6538185029Spjd VOP_UNLOCK(vp, 0); 6539185029Spjd vn_close(vp, flags, ap->a_cred, td); 6540185029Spjd ZFS_EXIT(zfsvfs); 6541185029Spjd 6542185029Spjd return (error); 6543185029Spjd} 6544185029Spjd 6545185029Spjd/* 6546185029Spjd * Vnode operation to remove a named attribute. 
6547185029Spjd */ 6548185029Spjdint 6549185029Spjdzfs_deleteextattr(struct vop_deleteextattr_args *ap) 6550185029Spjd/* 6551185029Spjdvop_deleteextattr { 6552185029Spjd IN struct vnode *a_vp; 6553185029Spjd IN int a_attrnamespace; 6554185029Spjd IN const char *a_name; 6555185029Spjd IN struct ucred *a_cred; 6556185029Spjd IN struct thread *a_td; 6557185029Spjd}; 6558185029Spjd*/ 6559185029Spjd{ 6560185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6561185029Spjd struct thread *td = ap->a_td; 6562185029Spjd struct nameidata nd; 6563185029Spjd char attrname[255]; 6564185029Spjd struct vattr va; 6565185029Spjd vnode_t *xvp = NULL, *vp; 6566185029Spjd int error, flags; 6567185029Spjd 6568195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6569195785Strasz ap->a_cred, ap->a_td, VWRITE); 6570195785Strasz if (error != 0) 6571195785Strasz return (error); 6572195785Strasz 6573185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6574185029Spjd sizeof(attrname)); 6575185029Spjd if (error != 0) 6576185029Spjd return (error); 6577185029Spjd 6578185029Spjd ZFS_ENTER(zfsvfs); 6579185029Spjd 6580185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6581185029Spjd LOOKUP_XATTR); 6582185029Spjd if (error != 0) { 6583185029Spjd ZFS_EXIT(zfsvfs); 6584185029Spjd return (error); 6585185029Spjd } 6586185029Spjd 6587241896Skib NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 6588185029Spjd UIO_SYSSPACE, attrname, xvp, td); 6589185029Spjd error = namei(&nd); 6590185029Spjd vp = nd.ni_vp; 6591185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6592185029Spjd if (error != 0) { 6593196303Spjd ZFS_EXIT(zfsvfs); 6594195785Strasz if (error == ENOENT) 6595195785Strasz error = ENOATTR; 6596185029Spjd return (error); 6597185029Spjd } 6598185029Spjd error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 6599185029Spjd 6600185029Spjd vput(nd.ni_dvp); 6601185029Spjd if (vp == nd.ni_dvp) 6602185029Spjd vrele(vp); 6603185029Spjd else 
6604185029Spjd vput(vp); 6605185029Spjd ZFS_EXIT(zfsvfs); 6606185029Spjd 6607185029Spjd return (error); 6608185029Spjd} 6609185029Spjd 6610185029Spjd/* 6611185029Spjd * Vnode operation to set a named attribute. 6612185029Spjd */ 6613185029Spjdstatic int 6614185029Spjdzfs_setextattr(struct vop_setextattr_args *ap) 6615185029Spjd/* 6616185029Spjdvop_setextattr { 6617185029Spjd IN struct vnode *a_vp; 6618185029Spjd IN int a_attrnamespace; 6619185029Spjd IN const char *a_name; 6620185029Spjd INOUT struct uio *a_uio; 6621185029Spjd IN struct ucred *a_cred; 6622185029Spjd IN struct thread *a_td; 6623185029Spjd}; 6624185029Spjd*/ 6625185029Spjd{ 6626185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6627185029Spjd struct thread *td = ap->a_td; 6628185029Spjd struct nameidata nd; 6629185029Spjd char attrname[255]; 6630185029Spjd struct vattr va; 6631185029Spjd vnode_t *xvp = NULL, *vp; 6632185029Spjd int error, flags; 6633185029Spjd 6634195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6635195785Strasz ap->a_cred, ap->a_td, VWRITE); 6636195785Strasz if (error != 0) 6637195785Strasz return (error); 6638195785Strasz 6639185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6640185029Spjd sizeof(attrname)); 6641185029Spjd if (error != 0) 6642185029Spjd return (error); 6643185029Spjd 6644185029Spjd ZFS_ENTER(zfsvfs); 6645185029Spjd 6646185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6647195785Strasz LOOKUP_XATTR | CREATE_XATTR_DIR); 6648185029Spjd if (error != 0) { 6649185029Spjd ZFS_EXIT(zfsvfs); 6650185029Spjd return (error); 6651185029Spjd } 6652185029Spjd 6653185029Spjd flags = FFLAGS(O_WRONLY | O_CREAT); 6654241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 6655185029Spjd xvp, td); 6656194586Skib error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); 6657185029Spjd vp = nd.ni_vp; 6658185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6659185029Spjd if (error != 0) { 
6660185029Spjd ZFS_EXIT(zfsvfs); 6661185029Spjd return (error); 6662185029Spjd } 6663185029Spjd 6664185029Spjd VATTR_NULL(&va); 6665185029Spjd va.va_size = 0; 6666185029Spjd error = VOP_SETATTR(vp, &va, ap->a_cred); 6667185029Spjd if (error == 0) 6668185029Spjd VOP_WRITE(vp, ap->a_uio, IO_UNIT | IO_SYNC, ap->a_cred); 6669185029Spjd 6670185029Spjd VOP_UNLOCK(vp, 0); 6671185029Spjd vn_close(vp, flags, ap->a_cred, td); 6672185029Spjd ZFS_EXIT(zfsvfs); 6673185029Spjd 6674185029Spjd return (error); 6675185029Spjd} 6676185029Spjd 6677185029Spjd/* 6678185029Spjd * Vnode operation to retrieve extended attributes on a vnode. 6679185029Spjd */ 6680185029Spjdstatic int 6681185029Spjdzfs_listextattr(struct vop_listextattr_args *ap) 6682185029Spjd/* 6683185029Spjdvop_listextattr { 6684185029Spjd IN struct vnode *a_vp; 6685185029Spjd IN int a_attrnamespace; 6686185029Spjd INOUT struct uio *a_uio; 6687185029Spjd OUT size_t *a_size; 6688185029Spjd IN struct ucred *a_cred; 6689185029Spjd IN struct thread *a_td; 6690185029Spjd}; 6691185029Spjd*/ 6692185029Spjd{ 6693185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6694185029Spjd struct thread *td = ap->a_td; 6695185029Spjd struct nameidata nd; 6696185029Spjd char attrprefix[16]; 6697185029Spjd u_char dirbuf[sizeof(struct dirent)]; 6698185029Spjd struct dirent *dp; 6699185029Spjd struct iovec aiov; 6700185029Spjd struct uio auio, *uio = ap->a_uio; 6701185029Spjd size_t *sizep = ap->a_size; 6702185029Spjd size_t plen; 6703185029Spjd vnode_t *xvp = NULL, *vp; 6704185029Spjd int done, error, eof, pos; 6705185029Spjd 6706195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6707195785Strasz ap->a_cred, ap->a_td, VREAD); 6708196303Spjd if (error != 0) 6709195785Strasz return (error); 6710195785Strasz 6711185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 6712185029Spjd sizeof(attrprefix)); 6713185029Spjd if (error != 0) 6714185029Spjd return (error); 6715185029Spjd plen = strlen(attrprefix); 
6716185029Spjd 6717185029Spjd ZFS_ENTER(zfsvfs); 6718185029Spjd 6719195822Strasz if (sizep != NULL) 6720195822Strasz *sizep = 0; 6721195822Strasz 6722185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6723185029Spjd LOOKUP_XATTR); 6724185029Spjd if (error != 0) { 6725196303Spjd ZFS_EXIT(zfsvfs); 6726195785Strasz /* 6727195785Strasz * ENOATTR means that the EA directory does not yet exist, 6728195785Strasz * i.e. there are no extended attributes there. 6729195785Strasz */ 6730195785Strasz if (error == ENOATTR) 6731195785Strasz error = 0; 6732185029Spjd return (error); 6733185029Spjd } 6734185029Spjd 6735241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 6736188588Sjhb UIO_SYSSPACE, ".", xvp, td); 6737185029Spjd error = namei(&nd); 6738185029Spjd vp = nd.ni_vp; 6739185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6740185029Spjd if (error != 0) { 6741185029Spjd ZFS_EXIT(zfsvfs); 6742185029Spjd return (error); 6743185029Spjd } 6744185029Spjd 6745185029Spjd auio.uio_iov = &aiov; 6746185029Spjd auio.uio_iovcnt = 1; 6747185029Spjd auio.uio_segflg = UIO_SYSSPACE; 6748185029Spjd auio.uio_td = td; 6749185029Spjd auio.uio_rw = UIO_READ; 6750185029Spjd auio.uio_offset = 0; 6751185029Spjd 6752185029Spjd do { 6753185029Spjd u_char nlen; 6754185029Spjd 6755185029Spjd aiov.iov_base = (void *)dirbuf; 6756185029Spjd aiov.iov_len = sizeof(dirbuf); 6757185029Spjd auio.uio_resid = sizeof(dirbuf); 6758185029Spjd error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 6759185029Spjd done = sizeof(dirbuf) - auio.uio_resid; 6760185029Spjd if (error != 0) 6761185029Spjd break; 6762185029Spjd for (pos = 0; pos < done;) { 6763185029Spjd dp = (struct dirent *)(dirbuf + pos); 6764185029Spjd pos += dp->d_reclen; 6765185029Spjd /* 6766185029Spjd * XXX: Temporarily we also accept DT_UNKNOWN, as this 6767185029Spjd * is what we get when attribute was created on Solaris. 
6768185029Spjd */ 6769185029Spjd if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 6770185029Spjd continue; 6771185029Spjd if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) 6772185029Spjd continue; 6773185029Spjd else if (strncmp(dp->d_name, attrprefix, plen) != 0) 6774185029Spjd continue; 6775185029Spjd nlen = dp->d_namlen - plen; 6776185029Spjd if (sizep != NULL) 6777185029Spjd *sizep += 1 + nlen; 6778185029Spjd else if (uio != NULL) { 6779185029Spjd /* 6780185029Spjd * Format of extattr name entry is one byte for 6781185029Spjd * length and the rest for name. 6782185029Spjd */ 6783185029Spjd error = uiomove(&nlen, 1, uio->uio_rw, uio); 6784185029Spjd if (error == 0) { 6785185029Spjd error = uiomove(dp->d_name + plen, nlen, 6786185029Spjd uio->uio_rw, uio); 6787185029Spjd } 6788185029Spjd if (error != 0) 6789185029Spjd break; 6790185029Spjd } 6791185029Spjd } 6792185029Spjd } while (!eof && error == 0); 6793185029Spjd 6794185029Spjd vput(vp); 6795185029Spjd ZFS_EXIT(zfsvfs); 6796185029Spjd 6797185029Spjd return (error); 6798185029Spjd} 6799185029Spjd 6800192800Straszint 6801192800Straszzfs_freebsd_getacl(ap) 6802192800Strasz struct vop_getacl_args /* { 6803192800Strasz struct vnode *vp; 6804192800Strasz acl_type_t type; 6805192800Strasz struct acl *aclp; 6806192800Strasz struct ucred *cred; 6807192800Strasz struct thread *td; 6808192800Strasz } */ *ap; 6809192800Strasz{ 6810192800Strasz int error; 6811192800Strasz vsecattr_t vsecattr; 6812192800Strasz 6813192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 6814197435Strasz return (EINVAL); 6815192800Strasz 6816192800Strasz vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 6817192800Strasz if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)) 6818192800Strasz return (error); 6819192800Strasz 6820192800Strasz error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt); 6821196303Spjd if (vsecattr.vsa_aclentp != NULL) 6822196303Spjd kmem_free(vsecattr.vsa_aclentp, 
vsecattr.vsa_aclentsz); 6823192800Strasz 6824196303Spjd return (error); 6825192800Strasz} 6826192800Strasz 6827192800Straszint 6828192800Straszzfs_freebsd_setacl(ap) 6829192800Strasz struct vop_setacl_args /* { 6830192800Strasz struct vnode *vp; 6831192800Strasz acl_type_t type; 6832192800Strasz struct acl *aclp; 6833192800Strasz struct ucred *cred; 6834192800Strasz struct thread *td; 6835192800Strasz } */ *ap; 6836192800Strasz{ 6837192800Strasz int error; 6838192800Strasz vsecattr_t vsecattr; 6839192800Strasz int aclbsize; /* size of acl list in bytes */ 6840192800Strasz aclent_t *aaclp; 6841192800Strasz 6842192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 6843197435Strasz return (EINVAL); 6844192800Strasz 6845192800Strasz if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 6846192800Strasz return (EINVAL); 6847192800Strasz 6848192800Strasz /* 6849196949Strasz * With NFSv4 ACLs, chmod(2) may need to add additional entries, 6850192800Strasz * splitting every entry into two and appending "canonical six" 6851192800Strasz * entries at the end. Don't allow for setting an ACL that would 6852192800Strasz * cause chmod(2) to run out of ACL entries. 
6853192800Strasz */ 6854192800Strasz if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 6855192800Strasz return (ENOSPC); 6856192800Strasz 6857208030Strasz error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 6858208030Strasz if (error != 0) 6859208030Strasz return (error); 6860208030Strasz 6861192800Strasz vsecattr.vsa_mask = VSA_ACE; 6862192800Strasz aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t); 6863192800Strasz vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 6864192800Strasz aaclp = vsecattr.vsa_aclentp; 6865192800Strasz vsecattr.vsa_aclentsz = aclbsize; 6866192800Strasz 6867192800Strasz aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 6868192800Strasz error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL); 6869192800Strasz kmem_free(aaclp, aclbsize); 6870192800Strasz 6871192800Strasz return (error); 6872192800Strasz} 6873192800Strasz 6874192800Straszint 6875192800Straszzfs_freebsd_aclcheck(ap) 6876192800Strasz struct vop_aclcheck_args /* { 6877192800Strasz struct vnode *vp; 6878192800Strasz acl_type_t type; 6879192800Strasz struct acl *aclp; 6880192800Strasz struct ucred *cred; 6881192800Strasz struct thread *td; 6882192800Strasz } */ *ap; 6883192800Strasz{ 6884192800Strasz 6885192800Strasz return (EOPNOTSUPP); 6886192800Strasz} 6887192800Strasz 6888168404Spjdstruct vop_vector zfs_vnodeops; 6889168404Spjdstruct vop_vector zfs_fifoops; 6890209962Smmstruct vop_vector zfs_shareops; 6891168404Spjd 6892168404Spjdstruct vop_vector zfs_vnodeops = { 6893185029Spjd .vop_default = &default_vnodeops, 6894185029Spjd .vop_inactive = zfs_freebsd_inactive, 6895185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 6896185029Spjd .vop_access = zfs_freebsd_access, 6897168404Spjd#ifdef FREEBSD_NAMECACHE 6898185029Spjd .vop_lookup = vfs_cache_lookup, 6899185029Spjd .vop_cachedlookup = zfs_freebsd_lookup, 6900168404Spjd#else 6901185029Spjd .vop_lookup = zfs_freebsd_lookup, 6902168404Spjd#endif 6903185029Spjd .vop_getattr = 
zfs_freebsd_getattr, 6904185029Spjd .vop_setattr = zfs_freebsd_setattr, 6905185029Spjd .vop_create = zfs_freebsd_create, 6906185029Spjd .vop_mknod = zfs_freebsd_create, 6907185029Spjd .vop_mkdir = zfs_freebsd_mkdir, 6908185029Spjd .vop_readdir = zfs_freebsd_readdir, 6909185029Spjd .vop_fsync = zfs_freebsd_fsync, 6910185029Spjd .vop_open = zfs_freebsd_open, 6911185029Spjd .vop_close = zfs_freebsd_close, 6912185029Spjd .vop_rmdir = zfs_freebsd_rmdir, 6913185029Spjd .vop_ioctl = zfs_freebsd_ioctl, 6914185029Spjd .vop_link = zfs_freebsd_link, 6915185029Spjd .vop_symlink = zfs_freebsd_symlink, 6916185029Spjd .vop_readlink = zfs_freebsd_readlink, 6917185029Spjd .vop_read = zfs_freebsd_read, 6918185029Spjd .vop_write = zfs_freebsd_write, 6919185029Spjd .vop_remove = zfs_freebsd_remove, 6920185029Spjd .vop_rename = zfs_freebsd_rename, 6921185029Spjd .vop_pathconf = zfs_freebsd_pathconf, 6922243518Savg .vop_bmap = zfs_freebsd_bmap, 6923185029Spjd .vop_fid = zfs_freebsd_fid, 6924185029Spjd .vop_getextattr = zfs_getextattr, 6925185029Spjd .vop_deleteextattr = zfs_deleteextattr, 6926185029Spjd .vop_setextattr = zfs_setextattr, 6927185029Spjd .vop_listextattr = zfs_listextattr, 6928192800Strasz .vop_getacl = zfs_freebsd_getacl, 6929192800Strasz .vop_setacl = zfs_freebsd_setacl, 6930192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6931213937Savg .vop_getpages = zfs_freebsd_getpages, 6932168404Spjd}; 6933168404Spjd 6934169170Spjdstruct vop_vector zfs_fifoops = { 6935185029Spjd .vop_default = &fifo_specops, 6936200162Skib .vop_fsync = zfs_freebsd_fsync, 6937185029Spjd .vop_access = zfs_freebsd_access, 6938185029Spjd .vop_getattr = zfs_freebsd_getattr, 6939185029Spjd .vop_inactive = zfs_freebsd_inactive, 6940185029Spjd .vop_read = VOP_PANIC, 6941185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 6942185029Spjd .vop_setattr = zfs_freebsd_setattr, 6943185029Spjd .vop_write = VOP_PANIC, 6944196949Strasz .vop_pathconf = zfs_freebsd_fifo_pathconf, 6945185029Spjd .vop_fid = 
zfs_freebsd_fid, 6946192800Strasz .vop_getacl = zfs_freebsd_getacl, 6947192800Strasz .vop_setacl = zfs_freebsd_setacl, 6948192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6949168404Spjd}; 6950209962Smm 6951209962Smm/* 6952209962Smm * special share hidden files vnode operations template 6953209962Smm */ 6954209962Smmstruct vop_vector zfs_shareops = { 6955209962Smm .vop_default = &default_vnodeops, 6956209962Smm .vop_access = zfs_freebsd_access, 6957209962Smm .vop_inactive = zfs_freebsd_inactive, 6958209962Smm .vop_reclaim = zfs_freebsd_reclaim, 6959209962Smm .vop_fid = zfs_freebsd_fid, 6960209962Smm .vop_pathconf = zfs_freebsd_pathconf, 6961209962Smm}; 6962