zfs_vnops.c revision 249195
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22212694Smm * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23249195Smm * Copyright (c) 2013 by Delphix. All rights reserved. 
24168404Spjd */ 25168404Spjd 26169195Spjd/* Portions Copyright 2007 Jeremy Teo */ 27219089Spjd/* Portions Copyright 2010 Robert Milkowski */ 28169195Spjd 29168404Spjd#include <sys/types.h> 30168404Spjd#include <sys/param.h> 31168404Spjd#include <sys/time.h> 32168404Spjd#include <sys/systm.h> 33168404Spjd#include <sys/sysmacros.h> 34168404Spjd#include <sys/resource.h> 35168404Spjd#include <sys/vfs.h> 36248084Sattilio#include <sys/vm.h> 37168404Spjd#include <sys/vnode.h> 38168404Spjd#include <sys/file.h> 39168404Spjd#include <sys/stat.h> 40168404Spjd#include <sys/kmem.h> 41168404Spjd#include <sys/taskq.h> 42168404Spjd#include <sys/uio.h> 43168404Spjd#include <sys/atomic.h> 44168404Spjd#include <sys/namei.h> 45168404Spjd#include <sys/mman.h> 46168404Spjd#include <sys/cmn_err.h> 47168404Spjd#include <sys/errno.h> 48168404Spjd#include <sys/unistd.h> 49168404Spjd#include <sys/zfs_dir.h> 50168404Spjd#include <sys/zfs_ioctl.h> 51168404Spjd#include <sys/fs/zfs.h> 52168404Spjd#include <sys/dmu.h> 53219089Spjd#include <sys/dmu_objset.h> 54168404Spjd#include <sys/spa.h> 55168404Spjd#include <sys/txg.h> 56168404Spjd#include <sys/dbuf.h> 57168404Spjd#include <sys/zap.h> 58219089Spjd#include <sys/sa.h> 59168404Spjd#include <sys/dirent.h> 60168962Spjd#include <sys/policy.h> 61168962Spjd#include <sys/sunddi.h> 62168404Spjd#include <sys/filio.h> 63209962Smm#include <sys/sid.h> 64168404Spjd#include <sys/zfs_ctldir.h> 65185029Spjd#include <sys/zfs_fuid.h> 66219089Spjd#include <sys/zfs_sa.h> 67168404Spjd#include <sys/dnlc.h> 68168404Spjd#include <sys/zfs_rlock.h> 69185029Spjd#include <sys/extdirent.h> 70185029Spjd#include <sys/kidmap.h> 71168404Spjd#include <sys/bio.h> 72168404Spjd#include <sys/buf.h> 73168404Spjd#include <sys/sf_buf.h> 74168404Spjd#include <sys/sched.h> 75192800Strasz#include <sys/acl.h> 76239077Smarius#include <vm/vm_param.h> 77215401Savg#include <vm/vm_pageout.h> 78168404Spjd 79168404Spjd/* 80168404Spjd * Programming rules. 
81168404Spjd * 82168404Spjd * Each vnode op performs some logical unit of work. To do this, the ZPL must 83168404Spjd * properly lock its in-core state, create a DMU transaction, do the work, 84168404Spjd * record this work in the intent log (ZIL), commit the DMU transaction, 85185029Spjd * and wait for the intent log to commit if it is a synchronous operation. 86185029Spjd * Moreover, the vnode ops must work in both normal and log replay context. 87168404Spjd * The ordering of events is important to avoid deadlocks and references 88168404Spjd * to freed memory. The example below illustrates the following Big Rules: 89168404Spjd * 90168404Spjd * (1) A check must be made in each zfs thread for a mounted file system. 91168404Spjd * This is done avoiding races using ZFS_ENTER(zfsvfs). 92185029Spjd * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 93185029Spjd * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 94185029Spjd * can return EIO from the calling function. 95168404Spjd * 96168404Spjd * (2) VN_RELE() should always be the last thing except for zil_commit() 97168404Spjd * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 98168404Spjd * First, if it's the last reference, the vnode/znode 99168404Spjd * can be freed, so the zp may point to freed memory. Second, the last 100168404Spjd * reference will call zfs_zinactive(), which may induce a lot of work -- 101168404Spjd * pushing cached pages (which acquires range locks) and syncing out 102168404Spjd * cached atime changes. Third, zfs_zinactive() may require a new tx, 103168404Spjd * which could deadlock the system if you were already holding one. 104191900Skmacy * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 105168404Spjd * 106168404Spjd * (3) All range locks must be grabbed before calling dmu_tx_assign(), 107168404Spjd * as they can span dmu_tx_assign() calls. 108168404Spjd * 109209962Smm * (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign(). 
110168404Spjd * This is critical because we don't want to block while holding locks. 111168404Spjd * Note, in particular, that if a lock is sometimes acquired before 112168404Spjd * the tx assigns, and sometimes after (e.g. z_lock), then failing to 113168404Spjd * use a non-blocking assign can deadlock the system. The scenario: 114168404Spjd * 115168404Spjd * Thread A has grabbed a lock before calling dmu_tx_assign(). 116168404Spjd * Thread B is in an already-assigned tx, and blocks for this lock. 117168404Spjd * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 118168404Spjd * forever, because the previous txg can't quiesce until B's tx commits. 119168404Spjd * 120168404Spjd * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 121168404Spjd * then drop all locks, call dmu_tx_wait(), and try again. 122168404Spjd * 123168404Spjd * (5) If the operation succeeded, generate the intent log entry for it 124168404Spjd * before dropping locks. This ensures that the ordering of events 125168404Spjd * in the intent log matches the order in which they actually occurred. 126209962Smm * During ZIL replay the zfs_log_* functions will update the sequence 127209962Smm * number to indicate the zil transaction has replayed. 128168404Spjd * 129168404Spjd * (6) At the end of each vnode op, the DMU tx must always commit, 130168404Spjd * regardless of whether there were any errors. 131168404Spjd * 132219089Spjd * (7) After dropping all locks, invoke zil_commit(zilog, foid) 133168404Spjd * to ensure that synchronous semantics are provided when necessary. 134168404Spjd * 135168404Spjd * In general, this is how things should be ordered in each vnode op: 136168404Spjd * 137168404Spjd * ZFS_ENTER(zfsvfs); // exit if unmounted 138168404Spjd * top: 139168404Spjd * zfs_dirent_lock(&dl, ...) 
// lock directory entry (may VN_HOLD()) 140168404Spjd * rw_enter(...); // grab any other locks you need 141168404Spjd * tx = dmu_tx_create(...); // get DMU tx 142168404Spjd * dmu_tx_hold_*(); // hold each object you might modify 143209962Smm * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign 144168404Spjd * if (error) { 145168404Spjd * rw_exit(...); // drop locks 146168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 147168404Spjd * VN_RELE(...); // release held vnodes 148209962Smm * if (error == ERESTART) { 149168404Spjd * dmu_tx_wait(tx); 150168404Spjd * dmu_tx_abort(tx); 151168404Spjd * goto top; 152168404Spjd * } 153168404Spjd * dmu_tx_abort(tx); // abort DMU tx 154168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 155168404Spjd * return (error); // really out of space 156168404Spjd * } 157168404Spjd * error = do_real_work(); // do whatever this VOP does 158168404Spjd * if (error == 0) 159168404Spjd * zfs_log_*(...); // on success, make ZIL entry 160168404Spjd * dmu_tx_commit(tx); // commit DMU tx -- error or not 161168404Spjd * rw_exit(...); // drop locks 162168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 163168404Spjd * VN_RELE(...); // release held vnodes 164219089Spjd * zil_commit(zilog, foid); // synchronous when necessary 165168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 166168404Spjd * return (error); // done, report error 167168404Spjd */ 168185029Spjd 169168404Spjd/* ARGSUSED */ 170168404Spjdstatic int 171185029Spjdzfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 172168404Spjd{ 173168962Spjd znode_t *zp = VTOZ(*vpp); 174209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 175168404Spjd 176209962Smm ZFS_ENTER(zfsvfs); 177209962Smm ZFS_VERIFY_ZP(zp); 178209962Smm 179219089Spjd if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 180185029Spjd ((flag & FAPPEND) == 0)) { 181209962Smm ZFS_EXIT(zfsvfs); 182249195Smm return (SET_ERROR(EPERM)); 183185029Spjd } 184185029Spjd 185185029Spjd if (!zfs_has_ctldir(zp) && 
zp->z_zfsvfs->z_vscan && 186185029Spjd ZTOV(zp)->v_type == VREG && 187219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 188209962Smm if (fs_vscan(*vpp, cr, 0) != 0) { 189209962Smm ZFS_EXIT(zfsvfs); 190249195Smm return (SET_ERROR(EACCES)); 191209962Smm } 192209962Smm } 193185029Spjd 194168404Spjd /* Keep a count of the synchronous opens in the znode */ 195168962Spjd if (flag & (FSYNC | FDSYNC)) 196168404Spjd atomic_inc_32(&zp->z_sync_cnt); 197185029Spjd 198209962Smm ZFS_EXIT(zfsvfs); 199168404Spjd return (0); 200168404Spjd} 201168404Spjd 202168404Spjd/* ARGSUSED */ 203168404Spjdstatic int 204185029Spjdzfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 205185029Spjd caller_context_t *ct) 206168404Spjd{ 207168962Spjd znode_t *zp = VTOZ(vp); 208209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 209168404Spjd 210210470Smm /* 211210470Smm * Clean up any locks held by this process on the vp. 212210470Smm */ 213210470Smm cleanlocks(vp, ddi_get_pid(), 0); 214210470Smm cleanshares(vp, ddi_get_pid()); 215210470Smm 216209962Smm ZFS_ENTER(zfsvfs); 217209962Smm ZFS_VERIFY_ZP(zp); 218209962Smm 219168404Spjd /* Decrement the synchronous opens in the znode */ 220185029Spjd if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 221168404Spjd atomic_dec_32(&zp->z_sync_cnt); 222168404Spjd 223185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 224185029Spjd ZTOV(zp)->v_type == VREG && 225219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 226185029Spjd VERIFY(fs_vscan(vp, cr, 1) == 0); 227185029Spjd 228209962Smm ZFS_EXIT(zfsvfs); 229168404Spjd return (0); 230168404Spjd} 231168404Spjd 232168404Spjd/* 233168404Spjd * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 234168404Spjd * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 
235168404Spjd */ 236168404Spjdstatic int 237168978Spjdzfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 238168404Spjd{ 239168404Spjd znode_t *zp = VTOZ(vp); 240168404Spjd uint64_t noff = (uint64_t)*off; /* new offset */ 241168404Spjd uint64_t file_sz; 242168404Spjd int error; 243168404Spjd boolean_t hole; 244168404Spjd 245219089Spjd file_sz = zp->z_size; 246168404Spjd if (noff >= file_sz) { 247249195Smm return (SET_ERROR(ENXIO)); 248168404Spjd } 249168404Spjd 250168962Spjd if (cmd == _FIO_SEEK_HOLE) 251168404Spjd hole = B_TRUE; 252168404Spjd else 253168404Spjd hole = B_FALSE; 254168404Spjd 255168404Spjd error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 256168404Spjd 257168404Spjd /* end of file? */ 258168404Spjd if ((error == ESRCH) || (noff > file_sz)) { 259168404Spjd /* 260168404Spjd * Handle the virtual hole at the end of file. 261168404Spjd */ 262168404Spjd if (hole) { 263168404Spjd *off = file_sz; 264168404Spjd return (0); 265168404Spjd } 266249195Smm return (SET_ERROR(ENXIO)); 267168404Spjd } 268168404Spjd 269168404Spjd if (noff < *off) 270168404Spjd return (error); 271168404Spjd *off = noff; 272168404Spjd return (error); 273168404Spjd} 274168404Spjd 275168404Spjd/* ARGSUSED */ 276168404Spjdstatic int 277168978Spjdzfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 278185029Spjd int *rvalp, caller_context_t *ct) 279168404Spjd{ 280168962Spjd offset_t off; 281168962Spjd int error; 282168962Spjd zfsvfs_t *zfsvfs; 283185029Spjd znode_t *zp; 284168404Spjd 285168404Spjd switch (com) { 286185029Spjd case _FIOFFS: 287168962Spjd return (0); 288168404Spjd 289168962Spjd /* 290168962Spjd * The following two ioctls are used by bfu. Faking out, 291168962Spjd * necessary to avoid bfu errors. 
292168962Spjd */ 293185029Spjd case _FIOGDIO: 294185029Spjd case _FIOSDIO: 295168962Spjd return (0); 296168962Spjd 297185029Spjd case _FIO_SEEK_DATA: 298185029Spjd case _FIO_SEEK_HOLE: 299233918Savg#ifdef sun 300168962Spjd if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 301249195Smm return (SET_ERROR(EFAULT)); 302233918Savg#else 303233918Savg off = *(offset_t *)data; 304233918Savg#endif 305185029Spjd zp = VTOZ(vp); 306185029Spjd zfsvfs = zp->z_zfsvfs; 307168404Spjd ZFS_ENTER(zfsvfs); 308185029Spjd ZFS_VERIFY_ZP(zp); 309168404Spjd 310168404Spjd /* offset parameter is in/out */ 311168404Spjd error = zfs_holey(vp, com, &off); 312168404Spjd ZFS_EXIT(zfsvfs); 313168404Spjd if (error) 314168404Spjd return (error); 315233918Savg#ifdef sun 316168962Spjd if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 317249195Smm return (SET_ERROR(EFAULT)); 318233918Savg#else 319233918Savg *(offset_t *)data = off; 320233918Savg#endif 321168404Spjd return (0); 322168404Spjd } 323249195Smm return (SET_ERROR(ENOTTY)); 324168404Spjd} 325168404Spjd 326209962Smmstatic vm_page_t 327246293Savgpage_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 328209962Smm{ 329209962Smm vm_object_t obj; 330209962Smm vm_page_t pp; 331209962Smm 332209962Smm obj = vp->v_object; 333248084Sattilio zfs_vmobject_assert_wlocked(obj); 334209962Smm 335209962Smm for (;;) { 336209962Smm if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 337246293Savg pp->valid) { 338212652Savg if ((pp->oflags & VPO_BUSY) != 0) { 339212652Savg /* 340212652Savg * Reference the page before unlocking and 341212652Savg * sleeping so that the page daemon is less 342212652Savg * likely to reclaim it. 
343212652Savg */ 344225418Skib vm_page_reference(pp); 345212652Savg vm_page_sleep(pp, "zfsmwb"); 346209962Smm continue; 347212652Savg } 348209962Smm } else { 349246293Savg pp = vm_page_alloc(obj, OFF_TO_IDX(start), 350246293Savg VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED | 351246293Savg VM_ALLOC_NOBUSY); 352209962Smm } 353246293Savg 354246293Savg if (pp != NULL) { 355246293Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 356246293Savg vm_object_pip_add(obj, 1); 357246293Savg vm_page_io_start(pp); 358246293Savg pmap_remove_write(pp); 359246293Savg vm_page_clear_dirty(pp, off, nbytes); 360246293Savg } 361209962Smm break; 362209962Smm } 363209962Smm return (pp); 364209962Smm} 365209962Smm 366209962Smmstatic void 367246293Savgpage_unbusy(vm_page_t pp) 368209962Smm{ 369209962Smm 370246293Savg vm_page_io_finish(pp); 371246293Savg vm_object_pip_subtract(pp->object, 1); 372209962Smm} 373209962Smm 374246293Savgstatic vm_page_t 375246293Savgpage_hold(vnode_t *vp, int64_t start) 376246293Savg{ 377246293Savg vm_object_t obj; 378246293Savg vm_page_t pp; 379246293Savg 380246293Savg obj = vp->v_object; 381248084Sattilio zfs_vmobject_assert_wlocked(obj); 382246293Savg 383246293Savg for (;;) { 384246293Savg if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 385246293Savg pp->valid) { 386246293Savg if ((pp->oflags & VPO_BUSY) != 0) { 387246293Savg /* 388246293Savg * Reference the page before unlocking and 389246293Savg * sleeping so that the page daemon is less 390246293Savg * likely to reclaim it. 
391246293Savg */ 392246293Savg vm_page_reference(pp); 393246293Savg vm_page_sleep(pp, "zfsmwb"); 394246293Savg continue; 395246293Savg } 396246293Savg 397246293Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 398246293Savg vm_page_lock(pp); 399246293Savg vm_page_hold(pp); 400246293Savg vm_page_unlock(pp); 401246293Savg 402246293Savg } else 403246293Savg pp = NULL; 404246293Savg break; 405246293Savg } 406246293Savg return (pp); 407246293Savg} 408246293Savg 409246293Savgstatic void 410246293Savgpage_unhold(vm_page_t pp) 411246293Savg{ 412246293Savg 413246293Savg vm_page_lock(pp); 414246293Savg vm_page_unhold(pp); 415246293Savg vm_page_unlock(pp); 416246293Savg} 417246293Savg 418209962Smmstatic caddr_t 419209962Smmzfs_map_page(vm_page_t pp, struct sf_buf **sfp) 420209962Smm{ 421209962Smm 422212951Savg *sfp = sf_buf_alloc(pp, 0); 423209962Smm return ((caddr_t)sf_buf_kva(*sfp)); 424209962Smm} 425209962Smm 426209962Smmstatic void 427209962Smmzfs_unmap_page(struct sf_buf *sf) 428209962Smm{ 429209962Smm 430209962Smm sf_buf_free(sf); 431209962Smm} 432209962Smm 433168404Spjd/* 434168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 435168404Spjd * between the DMU cache and the memory mapped pages. What this means: 436168404Spjd * 437168404Spjd * On Write: If we find a memory mapped page, we write to *both* 438168404Spjd * the page and the dmu buffer. 
439168404Spjd */ 440209962Smmstatic void 441209962Smmupdate_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 442209962Smm int segflg, dmu_tx_t *tx) 443168404Spjd{ 444168404Spjd vm_object_t obj; 445168404Spjd struct sf_buf *sf; 446246293Savg caddr_t va; 447212655Savg int off; 448168404Spjd 449168404Spjd ASSERT(vp->v_mount != NULL); 450168404Spjd obj = vp->v_object; 451168404Spjd ASSERT(obj != NULL); 452168404Spjd 453168404Spjd off = start & PAGEOFFSET; 454248084Sattilio zfs_vmobject_wlock(obj); 455168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 456209962Smm vm_page_t pp; 457246293Savg int nbytes = imin(PAGESIZE - off, len); 458168404Spjd 459246293Savg if (segflg == UIO_NOCOPY) { 460246293Savg pp = vm_page_lookup(obj, OFF_TO_IDX(start)); 461246293Savg KASSERT(pp != NULL, 462246293Savg ("zfs update_pages: NULL page in putpages case")); 463246293Savg KASSERT(off == 0, 464246293Savg ("zfs update_pages: unaligned data in putpages case")); 465246293Savg KASSERT(pp->valid == VM_PAGE_BITS_ALL, 466246293Savg ("zfs update_pages: invalid page in putpages case")); 467246293Savg KASSERT(pp->busy > 0, 468246293Savg ("zfs update_pages: unbusy page in putpages case")); 469246293Savg KASSERT(!pmap_page_is_write_mapped(pp), 470246293Savg ("zfs update_pages: writable page in putpages case")); 471248084Sattilio zfs_vmobject_wunlock(obj); 472168404Spjd 473246293Savg va = zfs_map_page(pp, &sf); 474246293Savg (void) dmu_write(os, oid, start, nbytes, va, tx); 475246293Savg zfs_unmap_page(sf); 476246293Savg 477248084Sattilio zfs_vmobject_wlock(obj); 478246293Savg vm_page_undirty(pp); 479246293Savg } else if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { 480248084Sattilio zfs_vmobject_wunlock(obj); 481246293Savg 482209962Smm va = zfs_map_page(pp, &sf); 483246293Savg (void) dmu_read(os, oid, start+off, nbytes, 484246293Savg va+off, DMU_READ_PREFETCH);; 485209962Smm zfs_unmap_page(sf); 486246293Savg 487248084Sattilio zfs_vmobject_wlock(obj); 
488246293Savg page_unbusy(pp); 489168404Spjd } 490209962Smm len -= nbytes; 491168404Spjd off = 0; 492168404Spjd } 493246293Savg if (segflg != UIO_NOCOPY) 494246293Savg vm_object_pip_wakeupn(obj, 0); 495248084Sattilio zfs_vmobject_wunlock(obj); 496168404Spjd} 497168404Spjd 498168404Spjd/* 499219089Spjd * Read with UIO_NOCOPY flag means that sendfile(2) requests 500219089Spjd * ZFS to populate a range of page cache pages with data. 501219089Spjd * 502219089Spjd * NOTE: this function could be optimized to pre-allocate 503219089Spjd * all pages in advance, drain VPO_BUSY on all of them, 504219089Spjd * map them into contiguous KVA region and populate them 505219089Spjd * in one single dmu_read() call. 506219089Spjd */ 507219089Spjdstatic int 508219089Spjdmappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) 509219089Spjd{ 510219089Spjd znode_t *zp = VTOZ(vp); 511219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 512219089Spjd struct sf_buf *sf; 513219089Spjd vm_object_t obj; 514219089Spjd vm_page_t pp; 515219089Spjd int64_t start; 516219089Spjd caddr_t va; 517219089Spjd int len = nbytes; 518219089Spjd int off; 519219089Spjd int error = 0; 520219089Spjd 521219089Spjd ASSERT(uio->uio_segflg == UIO_NOCOPY); 522219089Spjd ASSERT(vp->v_mount != NULL); 523219089Spjd obj = vp->v_object; 524219089Spjd ASSERT(obj != NULL); 525219089Spjd ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); 526219089Spjd 527248084Sattilio zfs_vmobject_wlock(obj); 528219089Spjd for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { 529219089Spjd int bytes = MIN(PAGESIZE, len); 530219089Spjd 531219089Spjd pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_NOBUSY | 532219089Spjd VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_IGN_SBUSY); 533219089Spjd if (pp->valid == 0) { 534219089Spjd vm_page_io_start(pp); 535248084Sattilio zfs_vmobject_wunlock(obj); 536219089Spjd va = zfs_map_page(pp, &sf); 537219089Spjd error = dmu_read(os, zp->z_id, start, bytes, va, 538219089Spjd DMU_READ_PREFETCH); 539219089Spjd if (bytes 
!= PAGESIZE && error == 0) 540219089Spjd bzero(va + bytes, PAGESIZE - bytes); 541219089Spjd zfs_unmap_page(sf); 542248084Sattilio zfs_vmobject_wlock(obj); 543219089Spjd vm_page_io_finish(pp); 544219089Spjd vm_page_lock(pp); 545219089Spjd if (error) { 546219089Spjd vm_page_free(pp); 547219089Spjd } else { 548219089Spjd pp->valid = VM_PAGE_BITS_ALL; 549219089Spjd vm_page_activate(pp); 550219089Spjd } 551219089Spjd vm_page_unlock(pp); 552219089Spjd } 553219089Spjd if (error) 554219089Spjd break; 555219089Spjd uio->uio_resid -= bytes; 556219089Spjd uio->uio_offset += bytes; 557219089Spjd len -= bytes; 558219089Spjd } 559248084Sattilio zfs_vmobject_wunlock(obj); 560219089Spjd return (error); 561219089Spjd} 562219089Spjd 563219089Spjd/* 564168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 565168404Spjd * between the DMU cache and the memory mapped pages. What this means: 566168404Spjd * 567168404Spjd * On Read: We "read" preferentially from memory mapped pages, 568168404Spjd * else we default from the dmu buffer. 569168404Spjd * 570168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 571168404Spjd * the file is memory mapped. 
572168404Spjd */ 573168404Spjdstatic int 574168404Spjdmappedread(vnode_t *vp, int nbytes, uio_t *uio) 575168404Spjd{ 576168404Spjd znode_t *zp = VTOZ(vp); 577168404Spjd objset_t *os = zp->z_zfsvfs->z_os; 578168404Spjd vm_object_t obj; 579212655Savg int64_t start; 580168926Spjd caddr_t va; 581168404Spjd int len = nbytes; 582212655Savg int off; 583168404Spjd int error = 0; 584168404Spjd 585168404Spjd ASSERT(vp->v_mount != NULL); 586168404Spjd obj = vp->v_object; 587168404Spjd ASSERT(obj != NULL); 588168404Spjd 589168404Spjd start = uio->uio_loffset; 590168404Spjd off = start & PAGEOFFSET; 591248084Sattilio zfs_vmobject_wlock(obj); 592168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 593219089Spjd vm_page_t pp; 594219089Spjd uint64_t bytes = MIN(PAGESIZE - off, len); 595168404Spjd 596246293Savg if (pp = page_hold(vp, start)) { 597219089Spjd struct sf_buf *sf; 598219089Spjd caddr_t va; 599212652Savg 600248084Sattilio zfs_vmobject_wunlock(obj); 601219089Spjd va = zfs_map_page(pp, &sf); 602219089Spjd error = uiomove(va + off, bytes, UIO_READ, uio); 603219089Spjd zfs_unmap_page(sf); 604248084Sattilio zfs_vmobject_wlock(obj); 605246293Savg page_unhold(pp); 606219089Spjd } else { 607248084Sattilio zfs_vmobject_wunlock(obj); 608219089Spjd error = dmu_read_uio(os, zp->z_id, uio, bytes); 609248084Sattilio zfs_vmobject_wlock(obj); 610168404Spjd } 611168404Spjd len -= bytes; 612168404Spjd off = 0; 613168404Spjd if (error) 614168404Spjd break; 615168404Spjd } 616248084Sattilio zfs_vmobject_wunlock(obj); 617168404Spjd return (error); 618168404Spjd} 619168404Spjd 620168404Spjdoffset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 621168404Spjd 622168404Spjd/* 623168404Spjd * Read bytes from specified file into supplied buffer. 624168404Spjd * 625168404Spjd * IN: vp - vnode of file to be read from. 626168404Spjd * uio - structure supplying read location, range info, 627168404Spjd * and return buffer. 
628168404Spjd * ioflag - SYNC flags; used to provide FRSYNC semantics. 629168404Spjd * cr - credentials of caller. 630185029Spjd * ct - caller context 631168404Spjd * 632168404Spjd * OUT: uio - updated offset and range, buffer filled. 633168404Spjd * 634168404Spjd * RETURN: 0 if success 635168404Spjd * error code if failure 636168404Spjd * 637168404Spjd * Side Effects: 638168404Spjd * vp - atime updated if byte count > 0 639168404Spjd */ 640168404Spjd/* ARGSUSED */ 641168404Spjdstatic int 642168962Spjdzfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 643168404Spjd{ 644168404Spjd znode_t *zp = VTOZ(vp); 645168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 646185029Spjd objset_t *os; 647168404Spjd ssize_t n, nbytes; 648247187Smm int error = 0; 649168404Spjd rl_t *rl; 650219089Spjd xuio_t *xuio = NULL; 651168404Spjd 652168404Spjd ZFS_ENTER(zfsvfs); 653185029Spjd ZFS_VERIFY_ZP(zp); 654185029Spjd os = zfsvfs->z_os; 655168404Spjd 656219089Spjd if (zp->z_pflags & ZFS_AV_QUARANTINED) { 657185029Spjd ZFS_EXIT(zfsvfs); 658249195Smm return (SET_ERROR(EACCES)); 659185029Spjd } 660185029Spjd 661168404Spjd /* 662168404Spjd * Validate file offset 663168404Spjd */ 664168404Spjd if (uio->uio_loffset < (offset_t)0) { 665168404Spjd ZFS_EXIT(zfsvfs); 666249195Smm return (SET_ERROR(EINVAL)); 667168404Spjd } 668168404Spjd 669168404Spjd /* 670168404Spjd * Fasttrack empty reads 671168404Spjd */ 672168404Spjd if (uio->uio_resid == 0) { 673168404Spjd ZFS_EXIT(zfsvfs); 674168404Spjd return (0); 675168404Spjd } 676168404Spjd 677168404Spjd /* 678168962Spjd * Check for mandatory locks 679168962Spjd */ 680219089Spjd if (MANDMODE(zp->z_mode)) { 681168962Spjd if (error = chklock(vp, FREAD, 682168962Spjd uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 683168962Spjd ZFS_EXIT(zfsvfs); 684168962Spjd return (error); 685168962Spjd } 686168962Spjd } 687168962Spjd 688168962Spjd /* 689168404Spjd * If we're in FRSYNC mode, sync out this znode before reading it. 
690168404Spjd */ 691224605Smm if (zfsvfs->z_log && 692224605Smm (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) 693219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 694168404Spjd 695168404Spjd /* 696168404Spjd * Lock the range against changes. 697168404Spjd */ 698168404Spjd rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 699168404Spjd 700168404Spjd /* 701168404Spjd * If we are reading past end-of-file we can skip 702168404Spjd * to the end; but we might still need to set atime. 703168404Spjd */ 704219089Spjd if (uio->uio_loffset >= zp->z_size) { 705168404Spjd error = 0; 706168404Spjd goto out; 707168404Spjd } 708168404Spjd 709219089Spjd ASSERT(uio->uio_loffset < zp->z_size); 710219089Spjd n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 711168404Spjd 712219089Spjd#ifdef sun 713219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 714219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 715219089Spjd int nblk; 716219089Spjd int blksz = zp->z_blksz; 717219089Spjd uint64_t offset = uio->uio_loffset; 718219089Spjd 719219089Spjd xuio = (xuio_t *)uio; 720219089Spjd if ((ISP2(blksz))) { 721219089Spjd nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 722219089Spjd blksz)) / blksz; 723219089Spjd } else { 724219089Spjd ASSERT(offset + n <= blksz); 725219089Spjd nblk = 1; 726219089Spjd } 727219089Spjd (void) dmu_xuio_init(xuio, nblk); 728219089Spjd 729219089Spjd if (vn_has_cached_data(vp)) { 730219089Spjd /* 731219089Spjd * For simplicity, we always allocate a full buffer 732219089Spjd * even if we only expect to read a portion of a block. 
733219089Spjd */ 734219089Spjd while (--nblk >= 0) { 735219089Spjd (void) dmu_xuio_add(xuio, 736219089Spjd dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 737219089Spjd blksz), 0, blksz); 738219089Spjd } 739219089Spjd } 740219089Spjd } 741219089Spjd#endif /* sun */ 742219089Spjd 743168404Spjd while (n > 0) { 744168404Spjd nbytes = MIN(n, zfs_read_chunk_size - 745168404Spjd P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 746168404Spjd 747219089Spjd#ifdef __FreeBSD__ 748219089Spjd if (uio->uio_segflg == UIO_NOCOPY) 749219089Spjd error = mappedread_sf(vp, nbytes, uio); 750219089Spjd else 751219089Spjd#endif /* __FreeBSD__ */ 752168404Spjd if (vn_has_cached_data(vp)) 753168404Spjd error = mappedread(vp, nbytes, uio); 754168404Spjd else 755168404Spjd error = dmu_read_uio(os, zp->z_id, uio, nbytes); 756185029Spjd if (error) { 757185029Spjd /* convert checksum errors into IO errors */ 758185029Spjd if (error == ECKSUM) 759249195Smm error = SET_ERROR(EIO); 760168404Spjd break; 761185029Spjd } 762168962Spjd 763168404Spjd n -= nbytes; 764168404Spjd } 765168404Spjdout: 766168404Spjd zfs_range_unlock(rl); 767168404Spjd 768168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 769168404Spjd ZFS_EXIT(zfsvfs); 770168404Spjd return (error); 771168404Spjd} 772168404Spjd 773168404Spjd/* 774168404Spjd * Write the bytes to a file. 775168404Spjd * 776168404Spjd * IN: vp - vnode of file to be written to. 777168404Spjd * uio - structure supplying write location, range info, 778168404Spjd * and data buffer. 779213673Spjd * ioflag - FAPPEND flag set if in append mode. 780168404Spjd * cr - credentials of caller. 781185029Spjd * ct - caller context (NFS/CIFS fem monitor only) 782168404Spjd * 783168404Spjd * OUT: uio - updated offset and range. 
784168404Spjd * 785168404Spjd * RETURN: 0 if success 786168404Spjd * error code if failure 787168404Spjd * 788168404Spjd * Timestamps: 789168404Spjd * vp - ctime|mtime updated if byte count > 0 790168404Spjd */ 791219089Spjd 792168404Spjd/* ARGSUSED */ 793168404Spjdstatic int 794168962Spjdzfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 795168404Spjd{ 796168404Spjd znode_t *zp = VTOZ(vp); 797168962Spjd rlim64_t limit = MAXOFFSET_T; 798168404Spjd ssize_t start_resid = uio->uio_resid; 799168404Spjd ssize_t tx_bytes; 800168404Spjd uint64_t end_size; 801168404Spjd dmu_tx_t *tx; 802168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 803185029Spjd zilog_t *zilog; 804168404Spjd offset_t woff; 805168404Spjd ssize_t n, nbytes; 806168404Spjd rl_t *rl; 807168404Spjd int max_blksz = zfsvfs->z_max_blksz; 808247187Smm int error = 0; 809209962Smm arc_buf_t *abuf; 810247187Smm iovec_t *aiov = NULL; 811219089Spjd xuio_t *xuio = NULL; 812219089Spjd int i_iov = 0; 813219089Spjd int iovcnt = uio->uio_iovcnt; 814219089Spjd iovec_t *iovp = uio->uio_iov; 815219089Spjd int write_eof; 816219089Spjd int count = 0; 817219089Spjd sa_bulk_attr_t bulk[4]; 818219089Spjd uint64_t mtime[2], ctime[2]; 819168404Spjd 820168404Spjd /* 821168404Spjd * Fasttrack empty write 822168404Spjd */ 823168404Spjd n = start_resid; 824168404Spjd if (n == 0) 825168404Spjd return (0); 826168404Spjd 827168962Spjd if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 828168962Spjd limit = MAXOFFSET_T; 829168962Spjd 830168404Spjd ZFS_ENTER(zfsvfs); 831185029Spjd ZFS_VERIFY_ZP(zp); 832168404Spjd 833219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 834219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 835219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 836219089Spjd &zp->z_size, 8); 837219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 838219089Spjd &zp->z_pflags, 8); 839219089Spjd 840168404Spjd /* 
841185029Spjd * If immutable or not appending then return EPERM 842185029Spjd */ 843219089Spjd if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 844219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 845219089Spjd (uio->uio_loffset < zp->z_size))) { 846185029Spjd ZFS_EXIT(zfsvfs); 847249195Smm return (SET_ERROR(EPERM)); 848185029Spjd } 849185029Spjd 850185029Spjd zilog = zfsvfs->z_log; 851185029Spjd 852185029Spjd /* 853219089Spjd * Validate file offset 854219089Spjd */ 855219089Spjd woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 856219089Spjd if (woff < 0) { 857219089Spjd ZFS_EXIT(zfsvfs); 858249195Smm return (SET_ERROR(EINVAL)); 859219089Spjd } 860219089Spjd 861219089Spjd /* 862219089Spjd * Check for mandatory locks before calling zfs_range_lock() 863219089Spjd * in order to prevent a deadlock with locks set via fcntl(). 864219089Spjd */ 865219089Spjd if (MANDMODE((mode_t)zp->z_mode) && 866219089Spjd (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 867219089Spjd ZFS_EXIT(zfsvfs); 868219089Spjd return (error); 869219089Spjd } 870219089Spjd 871219089Spjd#ifdef sun 872219089Spjd /* 873168404Spjd * Pre-fault the pages to ensure slow (eg NFS) pages 874168404Spjd * don't hold up txg. 875219089Spjd * Skip this if uio contains loaned arc_buf. 876168404Spjd */ 877219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 878219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 879219089Spjd xuio = (xuio_t *)uio; 880219089Spjd else 881219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 882219089Spjd#endif /* sun */ 883168404Spjd 884168404Spjd /* 885168404Spjd * If in append mode, set the io offset pointer to eof. 886168404Spjd */ 887213673Spjd if (ioflag & FAPPEND) { 888168404Spjd /* 889219089Spjd * Obtain an appending range lock to guarantee file append 890219089Spjd * semantics. We reset the write offset once we have the lock. 
891168404Spjd */ 892168404Spjd rl = zfs_range_lock(zp, 0, n, RL_APPEND); 893219089Spjd woff = rl->r_off; 894168404Spjd if (rl->r_len == UINT64_MAX) { 895219089Spjd /* 896219089Spjd * We overlocked the file because this write will cause 897219089Spjd * the file block size to increase. 898219089Spjd * Note that zp_size cannot change with this lock held. 899219089Spjd */ 900219089Spjd woff = zp->z_size; 901168404Spjd } 902219089Spjd uio->uio_loffset = woff; 903168404Spjd } else { 904168404Spjd /* 905219089Spjd * Note that if the file block size will change as a result of 906219089Spjd * this write, then this range lock will lock the entire file 907219089Spjd * so that we can re-write the block safely. 908168404Spjd */ 909168404Spjd rl = zfs_range_lock(zp, woff, n, RL_WRITER); 910168404Spjd } 911168404Spjd 912235781Strasz if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { 913235781Strasz zfs_range_unlock(rl); 914235781Strasz ZFS_EXIT(zfsvfs); 915235781Strasz return (EFBIG); 916235781Strasz } 917235781Strasz 918168962Spjd if (woff >= limit) { 919168962Spjd zfs_range_unlock(rl); 920168962Spjd ZFS_EXIT(zfsvfs); 921249195Smm return (SET_ERROR(EFBIG)); 922168962Spjd } 923168962Spjd 924168962Spjd if ((woff + n) > limit || woff > (limit - n)) 925168962Spjd n = limit - woff; 926168962Spjd 927219089Spjd /* Will this write extend the file length? */ 928219089Spjd write_eof = (woff + n > zp->z_size); 929168404Spjd 930219089Spjd end_size = MAX(zp->z_size, woff + n); 931219089Spjd 932168404Spjd /* 933168404Spjd * Write the file in reasonable size chunks. Each chunk is written 934168404Spjd * in a separate transaction; this keeps the intent log records small 935168404Spjd * and allows us to do more fine-grained space accounting. 
936168404Spjd */ 937168404Spjd while (n > 0) { 938209962Smm abuf = NULL; 939209962Smm woff = uio->uio_loffset; 940209962Smmagain: 941219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 942219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 943209962Smm if (abuf != NULL) 944209962Smm dmu_return_arcbuf(abuf); 945249195Smm error = SET_ERROR(EDQUOT); 946209962Smm break; 947209962Smm } 948209962Smm 949219089Spjd if (xuio && abuf == NULL) { 950219089Spjd ASSERT(i_iov < iovcnt); 951219089Spjd aiov = &iovp[i_iov]; 952219089Spjd abuf = dmu_xuio_arcbuf(xuio, i_iov); 953219089Spjd dmu_xuio_clear(xuio, i_iov); 954219089Spjd DTRACE_PROBE3(zfs_cp_write, int, i_iov, 955219089Spjd iovec_t *, aiov, arc_buf_t *, abuf); 956219089Spjd ASSERT((aiov->iov_base == abuf->b_data) || 957219089Spjd ((char *)aiov->iov_base - (char *)abuf->b_data + 958219089Spjd aiov->iov_len == arc_buf_size(abuf))); 959219089Spjd i_iov++; 960219089Spjd } else if (abuf == NULL && n >= max_blksz && 961219089Spjd woff >= zp->z_size && 962209962Smm P2PHASE(woff, max_blksz) == 0 && 963209962Smm zp->z_blksz == max_blksz) { 964219089Spjd /* 965219089Spjd * This write covers a full block. "Borrow" a buffer 966219089Spjd * from the dmu so that we can fill it before we enter 967219089Spjd * a transaction. This avoids the possibility of 968219089Spjd * holding up the transaction if the data copy hangs 969219089Spjd * up on a pagefault (e.g., from an NFS server mapping). 970219089Spjd */ 971209962Smm size_t cbytes; 972209962Smm 973219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 974219089Spjd max_blksz); 975209962Smm ASSERT(abuf != NULL); 976209962Smm ASSERT(arc_buf_size(abuf) == max_blksz); 977209962Smm if (error = uiocopy(abuf->b_data, max_blksz, 978209962Smm UIO_WRITE, uio, &cbytes)) { 979209962Smm dmu_return_arcbuf(abuf); 980209962Smm break; 981209962Smm } 982209962Smm ASSERT(cbytes == max_blksz); 983209962Smm } 984209962Smm 985209962Smm /* 986168404Spjd * Start a transaction. 
987168404Spjd */ 988168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 989219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 990168404Spjd dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 991219089Spjd zfs_sa_upgrade_txholds(tx, zp); 992209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 993168404Spjd if (error) { 994209962Smm if (error == ERESTART) { 995168404Spjd dmu_tx_wait(tx); 996168404Spjd dmu_tx_abort(tx); 997209962Smm goto again; 998168404Spjd } 999168404Spjd dmu_tx_abort(tx); 1000209962Smm if (abuf != NULL) 1001209962Smm dmu_return_arcbuf(abuf); 1002168404Spjd break; 1003168404Spjd } 1004168404Spjd 1005168404Spjd /* 1006168404Spjd * If zfs_range_lock() over-locked we grow the blocksize 1007168404Spjd * and then reduce the lock range. This will only happen 1008168404Spjd * on the first iteration since zfs_range_reduce() will 1009168404Spjd * shrink down r_len to the appropriate size. 1010168404Spjd */ 1011168404Spjd if (rl->r_len == UINT64_MAX) { 1012168404Spjd uint64_t new_blksz; 1013168404Spjd 1014168404Spjd if (zp->z_blksz > max_blksz) { 1015168404Spjd ASSERT(!ISP2(zp->z_blksz)); 1016168404Spjd new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); 1017168404Spjd } else { 1018168404Spjd new_blksz = MIN(end_size, max_blksz); 1019168404Spjd } 1020168404Spjd zfs_grow_blocksize(zp, new_blksz, tx); 1021168404Spjd zfs_range_reduce(rl, woff, n); 1022168404Spjd } 1023168404Spjd 1024168404Spjd /* 1025168404Spjd * XXX - should we really limit each write to z_max_blksz? 1026168404Spjd * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 
1027168404Spjd */ 1028168404Spjd nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 1029168404Spjd 1030219089Spjd if (woff + nbytes > zp->z_size) 1031168404Spjd vnode_pager_setsize(vp, woff + nbytes); 1032168404Spjd 1033209962Smm if (abuf == NULL) { 1034209962Smm tx_bytes = uio->uio_resid; 1035219089Spjd error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 1036219089Spjd uio, nbytes, tx); 1037209962Smm tx_bytes -= uio->uio_resid; 1038168404Spjd } else { 1039209962Smm tx_bytes = nbytes; 1040219089Spjd ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 1041219089Spjd /* 1042219089Spjd * If this is not a full block write, but we are 1043219089Spjd * extending the file past EOF and this data starts 1044219089Spjd * block-aligned, use assign_arcbuf(). Otherwise, 1045219089Spjd * write via dmu_write(). 1046219089Spjd */ 1047219089Spjd if (tx_bytes < max_blksz && (!write_eof || 1048219089Spjd aiov->iov_base != abuf->b_data)) { 1049219089Spjd ASSERT(xuio); 1050219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, woff, 1051219089Spjd aiov->iov_len, aiov->iov_base, tx); 1052219089Spjd dmu_return_arcbuf(abuf); 1053219089Spjd xuio_stat_wbuf_copied(); 1054219089Spjd } else { 1055219089Spjd ASSERT(xuio || tx_bytes == max_blksz); 1056219089Spjd dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 1057219089Spjd woff, abuf, tx); 1058219089Spjd } 1059209962Smm ASSERT(tx_bytes <= uio->uio_resid); 1060209962Smm uioskip(uio, tx_bytes); 1061168404Spjd } 1062212657Savg if (tx_bytes && vn_has_cached_data(vp)) { 1063209962Smm update_pages(vp, woff, tx_bytes, zfsvfs->z_os, 1064209962Smm zp->z_id, uio->uio_segflg, tx); 1065209962Smm } 1066209962Smm 1067209962Smm /* 1068168404Spjd * If we made no progress, we're done. If we made even 1069168404Spjd * partial progress, update the znode and ZIL accordingly. 
1070168404Spjd */ 1071168404Spjd if (tx_bytes == 0) { 1072219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 1073219089Spjd (void *)&zp->z_size, sizeof (uint64_t), tx); 1074168404Spjd dmu_tx_commit(tx); 1075168404Spjd ASSERT(error != 0); 1076168404Spjd break; 1077168404Spjd } 1078168404Spjd 1079168404Spjd /* 1080168404Spjd * Clear Set-UID/Set-GID bits on successful write if not 1081168404Spjd * privileged and at least one of the excute bits is set. 1082168404Spjd * 1083168404Spjd * It would be nice to to this after all writes have 1084168404Spjd * been done, but that would still expose the ISUID/ISGID 1085168404Spjd * to another app after the partial write is committed. 1086185029Spjd * 1087185029Spjd * Note: we don't call zfs_fuid_map_id() here because 1088185029Spjd * user 0 is not an ephemeral uid. 1089168404Spjd */ 1090168404Spjd mutex_enter(&zp->z_acl_lock); 1091219089Spjd if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 1092168404Spjd (S_IXUSR >> 6))) != 0 && 1093219089Spjd (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 1094185029Spjd secpolicy_vnode_setid_retain(vp, cr, 1095219089Spjd (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 1096219089Spjd uint64_t newmode; 1097219089Spjd zp->z_mode &= ~(S_ISUID | S_ISGID); 1098219089Spjd newmode = zp->z_mode; 1099219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 1100219089Spjd (void *)&newmode, sizeof (uint64_t), tx); 1101168404Spjd } 1102168404Spjd mutex_exit(&zp->z_acl_lock); 1103168404Spjd 1104219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 1105219089Spjd B_TRUE); 1106168404Spjd 1107168404Spjd /* 1108168404Spjd * Update the file size (zp_size) if it has changed; 1109168404Spjd * account for possible concurrent updates. 
1110168404Spjd */ 1111219089Spjd while ((end_size = zp->z_size) < uio->uio_loffset) { 1112219089Spjd (void) atomic_cas_64(&zp->z_size, end_size, 1113168404Spjd uio->uio_loffset); 1114219089Spjd ASSERT(error == 0); 1115219089Spjd } 1116219089Spjd /* 1117219089Spjd * If we are replaying and eof is non zero then force 1118219089Spjd * the file size to the specified eof. Note, there's no 1119219089Spjd * concurrency during replay. 1120219089Spjd */ 1121219089Spjd if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 1122219089Spjd zp->z_size = zfsvfs->z_replay_eof; 1123219089Spjd 1124219089Spjd error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1125219089Spjd 1126168404Spjd zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 1127168404Spjd dmu_tx_commit(tx); 1128168404Spjd 1129168404Spjd if (error != 0) 1130168404Spjd break; 1131168404Spjd ASSERT(tx_bytes == nbytes); 1132168404Spjd n -= nbytes; 1133219089Spjd 1134219089Spjd#ifdef sun 1135219089Spjd if (!xuio && n > 0) 1136219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 1137219089Spjd#endif /* sun */ 1138168404Spjd } 1139168404Spjd 1140168404Spjd zfs_range_unlock(rl); 1141168404Spjd 1142168404Spjd /* 1143168404Spjd * If we're in replay mode, or we made no progress, return error. 1144168404Spjd * Otherwise, it's at least a partial write, so it's successful. 
1145168404Spjd */ 1146209962Smm if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1147168404Spjd ZFS_EXIT(zfsvfs); 1148168404Spjd return (error); 1149168404Spjd } 1150168404Spjd 1151219089Spjd if (ioflag & (FSYNC | FDSYNC) || 1152219089Spjd zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1153219089Spjd zil_commit(zilog, zp->z_id); 1154168404Spjd 1155168404Spjd ZFS_EXIT(zfsvfs); 1156168404Spjd return (0); 1157168404Spjd} 1158168404Spjd 1159168404Spjdvoid 1160219089Spjdzfs_get_done(zgd_t *zgd, int error) 1161168404Spjd{ 1162219089Spjd znode_t *zp = zgd->zgd_private; 1163219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 1164168404Spjd 1165219089Spjd if (zgd->zgd_db) 1166219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1167219089Spjd 1168219089Spjd zfs_range_unlock(zgd->zgd_rl); 1169219089Spjd 1170191900Skmacy /* 1171191900Skmacy * Release the vnode asynchronously as we currently have the 1172191900Skmacy * txg stopped from syncing. 1173191900Skmacy */ 1174219089Spjd VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1175219089Spjd 1176219089Spjd if (error == 0 && zgd->zgd_bp) 1177219089Spjd zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1178219089Spjd 1179168404Spjd kmem_free(zgd, sizeof (zgd_t)); 1180168404Spjd} 1181168404Spjd 1182214378Smm#ifdef DEBUG 1183214378Smmstatic int zil_fault_io = 0; 1184214378Smm#endif 1185214378Smm 1186168404Spjd/* 1187168404Spjd * Get data to generate a TX_WRITE intent log record. 
1188168404Spjd */ 1189168404Spjdint 1190168404Spjdzfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1191168404Spjd{ 1192168404Spjd zfsvfs_t *zfsvfs = arg; 1193168404Spjd objset_t *os = zfsvfs->z_os; 1194168404Spjd znode_t *zp; 1195219089Spjd uint64_t object = lr->lr_foid; 1196219089Spjd uint64_t offset = lr->lr_offset; 1197219089Spjd uint64_t size = lr->lr_length; 1198219089Spjd blkptr_t *bp = &lr->lr_blkptr; 1199168404Spjd dmu_buf_t *db; 1200168404Spjd zgd_t *zgd; 1201168404Spjd int error = 0; 1202168404Spjd 1203219089Spjd ASSERT(zio != NULL); 1204219089Spjd ASSERT(size != 0); 1205168404Spjd 1206168404Spjd /* 1207168404Spjd * Nothing to do if the file has been removed 1208168404Spjd */ 1209219089Spjd if (zfs_zget(zfsvfs, object, &zp) != 0) 1210249195Smm return (SET_ERROR(ENOENT)); 1211168404Spjd if (zp->z_unlinked) { 1212191900Skmacy /* 1213191900Skmacy * Release the vnode asynchronously as we currently have the 1214191900Skmacy * txg stopped from syncing. 1215191900Skmacy */ 1216196307Spjd VN_RELE_ASYNC(ZTOV(zp), 1217196307Spjd dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1218249195Smm return (SET_ERROR(ENOENT)); 1219168404Spjd } 1220168404Spjd 1221219089Spjd zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1222219089Spjd zgd->zgd_zilog = zfsvfs->z_log; 1223219089Spjd zgd->zgd_private = zp; 1224219089Spjd 1225168404Spjd /* 1226168404Spjd * Write records come in two flavors: immediate and indirect. 1227168404Spjd * For small writes it's cheaper to store the data with the 1228168404Spjd * log record (immediate); for large writes it's cheaper to 1229168404Spjd * sync the data and get a pointer to it (indirect) so that 1230168404Spjd * we don't have to write the data twice. 
1231168404Spjd */ 1232168404Spjd if (buf != NULL) { /* immediate write */ 1233219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1234168404Spjd /* test for truncation needs to be done while range locked */ 1235219089Spjd if (offset >= zp->z_size) { 1236249195Smm error = SET_ERROR(ENOENT); 1237219089Spjd } else { 1238219089Spjd error = dmu_read(os, object, offset, size, buf, 1239219089Spjd DMU_READ_NO_PREFETCH); 1240168404Spjd } 1241219089Spjd ASSERT(error == 0 || error == ENOENT); 1242168404Spjd } else { /* indirect write */ 1243168404Spjd /* 1244168404Spjd * Have to lock the whole block to ensure when it's 1245168404Spjd * written out and it's checksum is being calculated 1246168404Spjd * that no one can change the data. We need to re-check 1247168404Spjd * blocksize after we get the lock in case it's changed! 1248168404Spjd */ 1249168404Spjd for (;;) { 1250219089Spjd uint64_t blkoff; 1251219089Spjd size = zp->z_blksz; 1252219089Spjd blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; 1253219089Spjd offset -= blkoff; 1254219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1255219089Spjd RL_READER); 1256219089Spjd if (zp->z_blksz == size) 1257168404Spjd break; 1258219089Spjd offset += blkoff; 1259219089Spjd zfs_range_unlock(zgd->zgd_rl); 1260168404Spjd } 1261168404Spjd /* test for truncation needs to be done while range locked */ 1262219089Spjd if (lr->lr_offset >= zp->z_size) 1263249195Smm error = SET_ERROR(ENOENT); 1264214378Smm#ifdef DEBUG 1265214378Smm if (zil_fault_io) { 1266249195Smm error = SET_ERROR(EIO); 1267214378Smm zil_fault_io = 0; 1268214378Smm } 1269214378Smm#endif 1270219089Spjd if (error == 0) 1271219089Spjd error = dmu_buf_hold(os, object, offset, zgd, &db, 1272219089Spjd DMU_READ_NO_PREFETCH); 1273214378Smm 1274209962Smm if (error == 0) { 1275243524Smm blkptr_t *obp = dmu_buf_get_blkptr(db); 1276243524Smm if (obp) { 1277243524Smm ASSERT(BP_IS_HOLE(bp)); 1278243524Smm *bp = *obp; 1279243524Smm } 1280243524Smm 
1281219089Spjd zgd->zgd_db = db; 1282219089Spjd zgd->zgd_bp = bp; 1283219089Spjd 1284219089Spjd ASSERT(db->db_offset == offset); 1285219089Spjd ASSERT(db->db_size == size); 1286219089Spjd 1287219089Spjd error = dmu_sync(zio, lr->lr_common.lrc_txg, 1288219089Spjd zfs_get_done, zgd); 1289219089Spjd ASSERT(error || lr->lr_length <= zp->z_blksz); 1290219089Spjd 1291209962Smm /* 1292219089Spjd * On success, we need to wait for the write I/O 1293219089Spjd * initiated by dmu_sync() to complete before we can 1294219089Spjd * release this dbuf. We will finish everything up 1295219089Spjd * in the zfs_get_done() callback. 1296209962Smm */ 1297219089Spjd if (error == 0) 1298219089Spjd return (0); 1299209962Smm 1300219089Spjd if (error == EALREADY) { 1301219089Spjd lr->lr_common.lrc_txtype = TX_WRITE2; 1302219089Spjd error = 0; 1303219089Spjd } 1304209962Smm } 1305168404Spjd } 1306219089Spjd 1307219089Spjd zfs_get_done(zgd, error); 1308219089Spjd 1309168404Spjd return (error); 1310168404Spjd} 1311168404Spjd 1312168404Spjd/*ARGSUSED*/ 1313168404Spjdstatic int 1314185029Spjdzfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1315185029Spjd caller_context_t *ct) 1316168404Spjd{ 1317168404Spjd znode_t *zp = VTOZ(vp); 1318168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1319168404Spjd int error; 1320168404Spjd 1321168404Spjd ZFS_ENTER(zfsvfs); 1322185029Spjd ZFS_VERIFY_ZP(zp); 1323185029Spjd 1324185029Spjd if (flag & V_ACE_MASK) 1325185029Spjd error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1326185029Spjd else 1327185029Spjd error = zfs_zaccess_rwx(zp, mode, flag, cr); 1328185029Spjd 1329168404Spjd ZFS_EXIT(zfsvfs); 1330168404Spjd return (error); 1331168404Spjd} 1332168404Spjd 1333168404Spjd/* 1334211932Smm * If vnode is for a device return a specfs vnode instead. 
1335211932Smm */ 1336211932Smmstatic int 1337211932Smmspecvp_check(vnode_t **vpp, cred_t *cr) 1338211932Smm{ 1339211932Smm int error = 0; 1340211932Smm 1341211932Smm if (IS_DEVVP(*vpp)) { 1342211932Smm struct vnode *svp; 1343211932Smm 1344211932Smm svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1345211932Smm VN_RELE(*vpp); 1346211932Smm if (svp == NULL) 1347249195Smm error = SET_ERROR(ENOSYS); 1348211932Smm *vpp = svp; 1349211932Smm } 1350211932Smm return (error); 1351211932Smm} 1352211932Smm 1353211932Smm 1354211932Smm/* 1355168404Spjd * Lookup an entry in a directory, or an extended attribute directory. 1356168404Spjd * If it exists, return a held vnode reference for it. 1357168404Spjd * 1358168404Spjd * IN: dvp - vnode of directory to search. 1359168404Spjd * nm - name of entry to lookup. 1360168404Spjd * pnp - full pathname to lookup [UNUSED]. 1361168404Spjd * flags - LOOKUP_XATTR set if looking for an attribute. 1362168404Spjd * rdir - root directory vnode [UNUSED]. 1363168404Spjd * cr - credentials of caller. 1364185029Spjd * ct - caller context 1365185029Spjd * direntflags - directory lookup flags 1366185029Spjd * realpnp - returned pathname. 1367168404Spjd * 1368168404Spjd * OUT: vpp - vnode of located entry, NULL if not found. 
1369168404Spjd * 1370168404Spjd * RETURN: 0 if success 1371168404Spjd * error code if failure 1372168404Spjd * 1373168404Spjd * Timestamps: 1374168404Spjd * NA 1375168404Spjd */ 1376168404Spjd/* ARGSUSED */ 1377168962Spjdstatic int 1378168962Spjdzfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1379185029Spjd int nameiop, cred_t *cr, kthread_t *td, int flags) 1380168404Spjd{ 1381168962Spjd znode_t *zdp = VTOZ(dvp); 1382168962Spjd zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1383211932Smm int error = 0; 1384185029Spjd int *direntflags = NULL; 1385185029Spjd void *realpnp = NULL; 1386168404Spjd 1387211932Smm /* fast path */ 1388211932Smm if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { 1389211932Smm 1390211932Smm if (dvp->v_type != VDIR) { 1391249195Smm return (SET_ERROR(ENOTDIR)); 1392219089Spjd } else if (zdp->z_sa_hdl == NULL) { 1393249195Smm return (SET_ERROR(EIO)); 1394211932Smm } 1395211932Smm 1396211932Smm if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { 1397211932Smm error = zfs_fastaccesschk_execute(zdp, cr); 1398211932Smm if (!error) { 1399211932Smm *vpp = dvp; 1400211932Smm VN_HOLD(*vpp); 1401211932Smm return (0); 1402211932Smm } 1403211932Smm return (error); 1404211932Smm } else { 1405211932Smm vnode_t *tvp = dnlc_lookup(dvp, nm); 1406211932Smm 1407211932Smm if (tvp) { 1408211932Smm error = zfs_fastaccesschk_execute(zdp, cr); 1409211932Smm if (error) { 1410211932Smm VN_RELE(tvp); 1411211932Smm return (error); 1412211932Smm } 1413211932Smm if (tvp == DNLC_NO_VNODE) { 1414211932Smm VN_RELE(tvp); 1415249195Smm return (SET_ERROR(ENOENT)); 1416211932Smm } else { 1417211932Smm *vpp = tvp; 1418211932Smm return (specvp_check(vpp, cr)); 1419211932Smm } 1420211932Smm } 1421211932Smm } 1422211932Smm } 1423211932Smm 1424211932Smm DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1425211932Smm 1426168404Spjd ZFS_ENTER(zfsvfs); 1427185029Spjd ZFS_VERIFY_ZP(zdp); 1428168404Spjd 1429168404Spjd *vpp = NULL; 1430168404Spjd 
1431185029Spjd if (flags & LOOKUP_XATTR) { 1432168404Spjd#ifdef TODO 1433168404Spjd /* 1434168404Spjd * If the xattr property is off, refuse the lookup request. 1435168404Spjd */ 1436168404Spjd if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1437168404Spjd ZFS_EXIT(zfsvfs); 1438249195Smm return (SET_ERROR(EINVAL)); 1439168404Spjd } 1440185029Spjd#endif 1441168404Spjd 1442168404Spjd /* 1443168404Spjd * We don't allow recursive attributes.. 1444168404Spjd * Maybe someday we will. 1445168404Spjd */ 1446219089Spjd if (zdp->z_pflags & ZFS_XATTR) { 1447168404Spjd ZFS_EXIT(zfsvfs); 1448249195Smm return (SET_ERROR(EINVAL)); 1449168404Spjd } 1450168404Spjd 1451168404Spjd if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1452168404Spjd ZFS_EXIT(zfsvfs); 1453168404Spjd return (error); 1454168404Spjd } 1455168404Spjd 1456168404Spjd /* 1457168404Spjd * Do we have permission to get into attribute directory? 1458168404Spjd */ 1459168404Spjd 1460185029Spjd if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1461185029Spjd B_FALSE, cr)) { 1462168404Spjd VN_RELE(*vpp); 1463185029Spjd *vpp = NULL; 1464168404Spjd } 1465168404Spjd 1466168404Spjd ZFS_EXIT(zfsvfs); 1467168404Spjd return (error); 1468168404Spjd } 1469168404Spjd 1470168404Spjd if (dvp->v_type != VDIR) { 1471168404Spjd ZFS_EXIT(zfsvfs); 1472249195Smm return (SET_ERROR(ENOTDIR)); 1473168404Spjd } 1474168404Spjd 1475168404Spjd /* 1476168404Spjd * Check accessibility of directory. 
1477168404Spjd */ 1478168404Spjd 1479185029Spjd if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1480168404Spjd ZFS_EXIT(zfsvfs); 1481168404Spjd return (error); 1482168404Spjd } 1483168404Spjd 1484185029Spjd if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1485185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1486185029Spjd ZFS_EXIT(zfsvfs); 1487249195Smm return (SET_ERROR(EILSEQ)); 1488185029Spjd } 1489168404Spjd 1490185029Spjd error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1491211932Smm if (error == 0) 1492211932Smm error = specvp_check(vpp, cr); 1493168962Spjd 1494168404Spjd /* Translate errors and add SAVENAME when needed. */ 1495168404Spjd if (cnp->cn_flags & ISLASTCN) { 1496168404Spjd switch (nameiop) { 1497168404Spjd case CREATE: 1498168404Spjd case RENAME: 1499168404Spjd if (error == ENOENT) { 1500168404Spjd error = EJUSTRETURN; 1501168404Spjd cnp->cn_flags |= SAVENAME; 1502168404Spjd break; 1503168404Spjd } 1504168404Spjd /* FALLTHROUGH */ 1505168404Spjd case DELETE: 1506168404Spjd if (error == 0) 1507168404Spjd cnp->cn_flags |= SAVENAME; 1508168404Spjd break; 1509168404Spjd } 1510168404Spjd } 1511168404Spjd if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) { 1512169198Spjd int ltype = 0; 1513169198Spjd 1514169198Spjd if (cnp->cn_flags & ISDOTDOT) { 1515176559Sattilio ltype = VOP_ISLOCKED(dvp); 1516175294Sattilio VOP_UNLOCK(dvp, 0); 1517169198Spjd } 1518206667Spjd ZFS_EXIT(zfsvfs); 1519219089Spjd error = zfs_vnode_lock(*vpp, cnp->cn_lkflags); 1520168962Spjd if (cnp->cn_flags & ISDOTDOT) 1521175202Sattilio vn_lock(dvp, ltype | LK_RETRY); 1522169172Spjd if (error != 0) { 1523169172Spjd VN_RELE(*vpp); 1524169172Spjd *vpp = NULL; 1525169172Spjd return (error); 1526169172Spjd } 1527206667Spjd } else { 1528206667Spjd ZFS_EXIT(zfsvfs); 1529168404Spjd } 1530168404Spjd 1531168404Spjd#ifdef FREEBSD_NAMECACHE 1532168404Spjd /* 1533168404Spjd * Insert name into cache (as non-existent) if appropriate. 
1534168404Spjd */ 1535168404Spjd if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) 1536168404Spjd cache_enter(dvp, *vpp, cnp); 1537169170Spjd /* 1538169170Spjd * Insert name into cache if appropriate. 1539169170Spjd */ 1540168404Spjd if (error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1541168404Spjd if (!(cnp->cn_flags & ISLASTCN) || 1542168404Spjd (nameiop != DELETE && nameiop != RENAME)) { 1543168404Spjd cache_enter(dvp, *vpp, cnp); 1544168404Spjd } 1545168404Spjd } 1546168404Spjd#endif 1547168404Spjd 1548168404Spjd return (error); 1549168404Spjd} 1550168404Spjd 1551168404Spjd/* 1552168404Spjd * Attempt to create a new entry in a directory. If the entry 1553168404Spjd * already exists, truncate the file if permissible, else return 1554168404Spjd * an error. Return the vp of the created or trunc'd file. 1555168404Spjd * 1556168404Spjd * IN: dvp - vnode of directory to put new file entry in. 1557168404Spjd * name - name of new file entry. 1558168404Spjd * vap - attributes of new file. 1559168404Spjd * excl - flag indicating exclusive or non-exclusive mode. 1560168404Spjd * mode - mode to open file with. 1561168404Spjd * cr - credentials of caller. 1562168404Spjd * flag - large file flag [UNUSED]. 1563185029Spjd * ct - caller context 1564185029Spjd * vsecp - ACL to be set 1565168404Spjd * 1566168404Spjd * OUT: vpp - vnode of created or trunc'd entry. 
1567168404Spjd * 1568168404Spjd * RETURN: 0 if success 1569168404Spjd * error code if failure 1570168404Spjd * 1571168404Spjd * Timestamps: 1572168404Spjd * dvp - ctime|mtime updated if new entry created 1573168404Spjd * vp - ctime|mtime always, atime if new 1574168404Spjd */ 1575185029Spjd 1576168404Spjd/* ARGSUSED */ 1577168404Spjdstatic int 1578168962Spjdzfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 1579185029Spjd vnode_t **vpp, cred_t *cr, kthread_t *td) 1580168404Spjd{ 1581168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1582168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1583185029Spjd zilog_t *zilog; 1584185029Spjd objset_t *os; 1585168404Spjd zfs_dirlock_t *dl; 1586168404Spjd dmu_tx_t *tx; 1587168404Spjd int error; 1588209962Smm ksid_t *ksid; 1589209962Smm uid_t uid; 1590209962Smm gid_t gid = crgetgid(cr); 1591219089Spjd zfs_acl_ids_t acl_ids; 1592209962Smm boolean_t fuid_dirtied; 1593219089Spjd boolean_t have_acl = B_FALSE; 1594185029Spjd void *vsecp = NULL; 1595185029Spjd int flag = 0; 1596168404Spjd 1597185029Spjd /* 1598185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 1599185029Spjd * make sure file system is at proper version 1600185029Spjd */ 1601185029Spjd 1602209962Smm ksid = crgetsid(cr, KSID_OWNER); 1603209962Smm if (ksid) 1604209962Smm uid = ksid_getid(ksid); 1605209962Smm else 1606209962Smm uid = crgetuid(cr); 1607219089Spjd 1608185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 1609185029Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 1610219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1611249195Smm return (SET_ERROR(EINVAL)); 1612185029Spjd 1613168404Spjd ZFS_ENTER(zfsvfs); 1614185029Spjd ZFS_VERIFY_ZP(dzp); 1615185029Spjd os = zfsvfs->z_os; 1616185029Spjd zilog = zfsvfs->z_log; 1617168404Spjd 1618185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1619185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1620185029Spjd ZFS_EXIT(zfsvfs); 1621249195Smm return (SET_ERROR(EILSEQ)); 1622185029Spjd } 
1623185029Spjd 1624185029Spjd if (vap->va_mask & AT_XVATTR) { 1625197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 1626185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 1627185029Spjd ZFS_EXIT(zfsvfs); 1628185029Spjd return (error); 1629185029Spjd } 1630185029Spjd } 1631168404Spjdtop: 1632168404Spjd *vpp = NULL; 1633168404Spjd 1634182905Strasz if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 1635182905Strasz vap->va_mode &= ~S_ISVTX; 1636168404Spjd 1637168404Spjd if (*name == '\0') { 1638168404Spjd /* 1639168404Spjd * Null component name refers to the directory itself. 1640168404Spjd */ 1641168404Spjd VN_HOLD(dvp); 1642168404Spjd zp = dzp; 1643168404Spjd dl = NULL; 1644168404Spjd error = 0; 1645168404Spjd } else { 1646168404Spjd /* possible VN_HOLD(zp) */ 1647185029Spjd int zflg = 0; 1648185029Spjd 1649185029Spjd if (flag & FIGNORECASE) 1650185029Spjd zflg |= ZCILOOK; 1651185029Spjd 1652185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1653185029Spjd NULL, NULL); 1654185029Spjd if (error) { 1655219089Spjd if (have_acl) 1656219089Spjd zfs_acl_ids_free(&acl_ids); 1657168404Spjd if (strcmp(name, "..") == 0) 1658249195Smm error = SET_ERROR(EISDIR); 1659168404Spjd ZFS_EXIT(zfsvfs); 1660168404Spjd return (error); 1661168404Spjd } 1662168404Spjd } 1663219089Spjd 1664185029Spjd if (zp == NULL) { 1665185029Spjd uint64_t txtype; 1666168404Spjd 1667168404Spjd /* 1668168404Spjd * Create a new file object and update the directory 1669168404Spjd * to reference it. 1670168404Spjd */ 1671185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1672219089Spjd if (have_acl) 1673219089Spjd zfs_acl_ids_free(&acl_ids); 1674168404Spjd goto out; 1675168404Spjd } 1676168404Spjd 1677168404Spjd /* 1678168404Spjd * We only support the creation of regular files in 1679168404Spjd * extended attribute directories. 
1680168404Spjd */ 1681219089Spjd 1682219089Spjd if ((dzp->z_pflags & ZFS_XATTR) && 1683168404Spjd (vap->va_type != VREG)) { 1684219089Spjd if (have_acl) 1685219089Spjd zfs_acl_ids_free(&acl_ids); 1686249195Smm error = SET_ERROR(EINVAL); 1687168404Spjd goto out; 1688168404Spjd } 1689168404Spjd 1690219089Spjd if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, 1691219089Spjd cr, vsecp, &acl_ids)) != 0) 1692219089Spjd goto out; 1693219089Spjd have_acl = B_TRUE; 1694209962Smm 1695209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 1696211932Smm zfs_acl_ids_free(&acl_ids); 1697249195Smm error = SET_ERROR(EDQUOT); 1698209962Smm goto out; 1699209962Smm } 1700209962Smm 1701168404Spjd tx = dmu_tx_create(os); 1702219089Spjd 1703219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 1704219089Spjd ZFS_SA_BASE_ATTR_SIZE); 1705219089Spjd 1706209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 1707209962Smm if (fuid_dirtied) 1708209962Smm zfs_fuid_txhold(zfsvfs, tx); 1709168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 1710219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 1711219089Spjd if (!zfsvfs->z_use_sa && 1712219089Spjd acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1713168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1714219089Spjd 0, acl_ids.z_aclp->z_acl_bytes); 1715185029Spjd } 1716209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 1717168404Spjd if (error) { 1718168404Spjd zfs_dirent_unlock(dl); 1719209962Smm if (error == ERESTART) { 1720168404Spjd dmu_tx_wait(tx); 1721168404Spjd dmu_tx_abort(tx); 1722168404Spjd goto top; 1723168404Spjd } 1724219089Spjd zfs_acl_ids_free(&acl_ids); 1725168404Spjd dmu_tx_abort(tx); 1726168404Spjd ZFS_EXIT(zfsvfs); 1727168404Spjd return (error); 1728168404Spjd } 1729219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1730209962Smm 1731209962Smm if (fuid_dirtied) 1732209962Smm zfs_fuid_sync(zfsvfs, tx); 1733209962Smm 1734168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 1735185029Spjd txtype = 
zfs_log_create_txtype(Z_FILE, vsecp, vap); 1736185029Spjd if (flag & FIGNORECASE) 1737185029Spjd txtype |= TX_CI; 1738185029Spjd zfs_log_create(zilog, tx, txtype, dzp, zp, name, 1739209962Smm vsecp, acl_ids.z_fuidp, vap); 1740209962Smm zfs_acl_ids_free(&acl_ids); 1741168404Spjd dmu_tx_commit(tx); 1742168404Spjd } else { 1743185029Spjd int aflags = (flag & FAPPEND) ? V_APPEND : 0; 1744185029Spjd 1745219089Spjd if (have_acl) 1746219089Spjd zfs_acl_ids_free(&acl_ids); 1747219089Spjd have_acl = B_FALSE; 1748219089Spjd 1749168404Spjd /* 1750168404Spjd * A directory entry already exists for this name. 1751168404Spjd */ 1752168404Spjd /* 1753168962Spjd * Can't truncate an existing file if in exclusive mode. 1754168962Spjd */ 1755168962Spjd if (excl == EXCL) { 1756249195Smm error = SET_ERROR(EEXIST); 1757168962Spjd goto out; 1758168962Spjd } 1759168962Spjd /* 1760168404Spjd * Can't open a directory for writing. 1761168404Spjd */ 1762168404Spjd if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1763249195Smm error = SET_ERROR(EISDIR); 1764168404Spjd goto out; 1765168404Spjd } 1766168404Spjd /* 1767168404Spjd * Verify requested access to file. 1768168404Spjd */ 1769185029Spjd if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 1770168404Spjd goto out; 1771168404Spjd } 1772168404Spjd 1773168404Spjd mutex_enter(&dzp->z_lock); 1774168404Spjd dzp->z_seq++; 1775168404Spjd mutex_exit(&dzp->z_lock); 1776168404Spjd 1777168404Spjd /* 1778168404Spjd * Truncate regular files if requested. 
1779168404Spjd */ 1780168404Spjd if ((ZTOV(zp)->v_type == VREG) && 1781168404Spjd (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 1782185029Spjd /* we can't hold any locks when calling zfs_freesp() */ 1783185029Spjd zfs_dirent_unlock(dl); 1784185029Spjd dl = NULL; 1785168404Spjd error = zfs_freesp(zp, 0, 0, mode, TRUE); 1786185029Spjd if (error == 0) { 1787185029Spjd vnevent_create(ZTOV(zp), ct); 1788168404Spjd } 1789168404Spjd } 1790168404Spjd } 1791168404Spjdout: 1792168404Spjd if (dl) 1793168404Spjd zfs_dirent_unlock(dl); 1794168404Spjd 1795168404Spjd if (error) { 1796168404Spjd if (zp) 1797168404Spjd VN_RELE(ZTOV(zp)); 1798168962Spjd } else { 1799168962Spjd *vpp = ZTOV(zp); 1800211932Smm error = specvp_check(vpp, cr); 1801168404Spjd } 1802168404Spjd 1803219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1804219089Spjd zil_commit(zilog, 0); 1805219089Spjd 1806168404Spjd ZFS_EXIT(zfsvfs); 1807168404Spjd return (error); 1808168404Spjd} 1809168404Spjd 1810168404Spjd/* 1811168404Spjd * Remove an entry from a directory. 1812168404Spjd * 1813168404Spjd * IN: dvp - vnode of directory to remove entry from. 1814168404Spjd * name - name of entry to remove. 1815168404Spjd * cr - credentials of caller. 
1816185029Spjd * ct - caller context 1817185029Spjd * flags - case flags 1818168404Spjd * 1819168404Spjd * RETURN: 0 if success 1820168404Spjd * error code if failure 1821168404Spjd * 1822168404Spjd * Timestamps: 1823168404Spjd * dvp - ctime|mtime 1824168404Spjd * vp - ctime (if nlink > 0) 1825168404Spjd */ 1826219089Spjd 1827219089Spjduint64_t null_xattr = 0; 1828219089Spjd 1829185029Spjd/*ARGSUSED*/ 1830168404Spjdstatic int 1831185029Spjdzfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 1832185029Spjd int flags) 1833168404Spjd{ 1834168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1835219089Spjd znode_t *xzp; 1836168404Spjd vnode_t *vp; 1837168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1838185029Spjd zilog_t *zilog; 1839168962Spjd uint64_t acl_obj, xattr_obj; 1840219089Spjd uint64_t xattr_obj_unlinked = 0; 1841219089Spjd uint64_t obj = 0; 1842168404Spjd zfs_dirlock_t *dl; 1843168404Spjd dmu_tx_t *tx; 1844168962Spjd boolean_t may_delete_now, delete_now = FALSE; 1845185029Spjd boolean_t unlinked, toobig = FALSE; 1846185029Spjd uint64_t txtype; 1847185029Spjd pathname_t *realnmp = NULL; 1848185029Spjd pathname_t realnm; 1849168404Spjd int error; 1850185029Spjd int zflg = ZEXISTS; 1851168404Spjd 1852168404Spjd ZFS_ENTER(zfsvfs); 1853185029Spjd ZFS_VERIFY_ZP(dzp); 1854185029Spjd zilog = zfsvfs->z_log; 1855168404Spjd 1856185029Spjd if (flags & FIGNORECASE) { 1857185029Spjd zflg |= ZCILOOK; 1858185029Spjd pn_alloc(&realnm); 1859185029Spjd realnmp = &realnm; 1860185029Spjd } 1861185029Spjd 1862168404Spjdtop: 1863219089Spjd xattr_obj = 0; 1864219089Spjd xzp = NULL; 1865168404Spjd /* 1866168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 
1867168404Spjd */ 1868185029Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1869185029Spjd NULL, realnmp)) { 1870185029Spjd if (realnmp) 1871185029Spjd pn_free(realnmp); 1872168404Spjd ZFS_EXIT(zfsvfs); 1873168404Spjd return (error); 1874168404Spjd } 1875168404Spjd 1876168404Spjd vp = ZTOV(zp); 1877168404Spjd 1878168962Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1879168404Spjd goto out; 1880168962Spjd } 1881168404Spjd 1882168962Spjd /* 1883168962Spjd * Need to use rmdir for removing directories. 1884168962Spjd */ 1885168962Spjd if (vp->v_type == VDIR) { 1886249195Smm error = SET_ERROR(EPERM); 1887168962Spjd goto out; 1888168962Spjd } 1889168962Spjd 1890185029Spjd vnevent_remove(vp, dvp, name, ct); 1891168962Spjd 1892185029Spjd if (realnmp) 1893185029Spjd dnlc_remove(dvp, realnmp->pn_buf); 1894185029Spjd else 1895185029Spjd dnlc_remove(dvp, name); 1896168404Spjd 1897219089Spjd VI_LOCK(vp); 1898219089Spjd may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); 1899219089Spjd VI_UNLOCK(vp); 1900168962Spjd 1901168404Spjd /* 1902168404Spjd * We may delete the znode now, or we may put it in the unlinked set; 1903168404Spjd * it depends on whether we're the last link, and on whether there are 1904168404Spjd * other holds on the vnode. So we dmu_tx_hold() the right things to 1905168404Spjd * allow for either case. 1906168404Spjd */ 1907219089Spjd obj = zp->z_id; 1908168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1909168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1910219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1911219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1912219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 1913185029Spjd if (may_delete_now) { 1914185029Spjd toobig = 1915219089Spjd zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 1916185029Spjd /* if the file is too big, only hold_free a token amount */ 1917185029Spjd dmu_tx_hold_free(tx, zp->z_id, 0, 1918185029Spjd (toobig ? 
DMU_MAX_ACCESS : DMU_OBJECT_END)); 1919185029Spjd } 1920168404Spjd 1921168404Spjd /* are there any extended attributes? */ 1922219089Spjd error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1923219089Spjd &xattr_obj, sizeof (xattr_obj)); 1924219089Spjd if (error == 0 && xattr_obj) { 1925219089Spjd error = zfs_zget(zfsvfs, xattr_obj, &xzp); 1926240415Smm ASSERT0(error); 1927219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 1928219089Spjd dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1929168404Spjd } 1930168404Spjd 1931219089Spjd mutex_enter(&zp->z_lock); 1932219089Spjd if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) 1933168962Spjd dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 1934219089Spjd mutex_exit(&zp->z_lock); 1935168962Spjd 1936168404Spjd /* charge as an update -- would be nice not to charge at all */ 1937168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1938168404Spjd 1939209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 1940168404Spjd if (error) { 1941168404Spjd zfs_dirent_unlock(dl); 1942168962Spjd VN_RELE(vp); 1943219089Spjd if (xzp) 1944219089Spjd VN_RELE(ZTOV(xzp)); 1945209962Smm if (error == ERESTART) { 1946168404Spjd dmu_tx_wait(tx); 1947168404Spjd dmu_tx_abort(tx); 1948168404Spjd goto top; 1949168404Spjd } 1950185029Spjd if (realnmp) 1951185029Spjd pn_free(realnmp); 1952168404Spjd dmu_tx_abort(tx); 1953168404Spjd ZFS_EXIT(zfsvfs); 1954168404Spjd return (error); 1955168404Spjd } 1956168404Spjd 1957168404Spjd /* 1958168404Spjd * Remove the directory entry. 1959168404Spjd */ 1960185029Spjd error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 1961168404Spjd 1962168404Spjd if (error) { 1963168404Spjd dmu_tx_commit(tx); 1964168404Spjd goto out; 1965168404Spjd } 1966168404Spjd 1967219089Spjd if (unlinked) { 1968219089Spjd 1969219089Spjd /* 1970219089Spjd * Hold z_lock so that we can make sure that the ACL obj 1971219089Spjd * hasn't changed. Could have been deleted due to 1972219089Spjd * zfs_sa_upgrade(). 
1973219089Spjd */ 1974219089Spjd mutex_enter(&zp->z_lock); 1975168962Spjd VI_LOCK(vp); 1976219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1977219089Spjd &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); 1978185029Spjd delete_now = may_delete_now && !toobig && 1979168962Spjd vp->v_count == 1 && !vn_has_cached_data(vp) && 1980219089Spjd xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == 1981219089Spjd acl_obj; 1982168962Spjd VI_UNLOCK(vp); 1983168962Spjd } 1984168962Spjd 1985168962Spjd if (delete_now) { 1986243270Savg#ifdef __FreeBSD__ 1987243270Savg panic("zfs_remove: delete_now branch taken"); 1988243270Savg#endif 1989219089Spjd if (xattr_obj_unlinked) { 1990219089Spjd ASSERT3U(xzp->z_links, ==, 2); 1991168962Spjd mutex_enter(&xzp->z_lock); 1992168962Spjd xzp->z_unlinked = 1; 1993219089Spjd xzp->z_links = 0; 1994219089Spjd error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 1995219089Spjd &xzp->z_links, sizeof (xzp->z_links), tx); 1996219089Spjd ASSERT3U(error, ==, 0); 1997168962Spjd mutex_exit(&xzp->z_lock); 1998168962Spjd zfs_unlinked_add(xzp, tx); 1999219089Spjd 2000219089Spjd if (zp->z_is_sa) 2001219089Spjd error = sa_remove(zp->z_sa_hdl, 2002219089Spjd SA_ZPL_XATTR(zfsvfs), tx); 2003219089Spjd else 2004219089Spjd error = sa_update(zp->z_sa_hdl, 2005219089Spjd SA_ZPL_XATTR(zfsvfs), &null_xattr, 2006219089Spjd sizeof (uint64_t), tx); 2007240415Smm ASSERT0(error); 2008168962Spjd } 2009168962Spjd VI_LOCK(vp); 2010168962Spjd vp->v_count--; 2011240415Smm ASSERT0(vp->v_count); 2012168962Spjd VI_UNLOCK(vp); 2013168962Spjd mutex_exit(&zp->z_lock); 2014168962Spjd zfs_znode_delete(zp, tx); 2015168962Spjd } else if (unlinked) { 2016219089Spjd mutex_exit(&zp->z_lock); 2017168404Spjd zfs_unlinked_add(zp, tx); 2018243268Savg#ifdef __FreeBSD__ 2019243268Savg vp->v_vflag |= VV_NOSYNC; 2020243268Savg#endif 2021168962Spjd } 2022168404Spjd 2023185029Spjd txtype = TX_REMOVE; 2024185029Spjd if (flags & FIGNORECASE) 2025185029Spjd txtype |= TX_CI; 
2026219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 2027168404Spjd 2028168404Spjd dmu_tx_commit(tx); 2029168404Spjdout: 2030185029Spjd if (realnmp) 2031185029Spjd pn_free(realnmp); 2032185029Spjd 2033168404Spjd zfs_dirent_unlock(dl); 2034168404Spjd 2035219089Spjd if (!delete_now) 2036168962Spjd VN_RELE(vp); 2037219089Spjd if (xzp) 2038168962Spjd VN_RELE(ZTOV(xzp)); 2039168962Spjd 2040219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2041219089Spjd zil_commit(zilog, 0); 2042219089Spjd 2043168404Spjd ZFS_EXIT(zfsvfs); 2044168404Spjd return (error); 2045168404Spjd} 2046168404Spjd 2047168404Spjd/* 2048168404Spjd * Create a new directory and insert it into dvp using the name 2049168404Spjd * provided. Return a pointer to the inserted directory. 2050168404Spjd * 2051168404Spjd * IN: dvp - vnode of directory to add subdir to. 2052168404Spjd * dirname - name of new directory. 2053168404Spjd * vap - attributes of new directory. 2054168404Spjd * cr - credentials of caller. 2055185029Spjd * ct - caller context 2056185029Spjd * vsecp - ACL to be set 2057168404Spjd * 2058168404Spjd * OUT: vpp - vnode of created directory. 
2059168404Spjd * 2060168404Spjd * RETURN: 0 if success 2061168404Spjd * error code if failure 2062168404Spjd * 2063168404Spjd * Timestamps: 2064168404Spjd * dvp - ctime|mtime updated 2065168404Spjd * vp - ctime|mtime|atime updated 2066168404Spjd */ 2067185029Spjd/*ARGSUSED*/ 2068168404Spjdstatic int 2069185029Spjdzfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 2070185029Spjd caller_context_t *ct, int flags, vsecattr_t *vsecp) 2071168404Spjd{ 2072168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 2073168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2074185029Spjd zilog_t *zilog; 2075168404Spjd zfs_dirlock_t *dl; 2076185029Spjd uint64_t txtype; 2077168404Spjd dmu_tx_t *tx; 2078168404Spjd int error; 2079185029Spjd int zf = ZNEW; 2080209962Smm ksid_t *ksid; 2081209962Smm uid_t uid; 2082209962Smm gid_t gid = crgetgid(cr); 2083219089Spjd zfs_acl_ids_t acl_ids; 2084209962Smm boolean_t fuid_dirtied; 2085168404Spjd 2086168404Spjd ASSERT(vap->va_type == VDIR); 2087168404Spjd 2088185029Spjd /* 2089185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 2090185029Spjd * make sure file system is at proper version 2091185029Spjd */ 2092185029Spjd 2093209962Smm ksid = crgetsid(cr, KSID_OWNER); 2094209962Smm if (ksid) 2095209962Smm uid = ksid_getid(ksid); 2096209962Smm else 2097209962Smm uid = crgetuid(cr); 2098185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2099219089Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 2100219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2101249195Smm return (SET_ERROR(EINVAL)); 2102185029Spjd 2103168404Spjd ZFS_ENTER(zfsvfs); 2104185029Spjd ZFS_VERIFY_ZP(dzp); 2105185029Spjd zilog = zfsvfs->z_log; 2106168404Spjd 2107219089Spjd if (dzp->z_pflags & ZFS_XATTR) { 2108168404Spjd ZFS_EXIT(zfsvfs); 2109249195Smm return (SET_ERROR(EINVAL)); 2110168404Spjd } 2111168404Spjd 2112185029Spjd if (zfsvfs->z_utf8 && u8_validate(dirname, 2113185029Spjd strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2114185029Spjd ZFS_EXIT(zfsvfs); 
2115249195Smm return (SET_ERROR(EILSEQ)); 2116185029Spjd } 2117185029Spjd if (flags & FIGNORECASE) 2118185029Spjd zf |= ZCILOOK; 2119185029Spjd 2120219089Spjd if (vap->va_mask & AT_XVATTR) { 2121197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2122185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 2123185029Spjd ZFS_EXIT(zfsvfs); 2124185029Spjd return (error); 2125185029Spjd } 2126219089Spjd } 2127185029Spjd 2128219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 2129219089Spjd vsecp, &acl_ids)) != 0) { 2130219089Spjd ZFS_EXIT(zfsvfs); 2131219089Spjd return (error); 2132219089Spjd } 2133168404Spjd /* 2134168404Spjd * First make sure the new directory doesn't exist. 2135219089Spjd * 2136219089Spjd * Existence is checked first to make sure we don't return 2137219089Spjd * EACCES instead of EEXIST which can cause some applications 2138219089Spjd * to fail. 2139168404Spjd */ 2140185029Spjdtop: 2141185029Spjd *vpp = NULL; 2142185029Spjd 2143185029Spjd if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 2144185029Spjd NULL, NULL)) { 2145219089Spjd zfs_acl_ids_free(&acl_ids); 2146168404Spjd ZFS_EXIT(zfsvfs); 2147168404Spjd return (error); 2148168404Spjd } 2149168404Spjd 2150185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 2151219089Spjd zfs_acl_ids_free(&acl_ids); 2152168404Spjd zfs_dirent_unlock(dl); 2153168404Spjd ZFS_EXIT(zfsvfs); 2154168404Spjd return (error); 2155168404Spjd } 2156168404Spjd 2157209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2158211932Smm zfs_acl_ids_free(&acl_ids); 2159209962Smm zfs_dirent_unlock(dl); 2160209962Smm ZFS_EXIT(zfsvfs); 2161249195Smm return (SET_ERROR(EDQUOT)); 2162209962Smm } 2163209962Smm 2164168404Spjd /* 2165168404Spjd * Add a new entry to the directory. 
2166168404Spjd */ 2167168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2168168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 2169168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2170209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 2171209962Smm if (fuid_dirtied) 2172209962Smm zfs_fuid_txhold(zfsvfs, tx); 2173219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2174219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2175219089Spjd acl_ids.z_aclp->z_acl_bytes); 2176219089Spjd } 2177219089Spjd 2178219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2179219089Spjd ZFS_SA_BASE_ATTR_SIZE); 2180219089Spjd 2181209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 2182168404Spjd if (error) { 2183168404Spjd zfs_dirent_unlock(dl); 2184209962Smm if (error == ERESTART) { 2185168404Spjd dmu_tx_wait(tx); 2186168404Spjd dmu_tx_abort(tx); 2187168404Spjd goto top; 2188168404Spjd } 2189219089Spjd zfs_acl_ids_free(&acl_ids); 2190168404Spjd dmu_tx_abort(tx); 2191168404Spjd ZFS_EXIT(zfsvfs); 2192168404Spjd return (error); 2193168404Spjd } 2194168404Spjd 2195168404Spjd /* 2196168404Spjd * Create new node. 2197168404Spjd */ 2198219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2199168404Spjd 2200209962Smm if (fuid_dirtied) 2201209962Smm zfs_fuid_sync(zfsvfs, tx); 2202219089Spjd 2203168404Spjd /* 2204168404Spjd * Now put new name in parent dir. 
2205168404Spjd */ 2206168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 2207168404Spjd 2208168404Spjd *vpp = ZTOV(zp); 2209168404Spjd 2210185029Spjd txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 2211185029Spjd if (flags & FIGNORECASE) 2212185029Spjd txtype |= TX_CI; 2213209962Smm zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 2214209962Smm acl_ids.z_fuidp, vap); 2215185029Spjd 2216209962Smm zfs_acl_ids_free(&acl_ids); 2217219089Spjd 2218168404Spjd dmu_tx_commit(tx); 2219168404Spjd 2220168404Spjd zfs_dirent_unlock(dl); 2221168404Spjd 2222219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2223219089Spjd zil_commit(zilog, 0); 2224219089Spjd 2225168404Spjd ZFS_EXIT(zfsvfs); 2226168404Spjd return (0); 2227168404Spjd} 2228168404Spjd 2229168404Spjd/* 2230168404Spjd * Remove a directory subdir entry. If the current working 2231168404Spjd * directory is the same as the subdir to be removed, the 2232168404Spjd * remove will fail. 2233168404Spjd * 2234168404Spjd * IN: dvp - vnode of directory to remove from. 2235168404Spjd * name - name of directory to be removed. 2236168404Spjd * cwd - vnode of current working directory. 2237168404Spjd * cr - credentials of caller. 
2238185029Spjd * ct - caller context 2239185029Spjd * flags - case flags 2240168404Spjd * 2241168404Spjd * RETURN: 0 if success 2242168404Spjd * error code if failure 2243168404Spjd * 2244168404Spjd * Timestamps: 2245168404Spjd * dvp - ctime|mtime updated 2246168404Spjd */ 2247185029Spjd/*ARGSUSED*/ 2248168404Spjdstatic int 2249185029Spjdzfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 2250185029Spjd caller_context_t *ct, int flags) 2251168404Spjd{ 2252168404Spjd znode_t *dzp = VTOZ(dvp); 2253168404Spjd znode_t *zp; 2254168404Spjd vnode_t *vp; 2255168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2256185029Spjd zilog_t *zilog; 2257168404Spjd zfs_dirlock_t *dl; 2258168404Spjd dmu_tx_t *tx; 2259168404Spjd int error; 2260185029Spjd int zflg = ZEXISTS; 2261168404Spjd 2262168962Spjd ZFS_ENTER(zfsvfs); 2263185029Spjd ZFS_VERIFY_ZP(dzp); 2264185029Spjd zilog = zfsvfs->z_log; 2265168404Spjd 2266185029Spjd if (flags & FIGNORECASE) 2267185029Spjd zflg |= ZCILOOK; 2268168404Spjdtop: 2269168404Spjd zp = NULL; 2270168404Spjd 2271168404Spjd /* 2272168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 
2273168404Spjd */ 2274185029Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 2275185029Spjd NULL, NULL)) { 2276168404Spjd ZFS_EXIT(zfsvfs); 2277168404Spjd return (error); 2278168404Spjd } 2279168404Spjd 2280168404Spjd vp = ZTOV(zp); 2281168404Spjd 2282168404Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2283168404Spjd goto out; 2284168404Spjd } 2285168404Spjd 2286168962Spjd if (vp->v_type != VDIR) { 2287249195Smm error = SET_ERROR(ENOTDIR); 2288168962Spjd goto out; 2289168962Spjd } 2290168962Spjd 2291168962Spjd if (vp == cwd) { 2292249195Smm error = SET_ERROR(EINVAL); 2293168962Spjd goto out; 2294168962Spjd } 2295168962Spjd 2296185029Spjd vnevent_rmdir(vp, dvp, name, ct); 2297168962Spjd 2298168404Spjd /* 2299168404Spjd * Grab a lock on the directory to make sure that noone is 2300168404Spjd * trying to add (or lookup) entries while we are removing it. 2301168404Spjd */ 2302168404Spjd rw_enter(&zp->z_name_lock, RW_WRITER); 2303168404Spjd 2304168404Spjd /* 2305168404Spjd * Grab a lock on the parent pointer to make sure we play well 2306168404Spjd * with the treewalk and directory rename code. 
2307168404Spjd */ 2308168404Spjd rw_enter(&zp->z_parent_lock, RW_WRITER); 2309168404Spjd 2310168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2311168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2312219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2313168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2314219089Spjd zfs_sa_upgrade_txholds(tx, zp); 2315219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 2316209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 2317168404Spjd if (error) { 2318168404Spjd rw_exit(&zp->z_parent_lock); 2319168404Spjd rw_exit(&zp->z_name_lock); 2320168404Spjd zfs_dirent_unlock(dl); 2321168962Spjd VN_RELE(vp); 2322209962Smm if (error == ERESTART) { 2323168404Spjd dmu_tx_wait(tx); 2324168404Spjd dmu_tx_abort(tx); 2325168404Spjd goto top; 2326168404Spjd } 2327168404Spjd dmu_tx_abort(tx); 2328168404Spjd ZFS_EXIT(zfsvfs); 2329168404Spjd return (error); 2330168404Spjd } 2331168404Spjd 2332168404Spjd#ifdef FREEBSD_NAMECACHE 2333168404Spjd cache_purge(dvp); 2334168404Spjd#endif 2335168404Spjd 2336185029Spjd error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 2337168404Spjd 2338185029Spjd if (error == 0) { 2339185029Spjd uint64_t txtype = TX_RMDIR; 2340185029Spjd if (flags & FIGNORECASE) 2341185029Spjd txtype |= TX_CI; 2342219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2343185029Spjd } 2344168404Spjd 2345168404Spjd dmu_tx_commit(tx); 2346168404Spjd 2347168404Spjd rw_exit(&zp->z_parent_lock); 2348168404Spjd rw_exit(&zp->z_name_lock); 2349168404Spjd#ifdef FREEBSD_NAMECACHE 2350168404Spjd cache_purge(vp); 2351168404Spjd#endif 2352168404Spjdout: 2353168404Spjd zfs_dirent_unlock(dl); 2354168404Spjd 2355168962Spjd VN_RELE(vp); 2356168962Spjd 2357219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2358219089Spjd zil_commit(zilog, 0); 2359219089Spjd 2360168404Spjd ZFS_EXIT(zfsvfs); 2361168404Spjd return (error); 2362168404Spjd} 2363168404Spjd 2364168404Spjd/* 2365168404Spjd * Read as many directory entries as will 
fit into the provided 2366168404Spjd * buffer from the given directory cursor position (specified in 2367168404Spjd * the uio structure. 2368168404Spjd * 2369168404Spjd * IN: vp - vnode of directory to read. 2370168404Spjd * uio - structure supplying read location, range info, 2371168404Spjd * and return buffer. 2372168404Spjd * cr - credentials of caller. 2373185029Spjd * ct - caller context 2374185029Spjd * flags - case flags 2375168404Spjd * 2376168404Spjd * OUT: uio - updated offset and range, buffer filled. 2377168404Spjd * eofp - set to true if end-of-file detected. 2378168404Spjd * 2379168404Spjd * RETURN: 0 if success 2380168404Spjd * error code if failure 2381168404Spjd * 2382168404Spjd * Timestamps: 2383168404Spjd * vp - atime updated 2384168404Spjd * 2385168404Spjd * Note that the low 4 bits of the cookie returned by zap is always zero. 2386168404Spjd * This allows us to use the low range for "special" directory entries: 2387168404Spjd * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2388168404Spjd * we use the offset 2 for the '.zfs' directory. 
2389168404Spjd */ 2390168404Spjd/* ARGSUSED */ 2391168404Spjdstatic int 2392168962Spjdzfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) 2393168404Spjd{ 2394168404Spjd znode_t *zp = VTOZ(vp); 2395168404Spjd iovec_t *iovp; 2396185029Spjd edirent_t *eodp; 2397168404Spjd dirent64_t *odp; 2398168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2399168404Spjd objset_t *os; 2400168404Spjd caddr_t outbuf; 2401168404Spjd size_t bufsize; 2402168404Spjd zap_cursor_t zc; 2403168404Spjd zap_attribute_t zap; 2404168404Spjd uint_t bytes_wanted; 2405168404Spjd uint64_t offset; /* must be unsigned; checks for < 1 */ 2406219089Spjd uint64_t parent; 2407168404Spjd int local_eof; 2408168404Spjd int outcount; 2409168404Spjd int error; 2410168404Spjd uint8_t prefetch; 2411185029Spjd boolean_t check_sysattrs; 2412168404Spjd uint8_t type; 2413168962Spjd int ncooks; 2414168962Spjd u_long *cooks = NULL; 2415185029Spjd int flags = 0; 2416168404Spjd 2417168404Spjd ZFS_ENTER(zfsvfs); 2418185029Spjd ZFS_VERIFY_ZP(zp); 2419168404Spjd 2420219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 2421219089Spjd &parent, sizeof (parent))) != 0) { 2422219089Spjd ZFS_EXIT(zfsvfs); 2423219089Spjd return (error); 2424219089Spjd } 2425219089Spjd 2426168404Spjd /* 2427168404Spjd * If we are not given an eof variable, 2428168404Spjd * use a local one. 2429168404Spjd */ 2430168404Spjd if (eofp == NULL) 2431168404Spjd eofp = &local_eof; 2432168404Spjd 2433168404Spjd /* 2434168404Spjd * Check for valid iov_len. 
2435168404Spjd */ 2436168404Spjd if (uio->uio_iov->iov_len <= 0) { 2437168404Spjd ZFS_EXIT(zfsvfs); 2438249195Smm return (SET_ERROR(EINVAL)); 2439168404Spjd } 2440168404Spjd 2441168404Spjd /* 2442168404Spjd * Quit if directory has been removed (posix) 2443168404Spjd */ 2444168404Spjd if ((*eofp = zp->z_unlinked) != 0) { 2445168404Spjd ZFS_EXIT(zfsvfs); 2446168404Spjd return (0); 2447168404Spjd } 2448168404Spjd 2449168404Spjd error = 0; 2450168404Spjd os = zfsvfs->z_os; 2451168404Spjd offset = uio->uio_loffset; 2452168404Spjd prefetch = zp->z_zn_prefetch; 2453168404Spjd 2454168404Spjd /* 2455168404Spjd * Initialize the iterator cursor. 2456168404Spjd */ 2457168404Spjd if (offset <= 3) { 2458168404Spjd /* 2459168404Spjd * Start iteration from the beginning of the directory. 2460168404Spjd */ 2461168404Spjd zap_cursor_init(&zc, os, zp->z_id); 2462168404Spjd } else { 2463168404Spjd /* 2464168404Spjd * The offset is a serialized cursor. 2465168404Spjd */ 2466168404Spjd zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2467168404Spjd } 2468168404Spjd 2469168404Spjd /* 2470168404Spjd * Get space to change directory entries into fs independent format. 2471168404Spjd */ 2472168404Spjd iovp = uio->uio_iov; 2473168404Spjd bytes_wanted = iovp->iov_len; 2474168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2475168404Spjd bufsize = bytes_wanted; 2476168404Spjd outbuf = kmem_alloc(bufsize, KM_SLEEP); 2477168404Spjd odp = (struct dirent64 *)outbuf; 2478168404Spjd } else { 2479168404Spjd bufsize = bytes_wanted; 2480247187Smm outbuf = NULL; 2481168404Spjd odp = (struct dirent64 *)iovp->iov_base; 2482168404Spjd } 2483185029Spjd eodp = (struct edirent *)odp; 2484168404Spjd 2485169170Spjd if (ncookies != NULL) { 2486168404Spjd /* 2487168404Spjd * Minimum entry size is dirent size and 1 byte for a file name. 
2488168404Spjd */ 2489168962Spjd ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2490219404Spjd cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2491219404Spjd *cookies = cooks; 2492168962Spjd *ncookies = ncooks; 2493168404Spjd } 2494185029Spjd /* 2495185029Spjd * If this VFS supports the system attribute view interface; and 2496185029Spjd * we're looking at an extended attribute directory; and we care 2497185029Spjd * about normalization conflicts on this vfs; then we must check 2498185029Spjd * for normalization conflicts with the sysattr name space. 2499185029Spjd */ 2500185029Spjd#ifdef TODO 2501185029Spjd check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2502185029Spjd (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2503185029Spjd (flags & V_RDDIR_ENTFLAGS); 2504185029Spjd#else 2505185029Spjd check_sysattrs = 0; 2506185029Spjd#endif 2507168404Spjd 2508168404Spjd /* 2509168404Spjd * Transform to file-system independent format 2510168404Spjd */ 2511168404Spjd outcount = 0; 2512168404Spjd while (outcount < bytes_wanted) { 2513168404Spjd ino64_t objnum; 2514168404Spjd ushort_t reclen; 2515219089Spjd off64_t *next = NULL; 2516168404Spjd 2517168404Spjd /* 2518168404Spjd * Special case `.', `..', and `.zfs'. 
2519168404Spjd */ 2520168404Spjd if (offset == 0) { 2521168404Spjd (void) strcpy(zap.za_name, "."); 2522185029Spjd zap.za_normalization_conflict = 0; 2523168404Spjd objnum = zp->z_id; 2524169108Spjd type = DT_DIR; 2525168404Spjd } else if (offset == 1) { 2526168404Spjd (void) strcpy(zap.za_name, ".."); 2527185029Spjd zap.za_normalization_conflict = 0; 2528219089Spjd objnum = parent; 2529169108Spjd type = DT_DIR; 2530168404Spjd } else if (offset == 2 && zfs_show_ctldir(zp)) { 2531168404Spjd (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2532185029Spjd zap.za_normalization_conflict = 0; 2533168404Spjd objnum = ZFSCTL_INO_ROOT; 2534169108Spjd type = DT_DIR; 2535168404Spjd } else { 2536168404Spjd /* 2537168404Spjd * Grab next entry. 2538168404Spjd */ 2539168404Spjd if (error = zap_cursor_retrieve(&zc, &zap)) { 2540168404Spjd if ((*eofp = (error == ENOENT)) != 0) 2541168404Spjd break; 2542168404Spjd else 2543168404Spjd goto update; 2544168404Spjd } 2545168404Spjd 2546168404Spjd if (zap.za_integer_length != 8 || 2547168404Spjd zap.za_num_integers != 1) { 2548168404Spjd cmn_err(CE_WARN, "zap_readdir: bad directory " 2549168404Spjd "entry, obj = %lld, offset = %lld\n", 2550168404Spjd (u_longlong_t)zp->z_id, 2551168404Spjd (u_longlong_t)offset); 2552249195Smm error = SET_ERROR(ENXIO); 2553168404Spjd goto update; 2554168404Spjd } 2555168404Spjd 2556168404Spjd objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2557168404Spjd /* 2558168404Spjd * MacOS X can extract the object type here such as: 2559168404Spjd * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2560168404Spjd */ 2561168404Spjd type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2562185029Spjd 2563185029Spjd if (check_sysattrs && !zap.za_normalization_conflict) { 2564185029Spjd#ifdef TODO 2565185029Spjd zap.za_normalization_conflict = 2566185029Spjd xattr_sysattr_casechk(zap.za_name); 2567185029Spjd#else 2568185029Spjd panic("%s:%u: TODO", __func__, __LINE__); 2569185029Spjd#endif 2570185029Spjd } 2571168404Spjd } 
2572168404Spjd 2573211932Smm if (flags & V_RDDIR_ACCFILTER) { 2574211932Smm /* 2575211932Smm * If we have no access at all, don't include 2576211932Smm * this entry in the returned information 2577211932Smm */ 2578211932Smm znode_t *ezp; 2579211932Smm if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2580211932Smm goto skip_entry; 2581211932Smm if (!zfs_has_access(ezp, cr)) { 2582211932Smm VN_RELE(ZTOV(ezp)); 2583211932Smm goto skip_entry; 2584211932Smm } 2585211932Smm VN_RELE(ZTOV(ezp)); 2586211932Smm } 2587211932Smm 2588185029Spjd if (flags & V_RDDIR_ENTFLAGS) 2589185029Spjd reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2590185029Spjd else 2591185029Spjd reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2592185029Spjd 2593168404Spjd /* 2594168404Spjd * Will this entry fit in the buffer? 2595168404Spjd */ 2596168404Spjd if (outcount + reclen > bufsize) { 2597168404Spjd /* 2598168404Spjd * Did we manage to fit anything in the buffer? 2599168404Spjd */ 2600168404Spjd if (!outcount) { 2601249195Smm error = SET_ERROR(EINVAL); 2602168404Spjd goto update; 2603168404Spjd } 2604168404Spjd break; 2605168404Spjd } 2606185029Spjd if (flags & V_RDDIR_ENTFLAGS) { 2607185029Spjd /* 2608185029Spjd * Add extended flag entry: 2609185029Spjd */ 2610185029Spjd eodp->ed_ino = objnum; 2611185029Spjd eodp->ed_reclen = reclen; 2612185029Spjd /* NOTE: ed_off is the offset for the *next* entry */ 2613185029Spjd next = &(eodp->ed_off); 2614185029Spjd eodp->ed_eflags = zap.za_normalization_conflict ? 
2615185029Spjd ED_CASE_CONFLICT : 0; 2616185029Spjd (void) strncpy(eodp->ed_name, zap.za_name, 2617185029Spjd EDIRENT_NAMELEN(reclen)); 2618185029Spjd eodp = (edirent_t *)((intptr_t)eodp + reclen); 2619185029Spjd } else { 2620185029Spjd /* 2621185029Spjd * Add normal entry: 2622185029Spjd */ 2623185029Spjd odp->d_ino = objnum; 2624185029Spjd odp->d_reclen = reclen; 2625185029Spjd odp->d_namlen = strlen(zap.za_name); 2626185029Spjd (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2627185029Spjd odp->d_type = type; 2628185029Spjd odp = (dirent64_t *)((intptr_t)odp + reclen); 2629185029Spjd } 2630168404Spjd outcount += reclen; 2631168404Spjd 2632168404Spjd ASSERT(outcount <= bufsize); 2633168404Spjd 2634168404Spjd /* Prefetch znode */ 2635168404Spjd if (prefetch) 2636168404Spjd dmu_prefetch(os, objnum, 0, 0); 2637168404Spjd 2638211932Smm skip_entry: 2639168404Spjd /* 2640168404Spjd * Move to the next entry, fill in the previous offset. 2641168404Spjd */ 2642168404Spjd if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2643168404Spjd zap_cursor_advance(&zc); 2644168404Spjd offset = zap_cursor_serialize(&zc); 2645168404Spjd } else { 2646168404Spjd offset += 1; 2647168404Spjd } 2648219404Spjd 2649219404Spjd if (cooks != NULL) { 2650219404Spjd *cooks++ = offset; 2651219404Spjd ncooks--; 2652219404Spjd KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2653219404Spjd } 2654168404Spjd } 2655168404Spjd zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2656168404Spjd 2657168404Spjd /* Subtract unused cookies */ 2658168962Spjd if (ncookies != NULL) 2659168962Spjd *ncookies -= ncooks; 2660168404Spjd 2661168404Spjd if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2662168404Spjd iovp->iov_base += outcount; 2663168404Spjd iovp->iov_len -= outcount; 2664168404Spjd uio->uio_resid -= outcount; 2665168404Spjd } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2666168404Spjd /* 2667168404Spjd * Reset the pointer. 
2668168404Spjd */ 2669168404Spjd offset = uio->uio_loffset; 2670168404Spjd } 2671168404Spjd 2672168404Spjdupdate: 2673168404Spjd zap_cursor_fini(&zc); 2674168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2675168404Spjd kmem_free(outbuf, bufsize); 2676168404Spjd 2677168404Spjd if (error == ENOENT) 2678168404Spjd error = 0; 2679168404Spjd 2680168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2681168404Spjd 2682168404Spjd uio->uio_loffset = offset; 2683168404Spjd ZFS_EXIT(zfsvfs); 2684169107Spjd if (error != 0 && cookies != NULL) { 2685168962Spjd free(*cookies, M_TEMP); 2686168962Spjd *cookies = NULL; 2687168962Spjd *ncookies = 0; 2688168404Spjd } 2689168404Spjd return (error); 2690168404Spjd} 2691168404Spjd 2692185029Spjdulong_t zfs_fsync_sync_cnt = 4; 2693185029Spjd 2694168404Spjdstatic int 2695185029Spjdzfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2696168404Spjd{ 2697168962Spjd znode_t *zp = VTOZ(vp); 2698168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2699168404Spjd 2700185029Spjd (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2701185029Spjd 2702219089Spjd if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 2703219089Spjd ZFS_ENTER(zfsvfs); 2704219089Spjd ZFS_VERIFY_ZP(zp); 2705219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 2706219089Spjd ZFS_EXIT(zfsvfs); 2707219089Spjd } 2708168404Spjd return (0); 2709168404Spjd} 2710168404Spjd 2711185029Spjd 2712168404Spjd/* 2713168404Spjd * Get the requested file attributes and place them in the provided 2714168404Spjd * vattr structure. 2715168404Spjd * 2716168404Spjd * IN: vp - vnode of file. 2717168404Spjd * vap - va_mask identifies requested attributes. 2718185029Spjd * If AT_XVATTR set, then optional attrs are requested 2719185029Spjd * flags - ATTR_NOACLCHECK (CIFS server context) 2720168404Spjd * cr - credentials of caller. 2721185029Spjd * ct - caller context 2722168404Spjd * 2723168404Spjd * OUT: vap - attribute values. 
2724168404Spjd * 2725168404Spjd * RETURN: 0 (always succeeds) 2726168404Spjd */ 2727168404Spjd/* ARGSUSED */ 2728168404Spjdstatic int 2729185029Spjdzfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2730185029Spjd caller_context_t *ct) 2731168404Spjd{ 2732168962Spjd znode_t *zp = VTOZ(vp); 2733168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2734185029Spjd int error = 0; 2735168962Spjd uint32_t blksize; 2736168962Spjd u_longlong_t nblocks; 2737185029Spjd uint64_t links; 2738224251Sdelphij uint64_t mtime[2], ctime[2], crtime[2], rdev; 2739185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2740185029Spjd xoptattr_t *xoap = NULL; 2741185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2742224251Sdelphij sa_bulk_attr_t bulk[4]; 2743219089Spjd int count = 0; 2744168404Spjd 2745168404Spjd ZFS_ENTER(zfsvfs); 2746185029Spjd ZFS_VERIFY_ZP(zp); 2747168404Spjd 2748219089Spjd zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2749219089Spjd 2750219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 2751219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 2752243807Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 2753224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2754224251Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 2755224251Sdelphij &rdev, 8); 2756219089Spjd 2757219089Spjd if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 2758219089Spjd ZFS_EXIT(zfsvfs); 2759219089Spjd return (error); 2760219089Spjd } 2761219089Spjd 2762168404Spjd /* 2763185029Spjd * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2764185029Spjd * Also, if we are the owner don't bother, since owner should 2765185029Spjd * always be allowed to read basic attributes of file. 
2766185029Spjd */ 2767219089Spjd if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2768219089Spjd (vap->va_uid != crgetuid(cr))) { 2769185029Spjd if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2770185029Spjd skipaclchk, cr)) { 2771185029Spjd ZFS_EXIT(zfsvfs); 2772185029Spjd return (error); 2773185029Spjd } 2774185029Spjd } 2775185029Spjd 2776185029Spjd /* 2777168404Spjd * Return all attributes. It's cheaper to provide the answer 2778168404Spjd * than to determine whether we were asked the question. 2779168404Spjd */ 2780168404Spjd 2781209097Smm mutex_enter(&zp->z_lock); 2782219089Spjd vap->va_type = IFTOVT(zp->z_mode); 2783219089Spjd vap->va_mode = zp->z_mode & ~S_IFMT; 2784224252Sdelphij#ifdef sun 2785224252Sdelphij vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2786224252Sdelphij#else 2787224252Sdelphij vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 2788224252Sdelphij#endif 2789168404Spjd vap->va_nodeid = zp->z_id; 2790185029Spjd if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2791219089Spjd links = zp->z_links + 1; 2792185029Spjd else 2793219089Spjd links = zp->z_links; 2794229425Sdim vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! */ 2795219089Spjd vap->va_size = zp->z_size; 2796224252Sdelphij#ifdef sun 2797224252Sdelphij vap->va_rdev = vp->v_rdev; 2798224252Sdelphij#else 2799224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2800224251Sdelphij vap->va_rdev = zfs_cmpldev(rdev); 2801224252Sdelphij#endif 2802168404Spjd vap->va_seq = zp->z_seq; 2803168404Spjd vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2804168404Spjd 2805185029Spjd /* 2806185029Spjd * Add in any requested optional attributes and the create time. 2807185029Spjd * Also set the corresponding bits in the returned attribute bitmap. 
2808185029Spjd */ 2809185029Spjd if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2810185029Spjd if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2811185029Spjd xoap->xoa_archive = 2812219089Spjd ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2813185029Spjd XVA_SET_RTN(xvap, XAT_ARCHIVE); 2814185029Spjd } 2815185029Spjd 2816185029Spjd if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2817185029Spjd xoap->xoa_readonly = 2818219089Spjd ((zp->z_pflags & ZFS_READONLY) != 0); 2819185029Spjd XVA_SET_RTN(xvap, XAT_READONLY); 2820185029Spjd } 2821185029Spjd 2822185029Spjd if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2823185029Spjd xoap->xoa_system = 2824219089Spjd ((zp->z_pflags & ZFS_SYSTEM) != 0); 2825185029Spjd XVA_SET_RTN(xvap, XAT_SYSTEM); 2826185029Spjd } 2827185029Spjd 2828185029Spjd if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2829185029Spjd xoap->xoa_hidden = 2830219089Spjd ((zp->z_pflags & ZFS_HIDDEN) != 0); 2831185029Spjd XVA_SET_RTN(xvap, XAT_HIDDEN); 2832185029Spjd } 2833185029Spjd 2834185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2835185029Spjd xoap->xoa_nounlink = 2836219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2837185029Spjd XVA_SET_RTN(xvap, XAT_NOUNLINK); 2838185029Spjd } 2839185029Spjd 2840185029Spjd if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2841185029Spjd xoap->xoa_immutable = 2842219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2843185029Spjd XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2844185029Spjd } 2845185029Spjd 2846185029Spjd if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2847185029Spjd xoap->xoa_appendonly = 2848219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2849185029Spjd XVA_SET_RTN(xvap, XAT_APPENDONLY); 2850185029Spjd } 2851185029Spjd 2852185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2853185029Spjd xoap->xoa_nodump = 2854219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0); 2855185029Spjd XVA_SET_RTN(xvap, XAT_NODUMP); 2856185029Spjd } 2857185029Spjd 2858185029Spjd if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2859185029Spjd xoap->xoa_opaque = 2860219089Spjd 
((zp->z_pflags & ZFS_OPAQUE) != 0); 2861185029Spjd XVA_SET_RTN(xvap, XAT_OPAQUE); 2862185029Spjd } 2863185029Spjd 2864185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2865185029Spjd xoap->xoa_av_quarantined = 2866219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2867185029Spjd XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2868185029Spjd } 2869185029Spjd 2870185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2871185029Spjd xoap->xoa_av_modified = 2872219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2873185029Spjd XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2874185029Spjd } 2875185029Spjd 2876185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2877219089Spjd vp->v_type == VREG) { 2878219089Spjd zfs_sa_get_scanstamp(zp, xvap); 2879185029Spjd } 2880185029Spjd 2881185029Spjd if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 2882219089Spjd uint64_t times[2]; 2883219089Spjd 2884219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 2885219089Spjd times, sizeof (times)); 2886219089Spjd ZFS_TIME_DECODE(&xoap->xoa_createtime, times); 2887185029Spjd XVA_SET_RTN(xvap, XAT_CREATETIME); 2888185029Spjd } 2889219089Spjd 2890219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2891219089Spjd xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 2892219089Spjd XVA_SET_RTN(xvap, XAT_REPARSE); 2893219089Spjd } 2894219089Spjd if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 2895219089Spjd xoap->xoa_generation = zp->z_gen; 2896219089Spjd XVA_SET_RTN(xvap, XAT_GEN); 2897219089Spjd } 2898219089Spjd 2899219089Spjd if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2900219089Spjd xoap->xoa_offline = 2901219089Spjd ((zp->z_pflags & ZFS_OFFLINE) != 0); 2902219089Spjd XVA_SET_RTN(xvap, XAT_OFFLINE); 2903219089Spjd } 2904219089Spjd 2905219089Spjd if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2906219089Spjd xoap->xoa_sparse = 2907219089Spjd ((zp->z_pflags & ZFS_SPARSE) != 0); 2908219089Spjd XVA_SET_RTN(xvap, XAT_SPARSE); 2909219089Spjd } 2910185029Spjd } 2911185029Spjd 2912219089Spjd 
ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 2913219089Spjd ZFS_TIME_DECODE(&vap->va_mtime, mtime); 2914219089Spjd ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2915219089Spjd ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 2916168404Spjd 2917168404Spjd mutex_exit(&zp->z_lock); 2918168404Spjd 2919219089Spjd sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 2920168404Spjd vap->va_blksize = blksize; 2921168404Spjd vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 2922168404Spjd 2923168404Spjd if (zp->z_blksz == 0) { 2924168404Spjd /* 2925168404Spjd * Block size hasn't been set; suggest maximal I/O transfers. 2926168404Spjd */ 2927168404Spjd vap->va_blksize = zfsvfs->z_max_blksz; 2928168404Spjd } 2929168404Spjd 2930168404Spjd ZFS_EXIT(zfsvfs); 2931168404Spjd return (0); 2932168404Spjd} 2933168404Spjd 2934168404Spjd/* 2935168404Spjd * Set the file attributes to the values contained in the 2936168404Spjd * vattr structure. 2937168404Spjd * 2938168404Spjd * IN: vp - vnode of file to be modified. 2939168404Spjd * vap - new attribute values. 2940185029Spjd * If AT_XVATTR set, then optional attrs are being set 2941168404Spjd * flags - ATTR_UTIME set if non-default time values provided. 2942185029Spjd * - ATTR_NOACLCHECK (CIFS context only). 2943168404Spjd * cr - credentials of caller. 2944185029Spjd * ct - caller context 2945168404Spjd * 2946168404Spjd * RETURN: 0 if success 2947168404Spjd * error code if failure 2948168404Spjd * 2949168404Spjd * Timestamps: 2950168404Spjd * vp - ctime updated, mtime updated if size changed. 
2951168404Spjd */ 2952168404Spjd/* ARGSUSED */ 2953168404Spjdstatic int 2954168962Spjdzfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2955168962Spjd caller_context_t *ct) 2956168404Spjd{ 2957185029Spjd znode_t *zp = VTOZ(vp); 2958168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2959185029Spjd zilog_t *zilog; 2960168404Spjd dmu_tx_t *tx; 2961168404Spjd vattr_t oldva; 2962209962Smm xvattr_t tmpxvattr; 2963168962Spjd uint_t mask = vap->va_mask; 2964247187Smm uint_t saved_mask = 0; 2965197831Spjd uint64_t saved_mode; 2966168404Spjd int trim_mask = 0; 2967168404Spjd uint64_t new_mode; 2968209962Smm uint64_t new_uid, new_gid; 2969219089Spjd uint64_t xattr_obj; 2970219089Spjd uint64_t mtime[2], ctime[2]; 2971168404Spjd znode_t *attrzp; 2972168404Spjd int need_policy = FALSE; 2973219089Spjd int err, err2; 2974185029Spjd zfs_fuid_info_t *fuidp = NULL; 2975185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2976185029Spjd xoptattr_t *xoap; 2977219089Spjd zfs_acl_t *aclp; 2978185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 2979219089Spjd boolean_t fuid_dirtied = B_FALSE; 2980219089Spjd sa_bulk_attr_t bulk[7], xattr_bulk[7]; 2981219089Spjd int count = 0, xattr_count = 0; 2982168404Spjd 2983168404Spjd if (mask == 0) 2984168404Spjd return (0); 2985168404Spjd 2986168962Spjd if (mask & AT_NOSET) 2987249195Smm return (SET_ERROR(EINVAL)); 2988168962Spjd 2989185029Spjd ZFS_ENTER(zfsvfs); 2990185029Spjd ZFS_VERIFY_ZP(zp); 2991185029Spjd 2992185029Spjd zilog = zfsvfs->z_log; 2993185029Spjd 2994185029Spjd /* 2995185029Spjd * Make sure that if we have ephemeral uid/gid or xvattr specified 2996185029Spjd * that file system is at proper version level 2997185029Spjd */ 2998185029Spjd 2999185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 3000185029Spjd (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 3001185029Spjd ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 3002185029Spjd (mask & AT_XVATTR))) { 3003185029Spjd ZFS_EXIT(zfsvfs); 3004249195Smm return (SET_ERROR(EINVAL)); 3005185029Spjd } 3006185029Spjd 3007185029Spjd if (mask & AT_SIZE && vp->v_type == VDIR) { 3008185029Spjd ZFS_EXIT(zfsvfs); 3009249195Smm return (SET_ERROR(EISDIR)); 3010185029Spjd } 3011168404Spjd 3012185029Spjd if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 3013185029Spjd ZFS_EXIT(zfsvfs); 3014249195Smm return (SET_ERROR(EINVAL)); 3015185029Spjd } 3016168404Spjd 3017185029Spjd /* 3018185029Spjd * If this is an xvattr_t, then get a pointer to the structure of 3019185029Spjd * optional attributes. If this is NULL, then we have a vattr_t. 
3020185029Spjd */ 3021185029Spjd xoap = xva_getxoptattr(xvap); 3022168404Spjd 3023209962Smm xva_init(&tmpxvattr); 3024209962Smm 3025185029Spjd /* 3026185029Spjd * Immutable files can only alter immutable bit and atime 3027185029Spjd */ 3028219089Spjd if ((zp->z_pflags & ZFS_IMMUTABLE) && 3029185029Spjd ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 3030185029Spjd ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 3031185029Spjd ZFS_EXIT(zfsvfs); 3032249195Smm return (SET_ERROR(EPERM)); 3033185029Spjd } 3034185029Spjd 3035219089Spjd if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { 3036185029Spjd ZFS_EXIT(zfsvfs); 3037249195Smm return (SET_ERROR(EPERM)); 3038185029Spjd } 3039185029Spjd 3040185029Spjd /* 3041185029Spjd * Verify timestamps doesn't overflow 32 bits. 3042185029Spjd * ZFS can handle large timestamps, but 32bit syscalls can't 3043185029Spjd * handle times greater than 2039. This check should be removed 3044185029Spjd * once large timestamps are fully supported. 3045185029Spjd */ 3046185029Spjd if (mask & (AT_ATIME | AT_MTIME)) { 3047185029Spjd if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 3048185029Spjd ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 3049185029Spjd ZFS_EXIT(zfsvfs); 3050249195Smm return (SET_ERROR(EOVERFLOW)); 3051185029Spjd } 3052185029Spjd } 3053185029Spjd 3054168404Spjdtop: 3055168404Spjd attrzp = NULL; 3056219089Spjd aclp = NULL; 3057168404Spjd 3058211932Smm /* Can this be moved to before the top label? 
*/ 3059168404Spjd if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 3060168404Spjd ZFS_EXIT(zfsvfs); 3061249195Smm return (SET_ERROR(EROFS)); 3062168404Spjd } 3063168404Spjd 3064168404Spjd /* 3065168404Spjd * First validate permissions 3066168404Spjd */ 3067168404Spjd 3068168404Spjd if (mask & AT_SIZE) { 3069168404Spjd /* 3070168404Spjd * XXX - Note, we are not providing any open 3071168404Spjd * mode flags here (like FNDELAY), so we may 3072168404Spjd * block if there are locks present... this 3073168404Spjd * should be addressed in openat(). 3074168404Spjd */ 3075185029Spjd /* XXX - would it be OK to generate a log record here? */ 3076185029Spjd err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 3077168404Spjd if (err) { 3078168404Spjd ZFS_EXIT(zfsvfs); 3079168404Spjd return (err); 3080168404Spjd } 3081168404Spjd } 3082168404Spjd 3083185029Spjd if (mask & (AT_ATIME|AT_MTIME) || 3084185029Spjd ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 3085185029Spjd XVA_ISSET_REQ(xvap, XAT_READONLY) || 3086185029Spjd XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 3087219089Spjd XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 3088219089Spjd XVA_ISSET_REQ(xvap, XAT_SPARSE) || 3089185029Spjd XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 3090219089Spjd XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 3091185029Spjd need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 3092185029Spjd skipaclchk, cr); 3093219089Spjd } 3094168404Spjd 3095168404Spjd if (mask & (AT_UID|AT_GID)) { 3096168404Spjd int idmask = (mask & (AT_UID|AT_GID)); 3097168404Spjd int take_owner; 3098168404Spjd int take_group; 3099168404Spjd 3100168404Spjd /* 3101168404Spjd * NOTE: even if a new mode is being set, 3102168404Spjd * we may clear S_ISUID/S_ISGID bits. 
3103168404Spjd */ 3104168404Spjd 3105168404Spjd if (!(mask & AT_MODE)) 3106219089Spjd vap->va_mode = zp->z_mode; 3107168404Spjd 3108168404Spjd /* 3109168404Spjd * Take ownership or chgrp to group we are a member of 3110168404Spjd */ 3111168404Spjd 3112168404Spjd take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 3113185029Spjd take_group = (mask & AT_GID) && 3114185029Spjd zfs_groupmember(zfsvfs, vap->va_gid, cr); 3115168404Spjd 3116168404Spjd /* 3117168404Spjd * If both AT_UID and AT_GID are set then take_owner and 3118168404Spjd * take_group must both be set in order to allow taking 3119168404Spjd * ownership. 3120168404Spjd * 3121168404Spjd * Otherwise, send the check through secpolicy_vnode_setattr() 3122168404Spjd * 3123168404Spjd */ 3124168404Spjd 3125168404Spjd if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 3126168404Spjd ((idmask == AT_UID) && take_owner) || 3127168404Spjd ((idmask == AT_GID) && take_group)) { 3128185029Spjd if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 3129185029Spjd skipaclchk, cr) == 0) { 3130168404Spjd /* 3131168404Spjd * Remove setuid/setgid for non-privileged users 3132168404Spjd */ 3133185029Spjd secpolicy_setid_clear(vap, vp, cr); 3134168404Spjd trim_mask = (mask & (AT_UID|AT_GID)); 3135168404Spjd } else { 3136168404Spjd need_policy = TRUE; 3137168404Spjd } 3138168404Spjd } else { 3139168404Spjd need_policy = TRUE; 3140168404Spjd } 3141168404Spjd } 3142168404Spjd 3143168404Spjd mutex_enter(&zp->z_lock); 3144219089Spjd oldva.va_mode = zp->z_mode; 3145185029Spjd zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 3146185029Spjd if (mask & AT_XVATTR) { 3147209962Smm /* 3148209962Smm * Update xvattr mask to include only those attributes 3149209962Smm * that are actually changing. 3150209962Smm * 3151209962Smm * the bits will be restored prior to actually setting 3152209962Smm * the attributes so the caller thinks they were set. 
3153209962Smm */ 3154209962Smm if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3155209962Smm if (xoap->xoa_appendonly != 3156219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 3157209962Smm need_policy = TRUE; 3158209962Smm } else { 3159209962Smm XVA_CLR_REQ(xvap, XAT_APPENDONLY); 3160209962Smm XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 3161209962Smm } 3162209962Smm } 3163209962Smm 3164209962Smm if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3165209962Smm if (xoap->xoa_nounlink != 3166219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 3167209962Smm need_policy = TRUE; 3168209962Smm } else { 3169209962Smm XVA_CLR_REQ(xvap, XAT_NOUNLINK); 3170209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 3171209962Smm } 3172209962Smm } 3173209962Smm 3174209962Smm if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3175209962Smm if (xoap->xoa_immutable != 3176219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 3177209962Smm need_policy = TRUE; 3178209962Smm } else { 3179209962Smm XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 3180209962Smm XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 3181209962Smm } 3182209962Smm } 3183209962Smm 3184209962Smm if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3185209962Smm if (xoap->xoa_nodump != 3186219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0)) { 3187209962Smm need_policy = TRUE; 3188209962Smm } else { 3189209962Smm XVA_CLR_REQ(xvap, XAT_NODUMP); 3190209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 3191209962Smm } 3192209962Smm } 3193209962Smm 3194209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3195209962Smm if (xoap->xoa_av_modified != 3196219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 3197209962Smm need_policy = TRUE; 3198209962Smm } else { 3199209962Smm XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 3200209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 3201209962Smm } 3202209962Smm } 3203209962Smm 3204209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3205209962Smm if ((vp->v_type != VREG && 3206209962Smm xoap->xoa_av_quarantined) || 3207209962Smm xoap->xoa_av_quarantined != 
3208219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 3209209962Smm need_policy = TRUE; 3210209962Smm } else { 3211209962Smm XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 3212209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 3213209962Smm } 3214209962Smm } 3215209962Smm 3216219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3217219089Spjd mutex_exit(&zp->z_lock); 3218219089Spjd ZFS_EXIT(zfsvfs); 3219249195Smm return (SET_ERROR(EPERM)); 3220219089Spjd } 3221219089Spjd 3222209962Smm if (need_policy == FALSE && 3223209962Smm (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3224209962Smm XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3225185029Spjd need_policy = TRUE; 3226185029Spjd } 3227185029Spjd } 3228185029Spjd 3229168404Spjd mutex_exit(&zp->z_lock); 3230168404Spjd 3231168404Spjd if (mask & AT_MODE) { 3232185029Spjd if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 3233168962Spjd err = secpolicy_setid_setsticky_clear(vp, vap, 3234168962Spjd &oldva, cr); 3235168962Spjd if (err) { 3236168962Spjd ZFS_EXIT(zfsvfs); 3237168962Spjd return (err); 3238168962Spjd } 3239168404Spjd trim_mask |= AT_MODE; 3240168404Spjd } else { 3241168404Spjd need_policy = TRUE; 3242168404Spjd } 3243168404Spjd } 3244168404Spjd 3245168404Spjd if (need_policy) { 3246168404Spjd /* 3247168404Spjd * If trim_mask is set then take ownership 3248168404Spjd * has been granted or write_acl is present and user 3249168404Spjd * has the ability to modify mode. In that case remove 3250168404Spjd * UID|GID and or MODE from mask so that 3251168404Spjd * secpolicy_vnode_setattr() doesn't revoke it. 3252168404Spjd */ 3253168404Spjd 3254168404Spjd if (trim_mask) { 3255168404Spjd saved_mask = vap->va_mask; 3256168404Spjd vap->va_mask &= ~trim_mask; 3257197831Spjd if (trim_mask & AT_MODE) { 3258197831Spjd /* 3259197831Spjd * Save the mode, as secpolicy_vnode_setattr() 3260197831Spjd * will overwrite it with ova.va_mode. 
3261197831Spjd */ 3262197831Spjd saved_mode = vap->va_mode; 3263197831Spjd } 3264168404Spjd } 3265168404Spjd err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3266185029Spjd (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3267168404Spjd if (err) { 3268168404Spjd ZFS_EXIT(zfsvfs); 3269168404Spjd return (err); 3270168404Spjd } 3271168404Spjd 3272197831Spjd if (trim_mask) { 3273168404Spjd vap->va_mask |= saved_mask; 3274197831Spjd if (trim_mask & AT_MODE) { 3275197831Spjd /* 3276197831Spjd * Recover the mode after 3277197831Spjd * secpolicy_vnode_setattr(). 3278197831Spjd */ 3279197831Spjd vap->va_mode = saved_mode; 3280197831Spjd } 3281197831Spjd } 3282168404Spjd } 3283168404Spjd 3284168404Spjd /* 3285168404Spjd * secpolicy_vnode_setattr, or take ownership may have 3286168404Spjd * changed va_mask 3287168404Spjd */ 3288168404Spjd mask = vap->va_mask; 3289168404Spjd 3290219089Spjd if ((mask & (AT_UID | AT_GID))) { 3291219089Spjd err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 3292219089Spjd &xattr_obj, sizeof (xattr_obj)); 3293168404Spjd 3294219089Spjd if (err == 0 && xattr_obj) { 3295219089Spjd err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 3296209962Smm if (err) 3297219089Spjd goto out2; 3298168404Spjd } 3299209962Smm if (mask & AT_UID) { 3300209962Smm new_uid = zfs_fuid_create(zfsvfs, 3301209962Smm (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3302219089Spjd if (new_uid != zp->z_uid && 3303219089Spjd zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 3304219089Spjd if (attrzp) 3305219089Spjd VN_RELE(ZTOV(attrzp)); 3306249195Smm err = SET_ERROR(EDQUOT); 3307219089Spjd goto out2; 3308209962Smm } 3309209962Smm } 3310209962Smm 3311209962Smm if (mask & AT_GID) { 3312209962Smm new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 3313209962Smm cr, ZFS_GROUP, &fuidp); 3314219089Spjd if (new_gid != zp->z_gid && 3315219089Spjd zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 3316219089Spjd if (attrzp) 3317219089Spjd VN_RELE(ZTOV(attrzp)); 3318249195Smm 
err = SET_ERROR(EDQUOT); 3319219089Spjd goto out2; 3320209962Smm } 3321209962Smm } 3322219089Spjd } 3323219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 3324219089Spjd 3325219089Spjd if (mask & AT_MODE) { 3326219089Spjd uint64_t pmode = zp->z_mode; 3327219089Spjd uint64_t acl_obj; 3328219089Spjd new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3329219089Spjd 3330243560Smm if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 3331243560Smm !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3332249195Smm err = SET_ERROR(EPERM); 3333243560Smm goto out; 3334243560Smm } 3335243560Smm 3336224174Smm if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3337224174Smm goto out; 3338219089Spjd 3339219089Spjd mutex_enter(&zp->z_lock); 3340219089Spjd if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 3341219089Spjd /* 3342219089Spjd * Are we upgrading ACL from old V0 format 3343219089Spjd * to V1 format? 3344219089Spjd */ 3345219089Spjd if (zfsvfs->z_version >= ZPL_VERSION_FUID && 3346219089Spjd zfs_znode_acl_version(zp) == 3347219089Spjd ZFS_ACL_VERSION_INITIAL) { 3348219089Spjd dmu_tx_hold_free(tx, acl_obj, 0, 3349219089Spjd DMU_OBJECT_END); 3350219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3351219089Spjd 0, aclp->z_acl_bytes); 3352209962Smm } else { 3353219089Spjd dmu_tx_hold_write(tx, acl_obj, 0, 3354219089Spjd aclp->z_acl_bytes); 3355209962Smm } 3356219089Spjd } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 3357219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3358219089Spjd 0, aclp->z_acl_bytes); 3359209962Smm } 3360219089Spjd mutex_exit(&zp->z_lock); 3361219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3362219089Spjd } else { 3363219089Spjd if ((mask & AT_XVATTR) && 3364219089Spjd XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3365219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3366219089Spjd else 3367219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3368168404Spjd } 3369168404Spjd 3370219089Spjd if (attrzp) { 3371219089Spjd dmu_tx_hold_sa(tx, 
attrzp->z_sa_hdl, B_FALSE); 3372219089Spjd } 3373219089Spjd 3374219089Spjd fuid_dirtied = zfsvfs->z_fuid_dirty; 3375219089Spjd if (fuid_dirtied) 3376219089Spjd zfs_fuid_txhold(zfsvfs, tx); 3377219089Spjd 3378219089Spjd zfs_sa_upgrade_txholds(tx, zp); 3379219089Spjd 3380209962Smm err = dmu_tx_assign(tx, TXG_NOWAIT); 3381168404Spjd if (err) { 3382209962Smm if (err == ERESTART) 3383168404Spjd dmu_tx_wait(tx); 3384209962Smm goto out; 3385168404Spjd } 3386168404Spjd 3387219089Spjd count = 0; 3388168404Spjd /* 3389168404Spjd * Set each attribute requested. 3390168404Spjd * We group settings according to the locks they need to acquire. 3391168404Spjd * 3392168404Spjd * Note: you cannot set ctime directly, although it will be 3393168404Spjd * updated as a side-effect of calling this function. 3394168404Spjd */ 3395168404Spjd 3396219089Spjd 3397219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3398219089Spjd mutex_enter(&zp->z_acl_lock); 3399168404Spjd mutex_enter(&zp->z_lock); 3400168404Spjd 3401219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3402219089Spjd &zp->z_pflags, sizeof (zp->z_pflags)); 3403219089Spjd 3404219089Spjd if (attrzp) { 3405219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3406219089Spjd mutex_enter(&attrzp->z_acl_lock); 3407219089Spjd mutex_enter(&attrzp->z_lock); 3408219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3409219089Spjd SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3410219089Spjd sizeof (attrzp->z_pflags)); 3411219089Spjd } 3412219089Spjd 3413219089Spjd if (mask & (AT_UID|AT_GID)) { 3414219089Spjd 3415219089Spjd if (mask & AT_UID) { 3416219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 3417219089Spjd &new_uid, sizeof (new_uid)); 3418219089Spjd zp->z_uid = new_uid; 3419219089Spjd if (attrzp) { 3420219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3421219089Spjd SA_ZPL_UID(zfsvfs), NULL, &new_uid, 3422219089Spjd sizeof (new_uid)); 3423219089Spjd attrzp->z_uid = new_uid; 3424219089Spjd } 3425219089Spjd } 
3426219089Spjd 3427219089Spjd if (mask & AT_GID) { 3428219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 3429219089Spjd NULL, &new_gid, sizeof (new_gid)); 3430219089Spjd zp->z_gid = new_gid; 3431219089Spjd if (attrzp) { 3432219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3433219089Spjd SA_ZPL_GID(zfsvfs), NULL, &new_gid, 3434219089Spjd sizeof (new_gid)); 3435219089Spjd attrzp->z_gid = new_gid; 3436219089Spjd } 3437219089Spjd } 3438219089Spjd if (!(mask & AT_MODE)) { 3439219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 3440219089Spjd NULL, &new_mode, sizeof (new_mode)); 3441219089Spjd new_mode = zp->z_mode; 3442219089Spjd } 3443219089Spjd err = zfs_acl_chown_setattr(zp); 3444219089Spjd ASSERT(err == 0); 3445219089Spjd if (attrzp) { 3446219089Spjd err = zfs_acl_chown_setattr(attrzp); 3447219089Spjd ASSERT(err == 0); 3448219089Spjd } 3449219089Spjd } 3450219089Spjd 3451168404Spjd if (mask & AT_MODE) { 3452219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 3453219089Spjd &new_mode, sizeof (new_mode)); 3454219089Spjd zp->z_mode = new_mode; 3455219089Spjd ASSERT3U((uintptr_t)aclp, !=, 0); 3456209962Smm err = zfs_aclset_common(zp, aclp, cr, tx); 3457240415Smm ASSERT0(err); 3458219089Spjd if (zp->z_acl_cached) 3459219089Spjd zfs_acl_free(zp->z_acl_cached); 3460211932Smm zp->z_acl_cached = aclp; 3461211932Smm aclp = NULL; 3462168404Spjd } 3463168404Spjd 3464168404Spjd 3465219089Spjd if (mask & AT_ATIME) { 3466219089Spjd ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 3467219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 3468219089Spjd &zp->z_atime, sizeof (zp->z_atime)); 3469168404Spjd } 3470168404Spjd 3471219089Spjd if (mask & AT_MTIME) { 3472219089Spjd ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 3473219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 3474219089Spjd mtime, sizeof (mtime)); 3475168404Spjd } 3476168404Spjd 3477185029Spjd /* XXX - shouldn't this be done *before* the ATIME/MTIME 
checks? */ 3478219089Spjd if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3479219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3480219089Spjd NULL, mtime, sizeof (mtime)); 3481219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3482219089Spjd &ctime, sizeof (ctime)); 3483219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 3484219089Spjd B_TRUE); 3485219089Spjd } else if (mask != 0) { 3486219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3487219089Spjd &ctime, sizeof (ctime)); 3488219089Spjd zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 3489219089Spjd B_TRUE); 3490219089Spjd if (attrzp) { 3491219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3492219089Spjd SA_ZPL_CTIME(zfsvfs), NULL, 3493219089Spjd &ctime, sizeof (ctime)); 3494219089Spjd zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 3495219089Spjd mtime, ctime, B_TRUE); 3496219089Spjd } 3497219089Spjd } 3498185029Spjd /* 3499185029Spjd * Do this after setting timestamps to prevent timestamp 3500185029Spjd * update from toggling bit 3501185029Spjd */ 3502168404Spjd 3503185029Spjd if (xoap && (mask & AT_XVATTR)) { 3504209962Smm 3505209962Smm /* 3506209962Smm * restore trimmed off masks 3507209962Smm * so that return masks can be set for caller. 
3508209962Smm */ 3509209962Smm 3510209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3511209962Smm XVA_SET_REQ(xvap, XAT_APPENDONLY); 3512209962Smm } 3513209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3514209962Smm XVA_SET_REQ(xvap, XAT_NOUNLINK); 3515209962Smm } 3516209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3517209962Smm XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3518209962Smm } 3519209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3520209962Smm XVA_SET_REQ(xvap, XAT_NODUMP); 3521209962Smm } 3522209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3523209962Smm XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3524209962Smm } 3525209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3526209962Smm XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3527209962Smm } 3528209962Smm 3529219089Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3530185029Spjd ASSERT(vp->v_type == VREG); 3531185029Spjd 3532219089Spjd zfs_xvattr_set(zp, xvap, tx); 3533185029Spjd } 3534185029Spjd 3535209962Smm if (fuid_dirtied) 3536209962Smm zfs_fuid_sync(zfsvfs, tx); 3537209962Smm 3538168404Spjd if (mask != 0) 3539185029Spjd zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3540168404Spjd 3541168404Spjd mutex_exit(&zp->z_lock); 3542219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3543219089Spjd mutex_exit(&zp->z_acl_lock); 3544168404Spjd 3545219089Spjd if (attrzp) { 3546219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3547219089Spjd mutex_exit(&attrzp->z_acl_lock); 3548219089Spjd mutex_exit(&attrzp->z_lock); 3549219089Spjd } 3550209962Smmout: 3551219089Spjd if (err == 0 && attrzp) { 3552219089Spjd err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 3553219089Spjd xattr_count, tx); 3554219089Spjd ASSERT(err2 == 0); 3555219089Spjd } 3556219089Spjd 3557168404Spjd if (attrzp) 3558168404Spjd VN_RELE(ZTOV(attrzp)); 3559211932Smm if (aclp) 3560209962Smm zfs_acl_free(aclp); 3561168404Spjd 3562209962Smm if (fuidp) { 3563209962Smm zfs_fuid_info_free(fuidp); 3564209962Smm 
fuidp = NULL; 3565209962Smm } 3566209962Smm 3567219089Spjd if (err) { 3568209962Smm dmu_tx_abort(tx); 3569219089Spjd if (err == ERESTART) 3570219089Spjd goto top; 3571219089Spjd } else { 3572219089Spjd err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 3573209962Smm dmu_tx_commit(tx); 3574219089Spjd } 3575209962Smm 3576219089Spjdout2: 3577219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3578219089Spjd zil_commit(zilog, 0); 3579209962Smm 3580168404Spjd ZFS_EXIT(zfsvfs); 3581168404Spjd return (err); 3582168404Spjd} 3583168404Spjd 3584168404Spjdtypedef struct zfs_zlock { 3585168404Spjd krwlock_t *zl_rwlock; /* lock we acquired */ 3586168404Spjd znode_t *zl_znode; /* znode we held */ 3587168404Spjd struct zfs_zlock *zl_next; /* next in list */ 3588168404Spjd} zfs_zlock_t; 3589168404Spjd 3590168404Spjd/* 3591168404Spjd * Drop locks and release vnodes that were held by zfs_rename_lock(). 3592168404Spjd */ 3593168404Spjdstatic void 3594168404Spjdzfs_rename_unlock(zfs_zlock_t **zlpp) 3595168404Spjd{ 3596168404Spjd zfs_zlock_t *zl; 3597168404Spjd 3598168404Spjd while ((zl = *zlpp) != NULL) { 3599168404Spjd if (zl->zl_znode != NULL) 3600168404Spjd VN_RELE(ZTOV(zl->zl_znode)); 3601168404Spjd rw_exit(zl->zl_rwlock); 3602168404Spjd *zlpp = zl->zl_next; 3603168404Spjd kmem_free(zl, sizeof (*zl)); 3604168404Spjd } 3605168404Spjd} 3606168404Spjd 3607168404Spjd/* 3608168404Spjd * Search back through the directory tree, using the ".." entries. 3609168404Spjd * Lock each directory in the chain to prevent concurrent renames. 3610168404Spjd * Fail any attempt to move a directory into one of its own descendants. 
 * XXX - z_parent_lock can overlap with map or grow locks
 *
 *	IN:	szp	- znode being renamed; its z_parent_lock is taken as
 *			  writer on the first pass.
 *		tdzp	- target directory; the upward walk starts here.
 *		sdzp	- source directory; the walk ends once it is reached.
 *
 *	IN/OUT:	zlpp	- head of the list of locks taken.  Every lock that
 *			  is acquired is pushed onto this list before any
 *			  return, including the error returns, so the list
 *			  has to be handed to zfs_rename_unlock() afterwards
 *			  (zfs_rename() does this at its "out" label).
 *
 *	RETURN:	0 if the walk reached sdzp or the root
 *		EINVAL if tdzp is szp or lies beneath it
 *		error from zfs_zget() if a parent could not be obtained
 */
static int
zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
{
	zfs_zlock_t	*zl;
	znode_t		*zp = tdzp;
	uint64_t	rootid = zp->z_zfsvfs->z_root;
	uint64_t	oidp = zp->z_id;
	krwlock_t	*rwlp = &szp->z_parent_lock;
	krw_t		rw = RW_WRITER;

	/*
	 * First pass write-locks szp and compares to zp->z_id.
	 * Later passes read-lock zp and compare to zp->z_parent.
	 */
	do {
		if (!rw_tryenter(rwlp, rw)) {
			/*
			 * Another thread is renaming in this path.
			 * Note that if we are a WRITER, we don't have any
			 * parent_locks held yet.
			 */
			if (rw == RW_READER && zp->z_id > szp->z_id) {
				/*
				 * Drop our locks and restart
				 *
				 * rw is only RW_READER after a previous pass
				 * pushed a node, so zl is the current head of
				 * *zlpp and unlocking from it releases every
				 * lock taken so far.
				 */
				zfs_rename_unlock(&zl);
				*zlpp = NULL;
				zp = tdzp;
				oidp = zp->z_id;
				rwlp = &szp->z_parent_lock;
				rw = RW_WRITER;
				continue;
			} else {
				/*
				 * Wait for other thread to drop its locks
				 */
				rw_enter(rwlp, rw);
			}
		}

		/* Record the lock just taken at the head of the list. */
		zl = kmem_alloc(sizeof (*zl), KM_SLEEP);
		zl->zl_rwlock = rwlp;
		zl->zl_znode = NULL;
		zl->zl_next = *zlpp;
		*zlpp = zl;

		if (oidp == szp->z_id)		/* We're a descendant of szp */
			return (SET_ERROR(EINVAL));

		if (oidp == rootid)		/* We've hit the top */
			return (0);

		if (rw == RW_READER) {		/* i.e. not the first pass */
			int error = zfs_zget(zp->z_zfsvfs, oidp, &zp);
			if (error)
				return (error);
			/* The hold from zfs_zget() is dropped on unlock. */
			zl->zl_znode = zp;
		}
		/*
		 * Step up one level: fetch zp's parent object id.  The
		 * lookup result is deliberately ignored; on failure oidp
		 * keeps its previous value.
		 */
		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs),
		    &oidp, sizeof (oidp));
		rwlp = &zp->z_parent_lock;
		rw = RW_READER;

	} while (zp->z_id != sdzp->z_id);

	return (0);
}

/*
 * Move an entry from the provided source directory to the target
 * directory.  Change the entry name as indicated.
 *
 *	IN:	sdvp	- Source directory containing the "old entry".
 *		snm	- Old entry name.
 *		tdvp	- Target directory to contain the "new entry".
 *		tnm	- New entry name.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	sdvp,tdvp - ctime|mtime updated
 *
 * The transaction is assigned with TXG_NOWAIT.  When the assignment
 * returns ERESTART, every lock and hold taken since the "top" label is
 * released, the function waits on the transaction and then starts over
 * from "top".
 */
/*ARGSUSED*/
static int
zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t		*tdzp, *szp, *tzp;
	znode_t		*sdzp = VTOZ(sdvp);
	zfsvfs_t	*zfsvfs = sdzp->z_zfsvfs;
	zilog_t		*zilog;
	vnode_t		*realvp;
	zfs_dirlock_t	*sdl, *tdl;
	dmu_tx_t	*tx;
	zfs_zlock_t	*zl;
	int		cmp, serr, terr;
	int		error = 0;
	int		zflg = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(sdzp);
	zilog = zfsvfs->z_log;

	/*
	 * Make sure we have the real vp for the target directory.
	 */
	if (VOP_REALVP(tdvp, &realvp, ct) == 0)
		tdvp = realvp;

	/* Renames across file systems or into the control dir are refused. */
	if (tdvp->v_vfsp != sdvp->v_vfsp || zfsctl_is_node(tdvp)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EXDEV));
	}

	tdzp = VTOZ(tdvp);
	ZFS_VERIFY_ZP(tdzp);
	if (zfsvfs->z_utf8 && u8_validate(tnm,
	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	if (flags & FIGNORECASE)
		zflg |= ZCILOOK;

top:
	/* Retry entry point: nothing is locked or held at this label. */
	szp = NULL;
	tzp = NULL;
	zl = NULL;

	/*
	 * This is to prevent the creation of links into attribute space
	 * by renaming a linked file into/outof an attribute directory.
	 * See the comment in zfs_link() for why this is considered bad.
	 */
	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Lock source and target directory entries.  To prevent deadlock,
	 * a lock ordering must be defined.  We lock the directory with
	 * the smallest object id first, or if it's a tie, the one with
	 * the lexically first name.
	 */
	if (sdzp->z_id < tdzp->z_id) {
		cmp = -1;
	} else if (sdzp->z_id > tdzp->z_id) {
		cmp = 1;
	} else {
		/*
		 * First compare the two name arguments without
		 * considering any case folding.
		 */
		int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER);

		cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error);
		ASSERT(error == 0 || !zfsvfs->z_utf8);
		if (cmp == 0) {
			/*
			 * POSIX: "If the old argument and the new argument
			 * both refer to links to the same existing file,
			 * the rename() function shall return successfully
			 * and perform no other action."
			 */
			ZFS_EXIT(zfsvfs);
			return (0);
		}
		/*
		 * If the file system is case-folding, then we may
		 * have some more checking to do.  A case-folding file
		 * system is either supporting mixed case sensitivity
		 * access or is completely case-insensitive.  Note
		 * that the file system is always case preserving.
		 *
		 * In mixed sensitivity mode case sensitive behavior
		 * is the default.  FIGNORECASE must be used to
		 * explicitly request case insensitive behavior.
		 *
		 * If the source and target names provided differ only
		 * by case (e.g., a request to rename 'tim' to 'Tim'),
		 * we will treat this as a special case in the
		 * case-insensitive mode: as long as the source name
		 * is an exact match, we will allow this to proceed as
		 * a name-change request.
		 */
		if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
		    (zfsvfs->z_case == ZFS_CASE_MIXED &&
		    flags & FIGNORECASE)) &&
		    u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST,
		    &error) == 0) {
			/*
			 * case preserving rename request, require exact
			 * name matches
			 */
			zflg |= ZCIEXACT;
			zflg &= ~ZCILOOK;
		}
	}

	/*
	 * If the source and destination directories are the same, we should
	 * grab the z_name_lock of that directory only once.
	 */
	if (sdzp == tdzp) {
		zflg |= ZHAVELOCK;
		rw_enter(&sdzp->z_name_lock, RW_READER);
	}

	/*
	 * Take the two directory-entry locks in the order chosen above.
	 * Whichever entry is locked second is locked with ZRENAMING.
	 */
	if (cmp < 0) {
		serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp,
		    ZEXISTS | zflg, NULL, NULL);
		terr = zfs_dirent_lock(&tdl,
		    tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL);
	} else {
		terr = zfs_dirent_lock(&tdl,
		    tdzp, tnm, &tzp, zflg, NULL, NULL);
		serr = zfs_dirent_lock(&sdl,
		    sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg,
		    NULL, NULL);
	}

	if (serr) {
		/*
		 * Source entry invalid or not there.
		 */
		if (!terr) {
			zfs_dirent_unlock(tdl);
			if (tzp)
				VN_RELE(ZTOV(tzp));
		}

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		/*
		 * FreeBSD: In OpenSolaris they only check if rename source is
		 * ".." here, because "." is handled in their lookup.  This is
		 * not the case for FreeBSD, so we check for "." explicitly.
		 */
		if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0)
			serr = SET_ERROR(EINVAL);
		ZFS_EXIT(zfsvfs);
		return (serr);
	}
	if (terr) {
		/* Target lock failed; undo the source lock and hold. */
		zfs_dirent_unlock(sdl);
		VN_RELE(ZTOV(szp));

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		if (strcmp(tnm, "..") == 0)
			terr = SET_ERROR(EINVAL);
		ZFS_EXIT(zfsvfs);
		return (terr);
	}

	/*
	 * From here on both directory-entry locks are held and szp is held,
	 * which is the state the "out" label expects.
	 */

	/*
	 * Must have write access at the source to remove the old entry
	 * and write access at the target to create the new entry.
	 * Note that if target and source are the same, this can be
	 * done in a single check.
	 */

	if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))
		goto out;

	if (ZTOV(szp)->v_type == VDIR) {
		/*
		 * Check to make sure rename is valid.
		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
		 */
		if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl))
			goto out;
	}

	/*
	 * Does target exist?
	 */
	if (tzp) {
		/*
		 * Source and target must be the same type.
		 */
		if (ZTOV(szp)->v_type == VDIR) {
			if (ZTOV(tzp)->v_type != VDIR) {
				error = SET_ERROR(ENOTDIR);
				goto out;
			}
		} else {
			if (ZTOV(tzp)->v_type == VDIR) {
				error = SET_ERROR(EISDIR);
				goto out;
			}
		}
		/*
		 * POSIX dictates that when the source and target
		 * entries refer to the same file object, rename
		 * must do nothing and exit without error.
		 */
		if (szp->z_id == tzp->z_id) {
			error = 0;
			goto out;
		}
	}

	/*
	 * NOTE(review): these notifications are issued before the
	 * transaction is assigned, so an ERESTART retry through "top"
	 * issues them again -- confirm that consumers tolerate this.
	 */
	vnevent_rename_src(ZTOV(szp), sdvp, snm, ct);
	if (tzp)
		vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct);

	/*
	 * notify the target directory if it is not the same
	 * as source directory.
	 */
	if (tdvp != sdvp) {
		vnevent_rename_dest_dir(tdvp, ct);
	}

	/* Declare everything the rename may modify in this transaction. */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
	if (sdzp != tdzp) {
		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tdzp);
	}
	if (tzp) {
		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tzp);
	}

	zfs_sa_upgrade_txholds(tx, szp);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		/*
		 * Release everything taken since "top" before either
		 * waiting and retrying (ERESTART) or failing outright.
		 */
		if (zl != NULL)
			zfs_rename_unlock(&zl);
		zfs_dirent_unlock(sdl);
		zfs_dirent_unlock(tdl);

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		VN_RELE(ZTOV(szp));
		if (tzp)
			VN_RELE(ZTOV(tzp));
		if (error == ERESTART) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (tzp)	/* Attempt to remove the existing target */
		error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);

	if (error == 0) {
		error = zfs_link_create(tdl, szp, tx, ZRENAMING);
		if (error == 0) {
			szp->z_pflags |= ZFS_AV_MODIFIED;

			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
			ASSERT0(error);

			error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
			if (error == 0) {
				zfs_log_rename(zilog, tx, TX_RENAME |
				    (flags & FIGNORECASE ? TX_CI : 0), sdzp,
				    sdl->dl_name, tdzp, tdl->dl_name, szp);

				/*
				 * Update path information for the target vnode
				 */
				vn_renamepath(tdvp, ZTOV(szp), tnm,
				    strlen(tnm));
			} else {
				/*
				 * At this point, we have successfully created
				 * the target name, but have failed to remove
				 * the source name.  Since the create was done
				 * with the ZRENAMING flag, there are
				 * complications; for one, the link count is
				 * wrong.  The easiest way to deal with this
				 * is to remove the newly created target, and
				 * return the original error.  This must
				 * succeed; fortunately, it is very unlikely to
				 * fail, since we just created it.
				 */
				VERIFY3U(zfs_link_destroy(tdl, szp, tx,
				    ZRENAMING, NULL), ==, 0);
			}
		}
#ifdef FREEBSD_NAMECACHE
		/* On success, purge name cache entries for all vnodes involved. */
		if (error == 0) {
			cache_purge(sdvp);
			cache_purge(tdvp);
			cache_purge(ZTOV(szp));
			if (tzp)
				cache_purge(ZTOV(tzp));
		}
#endif
	}

	/* The transaction is committed whether or not the rename succeeded. */
	dmu_tx_commit(tx);
out:
	/*
	 * Common exit: both directory-entry locks are held and szp is held
	 * whenever this label is reached.
	 */
	if (zl != NULL)
		zfs_rename_unlock(&zl);

	zfs_dirent_unlock(sdl);
	zfs_dirent_unlock(tdl);

	if (sdzp == tdzp)
		rw_exit(&sdzp->z_name_lock);


	VN_RELE(ZTOV(szp));
	if (tzp)
		VN_RELE(ZTOV(tzp));

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);

	return (error);
}

/*
 * Insert the indicated symbolic reference entry into the directory.
 *
 *	IN:	dvp	- Directory to contain new symbolic link.
 *		name	- Name for new symlink entry.
 *		vap	- Attributes of new entry.
 *		link	- Target path of new symlink.
 *		cr	- credentials of caller.
4058185029Spjd * ct - caller context 4059185029Spjd * flags - case flags 4060168404Spjd * 4061168404Spjd * RETURN: 0 if success 4062168404Spjd * error code if failure 4063168404Spjd * 4064168404Spjd * Timestamps: 4065168404Spjd * dvp - ctime|mtime updated 4066168404Spjd */ 4067185029Spjd/*ARGSUSED*/ 4068168404Spjdstatic int 4069185029Spjdzfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 4070185029Spjd cred_t *cr, kthread_t *td) 4071168404Spjd{ 4072168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 4073168404Spjd zfs_dirlock_t *dl; 4074168404Spjd dmu_tx_t *tx; 4075168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4076185029Spjd zilog_t *zilog; 4077219089Spjd uint64_t len = strlen(link); 4078168404Spjd int error; 4079185029Spjd int zflg = ZNEW; 4080209962Smm zfs_acl_ids_t acl_ids; 4081209962Smm boolean_t fuid_dirtied; 4082219089Spjd uint64_t txtype = TX_SYMLINK; 4083185029Spjd int flags = 0; 4084168404Spjd 4085168962Spjd ASSERT(vap->va_type == VLNK); 4086168404Spjd 4087168404Spjd ZFS_ENTER(zfsvfs); 4088185029Spjd ZFS_VERIFY_ZP(dzp); 4089185029Spjd zilog = zfsvfs->z_log; 4090185029Spjd 4091185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 4092185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4093185029Spjd ZFS_EXIT(zfsvfs); 4094249195Smm return (SET_ERROR(EILSEQ)); 4095185029Spjd } 4096185029Spjd if (flags & FIGNORECASE) 4097185029Spjd zflg |= ZCILOOK; 4098168404Spjd 4099168404Spjd if (len > MAXPATHLEN) { 4100168404Spjd ZFS_EXIT(zfsvfs); 4101249195Smm return (SET_ERROR(ENAMETOOLONG)); 4102168404Spjd } 4103168404Spjd 4104219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, 4105219089Spjd vap, cr, NULL, &acl_ids)) != 0) { 4106219089Spjd ZFS_EXIT(zfsvfs); 4107219089Spjd return (error); 4108219089Spjd } 4109219089Spjdtop: 4110168404Spjd /* 4111168404Spjd * Attempt to lock directory; fail if entry already exists. 
4112168404Spjd */ 4113185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 4114185029Spjd if (error) { 4115219089Spjd zfs_acl_ids_free(&acl_ids); 4116168404Spjd ZFS_EXIT(zfsvfs); 4117168404Spjd return (error); 4118168404Spjd } 4119168404Spjd 4120219089Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4121219089Spjd zfs_acl_ids_free(&acl_ids); 4122219089Spjd zfs_dirent_unlock(dl); 4123219089Spjd ZFS_EXIT(zfsvfs); 4124219089Spjd return (error); 4125219089Spjd } 4126219089Spjd 4127209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 4128209962Smm zfs_acl_ids_free(&acl_ids); 4129209962Smm zfs_dirent_unlock(dl); 4130209962Smm ZFS_EXIT(zfsvfs); 4131249195Smm return (SET_ERROR(EDQUOT)); 4132209962Smm } 4133168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4134209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 4135168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 4136168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4137219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 4138219089Spjd ZFS_SA_BASE_ATTR_SIZE + len); 4139219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 4140219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 4141219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 4142219089Spjd acl_ids.z_aclp->z_acl_bytes); 4143219089Spjd } 4144209962Smm if (fuid_dirtied) 4145209962Smm zfs_fuid_txhold(zfsvfs, tx); 4146209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 4147168404Spjd if (error) { 4148168404Spjd zfs_dirent_unlock(dl); 4149209962Smm if (error == ERESTART) { 4150168404Spjd dmu_tx_wait(tx); 4151168404Spjd dmu_tx_abort(tx); 4152168404Spjd goto top; 4153168404Spjd } 4154219089Spjd zfs_acl_ids_free(&acl_ids); 4155168404Spjd dmu_tx_abort(tx); 4156168404Spjd ZFS_EXIT(zfsvfs); 4157168404Spjd return (error); 4158168404Spjd } 4159168404Spjd 4160168404Spjd /* 4161168404Spjd * Create a new object for the symlink. 
4162219089Spjd * for version 4 ZPL datsets the symlink will be an SA attribute 4163168404Spjd */ 4164219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 4165168404Spjd 4166219089Spjd if (fuid_dirtied) 4167219089Spjd zfs_fuid_sync(zfsvfs, tx); 4168209962Smm 4169219089Spjd mutex_enter(&zp->z_lock); 4170219089Spjd if (zp->z_is_sa) 4171219089Spjd error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 4172219089Spjd link, len, tx); 4173219089Spjd else 4174219089Spjd zfs_sa_symlink(zp, link, len, tx); 4175219089Spjd mutex_exit(&zp->z_lock); 4176168404Spjd 4177219089Spjd zp->z_size = len; 4178219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 4179219089Spjd &zp->z_size, sizeof (zp->z_size), tx); 4180168404Spjd /* 4181168404Spjd * Insert the new object into the directory. 4182168404Spjd */ 4183168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 4184168404Spjd 4185219089Spjd if (flags & FIGNORECASE) 4186219089Spjd txtype |= TX_CI; 4187219089Spjd zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 4188219089Spjd *vpp = ZTOV(zp); 4189219089Spjd 4190209962Smm zfs_acl_ids_free(&acl_ids); 4191209962Smm 4192168404Spjd dmu_tx_commit(tx); 4193168404Spjd 4194168404Spjd zfs_dirent_unlock(dl); 4195168404Spjd 4196219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4197219089Spjd zil_commit(zilog, 0); 4198219089Spjd 4199168404Spjd ZFS_EXIT(zfsvfs); 4200168404Spjd return (error); 4201168404Spjd} 4202168404Spjd 4203168404Spjd/* 4204168404Spjd * Return, in the buffer contained in the provided uio structure, 4205168404Spjd * the symbolic path referred to by vp. 4206168404Spjd * 4207168404Spjd * IN: vp - vnode of symbolic link. 4208168404Spjd * uoip - structure to contain the link path. 4209168404Spjd * cr - credentials of caller. 4210185029Spjd * ct - caller context 4211168404Spjd * 4212168404Spjd * OUT: uio - structure to contain the link path. 
4213168404Spjd * 4214168404Spjd * RETURN: 0 if success 4215168404Spjd * error code if failure 4216168404Spjd * 4217168404Spjd * Timestamps: 4218168404Spjd * vp - atime updated 4219168404Spjd */ 4220168404Spjd/* ARGSUSED */ 4221168404Spjdstatic int 4222185029Spjdzfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 4223168404Spjd{ 4224168404Spjd znode_t *zp = VTOZ(vp); 4225168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4226168404Spjd int error; 4227168404Spjd 4228168404Spjd ZFS_ENTER(zfsvfs); 4229185029Spjd ZFS_VERIFY_ZP(zp); 4230168404Spjd 4231219089Spjd mutex_enter(&zp->z_lock); 4232219089Spjd if (zp->z_is_sa) 4233219089Spjd error = sa_lookup_uio(zp->z_sa_hdl, 4234219089Spjd SA_ZPL_SYMLINK(zfsvfs), uio); 4235219089Spjd else 4236219089Spjd error = zfs_sa_readlink(zp, uio); 4237219089Spjd mutex_exit(&zp->z_lock); 4238168404Spjd 4239168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4240219089Spjd 4241168404Spjd ZFS_EXIT(zfsvfs); 4242168404Spjd return (error); 4243168404Spjd} 4244168404Spjd 4245168404Spjd/* 4246168404Spjd * Insert a new entry into directory tdvp referencing svp. 4247168404Spjd * 4248168404Spjd * IN: tdvp - Directory to contain new entry. 4249168404Spjd * svp - vnode of new entry. 4250168404Spjd * name - name of new entry. 4251168404Spjd * cr - credentials of caller. 
4252185029Spjd * ct - caller context 4253168404Spjd * 4254168404Spjd * RETURN: 0 if success 4255168404Spjd * error code if failure 4256168404Spjd * 4257168404Spjd * Timestamps: 4258168404Spjd * tdvp - ctime|mtime updated 4259168404Spjd * svp - ctime updated 4260168404Spjd */ 4261168404Spjd/* ARGSUSED */ 4262168404Spjdstatic int 4263185029Spjdzfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 4264185029Spjd caller_context_t *ct, int flags) 4265168404Spjd{ 4266168404Spjd znode_t *dzp = VTOZ(tdvp); 4267168404Spjd znode_t *tzp, *szp; 4268168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4269185029Spjd zilog_t *zilog; 4270168404Spjd zfs_dirlock_t *dl; 4271168404Spjd dmu_tx_t *tx; 4272168962Spjd vnode_t *realvp; 4273168404Spjd int error; 4274185029Spjd int zf = ZNEW; 4275212694Smm uint64_t parent; 4276185029Spjd uid_t owner; 4277168404Spjd 4278168404Spjd ASSERT(tdvp->v_type == VDIR); 4279168404Spjd 4280168404Spjd ZFS_ENTER(zfsvfs); 4281185029Spjd ZFS_VERIFY_ZP(dzp); 4282185029Spjd zilog = zfsvfs->z_log; 4283168404Spjd 4284185029Spjd if (VOP_REALVP(svp, &realvp, ct) == 0) 4285168962Spjd svp = realvp; 4286168962Spjd 4287212694Smm /* 4288212694Smm * POSIX dictates that we return EPERM here. 4289212694Smm * Better choices include ENOTSUP or EISDIR. 
4290212694Smm */ 4291212694Smm if (svp->v_type == VDIR) { 4292168404Spjd ZFS_EXIT(zfsvfs); 4293249195Smm return (SET_ERROR(EPERM)); 4294212694Smm } 4295212694Smm 4296212694Smm if (svp->v_vfsp != tdvp->v_vfsp || zfsctl_is_node(svp)) { 4297212694Smm ZFS_EXIT(zfsvfs); 4298249195Smm return (SET_ERROR(EXDEV)); 4299168404Spjd } 4300212694Smm 4301185029Spjd szp = VTOZ(svp); 4302185029Spjd ZFS_VERIFY_ZP(szp); 4303168404Spjd 4304212694Smm /* Prevent links to .zfs/shares files */ 4305212694Smm 4306219089Spjd if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 4307219089Spjd &parent, sizeof (uint64_t))) != 0) { 4308212694Smm ZFS_EXIT(zfsvfs); 4309219089Spjd return (error); 4310219089Spjd } 4311219089Spjd if (parent == zfsvfs->z_shares_dir) { 4312219089Spjd ZFS_EXIT(zfsvfs); 4313249195Smm return (SET_ERROR(EPERM)); 4314212694Smm } 4315212694Smm 4316185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, 4317185029Spjd strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4318185029Spjd ZFS_EXIT(zfsvfs); 4319249195Smm return (SET_ERROR(EILSEQ)); 4320185029Spjd } 4321185029Spjd if (flags & FIGNORECASE) 4322185029Spjd zf |= ZCILOOK; 4323185029Spjd 4324168404Spjd /* 4325168404Spjd * We do not support links between attributes and non-attributes 4326168404Spjd * because of the potential security risk of creating links 4327168404Spjd * into "normal" file space in order to circumvent restrictions 4328168404Spjd * imposed in attribute space. 
4329168404Spjd */ 4330219089Spjd if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4331168404Spjd ZFS_EXIT(zfsvfs); 4332249195Smm return (SET_ERROR(EINVAL)); 4333168404Spjd } 4334168404Spjd 4335168404Spjd 4336219089Spjd owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4337219089Spjd if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { 4338168404Spjd ZFS_EXIT(zfsvfs); 4339249195Smm return (SET_ERROR(EPERM)); 4340168404Spjd } 4341168404Spjd 4342185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4343168404Spjd ZFS_EXIT(zfsvfs); 4344168404Spjd return (error); 4345168404Spjd } 4346168404Spjd 4347212694Smmtop: 4348168404Spjd /* 4349168404Spjd * Attempt to lock directory; fail if entry already exists. 4350168404Spjd */ 4351185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 4352185029Spjd if (error) { 4353168404Spjd ZFS_EXIT(zfsvfs); 4354168404Spjd return (error); 4355168404Spjd } 4356168404Spjd 4357168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4358219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4359168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4360219089Spjd zfs_sa_upgrade_txholds(tx, szp); 4361219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 4362209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 4363168404Spjd if (error) { 4364168404Spjd zfs_dirent_unlock(dl); 4365209962Smm if (error == ERESTART) { 4366168404Spjd dmu_tx_wait(tx); 4367168404Spjd dmu_tx_abort(tx); 4368168404Spjd goto top; 4369168404Spjd } 4370168404Spjd dmu_tx_abort(tx); 4371168404Spjd ZFS_EXIT(zfsvfs); 4372168404Spjd return (error); 4373168404Spjd } 4374168404Spjd 4375168404Spjd error = zfs_link_create(dl, szp, tx, 0); 4376168404Spjd 4377185029Spjd if (error == 0) { 4378185029Spjd uint64_t txtype = TX_LINK; 4379185029Spjd if (flags & FIGNORECASE) 4380185029Spjd txtype |= TX_CI; 4381185029Spjd zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4382185029Spjd } 4383168404Spjd 4384168404Spjd dmu_tx_commit(tx); 
4385168404Spjd 4386168404Spjd zfs_dirent_unlock(dl); 4387168404Spjd 4388185029Spjd if (error == 0) { 4389185029Spjd vnevent_link(svp, ct); 4390185029Spjd } 4391185029Spjd 4392219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4393219089Spjd zil_commit(zilog, 0); 4394219089Spjd 4395168404Spjd ZFS_EXIT(zfsvfs); 4396168404Spjd return (error); 4397168404Spjd} 4398168404Spjd 4399219089Spjd#ifdef sun 4400219089Spjd/* 4401219089Spjd * zfs_null_putapage() is used when the file system has been force 4402219089Spjd * unmounted. It just drops the pages. 4403219089Spjd */ 4404219089Spjd/* ARGSUSED */ 4405219089Spjdstatic int 4406219089Spjdzfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4407219089Spjd size_t *lenp, int flags, cred_t *cr) 4408219089Spjd{ 4409219089Spjd pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); 4410219089Spjd return (0); 4411219089Spjd} 4412219089Spjd 4413219089Spjd/* 4414219089Spjd * Push a page out to disk, klustering if possible. 4415219089Spjd * 4416219089Spjd * IN: vp - file to push page to. 4417219089Spjd * pp - page to push. 4418219089Spjd * flags - additional flags. 4419219089Spjd * cr - credentials of caller. 4420219089Spjd * 4421219089Spjd * OUT: offp - start of range pushed. 4422219089Spjd * lenp - len of range pushed. 4423219089Spjd * 4424219089Spjd * RETURN: 0 if success 4425219089Spjd * error code if failure 4426219089Spjd * 4427219089Spjd * NOTE: callers must have locked the page to be pushed. On 4428219089Spjd * exit, the page (and all other pages in the kluster) must be 4429219089Spjd * unlocked. 
4430219089Spjd */ 4431219089Spjd/* ARGSUSED */ 4432219089Spjdstatic int 4433219089Spjdzfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4434219089Spjd size_t *lenp, int flags, cred_t *cr) 4435219089Spjd{ 4436219089Spjd znode_t *zp = VTOZ(vp); 4437219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4438219089Spjd dmu_tx_t *tx; 4439219089Spjd u_offset_t off, koff; 4440219089Spjd size_t len, klen; 4441219089Spjd int err; 4442219089Spjd 4443219089Spjd off = pp->p_offset; 4444219089Spjd len = PAGESIZE; 4445219089Spjd /* 4446219089Spjd * If our blocksize is bigger than the page size, try to kluster 4447219089Spjd * multiple pages so that we write a full block (thus avoiding 4448219089Spjd * a read-modify-write). 4449219089Spjd */ 4450219089Spjd if (off < zp->z_size && zp->z_blksz > PAGESIZE) { 4451219089Spjd klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); 4452219089Spjd koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0; 4453219089Spjd ASSERT(koff <= zp->z_size); 4454219089Spjd if (koff + klen > zp->z_size) 4455219089Spjd klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); 4456219089Spjd pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); 4457219089Spjd } 4458219089Spjd ASSERT3U(btop(len), ==, btopr(len)); 4459219089Spjd 4460219089Spjd /* 4461219089Spjd * Can't push pages past end-of-file. 
4462219089Spjd */ 4463219089Spjd if (off >= zp->z_size) { 4464219089Spjd /* ignore all pages */ 4465219089Spjd err = 0; 4466219089Spjd goto out; 4467219089Spjd } else if (off + len > zp->z_size) { 4468219089Spjd int npages = btopr(zp->z_size - off); 4469219089Spjd page_t *trunc; 4470219089Spjd 4471219089Spjd page_list_break(&pp, &trunc, npages); 4472219089Spjd /* ignore pages past end of file */ 4473219089Spjd if (trunc) 4474219089Spjd pvn_write_done(trunc, flags); 4475219089Spjd len = zp->z_size - off; 4476219089Spjd } 4477219089Spjd 4478219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 4479219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4480249195Smm err = SET_ERROR(EDQUOT); 4481219089Spjd goto out; 4482219089Spjd } 4483219089Spjdtop: 4484219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 4485219089Spjd dmu_tx_hold_write(tx, zp->z_id, off, len); 4486219089Spjd 4487219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4488219089Spjd zfs_sa_upgrade_txholds(tx, zp); 4489219089Spjd err = dmu_tx_assign(tx, TXG_NOWAIT); 4490219089Spjd if (err != 0) { 4491219089Spjd if (err == ERESTART) { 4492219089Spjd dmu_tx_wait(tx); 4493219089Spjd dmu_tx_abort(tx); 4494219089Spjd goto top; 4495219089Spjd } 4496219089Spjd dmu_tx_abort(tx); 4497219089Spjd goto out; 4498219089Spjd } 4499219089Spjd 4500219089Spjd if (zp->z_blksz <= PAGESIZE) { 4501219089Spjd caddr_t va = zfs_map_page(pp, S_READ); 4502219089Spjd ASSERT3U(len, <=, PAGESIZE); 4503219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 4504219089Spjd zfs_unmap_page(pp, va); 4505219089Spjd } else { 4506219089Spjd err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 4507219089Spjd } 4508219089Spjd 4509219089Spjd if (err == 0) { 4510219089Spjd uint64_t mtime[2], ctime[2]; 4511219089Spjd sa_bulk_attr_t bulk[3]; 4512219089Spjd int count = 0; 4513219089Spjd 4514219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 4515219089Spjd &mtime, 16); 4516219089Spjd SA_ADD_BULK_ATTR(bulk, count, 
SA_ZPL_CTIME(zfsvfs), NULL, 4517219089Spjd &ctime, 16); 4518219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4519219089Spjd &zp->z_pflags, 8); 4520219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 4521219089Spjd B_TRUE); 4522219089Spjd zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 4523219089Spjd } 4524219089Spjd dmu_tx_commit(tx); 4525219089Spjd 4526219089Spjdout: 4527219089Spjd pvn_write_done(pp, (err ? B_ERROR : 0) | flags); 4528219089Spjd if (offp) 4529219089Spjd *offp = off; 4530219089Spjd if (lenp) 4531219089Spjd *lenp = len; 4532219089Spjd 4533219089Spjd return (err); 4534219089Spjd} 4535219089Spjd 4536219089Spjd/* 4537219089Spjd * Copy the portion of the file indicated from pages into the file. 4538219089Spjd * The pages are stored in a page list attached to the files vnode. 4539219089Spjd * 4540219089Spjd * IN: vp - vnode of file to push page data to. 4541219089Spjd * off - position in file to put data. 4542219089Spjd * len - amount of data to write. 4543219089Spjd * flags - flags to control the operation. 4544219089Spjd * cr - credentials of caller. 4545219089Spjd * ct - caller context. 
4546219089Spjd * 4547219089Spjd * RETURN: 0 if success 4548219089Spjd * error code if failure 4549219089Spjd * 4550219089Spjd * Timestamps: 4551219089Spjd * vp - ctime|mtime updated 4552219089Spjd */ 4553185029Spjd/*ARGSUSED*/ 4554219089Spjdstatic int 4555219089Spjdzfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, 4556219089Spjd caller_context_t *ct) 4557219089Spjd{ 4558219089Spjd znode_t *zp = VTOZ(vp); 4559219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4560219089Spjd page_t *pp; 4561219089Spjd size_t io_len; 4562219089Spjd u_offset_t io_off; 4563219089Spjd uint_t blksz; 4564219089Spjd rl_t *rl; 4565219089Spjd int error = 0; 4566219089Spjd 4567219089Spjd ZFS_ENTER(zfsvfs); 4568219089Spjd ZFS_VERIFY_ZP(zp); 4569219089Spjd 4570219089Spjd /* 4571219089Spjd * Align this request to the file block size in case we kluster. 4572219089Spjd * XXX - this can result in pretty aggresive locking, which can 4573219089Spjd * impact simultanious read/write access. One option might be 4574219089Spjd * to break up long requests (len == 0) into block-by-block 4575219089Spjd * operations to get narrower locking. 4576219089Spjd */ 4577219089Spjd blksz = zp->z_blksz; 4578219089Spjd if (ISP2(blksz)) 4579219089Spjd io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); 4580219089Spjd else 4581219089Spjd io_off = 0; 4582219089Spjd if (len > 0 && ISP2(blksz)) 4583219089Spjd io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); 4584219089Spjd else 4585219089Spjd io_len = 0; 4586219089Spjd 4587219089Spjd if (io_len == 0) { 4588219089Spjd /* 4589219089Spjd * Search the entire vp list for pages >= io_off. 
4590219089Spjd */ 4591219089Spjd rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); 4592219089Spjd error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); 4593219089Spjd goto out; 4594219089Spjd } 4595219089Spjd rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); 4596219089Spjd 4597219089Spjd if (off > zp->z_size) { 4598219089Spjd /* past end of file */ 4599219089Spjd zfs_range_unlock(rl); 4600219089Spjd ZFS_EXIT(zfsvfs); 4601219089Spjd return (0); 4602219089Spjd } 4603219089Spjd 4604219089Spjd len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); 4605219089Spjd 4606219089Spjd for (off = io_off; io_off < off + len; io_off += io_len) { 4607219089Spjd if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 4608219089Spjd pp = page_lookup(vp, io_off, 4609219089Spjd (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); 4610219089Spjd } else { 4611219089Spjd pp = page_lookup_nowait(vp, io_off, 4612219089Spjd (flags & B_FREE) ? SE_EXCL : SE_SHARED); 4613219089Spjd } 4614219089Spjd 4615219089Spjd if (pp != NULL && pvn_getdirty(pp, flags)) { 4616219089Spjd int err; 4617219089Spjd 4618219089Spjd /* 4619219089Spjd * Found a dirty page to push 4620219089Spjd */ 4621219089Spjd err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); 4622219089Spjd if (err) 4623219089Spjd error = err; 4624219089Spjd } else { 4625219089Spjd io_len = PAGESIZE; 4626219089Spjd } 4627219089Spjd } 4628219089Spjdout: 4629219089Spjd zfs_range_unlock(rl); 4630219089Spjd if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4631219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 4632219089Spjd ZFS_EXIT(zfsvfs); 4633219089Spjd return (error); 4634219089Spjd} 4635219089Spjd#endif /* sun */ 4636219089Spjd 4637219089Spjd/*ARGSUSED*/ 4638168962Spjdvoid 4639185029Spjdzfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4640168404Spjd{ 4641168962Spjd znode_t *zp = VTOZ(vp); 4642168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4643168962Spjd int error; 4644168404Spjd 4645185029Spjd 
rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4646219089Spjd if (zp->z_sa_hdl == NULL) { 4647185029Spjd /* 4648185029Spjd * The fs has been unmounted, or we did a 4649185029Spjd * suspend/resume and this file no longer exists. 4650185029Spjd */ 4651243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 4652234607Strasz vrecycle(vp); 4653243520Savg return; 4654243520Savg } 4655243520Savg 4656243520Savg mutex_enter(&zp->z_lock); 4657243520Savg if (zp->z_unlinked) { 4658243520Savg /* 4659243520Savg * Fast path to recycle a vnode of a removed file. 4660243520Savg */ 4661243520Savg mutex_exit(&zp->z_lock); 4662185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4663243520Savg vrecycle(vp); 4664168962Spjd return; 4665168404Spjd } 4666243520Savg mutex_exit(&zp->z_lock); 4667168404Spjd 4668168404Spjd if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4669168404Spjd dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4670168404Spjd 4671219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4672219089Spjd zfs_sa_upgrade_txholds(tx, zp); 4673168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 4674168404Spjd if (error) { 4675168404Spjd dmu_tx_abort(tx); 4676168404Spjd } else { 4677168404Spjd mutex_enter(&zp->z_lock); 4678219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 4679219089Spjd (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4680168404Spjd zp->z_atime_dirty = 0; 4681168404Spjd mutex_exit(&zp->z_lock); 4682168404Spjd dmu_tx_commit(tx); 4683168404Spjd } 4684168404Spjd } 4685185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4686168404Spjd} 4687168404Spjd 4688219089Spjd#ifdef sun 4689219089Spjd/* 4690219089Spjd * Bounds-check the seek operation. 
4691219089Spjd * 4692219089Spjd * IN: vp - vnode seeking within 4693219089Spjd * ooff - old file offset 4694219089Spjd * noffp - pointer to new file offset 4695219089Spjd * ct - caller context 4696219089Spjd * 4697219089Spjd * RETURN: 0 if success 4698219089Spjd * EINVAL if new offset invalid 4699219089Spjd */ 4700219089Spjd/* ARGSUSED */ 4701219089Spjdstatic int 4702219089Spjdzfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 4703219089Spjd caller_context_t *ct) 4704219089Spjd{ 4705219089Spjd if (vp->v_type == VDIR) 4706219089Spjd return (0); 4707219089Spjd return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 4708219089Spjd} 4709219089Spjd 4710219089Spjd/* 4711219089Spjd * Pre-filter the generic locking function to trap attempts to place 4712219089Spjd * a mandatory lock on a memory mapped file. 4713219089Spjd */ 4714219089Spjdstatic int 4715219089Spjdzfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 4716219089Spjd flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 4717219089Spjd{ 4718219089Spjd znode_t *zp = VTOZ(vp); 4719219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4720219089Spjd 4721219089Spjd ZFS_ENTER(zfsvfs); 4722219089Spjd ZFS_VERIFY_ZP(zp); 4723219089Spjd 4724219089Spjd /* 4725219089Spjd * We are following the UFS semantics with respect to mapcnt 4726219089Spjd * here: If we see that the file is mapped already, then we will 4727219089Spjd * return an error, but we don't worry about races between this 4728219089Spjd * function and zfs_map(). 4729219089Spjd */ 4730219089Spjd if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { 4731219089Spjd ZFS_EXIT(zfsvfs); 4732249195Smm return (SET_ERROR(EAGAIN)); 4733219089Spjd } 4734219089Spjd ZFS_EXIT(zfsvfs); 4735219089Spjd return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 4736219089Spjd} 4737219089Spjd 4738219089Spjd/* 4739219089Spjd * If we can't find a page in the cache, we will create a new page 4740219089Spjd * and fill it with file data. 
For efficiency, we may try to fill 4741219089Spjd * multiple pages at once (klustering) to fill up the supplied page 4742219089Spjd * list. Note that the pages to be filled are held with an exclusive 4743219089Spjd * lock to prevent access by other threads while they are being filled. 4744219089Spjd */ 4745219089Spjdstatic int 4746219089Spjdzfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, 4747219089Spjd caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) 4748219089Spjd{ 4749219089Spjd znode_t *zp = VTOZ(vp); 4750219089Spjd page_t *pp, *cur_pp; 4751219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 4752219089Spjd u_offset_t io_off, total; 4753219089Spjd size_t io_len; 4754219089Spjd int err; 4755219089Spjd 4756219089Spjd if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { 4757219089Spjd /* 4758219089Spjd * We only have a single page, don't bother klustering 4759219089Spjd */ 4760219089Spjd io_off = off; 4761219089Spjd io_len = PAGESIZE; 4762219089Spjd pp = page_create_va(vp, io_off, io_len, 4763219089Spjd PG_EXCL | PG_WAIT, seg, addr); 4764219089Spjd } else { 4765219089Spjd /* 4766219089Spjd * Try to find enough pages to fill the page list 4767219089Spjd */ 4768219089Spjd pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 4769219089Spjd &io_len, off, plsz, 0); 4770219089Spjd } 4771219089Spjd if (pp == NULL) { 4772219089Spjd /* 4773219089Spjd * The page already exists, nothing to do here. 4774219089Spjd */ 4775219089Spjd *pl = NULL; 4776219089Spjd return (0); 4777219089Spjd } 4778219089Spjd 4779219089Spjd /* 4780219089Spjd * Fill the pages in the kluster. 
4781219089Spjd */ 4782219089Spjd cur_pp = pp; 4783219089Spjd for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { 4784219089Spjd caddr_t va; 4785219089Spjd 4786219089Spjd ASSERT3U(io_off, ==, cur_pp->p_offset); 4787219089Spjd va = zfs_map_page(cur_pp, S_WRITE); 4788219089Spjd err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, 4789219089Spjd DMU_READ_PREFETCH); 4790219089Spjd zfs_unmap_page(cur_pp, va); 4791219089Spjd if (err) { 4792219089Spjd /* On error, toss the entire kluster */ 4793219089Spjd pvn_read_done(pp, B_ERROR); 4794219089Spjd /* convert checksum errors into IO errors */ 4795219089Spjd if (err == ECKSUM) 4796249195Smm err = SET_ERROR(EIO); 4797219089Spjd return (err); 4798219089Spjd } 4799219089Spjd cur_pp = cur_pp->p_next; 4800219089Spjd } 4801219089Spjd 4802219089Spjd /* 4803219089Spjd * Fill in the page list array from the kluster starting 4804219089Spjd * from the desired offset `off'. 4805219089Spjd * NOTE: the page list will always be null terminated. 4806219089Spjd */ 4807219089Spjd pvn_plist_init(pp, pl, plsz, off, io_len, rw); 4808219089Spjd ASSERT(pl == NULL || (*pl)->p_offset == off); 4809219089Spjd 4810219089Spjd return (0); 4811219089Spjd} 4812219089Spjd 4813219089Spjd/* 4814219089Spjd * Return pointers to the pages for the file region [off, off + len] 4815219089Spjd * in the pl array. If plsz is greater than len, this function may 4816219089Spjd * also return page pointers from after the specified region 4817219089Spjd * (i.e. the region [off, off + plsz]). These additional pages are 4818219089Spjd * only returned if they are already in the cache, or were created as 4819219089Spjd * part of a klustered read. 4820219089Spjd * 4821219089Spjd * IN: vp - vnode of file to get data from. 4822219089Spjd * off - position in file to get data from. 4823219089Spjd * len - amount of data to retrieve. 4824219089Spjd * plsz - length of provided page list. 4825219089Spjd * seg - segment to obtain pages for. 
4826219089Spjd * addr - virtual address of fault. 4827219089Spjd * rw - mode of created pages. 4828219089Spjd * cr - credentials of caller. 4829219089Spjd * ct - caller context. 4830219089Spjd * 4831219089Spjd * OUT: protp - protection mode of created pages. 4832219089Spjd * pl - list of pages created. 4833219089Spjd * 4834219089Spjd * RETURN: 0 if success 4835219089Spjd * error code if failure 4836219089Spjd * 4837219089Spjd * Timestamps: 4838219089Spjd * vp - atime updated 4839219089Spjd */ 4840219089Spjd/* ARGSUSED */ 4841219089Spjdstatic int 4842219089Spjdzfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 4843219089Spjd page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, 4844219089Spjd enum seg_rw rw, cred_t *cr, caller_context_t *ct) 4845219089Spjd{ 4846219089Spjd znode_t *zp = VTOZ(vp); 4847219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4848219089Spjd page_t **pl0 = pl; 4849219089Spjd int err = 0; 4850219089Spjd 4851219089Spjd /* we do our own caching, faultahead is unnecessary */ 4852219089Spjd if (pl == NULL) 4853219089Spjd return (0); 4854219089Spjd else if (len > plsz) 4855219089Spjd len = plsz; 4856219089Spjd else 4857219089Spjd len = P2ROUNDUP(len, PAGESIZE); 4858219089Spjd ASSERT(plsz >= len); 4859219089Spjd 4860219089Spjd ZFS_ENTER(zfsvfs); 4861219089Spjd ZFS_VERIFY_ZP(zp); 4862219089Spjd 4863219089Spjd if (protp) 4864219089Spjd *protp = PROT_ALL; 4865219089Spjd 4866219089Spjd /* 4867219089Spjd * Loop through the requested range [off, off + len) looking 4868219089Spjd * for pages. If we don't find a page, we will need to create 4869219089Spjd * a new page and fill it with data from the file. 
4870219089Spjd */ 4871219089Spjd while (len > 0) { 4872219089Spjd if (*pl = page_lookup(vp, off, SE_SHARED)) 4873219089Spjd *(pl+1) = NULL; 4874219089Spjd else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) 4875219089Spjd goto out; 4876219089Spjd while (*pl) { 4877219089Spjd ASSERT3U((*pl)->p_offset, ==, off); 4878219089Spjd off += PAGESIZE; 4879219089Spjd addr += PAGESIZE; 4880219089Spjd if (len > 0) { 4881219089Spjd ASSERT3U(len, >=, PAGESIZE); 4882219089Spjd len -= PAGESIZE; 4883219089Spjd } 4884219089Spjd ASSERT3U(plsz, >=, PAGESIZE); 4885219089Spjd plsz -= PAGESIZE; 4886219089Spjd pl++; 4887219089Spjd } 4888219089Spjd } 4889219089Spjd 4890219089Spjd /* 4891219089Spjd * Fill out the page array with any pages already in the cache. 4892219089Spjd */ 4893219089Spjd while (plsz > 0 && 4894219089Spjd (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { 4895219089Spjd off += PAGESIZE; 4896219089Spjd plsz -= PAGESIZE; 4897219089Spjd } 4898219089Spjdout: 4899219089Spjd if (err) { 4900219089Spjd /* 4901219089Spjd * Release any pages we have previously locked. 4902219089Spjd */ 4903219089Spjd while (pl > pl0) 4904219089Spjd page_unlock(*--pl); 4905219089Spjd } else { 4906219089Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4907219089Spjd } 4908219089Spjd 4909219089Spjd *pl = NULL; 4910219089Spjd 4911219089Spjd ZFS_EXIT(zfsvfs); 4912219089Spjd return (err); 4913219089Spjd} 4914219089Spjd 4915219089Spjd/* 4916219089Spjd * Request a memory map for a section of a file. 
This code interacts 4917219089Spjd * with common code and the VM system as follows: 4918219089Spjd * 4919219089Spjd * common code calls mmap(), which ends up in smmap_common() 4920219089Spjd * 4921219089Spjd * this calls VOP_MAP(), which takes you into (say) zfs 4922219089Spjd * 4923219089Spjd * zfs_map() calls as_map(), passing segvn_create() as the callback 4924219089Spjd * 4925219089Spjd * segvn_create() creates the new segment and calls VOP_ADDMAP() 4926219089Spjd * 4927219089Spjd * zfs_addmap() updates z_mapcnt 4928219089Spjd */ 4929219089Spjd/*ARGSUSED*/ 4930219089Spjdstatic int 4931219089Spjdzfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 4932219089Spjd size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4933219089Spjd caller_context_t *ct) 4934219089Spjd{ 4935219089Spjd znode_t *zp = VTOZ(vp); 4936219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4937219089Spjd segvn_crargs_t vn_a; 4938219089Spjd int error; 4939219089Spjd 4940219089Spjd ZFS_ENTER(zfsvfs); 4941219089Spjd ZFS_VERIFY_ZP(zp); 4942219089Spjd 4943219089Spjd if ((prot & PROT_WRITE) && (zp->z_pflags & 4944219089Spjd (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { 4945219089Spjd ZFS_EXIT(zfsvfs); 4946249195Smm return (SET_ERROR(EPERM)); 4947219089Spjd } 4948219089Spjd 4949219089Spjd if ((prot & (PROT_READ | PROT_EXEC)) && 4950219089Spjd (zp->z_pflags & ZFS_AV_QUARANTINED)) { 4951219089Spjd ZFS_EXIT(zfsvfs); 4952249195Smm return (SET_ERROR(EACCES)); 4953219089Spjd } 4954219089Spjd 4955219089Spjd if (vp->v_flag & VNOMAP) { 4956219089Spjd ZFS_EXIT(zfsvfs); 4957249195Smm return (SET_ERROR(ENOSYS)); 4958219089Spjd } 4959219089Spjd 4960219089Spjd if (off < 0 || len > MAXOFFSET_T - off) { 4961219089Spjd ZFS_EXIT(zfsvfs); 4962249195Smm return (SET_ERROR(ENXIO)); 4963219089Spjd } 4964219089Spjd 4965219089Spjd if (vp->v_type != VREG) { 4966219089Spjd ZFS_EXIT(zfsvfs); 4967249195Smm return (SET_ERROR(ENODEV)); 4968219089Spjd } 4969219089Spjd 4970219089Spjd /* 4971219089Spjd * 
If file is locked, disallow mapping. 4972219089Spjd */ 4973219089Spjd if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) { 4974219089Spjd ZFS_EXIT(zfsvfs); 4975249195Smm return (SET_ERROR(EAGAIN)); 4976219089Spjd } 4977219089Spjd 4978219089Spjd as_rangelock(as); 4979219089Spjd error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 4980219089Spjd if (error != 0) { 4981219089Spjd as_rangeunlock(as); 4982219089Spjd ZFS_EXIT(zfsvfs); 4983219089Spjd return (error); 4984219089Spjd } 4985219089Spjd 4986219089Spjd vn_a.vp = vp; 4987219089Spjd vn_a.offset = (u_offset_t)off; 4988219089Spjd vn_a.type = flags & MAP_TYPE; 4989219089Spjd vn_a.prot = prot; 4990219089Spjd vn_a.maxprot = maxprot; 4991219089Spjd vn_a.cred = cr; 4992219089Spjd vn_a.amp = NULL; 4993219089Spjd vn_a.flags = flags & ~MAP_TYPE; 4994219089Spjd vn_a.szc = 0; 4995219089Spjd vn_a.lgrp_mem_policy_flags = 0; 4996219089Spjd 4997219089Spjd error = as_map(as, *addrp, len, segvn_create, &vn_a); 4998219089Spjd 4999219089Spjd as_rangeunlock(as); 5000219089Spjd ZFS_EXIT(zfsvfs); 5001219089Spjd return (error); 5002219089Spjd} 5003219089Spjd 5004219089Spjd/* ARGSUSED */ 5005219089Spjdstatic int 5006219089Spjdzfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 5007219089Spjd size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 5008219089Spjd caller_context_t *ct) 5009219089Spjd{ 5010219089Spjd uint64_t pages = btopr(len); 5011219089Spjd 5012219089Spjd atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); 5013219089Spjd return (0); 5014219089Spjd} 5015219089Spjd 5016219089Spjd/* 5017219089Spjd * The reason we push dirty pages as part of zfs_delmap() is so that we get a 5018219089Spjd * more accurate mtime for the associated file. Since we don't have a way of 5019219089Spjd * detecting when the data was actually modified, we have to resort to 5020219089Spjd * heuristics. If an explicit msync() is done, then we mark the mtime when the 5021219089Spjd * last page is pushed. 
The problem occurs when the msync() call is omitted, 5022219089Spjd * which by far the most common case: 5023219089Spjd * 5024219089Spjd * open() 5025219089Spjd * mmap() 5026219089Spjd * <modify memory> 5027219089Spjd * munmap() 5028219089Spjd * close() 5029219089Spjd * <time lapse> 5030219089Spjd * putpage() via fsflush 5031219089Spjd * 5032219089Spjd * If we wait until fsflush to come along, we can have a modification time that 5033219089Spjd * is some arbitrary point in the future. In order to prevent this in the 5034219089Spjd * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is 5035219089Spjd * torn down. 5036219089Spjd */ 5037219089Spjd/* ARGSUSED */ 5038219089Spjdstatic int 5039219089Spjdzfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 5040219089Spjd size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 5041219089Spjd caller_context_t *ct) 5042219089Spjd{ 5043219089Spjd uint64_t pages = btopr(len); 5044219089Spjd 5045219089Spjd ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 5046219089Spjd atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 5047219089Spjd 5048219089Spjd if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 5049219089Spjd vn_has_cached_data(vp)) 5050219089Spjd (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 5051219089Spjd 5052219089Spjd return (0); 5053219089Spjd} 5054219089Spjd 5055219089Spjd/* 5056219089Spjd * Free or allocate space in a file. Currently, this function only 5057219089Spjd * supports the `F_FREESP' command. However, this command is somewhat 5058219089Spjd * misnamed, as its functionality includes the ability to allocate as 5059219089Spjd * well as free space. 5060219089Spjd * 5061219089Spjd * IN: vp - vnode of file to free data in. 5062219089Spjd * cmd - action to take (only F_FREESP supported). 5063219089Spjd * bfp - section of file to free/alloc. 5064219089Spjd * flag - current file open mode flags. 5065219089Spjd * offset - current file offset. 
5066219089Spjd * cr - credentials of caller [UNUSED]. 5067219089Spjd * ct - caller context. 5068219089Spjd * 5069219089Spjd * RETURN: 0 if success 5070219089Spjd * error code if failure 5071219089Spjd * 5072219089Spjd * Timestamps: 5073219089Spjd * vp - ctime|mtime updated 5074219089Spjd */ 5075219089Spjd/* ARGSUSED */ 5076219089Spjdstatic int 5077219089Spjdzfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, 5078219089Spjd offset_t offset, cred_t *cr, caller_context_t *ct) 5079219089Spjd{ 5080219089Spjd znode_t *zp = VTOZ(vp); 5081219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5082219089Spjd uint64_t off, len; 5083219089Spjd int error; 5084219089Spjd 5085219089Spjd ZFS_ENTER(zfsvfs); 5086219089Spjd ZFS_VERIFY_ZP(zp); 5087219089Spjd 5088219089Spjd if (cmd != F_FREESP) { 5089219089Spjd ZFS_EXIT(zfsvfs); 5090249195Smm return (SET_ERROR(EINVAL)); 5091219089Spjd } 5092219089Spjd 5093219089Spjd if (error = convoff(vp, bfp, 0, offset)) { 5094219089Spjd ZFS_EXIT(zfsvfs); 5095219089Spjd return (error); 5096219089Spjd } 5097219089Spjd 5098219089Spjd if (bfp->l_len < 0) { 5099219089Spjd ZFS_EXIT(zfsvfs); 5100249195Smm return (SET_ERROR(EINVAL)); 5101219089Spjd } 5102219089Spjd 5103219089Spjd off = bfp->l_start; 5104219089Spjd len = bfp->l_len; /* 0 means from off to end of file */ 5105219089Spjd 5106219089Spjd error = zfs_freesp(zp, off, len, flag, TRUE); 5107219089Spjd 5108219089Spjd ZFS_EXIT(zfsvfs); 5109219089Spjd return (error); 5110219089Spjd} 5111219089Spjd#endif /* sun */ 5112219089Spjd 5113168404SpjdCTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 5114168404SpjdCTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 5115168404Spjd 5116185029Spjd/*ARGSUSED*/ 5117168404Spjdstatic int 5118185029Spjdzfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 5119168404Spjd{ 5120168404Spjd znode_t *zp = VTOZ(vp); 5121168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5122185029Spjd uint32_t gen; 5123219089Spjd uint64_t gen64; 5124168404Spjd uint64_t object = zp->z_id; 
5125168404Spjd zfid_short_t *zfid; 5126219089Spjd int size, i, error; 5127168404Spjd 5128168404Spjd ZFS_ENTER(zfsvfs); 5129185029Spjd ZFS_VERIFY_ZP(zp); 5130168404Spjd 5131219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 5132219089Spjd &gen64, sizeof (uint64_t))) != 0) { 5133219089Spjd ZFS_EXIT(zfsvfs); 5134219089Spjd return (error); 5135219089Spjd } 5136219089Spjd 5137219089Spjd gen = (uint32_t)gen64; 5138219089Spjd 5139168404Spjd size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 5140249195Smm 5141249195Smm#ifdef illumos 5142249195Smm if (fidp->fid_len < size) { 5143249195Smm fidp->fid_len = size; 5144249195Smm ZFS_EXIT(zfsvfs); 5145249195Smm return (SET_ERROR(ENOSPC)); 5146249195Smm } 5147249195Smm#else 5148168404Spjd fidp->fid_len = size; 5149249195Smm#endif 5150168404Spjd 5151168404Spjd zfid = (zfid_short_t *)fidp; 5152168404Spjd 5153168404Spjd zfid->zf_len = size; 5154168404Spjd 5155168404Spjd for (i = 0; i < sizeof (zfid->zf_object); i++) 5156168404Spjd zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 5157168404Spjd 5158168404Spjd /* Must have a non-zero generation number to distinguish from .zfs */ 5159168404Spjd if (gen == 0) 5160168404Spjd gen = 1; 5161168404Spjd for (i = 0; i < sizeof (zfid->zf_gen); i++) 5162168404Spjd zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 5163168404Spjd 5164168404Spjd if (size == LONG_FID_LEN) { 5165168404Spjd uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 5166169023Spjd zfid_long_t *zlfid; 5167168404Spjd 5168168404Spjd zlfid = (zfid_long_t *)fidp; 5169168404Spjd 5170168404Spjd for (i = 0; i < sizeof (zlfid->zf_setid); i++) 5171168404Spjd zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 5172168404Spjd 5173168404Spjd /* XXX - this should be the generation number for the objset */ 5174168404Spjd for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 5175168404Spjd zlfid->zf_setgen[i] = 0; 5176168404Spjd } 5177168404Spjd 5178168404Spjd ZFS_EXIT(zfsvfs); 5179168404Spjd return (0); 5180168404Spjd} 
5181168404Spjd 5182168404Spjdstatic int 5183185029Spjdzfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 5184185029Spjd caller_context_t *ct) 5185168404Spjd{ 5186168404Spjd znode_t *zp, *xzp; 5187168404Spjd zfsvfs_t *zfsvfs; 5188168404Spjd zfs_dirlock_t *dl; 5189168404Spjd int error; 5190168404Spjd 5191168404Spjd switch (cmd) { 5192168404Spjd case _PC_LINK_MAX: 5193168404Spjd *valp = INT_MAX; 5194168404Spjd return (0); 5195168404Spjd 5196168404Spjd case _PC_FILESIZEBITS: 5197168404Spjd *valp = 64; 5198168404Spjd return (0); 5199219089Spjd#ifdef sun 5200168404Spjd case _PC_XATTR_EXISTS: 5201168404Spjd zp = VTOZ(vp); 5202168404Spjd zfsvfs = zp->z_zfsvfs; 5203168404Spjd ZFS_ENTER(zfsvfs); 5204185029Spjd ZFS_VERIFY_ZP(zp); 5205168404Spjd *valp = 0; 5206168404Spjd error = zfs_dirent_lock(&dl, zp, "", &xzp, 5207185029Spjd ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 5208168404Spjd if (error == 0) { 5209168404Spjd zfs_dirent_unlock(dl); 5210168404Spjd if (!zfs_dirempty(xzp)) 5211168404Spjd *valp = 1; 5212168404Spjd VN_RELE(ZTOV(xzp)); 5213168404Spjd } else if (error == ENOENT) { 5214168404Spjd /* 5215168404Spjd * If there aren't extended attributes, it's the 5216168404Spjd * same as having zero of them. 
5217168404Spjd */ 5218168404Spjd error = 0; 5219168404Spjd } 5220168404Spjd ZFS_EXIT(zfsvfs); 5221168404Spjd return (error); 5222168404Spjd 5223219089Spjd case _PC_SATTR_ENABLED: 5224219089Spjd case _PC_SATTR_EXISTS: 5225219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 5226219089Spjd (vp->v_type == VREG || vp->v_type == VDIR); 5227219089Spjd return (0); 5228219089Spjd 5229219089Spjd case _PC_ACCESS_FILTERING: 5230219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 5231219089Spjd vp->v_type == VDIR; 5232219089Spjd return (0); 5233219089Spjd 5234219089Spjd case _PC_ACL_ENABLED: 5235219089Spjd *valp = _ACL_ACE_ENABLED; 5236219089Spjd return (0); 5237219089Spjd#endif /* sun */ 5238219089Spjd case _PC_MIN_HOLE_SIZE: 5239219089Spjd *valp = (int)SPA_MINBLOCKSIZE; 5240219089Spjd return (0); 5241219089Spjd#ifdef sun 5242219089Spjd case _PC_TIMESTAMP_RESOLUTION: 5243219089Spjd /* nanosecond timestamp resolution */ 5244219089Spjd *valp = 1L; 5245219089Spjd return (0); 5246219089Spjd#endif /* sun */ 5247168404Spjd case _PC_ACL_EXTENDED: 5248196949Strasz *valp = 0; 5249168404Spjd return (0); 5250168404Spjd 5251196949Strasz case _PC_ACL_NFS4: 5252196949Strasz *valp = 1; 5253196949Strasz return (0); 5254196949Strasz 5255196949Strasz case _PC_ACL_PATH_MAX: 5256196949Strasz *valp = ACL_MAX_ENTRIES; 5257196949Strasz return (0); 5258196949Strasz 5259168404Spjd default: 5260168962Spjd return (EOPNOTSUPP); 5261168404Spjd } 5262168404Spjd} 5263168404Spjd 5264168404Spjd/*ARGSUSED*/ 5265168404Spjdstatic int 5266185029Spjdzfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5267185029Spjd caller_context_t *ct) 5268168404Spjd{ 5269168404Spjd znode_t *zp = VTOZ(vp); 5270168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5271168404Spjd int error; 5272185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 5273168404Spjd 5274168404Spjd ZFS_ENTER(zfsvfs); 5275185029Spjd ZFS_VERIFY_ZP(zp); 5276185029Spjd error = zfs_getacl(zp, vsecp, skipaclchk, cr); 5277168404Spjd ZFS_EXIT(zfsvfs); 5278168404Spjd 5279168404Spjd return (error); 5280168404Spjd} 5281168404Spjd 5282168404Spjd/*ARGSUSED*/ 5283228685Spjdint 5284185029Spjdzfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5285185029Spjd caller_context_t *ct) 5286168404Spjd{ 5287168404Spjd znode_t *zp = VTOZ(vp); 5288168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5289168404Spjd int error; 5290185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 5291219089Spjd zilog_t *zilog = zfsvfs->z_log; 5292168404Spjd 5293168404Spjd ZFS_ENTER(zfsvfs); 5294185029Spjd ZFS_VERIFY_ZP(zp); 5295219089Spjd 5296185029Spjd error = zfs_setacl(zp, vsecp, skipaclchk, cr); 5297219089Spjd 5298219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5299219089Spjd zil_commit(zilog, 0); 5300219089Spjd 5301168404Spjd ZFS_EXIT(zfsvfs); 5302168404Spjd return (error); 5303168404Spjd} 5304168404Spjd 5305219089Spjd#ifdef sun 5306219089Spjd/* 5307219089Spjd * Tunable, both must be a power of 2. 
5308219089Spjd * 5309219089Spjd * zcr_blksz_min: the smallest read we may consider to loan out an arcbuf 5310219089Spjd * zcr_blksz_max: if set to less than the file block size, allow loaning out of 5311219089Spjd * an arcbuf for a partial block read 5312219089Spjd */ 5313219089Spjdint zcr_blksz_min = (1 << 10); /* 1K */ 5314219089Spjdint zcr_blksz_max = (1 << 17); /* 128K */ 5315219089Spjd 5316219089Spjd/*ARGSUSED*/ 5317168962Spjdstatic int 5318219089Spjdzfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 5319219089Spjd caller_context_t *ct) 5320219089Spjd{ 5321219089Spjd znode_t *zp = VTOZ(vp); 5322219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5323219089Spjd int max_blksz = zfsvfs->z_max_blksz; 5324219089Spjd uio_t *uio = &xuio->xu_uio; 5325219089Spjd ssize_t size = uio->uio_resid; 5326219089Spjd offset_t offset = uio->uio_loffset; 5327219089Spjd int blksz; 5328219089Spjd int fullblk, i; 5329219089Spjd arc_buf_t *abuf; 5330219089Spjd ssize_t maxsize; 5331219089Spjd int preamble, postamble; 5332219089Spjd 5333219089Spjd if (xuio->xu_type != UIOTYPE_ZEROCOPY) 5334249195Smm return (SET_ERROR(EINVAL)); 5335219089Spjd 5336219089Spjd ZFS_ENTER(zfsvfs); 5337219089Spjd ZFS_VERIFY_ZP(zp); 5338219089Spjd switch (ioflag) { 5339219089Spjd case UIO_WRITE: 5340219089Spjd /* 5341219089Spjd * Loan out an arc_buf for write if write size is bigger than 5342219089Spjd * max_blksz, and the file's block size is also max_blksz. 5343219089Spjd */ 5344219089Spjd blksz = max_blksz; 5345219089Spjd if (size < blksz || zp->z_blksz != blksz) { 5346219089Spjd ZFS_EXIT(zfsvfs); 5347249195Smm return (SET_ERROR(EINVAL)); 5348219089Spjd } 5349219089Spjd /* 5350219089Spjd * Caller requests buffers for write before knowing where the 5351219089Spjd * write offset might be (e.g. NFS TCP write). 
5352219089Spjd */ 5353219089Spjd if (offset == -1) { 5354219089Spjd preamble = 0; 5355219089Spjd } else { 5356219089Spjd preamble = P2PHASE(offset, blksz); 5357219089Spjd if (preamble) { 5358219089Spjd preamble = blksz - preamble; 5359219089Spjd size -= preamble; 5360219089Spjd } 5361219089Spjd } 5362219089Spjd 5363219089Spjd postamble = P2PHASE(size, blksz); 5364219089Spjd size -= postamble; 5365219089Spjd 5366219089Spjd fullblk = size / blksz; 5367219089Spjd (void) dmu_xuio_init(xuio, 5368219089Spjd (preamble != 0) + fullblk + (postamble != 0)); 5369219089Spjd DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble, 5370219089Spjd int, postamble, int, 5371219089Spjd (preamble != 0) + fullblk + (postamble != 0)); 5372219089Spjd 5373219089Spjd /* 5374219089Spjd * Have to fix iov base/len for partial buffers. They 5375219089Spjd * currently represent full arc_buf's. 5376219089Spjd */ 5377219089Spjd if (preamble) { 5378219089Spjd /* data begins in the middle of the arc_buf */ 5379219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5380219089Spjd blksz); 5381219089Spjd ASSERT(abuf); 5382219089Spjd (void) dmu_xuio_add(xuio, abuf, 5383219089Spjd blksz - preamble, preamble); 5384219089Spjd } 5385219089Spjd 5386219089Spjd for (i = 0; i < fullblk; i++) { 5387219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5388219089Spjd blksz); 5389219089Spjd ASSERT(abuf); 5390219089Spjd (void) dmu_xuio_add(xuio, abuf, 0, blksz); 5391219089Spjd } 5392219089Spjd 5393219089Spjd if (postamble) { 5394219089Spjd /* data ends in the middle of the arc_buf */ 5395219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5396219089Spjd blksz); 5397219089Spjd ASSERT(abuf); 5398219089Spjd (void) dmu_xuio_add(xuio, abuf, 0, postamble); 5399219089Spjd } 5400219089Spjd break; 5401219089Spjd case UIO_READ: 5402219089Spjd /* 5403219089Spjd * Loan out an arc_buf for read if the read size is larger than 5404219089Spjd * the current file block size. 
Block alignment is not 5405219089Spjd * considered. Partial arc_buf will be loaned out for read. 5406219089Spjd */ 5407219089Spjd blksz = zp->z_blksz; 5408219089Spjd if (blksz < zcr_blksz_min) 5409219089Spjd blksz = zcr_blksz_min; 5410219089Spjd if (blksz > zcr_blksz_max) 5411219089Spjd blksz = zcr_blksz_max; 5412219089Spjd /* avoid potential complexity of dealing with it */ 5413219089Spjd if (blksz > max_blksz) { 5414219089Spjd ZFS_EXIT(zfsvfs); 5415249195Smm return (SET_ERROR(EINVAL)); 5416219089Spjd } 5417219089Spjd 5418219089Spjd maxsize = zp->z_size - uio->uio_loffset; 5419219089Spjd if (size > maxsize) 5420219089Spjd size = maxsize; 5421219089Spjd 5422219089Spjd if (size < blksz || vn_has_cached_data(vp)) { 5423219089Spjd ZFS_EXIT(zfsvfs); 5424249195Smm return (SET_ERROR(EINVAL)); 5425219089Spjd } 5426219089Spjd break; 5427219089Spjd default: 5428219089Spjd ZFS_EXIT(zfsvfs); 5429249195Smm return (SET_ERROR(EINVAL)); 5430219089Spjd } 5431219089Spjd 5432219089Spjd uio->uio_extflg = UIO_XUIO; 5433219089Spjd XUIO_XUZC_RW(xuio) = ioflag; 5434219089Spjd ZFS_EXIT(zfsvfs); 5435219089Spjd return (0); 5436219089Spjd} 5437219089Spjd 5438219089Spjd/*ARGSUSED*/ 5439219089Spjdstatic int 5440219089Spjdzfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 5441219089Spjd{ 5442219089Spjd int i; 5443219089Spjd arc_buf_t *abuf; 5444219089Spjd int ioflag = XUIO_XUZC_RW(xuio); 5445219089Spjd 5446219089Spjd ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 5447219089Spjd 5448219089Spjd i = dmu_xuio_cnt(xuio); 5449219089Spjd while (i-- > 0) { 5450219089Spjd abuf = dmu_xuio_arcbuf(xuio, i); 5451219089Spjd /* 5452219089Spjd * if abuf == NULL, it must be a write buffer 5453219089Spjd * that has been returned in zfs_write(). 
5454219089Spjd */ 5455219089Spjd if (abuf) 5456219089Spjd dmu_return_arcbuf(abuf); 5457219089Spjd ASSERT(abuf || ioflag == UIO_WRITE); 5458219089Spjd } 5459219089Spjd 5460219089Spjd dmu_xuio_fini(xuio); 5461219089Spjd return (0); 5462219089Spjd} 5463219089Spjd 5464219089Spjd/* 5465219089Spjd * Predeclare these here so that the compiler assumes that 5466219089Spjd * this is an "old style" function declaration that does 5467219089Spjd * not include arguments => we won't get type mismatch errors 5468219089Spjd * in the initializations that follow. 5469219089Spjd */ 5470219089Spjdstatic int zfs_inval(); 5471219089Spjdstatic int zfs_isdir(); 5472219089Spjd 5473219089Spjdstatic int 5474219089Spjdzfs_inval() 5475219089Spjd{ 5476249195Smm return (SET_ERROR(EINVAL)); 5477219089Spjd} 5478219089Spjd 5479219089Spjdstatic int 5480219089Spjdzfs_isdir() 5481219089Spjd{ 5482249195Smm return (SET_ERROR(EISDIR)); 5483219089Spjd} 5484219089Spjd/* 5485219089Spjd * Directory vnode operations template 5486219089Spjd */ 5487219089Spjdvnodeops_t *zfs_dvnodeops; 5488219089Spjdconst fs_operation_def_t zfs_dvnodeops_template[] = { 5489219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5490219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5491219089Spjd VOPNAME_READ, { .error = zfs_isdir }, 5492219089Spjd VOPNAME_WRITE, { .error = zfs_isdir }, 5493219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5494219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5495219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5496219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5497219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5498219089Spjd VOPNAME_CREATE, { .vop_create = zfs_create }, 5499219089Spjd VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5500219089Spjd VOPNAME_LINK, { .vop_link = zfs_link }, 5501219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5502219089Spjd VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, 5503219089Spjd VOPNAME_RMDIR, { .vop_rmdir = 
zfs_rmdir }, 5504219089Spjd VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5505219089Spjd VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, 5506219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5507219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5508219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5509219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5510219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5511219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5512219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5513219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5514219089Spjd NULL, NULL 5515219089Spjd}; 5516219089Spjd 5517219089Spjd/* 5518219089Spjd * Regular file vnode operations template 5519219089Spjd */ 5520219089Spjdvnodeops_t *zfs_fvnodeops; 5521219089Spjdconst fs_operation_def_t zfs_fvnodeops_template[] = { 5522219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5523219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5524219089Spjd VOPNAME_READ, { .vop_read = zfs_read }, 5525219089Spjd VOPNAME_WRITE, { .vop_write = zfs_write }, 5526219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5527219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5528219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5529219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5530219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5531219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5532219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5533219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5534219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5535219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5536219089Spjd VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, 5537219089Spjd VOPNAME_SPACE, { .vop_space = zfs_space }, 5538219089Spjd VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, 5539219089Spjd VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, 
5540219089Spjd VOPNAME_MAP, { .vop_map = zfs_map }, 5541219089Spjd VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, 5542219089Spjd VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, 5543219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5544219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5545219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5546219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5547219089Spjd VOPNAME_REQZCBUF, { .vop_reqzcbuf = zfs_reqzcbuf }, 5548219089Spjd VOPNAME_RETZCBUF, { .vop_retzcbuf = zfs_retzcbuf }, 5549219089Spjd NULL, NULL 5550219089Spjd}; 5551219089Spjd 5552219089Spjd/* 5553219089Spjd * Symbolic link vnode operations template 5554219089Spjd */ 5555219089Spjdvnodeops_t *zfs_symvnodeops; 5556219089Spjdconst fs_operation_def_t zfs_symvnodeops_template[] = { 5557219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5558219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5559219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5560219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5561219089Spjd VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, 5562219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5563219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5564219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5565219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5566219089Spjd NULL, NULL 5567219089Spjd}; 5568219089Spjd 5569219089Spjd/* 5570219089Spjd * special share hidden files vnode operations template 5571219089Spjd */ 5572219089Spjdvnodeops_t *zfs_sharevnodeops; 5573219089Spjdconst fs_operation_def_t zfs_sharevnodeops_template[] = { 5574219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5575219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5576219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5577219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5578219089Spjd VOPNAME_PATHCONF, { 
.vop_pathconf = zfs_pathconf }, 5579219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5580219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5581219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5582219089Spjd NULL, NULL 5583219089Spjd}; 5584219089Spjd 5585219089Spjd/* 5586219089Spjd * Extended attribute directory vnode operations template 5587219089Spjd * This template is identical to the directory vnodes 5588219089Spjd * operation template except for restricted operations: 5589219089Spjd * VOP_MKDIR() 5590219089Spjd * VOP_SYMLINK() 5591219089Spjd * Note that there are other restrictions embedded in: 5592219089Spjd * zfs_create() - restrict type to VREG 5593219089Spjd * zfs_link() - no links into/out of attribute space 5594219089Spjd * zfs_rename() - no moves into/out of attribute space 5595219089Spjd */ 5596219089Spjdvnodeops_t *zfs_xdvnodeops; 5597219089Spjdconst fs_operation_def_t zfs_xdvnodeops_template[] = { 5598219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5599219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5600219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5601219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5602219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5603219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5604219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5605219089Spjd VOPNAME_CREATE, { .vop_create = zfs_create }, 5606219089Spjd VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5607219089Spjd VOPNAME_LINK, { .vop_link = zfs_link }, 5608219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5609219089Spjd VOPNAME_MKDIR, { .error = zfs_inval }, 5610219089Spjd VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5611219089Spjd VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5612219089Spjd VOPNAME_SYMLINK, { .error = zfs_inval }, 5613219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5614219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 
5615219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5616219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5617219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5618219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5619219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5620219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5621219089Spjd NULL, NULL 5622219089Spjd}; 5623219089Spjd 5624219089Spjd/* 5625219089Spjd * Error vnode operations template 5626219089Spjd */ 5627219089Spjdvnodeops_t *zfs_evnodeops; 5628219089Spjdconst fs_operation_def_t zfs_evnodeops_template[] = { 5629219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5630219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5631219089Spjd NULL, NULL 5632219089Spjd}; 5633219089Spjd#endif /* sun */ 5634219089Spjd 5635219089Spjdstatic int 5636213673Spjdioflags(int ioflags) 5637213673Spjd{ 5638213673Spjd int flags = 0; 5639213673Spjd 5640213673Spjd if (ioflags & IO_APPEND) 5641213673Spjd flags |= FAPPEND; 5642213673Spjd if (ioflags & IO_NDELAY) 5643213673Spjd flags |= FNONBLOCK; 5644213673Spjd if (ioflags & IO_SYNC) 5645213673Spjd flags |= (FSYNC | FDSYNC | FRSYNC); 5646213673Spjd 5647213673Spjd return (flags); 5648213673Spjd} 5649213673Spjd 5650213673Spjdstatic int 5651213937Savgzfs_getpages(struct vnode *vp, vm_page_t *m, int count, int reqpage) 5652213937Savg{ 5653213937Savg znode_t *zp = VTOZ(vp); 5654213937Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5655213937Savg objset_t *os = zp->z_zfsvfs->z_os; 5656243517Savg vm_page_t mfirst, mlast, mreq; 5657213937Savg vm_object_t object; 5658213937Savg caddr_t va; 5659213937Savg struct sf_buf *sf; 5660243517Savg off_t startoff, endoff; 5661213937Savg int i, error; 5662243517Savg vm_pindex_t reqstart, reqend; 5663243517Savg int pcount, lsize, reqsize, size; 5664213937Savg 5665213937Savg ZFS_ENTER(zfsvfs); 5666213937Savg ZFS_VERIFY_ZP(zp); 5667213937Savg 5668243517Savg pcount = 
OFF_TO_IDX(round_page(count)); 5669213937Savg mreq = m[reqpage]; 5670213937Savg object = mreq->object; 5671213937Savg error = 0; 5672213937Savg 5673213937Savg KASSERT(vp->v_object == object, ("mismatching object")); 5674213937Savg 5675243517Savg if (pcount > 1 && zp->z_blksz > PAGESIZE) { 5676243517Savg startoff = rounddown(IDX_TO_OFF(mreq->pindex), zp->z_blksz); 5677243517Savg reqstart = OFF_TO_IDX(round_page(startoff)); 5678243517Savg if (reqstart < m[0]->pindex) 5679243517Savg reqstart = 0; 5680243517Savg else 5681243517Savg reqstart = reqstart - m[0]->pindex; 5682243517Savg endoff = roundup(IDX_TO_OFF(mreq->pindex) + PAGE_SIZE, 5683243517Savg zp->z_blksz); 5684243517Savg reqend = OFF_TO_IDX(trunc_page(endoff)) - 1; 5685243517Savg if (reqend > m[pcount - 1]->pindex) 5686243517Savg reqend = m[pcount - 1]->pindex; 5687243517Savg reqsize = reqend - m[reqstart]->pindex + 1; 5688243517Savg KASSERT(reqstart <= reqpage && reqpage < reqstart + reqsize, 5689243517Savg ("reqpage beyond [reqstart, reqstart + reqsize[ bounds")); 5690243517Savg } else { 5691243517Savg reqstart = reqpage; 5692243517Savg reqsize = 1; 5693243517Savg } 5694243517Savg mfirst = m[reqstart]; 5695243517Savg mlast = m[reqstart + reqsize - 1]; 5696243517Savg 5697248084Sattilio zfs_vmobject_wlock(object); 5698213937Savg 5699243517Savg for (i = 0; i < reqstart; i++) { 5700243517Savg vm_page_lock(m[i]); 5701243517Savg vm_page_free(m[i]); 5702243517Savg vm_page_unlock(m[i]); 5703213937Savg } 5704243517Savg for (i = reqstart + reqsize; i < pcount; i++) { 5705243517Savg vm_page_lock(m[i]); 5706243517Savg vm_page_free(m[i]); 5707243517Savg vm_page_unlock(m[i]); 5708243517Savg } 5709213937Savg 5710243517Savg if (mreq->valid && reqsize == 1) { 5711213937Savg if (mreq->valid != VM_PAGE_BITS_ALL) 5712213937Savg vm_page_zero_invalid(mreq, TRUE); 5713248084Sattilio zfs_vmobject_wunlock(object); 5714213937Savg ZFS_EXIT(zfsvfs); 5715248084Sattilio return (zfs_vm_pagerret_ok); 5716213937Savg } 5717213937Savg 
5718213937Savg PCPU_INC(cnt.v_vnodein); 5719243517Savg PCPU_ADD(cnt.v_vnodepgsin, reqsize); 5720213937Savg 5721213937Savg if (IDX_TO_OFF(mreq->pindex) >= object->un_pager.vnp.vnp_size) { 5722243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5723243517Savg if (i != reqpage) { 5724243517Savg vm_page_lock(m[i]); 5725243517Savg vm_page_free(m[i]); 5726243517Savg vm_page_unlock(m[i]); 5727243517Savg } 5728243517Savg } 5729248084Sattilio zfs_vmobject_wunlock(object); 5730213937Savg ZFS_EXIT(zfsvfs); 5731248084Sattilio return (zfs_vm_pagerret_bad); 5732213937Savg } 5733213937Savg 5734243517Savg lsize = PAGE_SIZE; 5735243517Savg if (IDX_TO_OFF(mlast->pindex) + lsize > object->un_pager.vnp.vnp_size) 5736243517Savg lsize = object->un_pager.vnp.vnp_size - IDX_TO_OFF(mlast->pindex); 5737213937Savg 5738248084Sattilio zfs_vmobject_wunlock(object); 5739243517Savg 5740243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5741243517Savg size = PAGE_SIZE; 5742243517Savg if (i == (reqstart + reqsize - 1)) 5743243517Savg size = lsize; 5744243517Savg va = zfs_map_page(m[i], &sf); 5745243517Savg error = dmu_read(os, zp->z_id, IDX_TO_OFF(m[i]->pindex), 5746243517Savg size, va, DMU_READ_PREFETCH); 5747243517Savg if (size != PAGE_SIZE) 5748243517Savg bzero(va + size, PAGE_SIZE - size); 5749243517Savg zfs_unmap_page(sf); 5750243517Savg if (error != 0) 5751243517Savg break; 5752243517Savg } 5753243517Savg 5754248084Sattilio zfs_vmobject_wlock(object); 5755213937Savg 5756243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5757243763Savg if (!error) 5758243763Savg m[i]->valid = VM_PAGE_BITS_ALL; 5759243517Savg KASSERT(m[i]->dirty == 0, ("zfs_getpages: page %p is dirty", m[i])); 5760243763Savg if (i != reqpage) 5761243763Savg vm_page_readahead_finish(m[i]); 5762243517Savg } 5763243517Savg 5764248084Sattilio zfs_vmobject_wunlock(object); 5765213937Savg 5766213937Savg ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 5767213937Savg ZFS_EXIT(zfsvfs); 5768248084Sattilio return (error ? 
zfs_vm_pagerret_error : zfs_vm_pagerret_ok); 5769213937Savg} 5770213937Savg 5771213937Savgstatic int 5772213937Savgzfs_freebsd_getpages(ap) 5773213937Savg struct vop_getpages_args /* { 5774213937Savg struct vnode *a_vp; 5775213937Savg vm_page_t *a_m; 5776213937Savg int a_count; 5777213937Savg int a_reqpage; 5778213937Savg vm_ooffset_t a_offset; 5779213937Savg } */ *ap; 5780213937Savg{ 5781213937Savg 5782213937Savg return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage)); 5783213937Savg} 5784213937Savg 5785213937Savgstatic int 5786243518Savgzfs_freebsd_bmap(ap) 5787243518Savg struct vop_bmap_args /* { 5788243518Savg struct vnode *a_vp; 5789243518Savg daddr_t a_bn; 5790243518Savg struct bufobj **a_bop; 5791243518Savg daddr_t *a_bnp; 5792243518Savg int *a_runp; 5793243518Savg int *a_runb; 5794243518Savg } */ *ap; 5795243518Savg{ 5796243518Savg 5797243518Savg if (ap->a_bop != NULL) 5798243518Savg *ap->a_bop = &ap->a_vp->v_bufobj; 5799243518Savg if (ap->a_bnp != NULL) 5800243518Savg *ap->a_bnp = ap->a_bn; 5801243518Savg if (ap->a_runp != NULL) 5802243518Savg *ap->a_runp = 0; 5803243518Savg if (ap->a_runb != NULL) 5804243518Savg *ap->a_runb = 0; 5805243518Savg 5806243518Savg return (0); 5807243518Savg} 5808243518Savg 5809243518Savgstatic int 5810168962Spjdzfs_freebsd_open(ap) 5811168962Spjd struct vop_open_args /* { 5812168962Spjd struct vnode *a_vp; 5813168962Spjd int a_mode; 5814168962Spjd struct ucred *a_cred; 5815168962Spjd struct thread *a_td; 5816168962Spjd } */ *ap; 5817168962Spjd{ 5818168962Spjd vnode_t *vp = ap->a_vp; 5819168962Spjd znode_t *zp = VTOZ(vp); 5820168962Spjd int error; 5821168962Spjd 5822185029Spjd error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); 5823168962Spjd if (error == 0) 5824219089Spjd vnode_create_vobject(vp, zp->z_size, ap->a_td); 5825168962Spjd return (error); 5826168962Spjd} 5827168962Spjd 5828168962Spjdstatic int 5829168962Spjdzfs_freebsd_close(ap) 5830168962Spjd struct vop_close_args /* { 5831168962Spjd struct vnode 
*a_vp; 5832168962Spjd int a_fflag; 5833168962Spjd struct ucred *a_cred; 5834168962Spjd struct thread *a_td; 5835168962Spjd } */ *ap; 5836168962Spjd{ 5837168962Spjd 5838242566Savg return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred, NULL)); 5839168962Spjd} 5840168962Spjd 5841168962Spjdstatic int 5842168962Spjdzfs_freebsd_ioctl(ap) 5843168962Spjd struct vop_ioctl_args /* { 5844168962Spjd struct vnode *a_vp; 5845168962Spjd u_long a_command; 5846168962Spjd caddr_t a_data; 5847168962Spjd int a_fflag; 5848168962Spjd struct ucred *cred; 5849168962Spjd struct thread *td; 5850168962Spjd } */ *ap; 5851168962Spjd{ 5852168962Spjd 5853168978Spjd return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 5854185029Spjd ap->a_fflag, ap->a_cred, NULL, NULL)); 5855168962Spjd} 5856168962Spjd 5857168962Spjdstatic int 5858168962Spjdzfs_freebsd_read(ap) 5859168962Spjd struct vop_read_args /* { 5860168962Spjd struct vnode *a_vp; 5861168962Spjd struct uio *a_uio; 5862168962Spjd int a_ioflag; 5863168962Spjd struct ucred *a_cred; 5864168962Spjd } */ *ap; 5865168962Spjd{ 5866168962Spjd 5867213673Spjd return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 5868213673Spjd ap->a_cred, NULL)); 5869168962Spjd} 5870168962Spjd 5871168962Spjdstatic int 5872168962Spjdzfs_freebsd_write(ap) 5873168962Spjd struct vop_write_args /* { 5874168962Spjd struct vnode *a_vp; 5875168962Spjd struct uio *a_uio; 5876168962Spjd int a_ioflag; 5877168962Spjd struct ucred *a_cred; 5878168962Spjd } */ *ap; 5879168962Spjd{ 5880168962Spjd 5881213673Spjd return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 5882213673Spjd ap->a_cred, NULL)); 5883168962Spjd} 5884168962Spjd 5885168962Spjdstatic int 5886168962Spjdzfs_freebsd_access(ap) 5887168962Spjd struct vop_access_args /* { 5888168962Spjd struct vnode *a_vp; 5889192689Strasz accmode_t a_accmode; 5890168962Spjd struct ucred *a_cred; 5891168962Spjd struct thread *a_td; 5892168962Spjd } */ *ap; 5893168962Spjd{ 5894212002Sjh vnode_t *vp = ap->a_vp; 
5895212002Sjh znode_t *zp = VTOZ(vp); 5896198703Spjd accmode_t accmode; 5897198703Spjd int error = 0; 5898168962Spjd 5899185172Spjd /* 5900198703Spjd * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND, 5901185172Spjd */ 5902198703Spjd accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 5903198703Spjd if (accmode != 0) 5904198703Spjd error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); 5905185172Spjd 5906198703Spjd /* 5907198703Spjd * VADMIN has to be handled by vaccess(). 5908198703Spjd */ 5909198703Spjd if (error == 0) { 5910198703Spjd accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 5911198703Spjd if (accmode != 0) { 5912219089Spjd error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 5913219089Spjd zp->z_gid, accmode, ap->a_cred, NULL); 5914198703Spjd } 5915185172Spjd } 5916185172Spjd 5917212002Sjh /* 5918212002Sjh * For VEXEC, ensure that at least one execute bit is set for 5919212002Sjh * non-directories. 5920212002Sjh */ 5921212002Sjh if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 5922219089Spjd (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 5923212002Sjh error = EACCES; 5924219089Spjd } 5925212002Sjh 5926198703Spjd return (error); 5927168962Spjd} 5928168962Spjd 5929168962Spjdstatic int 5930168962Spjdzfs_freebsd_lookup(ap) 5931168962Spjd struct vop_lookup_args /* { 5932168962Spjd struct vnode *a_dvp; 5933168962Spjd struct vnode **a_vpp; 5934168962Spjd struct componentname *a_cnp; 5935168962Spjd } */ *ap; 5936168962Spjd{ 5937168962Spjd struct componentname *cnp = ap->a_cnp; 5938168962Spjd char nm[NAME_MAX + 1]; 5939168962Spjd 5940168962Spjd ASSERT(cnp->cn_namelen < sizeof(nm)); 5941168962Spjd strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 5942168962Spjd 5943168962Spjd return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 5944185029Spjd cnp->cn_cred, cnp->cn_thread, 0)); 5945168962Spjd} 5946168962Spjd 5947168962Spjdstatic int 5948168962Spjdzfs_freebsd_create(ap) 
5949168962Spjd struct vop_create_args /* { 5950168962Spjd struct vnode *a_dvp; 5951168962Spjd struct vnode **a_vpp; 5952168962Spjd struct componentname *a_cnp; 5953168962Spjd struct vattr *a_vap; 5954168962Spjd } */ *ap; 5955168962Spjd{ 5956168962Spjd struct componentname *cnp = ap->a_cnp; 5957168962Spjd vattr_t *vap = ap->a_vap; 5958168962Spjd int mode; 5959168962Spjd 5960168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5961168962Spjd 5962168962Spjd vattr_init_mask(vap); 5963168962Spjd mode = vap->va_mode & ALLPERMS; 5964168962Spjd 5965168962Spjd return (zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 5966185029Spjd ap->a_vpp, cnp->cn_cred, cnp->cn_thread)); 5967168962Spjd} 5968168962Spjd 5969168962Spjdstatic int 5970168962Spjdzfs_freebsd_remove(ap) 5971168962Spjd struct vop_remove_args /* { 5972168962Spjd struct vnode *a_dvp; 5973168962Spjd struct vnode *a_vp; 5974168962Spjd struct componentname *a_cnp; 5975168962Spjd } */ *ap; 5976168962Spjd{ 5977168962Spjd 5978168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 5979168962Spjd 5980168962Spjd return (zfs_remove(ap->a_dvp, ap->a_cnp->cn_nameptr, 5981185029Spjd ap->a_cnp->cn_cred, NULL, 0)); 5982168962Spjd} 5983168962Spjd 5984168962Spjdstatic int 5985168962Spjdzfs_freebsd_mkdir(ap) 5986168962Spjd struct vop_mkdir_args /* { 5987168962Spjd struct vnode *a_dvp; 5988168962Spjd struct vnode **a_vpp; 5989168962Spjd struct componentname *a_cnp; 5990168962Spjd struct vattr *a_vap; 5991168962Spjd } */ *ap; 5992168962Spjd{ 5993168962Spjd vattr_t *vap = ap->a_vap; 5994168962Spjd 5995168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 5996168962Spjd 5997168962Spjd vattr_init_mask(vap); 5998168962Spjd 5999168962Spjd return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 6000185029Spjd ap->a_cnp->cn_cred, NULL, 0, NULL)); 6001168962Spjd} 6002168962Spjd 6003168962Spjdstatic int 6004168962Spjdzfs_freebsd_rmdir(ap) 6005168962Spjd struct vop_rmdir_args /* { 6006168962Spjd struct vnode *a_dvp; 6007168962Spjd struct 
vnode *a_vp; 6008168962Spjd struct componentname *a_cnp; 6009168962Spjd } */ *ap; 6010168962Spjd{ 6011168962Spjd struct componentname *cnp = ap->a_cnp; 6012168962Spjd 6013168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6014168962Spjd 6015185029Spjd return (zfs_rmdir(ap->a_dvp, cnp->cn_nameptr, NULL, cnp->cn_cred, NULL, 0)); 6016168962Spjd} 6017168962Spjd 6018168962Spjdstatic int 6019168962Spjdzfs_freebsd_readdir(ap) 6020168962Spjd struct vop_readdir_args /* { 6021168962Spjd struct vnode *a_vp; 6022168962Spjd struct uio *a_uio; 6023168962Spjd struct ucred *a_cred; 6024168962Spjd int *a_eofflag; 6025168962Spjd int *a_ncookies; 6026168962Spjd u_long **a_cookies; 6027168962Spjd } */ *ap; 6028168962Spjd{ 6029168962Spjd 6030168962Spjd return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 6031168962Spjd ap->a_ncookies, ap->a_cookies)); 6032168962Spjd} 6033168962Spjd 6034168962Spjdstatic int 6035168962Spjdzfs_freebsd_fsync(ap) 6036168962Spjd struct vop_fsync_args /* { 6037168962Spjd struct vnode *a_vp; 6038168962Spjd int a_waitfor; 6039168962Spjd struct thread *a_td; 6040168962Spjd } */ *ap; 6041168962Spjd{ 6042168962Spjd 6043168962Spjd vop_stdfsync(ap); 6044185029Spjd return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 6045168962Spjd} 6046168962Spjd 6047168962Spjdstatic int 6048168962Spjdzfs_freebsd_getattr(ap) 6049168962Spjd struct vop_getattr_args /* { 6050168962Spjd struct vnode *a_vp; 6051168962Spjd struct vattr *a_vap; 6052168962Spjd struct ucred *a_cred; 6053168962Spjd } */ *ap; 6054168962Spjd{ 6055185029Spjd vattr_t *vap = ap->a_vap; 6056185029Spjd xvattr_t xvap; 6057185029Spjd u_long fflags = 0; 6058185029Spjd int error; 6059168962Spjd 6060185029Spjd xva_init(&xvap); 6061185029Spjd xvap.xva_vattr = *vap; 6062185029Spjd xvap.xva_vattr.va_mask |= AT_XVATTR; 6063185029Spjd 6064185029Spjd /* Convert chflags into ZFS-type flags. */ 6065185029Spjd /* XXX: what about SF_SETTABLE?. 
*/ 6066185029Spjd XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 6067185029Spjd XVA_SET_REQ(&xvap, XAT_APPENDONLY); 6068185029Spjd XVA_SET_REQ(&xvap, XAT_NOUNLINK); 6069185029Spjd XVA_SET_REQ(&xvap, XAT_NODUMP); 6070185029Spjd error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 6071185029Spjd if (error != 0) 6072185029Spjd return (error); 6073185029Spjd 6074185029Spjd /* Convert ZFS xattr into chflags. */ 6075185029Spjd#define FLAG_CHECK(fflag, xflag, xfield) do { \ 6076185029Spjd if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 6077185029Spjd fflags |= (fflag); \ 6078185029Spjd} while (0) 6079185029Spjd FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 6080185029Spjd xvap.xva_xoptattrs.xoa_immutable); 6081185029Spjd FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 6082185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 6083185029Spjd FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 6084185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 6085185029Spjd FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 6086185029Spjd xvap.xva_xoptattrs.xoa_nodump); 6087185029Spjd#undef FLAG_CHECK 6088185029Spjd *vap = xvap.xva_vattr; 6089185029Spjd vap->va_flags = fflags; 6090185029Spjd return (0); 6091168962Spjd} 6092168962Spjd 6093168962Spjdstatic int 6094168962Spjdzfs_freebsd_setattr(ap) 6095168962Spjd struct vop_setattr_args /* { 6096168962Spjd struct vnode *a_vp; 6097168962Spjd struct vattr *a_vap; 6098168962Spjd struct ucred *a_cred; 6099168962Spjd } */ *ap; 6100168962Spjd{ 6101185172Spjd vnode_t *vp = ap->a_vp; 6102168962Spjd vattr_t *vap = ap->a_vap; 6103185172Spjd cred_t *cred = ap->a_cred; 6104185029Spjd xvattr_t xvap; 6105185029Spjd u_long fflags; 6106185029Spjd uint64_t zflags; 6107168962Spjd 6108168962Spjd vattr_init_mask(vap); 6109170044Spjd vap->va_mask &= ~AT_NOSET; 6110168962Spjd 6111185029Spjd xva_init(&xvap); 6112185029Spjd xvap.xva_vattr = *vap; 6113185029Spjd 6114219089Spjd zflags = VTOZ(vp)->z_pflags; 6115185172Spjd 6116185029Spjd if (vap->va_flags != VNOVAL) { 6117197683Sdelphij zfsvfs_t *zfsvfs = 
VTOZ(vp)->z_zfsvfs; 6118185172Spjd int error; 6119185172Spjd 6120197683Sdelphij if (zfsvfs->z_use_fuids == B_FALSE) 6121197683Sdelphij return (EOPNOTSUPP); 6122197683Sdelphij 6123185029Spjd fflags = vap->va_flags; 6124185029Spjd if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_NODUMP)) != 0) 6125185029Spjd return (EOPNOTSUPP); 6126185172Spjd /* 6127185172Spjd * Unprivileged processes are not permitted to unset system 6128185172Spjd * flags, or modify flags if any system flags are set. 6129185172Spjd * Privileged non-jail processes may not modify system flags 6130185172Spjd * if securelevel > 0 and any existing system flags are set. 6131185172Spjd * Privileged jail processes behave like privileged non-jail 6132185172Spjd * processes if the security.jail.chflags_allowed sysctl is 6133185172Spjd * is non-zero; otherwise, they behave like unprivileged 6134185172Spjd * processes. 6135185172Spjd */ 6136197861Spjd if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 6137197861Spjd priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) { 6138185172Spjd if (zflags & 6139185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 6140185172Spjd error = securelevel_gt(cred, 0); 6141197861Spjd if (error != 0) 6142185172Spjd return (error); 6143185172Spjd } 6144185172Spjd } else { 6145197861Spjd /* 6146197861Spjd * Callers may only modify the file flags on objects they 6147197861Spjd * have VADMIN rights for. 
6148197861Spjd */ 6149197861Spjd if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0) 6150197861Spjd return (error); 6151185172Spjd if (zflags & 6152185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 6153185172Spjd return (EPERM); 6154185172Spjd } 6155185172Spjd if (fflags & 6156185172Spjd (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 6157185172Spjd return (EPERM); 6158185172Spjd } 6159185172Spjd } 6160185029Spjd 6161185029Spjd#define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 6162185029Spjd if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 6163185029Spjd ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 6164185029Spjd XVA_SET_REQ(&xvap, (xflag)); \ 6165185029Spjd (xfield) = ((fflags & (fflag)) != 0); \ 6166185029Spjd } \ 6167185029Spjd} while (0) 6168185029Spjd /* Convert chflags into ZFS-type flags. */ 6169185029Spjd /* XXX: what about SF_SETTABLE?. */ 6170185029Spjd FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 6171185029Spjd xvap.xva_xoptattrs.xoa_immutable); 6172185029Spjd FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 6173185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 6174185029Spjd FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 6175185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 6176185029Spjd FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 6177185172Spjd xvap.xva_xoptattrs.xoa_nodump); 6178185029Spjd#undef FLAG_CHANGE 6179185029Spjd } 6180185172Spjd return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL)); 6181168962Spjd} 6182168962Spjd 6183168962Spjdstatic int 6184168962Spjdzfs_freebsd_rename(ap) 6185168962Spjd struct vop_rename_args /* { 6186168962Spjd struct vnode *a_fdvp; 6187168962Spjd struct vnode *a_fvp; 6188168962Spjd struct componentname *a_fcnp; 6189168962Spjd struct vnode *a_tdvp; 6190168962Spjd struct vnode *a_tvp; 6191168962Spjd struct componentname *a_tcnp; 6192168962Spjd } */ *ap; 6193168962Spjd{ 6194168962Spjd vnode_t *fdvp = ap->a_fdvp; 6195168962Spjd vnode_t *fvp = ap->a_fvp; 6196168962Spjd 
vnode_t *tdvp = ap->a_tdvp; 6197168962Spjd vnode_t *tvp = ap->a_tvp; 6198168962Spjd int error; 6199168962Spjd 6200192237Skmacy ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); 6201192237Skmacy ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); 6202168962Spjd 6203168962Spjd error = zfs_rename(fdvp, ap->a_fcnp->cn_nameptr, tdvp, 6204185029Spjd ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred, NULL, 0); 6205168962Spjd 6206168962Spjd if (tdvp == tvp) 6207168962Spjd VN_RELE(tdvp); 6208168962Spjd else 6209168962Spjd VN_URELE(tdvp); 6210168962Spjd if (tvp) 6211168962Spjd VN_URELE(tvp); 6212168962Spjd VN_RELE(fdvp); 6213168962Spjd VN_RELE(fvp); 6214168962Spjd 6215168962Spjd return (error); 6216168962Spjd} 6217168962Spjd 6218168962Spjdstatic int 6219168962Spjdzfs_freebsd_symlink(ap) 6220168962Spjd struct vop_symlink_args /* { 6221168962Spjd struct vnode *a_dvp; 6222168962Spjd struct vnode **a_vpp; 6223168962Spjd struct componentname *a_cnp; 6224168962Spjd struct vattr *a_vap; 6225168962Spjd char *a_target; 6226168962Spjd } */ *ap; 6227168962Spjd{ 6228168962Spjd struct componentname *cnp = ap->a_cnp; 6229168962Spjd vattr_t *vap = ap->a_vap; 6230168962Spjd 6231168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6232168962Spjd 6233168962Spjd vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. 
*/ 6234168962Spjd vattr_init_mask(vap); 6235168962Spjd 6236168962Spjd return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 6237168962Spjd ap->a_target, cnp->cn_cred, cnp->cn_thread)); 6238168962Spjd} 6239168962Spjd 6240168962Spjdstatic int 6241168962Spjdzfs_freebsd_readlink(ap) 6242168962Spjd struct vop_readlink_args /* { 6243168962Spjd struct vnode *a_vp; 6244168962Spjd struct uio *a_uio; 6245168962Spjd struct ucred *a_cred; 6246168962Spjd } */ *ap; 6247168962Spjd{ 6248168962Spjd 6249185029Spjd return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 6250168962Spjd} 6251168962Spjd 6252168962Spjdstatic int 6253168962Spjdzfs_freebsd_link(ap) 6254168962Spjd struct vop_link_args /* { 6255168962Spjd struct vnode *a_tdvp; 6256168962Spjd struct vnode *a_vp; 6257168962Spjd struct componentname *a_cnp; 6258168962Spjd } */ *ap; 6259168962Spjd{ 6260168962Spjd struct componentname *cnp = ap->a_cnp; 6261168962Spjd 6262168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6263168962Spjd 6264185029Spjd return (zfs_link(ap->a_tdvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 6265168962Spjd} 6266168962Spjd 6267168962Spjdstatic int 6268168962Spjdzfs_freebsd_inactive(ap) 6269169170Spjd struct vop_inactive_args /* { 6270169170Spjd struct vnode *a_vp; 6271169170Spjd struct thread *a_td; 6272169170Spjd } */ *ap; 6273168962Spjd{ 6274168962Spjd vnode_t *vp = ap->a_vp; 6275168962Spjd 6276185029Spjd zfs_inactive(vp, ap->a_td->td_ucred, NULL); 6277168962Spjd return (0); 6278168962Spjd} 6279168962Spjd 6280168962Spjdstatic int 6281168962Spjdzfs_freebsd_reclaim(ap) 6282168962Spjd struct vop_reclaim_args /* { 6283168962Spjd struct vnode *a_vp; 6284168962Spjd struct thread *a_td; 6285168962Spjd } */ *ap; 6286168962Spjd{ 6287169170Spjd vnode_t *vp = ap->a_vp; 6288168962Spjd znode_t *zp = VTOZ(vp); 6289197133Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6290168962Spjd 6291169025Spjd ASSERT(zp != NULL); 6292169025Spjd 6293243520Savg /* Destroy the vm object and flush associated pages. 
*/ 6294243520Savg vnode_destroy_vobject(vp); 6295243520Savg 6296168962Spjd /* 6297243520Savg * z_teardown_inactive_lock protects from a race with 6298243520Savg * zfs_znode_dmu_fini in zfsvfs_teardown during 6299243520Savg * force unmount. 6300168962Spjd */ 6301243520Savg rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 6302243520Savg if (zp->z_sa_hdl == NULL) 6303196301Spjd zfs_znode_free(zp); 6304243520Savg else 6305243520Savg zfs_zinactive(zp); 6306243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 6307185029Spjd 6308168962Spjd vp->v_data = NULL; 6309168962Spjd return (0); 6310168962Spjd} 6311168962Spjd 6312168962Spjdstatic int 6313168962Spjdzfs_freebsd_fid(ap) 6314168962Spjd struct vop_fid_args /* { 6315168962Spjd struct vnode *a_vp; 6316168962Spjd struct fid *a_fid; 6317168962Spjd } */ *ap; 6318168962Spjd{ 6319168962Spjd 6320185029Spjd return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 6321168962Spjd} 6322168962Spjd 6323168962Spjdstatic int 6324168962Spjdzfs_freebsd_pathconf(ap) 6325168962Spjd struct vop_pathconf_args /* { 6326168962Spjd struct vnode *a_vp; 6327168962Spjd int a_name; 6328168962Spjd register_t *a_retval; 6329168962Spjd } */ *ap; 6330168962Spjd{ 6331168962Spjd ulong_t val; 6332168962Spjd int error; 6333168962Spjd 6334185029Spjd error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 6335168962Spjd if (error == 0) 6336168962Spjd *ap->a_retval = val; 6337168962Spjd else if (error == EOPNOTSUPP) 6338168962Spjd error = vop_stdpathconf(ap); 6339168962Spjd return (error); 6340168962Spjd} 6341168962Spjd 6342196949Straszstatic int 6343196949Straszzfs_freebsd_fifo_pathconf(ap) 6344196949Strasz struct vop_pathconf_args /* { 6345196949Strasz struct vnode *a_vp; 6346196949Strasz int a_name; 6347196949Strasz register_t *a_retval; 6348196949Strasz } */ *ap; 6349196949Strasz{ 6350196949Strasz 6351196949Strasz switch (ap->a_name) { 6352196949Strasz case _PC_ACL_EXTENDED: 6353196949Strasz case _PC_ACL_NFS4: 6354196949Strasz case 
_PC_ACL_PATH_MAX: 6355196949Strasz case _PC_MAC_PRESENT: 6356196949Strasz return (zfs_freebsd_pathconf(ap)); 6357196949Strasz default: 6358196949Strasz return (fifo_specops.vop_pathconf(ap)); 6359196949Strasz } 6360196949Strasz} 6361196949Strasz 6362185029Spjd/* 6363185029Spjd * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 6364185029Spjd * extended attribute name: 6365185029Spjd * 6366185029Spjd * NAMESPACE PREFIX 6367185029Spjd * system freebsd:system: 6368185029Spjd * user (none, can be used to access ZFS fsattr(5) attributes 6369185029Spjd * created on Solaris) 6370185029Spjd */ 6371185029Spjdstatic int 6372185029Spjdzfs_create_attrname(int attrnamespace, const char *name, char *attrname, 6373185029Spjd size_t size) 6374185029Spjd{ 6375185029Spjd const char *namespace, *prefix, *suffix; 6376185029Spjd 6377185029Spjd /* We don't allow '/' character in attribute name. */ 6378185029Spjd if (strchr(name, '/') != NULL) 6379185029Spjd return (EINVAL); 6380185029Spjd /* We don't allow attribute names that start with "freebsd:" string. */ 6381185029Spjd if (strncmp(name, "freebsd:", 8) == 0) 6382185029Spjd return (EINVAL); 6383185029Spjd 6384185029Spjd bzero(attrname, size); 6385185029Spjd 6386185029Spjd switch (attrnamespace) { 6387185029Spjd case EXTATTR_NAMESPACE_USER: 6388185029Spjd#if 0 6389185029Spjd prefix = "freebsd:"; 6390185029Spjd namespace = EXTATTR_NAMESPACE_USER_STRING; 6391185029Spjd suffix = ":"; 6392185029Spjd#else 6393185029Spjd /* 6394185029Spjd * This is the default namespace by which we can access all 6395185029Spjd * attributes created on Solaris. 
6396185029Spjd */ 6397185029Spjd prefix = namespace = suffix = ""; 6398185029Spjd#endif 6399185029Spjd break; 6400185029Spjd case EXTATTR_NAMESPACE_SYSTEM: 6401185029Spjd prefix = "freebsd:"; 6402185029Spjd namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 6403185029Spjd suffix = ":"; 6404185029Spjd break; 6405185029Spjd case EXTATTR_NAMESPACE_EMPTY: 6406185029Spjd default: 6407185029Spjd return (EINVAL); 6408185029Spjd } 6409185029Spjd if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 6410185029Spjd name) >= size) { 6411185029Spjd return (ENAMETOOLONG); 6412185029Spjd } 6413185029Spjd return (0); 6414185029Spjd} 6415185029Spjd 6416185029Spjd/* 6417185029Spjd * Vnode operating to retrieve a named extended attribute. 6418185029Spjd */ 6419185029Spjdstatic int 6420185029Spjdzfs_getextattr(struct vop_getextattr_args *ap) 6421185029Spjd/* 6422185029Spjdvop_getextattr { 6423185029Spjd IN struct vnode *a_vp; 6424185029Spjd IN int a_attrnamespace; 6425185029Spjd IN const char *a_name; 6426185029Spjd INOUT struct uio *a_uio; 6427185029Spjd OUT size_t *a_size; 6428185029Spjd IN struct ucred *a_cred; 6429185029Spjd IN struct thread *a_td; 6430185029Spjd}; 6431185029Spjd*/ 6432185029Spjd{ 6433185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6434185029Spjd struct thread *td = ap->a_td; 6435185029Spjd struct nameidata nd; 6436185029Spjd char attrname[255]; 6437185029Spjd struct vattr va; 6438185029Spjd vnode_t *xvp = NULL, *vp; 6439185029Spjd int error, flags; 6440185029Spjd 6441195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6442195785Strasz ap->a_cred, ap->a_td, VREAD); 6443195785Strasz if (error != 0) 6444195785Strasz return (error); 6445195785Strasz 6446185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6447185029Spjd sizeof(attrname)); 6448185029Spjd if (error != 0) 6449185029Spjd return (error); 6450185029Spjd 6451185029Spjd ZFS_ENTER(zfsvfs); 6452185029Spjd 6453185029Spjd error = 
zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6454185029Spjd LOOKUP_XATTR); 6455185029Spjd if (error != 0) { 6456185029Spjd ZFS_EXIT(zfsvfs); 6457185029Spjd return (error); 6458185029Spjd } 6459185029Spjd 6460185029Spjd flags = FREAD; 6461241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 6462185029Spjd xvp, td); 6463194586Skib error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL); 6464185029Spjd vp = nd.ni_vp; 6465185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6466185029Spjd if (error != 0) { 6467196303Spjd ZFS_EXIT(zfsvfs); 6468195785Strasz if (error == ENOENT) 6469195785Strasz error = ENOATTR; 6470185029Spjd return (error); 6471185029Spjd } 6472185029Spjd 6473185029Spjd if (ap->a_size != NULL) { 6474185029Spjd error = VOP_GETATTR(vp, &va, ap->a_cred); 6475185029Spjd if (error == 0) 6476185029Spjd *ap->a_size = (size_t)va.va_size; 6477185029Spjd } else if (ap->a_uio != NULL) 6478224605Smm error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); 6479185029Spjd 6480185029Spjd VOP_UNLOCK(vp, 0); 6481185029Spjd vn_close(vp, flags, ap->a_cred, td); 6482185029Spjd ZFS_EXIT(zfsvfs); 6483185029Spjd 6484185029Spjd return (error); 6485185029Spjd} 6486185029Spjd 6487185029Spjd/* 6488185029Spjd * Vnode operation to remove a named attribute. 
6489185029Spjd */ 6490185029Spjdint 6491185029Spjdzfs_deleteextattr(struct vop_deleteextattr_args *ap) 6492185029Spjd/* 6493185029Spjdvop_deleteextattr { 6494185029Spjd IN struct vnode *a_vp; 6495185029Spjd IN int a_attrnamespace; 6496185029Spjd IN const char *a_name; 6497185029Spjd IN struct ucred *a_cred; 6498185029Spjd IN struct thread *a_td; 6499185029Spjd}; 6500185029Spjd*/ 6501185029Spjd{ 6502185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6503185029Spjd struct thread *td = ap->a_td; 6504185029Spjd struct nameidata nd; 6505185029Spjd char attrname[255]; 6506185029Spjd struct vattr va; 6507185029Spjd vnode_t *xvp = NULL, *vp; 6508185029Spjd int error, flags; 6509185029Spjd 6510195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6511195785Strasz ap->a_cred, ap->a_td, VWRITE); 6512195785Strasz if (error != 0) 6513195785Strasz return (error); 6514195785Strasz 6515185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6516185029Spjd sizeof(attrname)); 6517185029Spjd if (error != 0) 6518185029Spjd return (error); 6519185029Spjd 6520185029Spjd ZFS_ENTER(zfsvfs); 6521185029Spjd 6522185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6523185029Spjd LOOKUP_XATTR); 6524185029Spjd if (error != 0) { 6525185029Spjd ZFS_EXIT(zfsvfs); 6526185029Spjd return (error); 6527185029Spjd } 6528185029Spjd 6529241896Skib NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 6530185029Spjd UIO_SYSSPACE, attrname, xvp, td); 6531185029Spjd error = namei(&nd); 6532185029Spjd vp = nd.ni_vp; 6533185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6534185029Spjd if (error != 0) { 6535196303Spjd ZFS_EXIT(zfsvfs); 6536195785Strasz if (error == ENOENT) 6537195785Strasz error = ENOATTR; 6538185029Spjd return (error); 6539185029Spjd } 6540185029Spjd error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 6541185029Spjd 6542185029Spjd vput(nd.ni_dvp); 6543185029Spjd if (vp == nd.ni_dvp) 6544185029Spjd vrele(vp); 6545185029Spjd else 
6546185029Spjd vput(vp); 6547185029Spjd ZFS_EXIT(zfsvfs); 6548185029Spjd 6549185029Spjd return (error); 6550185029Spjd} 6551185029Spjd 6552185029Spjd/* 6553185029Spjd * Vnode operation to set a named attribute. 6554185029Spjd */ 6555185029Spjdstatic int 6556185029Spjdzfs_setextattr(struct vop_setextattr_args *ap) 6557185029Spjd/* 6558185029Spjdvop_setextattr { 6559185029Spjd IN struct vnode *a_vp; 6560185029Spjd IN int a_attrnamespace; 6561185029Spjd IN const char *a_name; 6562185029Spjd INOUT struct uio *a_uio; 6563185029Spjd IN struct ucred *a_cred; 6564185029Spjd IN struct thread *a_td; 6565185029Spjd}; 6566185029Spjd*/ 6567185029Spjd{ 6568185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6569185029Spjd struct thread *td = ap->a_td; 6570185029Spjd struct nameidata nd; 6571185029Spjd char attrname[255]; 6572185029Spjd struct vattr va; 6573185029Spjd vnode_t *xvp = NULL, *vp; 6574185029Spjd int error, flags; 6575185029Spjd 6576195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6577195785Strasz ap->a_cred, ap->a_td, VWRITE); 6578195785Strasz if (error != 0) 6579195785Strasz return (error); 6580195785Strasz 6581185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6582185029Spjd sizeof(attrname)); 6583185029Spjd if (error != 0) 6584185029Spjd return (error); 6585185029Spjd 6586185029Spjd ZFS_ENTER(zfsvfs); 6587185029Spjd 6588185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6589195785Strasz LOOKUP_XATTR | CREATE_XATTR_DIR); 6590185029Spjd if (error != 0) { 6591185029Spjd ZFS_EXIT(zfsvfs); 6592185029Spjd return (error); 6593185029Spjd } 6594185029Spjd 6595185029Spjd flags = FFLAGS(O_WRONLY | O_CREAT); 6596241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 6597185029Spjd xvp, td); 6598194586Skib error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); 6599185029Spjd vp = nd.ni_vp; 6600185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6601185029Spjd if (error != 0) { 
6602185029Spjd ZFS_EXIT(zfsvfs); 6603185029Spjd return (error); 6604185029Spjd } 6605185029Spjd 6606185029Spjd VATTR_NULL(&va); 6607185029Spjd va.va_size = 0; 6608185029Spjd error = VOP_SETATTR(vp, &va, ap->a_cred); 6609185029Spjd if (error == 0) 6610185029Spjd VOP_WRITE(vp, ap->a_uio, IO_UNIT | IO_SYNC, ap->a_cred); 6611185029Spjd 6612185029Spjd VOP_UNLOCK(vp, 0); 6613185029Spjd vn_close(vp, flags, ap->a_cred, td); 6614185029Spjd ZFS_EXIT(zfsvfs); 6615185029Spjd 6616185029Spjd return (error); 6617185029Spjd} 6618185029Spjd 6619185029Spjd/* 6620185029Spjd * Vnode operation to retrieve extended attributes on a vnode. 6621185029Spjd */ 6622185029Spjdstatic int 6623185029Spjdzfs_listextattr(struct vop_listextattr_args *ap) 6624185029Spjd/* 6625185029Spjdvop_listextattr { 6626185029Spjd IN struct vnode *a_vp; 6627185029Spjd IN int a_attrnamespace; 6628185029Spjd INOUT struct uio *a_uio; 6629185029Spjd OUT size_t *a_size; 6630185029Spjd IN struct ucred *a_cred; 6631185029Spjd IN struct thread *a_td; 6632185029Spjd}; 6633185029Spjd*/ 6634185029Spjd{ 6635185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6636185029Spjd struct thread *td = ap->a_td; 6637185029Spjd struct nameidata nd; 6638185029Spjd char attrprefix[16]; 6639185029Spjd u_char dirbuf[sizeof(struct dirent)]; 6640185029Spjd struct dirent *dp; 6641185029Spjd struct iovec aiov; 6642185029Spjd struct uio auio, *uio = ap->a_uio; 6643185029Spjd size_t *sizep = ap->a_size; 6644185029Spjd size_t plen; 6645185029Spjd vnode_t *xvp = NULL, *vp; 6646185029Spjd int done, error, eof, pos; 6647185029Spjd 6648195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6649195785Strasz ap->a_cred, ap->a_td, VREAD); 6650196303Spjd if (error != 0) 6651195785Strasz return (error); 6652195785Strasz 6653185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 6654185029Spjd sizeof(attrprefix)); 6655185029Spjd if (error != 0) 6656185029Spjd return (error); 6657185029Spjd plen = strlen(attrprefix); 
6658185029Spjd 6659185029Spjd ZFS_ENTER(zfsvfs); 6660185029Spjd 6661195822Strasz if (sizep != NULL) 6662195822Strasz *sizep = 0; 6663195822Strasz 6664185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6665185029Spjd LOOKUP_XATTR); 6666185029Spjd if (error != 0) { 6667196303Spjd ZFS_EXIT(zfsvfs); 6668195785Strasz /* 6669195785Strasz * ENOATTR means that the EA directory does not yet exist, 6670195785Strasz * i.e. there are no extended attributes there. 6671195785Strasz */ 6672195785Strasz if (error == ENOATTR) 6673195785Strasz error = 0; 6674185029Spjd return (error); 6675185029Spjd } 6676185029Spjd 6677241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 6678188588Sjhb UIO_SYSSPACE, ".", xvp, td); 6679185029Spjd error = namei(&nd); 6680185029Spjd vp = nd.ni_vp; 6681185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6682185029Spjd if (error != 0) { 6683185029Spjd ZFS_EXIT(zfsvfs); 6684185029Spjd return (error); 6685185029Spjd } 6686185029Spjd 6687185029Spjd auio.uio_iov = &aiov; 6688185029Spjd auio.uio_iovcnt = 1; 6689185029Spjd auio.uio_segflg = UIO_SYSSPACE; 6690185029Spjd auio.uio_td = td; 6691185029Spjd auio.uio_rw = UIO_READ; 6692185029Spjd auio.uio_offset = 0; 6693185029Spjd 6694185029Spjd do { 6695185029Spjd u_char nlen; 6696185029Spjd 6697185029Spjd aiov.iov_base = (void *)dirbuf; 6698185029Spjd aiov.iov_len = sizeof(dirbuf); 6699185029Spjd auio.uio_resid = sizeof(dirbuf); 6700185029Spjd error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 6701185029Spjd done = sizeof(dirbuf) - auio.uio_resid; 6702185029Spjd if (error != 0) 6703185029Spjd break; 6704185029Spjd for (pos = 0; pos < done;) { 6705185029Spjd dp = (struct dirent *)(dirbuf + pos); 6706185029Spjd pos += dp->d_reclen; 6707185029Spjd /* 6708185029Spjd * XXX: Temporarily we also accept DT_UNKNOWN, as this 6709185029Spjd * is what we get when attribute was created on Solaris. 
6710185029Spjd */ 6711185029Spjd if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 6712185029Spjd continue; 6713185029Spjd if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) 6714185029Spjd continue; 6715185029Spjd else if (strncmp(dp->d_name, attrprefix, plen) != 0) 6716185029Spjd continue; 6717185029Spjd nlen = dp->d_namlen - plen; 6718185029Spjd if (sizep != NULL) 6719185029Spjd *sizep += 1 + nlen; 6720185029Spjd else if (uio != NULL) { 6721185029Spjd /* 6722185029Spjd * Format of extattr name entry is one byte for 6723185029Spjd * length and the rest for name. 6724185029Spjd */ 6725185029Spjd error = uiomove(&nlen, 1, uio->uio_rw, uio); 6726185029Spjd if (error == 0) { 6727185029Spjd error = uiomove(dp->d_name + plen, nlen, 6728185029Spjd uio->uio_rw, uio); 6729185029Spjd } 6730185029Spjd if (error != 0) 6731185029Spjd break; 6732185029Spjd } 6733185029Spjd } 6734185029Spjd } while (!eof && error == 0); 6735185029Spjd 6736185029Spjd vput(vp); 6737185029Spjd ZFS_EXIT(zfsvfs); 6738185029Spjd 6739185029Spjd return (error); 6740185029Spjd} 6741185029Spjd 6742192800Straszint 6743192800Straszzfs_freebsd_getacl(ap) 6744192800Strasz struct vop_getacl_args /* { 6745192800Strasz struct vnode *vp; 6746192800Strasz acl_type_t type; 6747192800Strasz struct acl *aclp; 6748192800Strasz struct ucred *cred; 6749192800Strasz struct thread *td; 6750192800Strasz } */ *ap; 6751192800Strasz{ 6752192800Strasz int error; 6753192800Strasz vsecattr_t vsecattr; 6754192800Strasz 6755192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 6756197435Strasz return (EINVAL); 6757192800Strasz 6758192800Strasz vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 6759192800Strasz if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)) 6760192800Strasz return (error); 6761192800Strasz 6762192800Strasz error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt); 6763196303Spjd if (vsecattr.vsa_aclentp != NULL) 6764196303Spjd kmem_free(vsecattr.vsa_aclentp, 
vsecattr.vsa_aclentsz); 6765192800Strasz 6766196303Spjd return (error); 6767192800Strasz} 6768192800Strasz 6769192800Straszint 6770192800Straszzfs_freebsd_setacl(ap) 6771192800Strasz struct vop_setacl_args /* { 6772192800Strasz struct vnode *vp; 6773192800Strasz acl_type_t type; 6774192800Strasz struct acl *aclp; 6775192800Strasz struct ucred *cred; 6776192800Strasz struct thread *td; 6777192800Strasz } */ *ap; 6778192800Strasz{ 6779192800Strasz int error; 6780192800Strasz vsecattr_t vsecattr; 6781192800Strasz int aclbsize; /* size of acl list in bytes */ 6782192800Strasz aclent_t *aaclp; 6783192800Strasz 6784192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 6785197435Strasz return (EINVAL); 6786192800Strasz 6787192800Strasz if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 6788192800Strasz return (EINVAL); 6789192800Strasz 6790192800Strasz /* 6791196949Strasz * With NFSv4 ACLs, chmod(2) may need to add additional entries, 6792192800Strasz * splitting every entry into two and appending "canonical six" 6793192800Strasz * entries at the end. Don't allow for setting an ACL that would 6794192800Strasz * cause chmod(2) to run out of ACL entries. 
6795192800Strasz */ 6796192800Strasz if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 6797192800Strasz return (ENOSPC); 6798192800Strasz 6799208030Strasz error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 6800208030Strasz if (error != 0) 6801208030Strasz return (error); 6802208030Strasz 6803192800Strasz vsecattr.vsa_mask = VSA_ACE; 6804192800Strasz aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t); 6805192800Strasz vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 6806192800Strasz aaclp = vsecattr.vsa_aclentp; 6807192800Strasz vsecattr.vsa_aclentsz = aclbsize; 6808192800Strasz 6809192800Strasz aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 6810192800Strasz error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL); 6811192800Strasz kmem_free(aaclp, aclbsize); 6812192800Strasz 6813192800Strasz return (error); 6814192800Strasz} 6815192800Strasz 6816192800Straszint 6817192800Straszzfs_freebsd_aclcheck(ap) 6818192800Strasz struct vop_aclcheck_args /* { 6819192800Strasz struct vnode *vp; 6820192800Strasz acl_type_t type; 6821192800Strasz struct acl *aclp; 6822192800Strasz struct ucred *cred; 6823192800Strasz struct thread *td; 6824192800Strasz } */ *ap; 6825192800Strasz{ 6826192800Strasz 6827192800Strasz return (EOPNOTSUPP); 6828192800Strasz} 6829192800Strasz 6830168404Spjdstruct vop_vector zfs_vnodeops; 6831168404Spjdstruct vop_vector zfs_fifoops; 6832209962Smmstruct vop_vector zfs_shareops; 6833168404Spjd 6834168404Spjdstruct vop_vector zfs_vnodeops = { 6835185029Spjd .vop_default = &default_vnodeops, 6836185029Spjd .vop_inactive = zfs_freebsd_inactive, 6837185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 6838185029Spjd .vop_access = zfs_freebsd_access, 6839168404Spjd#ifdef FREEBSD_NAMECACHE 6840185029Spjd .vop_lookup = vfs_cache_lookup, 6841185029Spjd .vop_cachedlookup = zfs_freebsd_lookup, 6842168404Spjd#else 6843185029Spjd .vop_lookup = zfs_freebsd_lookup, 6844168404Spjd#endif 6845185029Spjd .vop_getattr = 
zfs_freebsd_getattr, 6846185029Spjd .vop_setattr = zfs_freebsd_setattr, 6847185029Spjd .vop_create = zfs_freebsd_create, 6848185029Spjd .vop_mknod = zfs_freebsd_create, 6849185029Spjd .vop_mkdir = zfs_freebsd_mkdir, 6850185029Spjd .vop_readdir = zfs_freebsd_readdir, 6851185029Spjd .vop_fsync = zfs_freebsd_fsync, 6852185029Spjd .vop_open = zfs_freebsd_open, 6853185029Spjd .vop_close = zfs_freebsd_close, 6854185029Spjd .vop_rmdir = zfs_freebsd_rmdir, 6855185029Spjd .vop_ioctl = zfs_freebsd_ioctl, 6856185029Spjd .vop_link = zfs_freebsd_link, 6857185029Spjd .vop_symlink = zfs_freebsd_symlink, 6858185029Spjd .vop_readlink = zfs_freebsd_readlink, 6859185029Spjd .vop_read = zfs_freebsd_read, 6860185029Spjd .vop_write = zfs_freebsd_write, 6861185029Spjd .vop_remove = zfs_freebsd_remove, 6862185029Spjd .vop_rename = zfs_freebsd_rename, 6863185029Spjd .vop_pathconf = zfs_freebsd_pathconf, 6864243518Savg .vop_bmap = zfs_freebsd_bmap, 6865185029Spjd .vop_fid = zfs_freebsd_fid, 6866185029Spjd .vop_getextattr = zfs_getextattr, 6867185029Spjd .vop_deleteextattr = zfs_deleteextattr, 6868185029Spjd .vop_setextattr = zfs_setextattr, 6869185029Spjd .vop_listextattr = zfs_listextattr, 6870192800Strasz .vop_getacl = zfs_freebsd_getacl, 6871192800Strasz .vop_setacl = zfs_freebsd_setacl, 6872192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6873213937Savg .vop_getpages = zfs_freebsd_getpages, 6874168404Spjd}; 6875168404Spjd 6876169170Spjdstruct vop_vector zfs_fifoops = { 6877185029Spjd .vop_default = &fifo_specops, 6878200162Skib .vop_fsync = zfs_freebsd_fsync, 6879185029Spjd .vop_access = zfs_freebsd_access, 6880185029Spjd .vop_getattr = zfs_freebsd_getattr, 6881185029Spjd .vop_inactive = zfs_freebsd_inactive, 6882185029Spjd .vop_read = VOP_PANIC, 6883185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 6884185029Spjd .vop_setattr = zfs_freebsd_setattr, 6885185029Spjd .vop_write = VOP_PANIC, 6886196949Strasz .vop_pathconf = zfs_freebsd_fifo_pathconf, 6887185029Spjd .vop_fid = 
zfs_freebsd_fid, 6888192800Strasz .vop_getacl = zfs_freebsd_getacl, 6889192800Strasz .vop_setacl = zfs_freebsd_setacl, 6890192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6891168404Spjd}; 6892209962Smm 6893209962Smm/* 6894209962Smm * special share hidden files vnode operations template 6895209962Smm */ 6896209962Smmstruct vop_vector zfs_shareops = { 6897209962Smm .vop_default = &default_vnodeops, 6898209962Smm .vop_access = zfs_freebsd_access, 6899209962Smm .vop_inactive = zfs_freebsd_inactive, 6900209962Smm .vop_reclaim = zfs_freebsd_reclaim, 6901209962Smm .vop_fid = zfs_freebsd_fid, 6902209962Smm .vop_pathconf = zfs_freebsd_pathconf, 6903209962Smm}; 6904