zfs_vnops.c revision 253953
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22212694Smm * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23249195Smm * Copyright (c) 2013 by Delphix. All rights reserved. 
24168404Spjd */ 25168404Spjd 26169195Spjd/* Portions Copyright 2007 Jeremy Teo */ 27219089Spjd/* Portions Copyright 2010 Robert Milkowski */ 28169195Spjd 29168404Spjd#include <sys/types.h> 30168404Spjd#include <sys/param.h> 31168404Spjd#include <sys/time.h> 32168404Spjd#include <sys/systm.h> 33168404Spjd#include <sys/sysmacros.h> 34168404Spjd#include <sys/resource.h> 35168404Spjd#include <sys/vfs.h> 36248084Sattilio#include <sys/vm.h> 37168404Spjd#include <sys/vnode.h> 38168404Spjd#include <sys/file.h> 39168404Spjd#include <sys/stat.h> 40168404Spjd#include <sys/kmem.h> 41168404Spjd#include <sys/taskq.h> 42168404Spjd#include <sys/uio.h> 43168404Spjd#include <sys/atomic.h> 44168404Spjd#include <sys/namei.h> 45168404Spjd#include <sys/mman.h> 46168404Spjd#include <sys/cmn_err.h> 47168404Spjd#include <sys/errno.h> 48168404Spjd#include <sys/unistd.h> 49168404Spjd#include <sys/zfs_dir.h> 50168404Spjd#include <sys/zfs_ioctl.h> 51168404Spjd#include <sys/fs/zfs.h> 52168404Spjd#include <sys/dmu.h> 53219089Spjd#include <sys/dmu_objset.h> 54168404Spjd#include <sys/spa.h> 55168404Spjd#include <sys/txg.h> 56168404Spjd#include <sys/dbuf.h> 57168404Spjd#include <sys/zap.h> 58219089Spjd#include <sys/sa.h> 59168404Spjd#include <sys/dirent.h> 60168962Spjd#include <sys/policy.h> 61168962Spjd#include <sys/sunddi.h> 62168404Spjd#include <sys/filio.h> 63209962Smm#include <sys/sid.h> 64168404Spjd#include <sys/zfs_ctldir.h> 65185029Spjd#include <sys/zfs_fuid.h> 66219089Spjd#include <sys/zfs_sa.h> 67168404Spjd#include <sys/dnlc.h> 68168404Spjd#include <sys/zfs_rlock.h> 69185029Spjd#include <sys/extdirent.h> 70185029Spjd#include <sys/kidmap.h> 71168404Spjd#include <sys/bio.h> 72168404Spjd#include <sys/buf.h> 73168404Spjd#include <sys/sf_buf.h> 74168404Spjd#include <sys/sched.h> 75192800Strasz#include <sys/acl.h> 76239077Smarius#include <vm/vm_param.h> 77215401Savg#include <vm/vm_pageout.h> 78168404Spjd 79168404Spjd/* 80168404Spjd * Programming rules. 
81168404Spjd * 82168404Spjd * Each vnode op performs some logical unit of work. To do this, the ZPL must 83168404Spjd * properly lock its in-core state, create a DMU transaction, do the work, 84168404Spjd * record this work in the intent log (ZIL), commit the DMU transaction, 85185029Spjd * and wait for the intent log to commit if it is a synchronous operation. 86185029Spjd * Moreover, the vnode ops must work in both normal and log replay context. 87168404Spjd * The ordering of events is important to avoid deadlocks and references 88168404Spjd * to freed memory. The example below illustrates the following Big Rules: 89168404Spjd * 90251631Sdelphij * (1) A check must be made in each zfs thread for a mounted file system. 91168404Spjd * This is done avoiding races using ZFS_ENTER(zfsvfs). 92251631Sdelphij * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 93251631Sdelphij * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 94251631Sdelphij * can return EIO from the calling function. 95168404Spjd * 96168404Spjd * (2) VN_RELE() should always be the last thing except for zil_commit() 97168404Spjd * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 98168404Spjd * First, if it's the last reference, the vnode/znode 99168404Spjd * can be freed, so the zp may point to freed memory. Second, the last 100168404Spjd * reference will call zfs_zinactive(), which may induce a lot of work -- 101168404Spjd * pushing cached pages (which acquires range locks) and syncing out 102168404Spjd * cached atime changes. Third, zfs_zinactive() may require a new tx, 103168404Spjd * which could deadlock the system if you were already holding one. 104191900Skmacy * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 105168404Spjd * 106168404Spjd * (3) All range locks must be grabbed before calling dmu_tx_assign(), 107168404Spjd * as they can span dmu_tx_assign() calls. 
108168404Spjd * 109209962Smm * (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign(). 110168404Spjd * This is critical because we don't want to block while holding locks. 111168404Spjd * Note, in particular, that if a lock is sometimes acquired before 112168404Spjd * the tx assigns, and sometimes after (e.g. z_lock), then failing to 113168404Spjd * use a non-blocking assign can deadlock the system. The scenario: 114168404Spjd * 115168404Spjd * Thread A has grabbed a lock before calling dmu_tx_assign(). 116168404Spjd * Thread B is in an already-assigned tx, and blocks for this lock. 117168404Spjd * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 118168404Spjd * forever, because the previous txg can't quiesce until B's tx commits. 119168404Spjd * 120168404Spjd * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 121168404Spjd * then drop all locks, call dmu_tx_wait(), and try again. 122168404Spjd * 123168404Spjd * (5) If the operation succeeded, generate the intent log entry for it 124168404Spjd * before dropping locks. This ensures that the ordering of events 125168404Spjd * in the intent log matches the order in which they actually occurred. 126251631Sdelphij * During ZIL replay the zfs_log_* functions will update the sequence 127209962Smm * number to indicate the zil transaction has replayed. 128168404Spjd * 129168404Spjd * (6) At the end of each vnode op, the DMU tx must always commit, 130168404Spjd * regardless of whether there were any errors. 131168404Spjd * 132219089Spjd * (7) After dropping all locks, invoke zil_commit(zilog, foid) 133168404Spjd * to ensure that synchronous semantics are provided when necessary. 134168404Spjd * 135168404Spjd * In general, this is how things should be ordered in each vnode op: 136168404Spjd * 137168404Spjd * ZFS_ENTER(zfsvfs); // exit if unmounted 138168404Spjd * top: 139168404Spjd * zfs_dirent_lock(&dl, ...) 
// lock directory entry (may VN_HOLD()) 140168404Spjd * rw_enter(...); // grab any other locks you need 141168404Spjd * tx = dmu_tx_create(...); // get DMU tx 142168404Spjd * dmu_tx_hold_*(); // hold each object you might modify 143209962Smm * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign 144168404Spjd * if (error) { 145168404Spjd * rw_exit(...); // drop locks 146168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 147168404Spjd * VN_RELE(...); // release held vnodes 148209962Smm * if (error == ERESTART) { 149168404Spjd * dmu_tx_wait(tx); 150168404Spjd * dmu_tx_abort(tx); 151168404Spjd * goto top; 152168404Spjd * } 153168404Spjd * dmu_tx_abort(tx); // abort DMU tx 154168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 155168404Spjd * return (error); // really out of space 156168404Spjd * } 157168404Spjd * error = do_real_work(); // do whatever this VOP does 158168404Spjd * if (error == 0) 159168404Spjd * zfs_log_*(...); // on success, make ZIL entry 160168404Spjd * dmu_tx_commit(tx); // commit DMU tx -- error or not 161168404Spjd * rw_exit(...); // drop locks 162168404Spjd * zfs_dirent_unlock(dl); // unlock directory entry 163168404Spjd * VN_RELE(...); // release held vnodes 164219089Spjd * zil_commit(zilog, foid); // synchronous when necessary 165168404Spjd * ZFS_EXIT(zfsvfs); // finished in zfs 166168404Spjd * return (error); // done, report error 167168404Spjd */ 168185029Spjd 169168404Spjd/* ARGSUSED */ 170168404Spjdstatic int 171185029Spjdzfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 172168404Spjd{ 173168962Spjd znode_t *zp = VTOZ(*vpp); 174209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 175168404Spjd 176209962Smm ZFS_ENTER(zfsvfs); 177209962Smm ZFS_VERIFY_ZP(zp); 178209962Smm 179219089Spjd if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 180185029Spjd ((flag & FAPPEND) == 0)) { 181209962Smm ZFS_EXIT(zfsvfs); 182249195Smm return (SET_ERROR(EPERM)); 183185029Spjd } 184185029Spjd 185185029Spjd if (!zfs_has_ctldir(zp) && 
zp->z_zfsvfs->z_vscan && 186185029Spjd ZTOV(zp)->v_type == VREG && 187219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 188209962Smm if (fs_vscan(*vpp, cr, 0) != 0) { 189209962Smm ZFS_EXIT(zfsvfs); 190249195Smm return (SET_ERROR(EACCES)); 191209962Smm } 192209962Smm } 193185029Spjd 194168404Spjd /* Keep a count of the synchronous opens in the znode */ 195168962Spjd if (flag & (FSYNC | FDSYNC)) 196168404Spjd atomic_inc_32(&zp->z_sync_cnt); 197185029Spjd 198209962Smm ZFS_EXIT(zfsvfs); 199168404Spjd return (0); 200168404Spjd} 201168404Spjd 202168404Spjd/* ARGSUSED */ 203168404Spjdstatic int 204185029Spjdzfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 205185029Spjd caller_context_t *ct) 206168404Spjd{ 207168962Spjd znode_t *zp = VTOZ(vp); 208209962Smm zfsvfs_t *zfsvfs = zp->z_zfsvfs; 209168404Spjd 210210470Smm /* 211210470Smm * Clean up any locks held by this process on the vp. 212210470Smm */ 213210470Smm cleanlocks(vp, ddi_get_pid(), 0); 214210470Smm cleanshares(vp, ddi_get_pid()); 215210470Smm 216209962Smm ZFS_ENTER(zfsvfs); 217209962Smm ZFS_VERIFY_ZP(zp); 218209962Smm 219168404Spjd /* Decrement the synchronous opens in the znode */ 220185029Spjd if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 221168404Spjd atomic_dec_32(&zp->z_sync_cnt); 222168404Spjd 223185029Spjd if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 224185029Spjd ZTOV(zp)->v_type == VREG && 225219089Spjd !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 226185029Spjd VERIFY(fs_vscan(vp, cr, 1) == 0); 227185029Spjd 228209962Smm ZFS_EXIT(zfsvfs); 229168404Spjd return (0); 230168404Spjd} 231168404Spjd 232168404Spjd/* 233168404Spjd * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 234168404Spjd * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 
235168404Spjd */ 236168404Spjdstatic int 237168978Spjdzfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 238168404Spjd{ 239168404Spjd znode_t *zp = VTOZ(vp); 240168404Spjd uint64_t noff = (uint64_t)*off; /* new offset */ 241168404Spjd uint64_t file_sz; 242168404Spjd int error; 243168404Spjd boolean_t hole; 244168404Spjd 245219089Spjd file_sz = zp->z_size; 246168404Spjd if (noff >= file_sz) { 247249195Smm return (SET_ERROR(ENXIO)); 248168404Spjd } 249168404Spjd 250168962Spjd if (cmd == _FIO_SEEK_HOLE) 251168404Spjd hole = B_TRUE; 252168404Spjd else 253168404Spjd hole = B_FALSE; 254168404Spjd 255168404Spjd error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 256168404Spjd 257168404Spjd /* end of file? */ 258168404Spjd if ((error == ESRCH) || (noff > file_sz)) { 259168404Spjd /* 260168404Spjd * Handle the virtual hole at the end of file. 261168404Spjd */ 262168404Spjd if (hole) { 263168404Spjd *off = file_sz; 264168404Spjd return (0); 265168404Spjd } 266249195Smm return (SET_ERROR(ENXIO)); 267168404Spjd } 268168404Spjd 269168404Spjd if (noff < *off) 270168404Spjd return (error); 271168404Spjd *off = noff; 272168404Spjd return (error); 273168404Spjd} 274168404Spjd 275168404Spjd/* ARGSUSED */ 276168404Spjdstatic int 277168978Spjdzfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 278185029Spjd int *rvalp, caller_context_t *ct) 279168404Spjd{ 280168962Spjd offset_t off; 281168962Spjd int error; 282168962Spjd zfsvfs_t *zfsvfs; 283185029Spjd znode_t *zp; 284168404Spjd 285168404Spjd switch (com) { 286185029Spjd case _FIOFFS: 287168962Spjd return (0); 288168404Spjd 289168962Spjd /* 290168962Spjd * The following two ioctls are used by bfu. Faking out, 291168962Spjd * necessary to avoid bfu errors. 
292168962Spjd */ 293185029Spjd case _FIOGDIO: 294185029Spjd case _FIOSDIO: 295168962Spjd return (0); 296168962Spjd 297185029Spjd case _FIO_SEEK_DATA: 298185029Spjd case _FIO_SEEK_HOLE: 299233918Savg#ifdef sun 300168962Spjd if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 301249195Smm return (SET_ERROR(EFAULT)); 302233918Savg#else 303233918Savg off = *(offset_t *)data; 304233918Savg#endif 305185029Spjd zp = VTOZ(vp); 306185029Spjd zfsvfs = zp->z_zfsvfs; 307168404Spjd ZFS_ENTER(zfsvfs); 308185029Spjd ZFS_VERIFY_ZP(zp); 309168404Spjd 310168404Spjd /* offset parameter is in/out */ 311168404Spjd error = zfs_holey(vp, com, &off); 312168404Spjd ZFS_EXIT(zfsvfs); 313168404Spjd if (error) 314168404Spjd return (error); 315233918Savg#ifdef sun 316168962Spjd if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 317249195Smm return (SET_ERROR(EFAULT)); 318233918Savg#else 319233918Savg *(offset_t *)data = off; 320233918Savg#endif 321168404Spjd return (0); 322168404Spjd } 323249195Smm return (SET_ERROR(ENOTTY)); 324168404Spjd} 325168404Spjd 326209962Smmstatic vm_page_t 327253953Sattiliopage_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 328209962Smm{ 329209962Smm vm_object_t obj; 330209962Smm vm_page_t pp; 331209962Smm 332209962Smm obj = vp->v_object; 333248084Sattilio zfs_vmobject_assert_wlocked(obj); 334209962Smm 335209962Smm for (;;) { 336209962Smm if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 337246293Savg pp->valid) { 338212652Savg if ((pp->oflags & VPO_BUSY) != 0) { 339212652Savg /* 340212652Savg * Reference the page before unlocking and 341212652Savg * sleeping so that the page daemon is less 342212652Savg * likely to reclaim it. 
343212652Savg */ 344225418Skib vm_page_reference(pp); 345212652Savg vm_page_sleep(pp, "zfsmwb"); 346209962Smm continue; 347212652Savg } 348252337Sgavin } else if (pp == NULL) { 349246293Savg pp = vm_page_alloc(obj, OFF_TO_IDX(start), 350246293Savg VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED | 351246293Savg VM_ALLOC_NOBUSY); 352252337Sgavin } else { 353252337Sgavin ASSERT(pp != NULL && !pp->valid); 354252337Sgavin pp = NULL; 355209962Smm } 356246293Savg 357246293Savg if (pp != NULL) { 358246293Savg ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 359253953Sattilio vm_object_pip_add(obj, 1); 360253939Sattilio vm_page_io_start(pp); 361246293Savg pmap_remove_write(pp); 362246293Savg vm_page_clear_dirty(pp, off, nbytes); 363246293Savg } 364209962Smm break; 365209962Smm } 366209962Smm return (pp); 367209962Smm} 368209962Smm 369209962Smmstatic void 370253953Sattiliopage_unbusy(vm_page_t pp) 371209962Smm{ 372209962Smm 373246293Savg vm_page_io_finish(pp); 374253953Sattilio vm_object_pip_subtract(pp->object, 1); 375209962Smm} 376209962Smm 377253953Sattiliostatic vm_page_t 378253953Sattiliopage_hold(vnode_t *vp, int64_t start) 379253953Sattilio{ 380253953Sattilio vm_object_t obj; 381253953Sattilio vm_page_t pp; 382253953Sattilio 383253953Sattilio obj = vp->v_object; 384253953Sattilio zfs_vmobject_assert_wlocked(obj); 385253953Sattilio 386253953Sattilio for (;;) { 387253953Sattilio if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 388253953Sattilio pp->valid) { 389253953Sattilio if ((pp->oflags & VPO_BUSY) != 0) { 390253953Sattilio /* 391253953Sattilio * Reference the page before unlocking and 392253953Sattilio * sleeping so that the page daemon is less 393253953Sattilio * likely to reclaim it. 
394253953Sattilio */ 395253953Sattilio vm_page_reference(pp); 396253953Sattilio vm_page_sleep(pp, "zfsmwb"); 397253953Sattilio continue; 398253953Sattilio } 399253953Sattilio 400253953Sattilio ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 401253953Sattilio vm_page_lock(pp); 402253953Sattilio vm_page_hold(pp); 403253953Sattilio vm_page_unlock(pp); 404253953Sattilio 405253953Sattilio } else 406253953Sattilio pp = NULL; 407253953Sattilio break; 408253953Sattilio } 409253953Sattilio return (pp); 410253953Sattilio} 411253953Sattilio 412253953Sattiliostatic void 413253953Sattiliopage_unhold(vm_page_t pp) 414253953Sattilio{ 415253953Sattilio 416253953Sattilio vm_page_lock(pp); 417253953Sattilio vm_page_unhold(pp); 418253953Sattilio vm_page_unlock(pp); 419253953Sattilio} 420253953Sattilio 421209962Smmstatic caddr_t 422209962Smmzfs_map_page(vm_page_t pp, struct sf_buf **sfp) 423209962Smm{ 424209962Smm 425212951Savg *sfp = sf_buf_alloc(pp, 0); 426209962Smm return ((caddr_t)sf_buf_kva(*sfp)); 427209962Smm} 428209962Smm 429209962Smmstatic void 430209962Smmzfs_unmap_page(struct sf_buf *sf) 431209962Smm{ 432209962Smm 433209962Smm sf_buf_free(sf); 434209962Smm} 435209962Smm 436168404Spjd/* 437168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 438168404Spjd * between the DMU cache and the memory mapped pages. What this means: 439168404Spjd * 440168404Spjd * On Write: If we find a memory mapped page, we write to *both* 441168404Spjd * the page and the dmu buffer. 
442168404Spjd */ 443209962Smmstatic void 444209962Smmupdate_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 445209962Smm int segflg, dmu_tx_t *tx) 446168404Spjd{ 447168404Spjd vm_object_t obj; 448168404Spjd struct sf_buf *sf; 449246293Savg caddr_t va; 450212655Savg int off; 451168404Spjd 452168404Spjd ASSERT(vp->v_mount != NULL); 453168404Spjd obj = vp->v_object; 454168404Spjd ASSERT(obj != NULL); 455168404Spjd 456168404Spjd off = start & PAGEOFFSET; 457248084Sattilio zfs_vmobject_wlock(obj); 458168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 459209962Smm vm_page_t pp; 460246293Savg int nbytes = imin(PAGESIZE - off, len); 461168404Spjd 462246293Savg if (segflg == UIO_NOCOPY) { 463246293Savg pp = vm_page_lookup(obj, OFF_TO_IDX(start)); 464246293Savg KASSERT(pp != NULL, 465246293Savg ("zfs update_pages: NULL page in putpages case")); 466246293Savg KASSERT(off == 0, 467246293Savg ("zfs update_pages: unaligned data in putpages case")); 468246293Savg KASSERT(pp->valid == VM_PAGE_BITS_ALL, 469246293Savg ("zfs update_pages: invalid page in putpages case")); 470246293Savg KASSERT(pp->busy > 0, 471246293Savg ("zfs update_pages: unbusy page in putpages case")); 472246293Savg KASSERT(!pmap_page_is_write_mapped(pp), 473246293Savg ("zfs update_pages: writable page in putpages case")); 474248084Sattilio zfs_vmobject_wunlock(obj); 475168404Spjd 476246293Savg va = zfs_map_page(pp, &sf); 477246293Savg (void) dmu_write(os, oid, start, nbytes, va, tx); 478246293Savg zfs_unmap_page(sf); 479246293Savg 480248084Sattilio zfs_vmobject_wlock(obj); 481246293Savg vm_page_undirty(pp); 482253953Sattilio } else if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { 483248084Sattilio zfs_vmobject_wunlock(obj); 484246293Savg 485209962Smm va = zfs_map_page(pp, &sf); 486246293Savg (void) dmu_read(os, oid, start+off, nbytes, 487246293Savg va+off, DMU_READ_PREFETCH);; 488209962Smm zfs_unmap_page(sf); 489246293Savg 490248084Sattilio zfs_vmobject_wlock(obj); 
491253953Sattilio page_unbusy(pp); 492168404Spjd } 493209962Smm len -= nbytes; 494168404Spjd off = 0; 495168404Spjd } 496246293Savg if (segflg != UIO_NOCOPY) 497246293Savg vm_object_pip_wakeupn(obj, 0); 498248084Sattilio zfs_vmobject_wunlock(obj); 499168404Spjd} 500168404Spjd 501168404Spjd/* 502219089Spjd * Read with UIO_NOCOPY flag means that sendfile(2) requests 503219089Spjd * ZFS to populate a range of page cache pages with data. 504219089Spjd * 505219089Spjd * NOTE: this function could be optimized to pre-allocate 506219089Spjd * all pages in advance, drain VPO_BUSY on all of them, 507219089Spjd * map them into contiguous KVA region and populate them 508219089Spjd * in one single dmu_read() call. 509219089Spjd */ 510219089Spjdstatic int 511219089Spjdmappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) 512219089Spjd{ 513219089Spjd znode_t *zp = VTOZ(vp); 514219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 515219089Spjd struct sf_buf *sf; 516219089Spjd vm_object_t obj; 517219089Spjd vm_page_t pp; 518219089Spjd int64_t start; 519219089Spjd caddr_t va; 520219089Spjd int len = nbytes; 521219089Spjd int off; 522219089Spjd int error = 0; 523219089Spjd 524219089Spjd ASSERT(uio->uio_segflg == UIO_NOCOPY); 525219089Spjd ASSERT(vp->v_mount != NULL); 526219089Spjd obj = vp->v_object; 527219089Spjd ASSERT(obj != NULL); 528219089Spjd ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); 529219089Spjd 530248084Sattilio zfs_vmobject_wlock(obj); 531219089Spjd for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { 532219089Spjd int bytes = MIN(PAGESIZE, len); 533219089Spjd 534219089Spjd pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_NOBUSY | 535219089Spjd VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_IGN_SBUSY); 536219089Spjd if (pp->valid == 0) { 537219089Spjd vm_page_io_start(pp); 538248084Sattilio zfs_vmobject_wunlock(obj); 539219089Spjd va = zfs_map_page(pp, &sf); 540219089Spjd error = dmu_read(os, zp->z_id, start, bytes, va, 541219089Spjd DMU_READ_PREFETCH); 542219089Spjd if 
(bytes != PAGESIZE && error == 0) 543219089Spjd bzero(va + bytes, PAGESIZE - bytes); 544219089Spjd zfs_unmap_page(sf); 545248084Sattilio zfs_vmobject_wlock(obj); 546219089Spjd vm_page_io_finish(pp); 547219089Spjd vm_page_lock(pp); 548219089Spjd if (error) { 549253073Savg if (pp->wire_count == 0 && pp->valid == 0 && 550253073Savg pp->busy == 0 && !(pp->oflags & VPO_BUSY)) 551253073Savg vm_page_free(pp); 552219089Spjd } else { 553219089Spjd pp->valid = VM_PAGE_BITS_ALL; 554219089Spjd vm_page_activate(pp); 555219089Spjd } 556219089Spjd vm_page_unlock(pp); 557219089Spjd } 558219089Spjd if (error) 559219089Spjd break; 560219089Spjd uio->uio_resid -= bytes; 561219089Spjd uio->uio_offset += bytes; 562219089Spjd len -= bytes; 563219089Spjd } 564248084Sattilio zfs_vmobject_wunlock(obj); 565219089Spjd return (error); 566219089Spjd} 567219089Spjd 568219089Spjd/* 569168404Spjd * When a file is memory mapped, we must keep the IO data synchronized 570168404Spjd * between the DMU cache and the memory mapped pages. What this means: 571168404Spjd * 572168404Spjd * On Read: We "read" preferentially from memory mapped pages, 573168404Spjd * else we default from the dmu buffer. 574168404Spjd * 575168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 576251631Sdelphij * the file is memory mapped. 
577168404Spjd */ 578168404Spjdstatic int 579168404Spjdmappedread(vnode_t *vp, int nbytes, uio_t *uio) 580168404Spjd{ 581168404Spjd znode_t *zp = VTOZ(vp); 582168404Spjd objset_t *os = zp->z_zfsvfs->z_os; 583168404Spjd vm_object_t obj; 584212655Savg int64_t start; 585168926Spjd caddr_t va; 586168404Spjd int len = nbytes; 587212655Savg int off; 588168404Spjd int error = 0; 589168404Spjd 590168404Spjd ASSERT(vp->v_mount != NULL); 591168404Spjd obj = vp->v_object; 592168404Spjd ASSERT(obj != NULL); 593168404Spjd 594168404Spjd start = uio->uio_loffset; 595168404Spjd off = start & PAGEOFFSET; 596248084Sattilio zfs_vmobject_wlock(obj); 597168404Spjd for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 598219089Spjd vm_page_t pp; 599219089Spjd uint64_t bytes = MIN(PAGESIZE - off, len); 600168404Spjd 601253953Sattilio if (pp = page_hold(vp, start)) { 602219089Spjd struct sf_buf *sf; 603219089Spjd caddr_t va; 604212652Savg 605248084Sattilio zfs_vmobject_wunlock(obj); 606219089Spjd va = zfs_map_page(pp, &sf); 607219089Spjd error = uiomove(va + off, bytes, UIO_READ, uio); 608219089Spjd zfs_unmap_page(sf); 609248084Sattilio zfs_vmobject_wlock(obj); 610253953Sattilio page_unhold(pp); 611219089Spjd } else { 612248084Sattilio zfs_vmobject_wunlock(obj); 613219089Spjd error = dmu_read_uio(os, zp->z_id, uio, bytes); 614248084Sattilio zfs_vmobject_wlock(obj); 615168404Spjd } 616168404Spjd len -= bytes; 617168404Spjd off = 0; 618168404Spjd if (error) 619168404Spjd break; 620168404Spjd } 621248084Sattilio zfs_vmobject_wunlock(obj); 622168404Spjd return (error); 623168404Spjd} 624168404Spjd 625168404Spjdoffset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 626168404Spjd 627168404Spjd/* 628168404Spjd * Read bytes from specified file into supplied buffer. 629168404Spjd * 630168404Spjd * IN: vp - vnode of file to be read from. 631168404Spjd * uio - structure supplying read location, range info, 632168404Spjd * and return buffer. 
633168404Spjd * ioflag - SYNC flags; used to provide FRSYNC semantics. 634168404Spjd * cr - credentials of caller. 635185029Spjd * ct - caller context 636168404Spjd * 637168404Spjd * OUT: uio - updated offset and range, buffer filled. 638168404Spjd * 639251631Sdelphij * RETURN: 0 on success, error code on failure. 640168404Spjd * 641168404Spjd * Side Effects: 642168404Spjd * vp - atime updated if byte count > 0 643168404Spjd */ 644168404Spjd/* ARGSUSED */ 645168404Spjdstatic int 646168962Spjdzfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 647168404Spjd{ 648168404Spjd znode_t *zp = VTOZ(vp); 649168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 650185029Spjd objset_t *os; 651168404Spjd ssize_t n, nbytes; 652247187Smm int error = 0; 653168404Spjd rl_t *rl; 654219089Spjd xuio_t *xuio = NULL; 655168404Spjd 656168404Spjd ZFS_ENTER(zfsvfs); 657185029Spjd ZFS_VERIFY_ZP(zp); 658185029Spjd os = zfsvfs->z_os; 659168404Spjd 660219089Spjd if (zp->z_pflags & ZFS_AV_QUARANTINED) { 661185029Spjd ZFS_EXIT(zfsvfs); 662249195Smm return (SET_ERROR(EACCES)); 663185029Spjd } 664185029Spjd 665168404Spjd /* 666168404Spjd * Validate file offset 667168404Spjd */ 668168404Spjd if (uio->uio_loffset < (offset_t)0) { 669168404Spjd ZFS_EXIT(zfsvfs); 670249195Smm return (SET_ERROR(EINVAL)); 671168404Spjd } 672168404Spjd 673168404Spjd /* 674168404Spjd * Fasttrack empty reads 675168404Spjd */ 676168404Spjd if (uio->uio_resid == 0) { 677168404Spjd ZFS_EXIT(zfsvfs); 678168404Spjd return (0); 679168404Spjd } 680168404Spjd 681168404Spjd /* 682168962Spjd * Check for mandatory locks 683168962Spjd */ 684219089Spjd if (MANDMODE(zp->z_mode)) { 685168962Spjd if (error = chklock(vp, FREAD, 686168962Spjd uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 687168962Spjd ZFS_EXIT(zfsvfs); 688168962Spjd return (error); 689168962Spjd } 690168962Spjd } 691168962Spjd 692168962Spjd /* 693168404Spjd * If we're in FRSYNC mode, sync out this znode before reading it. 
694168404Spjd */ 695224605Smm if (zfsvfs->z_log && 696224605Smm (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) 697219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 698168404Spjd 699168404Spjd /* 700168404Spjd * Lock the range against changes. 701168404Spjd */ 702168404Spjd rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 703168404Spjd 704168404Spjd /* 705168404Spjd * If we are reading past end-of-file we can skip 706168404Spjd * to the end; but we might still need to set atime. 707168404Spjd */ 708219089Spjd if (uio->uio_loffset >= zp->z_size) { 709168404Spjd error = 0; 710168404Spjd goto out; 711168404Spjd } 712168404Spjd 713219089Spjd ASSERT(uio->uio_loffset < zp->z_size); 714219089Spjd n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 715168404Spjd 716219089Spjd#ifdef sun 717219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 718219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 719219089Spjd int nblk; 720219089Spjd int blksz = zp->z_blksz; 721219089Spjd uint64_t offset = uio->uio_loffset; 722219089Spjd 723219089Spjd xuio = (xuio_t *)uio; 724219089Spjd if ((ISP2(blksz))) { 725219089Spjd nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 726219089Spjd blksz)) / blksz; 727219089Spjd } else { 728219089Spjd ASSERT(offset + n <= blksz); 729219089Spjd nblk = 1; 730219089Spjd } 731219089Spjd (void) dmu_xuio_init(xuio, nblk); 732219089Spjd 733219089Spjd if (vn_has_cached_data(vp)) { 734219089Spjd /* 735219089Spjd * For simplicity, we always allocate a full buffer 736219089Spjd * even if we only expect to read a portion of a block. 
737219089Spjd */ 738219089Spjd while (--nblk >= 0) { 739219089Spjd (void) dmu_xuio_add(xuio, 740219089Spjd dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 741219089Spjd blksz), 0, blksz); 742219089Spjd } 743219089Spjd } 744219089Spjd } 745219089Spjd#endif /* sun */ 746219089Spjd 747168404Spjd while (n > 0) { 748168404Spjd nbytes = MIN(n, zfs_read_chunk_size - 749168404Spjd P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 750168404Spjd 751219089Spjd#ifdef __FreeBSD__ 752219089Spjd if (uio->uio_segflg == UIO_NOCOPY) 753219089Spjd error = mappedread_sf(vp, nbytes, uio); 754219089Spjd else 755219089Spjd#endif /* __FreeBSD__ */ 756168404Spjd if (vn_has_cached_data(vp)) 757168404Spjd error = mappedread(vp, nbytes, uio); 758168404Spjd else 759168404Spjd error = dmu_read_uio(os, zp->z_id, uio, nbytes); 760185029Spjd if (error) { 761185029Spjd /* convert checksum errors into IO errors */ 762185029Spjd if (error == ECKSUM) 763249195Smm error = SET_ERROR(EIO); 764168404Spjd break; 765185029Spjd } 766168962Spjd 767168404Spjd n -= nbytes; 768168404Spjd } 769168404Spjdout: 770168404Spjd zfs_range_unlock(rl); 771168404Spjd 772168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 773168404Spjd ZFS_EXIT(zfsvfs); 774168404Spjd return (error); 775168404Spjd} 776168404Spjd 777168404Spjd/* 778168404Spjd * Write the bytes to a file. 779168404Spjd * 780168404Spjd * IN: vp - vnode of file to be written to. 781168404Spjd * uio - structure supplying write location, range info, 782168404Spjd * and data buffer. 783251631Sdelphij * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is 784251631Sdelphij * set if in append mode. 785168404Spjd * cr - credentials of caller. 786185029Spjd * ct - caller context (NFS/CIFS fem monitor only) 787168404Spjd * 788168404Spjd * OUT: uio - updated offset and range. 789168404Spjd * 790251631Sdelphij * RETURN: 0 on success, error code on failure. 
791168404Spjd * 792168404Spjd * Timestamps: 793168404Spjd * vp - ctime|mtime updated if byte count > 0 794168404Spjd */ 795219089Spjd 796168404Spjd/* ARGSUSED */ 797168404Spjdstatic int 798168962Spjdzfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 799168404Spjd{ 800168404Spjd znode_t *zp = VTOZ(vp); 801168962Spjd rlim64_t limit = MAXOFFSET_T; 802168404Spjd ssize_t start_resid = uio->uio_resid; 803168404Spjd ssize_t tx_bytes; 804168404Spjd uint64_t end_size; 805168404Spjd dmu_tx_t *tx; 806168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 807185029Spjd zilog_t *zilog; 808168404Spjd offset_t woff; 809168404Spjd ssize_t n, nbytes; 810168404Spjd rl_t *rl; 811168404Spjd int max_blksz = zfsvfs->z_max_blksz; 812247187Smm int error = 0; 813209962Smm arc_buf_t *abuf; 814247187Smm iovec_t *aiov = NULL; 815219089Spjd xuio_t *xuio = NULL; 816219089Spjd int i_iov = 0; 817219089Spjd int iovcnt = uio->uio_iovcnt; 818219089Spjd iovec_t *iovp = uio->uio_iov; 819219089Spjd int write_eof; 820219089Spjd int count = 0; 821219089Spjd sa_bulk_attr_t bulk[4]; 822219089Spjd uint64_t mtime[2], ctime[2]; 823168404Spjd 824168404Spjd /* 825168404Spjd * Fasttrack empty write 826168404Spjd */ 827168404Spjd n = start_resid; 828168404Spjd if (n == 0) 829168404Spjd return (0); 830168404Spjd 831168962Spjd if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 832168962Spjd limit = MAXOFFSET_T; 833168962Spjd 834168404Spjd ZFS_ENTER(zfsvfs); 835185029Spjd ZFS_VERIFY_ZP(zp); 836168404Spjd 837219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 838219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 839219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 840219089Spjd &zp->z_size, 8); 841219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 842219089Spjd &zp->z_pflags, 8); 843219089Spjd 844168404Spjd /* 845185029Spjd * If immutable or not appending then return EPERM 846185029Spjd */ 
847219089Spjd if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 848219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 849219089Spjd (uio->uio_loffset < zp->z_size))) { 850185029Spjd ZFS_EXIT(zfsvfs); 851249195Smm return (SET_ERROR(EPERM)); 852185029Spjd } 853185029Spjd 854185029Spjd zilog = zfsvfs->z_log; 855185029Spjd 856185029Spjd /* 857219089Spjd * Validate file offset 858219089Spjd */ 859219089Spjd woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 860219089Spjd if (woff < 0) { 861219089Spjd ZFS_EXIT(zfsvfs); 862249195Smm return (SET_ERROR(EINVAL)); 863219089Spjd } 864219089Spjd 865219089Spjd /* 866219089Spjd * Check for mandatory locks before calling zfs_range_lock() 867219089Spjd * in order to prevent a deadlock with locks set via fcntl(). 868219089Spjd */ 869219089Spjd if (MANDMODE((mode_t)zp->z_mode) && 870219089Spjd (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 871219089Spjd ZFS_EXIT(zfsvfs); 872219089Spjd return (error); 873219089Spjd } 874219089Spjd 875219089Spjd#ifdef sun 876219089Spjd /* 877168404Spjd * Pre-fault the pages to ensure slow (eg NFS) pages 878168404Spjd * don't hold up txg. 879219089Spjd * Skip this if uio contains loaned arc_buf. 880168404Spjd */ 881219089Spjd if ((uio->uio_extflg == UIO_XUIO) && 882219089Spjd (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 883219089Spjd xuio = (xuio_t *)uio; 884219089Spjd else 885219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 886219089Spjd#endif /* sun */ 887168404Spjd 888168404Spjd /* 889168404Spjd * If in append mode, set the io offset pointer to eof. 890168404Spjd */ 891213673Spjd if (ioflag & FAPPEND) { 892168404Spjd /* 893219089Spjd * Obtain an appending range lock to guarantee file append 894219089Spjd * semantics. We reset the write offset once we have the lock. 
895168404Spjd */ 896168404Spjd rl = zfs_range_lock(zp, 0, n, RL_APPEND); 897219089Spjd woff = rl->r_off; 898168404Spjd if (rl->r_len == UINT64_MAX) { 899219089Spjd /* 900219089Spjd * We overlocked the file because this write will cause 901219089Spjd * the file block size to increase. 902219089Spjd * Note that zp_size cannot change with this lock held. 903219089Spjd */ 904219089Spjd woff = zp->z_size; 905168404Spjd } 906219089Spjd uio->uio_loffset = woff; 907168404Spjd } else { 908168404Spjd /* 909219089Spjd * Note that if the file block size will change as a result of 910219089Spjd * this write, then this range lock will lock the entire file 911219089Spjd * so that we can re-write the block safely. 912168404Spjd */ 913168404Spjd rl = zfs_range_lock(zp, woff, n, RL_WRITER); 914168404Spjd } 915168404Spjd 916235781Strasz if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { 917235781Strasz zfs_range_unlock(rl); 918235781Strasz ZFS_EXIT(zfsvfs); 919235781Strasz return (EFBIG); 920235781Strasz } 921235781Strasz 922168962Spjd if (woff >= limit) { 923168962Spjd zfs_range_unlock(rl); 924168962Spjd ZFS_EXIT(zfsvfs); 925249195Smm return (SET_ERROR(EFBIG)); 926168962Spjd } 927168962Spjd 928168962Spjd if ((woff + n) > limit || woff > (limit - n)) 929168962Spjd n = limit - woff; 930168962Spjd 931219089Spjd /* Will this write extend the file length? */ 932219089Spjd write_eof = (woff + n > zp->z_size); 933168404Spjd 934219089Spjd end_size = MAX(zp->z_size, woff + n); 935219089Spjd 936168404Spjd /* 937168404Spjd * Write the file in reasonable size chunks. Each chunk is written 938168404Spjd * in a separate transaction; this keeps the intent log records small 939168404Spjd * and allows us to do more fine-grained space accounting. 
940168404Spjd */ 941168404Spjd while (n > 0) { 942209962Smm abuf = NULL; 943209962Smm woff = uio->uio_loffset; 944209962Smmagain: 945219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 946219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 947209962Smm if (abuf != NULL) 948209962Smm dmu_return_arcbuf(abuf); 949249195Smm error = SET_ERROR(EDQUOT); 950209962Smm break; 951209962Smm } 952209962Smm 953219089Spjd if (xuio && abuf == NULL) { 954219089Spjd ASSERT(i_iov < iovcnt); 955219089Spjd aiov = &iovp[i_iov]; 956219089Spjd abuf = dmu_xuio_arcbuf(xuio, i_iov); 957219089Spjd dmu_xuio_clear(xuio, i_iov); 958219089Spjd DTRACE_PROBE3(zfs_cp_write, int, i_iov, 959219089Spjd iovec_t *, aiov, arc_buf_t *, abuf); 960219089Spjd ASSERT((aiov->iov_base == abuf->b_data) || 961219089Spjd ((char *)aiov->iov_base - (char *)abuf->b_data + 962219089Spjd aiov->iov_len == arc_buf_size(abuf))); 963219089Spjd i_iov++; 964219089Spjd } else if (abuf == NULL && n >= max_blksz && 965219089Spjd woff >= zp->z_size && 966209962Smm P2PHASE(woff, max_blksz) == 0 && 967209962Smm zp->z_blksz == max_blksz) { 968219089Spjd /* 969219089Spjd * This write covers a full block. "Borrow" a buffer 970219089Spjd * from the dmu so that we can fill it before we enter 971219089Spjd * a transaction. This avoids the possibility of 972219089Spjd * holding up the transaction if the data copy hangs 973219089Spjd * up on a pagefault (e.g., from an NFS server mapping). 974219089Spjd */ 975209962Smm size_t cbytes; 976209962Smm 977219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 978219089Spjd max_blksz); 979209962Smm ASSERT(abuf != NULL); 980209962Smm ASSERT(arc_buf_size(abuf) == max_blksz); 981209962Smm if (error = uiocopy(abuf->b_data, max_blksz, 982209962Smm UIO_WRITE, uio, &cbytes)) { 983209962Smm dmu_return_arcbuf(abuf); 984209962Smm break; 985209962Smm } 986209962Smm ASSERT(cbytes == max_blksz); 987209962Smm } 988209962Smm 989209962Smm /* 990168404Spjd * Start a transaction. 
991168404Spjd */ 992168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 993219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 994168404Spjd dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 995219089Spjd zfs_sa_upgrade_txholds(tx, zp); 996209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 997168404Spjd if (error) { 998209962Smm if (error == ERESTART) { 999168404Spjd dmu_tx_wait(tx); 1000168404Spjd dmu_tx_abort(tx); 1001209962Smm goto again; 1002168404Spjd } 1003168404Spjd dmu_tx_abort(tx); 1004209962Smm if (abuf != NULL) 1005209962Smm dmu_return_arcbuf(abuf); 1006168404Spjd break; 1007168404Spjd } 1008168404Spjd 1009168404Spjd /* 1010168404Spjd * If zfs_range_lock() over-locked we grow the blocksize 1011168404Spjd * and then reduce the lock range. This will only happen 1012168404Spjd * on the first iteration since zfs_range_reduce() will 1013168404Spjd * shrink down r_len to the appropriate size. 1014168404Spjd */ 1015168404Spjd if (rl->r_len == UINT64_MAX) { 1016168404Spjd uint64_t new_blksz; 1017168404Spjd 1018168404Spjd if (zp->z_blksz > max_blksz) { 1019168404Spjd ASSERT(!ISP2(zp->z_blksz)); 1020168404Spjd new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); 1021168404Spjd } else { 1022168404Spjd new_blksz = MIN(end_size, max_blksz); 1023168404Spjd } 1024168404Spjd zfs_grow_blocksize(zp, new_blksz, tx); 1025168404Spjd zfs_range_reduce(rl, woff, n); 1026168404Spjd } 1027168404Spjd 1028168404Spjd /* 1029168404Spjd * XXX - should we really limit each write to z_max_blksz? 1030168404Spjd * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 
1031168404Spjd */ 1032168404Spjd nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 1033168404Spjd 1034219089Spjd if (woff + nbytes > zp->z_size) 1035168404Spjd vnode_pager_setsize(vp, woff + nbytes); 1036168404Spjd 1037209962Smm if (abuf == NULL) { 1038209962Smm tx_bytes = uio->uio_resid; 1039219089Spjd error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 1040219089Spjd uio, nbytes, tx); 1041209962Smm tx_bytes -= uio->uio_resid; 1042168404Spjd } else { 1043209962Smm tx_bytes = nbytes; 1044219089Spjd ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 1045219089Spjd /* 1046219089Spjd * If this is not a full block write, but we are 1047219089Spjd * extending the file past EOF and this data starts 1048219089Spjd * block-aligned, use assign_arcbuf(). Otherwise, 1049219089Spjd * write via dmu_write(). 1050219089Spjd */ 1051219089Spjd if (tx_bytes < max_blksz && (!write_eof || 1052219089Spjd aiov->iov_base != abuf->b_data)) { 1053219089Spjd ASSERT(xuio); 1054219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, woff, 1055219089Spjd aiov->iov_len, aiov->iov_base, tx); 1056219089Spjd dmu_return_arcbuf(abuf); 1057219089Spjd xuio_stat_wbuf_copied(); 1058219089Spjd } else { 1059219089Spjd ASSERT(xuio || tx_bytes == max_blksz); 1060219089Spjd dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 1061219089Spjd woff, abuf, tx); 1062219089Spjd } 1063209962Smm ASSERT(tx_bytes <= uio->uio_resid); 1064209962Smm uioskip(uio, tx_bytes); 1065168404Spjd } 1066212657Savg if (tx_bytes && vn_has_cached_data(vp)) { 1067209962Smm update_pages(vp, woff, tx_bytes, zfsvfs->z_os, 1068209962Smm zp->z_id, uio->uio_segflg, tx); 1069209962Smm } 1070209962Smm 1071209962Smm /* 1072168404Spjd * If we made no progress, we're done. If we made even 1073168404Spjd * partial progress, update the znode and ZIL accordingly. 
1074168404Spjd */ 1075168404Spjd if (tx_bytes == 0) { 1076219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 1077219089Spjd (void *)&zp->z_size, sizeof (uint64_t), tx); 1078168404Spjd dmu_tx_commit(tx); 1079168404Spjd ASSERT(error != 0); 1080168404Spjd break; 1081168404Spjd } 1082168404Spjd 1083168404Spjd /* 1084168404Spjd * Clear Set-UID/Set-GID bits on successful write if not 1085168404Spjd * privileged and at least one of the excute bits is set. 1086168404Spjd * 1087168404Spjd * It would be nice to to this after all writes have 1088168404Spjd * been done, but that would still expose the ISUID/ISGID 1089168404Spjd * to another app after the partial write is committed. 1090185029Spjd * 1091185029Spjd * Note: we don't call zfs_fuid_map_id() here because 1092185029Spjd * user 0 is not an ephemeral uid. 1093168404Spjd */ 1094168404Spjd mutex_enter(&zp->z_acl_lock); 1095219089Spjd if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 1096168404Spjd (S_IXUSR >> 6))) != 0 && 1097219089Spjd (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 1098185029Spjd secpolicy_vnode_setid_retain(vp, cr, 1099219089Spjd (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 1100219089Spjd uint64_t newmode; 1101219089Spjd zp->z_mode &= ~(S_ISUID | S_ISGID); 1102219089Spjd newmode = zp->z_mode; 1103219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 1104219089Spjd (void *)&newmode, sizeof (uint64_t), tx); 1105168404Spjd } 1106168404Spjd mutex_exit(&zp->z_acl_lock); 1107168404Spjd 1108219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 1109219089Spjd B_TRUE); 1110168404Spjd 1111168404Spjd /* 1112168404Spjd * Update the file size (zp_size) if it has changed; 1113168404Spjd * account for possible concurrent updates. 
1114168404Spjd */ 1115219089Spjd while ((end_size = zp->z_size) < uio->uio_loffset) { 1116219089Spjd (void) atomic_cas_64(&zp->z_size, end_size, 1117168404Spjd uio->uio_loffset); 1118219089Spjd ASSERT(error == 0); 1119219089Spjd } 1120219089Spjd /* 1121219089Spjd * If we are replaying and eof is non zero then force 1122219089Spjd * the file size to the specified eof. Note, there's no 1123219089Spjd * concurrency during replay. 1124219089Spjd */ 1125219089Spjd if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 1126219089Spjd zp->z_size = zfsvfs->z_replay_eof; 1127219089Spjd 1128219089Spjd error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1129219089Spjd 1130168404Spjd zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 1131168404Spjd dmu_tx_commit(tx); 1132168404Spjd 1133168404Spjd if (error != 0) 1134168404Spjd break; 1135168404Spjd ASSERT(tx_bytes == nbytes); 1136168404Spjd n -= nbytes; 1137219089Spjd 1138219089Spjd#ifdef sun 1139219089Spjd if (!xuio && n > 0) 1140219089Spjd uio_prefaultpages(MIN(n, max_blksz), uio); 1141219089Spjd#endif /* sun */ 1142168404Spjd } 1143168404Spjd 1144168404Spjd zfs_range_unlock(rl); 1145168404Spjd 1146168404Spjd /* 1147168404Spjd * If we're in replay mode, or we made no progress, return error. 1148168404Spjd * Otherwise, it's at least a partial write, so it's successful. 
1149168404Spjd */ 1150209962Smm if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1151168404Spjd ZFS_EXIT(zfsvfs); 1152168404Spjd return (error); 1153168404Spjd } 1154168404Spjd 1155219089Spjd if (ioflag & (FSYNC | FDSYNC) || 1156219089Spjd zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1157219089Spjd zil_commit(zilog, zp->z_id); 1158168404Spjd 1159168404Spjd ZFS_EXIT(zfsvfs); 1160168404Spjd return (0); 1161168404Spjd} 1162168404Spjd 1163168404Spjdvoid 1164219089Spjdzfs_get_done(zgd_t *zgd, int error) 1165168404Spjd{ 1166219089Spjd znode_t *zp = zgd->zgd_private; 1167219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 1168168404Spjd 1169219089Spjd if (zgd->zgd_db) 1170219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1171219089Spjd 1172219089Spjd zfs_range_unlock(zgd->zgd_rl); 1173219089Spjd 1174191900Skmacy /* 1175191900Skmacy * Release the vnode asynchronously as we currently have the 1176191900Skmacy * txg stopped from syncing. 1177191900Skmacy */ 1178219089Spjd VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1179219089Spjd 1180219089Spjd if (error == 0 && zgd->zgd_bp) 1181219089Spjd zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1182219089Spjd 1183168404Spjd kmem_free(zgd, sizeof (zgd_t)); 1184168404Spjd} 1185168404Spjd 1186214378Smm#ifdef DEBUG 1187214378Smmstatic int zil_fault_io = 0; 1188214378Smm#endif 1189214378Smm 1190168404Spjd/* 1191168404Spjd * Get data to generate a TX_WRITE intent log record. 
1192168404Spjd */ 1193168404Spjdint 1194168404Spjdzfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1195168404Spjd{ 1196168404Spjd zfsvfs_t *zfsvfs = arg; 1197168404Spjd objset_t *os = zfsvfs->z_os; 1198168404Spjd znode_t *zp; 1199219089Spjd uint64_t object = lr->lr_foid; 1200219089Spjd uint64_t offset = lr->lr_offset; 1201219089Spjd uint64_t size = lr->lr_length; 1202219089Spjd blkptr_t *bp = &lr->lr_blkptr; 1203168404Spjd dmu_buf_t *db; 1204168404Spjd zgd_t *zgd; 1205168404Spjd int error = 0; 1206168404Spjd 1207219089Spjd ASSERT(zio != NULL); 1208219089Spjd ASSERT(size != 0); 1209168404Spjd 1210168404Spjd /* 1211168404Spjd * Nothing to do if the file has been removed 1212168404Spjd */ 1213219089Spjd if (zfs_zget(zfsvfs, object, &zp) != 0) 1214249195Smm return (SET_ERROR(ENOENT)); 1215168404Spjd if (zp->z_unlinked) { 1216191900Skmacy /* 1217191900Skmacy * Release the vnode asynchronously as we currently have the 1218191900Skmacy * txg stopped from syncing. 1219191900Skmacy */ 1220196307Spjd VN_RELE_ASYNC(ZTOV(zp), 1221196307Spjd dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1222249195Smm return (SET_ERROR(ENOENT)); 1223168404Spjd } 1224168404Spjd 1225219089Spjd zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1226219089Spjd zgd->zgd_zilog = zfsvfs->z_log; 1227219089Spjd zgd->zgd_private = zp; 1228219089Spjd 1229168404Spjd /* 1230168404Spjd * Write records come in two flavors: immediate and indirect. 1231168404Spjd * For small writes it's cheaper to store the data with the 1232168404Spjd * log record (immediate); for large writes it's cheaper to 1233168404Spjd * sync the data and get a pointer to it (indirect) so that 1234168404Spjd * we don't have to write the data twice. 
1235168404Spjd */ 1236168404Spjd if (buf != NULL) { /* immediate write */ 1237219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1238168404Spjd /* test for truncation needs to be done while range locked */ 1239219089Spjd if (offset >= zp->z_size) { 1240249195Smm error = SET_ERROR(ENOENT); 1241219089Spjd } else { 1242219089Spjd error = dmu_read(os, object, offset, size, buf, 1243219089Spjd DMU_READ_NO_PREFETCH); 1244168404Spjd } 1245219089Spjd ASSERT(error == 0 || error == ENOENT); 1246168404Spjd } else { /* indirect write */ 1247168404Spjd /* 1248168404Spjd * Have to lock the whole block to ensure when it's 1249168404Spjd * written out and it's checksum is being calculated 1250168404Spjd * that no one can change the data. We need to re-check 1251168404Spjd * blocksize after we get the lock in case it's changed! 1252168404Spjd */ 1253168404Spjd for (;;) { 1254219089Spjd uint64_t blkoff; 1255219089Spjd size = zp->z_blksz; 1256219089Spjd blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; 1257219089Spjd offset -= blkoff; 1258219089Spjd zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1259219089Spjd RL_READER); 1260219089Spjd if (zp->z_blksz == size) 1261168404Spjd break; 1262219089Spjd offset += blkoff; 1263219089Spjd zfs_range_unlock(zgd->zgd_rl); 1264168404Spjd } 1265168404Spjd /* test for truncation needs to be done while range locked */ 1266219089Spjd if (lr->lr_offset >= zp->z_size) 1267249195Smm error = SET_ERROR(ENOENT); 1268214378Smm#ifdef DEBUG 1269214378Smm if (zil_fault_io) { 1270249195Smm error = SET_ERROR(EIO); 1271214378Smm zil_fault_io = 0; 1272214378Smm } 1273214378Smm#endif 1274219089Spjd if (error == 0) 1275219089Spjd error = dmu_buf_hold(os, object, offset, zgd, &db, 1276219089Spjd DMU_READ_NO_PREFETCH); 1277214378Smm 1278209962Smm if (error == 0) { 1279243524Smm blkptr_t *obp = dmu_buf_get_blkptr(db); 1280243524Smm if (obp) { 1281243524Smm ASSERT(BP_IS_HOLE(bp)); 1282243524Smm *bp = *obp; 1283243524Smm } 1284243524Smm 
1285219089Spjd zgd->zgd_db = db; 1286219089Spjd zgd->zgd_bp = bp; 1287219089Spjd 1288219089Spjd ASSERT(db->db_offset == offset); 1289219089Spjd ASSERT(db->db_size == size); 1290219089Spjd 1291219089Spjd error = dmu_sync(zio, lr->lr_common.lrc_txg, 1292219089Spjd zfs_get_done, zgd); 1293219089Spjd ASSERT(error || lr->lr_length <= zp->z_blksz); 1294219089Spjd 1295209962Smm /* 1296219089Spjd * On success, we need to wait for the write I/O 1297219089Spjd * initiated by dmu_sync() to complete before we can 1298219089Spjd * release this dbuf. We will finish everything up 1299219089Spjd * in the zfs_get_done() callback. 1300209962Smm */ 1301219089Spjd if (error == 0) 1302219089Spjd return (0); 1303209962Smm 1304219089Spjd if (error == EALREADY) { 1305219089Spjd lr->lr_common.lrc_txtype = TX_WRITE2; 1306219089Spjd error = 0; 1307219089Spjd } 1308209962Smm } 1309168404Spjd } 1310219089Spjd 1311219089Spjd zfs_get_done(zgd, error); 1312219089Spjd 1313168404Spjd return (error); 1314168404Spjd} 1315168404Spjd 1316168404Spjd/*ARGSUSED*/ 1317168404Spjdstatic int 1318185029Spjdzfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1319185029Spjd caller_context_t *ct) 1320168404Spjd{ 1321168404Spjd znode_t *zp = VTOZ(vp); 1322168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1323168404Spjd int error; 1324168404Spjd 1325168404Spjd ZFS_ENTER(zfsvfs); 1326185029Spjd ZFS_VERIFY_ZP(zp); 1327185029Spjd 1328185029Spjd if (flag & V_ACE_MASK) 1329185029Spjd error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1330185029Spjd else 1331185029Spjd error = zfs_zaccess_rwx(zp, mode, flag, cr); 1332185029Spjd 1333168404Spjd ZFS_EXIT(zfsvfs); 1334168404Spjd return (error); 1335168404Spjd} 1336168404Spjd 1337168404Spjd/* 1338211932Smm * If vnode is for a device return a specfs vnode instead. 
1339211932Smm */ 1340211932Smmstatic int 1341211932Smmspecvp_check(vnode_t **vpp, cred_t *cr) 1342211932Smm{ 1343211932Smm int error = 0; 1344211932Smm 1345211932Smm if (IS_DEVVP(*vpp)) { 1346211932Smm struct vnode *svp; 1347211932Smm 1348211932Smm svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1349211932Smm VN_RELE(*vpp); 1350211932Smm if (svp == NULL) 1351249195Smm error = SET_ERROR(ENOSYS); 1352211932Smm *vpp = svp; 1353211932Smm } 1354211932Smm return (error); 1355211932Smm} 1356211932Smm 1357211932Smm 1358211932Smm/* 1359168404Spjd * Lookup an entry in a directory, or an extended attribute directory. 1360168404Spjd * If it exists, return a held vnode reference for it. 1361168404Spjd * 1362168404Spjd * IN: dvp - vnode of directory to search. 1363168404Spjd * nm - name of entry to lookup. 1364168404Spjd * pnp - full pathname to lookup [UNUSED]. 1365168404Spjd * flags - LOOKUP_XATTR set if looking for an attribute. 1366168404Spjd * rdir - root directory vnode [UNUSED]. 1367168404Spjd * cr - credentials of caller. 1368185029Spjd * ct - caller context 1369185029Spjd * direntflags - directory lookup flags 1370185029Spjd * realpnp - returned pathname. 1371168404Spjd * 1372168404Spjd * OUT: vpp - vnode of located entry, NULL if not found. 1373168404Spjd * 1374251631Sdelphij * RETURN: 0 on success, error code on failure. 
1375168404Spjd * 1376168404Spjd * Timestamps: 1377168404Spjd * NA 1378168404Spjd */ 1379168404Spjd/* ARGSUSED */ 1380168962Spjdstatic int 1381168962Spjdzfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1382185029Spjd int nameiop, cred_t *cr, kthread_t *td, int flags) 1383168404Spjd{ 1384168962Spjd znode_t *zdp = VTOZ(dvp); 1385168962Spjd zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1386211932Smm int error = 0; 1387185029Spjd int *direntflags = NULL; 1388185029Spjd void *realpnp = NULL; 1389168404Spjd 1390211932Smm /* fast path */ 1391211932Smm if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { 1392211932Smm 1393211932Smm if (dvp->v_type != VDIR) { 1394249195Smm return (SET_ERROR(ENOTDIR)); 1395219089Spjd } else if (zdp->z_sa_hdl == NULL) { 1396249195Smm return (SET_ERROR(EIO)); 1397211932Smm } 1398211932Smm 1399211932Smm if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { 1400211932Smm error = zfs_fastaccesschk_execute(zdp, cr); 1401211932Smm if (!error) { 1402211932Smm *vpp = dvp; 1403211932Smm VN_HOLD(*vpp); 1404211932Smm return (0); 1405211932Smm } 1406211932Smm return (error); 1407211932Smm } else { 1408211932Smm vnode_t *tvp = dnlc_lookup(dvp, nm); 1409211932Smm 1410211932Smm if (tvp) { 1411211932Smm error = zfs_fastaccesschk_execute(zdp, cr); 1412211932Smm if (error) { 1413211932Smm VN_RELE(tvp); 1414211932Smm return (error); 1415211932Smm } 1416211932Smm if (tvp == DNLC_NO_VNODE) { 1417211932Smm VN_RELE(tvp); 1418249195Smm return (SET_ERROR(ENOENT)); 1419211932Smm } else { 1420211932Smm *vpp = tvp; 1421211932Smm return (specvp_check(vpp, cr)); 1422211932Smm } 1423211932Smm } 1424211932Smm } 1425211932Smm } 1426211932Smm 1427211932Smm DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1428211932Smm 1429168404Spjd ZFS_ENTER(zfsvfs); 1430185029Spjd ZFS_VERIFY_ZP(zdp); 1431168404Spjd 1432168404Spjd *vpp = NULL; 1433168404Spjd 1434185029Spjd if (flags & LOOKUP_XATTR) { 1435168404Spjd#ifdef TODO 1436168404Spjd /* 1437168404Spjd * 
If the xattr property is off, refuse the lookup request. 1438168404Spjd */ 1439168404Spjd if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1440168404Spjd ZFS_EXIT(zfsvfs); 1441249195Smm return (SET_ERROR(EINVAL)); 1442168404Spjd } 1443185029Spjd#endif 1444168404Spjd 1445168404Spjd /* 1446168404Spjd * We don't allow recursive attributes.. 1447168404Spjd * Maybe someday we will. 1448168404Spjd */ 1449219089Spjd if (zdp->z_pflags & ZFS_XATTR) { 1450168404Spjd ZFS_EXIT(zfsvfs); 1451249195Smm return (SET_ERROR(EINVAL)); 1452168404Spjd } 1453168404Spjd 1454168404Spjd if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1455168404Spjd ZFS_EXIT(zfsvfs); 1456168404Spjd return (error); 1457168404Spjd } 1458168404Spjd 1459168404Spjd /* 1460168404Spjd * Do we have permission to get into attribute directory? 1461168404Spjd */ 1462168404Spjd 1463185029Spjd if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1464185029Spjd B_FALSE, cr)) { 1465168404Spjd VN_RELE(*vpp); 1466185029Spjd *vpp = NULL; 1467168404Spjd } 1468168404Spjd 1469168404Spjd ZFS_EXIT(zfsvfs); 1470168404Spjd return (error); 1471168404Spjd } 1472168404Spjd 1473168404Spjd if (dvp->v_type != VDIR) { 1474168404Spjd ZFS_EXIT(zfsvfs); 1475249195Smm return (SET_ERROR(ENOTDIR)); 1476168404Spjd } 1477168404Spjd 1478168404Spjd /* 1479168404Spjd * Check accessibility of directory. 
1480168404Spjd */ 1481168404Spjd 1482185029Spjd if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1483168404Spjd ZFS_EXIT(zfsvfs); 1484168404Spjd return (error); 1485168404Spjd } 1486168404Spjd 1487185029Spjd if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1488185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1489185029Spjd ZFS_EXIT(zfsvfs); 1490249195Smm return (SET_ERROR(EILSEQ)); 1491185029Spjd } 1492168404Spjd 1493185029Spjd error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1494211932Smm if (error == 0) 1495211932Smm error = specvp_check(vpp, cr); 1496168962Spjd 1497168404Spjd /* Translate errors and add SAVENAME when needed. */ 1498168404Spjd if (cnp->cn_flags & ISLASTCN) { 1499168404Spjd switch (nameiop) { 1500168404Spjd case CREATE: 1501168404Spjd case RENAME: 1502168404Spjd if (error == ENOENT) { 1503168404Spjd error = EJUSTRETURN; 1504168404Spjd cnp->cn_flags |= SAVENAME; 1505168404Spjd break; 1506168404Spjd } 1507168404Spjd /* FALLTHROUGH */ 1508168404Spjd case DELETE: 1509168404Spjd if (error == 0) 1510168404Spjd cnp->cn_flags |= SAVENAME; 1511168404Spjd break; 1512168404Spjd } 1513168404Spjd } 1514168404Spjd if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) { 1515169198Spjd int ltype = 0; 1516169198Spjd 1517169198Spjd if (cnp->cn_flags & ISDOTDOT) { 1518176559Sattilio ltype = VOP_ISLOCKED(dvp); 1519175294Sattilio VOP_UNLOCK(dvp, 0); 1520169198Spjd } 1521206667Spjd ZFS_EXIT(zfsvfs); 1522219089Spjd error = zfs_vnode_lock(*vpp, cnp->cn_lkflags); 1523168962Spjd if (cnp->cn_flags & ISDOTDOT) 1524175202Sattilio vn_lock(dvp, ltype | LK_RETRY); 1525169172Spjd if (error != 0) { 1526169172Spjd VN_RELE(*vpp); 1527169172Spjd *vpp = NULL; 1528169172Spjd return (error); 1529169172Spjd } 1530206667Spjd } else { 1531206667Spjd ZFS_EXIT(zfsvfs); 1532168404Spjd } 1533168404Spjd 1534168404Spjd#ifdef FREEBSD_NAMECACHE 1535168404Spjd /* 1536168404Spjd * Insert name into cache (as non-existent) if appropriate. 
1537168404Spjd */ 1538168404Spjd if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) 1539168404Spjd cache_enter(dvp, *vpp, cnp); 1540169170Spjd /* 1541169170Spjd * Insert name into cache if appropriate. 1542169170Spjd */ 1543168404Spjd if (error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1544168404Spjd if (!(cnp->cn_flags & ISLASTCN) || 1545168404Spjd (nameiop != DELETE && nameiop != RENAME)) { 1546168404Spjd cache_enter(dvp, *vpp, cnp); 1547168404Spjd } 1548168404Spjd } 1549168404Spjd#endif 1550168404Spjd 1551168404Spjd return (error); 1552168404Spjd} 1553168404Spjd 1554168404Spjd/* 1555168404Spjd * Attempt to create a new entry in a directory. If the entry 1556168404Spjd * already exists, truncate the file if permissible, else return 1557168404Spjd * an error. Return the vp of the created or trunc'd file. 1558168404Spjd * 1559168404Spjd * IN: dvp - vnode of directory to put new file entry in. 1560168404Spjd * name - name of new file entry. 1561168404Spjd * vap - attributes of new file. 1562168404Spjd * excl - flag indicating exclusive or non-exclusive mode. 1563168404Spjd * mode - mode to open file with. 1564168404Spjd * cr - credentials of caller. 1565168404Spjd * flag - large file flag [UNUSED]. 1566185029Spjd * ct - caller context 1567185029Spjd * vsecp - ACL to be set 1568168404Spjd * 1569168404Spjd * OUT: vpp - vnode of created or trunc'd entry. 1570168404Spjd * 1571251631Sdelphij * RETURN: 0 on success, error code on failure. 
1572168404Spjd * 1573168404Spjd * Timestamps: 1574168404Spjd * dvp - ctime|mtime updated if new entry created 1575168404Spjd * vp - ctime|mtime always, atime if new 1576168404Spjd */ 1577185029Spjd 1578168404Spjd/* ARGSUSED */ 1579168404Spjdstatic int 1580168962Spjdzfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 1581185029Spjd vnode_t **vpp, cred_t *cr, kthread_t *td) 1582168404Spjd{ 1583168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1584168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1585185029Spjd zilog_t *zilog; 1586185029Spjd objset_t *os; 1587168404Spjd zfs_dirlock_t *dl; 1588168404Spjd dmu_tx_t *tx; 1589168404Spjd int error; 1590209962Smm ksid_t *ksid; 1591209962Smm uid_t uid; 1592209962Smm gid_t gid = crgetgid(cr); 1593219089Spjd zfs_acl_ids_t acl_ids; 1594209962Smm boolean_t fuid_dirtied; 1595219089Spjd boolean_t have_acl = B_FALSE; 1596185029Spjd void *vsecp = NULL; 1597185029Spjd int flag = 0; 1598168404Spjd 1599185029Spjd /* 1600185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 1601185029Spjd * make sure file system is at proper version 1602185029Spjd */ 1603185029Spjd 1604209962Smm ksid = crgetsid(cr, KSID_OWNER); 1605209962Smm if (ksid) 1606209962Smm uid = ksid_getid(ksid); 1607209962Smm else 1608209962Smm uid = crgetuid(cr); 1609219089Spjd 1610185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 1611185029Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 1612219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1613249195Smm return (SET_ERROR(EINVAL)); 1614185029Spjd 1615168404Spjd ZFS_ENTER(zfsvfs); 1616185029Spjd ZFS_VERIFY_ZP(dzp); 1617185029Spjd os = zfsvfs->z_os; 1618185029Spjd zilog = zfsvfs->z_log; 1619168404Spjd 1620185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1621185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1622185029Spjd ZFS_EXIT(zfsvfs); 1623249195Smm return (SET_ERROR(EILSEQ)); 1624185029Spjd } 1625185029Spjd 1626185029Spjd if (vap->va_mask & AT_XVATTR) { 1627197861Spjd if ((error = 
secpolicy_xvattr(dvp, (xvattr_t *)vap, 1628185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 1629185029Spjd ZFS_EXIT(zfsvfs); 1630185029Spjd return (error); 1631185029Spjd } 1632185029Spjd } 1633168404Spjdtop: 1634168404Spjd *vpp = NULL; 1635168404Spjd 1636182905Strasz if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 1637182905Strasz vap->va_mode &= ~S_ISVTX; 1638168404Spjd 1639168404Spjd if (*name == '\0') { 1640168404Spjd /* 1641168404Spjd * Null component name refers to the directory itself. 1642168404Spjd */ 1643168404Spjd VN_HOLD(dvp); 1644168404Spjd zp = dzp; 1645168404Spjd dl = NULL; 1646168404Spjd error = 0; 1647168404Spjd } else { 1648168404Spjd /* possible VN_HOLD(zp) */ 1649185029Spjd int zflg = 0; 1650185029Spjd 1651185029Spjd if (flag & FIGNORECASE) 1652185029Spjd zflg |= ZCILOOK; 1653185029Spjd 1654185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1655185029Spjd NULL, NULL); 1656185029Spjd if (error) { 1657219089Spjd if (have_acl) 1658219089Spjd zfs_acl_ids_free(&acl_ids); 1659168404Spjd if (strcmp(name, "..") == 0) 1660249195Smm error = SET_ERROR(EISDIR); 1661168404Spjd ZFS_EXIT(zfsvfs); 1662168404Spjd return (error); 1663168404Spjd } 1664168404Spjd } 1665219089Spjd 1666185029Spjd if (zp == NULL) { 1667185029Spjd uint64_t txtype; 1668168404Spjd 1669168404Spjd /* 1670168404Spjd * Create a new file object and update the directory 1671168404Spjd * to reference it. 1672168404Spjd */ 1673185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1674219089Spjd if (have_acl) 1675219089Spjd zfs_acl_ids_free(&acl_ids); 1676168404Spjd goto out; 1677168404Spjd } 1678168404Spjd 1679168404Spjd /* 1680168404Spjd * We only support the creation of regular files in 1681168404Spjd * extended attribute directories. 
1682168404Spjd */ 1683219089Spjd 1684219089Spjd if ((dzp->z_pflags & ZFS_XATTR) && 1685168404Spjd (vap->va_type != VREG)) { 1686219089Spjd if (have_acl) 1687219089Spjd zfs_acl_ids_free(&acl_ids); 1688249195Smm error = SET_ERROR(EINVAL); 1689168404Spjd goto out; 1690168404Spjd } 1691168404Spjd 1692219089Spjd if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, 1693219089Spjd cr, vsecp, &acl_ids)) != 0) 1694219089Spjd goto out; 1695219089Spjd have_acl = B_TRUE; 1696209962Smm 1697209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 1698211932Smm zfs_acl_ids_free(&acl_ids); 1699249195Smm error = SET_ERROR(EDQUOT); 1700209962Smm goto out; 1701209962Smm } 1702209962Smm 1703168404Spjd tx = dmu_tx_create(os); 1704219089Spjd 1705219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 1706219089Spjd ZFS_SA_BASE_ATTR_SIZE); 1707219089Spjd 1708209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 1709209962Smm if (fuid_dirtied) 1710209962Smm zfs_fuid_txhold(zfsvfs, tx); 1711168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 1712219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 1713219089Spjd if (!zfsvfs->z_use_sa && 1714219089Spjd acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1715168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1716219089Spjd 0, acl_ids.z_aclp->z_acl_bytes); 1717185029Spjd } 1718209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 1719168404Spjd if (error) { 1720168404Spjd zfs_dirent_unlock(dl); 1721209962Smm if (error == ERESTART) { 1722168404Spjd dmu_tx_wait(tx); 1723168404Spjd dmu_tx_abort(tx); 1724168404Spjd goto top; 1725168404Spjd } 1726219089Spjd zfs_acl_ids_free(&acl_ids); 1727168404Spjd dmu_tx_abort(tx); 1728168404Spjd ZFS_EXIT(zfsvfs); 1729168404Spjd return (error); 1730168404Spjd } 1731219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1732209962Smm 1733209962Smm if (fuid_dirtied) 1734209962Smm zfs_fuid_sync(zfsvfs, tx); 1735209962Smm 1736168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 1737185029Spjd txtype = 
zfs_log_create_txtype(Z_FILE, vsecp, vap); 1738185029Spjd if (flag & FIGNORECASE) 1739185029Spjd txtype |= TX_CI; 1740185029Spjd zfs_log_create(zilog, tx, txtype, dzp, zp, name, 1741209962Smm vsecp, acl_ids.z_fuidp, vap); 1742209962Smm zfs_acl_ids_free(&acl_ids); 1743168404Spjd dmu_tx_commit(tx); 1744168404Spjd } else { 1745185029Spjd int aflags = (flag & FAPPEND) ? V_APPEND : 0; 1746185029Spjd 1747219089Spjd if (have_acl) 1748219089Spjd zfs_acl_ids_free(&acl_ids); 1749219089Spjd have_acl = B_FALSE; 1750219089Spjd 1751168404Spjd /* 1752168404Spjd * A directory entry already exists for this name. 1753168404Spjd */ 1754168404Spjd /* 1755168962Spjd * Can't truncate an existing file if in exclusive mode. 1756168962Spjd */ 1757168962Spjd if (excl == EXCL) { 1758249195Smm error = SET_ERROR(EEXIST); 1759168962Spjd goto out; 1760168962Spjd } 1761168962Spjd /* 1762168404Spjd * Can't open a directory for writing. 1763168404Spjd */ 1764168404Spjd if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1765249195Smm error = SET_ERROR(EISDIR); 1766168404Spjd goto out; 1767168404Spjd } 1768168404Spjd /* 1769168404Spjd * Verify requested access to file. 1770168404Spjd */ 1771185029Spjd if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 1772168404Spjd goto out; 1773168404Spjd } 1774168404Spjd 1775168404Spjd mutex_enter(&dzp->z_lock); 1776168404Spjd dzp->z_seq++; 1777168404Spjd mutex_exit(&dzp->z_lock); 1778168404Spjd 1779168404Spjd /* 1780168404Spjd * Truncate regular files if requested. 
1781168404Spjd */ 1782168404Spjd if ((ZTOV(zp)->v_type == VREG) && 1783168404Spjd (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 1784185029Spjd /* we can't hold any locks when calling zfs_freesp() */ 1785185029Spjd zfs_dirent_unlock(dl); 1786185029Spjd dl = NULL; 1787168404Spjd error = zfs_freesp(zp, 0, 0, mode, TRUE); 1788185029Spjd if (error == 0) { 1789185029Spjd vnevent_create(ZTOV(zp), ct); 1790168404Spjd } 1791168404Spjd } 1792168404Spjd } 1793168404Spjdout: 1794168404Spjd if (dl) 1795168404Spjd zfs_dirent_unlock(dl); 1796168404Spjd 1797168404Spjd if (error) { 1798168404Spjd if (zp) 1799168404Spjd VN_RELE(ZTOV(zp)); 1800168962Spjd } else { 1801168962Spjd *vpp = ZTOV(zp); 1802211932Smm error = specvp_check(vpp, cr); 1803168404Spjd } 1804168404Spjd 1805219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1806219089Spjd zil_commit(zilog, 0); 1807219089Spjd 1808168404Spjd ZFS_EXIT(zfsvfs); 1809168404Spjd return (error); 1810168404Spjd} 1811168404Spjd 1812168404Spjd/* 1813168404Spjd * Remove an entry from a directory. 1814168404Spjd * 1815168404Spjd * IN: dvp - vnode of directory to remove entry from. 1816168404Spjd * name - name of entry to remove. 1817168404Spjd * cr - credentials of caller. 1818185029Spjd * ct - caller context 1819185029Spjd * flags - case flags 1820168404Spjd * 1821251631Sdelphij * RETURN: 0 on success, error code on failure. 
1822168404Spjd * 1823168404Spjd * Timestamps: 1824168404Spjd * dvp - ctime|mtime 1825168404Spjd * vp - ctime (if nlink > 0) 1826168404Spjd */ 1827219089Spjd 1828219089Spjduint64_t null_xattr = 0; 1829219089Spjd 1830185029Spjd/*ARGSUSED*/ 1831168404Spjdstatic int 1832185029Spjdzfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 1833185029Spjd int flags) 1834168404Spjd{ 1835168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 1836219089Spjd znode_t *xzp; 1837168404Spjd vnode_t *vp; 1838168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1839185029Spjd zilog_t *zilog; 1840168962Spjd uint64_t acl_obj, xattr_obj; 1841219089Spjd uint64_t xattr_obj_unlinked = 0; 1842219089Spjd uint64_t obj = 0; 1843168404Spjd zfs_dirlock_t *dl; 1844168404Spjd dmu_tx_t *tx; 1845168962Spjd boolean_t may_delete_now, delete_now = FALSE; 1846185029Spjd boolean_t unlinked, toobig = FALSE; 1847185029Spjd uint64_t txtype; 1848185029Spjd pathname_t *realnmp = NULL; 1849185029Spjd pathname_t realnm; 1850168404Spjd int error; 1851185029Spjd int zflg = ZEXISTS; 1852168404Spjd 1853168404Spjd ZFS_ENTER(zfsvfs); 1854185029Spjd ZFS_VERIFY_ZP(dzp); 1855185029Spjd zilog = zfsvfs->z_log; 1856168404Spjd 1857185029Spjd if (flags & FIGNORECASE) { 1858185029Spjd zflg |= ZCILOOK; 1859185029Spjd pn_alloc(&realnm); 1860185029Spjd realnmp = &realnm; 1861185029Spjd } 1862185029Spjd 1863168404Spjdtop: 1864219089Spjd xattr_obj = 0; 1865219089Spjd xzp = NULL; 1866168404Spjd /* 1867168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 
1868168404Spjd */ 1869185029Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1870185029Spjd NULL, realnmp)) { 1871185029Spjd if (realnmp) 1872185029Spjd pn_free(realnmp); 1873168404Spjd ZFS_EXIT(zfsvfs); 1874168404Spjd return (error); 1875168404Spjd } 1876168404Spjd 1877168404Spjd vp = ZTOV(zp); 1878168404Spjd 1879168962Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1880168404Spjd goto out; 1881168962Spjd } 1882168404Spjd 1883168962Spjd /* 1884168962Spjd * Need to use rmdir for removing directories. 1885168962Spjd */ 1886168962Spjd if (vp->v_type == VDIR) { 1887249195Smm error = SET_ERROR(EPERM); 1888168962Spjd goto out; 1889168962Spjd } 1890168962Spjd 1891185029Spjd vnevent_remove(vp, dvp, name, ct); 1892168962Spjd 1893185029Spjd if (realnmp) 1894185029Spjd dnlc_remove(dvp, realnmp->pn_buf); 1895185029Spjd else 1896185029Spjd dnlc_remove(dvp, name); 1897168404Spjd 1898219089Spjd VI_LOCK(vp); 1899219089Spjd may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); 1900219089Spjd VI_UNLOCK(vp); 1901168962Spjd 1902168404Spjd /* 1903168404Spjd * We may delete the znode now, or we may put it in the unlinked set; 1904168404Spjd * it depends on whether we're the last link, and on whether there are 1905168404Spjd * other holds on the vnode. So we dmu_tx_hold() the right things to 1906168404Spjd * allow for either case. 1907168404Spjd */ 1908219089Spjd obj = zp->z_id; 1909168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 1910168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1911219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1912219089Spjd zfs_sa_upgrade_txholds(tx, zp); 1913219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 1914185029Spjd if (may_delete_now) { 1915185029Spjd toobig = 1916219089Spjd zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 1917185029Spjd /* if the file is too big, only hold_free a token amount */ 1918185029Spjd dmu_tx_hold_free(tx, zp->z_id, 0, 1919185029Spjd (toobig ? 
DMU_MAX_ACCESS : DMU_OBJECT_END)); 1920185029Spjd } 1921168404Spjd 1922168404Spjd /* are there any extended attributes? */ 1923219089Spjd error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1924219089Spjd &xattr_obj, sizeof (xattr_obj)); 1925219089Spjd if (error == 0 && xattr_obj) { 1926219089Spjd error = zfs_zget(zfsvfs, xattr_obj, &xzp); 1927240415Smm ASSERT0(error); 1928219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 1929219089Spjd dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1930168404Spjd } 1931168404Spjd 1932219089Spjd mutex_enter(&zp->z_lock); 1933219089Spjd if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) 1934168962Spjd dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 1935219089Spjd mutex_exit(&zp->z_lock); 1936168962Spjd 1937168404Spjd /* charge as an update -- would be nice not to charge at all */ 1938168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1939168404Spjd 1940209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 1941168404Spjd if (error) { 1942168404Spjd zfs_dirent_unlock(dl); 1943168962Spjd VN_RELE(vp); 1944219089Spjd if (xzp) 1945219089Spjd VN_RELE(ZTOV(xzp)); 1946209962Smm if (error == ERESTART) { 1947168404Spjd dmu_tx_wait(tx); 1948168404Spjd dmu_tx_abort(tx); 1949168404Spjd goto top; 1950168404Spjd } 1951185029Spjd if (realnmp) 1952185029Spjd pn_free(realnmp); 1953168404Spjd dmu_tx_abort(tx); 1954168404Spjd ZFS_EXIT(zfsvfs); 1955168404Spjd return (error); 1956168404Spjd } 1957168404Spjd 1958168404Spjd /* 1959168404Spjd * Remove the directory entry. 1960168404Spjd */ 1961185029Spjd error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 1962168404Spjd 1963168404Spjd if (error) { 1964168404Spjd dmu_tx_commit(tx); 1965168404Spjd goto out; 1966168404Spjd } 1967168404Spjd 1968219089Spjd if (unlinked) { 1969219089Spjd 1970219089Spjd /* 1971219089Spjd * Hold z_lock so that we can make sure that the ACL obj 1972219089Spjd * hasn't changed. Could have been deleted due to 1973219089Spjd * zfs_sa_upgrade(). 
1974219089Spjd */ 1975219089Spjd mutex_enter(&zp->z_lock); 1976168962Spjd VI_LOCK(vp); 1977219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1978219089Spjd &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); 1979185029Spjd delete_now = may_delete_now && !toobig && 1980168962Spjd vp->v_count == 1 && !vn_has_cached_data(vp) && 1981219089Spjd xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == 1982219089Spjd acl_obj; 1983168962Spjd VI_UNLOCK(vp); 1984168962Spjd } 1985168962Spjd 1986168962Spjd if (delete_now) { 1987243270Savg#ifdef __FreeBSD__ 1988243270Savg panic("zfs_remove: delete_now branch taken"); 1989243270Savg#endif 1990219089Spjd if (xattr_obj_unlinked) { 1991219089Spjd ASSERT3U(xzp->z_links, ==, 2); 1992168962Spjd mutex_enter(&xzp->z_lock); 1993168962Spjd xzp->z_unlinked = 1; 1994219089Spjd xzp->z_links = 0; 1995219089Spjd error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 1996219089Spjd &xzp->z_links, sizeof (xzp->z_links), tx); 1997219089Spjd ASSERT3U(error, ==, 0); 1998168962Spjd mutex_exit(&xzp->z_lock); 1999168962Spjd zfs_unlinked_add(xzp, tx); 2000219089Spjd 2001219089Spjd if (zp->z_is_sa) 2002219089Spjd error = sa_remove(zp->z_sa_hdl, 2003219089Spjd SA_ZPL_XATTR(zfsvfs), tx); 2004219089Spjd else 2005219089Spjd error = sa_update(zp->z_sa_hdl, 2006219089Spjd SA_ZPL_XATTR(zfsvfs), &null_xattr, 2007219089Spjd sizeof (uint64_t), tx); 2008240415Smm ASSERT0(error); 2009168962Spjd } 2010168962Spjd VI_LOCK(vp); 2011168962Spjd vp->v_count--; 2012240415Smm ASSERT0(vp->v_count); 2013168962Spjd VI_UNLOCK(vp); 2014168962Spjd mutex_exit(&zp->z_lock); 2015168962Spjd zfs_znode_delete(zp, tx); 2016168962Spjd } else if (unlinked) { 2017219089Spjd mutex_exit(&zp->z_lock); 2018168404Spjd zfs_unlinked_add(zp, tx); 2019243268Savg#ifdef __FreeBSD__ 2020243268Savg vp->v_vflag |= VV_NOSYNC; 2021243268Savg#endif 2022168962Spjd } 2023168404Spjd 2024185029Spjd txtype = TX_REMOVE; 2025185029Spjd if (flags & FIGNORECASE) 2026185029Spjd txtype |= TX_CI; 
2027219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 2028168404Spjd 2029168404Spjd dmu_tx_commit(tx); 2030168404Spjdout: 2031185029Spjd if (realnmp) 2032185029Spjd pn_free(realnmp); 2033185029Spjd 2034168404Spjd zfs_dirent_unlock(dl); 2035168404Spjd 2036219089Spjd if (!delete_now) 2037168962Spjd VN_RELE(vp); 2038219089Spjd if (xzp) 2039168962Spjd VN_RELE(ZTOV(xzp)); 2040168962Spjd 2041219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2042219089Spjd zil_commit(zilog, 0); 2043219089Spjd 2044168404Spjd ZFS_EXIT(zfsvfs); 2045168404Spjd return (error); 2046168404Spjd} 2047168404Spjd 2048168404Spjd/* 2049168404Spjd * Create a new directory and insert it into dvp using the name 2050168404Spjd * provided. Return a pointer to the inserted directory. 2051168404Spjd * 2052168404Spjd * IN: dvp - vnode of directory to add subdir to. 2053168404Spjd * dirname - name of new directory. 2054168404Spjd * vap - attributes of new directory. 2055168404Spjd * cr - credentials of caller. 2056185029Spjd * ct - caller context 2057251631Sdelphij * flags - case flags 2058185029Spjd * vsecp - ACL to be set 2059168404Spjd * 2060168404Spjd * OUT: vpp - vnode of created directory. 2061168404Spjd * 2062251631Sdelphij * RETURN: 0 on success, error code on failure. 
2063168404Spjd * 2064168404Spjd * Timestamps: 2065168404Spjd * dvp - ctime|mtime updated 2066168404Spjd * vp - ctime|mtime|atime updated 2067168404Spjd */ 2068185029Spjd/*ARGSUSED*/ 2069168404Spjdstatic int 2070185029Spjdzfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 2071185029Spjd caller_context_t *ct, int flags, vsecattr_t *vsecp) 2072168404Spjd{ 2073168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 2074168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2075185029Spjd zilog_t *zilog; 2076168404Spjd zfs_dirlock_t *dl; 2077185029Spjd uint64_t txtype; 2078168404Spjd dmu_tx_t *tx; 2079168404Spjd int error; 2080185029Spjd int zf = ZNEW; 2081209962Smm ksid_t *ksid; 2082209962Smm uid_t uid; 2083209962Smm gid_t gid = crgetgid(cr); 2084219089Spjd zfs_acl_ids_t acl_ids; 2085209962Smm boolean_t fuid_dirtied; 2086168404Spjd 2087168404Spjd ASSERT(vap->va_type == VDIR); 2088168404Spjd 2089185029Spjd /* 2090185029Spjd * If we have an ephemeral id, ACL, or XVATTR then 2091185029Spjd * make sure file system is at proper version 2092185029Spjd */ 2093185029Spjd 2094209962Smm ksid = crgetsid(cr, KSID_OWNER); 2095209962Smm if (ksid) 2096209962Smm uid = ksid_getid(ksid); 2097209962Smm else 2098209962Smm uid = crgetuid(cr); 2099185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2100219089Spjd (vsecp || (vap->va_mask & AT_XVATTR) || 2101219089Spjd IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2102249195Smm return (SET_ERROR(EINVAL)); 2103185029Spjd 2104168404Spjd ZFS_ENTER(zfsvfs); 2105185029Spjd ZFS_VERIFY_ZP(dzp); 2106185029Spjd zilog = zfsvfs->z_log; 2107168404Spjd 2108219089Spjd if (dzp->z_pflags & ZFS_XATTR) { 2109168404Spjd ZFS_EXIT(zfsvfs); 2110249195Smm return (SET_ERROR(EINVAL)); 2111168404Spjd } 2112168404Spjd 2113185029Spjd if (zfsvfs->z_utf8 && u8_validate(dirname, 2114185029Spjd strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2115185029Spjd ZFS_EXIT(zfsvfs); 2116249195Smm return (SET_ERROR(EILSEQ)); 2117185029Spjd } 2118185029Spjd if (flags & 
FIGNORECASE) 2119185029Spjd zf |= ZCILOOK; 2120185029Spjd 2121219089Spjd if (vap->va_mask & AT_XVATTR) { 2122197861Spjd if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2123185029Spjd crgetuid(cr), cr, vap->va_type)) != 0) { 2124185029Spjd ZFS_EXIT(zfsvfs); 2125185029Spjd return (error); 2126185029Spjd } 2127219089Spjd } 2128185029Spjd 2129219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 2130219089Spjd vsecp, &acl_ids)) != 0) { 2131219089Spjd ZFS_EXIT(zfsvfs); 2132219089Spjd return (error); 2133219089Spjd } 2134168404Spjd /* 2135168404Spjd * First make sure the new directory doesn't exist. 2136219089Spjd * 2137219089Spjd * Existence is checked first to make sure we don't return 2138219089Spjd * EACCES instead of EEXIST which can cause some applications 2139219089Spjd * to fail. 2140168404Spjd */ 2141185029Spjdtop: 2142185029Spjd *vpp = NULL; 2143185029Spjd 2144185029Spjd if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 2145185029Spjd NULL, NULL)) { 2146219089Spjd zfs_acl_ids_free(&acl_ids); 2147168404Spjd ZFS_EXIT(zfsvfs); 2148168404Spjd return (error); 2149168404Spjd } 2150168404Spjd 2151185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 2152219089Spjd zfs_acl_ids_free(&acl_ids); 2153168404Spjd zfs_dirent_unlock(dl); 2154168404Spjd ZFS_EXIT(zfsvfs); 2155168404Spjd return (error); 2156168404Spjd } 2157168404Spjd 2158209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2159211932Smm zfs_acl_ids_free(&acl_ids); 2160209962Smm zfs_dirent_unlock(dl); 2161209962Smm ZFS_EXIT(zfsvfs); 2162249195Smm return (SET_ERROR(EDQUOT)); 2163209962Smm } 2164209962Smm 2165168404Spjd /* 2166168404Spjd * Add a new entry to the directory. 
2167168404Spjd */ 2168168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2169168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 2170168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2171209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 2172209962Smm if (fuid_dirtied) 2173209962Smm zfs_fuid_txhold(zfsvfs, tx); 2174219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2175219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2176219089Spjd acl_ids.z_aclp->z_acl_bytes); 2177219089Spjd } 2178219089Spjd 2179219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2180219089Spjd ZFS_SA_BASE_ATTR_SIZE); 2181219089Spjd 2182209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 2183168404Spjd if (error) { 2184168404Spjd zfs_dirent_unlock(dl); 2185209962Smm if (error == ERESTART) { 2186168404Spjd dmu_tx_wait(tx); 2187168404Spjd dmu_tx_abort(tx); 2188168404Spjd goto top; 2189168404Spjd } 2190219089Spjd zfs_acl_ids_free(&acl_ids); 2191168404Spjd dmu_tx_abort(tx); 2192168404Spjd ZFS_EXIT(zfsvfs); 2193168404Spjd return (error); 2194168404Spjd } 2195168404Spjd 2196168404Spjd /* 2197168404Spjd * Create new node. 2198168404Spjd */ 2199219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2200168404Spjd 2201209962Smm if (fuid_dirtied) 2202209962Smm zfs_fuid_sync(zfsvfs, tx); 2203219089Spjd 2204168404Spjd /* 2205168404Spjd * Now put new name in parent dir. 
2206168404Spjd */ 2207168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 2208168404Spjd 2209168404Spjd *vpp = ZTOV(zp); 2210168404Spjd 2211185029Spjd txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 2212185029Spjd if (flags & FIGNORECASE) 2213185029Spjd txtype |= TX_CI; 2214209962Smm zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 2215209962Smm acl_ids.z_fuidp, vap); 2216185029Spjd 2217209962Smm zfs_acl_ids_free(&acl_ids); 2218219089Spjd 2219168404Spjd dmu_tx_commit(tx); 2220168404Spjd 2221168404Spjd zfs_dirent_unlock(dl); 2222168404Spjd 2223219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2224219089Spjd zil_commit(zilog, 0); 2225219089Spjd 2226168404Spjd ZFS_EXIT(zfsvfs); 2227168404Spjd return (0); 2228168404Spjd} 2229168404Spjd 2230168404Spjd/* 2231168404Spjd * Remove a directory subdir entry. If the current working 2232168404Spjd * directory is the same as the subdir to be removed, the 2233168404Spjd * remove will fail. 2234168404Spjd * 2235168404Spjd * IN: dvp - vnode of directory to remove from. 2236168404Spjd * name - name of directory to be removed. 2237168404Spjd * cwd - vnode of current working directory. 2238168404Spjd * cr - credentials of caller. 2239185029Spjd * ct - caller context 2240185029Spjd * flags - case flags 2241168404Spjd * 2242251631Sdelphij * RETURN: 0 on success, error code on failure. 
2243168404Spjd * 2244168404Spjd * Timestamps: 2245168404Spjd * dvp - ctime|mtime updated 2246168404Spjd */ 2247185029Spjd/*ARGSUSED*/ 2248168404Spjdstatic int 2249185029Spjdzfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 2250185029Spjd caller_context_t *ct, int flags) 2251168404Spjd{ 2252168404Spjd znode_t *dzp = VTOZ(dvp); 2253168404Spjd znode_t *zp; 2254168404Spjd vnode_t *vp; 2255168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2256185029Spjd zilog_t *zilog; 2257168404Spjd zfs_dirlock_t *dl; 2258168404Spjd dmu_tx_t *tx; 2259168404Spjd int error; 2260185029Spjd int zflg = ZEXISTS; 2261168404Spjd 2262168962Spjd ZFS_ENTER(zfsvfs); 2263185029Spjd ZFS_VERIFY_ZP(dzp); 2264185029Spjd zilog = zfsvfs->z_log; 2265168404Spjd 2266185029Spjd if (flags & FIGNORECASE) 2267185029Spjd zflg |= ZCILOOK; 2268168404Spjdtop: 2269168404Spjd zp = NULL; 2270168404Spjd 2271168404Spjd /* 2272168404Spjd * Attempt to lock directory; fail if entry doesn't exist. 2273168404Spjd */ 2274185029Spjd if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 2275185029Spjd NULL, NULL)) { 2276168404Spjd ZFS_EXIT(zfsvfs); 2277168404Spjd return (error); 2278168404Spjd } 2279168404Spjd 2280168404Spjd vp = ZTOV(zp); 2281168404Spjd 2282168404Spjd if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2283168404Spjd goto out; 2284168404Spjd } 2285168404Spjd 2286168962Spjd if (vp->v_type != VDIR) { 2287249195Smm error = SET_ERROR(ENOTDIR); 2288168962Spjd goto out; 2289168962Spjd } 2290168962Spjd 2291168962Spjd if (vp == cwd) { 2292249195Smm error = SET_ERROR(EINVAL); 2293168962Spjd goto out; 2294168962Spjd } 2295168962Spjd 2296185029Spjd vnevent_rmdir(vp, dvp, name, ct); 2297168962Spjd 2298168404Spjd /* 2299168404Spjd * Grab a lock on the directory to make sure that noone is 2300168404Spjd * trying to add (or lookup) entries while we are removing it. 
2301168404Spjd */ 2302168404Spjd rw_enter(&zp->z_name_lock, RW_WRITER); 2303168404Spjd 2304168404Spjd /* 2305168404Spjd * Grab a lock on the parent pointer to make sure we play well 2306168404Spjd * with the treewalk and directory rename code. 2307168404Spjd */ 2308168404Spjd rw_enter(&zp->z_parent_lock, RW_WRITER); 2309168404Spjd 2310168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 2311168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2312219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2313168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2314219089Spjd zfs_sa_upgrade_txholds(tx, zp); 2315219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 2316209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 2317168404Spjd if (error) { 2318168404Spjd rw_exit(&zp->z_parent_lock); 2319168404Spjd rw_exit(&zp->z_name_lock); 2320168404Spjd zfs_dirent_unlock(dl); 2321168962Spjd VN_RELE(vp); 2322209962Smm if (error == ERESTART) { 2323168404Spjd dmu_tx_wait(tx); 2324168404Spjd dmu_tx_abort(tx); 2325168404Spjd goto top; 2326168404Spjd } 2327168404Spjd dmu_tx_abort(tx); 2328168404Spjd ZFS_EXIT(zfsvfs); 2329168404Spjd return (error); 2330168404Spjd } 2331168404Spjd 2332168404Spjd#ifdef FREEBSD_NAMECACHE 2333168404Spjd cache_purge(dvp); 2334168404Spjd#endif 2335168404Spjd 2336185029Spjd error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 2337168404Spjd 2338185029Spjd if (error == 0) { 2339185029Spjd uint64_t txtype = TX_RMDIR; 2340185029Spjd if (flags & FIGNORECASE) 2341185029Spjd txtype |= TX_CI; 2342219089Spjd zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2343185029Spjd } 2344168404Spjd 2345168404Spjd dmu_tx_commit(tx); 2346168404Spjd 2347168404Spjd rw_exit(&zp->z_parent_lock); 2348168404Spjd rw_exit(&zp->z_name_lock); 2349168404Spjd#ifdef FREEBSD_NAMECACHE 2350168404Spjd cache_purge(vp); 2351168404Spjd#endif 2352168404Spjdout: 2353168404Spjd zfs_dirent_unlock(dl); 2354168404Spjd 2355168962Spjd VN_RELE(vp); 2356168962Spjd 2357219089Spjd if 
(zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2358219089Spjd zil_commit(zilog, 0); 2359219089Spjd 2360168404Spjd ZFS_EXIT(zfsvfs); 2361168404Spjd return (error); 2362168404Spjd} 2363168404Spjd 2364168404Spjd/* 2365168404Spjd * Read as many directory entries as will fit into the provided 2366168404Spjd * buffer from the given directory cursor position (specified in 2367251631Sdelphij * the uio structure). 2368168404Spjd * 2369168404Spjd * IN: vp - vnode of directory to read. 2370168404Spjd * uio - structure supplying read location, range info, 2371168404Spjd * and return buffer. 2372168404Spjd * cr - credentials of caller. 2373185029Spjd * ct - caller context 2374185029Spjd * flags - case flags 2375168404Spjd * 2376168404Spjd * OUT: uio - updated offset and range, buffer filled. 2377168404Spjd * eofp - set to true if end-of-file detected. 2378168404Spjd * 2379251631Sdelphij * RETURN: 0 on success, error code on failure. 2380168404Spjd * 2381168404Spjd * Timestamps: 2382168404Spjd * vp - atime updated 2383168404Spjd * 2384168404Spjd * Note that the low 4 bits of the cookie returned by zap is always zero. 2385168404Spjd * This allows us to use the low range for "special" directory entries: 2386168404Spjd * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2387168404Spjd * we use the offset 2 for the '.zfs' directory. 
2388168404Spjd */ 2389168404Spjd/* ARGSUSED */ 2390168404Spjdstatic int 2391168962Spjdzfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies) 2392168404Spjd{ 2393168404Spjd znode_t *zp = VTOZ(vp); 2394168404Spjd iovec_t *iovp; 2395185029Spjd edirent_t *eodp; 2396168404Spjd dirent64_t *odp; 2397168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2398168404Spjd objset_t *os; 2399168404Spjd caddr_t outbuf; 2400168404Spjd size_t bufsize; 2401168404Spjd zap_cursor_t zc; 2402168404Spjd zap_attribute_t zap; 2403168404Spjd uint_t bytes_wanted; 2404168404Spjd uint64_t offset; /* must be unsigned; checks for < 1 */ 2405219089Spjd uint64_t parent; 2406168404Spjd int local_eof; 2407168404Spjd int outcount; 2408168404Spjd int error; 2409168404Spjd uint8_t prefetch; 2410185029Spjd boolean_t check_sysattrs; 2411168404Spjd uint8_t type; 2412168962Spjd int ncooks; 2413168962Spjd u_long *cooks = NULL; 2414185029Spjd int flags = 0; 2415168404Spjd 2416168404Spjd ZFS_ENTER(zfsvfs); 2417185029Spjd ZFS_VERIFY_ZP(zp); 2418168404Spjd 2419219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 2420219089Spjd &parent, sizeof (parent))) != 0) { 2421219089Spjd ZFS_EXIT(zfsvfs); 2422219089Spjd return (error); 2423219089Spjd } 2424219089Spjd 2425168404Spjd /* 2426168404Spjd * If we are not given an eof variable, 2427168404Spjd * use a local one. 2428168404Spjd */ 2429168404Spjd if (eofp == NULL) 2430168404Spjd eofp = &local_eof; 2431168404Spjd 2432168404Spjd /* 2433168404Spjd * Check for valid iov_len. 
2434168404Spjd */ 2435168404Spjd if (uio->uio_iov->iov_len <= 0) { 2436168404Spjd ZFS_EXIT(zfsvfs); 2437249195Smm return (SET_ERROR(EINVAL)); 2438168404Spjd } 2439168404Spjd 2440168404Spjd /* 2441168404Spjd * Quit if directory has been removed (posix) 2442168404Spjd */ 2443168404Spjd if ((*eofp = zp->z_unlinked) != 0) { 2444168404Spjd ZFS_EXIT(zfsvfs); 2445168404Spjd return (0); 2446168404Spjd } 2447168404Spjd 2448168404Spjd error = 0; 2449168404Spjd os = zfsvfs->z_os; 2450168404Spjd offset = uio->uio_loffset; 2451168404Spjd prefetch = zp->z_zn_prefetch; 2452168404Spjd 2453168404Spjd /* 2454168404Spjd * Initialize the iterator cursor. 2455168404Spjd */ 2456168404Spjd if (offset <= 3) { 2457168404Spjd /* 2458168404Spjd * Start iteration from the beginning of the directory. 2459168404Spjd */ 2460168404Spjd zap_cursor_init(&zc, os, zp->z_id); 2461168404Spjd } else { 2462168404Spjd /* 2463168404Spjd * The offset is a serialized cursor. 2464168404Spjd */ 2465168404Spjd zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2466168404Spjd } 2467168404Spjd 2468168404Spjd /* 2469168404Spjd * Get space to change directory entries into fs independent format. 2470168404Spjd */ 2471168404Spjd iovp = uio->uio_iov; 2472168404Spjd bytes_wanted = iovp->iov_len; 2473168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2474168404Spjd bufsize = bytes_wanted; 2475168404Spjd outbuf = kmem_alloc(bufsize, KM_SLEEP); 2476168404Spjd odp = (struct dirent64 *)outbuf; 2477168404Spjd } else { 2478168404Spjd bufsize = bytes_wanted; 2479247187Smm outbuf = NULL; 2480168404Spjd odp = (struct dirent64 *)iovp->iov_base; 2481168404Spjd } 2482185029Spjd eodp = (struct edirent *)odp; 2483168404Spjd 2484169170Spjd if (ncookies != NULL) { 2485168404Spjd /* 2486168404Spjd * Minimum entry size is dirent size and 1 byte for a file name. 
2487168404Spjd */ 2488168962Spjd ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2489219404Spjd cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2490219404Spjd *cookies = cooks; 2491168962Spjd *ncookies = ncooks; 2492168404Spjd } 2493185029Spjd /* 2494185029Spjd * If this VFS supports the system attribute view interface; and 2495185029Spjd * we're looking at an extended attribute directory; and we care 2496185029Spjd * about normalization conflicts on this vfs; then we must check 2497185029Spjd * for normalization conflicts with the sysattr name space. 2498185029Spjd */ 2499185029Spjd#ifdef TODO 2500185029Spjd check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2501185029Spjd (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2502185029Spjd (flags & V_RDDIR_ENTFLAGS); 2503185029Spjd#else 2504185029Spjd check_sysattrs = 0; 2505185029Spjd#endif 2506168404Spjd 2507168404Spjd /* 2508168404Spjd * Transform to file-system independent format 2509168404Spjd */ 2510168404Spjd outcount = 0; 2511168404Spjd while (outcount < bytes_wanted) { 2512168404Spjd ino64_t objnum; 2513168404Spjd ushort_t reclen; 2514219089Spjd off64_t *next = NULL; 2515168404Spjd 2516168404Spjd /* 2517168404Spjd * Special case `.', `..', and `.zfs'. 
2518168404Spjd */ 2519168404Spjd if (offset == 0) { 2520168404Spjd (void) strcpy(zap.za_name, "."); 2521185029Spjd zap.za_normalization_conflict = 0; 2522168404Spjd objnum = zp->z_id; 2523169108Spjd type = DT_DIR; 2524168404Spjd } else if (offset == 1) { 2525168404Spjd (void) strcpy(zap.za_name, ".."); 2526185029Spjd zap.za_normalization_conflict = 0; 2527219089Spjd objnum = parent; 2528169108Spjd type = DT_DIR; 2529168404Spjd } else if (offset == 2 && zfs_show_ctldir(zp)) { 2530168404Spjd (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2531185029Spjd zap.za_normalization_conflict = 0; 2532168404Spjd objnum = ZFSCTL_INO_ROOT; 2533169108Spjd type = DT_DIR; 2534168404Spjd } else { 2535168404Spjd /* 2536168404Spjd * Grab next entry. 2537168404Spjd */ 2538168404Spjd if (error = zap_cursor_retrieve(&zc, &zap)) { 2539168404Spjd if ((*eofp = (error == ENOENT)) != 0) 2540168404Spjd break; 2541168404Spjd else 2542168404Spjd goto update; 2543168404Spjd } 2544168404Spjd 2545168404Spjd if (zap.za_integer_length != 8 || 2546168404Spjd zap.za_num_integers != 1) { 2547168404Spjd cmn_err(CE_WARN, "zap_readdir: bad directory " 2548168404Spjd "entry, obj = %lld, offset = %lld\n", 2549168404Spjd (u_longlong_t)zp->z_id, 2550168404Spjd (u_longlong_t)offset); 2551249195Smm error = SET_ERROR(ENXIO); 2552168404Spjd goto update; 2553168404Spjd } 2554168404Spjd 2555168404Spjd objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2556168404Spjd /* 2557168404Spjd * MacOS X can extract the object type here such as: 2558168404Spjd * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2559168404Spjd */ 2560168404Spjd type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2561185029Spjd 2562185029Spjd if (check_sysattrs && !zap.za_normalization_conflict) { 2563185029Spjd#ifdef TODO 2564185029Spjd zap.za_normalization_conflict = 2565185029Spjd xattr_sysattr_casechk(zap.za_name); 2566185029Spjd#else 2567185029Spjd panic("%s:%u: TODO", __func__, __LINE__); 2568185029Spjd#endif 2569185029Spjd } 2570168404Spjd } 
2571168404Spjd 2572211932Smm if (flags & V_RDDIR_ACCFILTER) { 2573211932Smm /* 2574211932Smm * If we have no access at all, don't include 2575211932Smm * this entry in the returned information 2576211932Smm */ 2577211932Smm znode_t *ezp; 2578211932Smm if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2579211932Smm goto skip_entry; 2580211932Smm if (!zfs_has_access(ezp, cr)) { 2581211932Smm VN_RELE(ZTOV(ezp)); 2582211932Smm goto skip_entry; 2583211932Smm } 2584211932Smm VN_RELE(ZTOV(ezp)); 2585211932Smm } 2586211932Smm 2587185029Spjd if (flags & V_RDDIR_ENTFLAGS) 2588185029Spjd reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2589185029Spjd else 2590185029Spjd reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2591185029Spjd 2592168404Spjd /* 2593168404Spjd * Will this entry fit in the buffer? 2594168404Spjd */ 2595168404Spjd if (outcount + reclen > bufsize) { 2596168404Spjd /* 2597168404Spjd * Did we manage to fit anything in the buffer? 2598168404Spjd */ 2599168404Spjd if (!outcount) { 2600249195Smm error = SET_ERROR(EINVAL); 2601168404Spjd goto update; 2602168404Spjd } 2603168404Spjd break; 2604168404Spjd } 2605185029Spjd if (flags & V_RDDIR_ENTFLAGS) { 2606185029Spjd /* 2607185029Spjd * Add extended flag entry: 2608185029Spjd */ 2609185029Spjd eodp->ed_ino = objnum; 2610185029Spjd eodp->ed_reclen = reclen; 2611185029Spjd /* NOTE: ed_off is the offset for the *next* entry */ 2612185029Spjd next = &(eodp->ed_off); 2613185029Spjd eodp->ed_eflags = zap.za_normalization_conflict ? 
2614185029Spjd ED_CASE_CONFLICT : 0; 2615185029Spjd (void) strncpy(eodp->ed_name, zap.za_name, 2616185029Spjd EDIRENT_NAMELEN(reclen)); 2617185029Spjd eodp = (edirent_t *)((intptr_t)eodp + reclen); 2618185029Spjd } else { 2619185029Spjd /* 2620185029Spjd * Add normal entry: 2621185029Spjd */ 2622185029Spjd odp->d_ino = objnum; 2623185029Spjd odp->d_reclen = reclen; 2624185029Spjd odp->d_namlen = strlen(zap.za_name); 2625185029Spjd (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2626185029Spjd odp->d_type = type; 2627185029Spjd odp = (dirent64_t *)((intptr_t)odp + reclen); 2628185029Spjd } 2629168404Spjd outcount += reclen; 2630168404Spjd 2631168404Spjd ASSERT(outcount <= bufsize); 2632168404Spjd 2633168404Spjd /* Prefetch znode */ 2634168404Spjd if (prefetch) 2635168404Spjd dmu_prefetch(os, objnum, 0, 0); 2636168404Spjd 2637211932Smm skip_entry: 2638168404Spjd /* 2639168404Spjd * Move to the next entry, fill in the previous offset. 2640168404Spjd */ 2641168404Spjd if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2642168404Spjd zap_cursor_advance(&zc); 2643168404Spjd offset = zap_cursor_serialize(&zc); 2644168404Spjd } else { 2645168404Spjd offset += 1; 2646168404Spjd } 2647219404Spjd 2648219404Spjd if (cooks != NULL) { 2649219404Spjd *cooks++ = offset; 2650219404Spjd ncooks--; 2651219404Spjd KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2652219404Spjd } 2653168404Spjd } 2654168404Spjd zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2655168404Spjd 2656168404Spjd /* Subtract unused cookies */ 2657168962Spjd if (ncookies != NULL) 2658168962Spjd *ncookies -= ncooks; 2659168404Spjd 2660168404Spjd if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2661168404Spjd iovp->iov_base += outcount; 2662168404Spjd iovp->iov_len -= outcount; 2663168404Spjd uio->uio_resid -= outcount; 2664168404Spjd } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2665168404Spjd /* 2666168404Spjd * Reset the pointer. 
2667168404Spjd */ 2668168404Spjd offset = uio->uio_loffset; 2669168404Spjd } 2670168404Spjd 2671168404Spjdupdate: 2672168404Spjd zap_cursor_fini(&zc); 2673168404Spjd if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2674168404Spjd kmem_free(outbuf, bufsize); 2675168404Spjd 2676168404Spjd if (error == ENOENT) 2677168404Spjd error = 0; 2678168404Spjd 2679168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2680168404Spjd 2681168404Spjd uio->uio_loffset = offset; 2682168404Spjd ZFS_EXIT(zfsvfs); 2683169107Spjd if (error != 0 && cookies != NULL) { 2684168962Spjd free(*cookies, M_TEMP); 2685168962Spjd *cookies = NULL; 2686168962Spjd *ncookies = 0; 2687168404Spjd } 2688168404Spjd return (error); 2689168404Spjd} 2690168404Spjd 2691185029Spjdulong_t zfs_fsync_sync_cnt = 4; 2692185029Spjd 2693168404Spjdstatic int 2694185029Spjdzfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2695168404Spjd{ 2696168962Spjd znode_t *zp = VTOZ(vp); 2697168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2698168404Spjd 2699185029Spjd (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2700185029Spjd 2701219089Spjd if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 2702219089Spjd ZFS_ENTER(zfsvfs); 2703219089Spjd ZFS_VERIFY_ZP(zp); 2704219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 2705219089Spjd ZFS_EXIT(zfsvfs); 2706219089Spjd } 2707168404Spjd return (0); 2708168404Spjd} 2709168404Spjd 2710185029Spjd 2711168404Spjd/* 2712168404Spjd * Get the requested file attributes and place them in the provided 2713168404Spjd * vattr structure. 2714168404Spjd * 2715168404Spjd * IN: vp - vnode of file. 2716168404Spjd * vap - va_mask identifies requested attributes. 2717185029Spjd * If AT_XVATTR set, then optional attrs are requested 2718185029Spjd * flags - ATTR_NOACLCHECK (CIFS server context) 2719168404Spjd * cr - credentials of caller. 2720185029Spjd * ct - caller context 2721168404Spjd * 2722168404Spjd * OUT: vap - attribute values. 
2723168404Spjd * 2724251631Sdelphij * RETURN: 0 (always succeeds). 2725168404Spjd */ 2726168404Spjd/* ARGSUSED */ 2727168404Spjdstatic int 2728185029Spjdzfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2729185029Spjd caller_context_t *ct) 2730168404Spjd{ 2731168962Spjd znode_t *zp = VTOZ(vp); 2732168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2733185029Spjd int error = 0; 2734168962Spjd uint32_t blksize; 2735168962Spjd u_longlong_t nblocks; 2736185029Spjd uint64_t links; 2737224251Sdelphij uint64_t mtime[2], ctime[2], crtime[2], rdev; 2738185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2739185029Spjd xoptattr_t *xoap = NULL; 2740185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2741224251Sdelphij sa_bulk_attr_t bulk[4]; 2742219089Spjd int count = 0; 2743168404Spjd 2744168404Spjd ZFS_ENTER(zfsvfs); 2745185029Spjd ZFS_VERIFY_ZP(zp); 2746168404Spjd 2747219089Spjd zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2748219089Spjd 2749219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 2750219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 2751243807Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 2752224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2753224251Sdelphij SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 2754224251Sdelphij &rdev, 8); 2755219089Spjd 2756219089Spjd if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 2757219089Spjd ZFS_EXIT(zfsvfs); 2758219089Spjd return (error); 2759219089Spjd } 2760219089Spjd 2761168404Spjd /* 2762185029Spjd * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2763185029Spjd * Also, if we are the owner don't bother, since owner should 2764185029Spjd * always be allowed to read basic attributes of file. 
2765185029Spjd */ 2766219089Spjd if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2767219089Spjd (vap->va_uid != crgetuid(cr))) { 2768185029Spjd if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2769185029Spjd skipaclchk, cr)) { 2770185029Spjd ZFS_EXIT(zfsvfs); 2771185029Spjd return (error); 2772185029Spjd } 2773185029Spjd } 2774185029Spjd 2775185029Spjd /* 2776168404Spjd * Return all attributes. It's cheaper to provide the answer 2777168404Spjd * than to determine whether we were asked the question. 2778168404Spjd */ 2779168404Spjd 2780209097Smm mutex_enter(&zp->z_lock); 2781219089Spjd vap->va_type = IFTOVT(zp->z_mode); 2782219089Spjd vap->va_mode = zp->z_mode & ~S_IFMT; 2783224252Sdelphij#ifdef sun 2784224252Sdelphij vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2785224252Sdelphij#else 2786224252Sdelphij vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 2787224252Sdelphij#endif 2788168404Spjd vap->va_nodeid = zp->z_id; 2789185029Spjd if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2790219089Spjd links = zp->z_links + 1; 2791185029Spjd else 2792219089Spjd links = zp->z_links; 2793229425Sdim vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! */ 2794219089Spjd vap->va_size = zp->z_size; 2795224252Sdelphij#ifdef sun 2796224252Sdelphij vap->va_rdev = vp->v_rdev; 2797224252Sdelphij#else 2798224251Sdelphij if (vp->v_type == VBLK || vp->v_type == VCHR) 2799224251Sdelphij vap->va_rdev = zfs_cmpldev(rdev); 2800224252Sdelphij#endif 2801168404Spjd vap->va_seq = zp->z_seq; 2802168404Spjd vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2803168404Spjd 2804185029Spjd /* 2805185029Spjd * Add in any requested optional attributes and the create time. 2806185029Spjd * Also set the corresponding bits in the returned attribute bitmap. 
2807185029Spjd */ 2808185029Spjd if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2809185029Spjd if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2810185029Spjd xoap->xoa_archive = 2811219089Spjd ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2812185029Spjd XVA_SET_RTN(xvap, XAT_ARCHIVE); 2813185029Spjd } 2814185029Spjd 2815185029Spjd if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2816185029Spjd xoap->xoa_readonly = 2817219089Spjd ((zp->z_pflags & ZFS_READONLY) != 0); 2818185029Spjd XVA_SET_RTN(xvap, XAT_READONLY); 2819185029Spjd } 2820185029Spjd 2821185029Spjd if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2822185029Spjd xoap->xoa_system = 2823219089Spjd ((zp->z_pflags & ZFS_SYSTEM) != 0); 2824185029Spjd XVA_SET_RTN(xvap, XAT_SYSTEM); 2825185029Spjd } 2826185029Spjd 2827185029Spjd if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2828185029Spjd xoap->xoa_hidden = 2829219089Spjd ((zp->z_pflags & ZFS_HIDDEN) != 0); 2830185029Spjd XVA_SET_RTN(xvap, XAT_HIDDEN); 2831185029Spjd } 2832185029Spjd 2833185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2834185029Spjd xoap->xoa_nounlink = 2835219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2836185029Spjd XVA_SET_RTN(xvap, XAT_NOUNLINK); 2837185029Spjd } 2838185029Spjd 2839185029Spjd if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2840185029Spjd xoap->xoa_immutable = 2841219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2842185029Spjd XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2843185029Spjd } 2844185029Spjd 2845185029Spjd if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2846185029Spjd xoap->xoa_appendonly = 2847219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2848185029Spjd XVA_SET_RTN(xvap, XAT_APPENDONLY); 2849185029Spjd } 2850185029Spjd 2851185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2852185029Spjd xoap->xoa_nodump = 2853219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0); 2854185029Spjd XVA_SET_RTN(xvap, XAT_NODUMP); 2855185029Spjd } 2856185029Spjd 2857185029Spjd if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2858185029Spjd xoap->xoa_opaque = 2859219089Spjd 
((zp->z_pflags & ZFS_OPAQUE) != 0); 2860185029Spjd XVA_SET_RTN(xvap, XAT_OPAQUE); 2861185029Spjd } 2862185029Spjd 2863185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2864185029Spjd xoap->xoa_av_quarantined = 2865219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2866185029Spjd XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2867185029Spjd } 2868185029Spjd 2869185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2870185029Spjd xoap->xoa_av_modified = 2871219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2872185029Spjd XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2873185029Spjd } 2874185029Spjd 2875185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2876219089Spjd vp->v_type == VREG) { 2877219089Spjd zfs_sa_get_scanstamp(zp, xvap); 2878185029Spjd } 2879185029Spjd 2880185029Spjd if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 2881219089Spjd uint64_t times[2]; 2882219089Spjd 2883219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 2884219089Spjd times, sizeof (times)); 2885219089Spjd ZFS_TIME_DECODE(&xoap->xoa_createtime, times); 2886185029Spjd XVA_SET_RTN(xvap, XAT_CREATETIME); 2887185029Spjd } 2888219089Spjd 2889219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2890219089Spjd xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 2891219089Spjd XVA_SET_RTN(xvap, XAT_REPARSE); 2892219089Spjd } 2893219089Spjd if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 2894219089Spjd xoap->xoa_generation = zp->z_gen; 2895219089Spjd XVA_SET_RTN(xvap, XAT_GEN); 2896219089Spjd } 2897219089Spjd 2898219089Spjd if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2899219089Spjd xoap->xoa_offline = 2900219089Spjd ((zp->z_pflags & ZFS_OFFLINE) != 0); 2901219089Spjd XVA_SET_RTN(xvap, XAT_OFFLINE); 2902219089Spjd } 2903219089Spjd 2904219089Spjd if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2905219089Spjd xoap->xoa_sparse = 2906219089Spjd ((zp->z_pflags & ZFS_SPARSE) != 0); 2907219089Spjd XVA_SET_RTN(xvap, XAT_SPARSE); 2908219089Spjd } 2909185029Spjd } 2910185029Spjd 2911219089Spjd 
ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 2912219089Spjd ZFS_TIME_DECODE(&vap->va_mtime, mtime); 2913219089Spjd ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2914219089Spjd ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 2915168404Spjd 2916168404Spjd mutex_exit(&zp->z_lock); 2917168404Spjd 2918219089Spjd sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 2919168404Spjd vap->va_blksize = blksize; 2920168404Spjd vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 2921168404Spjd 2922168404Spjd if (zp->z_blksz == 0) { 2923168404Spjd /* 2924168404Spjd * Block size hasn't been set; suggest maximal I/O transfers. 2925168404Spjd */ 2926168404Spjd vap->va_blksize = zfsvfs->z_max_blksz; 2927168404Spjd } 2928168404Spjd 2929168404Spjd ZFS_EXIT(zfsvfs); 2930168404Spjd return (0); 2931168404Spjd} 2932168404Spjd 2933168404Spjd/* 2934168404Spjd * Set the file attributes to the values contained in the 2935168404Spjd * vattr structure. 2936168404Spjd * 2937168404Spjd * IN: vp - vnode of file to be modified. 2938168404Spjd * vap - new attribute values. 2939185029Spjd * If AT_XVATTR set, then optional attrs are being set 2940168404Spjd * flags - ATTR_UTIME set if non-default time values provided. 2941185029Spjd * - ATTR_NOACLCHECK (CIFS context only). 2942168404Spjd * cr - credentials of caller. 2943185029Spjd * ct - caller context 2944168404Spjd * 2945251631Sdelphij * RETURN: 0 on success, error code on failure. 2946168404Spjd * 2947168404Spjd * Timestamps: 2948168404Spjd * vp - ctime updated, mtime updated if size changed. 
2949168404Spjd */ 2950168404Spjd/* ARGSUSED */ 2951168404Spjdstatic int 2952168962Spjdzfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2953251631Sdelphij caller_context_t *ct) 2954168404Spjd{ 2955185029Spjd znode_t *zp = VTOZ(vp); 2956168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2957185029Spjd zilog_t *zilog; 2958168404Spjd dmu_tx_t *tx; 2959168404Spjd vattr_t oldva; 2960209962Smm xvattr_t tmpxvattr; 2961168962Spjd uint_t mask = vap->va_mask; 2962247187Smm uint_t saved_mask = 0; 2963197831Spjd uint64_t saved_mode; 2964168404Spjd int trim_mask = 0; 2965168404Spjd uint64_t new_mode; 2966209962Smm uint64_t new_uid, new_gid; 2967219089Spjd uint64_t xattr_obj; 2968219089Spjd uint64_t mtime[2], ctime[2]; 2969168404Spjd znode_t *attrzp; 2970168404Spjd int need_policy = FALSE; 2971219089Spjd int err, err2; 2972185029Spjd zfs_fuid_info_t *fuidp = NULL; 2973185029Spjd xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2974185029Spjd xoptattr_t *xoap; 2975219089Spjd zfs_acl_t *aclp; 2976185029Spjd boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 2977219089Spjd boolean_t fuid_dirtied = B_FALSE; 2978219089Spjd sa_bulk_attr_t bulk[7], xattr_bulk[7]; 2979219089Spjd int count = 0, xattr_count = 0; 2980168404Spjd 2981168404Spjd if (mask == 0) 2982168404Spjd return (0); 2983168404Spjd 2984168962Spjd if (mask & AT_NOSET) 2985249195Smm return (SET_ERROR(EINVAL)); 2986168962Spjd 2987185029Spjd ZFS_ENTER(zfsvfs); 2988185029Spjd ZFS_VERIFY_ZP(zp); 2989185029Spjd 2990185029Spjd zilog = zfsvfs->z_log; 2991185029Spjd 2992185029Spjd /* 2993185029Spjd * Make sure that if we have ephemeral uid/gid or xvattr specified 2994185029Spjd * that file system is at proper version level 2995185029Spjd */ 2996185029Spjd 2997185029Spjd if (zfsvfs->z_use_fuids == B_FALSE && 2998185029Spjd (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2999185029Spjd ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 3000185029Spjd (mask & AT_XVATTR))) { 3001185029Spjd ZFS_EXIT(zfsvfs); 3002249195Smm return (SET_ERROR(EINVAL)); 3003185029Spjd } 3004185029Spjd 3005185029Spjd if (mask & AT_SIZE && vp->v_type == VDIR) { 3006185029Spjd ZFS_EXIT(zfsvfs); 3007249195Smm return (SET_ERROR(EISDIR)); 3008185029Spjd } 3009168404Spjd 3010185029Spjd if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 3011185029Spjd ZFS_EXIT(zfsvfs); 3012249195Smm return (SET_ERROR(EINVAL)); 3013185029Spjd } 3014168404Spjd 3015185029Spjd /* 3016185029Spjd * If this is an xvattr_t, then get a pointer to the structure of 3017185029Spjd * optional attributes. If this is NULL, then we have a vattr_t. 
3018185029Spjd */ 3019185029Spjd xoap = xva_getxoptattr(xvap); 3020168404Spjd 3021209962Smm xva_init(&tmpxvattr); 3022209962Smm 3023185029Spjd /* 3024185029Spjd * Immutable files can only alter immutable bit and atime 3025185029Spjd */ 3026219089Spjd if ((zp->z_pflags & ZFS_IMMUTABLE) && 3027185029Spjd ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 3028185029Spjd ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 3029185029Spjd ZFS_EXIT(zfsvfs); 3030249195Smm return (SET_ERROR(EPERM)); 3031185029Spjd } 3032185029Spjd 3033219089Spjd if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { 3034185029Spjd ZFS_EXIT(zfsvfs); 3035249195Smm return (SET_ERROR(EPERM)); 3036185029Spjd } 3037185029Spjd 3038185029Spjd /* 3039185029Spjd * Verify timestamps doesn't overflow 32 bits. 3040185029Spjd * ZFS can handle large timestamps, but 32bit syscalls can't 3041185029Spjd * handle times greater than 2039. This check should be removed 3042185029Spjd * once large timestamps are fully supported. 3043185029Spjd */ 3044185029Spjd if (mask & (AT_ATIME | AT_MTIME)) { 3045185029Spjd if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 3046185029Spjd ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 3047185029Spjd ZFS_EXIT(zfsvfs); 3048249195Smm return (SET_ERROR(EOVERFLOW)); 3049185029Spjd } 3050185029Spjd } 3051185029Spjd 3052168404Spjdtop: 3053168404Spjd attrzp = NULL; 3054219089Spjd aclp = NULL; 3055168404Spjd 3056211932Smm /* Can this be moved to before the top label? 
*/ 3057168404Spjd if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 3058168404Spjd ZFS_EXIT(zfsvfs); 3059249195Smm return (SET_ERROR(EROFS)); 3060168404Spjd } 3061168404Spjd 3062168404Spjd /* 3063168404Spjd * First validate permissions 3064168404Spjd */ 3065168404Spjd 3066168404Spjd if (mask & AT_SIZE) { 3067168404Spjd /* 3068168404Spjd * XXX - Note, we are not providing any open 3069168404Spjd * mode flags here (like FNDELAY), so we may 3070168404Spjd * block if there are locks present... this 3071168404Spjd * should be addressed in openat(). 3072168404Spjd */ 3073185029Spjd /* XXX - would it be OK to generate a log record here? */ 3074185029Spjd err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 3075168404Spjd if (err) { 3076168404Spjd ZFS_EXIT(zfsvfs); 3077168404Spjd return (err); 3078168404Spjd } 3079168404Spjd } 3080168404Spjd 3081185029Spjd if (mask & (AT_ATIME|AT_MTIME) || 3082185029Spjd ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 3083185029Spjd XVA_ISSET_REQ(xvap, XAT_READONLY) || 3084185029Spjd XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 3085219089Spjd XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 3086219089Spjd XVA_ISSET_REQ(xvap, XAT_SPARSE) || 3087185029Spjd XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 3088219089Spjd XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 3089185029Spjd need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 3090185029Spjd skipaclchk, cr); 3091219089Spjd } 3092168404Spjd 3093168404Spjd if (mask & (AT_UID|AT_GID)) { 3094168404Spjd int idmask = (mask & (AT_UID|AT_GID)); 3095168404Spjd int take_owner; 3096168404Spjd int take_group; 3097168404Spjd 3098168404Spjd /* 3099168404Spjd * NOTE: even if a new mode is being set, 3100168404Spjd * we may clear S_ISUID/S_ISGID bits. 
3101168404Spjd */ 3102168404Spjd 3103168404Spjd if (!(mask & AT_MODE)) 3104219089Spjd vap->va_mode = zp->z_mode; 3105168404Spjd 3106168404Spjd /* 3107168404Spjd * Take ownership or chgrp to group we are a member of 3108168404Spjd */ 3109168404Spjd 3110168404Spjd take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 3111185029Spjd take_group = (mask & AT_GID) && 3112185029Spjd zfs_groupmember(zfsvfs, vap->va_gid, cr); 3113168404Spjd 3114168404Spjd /* 3115168404Spjd * If both AT_UID and AT_GID are set then take_owner and 3116168404Spjd * take_group must both be set in order to allow taking 3117168404Spjd * ownership. 3118168404Spjd * 3119168404Spjd * Otherwise, send the check through secpolicy_vnode_setattr() 3120168404Spjd * 3121168404Spjd */ 3122168404Spjd 3123168404Spjd if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 3124168404Spjd ((idmask == AT_UID) && take_owner) || 3125168404Spjd ((idmask == AT_GID) && take_group)) { 3126185029Spjd if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 3127185029Spjd skipaclchk, cr) == 0) { 3128168404Spjd /* 3129168404Spjd * Remove setuid/setgid for non-privileged users 3130168404Spjd */ 3131185029Spjd secpolicy_setid_clear(vap, vp, cr); 3132168404Spjd trim_mask = (mask & (AT_UID|AT_GID)); 3133168404Spjd } else { 3134168404Spjd need_policy = TRUE; 3135168404Spjd } 3136168404Spjd } else { 3137168404Spjd need_policy = TRUE; 3138168404Spjd } 3139168404Spjd } 3140168404Spjd 3141168404Spjd mutex_enter(&zp->z_lock); 3142219089Spjd oldva.va_mode = zp->z_mode; 3143185029Spjd zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 3144185029Spjd if (mask & AT_XVATTR) { 3145209962Smm /* 3146209962Smm * Update xvattr mask to include only those attributes 3147209962Smm * that are actually changing. 3148209962Smm * 3149209962Smm * the bits will be restored prior to actually setting 3150209962Smm * the attributes so the caller thinks they were set. 
3151209962Smm */ 3152209962Smm if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3153209962Smm if (xoap->xoa_appendonly != 3154219089Spjd ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 3155209962Smm need_policy = TRUE; 3156209962Smm } else { 3157209962Smm XVA_CLR_REQ(xvap, XAT_APPENDONLY); 3158209962Smm XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 3159209962Smm } 3160209962Smm } 3161209962Smm 3162209962Smm if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3163209962Smm if (xoap->xoa_nounlink != 3164219089Spjd ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 3165209962Smm need_policy = TRUE; 3166209962Smm } else { 3167209962Smm XVA_CLR_REQ(xvap, XAT_NOUNLINK); 3168209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 3169209962Smm } 3170209962Smm } 3171209962Smm 3172209962Smm if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3173209962Smm if (xoap->xoa_immutable != 3174219089Spjd ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 3175209962Smm need_policy = TRUE; 3176209962Smm } else { 3177209962Smm XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 3178209962Smm XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 3179209962Smm } 3180209962Smm } 3181209962Smm 3182209962Smm if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3183209962Smm if (xoap->xoa_nodump != 3184219089Spjd ((zp->z_pflags & ZFS_NODUMP) != 0)) { 3185209962Smm need_policy = TRUE; 3186209962Smm } else { 3187209962Smm XVA_CLR_REQ(xvap, XAT_NODUMP); 3188209962Smm XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 3189209962Smm } 3190209962Smm } 3191209962Smm 3192209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3193209962Smm if (xoap->xoa_av_modified != 3194219089Spjd ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 3195209962Smm need_policy = TRUE; 3196209962Smm } else { 3197209962Smm XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 3198209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 3199209962Smm } 3200209962Smm } 3201209962Smm 3202209962Smm if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3203209962Smm if ((vp->v_type != VREG && 3204209962Smm xoap->xoa_av_quarantined) || 3205209962Smm xoap->xoa_av_quarantined != 
3206219089Spjd ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 3207209962Smm need_policy = TRUE; 3208209962Smm } else { 3209209962Smm XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 3210209962Smm XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 3211209962Smm } 3212209962Smm } 3213209962Smm 3214219089Spjd if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3215219089Spjd mutex_exit(&zp->z_lock); 3216219089Spjd ZFS_EXIT(zfsvfs); 3217249195Smm return (SET_ERROR(EPERM)); 3218219089Spjd } 3219219089Spjd 3220209962Smm if (need_policy == FALSE && 3221209962Smm (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3222209962Smm XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3223185029Spjd need_policy = TRUE; 3224185029Spjd } 3225185029Spjd } 3226185029Spjd 3227168404Spjd mutex_exit(&zp->z_lock); 3228168404Spjd 3229168404Spjd if (mask & AT_MODE) { 3230185029Spjd if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 3231168962Spjd err = secpolicy_setid_setsticky_clear(vp, vap, 3232168962Spjd &oldva, cr); 3233168962Spjd if (err) { 3234168962Spjd ZFS_EXIT(zfsvfs); 3235168962Spjd return (err); 3236168962Spjd } 3237168404Spjd trim_mask |= AT_MODE; 3238168404Spjd } else { 3239168404Spjd need_policy = TRUE; 3240168404Spjd } 3241168404Spjd } 3242168404Spjd 3243168404Spjd if (need_policy) { 3244168404Spjd /* 3245168404Spjd * If trim_mask is set then take ownership 3246168404Spjd * has been granted or write_acl is present and user 3247168404Spjd * has the ability to modify mode. In that case remove 3248168404Spjd * UID|GID and or MODE from mask so that 3249168404Spjd * secpolicy_vnode_setattr() doesn't revoke it. 3250168404Spjd */ 3251168404Spjd 3252168404Spjd if (trim_mask) { 3253168404Spjd saved_mask = vap->va_mask; 3254168404Spjd vap->va_mask &= ~trim_mask; 3255197831Spjd if (trim_mask & AT_MODE) { 3256197831Spjd /* 3257197831Spjd * Save the mode, as secpolicy_vnode_setattr() 3258197831Spjd * will overwrite it with ova.va_mode. 
3259197831Spjd */ 3260197831Spjd saved_mode = vap->va_mode; 3261197831Spjd } 3262168404Spjd } 3263168404Spjd err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3264185029Spjd (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3265168404Spjd if (err) { 3266168404Spjd ZFS_EXIT(zfsvfs); 3267168404Spjd return (err); 3268168404Spjd } 3269168404Spjd 3270197831Spjd if (trim_mask) { 3271168404Spjd vap->va_mask |= saved_mask; 3272197831Spjd if (trim_mask & AT_MODE) { 3273197831Spjd /* 3274197831Spjd * Recover the mode after 3275197831Spjd * secpolicy_vnode_setattr(). 3276197831Spjd */ 3277197831Spjd vap->va_mode = saved_mode; 3278197831Spjd } 3279197831Spjd } 3280168404Spjd } 3281168404Spjd 3282168404Spjd /* 3283168404Spjd * secpolicy_vnode_setattr, or take ownership may have 3284168404Spjd * changed va_mask 3285168404Spjd */ 3286168404Spjd mask = vap->va_mask; 3287168404Spjd 3288219089Spjd if ((mask & (AT_UID | AT_GID))) { 3289219089Spjd err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 3290219089Spjd &xattr_obj, sizeof (xattr_obj)); 3291168404Spjd 3292219089Spjd if (err == 0 && xattr_obj) { 3293219089Spjd err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 3294209962Smm if (err) 3295219089Spjd goto out2; 3296168404Spjd } 3297209962Smm if (mask & AT_UID) { 3298209962Smm new_uid = zfs_fuid_create(zfsvfs, 3299209962Smm (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3300219089Spjd if (new_uid != zp->z_uid && 3301219089Spjd zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 3302219089Spjd if (attrzp) 3303219089Spjd VN_RELE(ZTOV(attrzp)); 3304249195Smm err = SET_ERROR(EDQUOT); 3305219089Spjd goto out2; 3306209962Smm } 3307209962Smm } 3308209962Smm 3309209962Smm if (mask & AT_GID) { 3310209962Smm new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 3311209962Smm cr, ZFS_GROUP, &fuidp); 3312219089Spjd if (new_gid != zp->z_gid && 3313219089Spjd zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 3314219089Spjd if (attrzp) 3315219089Spjd VN_RELE(ZTOV(attrzp)); 3316249195Smm 
err = SET_ERROR(EDQUOT); 3317219089Spjd goto out2; 3318209962Smm } 3319209962Smm } 3320219089Spjd } 3321219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 3322219089Spjd 3323219089Spjd if (mask & AT_MODE) { 3324219089Spjd uint64_t pmode = zp->z_mode; 3325219089Spjd uint64_t acl_obj; 3326219089Spjd new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3327219089Spjd 3328243560Smm if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 3329243560Smm !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3330249195Smm err = SET_ERROR(EPERM); 3331243560Smm goto out; 3332243560Smm } 3333243560Smm 3334224174Smm if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3335224174Smm goto out; 3336219089Spjd 3337219089Spjd mutex_enter(&zp->z_lock); 3338219089Spjd if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 3339219089Spjd /* 3340219089Spjd * Are we upgrading ACL from old V0 format 3341219089Spjd * to V1 format? 3342219089Spjd */ 3343219089Spjd if (zfsvfs->z_version >= ZPL_VERSION_FUID && 3344219089Spjd zfs_znode_acl_version(zp) == 3345219089Spjd ZFS_ACL_VERSION_INITIAL) { 3346219089Spjd dmu_tx_hold_free(tx, acl_obj, 0, 3347219089Spjd DMU_OBJECT_END); 3348219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3349219089Spjd 0, aclp->z_acl_bytes); 3350209962Smm } else { 3351219089Spjd dmu_tx_hold_write(tx, acl_obj, 0, 3352219089Spjd aclp->z_acl_bytes); 3353209962Smm } 3354219089Spjd } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 3355219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3356219089Spjd 0, aclp->z_acl_bytes); 3357209962Smm } 3358219089Spjd mutex_exit(&zp->z_lock); 3359219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3360219089Spjd } else { 3361219089Spjd if ((mask & AT_XVATTR) && 3362219089Spjd XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3363219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3364219089Spjd else 3365219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3366168404Spjd } 3367168404Spjd 3368219089Spjd if (attrzp) { 3369219089Spjd dmu_tx_hold_sa(tx, 
attrzp->z_sa_hdl, B_FALSE); 3370219089Spjd } 3371219089Spjd 3372219089Spjd fuid_dirtied = zfsvfs->z_fuid_dirty; 3373219089Spjd if (fuid_dirtied) 3374219089Spjd zfs_fuid_txhold(zfsvfs, tx); 3375219089Spjd 3376219089Spjd zfs_sa_upgrade_txholds(tx, zp); 3377219089Spjd 3378209962Smm err = dmu_tx_assign(tx, TXG_NOWAIT); 3379168404Spjd if (err) { 3380209962Smm if (err == ERESTART) 3381168404Spjd dmu_tx_wait(tx); 3382209962Smm goto out; 3383168404Spjd } 3384168404Spjd 3385219089Spjd count = 0; 3386168404Spjd /* 3387168404Spjd * Set each attribute requested. 3388168404Spjd * We group settings according to the locks they need to acquire. 3389168404Spjd * 3390168404Spjd * Note: you cannot set ctime directly, although it will be 3391168404Spjd * updated as a side-effect of calling this function. 3392168404Spjd */ 3393168404Spjd 3394219089Spjd 3395219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3396219089Spjd mutex_enter(&zp->z_acl_lock); 3397168404Spjd mutex_enter(&zp->z_lock); 3398168404Spjd 3399219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3400219089Spjd &zp->z_pflags, sizeof (zp->z_pflags)); 3401219089Spjd 3402219089Spjd if (attrzp) { 3403219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3404219089Spjd mutex_enter(&attrzp->z_acl_lock); 3405219089Spjd mutex_enter(&attrzp->z_lock); 3406219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3407219089Spjd SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3408219089Spjd sizeof (attrzp->z_pflags)); 3409219089Spjd } 3410219089Spjd 3411219089Spjd if (mask & (AT_UID|AT_GID)) { 3412219089Spjd 3413219089Spjd if (mask & AT_UID) { 3414219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 3415219089Spjd &new_uid, sizeof (new_uid)); 3416219089Spjd zp->z_uid = new_uid; 3417219089Spjd if (attrzp) { 3418219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3419219089Spjd SA_ZPL_UID(zfsvfs), NULL, &new_uid, 3420219089Spjd sizeof (new_uid)); 3421219089Spjd attrzp->z_uid = new_uid; 3422219089Spjd } 3423219089Spjd } 
3424219089Spjd 3425219089Spjd if (mask & AT_GID) { 3426219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 3427219089Spjd NULL, &new_gid, sizeof (new_gid)); 3428219089Spjd zp->z_gid = new_gid; 3429219089Spjd if (attrzp) { 3430219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3431219089Spjd SA_ZPL_GID(zfsvfs), NULL, &new_gid, 3432219089Spjd sizeof (new_gid)); 3433219089Spjd attrzp->z_gid = new_gid; 3434219089Spjd } 3435219089Spjd } 3436219089Spjd if (!(mask & AT_MODE)) { 3437219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 3438219089Spjd NULL, &new_mode, sizeof (new_mode)); 3439219089Spjd new_mode = zp->z_mode; 3440219089Spjd } 3441219089Spjd err = zfs_acl_chown_setattr(zp); 3442219089Spjd ASSERT(err == 0); 3443219089Spjd if (attrzp) { 3444219089Spjd err = zfs_acl_chown_setattr(attrzp); 3445219089Spjd ASSERT(err == 0); 3446219089Spjd } 3447219089Spjd } 3448219089Spjd 3449168404Spjd if (mask & AT_MODE) { 3450219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 3451219089Spjd &new_mode, sizeof (new_mode)); 3452219089Spjd zp->z_mode = new_mode; 3453219089Spjd ASSERT3U((uintptr_t)aclp, !=, 0); 3454209962Smm err = zfs_aclset_common(zp, aclp, cr, tx); 3455240415Smm ASSERT0(err); 3456219089Spjd if (zp->z_acl_cached) 3457219089Spjd zfs_acl_free(zp->z_acl_cached); 3458211932Smm zp->z_acl_cached = aclp; 3459211932Smm aclp = NULL; 3460168404Spjd } 3461168404Spjd 3462168404Spjd 3463219089Spjd if (mask & AT_ATIME) { 3464219089Spjd ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 3465219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 3466219089Spjd &zp->z_atime, sizeof (zp->z_atime)); 3467168404Spjd } 3468168404Spjd 3469219089Spjd if (mask & AT_MTIME) { 3470219089Spjd ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 3471219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 3472219089Spjd mtime, sizeof (mtime)); 3473168404Spjd } 3474168404Spjd 3475185029Spjd /* XXX - shouldn't this be done *before* the ATIME/MTIME 
checks? */ 3476219089Spjd if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3477219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3478219089Spjd NULL, mtime, sizeof (mtime)); 3479219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3480219089Spjd &ctime, sizeof (ctime)); 3481219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 3482219089Spjd B_TRUE); 3483219089Spjd } else if (mask != 0) { 3484219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3485219089Spjd &ctime, sizeof (ctime)); 3486219089Spjd zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 3487219089Spjd B_TRUE); 3488219089Spjd if (attrzp) { 3489219089Spjd SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3490219089Spjd SA_ZPL_CTIME(zfsvfs), NULL, 3491219089Spjd &ctime, sizeof (ctime)); 3492219089Spjd zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 3493219089Spjd mtime, ctime, B_TRUE); 3494219089Spjd } 3495219089Spjd } 3496185029Spjd /* 3497185029Spjd * Do this after setting timestamps to prevent timestamp 3498185029Spjd * update from toggling bit 3499185029Spjd */ 3500168404Spjd 3501185029Spjd if (xoap && (mask & AT_XVATTR)) { 3502209962Smm 3503209962Smm /* 3504209962Smm * restore trimmed off masks 3505209962Smm * so that return masks can be set for caller. 
3506209962Smm */ 3507209962Smm 3508209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3509209962Smm XVA_SET_REQ(xvap, XAT_APPENDONLY); 3510209962Smm } 3511209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3512209962Smm XVA_SET_REQ(xvap, XAT_NOUNLINK); 3513209962Smm } 3514209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3515209962Smm XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3516209962Smm } 3517209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3518209962Smm XVA_SET_REQ(xvap, XAT_NODUMP); 3519209962Smm } 3520209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3521209962Smm XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3522209962Smm } 3523209962Smm if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3524209962Smm XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3525209962Smm } 3526209962Smm 3527219089Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3528185029Spjd ASSERT(vp->v_type == VREG); 3529185029Spjd 3530219089Spjd zfs_xvattr_set(zp, xvap, tx); 3531185029Spjd } 3532185029Spjd 3533209962Smm if (fuid_dirtied) 3534209962Smm zfs_fuid_sync(zfsvfs, tx); 3535209962Smm 3536168404Spjd if (mask != 0) 3537185029Spjd zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3538168404Spjd 3539168404Spjd mutex_exit(&zp->z_lock); 3540219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3541219089Spjd mutex_exit(&zp->z_acl_lock); 3542168404Spjd 3543219089Spjd if (attrzp) { 3544219089Spjd if (mask & (AT_UID|AT_GID|AT_MODE)) 3545219089Spjd mutex_exit(&attrzp->z_acl_lock); 3546219089Spjd mutex_exit(&attrzp->z_lock); 3547219089Spjd } 3548209962Smmout: 3549219089Spjd if (err == 0 && attrzp) { 3550219089Spjd err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 3551219089Spjd xattr_count, tx); 3552219089Spjd ASSERT(err2 == 0); 3553219089Spjd } 3554219089Spjd 3555168404Spjd if (attrzp) 3556168404Spjd VN_RELE(ZTOV(attrzp)); 3557251631Sdelphij 3558211932Smm if (aclp) 3559209962Smm zfs_acl_free(aclp); 3560168404Spjd 3561209962Smm if (fuidp) { 3562209962Smm zfs_fuid_info_free(fuidp); 
3563209962Smm fuidp = NULL; 3564209962Smm } 3565209962Smm 3566219089Spjd if (err) { 3567209962Smm dmu_tx_abort(tx); 3568219089Spjd if (err == ERESTART) 3569219089Spjd goto top; 3570219089Spjd } else { 3571219089Spjd err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 3572209962Smm dmu_tx_commit(tx); 3573219089Spjd } 3574209962Smm 3575219089Spjdout2: 3576219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3577219089Spjd zil_commit(zilog, 0); 3578209962Smm 3579168404Spjd ZFS_EXIT(zfsvfs); 3580168404Spjd return (err); 3581168404Spjd} 3582168404Spjd 3583168404Spjdtypedef struct zfs_zlock { 3584168404Spjd krwlock_t *zl_rwlock; /* lock we acquired */ 3585168404Spjd znode_t *zl_znode; /* znode we held */ 3586168404Spjd struct zfs_zlock *zl_next; /* next in list */ 3587168404Spjd} zfs_zlock_t; 3588168404Spjd 3589168404Spjd/* 3590168404Spjd * Drop locks and release vnodes that were held by zfs_rename_lock(). 3591168404Spjd */ 3592168404Spjdstatic void 3593168404Spjdzfs_rename_unlock(zfs_zlock_t **zlpp) 3594168404Spjd{ 3595168404Spjd zfs_zlock_t *zl; 3596168404Spjd 3597168404Spjd while ((zl = *zlpp) != NULL) { 3598168404Spjd if (zl->zl_znode != NULL) 3599168404Spjd VN_RELE(ZTOV(zl->zl_znode)); 3600168404Spjd rw_exit(zl->zl_rwlock); 3601168404Spjd *zlpp = zl->zl_next; 3602168404Spjd kmem_free(zl, sizeof (*zl)); 3603168404Spjd } 3604168404Spjd} 3605168404Spjd 3606168404Spjd/* 3607168404Spjd * Search back through the directory tree, using the ".." entries. 3608168404Spjd * Lock each directory in the chain to prevent concurrent renames. 3609168404Spjd * Fail any attempt to move a directory into one of its own descendants. 
3610168404Spjd * XXX - z_parent_lock can overlap with map or grow locks 3611168404Spjd */ 3612168404Spjdstatic int 3613168404Spjdzfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 3614168404Spjd{ 3615168404Spjd zfs_zlock_t *zl; 3616168404Spjd znode_t *zp = tdzp; 3617168404Spjd uint64_t rootid = zp->z_zfsvfs->z_root; 3618219089Spjd uint64_t oidp = zp->z_id; 3619168404Spjd krwlock_t *rwlp = &szp->z_parent_lock; 3620168404Spjd krw_t rw = RW_WRITER; 3621168404Spjd 3622168404Spjd /* 3623168404Spjd * First pass write-locks szp and compares to zp->z_id. 3624168404Spjd * Later passes read-lock zp and compare to zp->z_parent. 3625168404Spjd */ 3626168404Spjd do { 3627168404Spjd if (!rw_tryenter(rwlp, rw)) { 3628168404Spjd /* 3629168404Spjd * Another thread is renaming in this path. 3630168404Spjd * Note that if we are a WRITER, we don't have any 3631168404Spjd * parent_locks held yet. 3632168404Spjd */ 3633168404Spjd if (rw == RW_READER && zp->z_id > szp->z_id) { 3634168404Spjd /* 3635168404Spjd * Drop our locks and restart 3636168404Spjd */ 3637168404Spjd zfs_rename_unlock(&zl); 3638168404Spjd *zlpp = NULL; 3639168404Spjd zp = tdzp; 3640219089Spjd oidp = zp->z_id; 3641168404Spjd rwlp = &szp->z_parent_lock; 3642168404Spjd rw = RW_WRITER; 3643168404Spjd continue; 3644168404Spjd } else { 3645168404Spjd /* 3646168404Spjd * Wait for other thread to drop its locks 3647168404Spjd */ 3648168404Spjd rw_enter(rwlp, rw); 3649168404Spjd } 3650168404Spjd } 3651168404Spjd 3652168404Spjd zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3653168404Spjd zl->zl_rwlock = rwlp; 3654168404Spjd zl->zl_znode = NULL; 3655168404Spjd zl->zl_next = *zlpp; 3656168404Spjd *zlpp = zl; 3657168404Spjd 3658219089Spjd if (oidp == szp->z_id) /* We're a descendant of szp */ 3659249195Smm return (SET_ERROR(EINVAL)); 3660168404Spjd 3661219089Spjd if (oidp == rootid) /* We've hit the top */ 3662168404Spjd return (0); 3663168404Spjd 3664168404Spjd if (rw == RW_READER) { /* i.e. 
not the first pass */ 3665219089Spjd int error = zfs_zget(zp->z_zfsvfs, oidp, &zp); 3666168404Spjd if (error) 3667168404Spjd return (error); 3668168404Spjd zl->zl_znode = zp; 3669168404Spjd } 3670219089Spjd (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs), 3671219089Spjd &oidp, sizeof (oidp)); 3672168404Spjd rwlp = &zp->z_parent_lock; 3673168404Spjd rw = RW_READER; 3674168404Spjd 3675168404Spjd } while (zp->z_id != sdzp->z_id); 3676168404Spjd 3677168404Spjd return (0); 3678168404Spjd} 3679168404Spjd 3680168404Spjd/* 3681168404Spjd * Move an entry from the provided source directory to the target 3682168404Spjd * directory. Change the entry name as indicated. 3683168404Spjd * 3684168404Spjd * IN: sdvp - Source directory containing the "old entry". 3685168404Spjd * snm - Old entry name. 3686168404Spjd * tdvp - Target directory to contain the "new entry". 3687168404Spjd * tnm - New entry name. 3688168404Spjd * cr - credentials of caller. 3689185029Spjd * ct - caller context 3690185029Spjd * flags - case flags 3691168404Spjd * 3692251631Sdelphij * RETURN: 0 on success, error code on failure. 
3693168404Spjd * 3694168404Spjd * Timestamps: 3695168404Spjd * sdvp,tdvp - ctime|mtime updated 3696168404Spjd */ 3697185029Spjd/*ARGSUSED*/ 3698168404Spjdstatic int 3699185029Spjdzfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, 3700185029Spjd caller_context_t *ct, int flags) 3701168404Spjd{ 3702168404Spjd znode_t *tdzp, *szp, *tzp; 3703168404Spjd znode_t *sdzp = VTOZ(sdvp); 3704168404Spjd zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; 3705185029Spjd zilog_t *zilog; 3706168962Spjd vnode_t *realvp; 3707168404Spjd zfs_dirlock_t *sdl, *tdl; 3708168404Spjd dmu_tx_t *tx; 3709168404Spjd zfs_zlock_t *zl; 3710185029Spjd int cmp, serr, terr; 3711185029Spjd int error = 0; 3712185029Spjd int zflg = 0; 3713168404Spjd 3714168404Spjd ZFS_ENTER(zfsvfs); 3715185029Spjd ZFS_VERIFY_ZP(sdzp); 3716185029Spjd zilog = zfsvfs->z_log; 3717168404Spjd 3718168962Spjd /* 3719168962Spjd * Make sure we have the real vp for the target directory. 3720168962Spjd */ 3721185029Spjd if (VOP_REALVP(tdvp, &realvp, ct) == 0) 3722168962Spjd tdvp = realvp; 3723168962Spjd 3724212694Smm if (tdvp->v_vfsp != sdvp->v_vfsp || zfsctl_is_node(tdvp)) { 3725168404Spjd ZFS_EXIT(zfsvfs); 3726249195Smm return (SET_ERROR(EXDEV)); 3727168404Spjd } 3728168404Spjd 3729168404Spjd tdzp = VTOZ(tdvp); 3730185029Spjd ZFS_VERIFY_ZP(tdzp); 3731185029Spjd if (zfsvfs->z_utf8 && u8_validate(tnm, 3732185029Spjd strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3733185029Spjd ZFS_EXIT(zfsvfs); 3734249195Smm return (SET_ERROR(EILSEQ)); 3735185029Spjd } 3736185029Spjd 3737185029Spjd if (flags & FIGNORECASE) 3738185029Spjd zflg |= ZCILOOK; 3739185029Spjd 3740168404Spjdtop: 3741168404Spjd szp = NULL; 3742168404Spjd tzp = NULL; 3743168404Spjd zl = NULL; 3744168404Spjd 3745168404Spjd /* 3746168404Spjd * This is to prevent the creation of links into attribute space 3747168404Spjd * by renaming a linked file into/outof an attribute directory. 3748168404Spjd * See the comment in zfs_link() for why this is considered bad. 
3749168404Spjd */ 3750219089Spjd if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3751168962Spjd ZFS_EXIT(zfsvfs); 3752249195Smm return (SET_ERROR(EINVAL)); 3753168404Spjd } 3754168404Spjd 3755168404Spjd /* 3756168404Spjd * Lock source and target directory entries. To prevent deadlock, 3757168404Spjd * a lock ordering must be defined. We lock the directory with 3758168404Spjd * the smallest object id first, or if it's a tie, the one with 3759168404Spjd * the lexically first name. 3760168404Spjd */ 3761168404Spjd if (sdzp->z_id < tdzp->z_id) { 3762168962Spjd cmp = -1; 3763168962Spjd } else if (sdzp->z_id > tdzp->z_id) { 3764168962Spjd cmp = 1; 3765168962Spjd } else { 3766185029Spjd /* 3767185029Spjd * First compare the two name arguments without 3768185029Spjd * considering any case folding. 3769185029Spjd */ 3770185029Spjd int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); 3771185029Spjd 3772185029Spjd cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); 3773185029Spjd ASSERT(error == 0 || !zfsvfs->z_utf8); 3774168962Spjd if (cmp == 0) { 3775168962Spjd /* 3776168962Spjd * POSIX: "If the old argument and the new argument 3777168962Spjd * both refer to links to the same existing file, 3778168962Spjd * the rename() function shall return successfully 3779168962Spjd * and perform no other action." 3780168962Spjd */ 3781168962Spjd ZFS_EXIT(zfsvfs); 3782168962Spjd return (0); 3783168962Spjd } 3784185029Spjd /* 3785185029Spjd * If the file system is case-folding, then we may 3786185029Spjd * have some more checking to do. A case-folding file 3787185029Spjd * system is either supporting mixed case sensitivity 3788185029Spjd * access or is completely case-insensitive. Note 3789185029Spjd * that the file system is always case preserving. 3790185029Spjd * 3791185029Spjd * In mixed sensitivity mode case sensitive behavior 3792185029Spjd * is the default. FIGNORECASE must be used to 3793185029Spjd * explicitly request case insensitive behavior. 
3794185029Spjd * 3795185029Spjd * If the source and target names provided differ only 3796185029Spjd * by case (e.g., a request to rename 'tim' to 'Tim'), 3797185029Spjd * we will treat this as a special case in the 3798185029Spjd * case-insensitive mode: as long as the source name 3799185029Spjd * is an exact match, we will allow this to proceed as 3800185029Spjd * a name-change request. 3801185029Spjd */ 3802185029Spjd if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 3803185029Spjd (zfsvfs->z_case == ZFS_CASE_MIXED && 3804185029Spjd flags & FIGNORECASE)) && 3805185029Spjd u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, 3806185029Spjd &error) == 0) { 3807185029Spjd /* 3808185029Spjd * case preserving rename request, require exact 3809185029Spjd * name matches 3810185029Spjd */ 3811185029Spjd zflg |= ZCIEXACT; 3812185029Spjd zflg &= ~ZCILOOK; 3813185029Spjd } 3814168962Spjd } 3815185029Spjd 3816208131Smm /* 3817208131Smm * If the source and destination directories are the same, we should 3818208131Smm * grab the z_name_lock of that directory only once. 3819208131Smm */ 3820208131Smm if (sdzp == tdzp) { 3821208131Smm zflg |= ZHAVELOCK; 3822208131Smm rw_enter(&sdzp->z_name_lock, RW_READER); 3823208131Smm } 3824208131Smm 3825168962Spjd if (cmp < 0) { 3826185029Spjd serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, 3827185029Spjd ZEXISTS | zflg, NULL, NULL); 3828185029Spjd terr = zfs_dirent_lock(&tdl, 3829185029Spjd tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); 3830168962Spjd } else { 3831185029Spjd terr = zfs_dirent_lock(&tdl, 3832185029Spjd tdzp, tnm, &tzp, zflg, NULL, NULL); 3833185029Spjd serr = zfs_dirent_lock(&sdl, 3834185029Spjd sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, 3835185029Spjd NULL, NULL); 3836168404Spjd } 3837168404Spjd 3838168962Spjd if (serr) { 3839168404Spjd /* 3840168404Spjd * Source entry invalid or not there. 
3841168404Spjd */ 3842168962Spjd if (!terr) { 3843168404Spjd zfs_dirent_unlock(tdl); 3844168962Spjd if (tzp) 3845168962Spjd VN_RELE(ZTOV(tzp)); 3846168962Spjd } 3847208131Smm 3848208131Smm if (sdzp == tdzp) 3849208131Smm rw_exit(&sdzp->z_name_lock); 3850208131Smm 3851219089Spjd /* 3852219089Spjd * FreeBSD: In OpenSolaris they only check if rename source is 3853219089Spjd * ".." here, because "." is handled in their lookup. This is 3854219089Spjd * not the case for FreeBSD, so we check for "." explicitly. 3855219089Spjd */ 3856168404Spjd if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0) 3857249195Smm serr = SET_ERROR(EINVAL); 3858168962Spjd ZFS_EXIT(zfsvfs); 3859168962Spjd return (serr); 3860168404Spjd } 3861168404Spjd if (terr) { 3862168404Spjd zfs_dirent_unlock(sdl); 3863168962Spjd VN_RELE(ZTOV(szp)); 3864208131Smm 3865208131Smm if (sdzp == tdzp) 3866208131Smm rw_exit(&sdzp->z_name_lock); 3867208131Smm 3868168404Spjd if (strcmp(tnm, "..") == 0) 3869249195Smm terr = SET_ERROR(EINVAL); 3870168962Spjd ZFS_EXIT(zfsvfs); 3871168962Spjd return (terr); 3872168404Spjd } 3873168404Spjd 3874168404Spjd /* 3875168404Spjd * Must have write access at the source to remove the old entry 3876168404Spjd * and write access at the target to create the new entry. 3877168404Spjd * Note that if target and source are the same, this can be 3878168404Spjd * done in a single check. 3879168404Spjd */ 3880168404Spjd 3881168404Spjd if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3882168404Spjd goto out; 3883168404Spjd 3884168962Spjd if (ZTOV(szp)->v_type == VDIR) { 3885168404Spjd /* 3886168404Spjd * Check to make sure rename is valid. 3887168404Spjd * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3888168404Spjd */ 3889168404Spjd if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) 3890168404Spjd goto out; 3891168404Spjd } 3892168404Spjd 3893168404Spjd /* 3894168404Spjd * Does target exist? 
3895168404Spjd */ 3896168404Spjd if (tzp) { 3897168404Spjd /* 3898168404Spjd * Source and target must be the same type. 3899168404Spjd */ 3900168962Spjd if (ZTOV(szp)->v_type == VDIR) { 3901168962Spjd if (ZTOV(tzp)->v_type != VDIR) { 3902249195Smm error = SET_ERROR(ENOTDIR); 3903168404Spjd goto out; 3904168404Spjd } 3905168404Spjd } else { 3906168962Spjd if (ZTOV(tzp)->v_type == VDIR) { 3907249195Smm error = SET_ERROR(EISDIR); 3908168404Spjd goto out; 3909168404Spjd } 3910168404Spjd } 3911168404Spjd /* 3912168404Spjd * POSIX dictates that when the source and target 3913168404Spjd * entries refer to the same file object, rename 3914168404Spjd * must do nothing and exit without error. 3915168404Spjd */ 3916168404Spjd if (szp->z_id == tzp->z_id) { 3917168404Spjd error = 0; 3918168404Spjd goto out; 3919168404Spjd } 3920168404Spjd } 3921168404Spjd 3922185029Spjd vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); 3923168962Spjd if (tzp) 3924185029Spjd vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 3925168962Spjd 3926185029Spjd /* 3927185029Spjd * notify the target directory if it is not the same 3928185029Spjd * as source directory. 
3929185029Spjd */ 3930185029Spjd if (tdvp != sdvp) { 3931185029Spjd vnevent_rename_dest_dir(tdvp, ct); 3932185029Spjd } 3933185029Spjd 3934168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 3935219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 3936219089Spjd dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3937168404Spjd dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3938168404Spjd dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 3939219089Spjd if (sdzp != tdzp) { 3940219089Spjd dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 3941219089Spjd zfs_sa_upgrade_txholds(tx, tdzp); 3942219089Spjd } 3943219089Spjd if (tzp) { 3944219089Spjd dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 3945219089Spjd zfs_sa_upgrade_txholds(tx, tzp); 3946219089Spjd } 3947219089Spjd 3948219089Spjd zfs_sa_upgrade_txholds(tx, szp); 3949168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3950209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 3951168404Spjd if (error) { 3952168404Spjd if (zl != NULL) 3953168404Spjd zfs_rename_unlock(&zl); 3954168404Spjd zfs_dirent_unlock(sdl); 3955168404Spjd zfs_dirent_unlock(tdl); 3956208131Smm 3957208131Smm if (sdzp == tdzp) 3958208131Smm rw_exit(&sdzp->z_name_lock); 3959208131Smm 3960168962Spjd VN_RELE(ZTOV(szp)); 3961168962Spjd if (tzp) 3962168962Spjd VN_RELE(ZTOV(tzp)); 3963209962Smm if (error == ERESTART) { 3964168404Spjd dmu_tx_wait(tx); 3965168404Spjd dmu_tx_abort(tx); 3966168404Spjd goto top; 3967168404Spjd } 3968168404Spjd dmu_tx_abort(tx); 3969168962Spjd ZFS_EXIT(zfsvfs); 3970168962Spjd return (error); 3971168404Spjd } 3972168404Spjd 3973168404Spjd if (tzp) /* Attempt to remove the existing target */ 3974185029Spjd error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); 3975168404Spjd 3976168404Spjd if (error == 0) { 3977168404Spjd error = zfs_link_create(tdl, szp, tx, ZRENAMING); 3978168404Spjd if (error == 0) { 3979219089Spjd szp->z_pflags |= ZFS_AV_MODIFIED; 3980185029Spjd 3981219089Spjd error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 3982219089Spjd 
(void *)&szp->z_pflags, sizeof (uint64_t), tx); 3983240415Smm ASSERT0(error); 3984219089Spjd 3985168404Spjd error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); 3986219089Spjd if (error == 0) { 3987219089Spjd zfs_log_rename(zilog, tx, TX_RENAME | 3988219089Spjd (flags & FIGNORECASE ? TX_CI : 0), sdzp, 3989219089Spjd sdl->dl_name, tdzp, tdl->dl_name, szp); 3990185029Spjd 3991219089Spjd /* 3992219089Spjd * Update path information for the target vnode 3993219089Spjd */ 3994219089Spjd vn_renamepath(tdvp, ZTOV(szp), tnm, 3995219089Spjd strlen(tnm)); 3996219089Spjd } else { 3997219089Spjd /* 3998219089Spjd * At this point, we have successfully created 3999219089Spjd * the target name, but have failed to remove 4000219089Spjd * the source name. Since the create was done 4001219089Spjd * with the ZRENAMING flag, there are 4002219089Spjd * complications; for one, the link count is 4003219089Spjd * wrong. The easiest way to deal with this 4004219089Spjd * is to remove the newly created target, and 4005219089Spjd * return the original error. This must 4006219089Spjd * succeed; fortunately, it is very unlikely to 4007219089Spjd * fail, since we just created it. 
4008219089Spjd */ 4009219089Spjd VERIFY3U(zfs_link_destroy(tdl, szp, tx, 4010219089Spjd ZRENAMING, NULL), ==, 0); 4011219089Spjd } 4012168404Spjd } 4013168404Spjd#ifdef FREEBSD_NAMECACHE 4014168404Spjd if (error == 0) { 4015168404Spjd cache_purge(sdvp); 4016168404Spjd cache_purge(tdvp); 4017240829Spjd cache_purge(ZTOV(szp)); 4018240829Spjd if (tzp) 4019240829Spjd cache_purge(ZTOV(tzp)); 4020168404Spjd } 4021168404Spjd#endif 4022168404Spjd } 4023168404Spjd 4024168404Spjd dmu_tx_commit(tx); 4025168404Spjdout: 4026168404Spjd if (zl != NULL) 4027168404Spjd zfs_rename_unlock(&zl); 4028168404Spjd 4029168404Spjd zfs_dirent_unlock(sdl); 4030168404Spjd zfs_dirent_unlock(tdl); 4031168404Spjd 4032208131Smm if (sdzp == tdzp) 4033208131Smm rw_exit(&sdzp->z_name_lock); 4034208131Smm 4035219089Spjd 4036168962Spjd VN_RELE(ZTOV(szp)); 4037168404Spjd if (tzp) 4038168962Spjd VN_RELE(ZTOV(tzp)); 4039168404Spjd 4040219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4041219089Spjd zil_commit(zilog, 0); 4042219089Spjd 4043168404Spjd ZFS_EXIT(zfsvfs); 4044168404Spjd 4045168404Spjd return (error); 4046168404Spjd} 4047168404Spjd 4048168404Spjd/* 4049168404Spjd * Insert the indicated symbolic reference entry into the directory. 4050168404Spjd * 4051168404Spjd * IN: dvp - Directory to contain new symbolic link. 4052168404Spjd * link - Name for new symlink entry. 4053168404Spjd * vap - Attributes of new entry. 4054168404Spjd * cr - credentials of caller. 4055185029Spjd * ct - caller context 4056185029Spjd * flags - case flags 4057168404Spjd * 4058251631Sdelphij * RETURN: 0 on success, error code on failure. 
4059168404Spjd * 4060168404Spjd * Timestamps: 4061168404Spjd * dvp - ctime|mtime updated 4062168404Spjd */ 4063185029Spjd/*ARGSUSED*/ 4064168404Spjdstatic int 4065185029Spjdzfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 4066185029Spjd cred_t *cr, kthread_t *td) 4067168404Spjd{ 4068168404Spjd znode_t *zp, *dzp = VTOZ(dvp); 4069168404Spjd zfs_dirlock_t *dl; 4070168404Spjd dmu_tx_t *tx; 4071168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4072185029Spjd zilog_t *zilog; 4073219089Spjd uint64_t len = strlen(link); 4074168404Spjd int error; 4075185029Spjd int zflg = ZNEW; 4076209962Smm zfs_acl_ids_t acl_ids; 4077209962Smm boolean_t fuid_dirtied; 4078219089Spjd uint64_t txtype = TX_SYMLINK; 4079185029Spjd int flags = 0; 4080168404Spjd 4081168962Spjd ASSERT(vap->va_type == VLNK); 4082168404Spjd 4083168404Spjd ZFS_ENTER(zfsvfs); 4084185029Spjd ZFS_VERIFY_ZP(dzp); 4085185029Spjd zilog = zfsvfs->z_log; 4086185029Spjd 4087185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 4088185029Spjd NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4089185029Spjd ZFS_EXIT(zfsvfs); 4090249195Smm return (SET_ERROR(EILSEQ)); 4091185029Spjd } 4092185029Spjd if (flags & FIGNORECASE) 4093185029Spjd zflg |= ZCILOOK; 4094168404Spjd 4095168404Spjd if (len > MAXPATHLEN) { 4096168404Spjd ZFS_EXIT(zfsvfs); 4097249195Smm return (SET_ERROR(ENAMETOOLONG)); 4098168404Spjd } 4099168404Spjd 4100219089Spjd if ((error = zfs_acl_ids_create(dzp, 0, 4101219089Spjd vap, cr, NULL, &acl_ids)) != 0) { 4102219089Spjd ZFS_EXIT(zfsvfs); 4103219089Spjd return (error); 4104219089Spjd } 4105219089Spjdtop: 4106168404Spjd /* 4107168404Spjd * Attempt to lock directory; fail if entry already exists. 
4108168404Spjd */ 4109185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 4110185029Spjd if (error) { 4111219089Spjd zfs_acl_ids_free(&acl_ids); 4112168404Spjd ZFS_EXIT(zfsvfs); 4113168404Spjd return (error); 4114168404Spjd } 4115168404Spjd 4116219089Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4117219089Spjd zfs_acl_ids_free(&acl_ids); 4118219089Spjd zfs_dirent_unlock(dl); 4119219089Spjd ZFS_EXIT(zfsvfs); 4120219089Spjd return (error); 4121219089Spjd } 4122219089Spjd 4123209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 4124209962Smm zfs_acl_ids_free(&acl_ids); 4125209962Smm zfs_dirent_unlock(dl); 4126209962Smm ZFS_EXIT(zfsvfs); 4127249195Smm return (SET_ERROR(EDQUOT)); 4128209962Smm } 4129168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4130209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 4131168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 4132168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4133219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 4134219089Spjd ZFS_SA_BASE_ATTR_SIZE + len); 4135219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 4136219089Spjd if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 4137219089Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 4138219089Spjd acl_ids.z_aclp->z_acl_bytes); 4139219089Spjd } 4140209962Smm if (fuid_dirtied) 4141209962Smm zfs_fuid_txhold(zfsvfs, tx); 4142209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 4143168404Spjd if (error) { 4144168404Spjd zfs_dirent_unlock(dl); 4145209962Smm if (error == ERESTART) { 4146168404Spjd dmu_tx_wait(tx); 4147168404Spjd dmu_tx_abort(tx); 4148168404Spjd goto top; 4149168404Spjd } 4150219089Spjd zfs_acl_ids_free(&acl_ids); 4151168404Spjd dmu_tx_abort(tx); 4152168404Spjd ZFS_EXIT(zfsvfs); 4153168404Spjd return (error); 4154168404Spjd } 4155168404Spjd 4156168404Spjd /* 4157168404Spjd * Create a new object for the symlink. 
4158219089Spjd * for version 4 ZPL datsets the symlink will be an SA attribute 4159168404Spjd */ 4160219089Spjd zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 4161168404Spjd 4162219089Spjd if (fuid_dirtied) 4163219089Spjd zfs_fuid_sync(zfsvfs, tx); 4164209962Smm 4165219089Spjd mutex_enter(&zp->z_lock); 4166219089Spjd if (zp->z_is_sa) 4167219089Spjd error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 4168219089Spjd link, len, tx); 4169219089Spjd else 4170219089Spjd zfs_sa_symlink(zp, link, len, tx); 4171219089Spjd mutex_exit(&zp->z_lock); 4172168404Spjd 4173219089Spjd zp->z_size = len; 4174219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 4175219089Spjd &zp->z_size, sizeof (zp->z_size), tx); 4176168404Spjd /* 4177168404Spjd * Insert the new object into the directory. 4178168404Spjd */ 4179168404Spjd (void) zfs_link_create(dl, zp, tx, ZNEW); 4180168404Spjd 4181219089Spjd if (flags & FIGNORECASE) 4182219089Spjd txtype |= TX_CI; 4183219089Spjd zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 4184219089Spjd *vpp = ZTOV(zp); 4185219089Spjd 4186209962Smm zfs_acl_ids_free(&acl_ids); 4187209962Smm 4188168404Spjd dmu_tx_commit(tx); 4189168404Spjd 4190168404Spjd zfs_dirent_unlock(dl); 4191168404Spjd 4192219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4193219089Spjd zil_commit(zilog, 0); 4194219089Spjd 4195168404Spjd ZFS_EXIT(zfsvfs); 4196168404Spjd return (error); 4197168404Spjd} 4198168404Spjd 4199168404Spjd/* 4200168404Spjd * Return, in the buffer contained in the provided uio structure, 4201168404Spjd * the symbolic path referred to by vp. 4202168404Spjd * 4203168404Spjd * IN: vp - vnode of symbolic link. 4204251631Sdelphij * uio - structure to contain the link path. 4205168404Spjd * cr - credentials of caller. 4206185029Spjd * ct - caller context 4207168404Spjd * 4208251631Sdelphij * OUT: uio - structure containing the link path. 4209168404Spjd * 4210251631Sdelphij * RETURN: 0 on success, error code on failure. 
4211168404Spjd * 4212168404Spjd * Timestamps: 4213168404Spjd * vp - atime updated 4214168404Spjd */ 4215168404Spjd/* ARGSUSED */ 4216168404Spjdstatic int 4217185029Spjdzfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 4218168404Spjd{ 4219168404Spjd znode_t *zp = VTOZ(vp); 4220168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4221168404Spjd int error; 4222168404Spjd 4223168404Spjd ZFS_ENTER(zfsvfs); 4224185029Spjd ZFS_VERIFY_ZP(zp); 4225168404Spjd 4226219089Spjd mutex_enter(&zp->z_lock); 4227219089Spjd if (zp->z_is_sa) 4228219089Spjd error = sa_lookup_uio(zp->z_sa_hdl, 4229219089Spjd SA_ZPL_SYMLINK(zfsvfs), uio); 4230219089Spjd else 4231219089Spjd error = zfs_sa_readlink(zp, uio); 4232219089Spjd mutex_exit(&zp->z_lock); 4233168404Spjd 4234168404Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4235219089Spjd 4236168404Spjd ZFS_EXIT(zfsvfs); 4237168404Spjd return (error); 4238168404Spjd} 4239168404Spjd 4240168404Spjd/* 4241168404Spjd * Insert a new entry into directory tdvp referencing svp. 4242168404Spjd * 4243168404Spjd * IN: tdvp - Directory to contain new entry. 4244168404Spjd * svp - vnode of new entry. 4245168404Spjd * name - name of new entry. 4246168404Spjd * cr - credentials of caller. 4247185029Spjd * ct - caller context 4248168404Spjd * 4249251631Sdelphij * RETURN: 0 on success, error code on failure. 
4250168404Spjd * 4251168404Spjd * Timestamps: 4252168404Spjd * tdvp - ctime|mtime updated 4253168404Spjd * svp - ctime updated 4254168404Spjd */ 4255168404Spjd/* ARGSUSED */ 4256168404Spjdstatic int 4257185029Spjdzfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 4258185029Spjd caller_context_t *ct, int flags) 4259168404Spjd{ 4260168404Spjd znode_t *dzp = VTOZ(tdvp); 4261168404Spjd znode_t *tzp, *szp; 4262168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4263185029Spjd zilog_t *zilog; 4264168404Spjd zfs_dirlock_t *dl; 4265168404Spjd dmu_tx_t *tx; 4266168962Spjd vnode_t *realvp; 4267168404Spjd int error; 4268185029Spjd int zf = ZNEW; 4269212694Smm uint64_t parent; 4270185029Spjd uid_t owner; 4271168404Spjd 4272168404Spjd ASSERT(tdvp->v_type == VDIR); 4273168404Spjd 4274168404Spjd ZFS_ENTER(zfsvfs); 4275185029Spjd ZFS_VERIFY_ZP(dzp); 4276185029Spjd zilog = zfsvfs->z_log; 4277168404Spjd 4278185029Spjd if (VOP_REALVP(svp, &realvp, ct) == 0) 4279168962Spjd svp = realvp; 4280168962Spjd 4281212694Smm /* 4282212694Smm * POSIX dictates that we return EPERM here. 4283212694Smm * Better choices include ENOTSUP or EISDIR. 
4284212694Smm */ 4285212694Smm if (svp->v_type == VDIR) { 4286168404Spjd ZFS_EXIT(zfsvfs); 4287249195Smm return (SET_ERROR(EPERM)); 4288212694Smm } 4289212694Smm 4290212694Smm if (svp->v_vfsp != tdvp->v_vfsp || zfsctl_is_node(svp)) { 4291212694Smm ZFS_EXIT(zfsvfs); 4292249195Smm return (SET_ERROR(EXDEV)); 4293168404Spjd } 4294212694Smm 4295185029Spjd szp = VTOZ(svp); 4296185029Spjd ZFS_VERIFY_ZP(szp); 4297168404Spjd 4298212694Smm /* Prevent links to .zfs/shares files */ 4299212694Smm 4300219089Spjd if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 4301219089Spjd &parent, sizeof (uint64_t))) != 0) { 4302212694Smm ZFS_EXIT(zfsvfs); 4303219089Spjd return (error); 4304219089Spjd } 4305219089Spjd if (parent == zfsvfs->z_shares_dir) { 4306219089Spjd ZFS_EXIT(zfsvfs); 4307249195Smm return (SET_ERROR(EPERM)); 4308212694Smm } 4309212694Smm 4310185029Spjd if (zfsvfs->z_utf8 && u8_validate(name, 4311185029Spjd strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4312185029Spjd ZFS_EXIT(zfsvfs); 4313249195Smm return (SET_ERROR(EILSEQ)); 4314185029Spjd } 4315185029Spjd if (flags & FIGNORECASE) 4316185029Spjd zf |= ZCILOOK; 4317185029Spjd 4318168404Spjd /* 4319168404Spjd * We do not support links between attributes and non-attributes 4320168404Spjd * because of the potential security risk of creating links 4321168404Spjd * into "normal" file space in order to circumvent restrictions 4322168404Spjd * imposed in attribute space. 
4323168404Spjd */ 4324219089Spjd if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4325168404Spjd ZFS_EXIT(zfsvfs); 4326249195Smm return (SET_ERROR(EINVAL)); 4327168404Spjd } 4328168404Spjd 4329168404Spjd 4330219089Spjd owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4331219089Spjd if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { 4332168404Spjd ZFS_EXIT(zfsvfs); 4333249195Smm return (SET_ERROR(EPERM)); 4334168404Spjd } 4335168404Spjd 4336185029Spjd if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4337168404Spjd ZFS_EXIT(zfsvfs); 4338168404Spjd return (error); 4339168404Spjd } 4340168404Spjd 4341212694Smmtop: 4342168404Spjd /* 4343168404Spjd * Attempt to lock directory; fail if entry already exists. 4344168404Spjd */ 4345185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 4346185029Spjd if (error) { 4347168404Spjd ZFS_EXIT(zfsvfs); 4348168404Spjd return (error); 4349168404Spjd } 4350168404Spjd 4351168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 4352219089Spjd dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4353168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4354219089Spjd zfs_sa_upgrade_txholds(tx, szp); 4355219089Spjd zfs_sa_upgrade_txholds(tx, dzp); 4356209962Smm error = dmu_tx_assign(tx, TXG_NOWAIT); 4357168404Spjd if (error) { 4358168404Spjd zfs_dirent_unlock(dl); 4359209962Smm if (error == ERESTART) { 4360168404Spjd dmu_tx_wait(tx); 4361168404Spjd dmu_tx_abort(tx); 4362168404Spjd goto top; 4363168404Spjd } 4364168404Spjd dmu_tx_abort(tx); 4365168404Spjd ZFS_EXIT(zfsvfs); 4366168404Spjd return (error); 4367168404Spjd } 4368168404Spjd 4369168404Spjd error = zfs_link_create(dl, szp, tx, 0); 4370168404Spjd 4371185029Spjd if (error == 0) { 4372185029Spjd uint64_t txtype = TX_LINK; 4373185029Spjd if (flags & FIGNORECASE) 4374185029Spjd txtype |= TX_CI; 4375185029Spjd zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4376185029Spjd } 4377168404Spjd 4378168404Spjd dmu_tx_commit(tx); 
4379168404Spjd 4380168404Spjd zfs_dirent_unlock(dl); 4381168404Spjd 4382185029Spjd if (error == 0) { 4383185029Spjd vnevent_link(svp, ct); 4384185029Spjd } 4385185029Spjd 4386219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4387219089Spjd zil_commit(zilog, 0); 4388219089Spjd 4389168404Spjd ZFS_EXIT(zfsvfs); 4390168404Spjd return (error); 4391168404Spjd} 4392168404Spjd 4393219089Spjd#ifdef sun 4394219089Spjd/* 4395219089Spjd * zfs_null_putapage() is used when the file system has been force 4396219089Spjd * unmounted. It just drops the pages. 4397219089Spjd */ 4398219089Spjd/* ARGSUSED */ 4399219089Spjdstatic int 4400219089Spjdzfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4401219089Spjd size_t *lenp, int flags, cred_t *cr) 4402219089Spjd{ 4403219089Spjd pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); 4404219089Spjd return (0); 4405219089Spjd} 4406219089Spjd 4407219089Spjd/* 4408219089Spjd * Push a page out to disk, klustering if possible. 4409219089Spjd * 4410219089Spjd * IN: vp - file to push page to. 4411219089Spjd * pp - page to push. 4412219089Spjd * flags - additional flags. 4413219089Spjd * cr - credentials of caller. 4414219089Spjd * 4415219089Spjd * OUT: offp - start of range pushed. 4416219089Spjd * lenp - len of range pushed. 4417219089Spjd * 4418251631Sdelphij * RETURN: 0 on success, error code on failure. 4419219089Spjd * 4420219089Spjd * NOTE: callers must have locked the page to be pushed. On 4421219089Spjd * exit, the page (and all other pages in the kluster) must be 4422219089Spjd * unlocked. 
4423219089Spjd */ 4424219089Spjd/* ARGSUSED */ 4425219089Spjdstatic int 4426219089Spjdzfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4427219089Spjd size_t *lenp, int flags, cred_t *cr) 4428219089Spjd{ 4429219089Spjd znode_t *zp = VTOZ(vp); 4430219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4431219089Spjd dmu_tx_t *tx; 4432219089Spjd u_offset_t off, koff; 4433219089Spjd size_t len, klen; 4434219089Spjd int err; 4435219089Spjd 4436219089Spjd off = pp->p_offset; 4437219089Spjd len = PAGESIZE; 4438219089Spjd /* 4439219089Spjd * If our blocksize is bigger than the page size, try to kluster 4440219089Spjd * multiple pages so that we write a full block (thus avoiding 4441219089Spjd * a read-modify-write). 4442219089Spjd */ 4443219089Spjd if (off < zp->z_size && zp->z_blksz > PAGESIZE) { 4444219089Spjd klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); 4445219089Spjd koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0; 4446219089Spjd ASSERT(koff <= zp->z_size); 4447219089Spjd if (koff + klen > zp->z_size) 4448219089Spjd klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); 4449219089Spjd pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); 4450219089Spjd } 4451219089Spjd ASSERT3U(btop(len), ==, btopr(len)); 4452219089Spjd 4453219089Spjd /* 4454219089Spjd * Can't push pages past end-of-file. 
4455219089Spjd */ 4456219089Spjd if (off >= zp->z_size) { 4457219089Spjd /* ignore all pages */ 4458219089Spjd err = 0; 4459219089Spjd goto out; 4460219089Spjd } else if (off + len > zp->z_size) { 4461219089Spjd int npages = btopr(zp->z_size - off); 4462219089Spjd page_t *trunc; 4463219089Spjd 4464219089Spjd page_list_break(&pp, &trunc, npages); 4465219089Spjd /* ignore pages past end of file */ 4466219089Spjd if (trunc) 4467219089Spjd pvn_write_done(trunc, flags); 4468219089Spjd len = zp->z_size - off; 4469219089Spjd } 4470219089Spjd 4471219089Spjd if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 4472219089Spjd zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4473249195Smm err = SET_ERROR(EDQUOT); 4474219089Spjd goto out; 4475219089Spjd } 4476219089Spjdtop: 4477219089Spjd tx = dmu_tx_create(zfsvfs->z_os); 4478219089Spjd dmu_tx_hold_write(tx, zp->z_id, off, len); 4479219089Spjd 4480219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4481219089Spjd zfs_sa_upgrade_txholds(tx, zp); 4482219089Spjd err = dmu_tx_assign(tx, TXG_NOWAIT); 4483219089Spjd if (err != 0) { 4484219089Spjd if (err == ERESTART) { 4485219089Spjd dmu_tx_wait(tx); 4486219089Spjd dmu_tx_abort(tx); 4487219089Spjd goto top; 4488219089Spjd } 4489219089Spjd dmu_tx_abort(tx); 4490219089Spjd goto out; 4491219089Spjd } 4492219089Spjd 4493219089Spjd if (zp->z_blksz <= PAGESIZE) { 4494219089Spjd caddr_t va = zfs_map_page(pp, S_READ); 4495219089Spjd ASSERT3U(len, <=, PAGESIZE); 4496219089Spjd dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 4497219089Spjd zfs_unmap_page(pp, va); 4498219089Spjd } else { 4499219089Spjd err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 4500219089Spjd } 4501219089Spjd 4502219089Spjd if (err == 0) { 4503219089Spjd uint64_t mtime[2], ctime[2]; 4504219089Spjd sa_bulk_attr_t bulk[3]; 4505219089Spjd int count = 0; 4506219089Spjd 4507219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 4508219089Spjd &mtime, 16); 4509219089Spjd SA_ADD_BULK_ATTR(bulk, count, 
SA_ZPL_CTIME(zfsvfs), NULL, 4510219089Spjd &ctime, 16); 4511219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4512219089Spjd &zp->z_pflags, 8); 4513219089Spjd zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 4514219089Spjd B_TRUE); 4515219089Spjd zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 4516219089Spjd } 4517219089Spjd dmu_tx_commit(tx); 4518219089Spjd 4519219089Spjdout: 4520219089Spjd pvn_write_done(pp, (err ? B_ERROR : 0) | flags); 4521219089Spjd if (offp) 4522219089Spjd *offp = off; 4523219089Spjd if (lenp) 4524219089Spjd *lenp = len; 4525219089Spjd 4526219089Spjd return (err); 4527219089Spjd} 4528219089Spjd 4529219089Spjd/* 4530219089Spjd * Copy the portion of the file indicated from pages into the file. 4531219089Spjd * The pages are stored in a page list attached to the files vnode. 4532219089Spjd * 4533219089Spjd * IN: vp - vnode of file to push page data to. 4534219089Spjd * off - position in file to put data. 4535219089Spjd * len - amount of data to write. 4536219089Spjd * flags - flags to control the operation. 4537219089Spjd * cr - credentials of caller. 4538219089Spjd * ct - caller context. 4539219089Spjd * 4540251631Sdelphij * RETURN: 0 on success, error code on failure. 
4541219089Spjd * 4542219089Spjd * Timestamps: 4543219089Spjd * vp - ctime|mtime updated 4544219089Spjd */ 4545185029Spjd/*ARGSUSED*/ 4546219089Spjdstatic int 4547219089Spjdzfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, 4548219089Spjd caller_context_t *ct) 4549219089Spjd{ 4550219089Spjd znode_t *zp = VTOZ(vp); 4551219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4552219089Spjd page_t *pp; 4553219089Spjd size_t io_len; 4554219089Spjd u_offset_t io_off; 4555219089Spjd uint_t blksz; 4556219089Spjd rl_t *rl; 4557219089Spjd int error = 0; 4558219089Spjd 4559219089Spjd ZFS_ENTER(zfsvfs); 4560219089Spjd ZFS_VERIFY_ZP(zp); 4561219089Spjd 4562219089Spjd /* 4563219089Spjd * Align this request to the file block size in case we kluster. 4564219089Spjd * XXX - this can result in pretty aggresive locking, which can 4565219089Spjd * impact simultanious read/write access. One option might be 4566219089Spjd * to break up long requests (len == 0) into block-by-block 4567219089Spjd * operations to get narrower locking. 4568219089Spjd */ 4569219089Spjd blksz = zp->z_blksz; 4570219089Spjd if (ISP2(blksz)) 4571219089Spjd io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); 4572219089Spjd else 4573219089Spjd io_off = 0; 4574219089Spjd if (len > 0 && ISP2(blksz)) 4575219089Spjd io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); 4576219089Spjd else 4577219089Spjd io_len = 0; 4578219089Spjd 4579219089Spjd if (io_len == 0) { 4580219089Spjd /* 4581219089Spjd * Search the entire vp list for pages >= io_off. 
4582219089Spjd */ 4583219089Spjd rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); 4584219089Spjd error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); 4585219089Spjd goto out; 4586219089Spjd } 4587219089Spjd rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); 4588219089Spjd 4589219089Spjd if (off > zp->z_size) { 4590219089Spjd /* past end of file */ 4591219089Spjd zfs_range_unlock(rl); 4592219089Spjd ZFS_EXIT(zfsvfs); 4593219089Spjd return (0); 4594219089Spjd } 4595219089Spjd 4596219089Spjd len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); 4597219089Spjd 4598219089Spjd for (off = io_off; io_off < off + len; io_off += io_len) { 4599219089Spjd if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 4600219089Spjd pp = page_lookup(vp, io_off, 4601219089Spjd (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); 4602219089Spjd } else { 4603219089Spjd pp = page_lookup_nowait(vp, io_off, 4604219089Spjd (flags & B_FREE) ? SE_EXCL : SE_SHARED); 4605219089Spjd } 4606219089Spjd 4607219089Spjd if (pp != NULL && pvn_getdirty(pp, flags)) { 4608219089Spjd int err; 4609219089Spjd 4610219089Spjd /* 4611219089Spjd * Found a dirty page to push 4612219089Spjd */ 4613219089Spjd err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); 4614219089Spjd if (err) 4615219089Spjd error = err; 4616219089Spjd } else { 4617219089Spjd io_len = PAGESIZE; 4618219089Spjd } 4619219089Spjd } 4620219089Spjdout: 4621219089Spjd zfs_range_unlock(rl); 4622219089Spjd if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4623219089Spjd zil_commit(zfsvfs->z_log, zp->z_id); 4624219089Spjd ZFS_EXIT(zfsvfs); 4625219089Spjd return (error); 4626219089Spjd} 4627219089Spjd#endif /* sun */ 4628219089Spjd 4629219089Spjd/*ARGSUSED*/ 4630168962Spjdvoid 4631185029Spjdzfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4632168404Spjd{ 4633168962Spjd znode_t *zp = VTOZ(vp); 4634168962Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4635168962Spjd int error; 4636168404Spjd 4637185029Spjd 
rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4638219089Spjd if (zp->z_sa_hdl == NULL) { 4639185029Spjd /* 4640185029Spjd * The fs has been unmounted, or we did a 4641185029Spjd * suspend/resume and this file no longer exists. 4642185029Spjd */ 4643243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 4644234607Strasz vrecycle(vp); 4645243520Savg return; 4646243520Savg } 4647243520Savg 4648243520Savg mutex_enter(&zp->z_lock); 4649243520Savg if (zp->z_unlinked) { 4650243520Savg /* 4651243520Savg * Fast path to recycle a vnode of a removed file. 4652243520Savg */ 4653243520Savg mutex_exit(&zp->z_lock); 4654185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4655243520Savg vrecycle(vp); 4656168962Spjd return; 4657168404Spjd } 4658243520Savg mutex_exit(&zp->z_lock); 4659168404Spjd 4660168404Spjd if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4661168404Spjd dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4662168404Spjd 4663219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4664219089Spjd zfs_sa_upgrade_txholds(tx, zp); 4665168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 4666168404Spjd if (error) { 4667168404Spjd dmu_tx_abort(tx); 4668168404Spjd } else { 4669168404Spjd mutex_enter(&zp->z_lock); 4670219089Spjd (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 4671219089Spjd (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4672168404Spjd zp->z_atime_dirty = 0; 4673168404Spjd mutex_exit(&zp->z_lock); 4674168404Spjd dmu_tx_commit(tx); 4675168404Spjd } 4676168404Spjd } 4677185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 4678168404Spjd} 4679168404Spjd 4680219089Spjd#ifdef sun 4681219089Spjd/* 4682219089Spjd * Bounds-check the seek operation. 4683219089Spjd * 4684219089Spjd * IN: vp - vnode seeking within 4685219089Spjd * ooff - old file offset 4686219089Spjd * noffp - pointer to new file offset 4687219089Spjd * ct - caller context 4688219089Spjd * 4689251631Sdelphij * RETURN: 0 on success, EINVAL if new offset invalid. 
4690219089Spjd */ 4691219089Spjd/* ARGSUSED */ 4692219089Spjdstatic int 4693219089Spjdzfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 4694219089Spjd caller_context_t *ct) 4695219089Spjd{ 4696219089Spjd if (vp->v_type == VDIR) 4697219089Spjd return (0); 4698219089Spjd return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 4699219089Spjd} 4700219089Spjd 4701219089Spjd/* 4702219089Spjd * Pre-filter the generic locking function to trap attempts to place 4703219089Spjd * a mandatory lock on a memory mapped file. 4704219089Spjd */ 4705219089Spjdstatic int 4706219089Spjdzfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 4707219089Spjd flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 4708219089Spjd{ 4709219089Spjd znode_t *zp = VTOZ(vp); 4710219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4711219089Spjd 4712219089Spjd ZFS_ENTER(zfsvfs); 4713219089Spjd ZFS_VERIFY_ZP(zp); 4714219089Spjd 4715219089Spjd /* 4716219089Spjd * We are following the UFS semantics with respect to mapcnt 4717219089Spjd * here: If we see that the file is mapped already, then we will 4718219089Spjd * return an error, but we don't worry about races between this 4719219089Spjd * function and zfs_map(). 4720219089Spjd */ 4721219089Spjd if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { 4722219089Spjd ZFS_EXIT(zfsvfs); 4723249195Smm return (SET_ERROR(EAGAIN)); 4724219089Spjd } 4725219089Spjd ZFS_EXIT(zfsvfs); 4726219089Spjd return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 4727219089Spjd} 4728219089Spjd 4729219089Spjd/* 4730219089Spjd * If we can't find a page in the cache, we will create a new page 4731219089Spjd * and fill it with file data. For efficiency, we may try to fill 4732219089Spjd * multiple pages at once (klustering) to fill up the supplied page 4733219089Spjd * list. Note that the pages to be filled are held with an exclusive 4734219089Spjd * lock to prevent access by other threads while they are being filled. 
4735219089Spjd */ 4736219089Spjdstatic int 4737219089Spjdzfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, 4738219089Spjd caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) 4739219089Spjd{ 4740219089Spjd znode_t *zp = VTOZ(vp); 4741219089Spjd page_t *pp, *cur_pp; 4742219089Spjd objset_t *os = zp->z_zfsvfs->z_os; 4743219089Spjd u_offset_t io_off, total; 4744219089Spjd size_t io_len; 4745219089Spjd int err; 4746219089Spjd 4747219089Spjd if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { 4748219089Spjd /* 4749219089Spjd * We only have a single page, don't bother klustering 4750219089Spjd */ 4751219089Spjd io_off = off; 4752219089Spjd io_len = PAGESIZE; 4753219089Spjd pp = page_create_va(vp, io_off, io_len, 4754219089Spjd PG_EXCL | PG_WAIT, seg, addr); 4755219089Spjd } else { 4756219089Spjd /* 4757219089Spjd * Try to find enough pages to fill the page list 4758219089Spjd */ 4759219089Spjd pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 4760219089Spjd &io_len, off, plsz, 0); 4761219089Spjd } 4762219089Spjd if (pp == NULL) { 4763219089Spjd /* 4764219089Spjd * The page already exists, nothing to do here. 4765219089Spjd */ 4766219089Spjd *pl = NULL; 4767219089Spjd return (0); 4768219089Spjd } 4769219089Spjd 4770219089Spjd /* 4771219089Spjd * Fill the pages in the kluster. 
4772219089Spjd */ 4773219089Spjd cur_pp = pp; 4774219089Spjd for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { 4775219089Spjd caddr_t va; 4776219089Spjd 4777219089Spjd ASSERT3U(io_off, ==, cur_pp->p_offset); 4778219089Spjd va = zfs_map_page(cur_pp, S_WRITE); 4779219089Spjd err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, 4780219089Spjd DMU_READ_PREFETCH); 4781219089Spjd zfs_unmap_page(cur_pp, va); 4782219089Spjd if (err) { 4783219089Spjd /* On error, toss the entire kluster */ 4784219089Spjd pvn_read_done(pp, B_ERROR); 4785219089Spjd /* convert checksum errors into IO errors */ 4786219089Spjd if (err == ECKSUM) 4787249195Smm err = SET_ERROR(EIO); 4788219089Spjd return (err); 4789219089Spjd } 4790219089Spjd cur_pp = cur_pp->p_next; 4791219089Spjd } 4792219089Spjd 4793219089Spjd /* 4794219089Spjd * Fill in the page list array from the kluster starting 4795219089Spjd * from the desired offset `off'. 4796219089Spjd * NOTE: the page list will always be null terminated. 4797219089Spjd */ 4798219089Spjd pvn_plist_init(pp, pl, plsz, off, io_len, rw); 4799219089Spjd ASSERT(pl == NULL || (*pl)->p_offset == off); 4800219089Spjd 4801219089Spjd return (0); 4802219089Spjd} 4803219089Spjd 4804219089Spjd/* 4805219089Spjd * Return pointers to the pages for the file region [off, off + len] 4806219089Spjd * in the pl array. If plsz is greater than len, this function may 4807219089Spjd * also return page pointers from after the specified region 4808219089Spjd * (i.e. the region [off, off + plsz]). These additional pages are 4809219089Spjd * only returned if they are already in the cache, or were created as 4810219089Spjd * part of a klustered read. 4811219089Spjd * 4812219089Spjd * IN: vp - vnode of file to get data from. 4813219089Spjd * off - position in file to get data from. 4814219089Spjd * len - amount of data to retrieve. 4815219089Spjd * plsz - length of provided page list. 4816219089Spjd * seg - segment to obtain pages for. 
4817219089Spjd * addr - virtual address of fault. 4818219089Spjd * rw - mode of created pages. 4819219089Spjd * cr - credentials of caller. 4820219089Spjd * ct - caller context. 4821219089Spjd * 4822219089Spjd * OUT: protp - protection mode of created pages. 4823219089Spjd * pl - list of pages created. 4824219089Spjd * 4825251631Sdelphij * RETURN: 0 on success, error code on failure. 4826219089Spjd * 4827219089Spjd * Timestamps: 4828219089Spjd * vp - atime updated 4829219089Spjd */ 4830219089Spjd/* ARGSUSED */ 4831219089Spjdstatic int 4832219089Spjdzfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 4833251631Sdelphij page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, 4834251631Sdelphij enum seg_rw rw, cred_t *cr, caller_context_t *ct) 4835219089Spjd{ 4836219089Spjd znode_t *zp = VTOZ(vp); 4837219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4838219089Spjd page_t **pl0 = pl; 4839219089Spjd int err = 0; 4840219089Spjd 4841219089Spjd /* we do our own caching, faultahead is unnecessary */ 4842219089Spjd if (pl == NULL) 4843219089Spjd return (0); 4844219089Spjd else if (len > plsz) 4845219089Spjd len = plsz; 4846219089Spjd else 4847219089Spjd len = P2ROUNDUP(len, PAGESIZE); 4848219089Spjd ASSERT(plsz >= len); 4849219089Spjd 4850219089Spjd ZFS_ENTER(zfsvfs); 4851219089Spjd ZFS_VERIFY_ZP(zp); 4852219089Spjd 4853219089Spjd if (protp) 4854219089Spjd *protp = PROT_ALL; 4855219089Spjd 4856219089Spjd /* 4857219089Spjd * Loop through the requested range [off, off + len) looking 4858219089Spjd * for pages. If we don't find a page, we will need to create 4859219089Spjd * a new page and fill it with data from the file. 
4860219089Spjd */ 4861219089Spjd while (len > 0) { 4862219089Spjd if (*pl = page_lookup(vp, off, SE_SHARED)) 4863219089Spjd *(pl+1) = NULL; 4864219089Spjd else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) 4865219089Spjd goto out; 4866219089Spjd while (*pl) { 4867219089Spjd ASSERT3U((*pl)->p_offset, ==, off); 4868219089Spjd off += PAGESIZE; 4869219089Spjd addr += PAGESIZE; 4870219089Spjd if (len > 0) { 4871219089Spjd ASSERT3U(len, >=, PAGESIZE); 4872219089Spjd len -= PAGESIZE; 4873219089Spjd } 4874219089Spjd ASSERT3U(plsz, >=, PAGESIZE); 4875219089Spjd plsz -= PAGESIZE; 4876219089Spjd pl++; 4877219089Spjd } 4878219089Spjd } 4879219089Spjd 4880219089Spjd /* 4881219089Spjd * Fill out the page array with any pages already in the cache. 4882219089Spjd */ 4883219089Spjd while (plsz > 0 && 4884219089Spjd (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { 4885219089Spjd off += PAGESIZE; 4886219089Spjd plsz -= PAGESIZE; 4887219089Spjd } 4888219089Spjdout: 4889219089Spjd if (err) { 4890219089Spjd /* 4891219089Spjd * Release any pages we have previously locked. 4892219089Spjd */ 4893219089Spjd while (pl > pl0) 4894219089Spjd page_unlock(*--pl); 4895219089Spjd } else { 4896219089Spjd ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4897219089Spjd } 4898219089Spjd 4899219089Spjd *pl = NULL; 4900219089Spjd 4901219089Spjd ZFS_EXIT(zfsvfs); 4902219089Spjd return (err); 4903219089Spjd} 4904219089Spjd 4905219089Spjd/* 4906219089Spjd * Request a memory map for a section of a file. 
This code interacts 4907219089Spjd * with common code and the VM system as follows: 4908219089Spjd * 4909251631Sdelphij * - common code calls mmap(), which ends up in smmap_common() 4910251631Sdelphij * - this calls VOP_MAP(), which takes you into (say) zfs 4911251631Sdelphij * - zfs_map() calls as_map(), passing segvn_create() as the callback 4912251631Sdelphij * - segvn_create() creates the new segment and calls VOP_ADDMAP() 4913251631Sdelphij * - zfs_addmap() updates z_mapcnt 4914219089Spjd */ 4915219089Spjd/*ARGSUSED*/ 4916219089Spjdstatic int 4917219089Spjdzfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 4918219089Spjd size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4919219089Spjd caller_context_t *ct) 4920219089Spjd{ 4921219089Spjd znode_t *zp = VTOZ(vp); 4922219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4923219089Spjd segvn_crargs_t vn_a; 4924219089Spjd int error; 4925219089Spjd 4926219089Spjd ZFS_ENTER(zfsvfs); 4927219089Spjd ZFS_VERIFY_ZP(zp); 4928219089Spjd 4929219089Spjd if ((prot & PROT_WRITE) && (zp->z_pflags & 4930219089Spjd (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { 4931219089Spjd ZFS_EXIT(zfsvfs); 4932249195Smm return (SET_ERROR(EPERM)); 4933219089Spjd } 4934219089Spjd 4935219089Spjd if ((prot & (PROT_READ | PROT_EXEC)) && 4936219089Spjd (zp->z_pflags & ZFS_AV_QUARANTINED)) { 4937219089Spjd ZFS_EXIT(zfsvfs); 4938249195Smm return (SET_ERROR(EACCES)); 4939219089Spjd } 4940219089Spjd 4941219089Spjd if (vp->v_flag & VNOMAP) { 4942219089Spjd ZFS_EXIT(zfsvfs); 4943249195Smm return (SET_ERROR(ENOSYS)); 4944219089Spjd } 4945219089Spjd 4946219089Spjd if (off < 0 || len > MAXOFFSET_T - off) { 4947219089Spjd ZFS_EXIT(zfsvfs); 4948249195Smm return (SET_ERROR(ENXIO)); 4949219089Spjd } 4950219089Spjd 4951219089Spjd if (vp->v_type != VREG) { 4952219089Spjd ZFS_EXIT(zfsvfs); 4953249195Smm return (SET_ERROR(ENODEV)); 4954219089Spjd } 4955219089Spjd 4956219089Spjd /* 4957219089Spjd * If file is locked, disallow mapping. 
4958219089Spjd */ 4959219089Spjd if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) { 4960219089Spjd ZFS_EXIT(zfsvfs); 4961249195Smm return (SET_ERROR(EAGAIN)); 4962219089Spjd } 4963219089Spjd 4964219089Spjd as_rangelock(as); 4965219089Spjd error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 4966219089Spjd if (error != 0) { 4967219089Spjd as_rangeunlock(as); 4968219089Spjd ZFS_EXIT(zfsvfs); 4969219089Spjd return (error); 4970219089Spjd } 4971219089Spjd 4972219089Spjd vn_a.vp = vp; 4973219089Spjd vn_a.offset = (u_offset_t)off; 4974219089Spjd vn_a.type = flags & MAP_TYPE; 4975219089Spjd vn_a.prot = prot; 4976219089Spjd vn_a.maxprot = maxprot; 4977219089Spjd vn_a.cred = cr; 4978219089Spjd vn_a.amp = NULL; 4979219089Spjd vn_a.flags = flags & ~MAP_TYPE; 4980219089Spjd vn_a.szc = 0; 4981219089Spjd vn_a.lgrp_mem_policy_flags = 0; 4982219089Spjd 4983219089Spjd error = as_map(as, *addrp, len, segvn_create, &vn_a); 4984219089Spjd 4985219089Spjd as_rangeunlock(as); 4986219089Spjd ZFS_EXIT(zfsvfs); 4987219089Spjd return (error); 4988219089Spjd} 4989219089Spjd 4990219089Spjd/* ARGSUSED */ 4991219089Spjdstatic int 4992219089Spjdzfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 4993219089Spjd size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4994219089Spjd caller_context_t *ct) 4995219089Spjd{ 4996219089Spjd uint64_t pages = btopr(len); 4997219089Spjd 4998219089Spjd atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); 4999219089Spjd return (0); 5000219089Spjd} 5001219089Spjd 5002219089Spjd/* 5003219089Spjd * The reason we push dirty pages as part of zfs_delmap() is so that we get a 5004219089Spjd * more accurate mtime for the associated file. Since we don't have a way of 5005219089Spjd * detecting when the data was actually modified, we have to resort to 5006219089Spjd * heuristics. If an explicit msync() is done, then we mark the mtime when the 5007219089Spjd * last page is pushed. 
The problem occurs when the msync() call is omitted, 5008219089Spjd * which by far the most common case: 5009219089Spjd * 5010219089Spjd * open() 5011219089Spjd * mmap() 5012219089Spjd * <modify memory> 5013219089Spjd * munmap() 5014219089Spjd * close() 5015219089Spjd * <time lapse> 5016219089Spjd * putpage() via fsflush 5017219089Spjd * 5018219089Spjd * If we wait until fsflush to come along, we can have a modification time that 5019219089Spjd * is some arbitrary point in the future. In order to prevent this in the 5020219089Spjd * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is 5021219089Spjd * torn down. 5022219089Spjd */ 5023219089Spjd/* ARGSUSED */ 5024219089Spjdstatic int 5025219089Spjdzfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 5026219089Spjd size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 5027219089Spjd caller_context_t *ct) 5028219089Spjd{ 5029219089Spjd uint64_t pages = btopr(len); 5030219089Spjd 5031219089Spjd ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 5032219089Spjd atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 5033219089Spjd 5034219089Spjd if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 5035219089Spjd vn_has_cached_data(vp)) 5036219089Spjd (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 5037219089Spjd 5038219089Spjd return (0); 5039219089Spjd} 5040219089Spjd 5041219089Spjd/* 5042219089Spjd * Free or allocate space in a file. Currently, this function only 5043219089Spjd * supports the `F_FREESP' command. However, this command is somewhat 5044219089Spjd * misnamed, as its functionality includes the ability to allocate as 5045219089Spjd * well as free space. 5046219089Spjd * 5047219089Spjd * IN: vp - vnode of file to free data in. 5048219089Spjd * cmd - action to take (only F_FREESP supported). 5049219089Spjd * bfp - section of file to free/alloc. 5050219089Spjd * flag - current file open mode flags. 5051219089Spjd * offset - current file offset. 
5052219089Spjd * cr - credentials of caller [UNUSED]. 5053219089Spjd * ct - caller context. 5054219089Spjd * 5055251631Sdelphij * RETURN: 0 on success, error code on failure. 5056219089Spjd * 5057219089Spjd * Timestamps: 5058219089Spjd * vp - ctime|mtime updated 5059219089Spjd */ 5060219089Spjd/* ARGSUSED */ 5061219089Spjdstatic int 5062219089Spjdzfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, 5063219089Spjd offset_t offset, cred_t *cr, caller_context_t *ct) 5064219089Spjd{ 5065219089Spjd znode_t *zp = VTOZ(vp); 5066219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5067219089Spjd uint64_t off, len; 5068219089Spjd int error; 5069219089Spjd 5070219089Spjd ZFS_ENTER(zfsvfs); 5071219089Spjd ZFS_VERIFY_ZP(zp); 5072219089Spjd 5073219089Spjd if (cmd != F_FREESP) { 5074219089Spjd ZFS_EXIT(zfsvfs); 5075249195Smm return (SET_ERROR(EINVAL)); 5076219089Spjd } 5077219089Spjd 5078219089Spjd if (error = convoff(vp, bfp, 0, offset)) { 5079219089Spjd ZFS_EXIT(zfsvfs); 5080219089Spjd return (error); 5081219089Spjd } 5082219089Spjd 5083219089Spjd if (bfp->l_len < 0) { 5084219089Spjd ZFS_EXIT(zfsvfs); 5085249195Smm return (SET_ERROR(EINVAL)); 5086219089Spjd } 5087219089Spjd 5088219089Spjd off = bfp->l_start; 5089219089Spjd len = bfp->l_len; /* 0 means from off to end of file */ 5090219089Spjd 5091219089Spjd error = zfs_freesp(zp, off, len, flag, TRUE); 5092219089Spjd 5093219089Spjd ZFS_EXIT(zfsvfs); 5094219089Spjd return (error); 5095219089Spjd} 5096219089Spjd#endif /* sun */ 5097219089Spjd 5098168404SpjdCTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 5099168404SpjdCTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 5100168404Spjd 5101185029Spjd/*ARGSUSED*/ 5102168404Spjdstatic int 5103185029Spjdzfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 5104168404Spjd{ 5105168404Spjd znode_t *zp = VTOZ(vp); 5106168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5107185029Spjd uint32_t gen; 5108219089Spjd uint64_t gen64; 5109168404Spjd uint64_t object = zp->z_id; 
5110168404Spjd zfid_short_t *zfid; 5111219089Spjd int size, i, error; 5112168404Spjd 5113168404Spjd ZFS_ENTER(zfsvfs); 5114185029Spjd ZFS_VERIFY_ZP(zp); 5115168404Spjd 5116219089Spjd if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 5117219089Spjd &gen64, sizeof (uint64_t))) != 0) { 5118219089Spjd ZFS_EXIT(zfsvfs); 5119219089Spjd return (error); 5120219089Spjd } 5121219089Spjd 5122219089Spjd gen = (uint32_t)gen64; 5123219089Spjd 5124168404Spjd size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 5125249195Smm 5126249195Smm#ifdef illumos 5127249195Smm if (fidp->fid_len < size) { 5128249195Smm fidp->fid_len = size; 5129249195Smm ZFS_EXIT(zfsvfs); 5130249195Smm return (SET_ERROR(ENOSPC)); 5131249195Smm } 5132249195Smm#else 5133168404Spjd fidp->fid_len = size; 5134249195Smm#endif 5135168404Spjd 5136168404Spjd zfid = (zfid_short_t *)fidp; 5137168404Spjd 5138168404Spjd zfid->zf_len = size; 5139168404Spjd 5140168404Spjd for (i = 0; i < sizeof (zfid->zf_object); i++) 5141168404Spjd zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 5142168404Spjd 5143168404Spjd /* Must have a non-zero generation number to distinguish from .zfs */ 5144168404Spjd if (gen == 0) 5145168404Spjd gen = 1; 5146168404Spjd for (i = 0; i < sizeof (zfid->zf_gen); i++) 5147168404Spjd zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 5148168404Spjd 5149168404Spjd if (size == LONG_FID_LEN) { 5150168404Spjd uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 5151169023Spjd zfid_long_t *zlfid; 5152168404Spjd 5153168404Spjd zlfid = (zfid_long_t *)fidp; 5154168404Spjd 5155168404Spjd for (i = 0; i < sizeof (zlfid->zf_setid); i++) 5156168404Spjd zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 5157168404Spjd 5158168404Spjd /* XXX - this should be the generation number for the objset */ 5159168404Spjd for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 5160168404Spjd zlfid->zf_setgen[i] = 0; 5161168404Spjd } 5162168404Spjd 5163168404Spjd ZFS_EXIT(zfsvfs); 5164168404Spjd return (0); 5165168404Spjd} 
5166168404Spjd 5167168404Spjdstatic int 5168185029Spjdzfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 5169185029Spjd caller_context_t *ct) 5170168404Spjd{ 5171168404Spjd znode_t *zp, *xzp; 5172168404Spjd zfsvfs_t *zfsvfs; 5173168404Spjd zfs_dirlock_t *dl; 5174168404Spjd int error; 5175168404Spjd 5176168404Spjd switch (cmd) { 5177168404Spjd case _PC_LINK_MAX: 5178168404Spjd *valp = INT_MAX; 5179168404Spjd return (0); 5180168404Spjd 5181168404Spjd case _PC_FILESIZEBITS: 5182168404Spjd *valp = 64; 5183168404Spjd return (0); 5184219089Spjd#ifdef sun 5185168404Spjd case _PC_XATTR_EXISTS: 5186168404Spjd zp = VTOZ(vp); 5187168404Spjd zfsvfs = zp->z_zfsvfs; 5188168404Spjd ZFS_ENTER(zfsvfs); 5189185029Spjd ZFS_VERIFY_ZP(zp); 5190168404Spjd *valp = 0; 5191168404Spjd error = zfs_dirent_lock(&dl, zp, "", &xzp, 5192185029Spjd ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 5193168404Spjd if (error == 0) { 5194168404Spjd zfs_dirent_unlock(dl); 5195168404Spjd if (!zfs_dirempty(xzp)) 5196168404Spjd *valp = 1; 5197168404Spjd VN_RELE(ZTOV(xzp)); 5198168404Spjd } else if (error == ENOENT) { 5199168404Spjd /* 5200168404Spjd * If there aren't extended attributes, it's the 5201168404Spjd * same as having zero of them. 
5202168404Spjd */ 5203168404Spjd error = 0; 5204168404Spjd } 5205168404Spjd ZFS_EXIT(zfsvfs); 5206168404Spjd return (error); 5207168404Spjd 5208219089Spjd case _PC_SATTR_ENABLED: 5209219089Spjd case _PC_SATTR_EXISTS: 5210219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 5211219089Spjd (vp->v_type == VREG || vp->v_type == VDIR); 5212219089Spjd return (0); 5213219089Spjd 5214219089Spjd case _PC_ACCESS_FILTERING: 5215219089Spjd *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 5216219089Spjd vp->v_type == VDIR; 5217219089Spjd return (0); 5218219089Spjd 5219219089Spjd case _PC_ACL_ENABLED: 5220219089Spjd *valp = _ACL_ACE_ENABLED; 5221219089Spjd return (0); 5222219089Spjd#endif /* sun */ 5223219089Spjd case _PC_MIN_HOLE_SIZE: 5224219089Spjd *valp = (int)SPA_MINBLOCKSIZE; 5225219089Spjd return (0); 5226219089Spjd#ifdef sun 5227219089Spjd case _PC_TIMESTAMP_RESOLUTION: 5228219089Spjd /* nanosecond timestamp resolution */ 5229219089Spjd *valp = 1L; 5230219089Spjd return (0); 5231219089Spjd#endif /* sun */ 5232168404Spjd case _PC_ACL_EXTENDED: 5233196949Strasz *valp = 0; 5234168404Spjd return (0); 5235168404Spjd 5236196949Strasz case _PC_ACL_NFS4: 5237196949Strasz *valp = 1; 5238196949Strasz return (0); 5239196949Strasz 5240196949Strasz case _PC_ACL_PATH_MAX: 5241196949Strasz *valp = ACL_MAX_ENTRIES; 5242196949Strasz return (0); 5243196949Strasz 5244168404Spjd default: 5245168962Spjd return (EOPNOTSUPP); 5246168404Spjd } 5247168404Spjd} 5248168404Spjd 5249168404Spjd/*ARGSUSED*/ 5250168404Spjdstatic int 5251185029Spjdzfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5252185029Spjd caller_context_t *ct) 5253168404Spjd{ 5254168404Spjd znode_t *zp = VTOZ(vp); 5255168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5256168404Spjd int error; 5257185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 5258168404Spjd 5259168404Spjd ZFS_ENTER(zfsvfs); 5260185029Spjd ZFS_VERIFY_ZP(zp); 5261185029Spjd error = zfs_getacl(zp, vsecp, skipaclchk, cr); 5262168404Spjd ZFS_EXIT(zfsvfs); 5263168404Spjd 5264168404Spjd return (error); 5265168404Spjd} 5266168404Spjd 5267168404Spjd/*ARGSUSED*/ 5268228685Spjdint 5269185029Spjdzfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5270185029Spjd caller_context_t *ct) 5271168404Spjd{ 5272168404Spjd znode_t *zp = VTOZ(vp); 5273168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5274168404Spjd int error; 5275185029Spjd boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 5276219089Spjd zilog_t *zilog = zfsvfs->z_log; 5277168404Spjd 5278168404Spjd ZFS_ENTER(zfsvfs); 5279185029Spjd ZFS_VERIFY_ZP(zp); 5280219089Spjd 5281185029Spjd error = zfs_setacl(zp, vsecp, skipaclchk, cr); 5282219089Spjd 5283219089Spjd if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5284219089Spjd zil_commit(zilog, 0); 5285219089Spjd 5286168404Spjd ZFS_EXIT(zfsvfs); 5287168404Spjd return (error); 5288168404Spjd} 5289168404Spjd 5290219089Spjd#ifdef sun 5291219089Spjd/* 5292251631Sdelphij * The smallest read we may consider to loan out an arcbuf. 5293251631Sdelphij * This must be a power of 2. 5294219089Spjd */ 5295219089Spjdint zcr_blksz_min = (1 << 10); /* 1K */ 5296251631Sdelphij/* 5297251631Sdelphij * If set to less than the file block size, allow loaning out of an 5298251631Sdelphij * arcbuf for a partial block read. This must be a power of 2. 
5299251631Sdelphij */ 5300219089Spjdint zcr_blksz_max = (1 << 17); /* 128K */ 5301219089Spjd 5302219089Spjd/*ARGSUSED*/ 5303168962Spjdstatic int 5304219089Spjdzfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 5305219089Spjd caller_context_t *ct) 5306219089Spjd{ 5307219089Spjd znode_t *zp = VTOZ(vp); 5308219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5309219089Spjd int max_blksz = zfsvfs->z_max_blksz; 5310219089Spjd uio_t *uio = &xuio->xu_uio; 5311219089Spjd ssize_t size = uio->uio_resid; 5312219089Spjd offset_t offset = uio->uio_loffset; 5313219089Spjd int blksz; 5314219089Spjd int fullblk, i; 5315219089Spjd arc_buf_t *abuf; 5316219089Spjd ssize_t maxsize; 5317219089Spjd int preamble, postamble; 5318219089Spjd 5319219089Spjd if (xuio->xu_type != UIOTYPE_ZEROCOPY) 5320249195Smm return (SET_ERROR(EINVAL)); 5321219089Spjd 5322219089Spjd ZFS_ENTER(zfsvfs); 5323219089Spjd ZFS_VERIFY_ZP(zp); 5324219089Spjd switch (ioflag) { 5325219089Spjd case UIO_WRITE: 5326219089Spjd /* 5327219089Spjd * Loan out an arc_buf for write if write size is bigger than 5328219089Spjd * max_blksz, and the file's block size is also max_blksz. 5329219089Spjd */ 5330219089Spjd blksz = max_blksz; 5331219089Spjd if (size < blksz || zp->z_blksz != blksz) { 5332219089Spjd ZFS_EXIT(zfsvfs); 5333249195Smm return (SET_ERROR(EINVAL)); 5334219089Spjd } 5335219089Spjd /* 5336219089Spjd * Caller requests buffers for write before knowing where the 5337219089Spjd * write offset might be (e.g. NFS TCP write). 
5338219089Spjd */ 5339219089Spjd if (offset == -1) { 5340219089Spjd preamble = 0; 5341219089Spjd } else { 5342219089Spjd preamble = P2PHASE(offset, blksz); 5343219089Spjd if (preamble) { 5344219089Spjd preamble = blksz - preamble; 5345219089Spjd size -= preamble; 5346219089Spjd } 5347219089Spjd } 5348219089Spjd 5349219089Spjd postamble = P2PHASE(size, blksz); 5350219089Spjd size -= postamble; 5351219089Spjd 5352219089Spjd fullblk = size / blksz; 5353219089Spjd (void) dmu_xuio_init(xuio, 5354219089Spjd (preamble != 0) + fullblk + (postamble != 0)); 5355219089Spjd DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble, 5356219089Spjd int, postamble, int, 5357219089Spjd (preamble != 0) + fullblk + (postamble != 0)); 5358219089Spjd 5359219089Spjd /* 5360219089Spjd * Have to fix iov base/len for partial buffers. They 5361219089Spjd * currently represent full arc_buf's. 5362219089Spjd */ 5363219089Spjd if (preamble) { 5364219089Spjd /* data begins in the middle of the arc_buf */ 5365219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5366219089Spjd blksz); 5367219089Spjd ASSERT(abuf); 5368219089Spjd (void) dmu_xuio_add(xuio, abuf, 5369219089Spjd blksz - preamble, preamble); 5370219089Spjd } 5371219089Spjd 5372219089Spjd for (i = 0; i < fullblk; i++) { 5373219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5374219089Spjd blksz); 5375219089Spjd ASSERT(abuf); 5376219089Spjd (void) dmu_xuio_add(xuio, abuf, 0, blksz); 5377219089Spjd } 5378219089Spjd 5379219089Spjd if (postamble) { 5380219089Spjd /* data ends in the middle of the arc_buf */ 5381219089Spjd abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5382219089Spjd blksz); 5383219089Spjd ASSERT(abuf); 5384219089Spjd (void) dmu_xuio_add(xuio, abuf, 0, postamble); 5385219089Spjd } 5386219089Spjd break; 5387219089Spjd case UIO_READ: 5388219089Spjd /* 5389219089Spjd * Loan out an arc_buf for read if the read size is larger than 5390219089Spjd * the current file block size. 
Block alignment is not 5391219089Spjd * considered. Partial arc_buf will be loaned out for read. 5392219089Spjd */ 5393219089Spjd blksz = zp->z_blksz; 5394219089Spjd if (blksz < zcr_blksz_min) 5395219089Spjd blksz = zcr_blksz_min; 5396219089Spjd if (blksz > zcr_blksz_max) 5397219089Spjd blksz = zcr_blksz_max; 5398219089Spjd /* avoid potential complexity of dealing with it */ 5399219089Spjd if (blksz > max_blksz) { 5400219089Spjd ZFS_EXIT(zfsvfs); 5401249195Smm return (SET_ERROR(EINVAL)); 5402219089Spjd } 5403219089Spjd 5404219089Spjd maxsize = zp->z_size - uio->uio_loffset; 5405219089Spjd if (size > maxsize) 5406219089Spjd size = maxsize; 5407219089Spjd 5408219089Spjd if (size < blksz || vn_has_cached_data(vp)) { 5409219089Spjd ZFS_EXIT(zfsvfs); 5410249195Smm return (SET_ERROR(EINVAL)); 5411219089Spjd } 5412219089Spjd break; 5413219089Spjd default: 5414219089Spjd ZFS_EXIT(zfsvfs); 5415249195Smm return (SET_ERROR(EINVAL)); 5416219089Spjd } 5417219089Spjd 5418219089Spjd uio->uio_extflg = UIO_XUIO; 5419219089Spjd XUIO_XUZC_RW(xuio) = ioflag; 5420219089Spjd ZFS_EXIT(zfsvfs); 5421219089Spjd return (0); 5422219089Spjd} 5423219089Spjd 5424219089Spjd/*ARGSUSED*/ 5425219089Spjdstatic int 5426219089Spjdzfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 5427219089Spjd{ 5428219089Spjd int i; 5429219089Spjd arc_buf_t *abuf; 5430219089Spjd int ioflag = XUIO_XUZC_RW(xuio); 5431219089Spjd 5432219089Spjd ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 5433219089Spjd 5434219089Spjd i = dmu_xuio_cnt(xuio); 5435219089Spjd while (i-- > 0) { 5436219089Spjd abuf = dmu_xuio_arcbuf(xuio, i); 5437219089Spjd /* 5438219089Spjd * if abuf == NULL, it must be a write buffer 5439219089Spjd * that has been returned in zfs_write(). 
5440219089Spjd */ 5441219089Spjd if (abuf) 5442219089Spjd dmu_return_arcbuf(abuf); 5443219089Spjd ASSERT(abuf || ioflag == UIO_WRITE); 5444219089Spjd } 5445219089Spjd 5446219089Spjd dmu_xuio_fini(xuio); 5447219089Spjd return (0); 5448219089Spjd} 5449219089Spjd 5450219089Spjd/* 5451219089Spjd * Predeclare these here so that the compiler assumes that 5452219089Spjd * this is an "old style" function declaration that does 5453219089Spjd * not include arguments => we won't get type mismatch errors 5454219089Spjd * in the initializations that follow. 5455219089Spjd */ 5456219089Spjdstatic int zfs_inval(); 5457219089Spjdstatic int zfs_isdir(); 5458219089Spjd 5459219089Spjdstatic int 5460219089Spjdzfs_inval() 5461219089Spjd{ 5462249195Smm return (SET_ERROR(EINVAL)); 5463219089Spjd} 5464219089Spjd 5465219089Spjdstatic int 5466219089Spjdzfs_isdir() 5467219089Spjd{ 5468249195Smm return (SET_ERROR(EISDIR)); 5469219089Spjd} 5470219089Spjd/* 5471219089Spjd * Directory vnode operations template 5472219089Spjd */ 5473219089Spjdvnodeops_t *zfs_dvnodeops; 5474219089Spjdconst fs_operation_def_t zfs_dvnodeops_template[] = { 5475219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5476219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5477219089Spjd VOPNAME_READ, { .error = zfs_isdir }, 5478219089Spjd VOPNAME_WRITE, { .error = zfs_isdir }, 5479219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5480219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5481219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5482219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5483219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5484219089Spjd VOPNAME_CREATE, { .vop_create = zfs_create }, 5485219089Spjd VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5486219089Spjd VOPNAME_LINK, { .vop_link = zfs_link }, 5487219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5488219089Spjd VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, 5489219089Spjd VOPNAME_RMDIR, { .vop_rmdir = 
zfs_rmdir }, 5490219089Spjd VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5491219089Spjd VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, 5492219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5493219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5494219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5495219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5496219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5497219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5498219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5499219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5500219089Spjd NULL, NULL 5501219089Spjd}; 5502219089Spjd 5503219089Spjd/* 5504219089Spjd * Regular file vnode operations template 5505219089Spjd */ 5506219089Spjdvnodeops_t *zfs_fvnodeops; 5507219089Spjdconst fs_operation_def_t zfs_fvnodeops_template[] = { 5508219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5509219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5510219089Spjd VOPNAME_READ, { .vop_read = zfs_read }, 5511219089Spjd VOPNAME_WRITE, { .vop_write = zfs_write }, 5512219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5513219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5514219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5515219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5516219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5517219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5518219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5519219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5520219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5521219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5522219089Spjd VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, 5523219089Spjd VOPNAME_SPACE, { .vop_space = zfs_space }, 5524219089Spjd VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, 5525219089Spjd VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, 
5526219089Spjd VOPNAME_MAP, { .vop_map = zfs_map }, 5527219089Spjd VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, 5528219089Spjd VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, 5529219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5530219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5531219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5532219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5533219089Spjd VOPNAME_REQZCBUF, { .vop_reqzcbuf = zfs_reqzcbuf }, 5534219089Spjd VOPNAME_RETZCBUF, { .vop_retzcbuf = zfs_retzcbuf }, 5535219089Spjd NULL, NULL 5536219089Spjd}; 5537219089Spjd 5538219089Spjd/* 5539219089Spjd * Symbolic link vnode operations template 5540219089Spjd */ 5541219089Spjdvnodeops_t *zfs_symvnodeops; 5542219089Spjdconst fs_operation_def_t zfs_symvnodeops_template[] = { 5543219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5544219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5545219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5546219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5547219089Spjd VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, 5548219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5549219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5550219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5551219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5552219089Spjd NULL, NULL 5553219089Spjd}; 5554219089Spjd 5555219089Spjd/* 5556219089Spjd * special share hidden files vnode operations template 5557219089Spjd */ 5558219089Spjdvnodeops_t *zfs_sharevnodeops; 5559219089Spjdconst fs_operation_def_t zfs_sharevnodeops_template[] = { 5560219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5561219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5562219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5563219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5564219089Spjd VOPNAME_PATHCONF, { 
.vop_pathconf = zfs_pathconf }, 5565219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5566219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5567219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5568219089Spjd NULL, NULL 5569219089Spjd}; 5570219089Spjd 5571219089Spjd/* 5572219089Spjd * Extended attribute directory vnode operations template 5573251631Sdelphij * 5574251631Sdelphij * This template is identical to the directory vnodes 5575251631Sdelphij * operation template except for restricted operations: 5576251631Sdelphij * VOP_MKDIR() 5577251631Sdelphij * VOP_SYMLINK() 5578251631Sdelphij * 5579219089Spjd * Note that there are other restrictions embedded in: 5580219089Spjd * zfs_create() - restrict type to VREG 5581219089Spjd * zfs_link() - no links into/out of attribute space 5582219089Spjd * zfs_rename() - no moves into/out of attribute space 5583219089Spjd */ 5584219089Spjdvnodeops_t *zfs_xdvnodeops; 5585219089Spjdconst fs_operation_def_t zfs_xdvnodeops_template[] = { 5586219089Spjd VOPNAME_OPEN, { .vop_open = zfs_open }, 5587219089Spjd VOPNAME_CLOSE, { .vop_close = zfs_close }, 5588219089Spjd VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5589219089Spjd VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5590219089Spjd VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5591219089Spjd VOPNAME_ACCESS, { .vop_access = zfs_access }, 5592219089Spjd VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5593219089Spjd VOPNAME_CREATE, { .vop_create = zfs_create }, 5594219089Spjd VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5595219089Spjd VOPNAME_LINK, { .vop_link = zfs_link }, 5596219089Spjd VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5597219089Spjd VOPNAME_MKDIR, { .error = zfs_inval }, 5598219089Spjd VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5599219089Spjd VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5600219089Spjd VOPNAME_SYMLINK, { .error = zfs_inval }, 5601219089Spjd VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 
5602219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5603219089Spjd VOPNAME_FID, { .vop_fid = zfs_fid }, 5604219089Spjd VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5605219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5606219089Spjd VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5607219089Spjd VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5608219089Spjd VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5609219089Spjd NULL, NULL 5610219089Spjd}; 5611219089Spjd 5612219089Spjd/* 5613219089Spjd * Error vnode operations template 5614219089Spjd */ 5615219089Spjdvnodeops_t *zfs_evnodeops; 5616219089Spjdconst fs_operation_def_t zfs_evnodeops_template[] = { 5617219089Spjd VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5618219089Spjd VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5619219089Spjd NULL, NULL 5620219089Spjd}; 5621219089Spjd#endif /* sun */ 5622219089Spjd /* Translate the IO_* bits of a VOP ioflag word into F* flag bits: IO_APPEND -> FAPPEND, IO_NDELAY -> FNONBLOCK, IO_SYNC -> FSYNC|FDSYNC|FRSYNC. Any other input bit is dropped; returns 0 when none of the three is set. */ 5623219089Spjdstatic int 5624213673Spjdioflags(int ioflags) 5625213673Spjd{ 5626213673Spjd int flags = 0; 5627213673Spjd 5628213673Spjd if (ioflags & IO_APPEND) 5629213673Spjd flags |= FAPPEND; 5630213673Spjd if (ioflags & IO_NDELAY) 5631213673Spjd flags |= FNONBLOCK; 5632213673Spjd if (ioflags & IO_SYNC) 5633213673Spjd flags |= (FSYNC | FDSYNC | FRSYNC); 5634213673Spjd 5635213673Spjd return (flags); 5636213673Spjd} 5637213673Spjd 5638213673Spjdstatic int 5639213937Savgzfs_getpages(struct vnode *vp, vm_page_t *m, int count, int reqpage) 5640213937Savg{ 5641213937Savg znode_t *zp = VTOZ(vp); 5642213937Savg zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5643213937Savg objset_t *os = zp->z_zfsvfs->z_os; 5644243517Savg vm_page_t mfirst, mlast, mreq; 5645213937Savg vm_object_t object; 5646213937Savg caddr_t va; 5647213937Savg struct sf_buf *sf; 5648243517Savg off_t startoff, endoff; 5649213937Savg int i, error; 5650243517Savg vm_pindex_t reqstart, reqend; 5651243517Savg int pcount, lsize, reqsize, size; 5652213937Savg 5653213937Savg ZFS_ENTER(zfsvfs); 5654213937Savg 
ZFS_VERIFY_ZP(zp); 5655213937Savg 5656243517Savg pcount = OFF_TO_IDX(round_page(count)); 5657213937Savg mreq = m[reqpage]; 5658213937Savg object = mreq->object; 5659213937Savg error = 0; 5660213937Savg 5661213937Savg KASSERT(vp->v_object == object, ("mismatching object")); 5662213937Savg 5663243517Savg if (pcount > 1 && zp->z_blksz > PAGESIZE) { 5664243517Savg startoff = rounddown(IDX_TO_OFF(mreq->pindex), zp->z_blksz); 5665243517Savg reqstart = OFF_TO_IDX(round_page(startoff)); 5666243517Savg if (reqstart < m[0]->pindex) 5667243517Savg reqstart = 0; 5668243517Savg else 5669243517Savg reqstart = reqstart - m[0]->pindex; 5670243517Savg endoff = roundup(IDX_TO_OFF(mreq->pindex) + PAGE_SIZE, 5671243517Savg zp->z_blksz); 5672243517Savg reqend = OFF_TO_IDX(trunc_page(endoff)) - 1; 5673243517Savg if (reqend > m[pcount - 1]->pindex) 5674243517Savg reqend = m[pcount - 1]->pindex; 5675243517Savg reqsize = reqend - m[reqstart]->pindex + 1; 5676243517Savg KASSERT(reqstart <= reqpage && reqpage < reqstart + reqsize, 5677243517Savg ("reqpage beyond [reqstart, reqstart + reqsize[ bounds")); 5678243517Savg } else { 5679243517Savg reqstart = reqpage; 5680243517Savg reqsize = 1; 5681243517Savg } 5682243517Savg mfirst = m[reqstart]; 5683243517Savg mlast = m[reqstart + reqsize - 1]; 5684243517Savg 5685248084Sattilio zfs_vmobject_wlock(object); 5686213937Savg 5687243517Savg for (i = 0; i < reqstart; i++) { 5688243517Savg vm_page_lock(m[i]); 5689243517Savg vm_page_free(m[i]); 5690243517Savg vm_page_unlock(m[i]); 5691213937Savg } 5692243517Savg for (i = reqstart + reqsize; i < pcount; i++) { 5693243517Savg vm_page_lock(m[i]); 5694243517Savg vm_page_free(m[i]); 5695243517Savg vm_page_unlock(m[i]); 5696243517Savg } 5697213937Savg 5698243517Savg if (mreq->valid && reqsize == 1) { 5699213937Savg if (mreq->valid != VM_PAGE_BITS_ALL) 5700213937Savg vm_page_zero_invalid(mreq, TRUE); 5701248084Sattilio zfs_vmobject_wunlock(object); 5702213937Savg ZFS_EXIT(zfsvfs); 5703248084Sattilio return 
(zfs_vm_pagerret_ok); 5704213937Savg } 5705213937Savg 5706213937Savg PCPU_INC(cnt.v_vnodein); 5707243517Savg PCPU_ADD(cnt.v_vnodepgsin, reqsize); 5708213937Savg 5709213937Savg if (IDX_TO_OFF(mreq->pindex) >= object->un_pager.vnp.vnp_size) { 5710243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5711243517Savg if (i != reqpage) { 5712243517Savg vm_page_lock(m[i]); 5713243517Savg vm_page_free(m[i]); 5714243517Savg vm_page_unlock(m[i]); 5715243517Savg } 5716243517Savg } 5717248084Sattilio zfs_vmobject_wunlock(object); 5718213937Savg ZFS_EXIT(zfsvfs); 5719248084Sattilio return (zfs_vm_pagerret_bad); 5720213937Savg } 5721213937Savg 5722243517Savg lsize = PAGE_SIZE; 5723243517Savg if (IDX_TO_OFF(mlast->pindex) + lsize > object->un_pager.vnp.vnp_size) 5724243517Savg lsize = object->un_pager.vnp.vnp_size - IDX_TO_OFF(mlast->pindex); 5725213937Savg 5726248084Sattilio zfs_vmobject_wunlock(object); 5727243517Savg 5728243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5729243517Savg size = PAGE_SIZE; 5730243517Savg if (i == (reqstart + reqsize - 1)) 5731243517Savg size = lsize; 5732243517Savg va = zfs_map_page(m[i], &sf); 5733243517Savg error = dmu_read(os, zp->z_id, IDX_TO_OFF(m[i]->pindex), 5734243517Savg size, va, DMU_READ_PREFETCH); 5735243517Savg if (size != PAGE_SIZE) 5736243517Savg bzero(va + size, PAGE_SIZE - size); 5737243517Savg zfs_unmap_page(sf); 5738243517Savg if (error != 0) 5739243517Savg break; 5740243517Savg } 5741243517Savg 5742248084Sattilio zfs_vmobject_wlock(object); 5743213937Savg 5744243517Savg for (i = reqstart; i < reqstart + reqsize; i++) { 5745243763Savg if (!error) 5746243763Savg m[i]->valid = VM_PAGE_BITS_ALL; 5747243517Savg KASSERT(m[i]->dirty == 0, ("zfs_getpages: page %p is dirty", m[i])); 5748243763Savg if (i != reqpage) 5749243763Savg vm_page_readahead_finish(m[i]); 5750243517Savg } 5751243517Savg 5752248084Sattilio zfs_vmobject_wunlock(object); 5753213937Savg 5754213937Savg ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 5755213937Savg 
ZFS_EXIT(zfsvfs); 5756248084Sattilio return (error ? zfs_vm_pagerret_error : zfs_vm_pagerret_ok); 5757213937Savg} 5758213937Savg 5759213937Savgstatic int 5760213937Savgzfs_freebsd_getpages(ap) 5761213937Savg struct vop_getpages_args /* { 5762213937Savg struct vnode *a_vp; 5763213937Savg vm_page_t *a_m; 5764213937Savg int a_count; 5765213937Savg int a_reqpage; 5766213937Savg vm_ooffset_t a_offset; 5767213937Savg } */ *ap; 5768213937Savg{ 5769213937Savg 5770213937Savg return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage)); 5771213937Savg} 5772213937Savg 5773213937Savgstatic int 5774243518Savgzfs_freebsd_bmap(ap) 5775243518Savg struct vop_bmap_args /* { 5776243518Savg struct vnode *a_vp; 5777243518Savg daddr_t a_bn; 5778243518Savg struct bufobj **a_bop; 5779243518Savg daddr_t *a_bnp; 5780243518Savg int *a_runp; 5781243518Savg int *a_runb; 5782243518Savg } */ *ap; 5783243518Savg{ 5784243518Savg 5785243518Savg if (ap->a_bop != NULL) 5786243518Savg *ap->a_bop = &ap->a_vp->v_bufobj; 5787243518Savg if (ap->a_bnp != NULL) 5788243518Savg *ap->a_bnp = ap->a_bn; 5789243518Savg if (ap->a_runp != NULL) 5790243518Savg *ap->a_runp = 0; 5791243518Savg if (ap->a_runb != NULL) 5792243518Savg *ap->a_runb = 0; 5793243518Savg 5794243518Savg return (0); 5795243518Savg} 5796243518Savg 5797243518Savgstatic int 5798168962Spjdzfs_freebsd_open(ap) 5799168962Spjd struct vop_open_args /* { 5800168962Spjd struct vnode *a_vp; 5801168962Spjd int a_mode; 5802168962Spjd struct ucred *a_cred; 5803168962Spjd struct thread *a_td; 5804168962Spjd } */ *ap; 5805168962Spjd{ 5806168962Spjd vnode_t *vp = ap->a_vp; 5807168962Spjd znode_t *zp = VTOZ(vp); 5808168962Spjd int error; 5809168962Spjd 5810185029Spjd error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); 5811168962Spjd if (error == 0) 5812219089Spjd vnode_create_vobject(vp, zp->z_size, ap->a_td); 5813168962Spjd return (error); 5814168962Spjd} 5815168962Spjd 5816168962Spjdstatic int 5817168962Spjdzfs_freebsd_close(ap) 5818168962Spjd struct 
vop_close_args /* { 5819168962Spjd struct vnode *a_vp; 5820168962Spjd int a_fflag; 5821168962Spjd struct ucred *a_cred; 5822168962Spjd struct thread *a_td; 5823168962Spjd } */ *ap; 5824168962Spjd{ 5825168962Spjd 5826242566Savg return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred, NULL)); 5827168962Spjd} 5828168962Spjd 5829168962Spjdstatic int 5830168962Spjdzfs_freebsd_ioctl(ap) 5831168962Spjd struct vop_ioctl_args /* { 5832168962Spjd struct vnode *a_vp; 5833168962Spjd u_long a_command; 5834168962Spjd caddr_t a_data; 5835168962Spjd int a_fflag; 5836168962Spjd struct ucred *cred; 5837168962Spjd struct thread *td; 5838168962Spjd } */ *ap; 5839168962Spjd{ 5840168962Spjd 5841168978Spjd return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 5842185029Spjd ap->a_fflag, ap->a_cred, NULL, NULL)); 5843168962Spjd} 5844168962Spjd 5845168962Spjdstatic int 5846168962Spjdzfs_freebsd_read(ap) 5847168962Spjd struct vop_read_args /* { 5848168962Spjd struct vnode *a_vp; 5849168962Spjd struct uio *a_uio; 5850168962Spjd int a_ioflag; 5851168962Spjd struct ucred *a_cred; 5852168962Spjd } */ *ap; 5853168962Spjd{ 5854168962Spjd 5855213673Spjd return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 5856213673Spjd ap->a_cred, NULL)); 5857168962Spjd} 5858168962Spjd 5859168962Spjdstatic int 5860168962Spjdzfs_freebsd_write(ap) 5861168962Spjd struct vop_write_args /* { 5862168962Spjd struct vnode *a_vp; 5863168962Spjd struct uio *a_uio; 5864168962Spjd int a_ioflag; 5865168962Spjd struct ucred *a_cred; 5866168962Spjd } */ *ap; 5867168962Spjd{ 5868168962Spjd 5869213673Spjd return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 5870213673Spjd ap->a_cred, NULL)); 5871168962Spjd} 5872168962Spjd 5873168962Spjdstatic int 5874168962Spjdzfs_freebsd_access(ap) 5875168962Spjd struct vop_access_args /* { 5876168962Spjd struct vnode *a_vp; 5877192689Strasz accmode_t a_accmode; 5878168962Spjd struct ucred *a_cred; 5879168962Spjd struct thread *a_td; 5880168962Spjd } */ *ap; 
5881168962Spjd{ 5882212002Sjh vnode_t *vp = ap->a_vp; 5883212002Sjh znode_t *zp = VTOZ(vp); 5884198703Spjd accmode_t accmode; 5885198703Spjd int error = 0; 5886168962Spjd 5887185172Spjd /* 5888198703Spjd * ZFS itself only knows about VREAD, VWRITE, VEXEC and VAPPEND. 5889185172Spjd */ 5890198703Spjd accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 5891198703Spjd if (accmode != 0) 5892198703Spjd error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); 5893185172Spjd 5894198703Spjd /* 5895198703Spjd * VADMIN has to be handled by vaccess(). 5896198703Spjd */ 5897198703Spjd if (error == 0) { 5898198703Spjd accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 5899198703Spjd if (accmode != 0) { 5900219089Spjd error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 5901219089Spjd zp->z_gid, accmode, ap->a_cred, NULL); 5902198703Spjd } 5903185172Spjd } 5904185172Spjd 5905212002Sjh /* 5906212002Sjh * For VEXEC, ensure that at least one execute bit is set for 5907212002Sjh * non-directories. 
5908212002Sjh */ 5909212002Sjh if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 5910219089Spjd (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 5911212002Sjh error = EACCES; 5912219089Spjd } 5913212002Sjh 5914198703Spjd return (error); 5915168962Spjd} 5916168962Spjd 5917168962Spjdstatic int 5918168962Spjdzfs_freebsd_lookup(ap) 5919168962Spjd struct vop_lookup_args /* { 5920168962Spjd struct vnode *a_dvp; 5921168962Spjd struct vnode **a_vpp; 5922168962Spjd struct componentname *a_cnp; 5923168962Spjd } */ *ap; 5924168962Spjd{ 5925168962Spjd struct componentname *cnp = ap->a_cnp; 5926168962Spjd char nm[NAME_MAX + 1]; 5927168962Spjd 5928168962Spjd ASSERT(cnp->cn_namelen < sizeof(nm)); 5929168962Spjd strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 5930168962Spjd 5931168962Spjd return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 5932185029Spjd cnp->cn_cred, cnp->cn_thread, 0)); 5933168962Spjd} 5934168962Spjd 5935168962Spjdstatic int 5936168962Spjdzfs_freebsd_create(ap) 5937168962Spjd struct vop_create_args /* { 5938168962Spjd struct vnode *a_dvp; 5939168962Spjd struct vnode **a_vpp; 5940168962Spjd struct componentname *a_cnp; 5941168962Spjd struct vattr *a_vap; 5942168962Spjd } */ *ap; 5943168962Spjd{ 5944168962Spjd struct componentname *cnp = ap->a_cnp; 5945168962Spjd vattr_t *vap = ap->a_vap; 5946168962Spjd int mode; 5947168962Spjd 5948168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 5949168962Spjd 5950168962Spjd vattr_init_mask(vap); 5951168962Spjd mode = vap->va_mode & ALLPERMS; 5952168962Spjd 5953168962Spjd return (zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 5954185029Spjd ap->a_vpp, cnp->cn_cred, cnp->cn_thread)); 5955168962Spjd} 5956168962Spjd 5957168962Spjdstatic int 5958168962Spjdzfs_freebsd_remove(ap) 5959168962Spjd struct vop_remove_args /* { 5960168962Spjd struct vnode *a_dvp; 5961168962Spjd struct vnode *a_vp; 5962168962Spjd struct componentname *a_cnp; 5963168962Spjd } */ *ap; 
5964168962Spjd{ 5965168962Spjd 5966168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 5967168962Spjd 5968168962Spjd return (zfs_remove(ap->a_dvp, ap->a_cnp->cn_nameptr, 5969185029Spjd ap->a_cnp->cn_cred, NULL, 0)); 5970168962Spjd} 5971168962Spjd 5972168962Spjdstatic int 5973168962Spjdzfs_freebsd_mkdir(ap) 5974168962Spjd struct vop_mkdir_args /* { 5975168962Spjd struct vnode *a_dvp; 5976168962Spjd struct vnode **a_vpp; 5977168962Spjd struct componentname *a_cnp; 5978168962Spjd struct vattr *a_vap; 5979168962Spjd } */ *ap; 5980168962Spjd{ 5981168962Spjd vattr_t *vap = ap->a_vap; 5982168962Spjd 5983168962Spjd ASSERT(ap->a_cnp->cn_flags & SAVENAME); 5984168962Spjd 5985168962Spjd vattr_init_mask(vap); 5986168962Spjd 5987168962Spjd return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 5988185029Spjd ap->a_cnp->cn_cred, NULL, 0, NULL)); 5989168962Spjd} 5990168962Spjd 5991168962Spjdstatic int 5992168962Spjdzfs_freebsd_rmdir(ap) 5993168962Spjd struct vop_rmdir_args /* { 5994168962Spjd struct vnode *a_dvp; 5995168962Spjd struct vnode *a_vp; 5996168962Spjd struct componentname *a_cnp; 5997168962Spjd } */ *ap; 5998168962Spjd{ 5999168962Spjd struct componentname *cnp = ap->a_cnp; 6000168962Spjd 6001168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6002168962Spjd 6003185029Spjd return (zfs_rmdir(ap->a_dvp, cnp->cn_nameptr, NULL, cnp->cn_cred, NULL, 0)); 6004168962Spjd} 6005168962Spjd 6006168962Spjdstatic int 6007168962Spjdzfs_freebsd_readdir(ap) 6008168962Spjd struct vop_readdir_args /* { 6009168962Spjd struct vnode *a_vp; 6010168962Spjd struct uio *a_uio; 6011168962Spjd struct ucred *a_cred; 6012168962Spjd int *a_eofflag; 6013168962Spjd int *a_ncookies; 6014168962Spjd u_long **a_cookies; 6015168962Spjd } */ *ap; 6016168962Spjd{ 6017168962Spjd 6018168962Spjd return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 6019168962Spjd ap->a_ncookies, ap->a_cookies)); 6020168962Spjd} 6021168962Spjd 6022168962Spjdstatic int 6023168962Spjdzfs_freebsd_fsync(ap) 
6024168962Spjd struct vop_fsync_args /* { 6025168962Spjd struct vnode *a_vp; 6026168962Spjd int a_waitfor; 6027168962Spjd struct thread *a_td; 6028168962Spjd } */ *ap; 6029168962Spjd{ 6030168962Spjd 6031168962Spjd vop_stdfsync(ap); 6032185029Spjd return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 6033168962Spjd} 6034168962Spjd 6035168962Spjdstatic int 6036168962Spjdzfs_freebsd_getattr(ap) 6037168962Spjd struct vop_getattr_args /* { 6038168962Spjd struct vnode *a_vp; 6039168962Spjd struct vattr *a_vap; 6040168962Spjd struct ucred *a_cred; 6041168962Spjd } */ *ap; 6042168962Spjd{ 6043185029Spjd vattr_t *vap = ap->a_vap; 6044185029Spjd xvattr_t xvap; 6045185029Spjd u_long fflags = 0; 6046185029Spjd int error; 6047168962Spjd 6048185029Spjd xva_init(&xvap); 6049185029Spjd xvap.xva_vattr = *vap; 6050185029Spjd xvap.xva_vattr.va_mask |= AT_XVATTR; 6051185029Spjd 6052185029Spjd /* Convert chflags into ZFS-type flags. */ 6053185029Spjd /* XXX: what about SF_SETTABLE?. */ 6054185029Spjd XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 6055185029Spjd XVA_SET_REQ(&xvap, XAT_APPENDONLY); 6056185029Spjd XVA_SET_REQ(&xvap, XAT_NOUNLINK); 6057185029Spjd XVA_SET_REQ(&xvap, XAT_NODUMP); 6058185029Spjd error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 6059185029Spjd if (error != 0) 6060185029Spjd return (error); 6061185029Spjd 6062185029Spjd /* Convert ZFS xattr into chflags. 
*/ 6063185029Spjd#define FLAG_CHECK(fflag, xflag, xfield) do { \ 6064185029Spjd if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 6065185029Spjd fflags |= (fflag); \ 6066185029Spjd} while (0) 6067185029Spjd FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 6068185029Spjd xvap.xva_xoptattrs.xoa_immutable); 6069185029Spjd FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 6070185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 6071185029Spjd FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 6072185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 6073185029Spjd FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 6074185029Spjd xvap.xva_xoptattrs.xoa_nodump); 6075185029Spjd#undef FLAG_CHECK 6076185029Spjd *vap = xvap.xva_vattr; 6077185029Spjd vap->va_flags = fflags; 6078185029Spjd return (0); 6079168962Spjd} 6080168962Spjd 6081168962Spjdstatic int 6082168962Spjdzfs_freebsd_setattr(ap) 6083168962Spjd struct vop_setattr_args /* { 6084168962Spjd struct vnode *a_vp; 6085168962Spjd struct vattr *a_vap; 6086168962Spjd struct ucred *a_cred; 6087168962Spjd } */ *ap; 6088168962Spjd{ 6089185172Spjd vnode_t *vp = ap->a_vp; 6090168962Spjd vattr_t *vap = ap->a_vap; 6091185172Spjd cred_t *cred = ap->a_cred; 6092185029Spjd xvattr_t xvap; 6093185029Spjd u_long fflags; 6094185029Spjd uint64_t zflags; 6095168962Spjd 6096168962Spjd vattr_init_mask(vap); 6097170044Spjd vap->va_mask &= ~AT_NOSET; 6098168962Spjd 6099185029Spjd xva_init(&xvap); 6100185029Spjd xvap.xva_vattr = *vap; 6101185029Spjd 6102219089Spjd zflags = VTOZ(vp)->z_pflags; 6103185172Spjd 6104185029Spjd if (vap->va_flags != VNOVAL) { 6105197683Sdelphij zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 6106185172Spjd int error; 6107185172Spjd 6108197683Sdelphij if (zfsvfs->z_use_fuids == B_FALSE) 6109197683Sdelphij return (EOPNOTSUPP); 6110197683Sdelphij 6111185029Spjd fflags = vap->va_flags; 6112185029Spjd if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_NODUMP)) != 0) 6113185029Spjd return (EOPNOTSUPP); 6114185172Spjd /* 6115185172Spjd * Unprivileged processes are not permitted to unset 
system 6116185172Spjd * flags, or modify flags if any system flags are set. 6117185172Spjd * Privileged non-jail processes may not modify system flags 6118185172Spjd * if securelevel > 0 and any existing system flags are set. 6119185172Spjd * Privileged jail processes behave like privileged non-jail 6120185172Spjd * processes if the security.jail.chflags_allowed sysctl is 6121185172Spjd * non-zero; otherwise, they behave like unprivileged 6122185172Spjd * processes. 6123185172Spjd */ 6124197861Spjd if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 6125197861Spjd priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) { 6126185172Spjd if (zflags & 6127185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 6128185172Spjd error = securelevel_gt(cred, 0); 6129197861Spjd if (error != 0) 6130185172Spjd return (error); 6131185172Spjd } 6132185172Spjd } else { 6133197861Spjd /* 6134197861Spjd * Callers may only modify the file flags on objects they 6135197861Spjd * have VADMIN rights for. 6136197861Spjd */ 6137197861Spjd if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0) 6138197861Spjd return (error); 6139185172Spjd if (zflags & 6140185172Spjd (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 6141185172Spjd return (EPERM); 6142185172Spjd } 6143185172Spjd if (fflags & 6144185172Spjd (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 6145185172Spjd return (EPERM); 6146185172Spjd } 6147185172Spjd } 6148185029Spjd 6149185029Spjd#define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 6150185029Spjd if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 6151185029Spjd ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 6152185029Spjd XVA_SET_REQ(&xvap, (xflag)); \ 6153185029Spjd (xfield) = ((fflags & (fflag)) != 0); \ 6154185029Spjd } \ 6155185029Spjd} while (0) 6156185029Spjd /* Convert chflags into ZFS-type flags. */ 6157185029Spjd /* XXX: what about SF_SETTABLE?. 
*/ 6158185029Spjd FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 6159185029Spjd xvap.xva_xoptattrs.xoa_immutable); 6160185029Spjd FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 6161185029Spjd xvap.xva_xoptattrs.xoa_appendonly); 6162185029Spjd FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 6163185029Spjd xvap.xva_xoptattrs.xoa_nounlink); 6164185029Spjd FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 6165185172Spjd xvap.xva_xoptattrs.xoa_nodump); 6166185029Spjd#undef FLAG_CHANGE 6167185029Spjd } 6168185172Spjd return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL)); 6169168962Spjd} 6170168962Spjd 6171168962Spjdstatic int 6172168962Spjdzfs_freebsd_rename(ap) 6173168962Spjd struct vop_rename_args /* { 6174168962Spjd struct vnode *a_fdvp; 6175168962Spjd struct vnode *a_fvp; 6176168962Spjd struct componentname *a_fcnp; 6177168962Spjd struct vnode *a_tdvp; 6178168962Spjd struct vnode *a_tvp; 6179168962Spjd struct componentname *a_tcnp; 6180168962Spjd } */ *ap; 6181168962Spjd{ 6182168962Spjd vnode_t *fdvp = ap->a_fdvp; 6183168962Spjd vnode_t *fvp = ap->a_fvp; 6184168962Spjd vnode_t *tdvp = ap->a_tdvp; 6185168962Spjd vnode_t *tvp = ap->a_tvp; 6186168962Spjd int error; 6187168962Spjd 6188192237Skmacy ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); 6189192237Skmacy ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); 6190168962Spjd 6191168962Spjd error = zfs_rename(fdvp, ap->a_fcnp->cn_nameptr, tdvp, 6192185029Spjd ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred, NULL, 0); 6193168962Spjd 6194168962Spjd if (tdvp == tvp) 6195168962Spjd VN_RELE(tdvp); 6196168962Spjd else 6197168962Spjd VN_URELE(tdvp); 6198168962Spjd if (tvp) 6199168962Spjd VN_URELE(tvp); 6200168962Spjd VN_RELE(fdvp); 6201168962Spjd VN_RELE(fvp); 6202168962Spjd 6203168962Spjd return (error); 6204168962Spjd} 6205168962Spjd 6206168962Spjdstatic int 6207168962Spjdzfs_freebsd_symlink(ap) 6208168962Spjd struct vop_symlink_args /* { 6209168962Spjd struct vnode *a_dvp; 6210168962Spjd struct 
vnode **a_vpp; 6211168962Spjd struct componentname *a_cnp; 6212168962Spjd struct vattr *a_vap; 6213168962Spjd char *a_target; 6214168962Spjd } */ *ap; 6215168962Spjd{ 6216168962Spjd struct componentname *cnp = ap->a_cnp; 6217168962Spjd vattr_t *vap = ap->a_vap; 6218168962Spjd 6219168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6220168962Spjd 6221168962Spjd vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. */ 6222168962Spjd vattr_init_mask(vap); 6223168962Spjd 6224168962Spjd return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 6225168962Spjd ap->a_target, cnp->cn_cred, cnp->cn_thread)); 6226168962Spjd} 6227168962Spjd 6228168962Spjdstatic int 6229168962Spjdzfs_freebsd_readlink(ap) 6230168962Spjd struct vop_readlink_args /* { 6231168962Spjd struct vnode *a_vp; 6232168962Spjd struct uio *a_uio; 6233168962Spjd struct ucred *a_cred; 6234168962Spjd } */ *ap; 6235168962Spjd{ 6236168962Spjd 6237185029Spjd return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 6238168962Spjd} 6239168962Spjd 6240168962Spjdstatic int 6241168962Spjdzfs_freebsd_link(ap) 6242168962Spjd struct vop_link_args /* { 6243168962Spjd struct vnode *a_tdvp; 6244168962Spjd struct vnode *a_vp; 6245168962Spjd struct componentname *a_cnp; 6246168962Spjd } */ *ap; 6247168962Spjd{ 6248168962Spjd struct componentname *cnp = ap->a_cnp; 6249168962Spjd 6250168962Spjd ASSERT(cnp->cn_flags & SAVENAME); 6251168962Spjd 6252185029Spjd return (zfs_link(ap->a_tdvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 6253168962Spjd} 6254168962Spjd 6255168962Spjdstatic int 6256168962Spjdzfs_freebsd_inactive(ap) 6257169170Spjd struct vop_inactive_args /* { 6258169170Spjd struct vnode *a_vp; 6259169170Spjd struct thread *a_td; 6260169170Spjd } */ *ap; 6261168962Spjd{ 6262168962Spjd vnode_t *vp = ap->a_vp; 6263168962Spjd 6264185029Spjd zfs_inactive(vp, ap->a_td->td_ucred, NULL); 6265168962Spjd return (0); 6266168962Spjd} 6267168962Spjd 6268168962Spjdstatic int 6269168962Spjdzfs_freebsd_reclaim(ap) 
6270168962Spjd struct vop_reclaim_args /* { 6271168962Spjd struct vnode *a_vp; 6272168962Spjd struct thread *a_td; 6273168962Spjd } */ *ap; 6274168962Spjd{ 6275169170Spjd vnode_t *vp = ap->a_vp; 6276168962Spjd znode_t *zp = VTOZ(vp); 6277197133Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6278168962Spjd 6279169025Spjd ASSERT(zp != NULL); 6280169025Spjd 6281243520Savg /* Destroy the vm object and flush associated pages. */ 6282243520Savg vnode_destroy_vobject(vp); 6283243520Savg 6284168962Spjd /* 6285243520Savg * z_teardown_inactive_lock protects from a race with 6286243520Savg * zfs_znode_dmu_fini in zfsvfs_teardown during 6287243520Savg * force unmount. 6288168962Spjd */ 6289243520Savg rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 6290243520Savg if (zp->z_sa_hdl == NULL) 6291196301Spjd zfs_znode_free(zp); 6292243520Savg else 6293243520Savg zfs_zinactive(zp); 6294243520Savg rw_exit(&zfsvfs->z_teardown_inactive_lock); 6295185029Spjd 6296168962Spjd vp->v_data = NULL; 6297168962Spjd return (0); 6298168962Spjd} 6299168962Spjd 6300168962Spjdstatic int 6301168962Spjdzfs_freebsd_fid(ap) 6302168962Spjd struct vop_fid_args /* { 6303168962Spjd struct vnode *a_vp; 6304168962Spjd struct fid *a_fid; 6305168962Spjd } */ *ap; 6306168962Spjd{ 6307168962Spjd 6308185029Spjd return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 6309168962Spjd} 6310168962Spjd 6311168962Spjdstatic int 6312168962Spjdzfs_freebsd_pathconf(ap) 6313168962Spjd struct vop_pathconf_args /* { 6314168962Spjd struct vnode *a_vp; 6315168962Spjd int a_name; 6316168962Spjd register_t *a_retval; 6317168962Spjd } */ *ap; 6318168962Spjd{ 6319168962Spjd ulong_t val; 6320168962Spjd int error; 6321168962Spjd 6322185029Spjd error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 6323168962Spjd if (error == 0) 6324168962Spjd *ap->a_retval = val; 6325168962Spjd else if (error == EOPNOTSUPP) 6326168962Spjd error = vop_stdpathconf(ap); 6327168962Spjd return (error); 6328168962Spjd} 6329168962Spjd 
6330196949Straszstatic int 6331196949Straszzfs_freebsd_fifo_pathconf(ap) 6332196949Strasz struct vop_pathconf_args /* { 6333196949Strasz struct vnode *a_vp; 6334196949Strasz int a_name; 6335196949Strasz register_t *a_retval; 6336196949Strasz } */ *ap; 6337196949Strasz{ 6338196949Strasz 6339196949Strasz switch (ap->a_name) { 6340196949Strasz case _PC_ACL_EXTENDED: 6341196949Strasz case _PC_ACL_NFS4: 6342196949Strasz case _PC_ACL_PATH_MAX: 6343196949Strasz case _PC_MAC_PRESENT: 6344196949Strasz return (zfs_freebsd_pathconf(ap)); 6345196949Strasz default: 6346196949Strasz return (fifo_specops.vop_pathconf(ap)); 6347196949Strasz } 6348196949Strasz} 6349196949Strasz 6350185029Spjd/* 6351185029Spjd * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 6352185029Spjd * extended attribute name: 6353185029Spjd * 6354185029Spjd * NAMESPACE PREFIX 6355185029Spjd * system freebsd:system: 6356185029Spjd * user (none, can be used to access ZFS fsattr(5) attributes 6357185029Spjd * created on Solaris) 6358185029Spjd */ 6359185029Spjdstatic int 6360185029Spjdzfs_create_attrname(int attrnamespace, const char *name, char *attrname, 6361185029Spjd size_t size) 6362185029Spjd{ 6363185029Spjd const char *namespace, *prefix, *suffix; 6364185029Spjd 6365185029Spjd /* We don't allow '/' character in attribute name. */ 6366185029Spjd if (strchr(name, '/') != NULL) 6367185029Spjd return (EINVAL); 6368185029Spjd /* We don't allow attribute names that start with "freebsd:" string. 
*/ 6369185029Spjd if (strncmp(name, "freebsd:", 8) == 0) 6370185029Spjd return (EINVAL); 6371185029Spjd 6372185029Spjd bzero(attrname, size); 6373185029Spjd 6374185029Spjd switch (attrnamespace) { 6375185029Spjd case EXTATTR_NAMESPACE_USER: 6376185029Spjd#if 0 6377185029Spjd prefix = "freebsd:"; 6378185029Spjd namespace = EXTATTR_NAMESPACE_USER_STRING; 6379185029Spjd suffix = ":"; 6380185029Spjd#else 6381185029Spjd /* 6382185029Spjd * This is the default namespace by which we can access all 6383185029Spjd * attributes created on Solaris. 6384185029Spjd */ 6385185029Spjd prefix = namespace = suffix = ""; 6386185029Spjd#endif 6387185029Spjd break; 6388185029Spjd case EXTATTR_NAMESPACE_SYSTEM: 6389185029Spjd prefix = "freebsd:"; 6390185029Spjd namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 6391185029Spjd suffix = ":"; 6392185029Spjd break; 6393185029Spjd case EXTATTR_NAMESPACE_EMPTY: 6394185029Spjd default: 6395185029Spjd return (EINVAL); 6396185029Spjd } 6397185029Spjd if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 6398185029Spjd name) >= size) { 6399185029Spjd return (ENAMETOOLONG); 6400185029Spjd } 6401185029Spjd return (0); 6402185029Spjd} 6403185029Spjd 6404185029Spjd/* 6405185029Spjd * Vnode operating to retrieve a named extended attribute. 
6406185029Spjd */ 6407185029Spjdstatic int 6408185029Spjdzfs_getextattr(struct vop_getextattr_args *ap) 6409185029Spjd/* 6410185029Spjdvop_getextattr { 6411185029Spjd IN struct vnode *a_vp; 6412185029Spjd IN int a_attrnamespace; 6413185029Spjd IN const char *a_name; 6414185029Spjd INOUT struct uio *a_uio; 6415185029Spjd OUT size_t *a_size; 6416185029Spjd IN struct ucred *a_cred; 6417185029Spjd IN struct thread *a_td; 6418185029Spjd}; 6419185029Spjd*/ 6420185029Spjd{ 6421185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6422185029Spjd struct thread *td = ap->a_td; 6423185029Spjd struct nameidata nd; 6424185029Spjd char attrname[255]; 6425185029Spjd struct vattr va; 6426185029Spjd vnode_t *xvp = NULL, *vp; 6427185029Spjd int error, flags; 6428185029Spjd 6429195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6430195785Strasz ap->a_cred, ap->a_td, VREAD); 6431195785Strasz if (error != 0) 6432195785Strasz return (error); 6433195785Strasz 6434185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6435185029Spjd sizeof(attrname)); 6436185029Spjd if (error != 0) 6437185029Spjd return (error); 6438185029Spjd 6439185029Spjd ZFS_ENTER(zfsvfs); 6440185029Spjd 6441185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6442185029Spjd LOOKUP_XATTR); 6443185029Spjd if (error != 0) { 6444185029Spjd ZFS_EXIT(zfsvfs); 6445185029Spjd return (error); 6446185029Spjd } 6447185029Spjd 6448185029Spjd flags = FREAD; 6449241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 6450185029Spjd xvp, td); 6451194586Skib error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL); 6452185029Spjd vp = nd.ni_vp; 6453185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6454185029Spjd if (error != 0) { 6455196303Spjd ZFS_EXIT(zfsvfs); 6456195785Strasz if (error == ENOENT) 6457195785Strasz error = ENOATTR; 6458185029Spjd return (error); 6459185029Spjd } 6460185029Spjd 6461185029Spjd if (ap->a_size != NULL) { 6462185029Spjd error 
= VOP_GETATTR(vp, &va, ap->a_cred); 6463185029Spjd if (error == 0) 6464185029Spjd *ap->a_size = (size_t)va.va_size; 6465185029Spjd } else if (ap->a_uio != NULL) 6466224605Smm error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); 6467185029Spjd 6468185029Spjd VOP_UNLOCK(vp, 0); 6469185029Spjd vn_close(vp, flags, ap->a_cred, td); 6470185029Spjd ZFS_EXIT(zfsvfs); 6471185029Spjd 6472185029Spjd return (error); 6473185029Spjd} 6474185029Spjd 6475185029Spjd/* 6476185029Spjd * Vnode operation to remove a named attribute. 6477185029Spjd */ 6478185029Spjdint 6479185029Spjdzfs_deleteextattr(struct vop_deleteextattr_args *ap) 6480185029Spjd/* 6481185029Spjdvop_deleteextattr { 6482185029Spjd IN struct vnode *a_vp; 6483185029Spjd IN int a_attrnamespace; 6484185029Spjd IN const char *a_name; 6485185029Spjd IN struct ucred *a_cred; 6486185029Spjd IN struct thread *a_td; 6487185029Spjd}; 6488185029Spjd*/ 6489185029Spjd{ 6490185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6491185029Spjd struct thread *td = ap->a_td; 6492185029Spjd struct nameidata nd; 6493185029Spjd char attrname[255]; 6494185029Spjd struct vattr va; 6495185029Spjd vnode_t *xvp = NULL, *vp; 6496185029Spjd int error, flags; 6497185029Spjd 6498195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6499195785Strasz ap->a_cred, ap->a_td, VWRITE); 6500195785Strasz if (error != 0) 6501195785Strasz return (error); 6502195785Strasz 6503185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6504185029Spjd sizeof(attrname)); 6505185029Spjd if (error != 0) 6506185029Spjd return (error); 6507185029Spjd 6508185029Spjd ZFS_ENTER(zfsvfs); 6509185029Spjd 6510185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6511185029Spjd LOOKUP_XATTR); 6512185029Spjd if (error != 0) { 6513185029Spjd ZFS_EXIT(zfsvfs); 6514185029Spjd return (error); 6515185029Spjd } 6516185029Spjd 6517241896Skib NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 6518185029Spjd 
UIO_SYSSPACE, attrname, xvp, td); 6519185029Spjd error = namei(&nd); 6520185029Spjd vp = nd.ni_vp; 6521185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6522185029Spjd if (error != 0) { 6523196303Spjd ZFS_EXIT(zfsvfs); 6524195785Strasz if (error == ENOENT) 6525195785Strasz error = ENOATTR; 6526185029Spjd return (error); 6527185029Spjd } 6528185029Spjd error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 6529185029Spjd 6530185029Spjd vput(nd.ni_dvp); 6531185029Spjd if (vp == nd.ni_dvp) 6532185029Spjd vrele(vp); 6533185029Spjd else 6534185029Spjd vput(vp); 6535185029Spjd ZFS_EXIT(zfsvfs); 6536185029Spjd 6537185029Spjd return (error); 6538185029Spjd} 6539185029Spjd 6540185029Spjd/* 6541185029Spjd * Vnode operation to set a named attribute. 6542185029Spjd */ 6543185029Spjdstatic int 6544185029Spjdzfs_setextattr(struct vop_setextattr_args *ap) 6545185029Spjd/* 6546185029Spjdvop_setextattr { 6547185029Spjd IN struct vnode *a_vp; 6548185029Spjd IN int a_attrnamespace; 6549185029Spjd IN const char *a_name; 6550185029Spjd INOUT struct uio *a_uio; 6551185029Spjd IN struct ucred *a_cred; 6552185029Spjd IN struct thread *a_td; 6553185029Spjd}; 6554185029Spjd*/ 6555185029Spjd{ 6556185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6557185029Spjd struct thread *td = ap->a_td; 6558185029Spjd struct nameidata nd; 6559185029Spjd char attrname[255]; 6560185029Spjd struct vattr va; 6561185029Spjd vnode_t *xvp = NULL, *vp; 6562185029Spjd int error, flags; 6563185029Spjd 6564195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6565195785Strasz ap->a_cred, ap->a_td, VWRITE); 6566195785Strasz if (error != 0) 6567195785Strasz return (error); 6568195785Strasz 6569185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 6570185029Spjd sizeof(attrname)); 6571185029Spjd if (error != 0) 6572185029Spjd return (error); 6573185029Spjd 6574185029Spjd ZFS_ENTER(zfsvfs); 6575185029Spjd 6576185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 
6577195785Strasz LOOKUP_XATTR | CREATE_XATTR_DIR); 6578185029Spjd if (error != 0) { 6579185029Spjd ZFS_EXIT(zfsvfs); 6580185029Spjd return (error); 6581185029Spjd } 6582185029Spjd 6583185029Spjd flags = FFLAGS(O_WRONLY | O_CREAT); 6584241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 6585185029Spjd xvp, td); 6586194586Skib error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); 6587185029Spjd vp = nd.ni_vp; 6588185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6589185029Spjd if (error != 0) { 6590185029Spjd ZFS_EXIT(zfsvfs); 6591185029Spjd return (error); 6592185029Spjd } 6593185029Spjd 6594185029Spjd VATTR_NULL(&va); 6595185029Spjd va.va_size = 0; 6596185029Spjd error = VOP_SETATTR(vp, &va, ap->a_cred); 6597185029Spjd if (error == 0) 6598185029Spjd VOP_WRITE(vp, ap->a_uio, IO_UNIT | IO_SYNC, ap->a_cred); 6599185029Spjd 6600185029Spjd VOP_UNLOCK(vp, 0); 6601185029Spjd vn_close(vp, flags, ap->a_cred, td); 6602185029Spjd ZFS_EXIT(zfsvfs); 6603185029Spjd 6604185029Spjd return (error); 6605185029Spjd} 6606185029Spjd 6607185029Spjd/* 6608185029Spjd * Vnode operation to retrieve extended attributes on a vnode. 
6609185029Spjd */ 6610185029Spjdstatic int 6611185029Spjdzfs_listextattr(struct vop_listextattr_args *ap) 6612185029Spjd/* 6613185029Spjdvop_listextattr { 6614185029Spjd IN struct vnode *a_vp; 6615185029Spjd IN int a_attrnamespace; 6616185029Spjd INOUT struct uio *a_uio; 6617185029Spjd OUT size_t *a_size; 6618185029Spjd IN struct ucred *a_cred; 6619185029Spjd IN struct thread *a_td; 6620185029Spjd}; 6621185029Spjd*/ 6622185029Spjd{ 6623185029Spjd zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 6624185029Spjd struct thread *td = ap->a_td; 6625185029Spjd struct nameidata nd; 6626185029Spjd char attrprefix[16]; 6627185029Spjd u_char dirbuf[sizeof(struct dirent)]; 6628185029Spjd struct dirent *dp; 6629185029Spjd struct iovec aiov; 6630185029Spjd struct uio auio, *uio = ap->a_uio; 6631185029Spjd size_t *sizep = ap->a_size; 6632185029Spjd size_t plen; 6633185029Spjd vnode_t *xvp = NULL, *vp; 6634185029Spjd int done, error, eof, pos; 6635185029Spjd 6636195785Strasz error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 6637195785Strasz ap->a_cred, ap->a_td, VREAD); 6638196303Spjd if (error != 0) 6639195785Strasz return (error); 6640195785Strasz 6641185029Spjd error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 6642185029Spjd sizeof(attrprefix)); 6643185029Spjd if (error != 0) 6644185029Spjd return (error); 6645185029Spjd plen = strlen(attrprefix); 6646185029Spjd 6647185029Spjd ZFS_ENTER(zfsvfs); 6648185029Spjd 6649195822Strasz if (sizep != NULL) 6650195822Strasz *sizep = 0; 6651195822Strasz 6652185029Spjd error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 6653185029Spjd LOOKUP_XATTR); 6654185029Spjd if (error != 0) { 6655196303Spjd ZFS_EXIT(zfsvfs); 6656195785Strasz /* 6657195785Strasz * ENOATTR means that the EA directory does not yet exist, 6658195785Strasz * i.e. there are no extended attributes there. 
6659195785Strasz */ 6660195785Strasz if (error == ENOATTR) 6661195785Strasz error = 0; 6662185029Spjd return (error); 6663185029Spjd } 6664185029Spjd 6665241896Skib NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 6666188588Sjhb UIO_SYSSPACE, ".", xvp, td); 6667185029Spjd error = namei(&nd); 6668185029Spjd vp = nd.ni_vp; 6669185029Spjd NDFREE(&nd, NDF_ONLY_PNBUF); 6670185029Spjd if (error != 0) { 6671185029Spjd ZFS_EXIT(zfsvfs); 6672185029Spjd return (error); 6673185029Spjd } 6674185029Spjd 6675185029Spjd auio.uio_iov = &aiov; 6676185029Spjd auio.uio_iovcnt = 1; 6677185029Spjd auio.uio_segflg = UIO_SYSSPACE; 6678185029Spjd auio.uio_td = td; 6679185029Spjd auio.uio_rw = UIO_READ; 6680185029Spjd auio.uio_offset = 0; 6681185029Spjd 6682185029Spjd do { 6683185029Spjd u_char nlen; 6684185029Spjd 6685185029Spjd aiov.iov_base = (void *)dirbuf; 6686185029Spjd aiov.iov_len = sizeof(dirbuf); 6687185029Spjd auio.uio_resid = sizeof(dirbuf); 6688185029Spjd error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 6689185029Spjd done = sizeof(dirbuf) - auio.uio_resid; 6690185029Spjd if (error != 0) 6691185029Spjd break; 6692185029Spjd for (pos = 0; pos < done;) { 6693185029Spjd dp = (struct dirent *)(dirbuf + pos); 6694185029Spjd pos += dp->d_reclen; 6695185029Spjd /* 6696185029Spjd * XXX: Temporarily we also accept DT_UNKNOWN, as this 6697185029Spjd * is what we get when attribute was created on Solaris. 
6698185029Spjd */ 6699185029Spjd if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 6700185029Spjd continue; 6701185029Spjd if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) 6702185029Spjd continue; 6703185029Spjd else if (strncmp(dp->d_name, attrprefix, plen) != 0) 6704185029Spjd continue; 6705185029Spjd nlen = dp->d_namlen - plen; 6706185029Spjd if (sizep != NULL) 6707185029Spjd *sizep += 1 + nlen; 6708185029Spjd else if (uio != NULL) { 6709185029Spjd /* 6710185029Spjd * Format of extattr name entry is one byte for 6711185029Spjd * length and the rest for name. 6712185029Spjd */ 6713185029Spjd error = uiomove(&nlen, 1, uio->uio_rw, uio); 6714185029Spjd if (error == 0) { 6715185029Spjd error = uiomove(dp->d_name + plen, nlen, 6716185029Spjd uio->uio_rw, uio); 6717185029Spjd } 6718185029Spjd if (error != 0) 6719185029Spjd break; 6720185029Spjd } 6721185029Spjd } 6722185029Spjd } while (!eof && error == 0); 6723185029Spjd 6724185029Spjd vput(vp); 6725185029Spjd ZFS_EXIT(zfsvfs); 6726185029Spjd 6727185029Spjd return (error); 6728185029Spjd} 6729185029Spjd 6730192800Straszint 6731192800Straszzfs_freebsd_getacl(ap) 6732192800Strasz struct vop_getacl_args /* { 6733192800Strasz struct vnode *vp; 6734192800Strasz acl_type_t type; 6735192800Strasz struct acl *aclp; 6736192800Strasz struct ucred *cred; 6737192800Strasz struct thread *td; 6738192800Strasz } */ *ap; 6739192800Strasz{ 6740192800Strasz int error; 6741192800Strasz vsecattr_t vsecattr; 6742192800Strasz 6743192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 6744197435Strasz return (EINVAL); 6745192800Strasz 6746192800Strasz vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 6747192800Strasz if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)) 6748192800Strasz return (error); 6749192800Strasz 6750192800Strasz error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt); 6751196303Spjd if (vsecattr.vsa_aclentp != NULL) 6752196303Spjd kmem_free(vsecattr.vsa_aclentp, 
vsecattr.vsa_aclentsz); 6753192800Strasz 6754196303Spjd return (error); 6755192800Strasz} 6756192800Strasz 6757192800Straszint 6758192800Straszzfs_freebsd_setacl(ap) 6759192800Strasz struct vop_setacl_args /* { 6760192800Strasz struct vnode *vp; 6761192800Strasz acl_type_t type; 6762192800Strasz struct acl *aclp; 6763192800Strasz struct ucred *cred; 6764192800Strasz struct thread *td; 6765192800Strasz } */ *ap; 6766192800Strasz{ 6767192800Strasz int error; 6768192800Strasz vsecattr_t vsecattr; 6769192800Strasz int aclbsize; /* size of acl list in bytes */ 6770192800Strasz aclent_t *aaclp; 6771192800Strasz 6772192800Strasz if (ap->a_type != ACL_TYPE_NFS4) 6773197435Strasz return (EINVAL); 6774192800Strasz 6775192800Strasz if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 6776192800Strasz return (EINVAL); 6777192800Strasz 6778192800Strasz /* 6779196949Strasz * With NFSv4 ACLs, chmod(2) may need to add additional entries, 6780192800Strasz * splitting every entry into two and appending "canonical six" 6781192800Strasz * entries at the end. Don't allow for setting an ACL that would 6782192800Strasz * cause chmod(2) to run out of ACL entries. 
6783192800Strasz */ 6784192800Strasz if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 6785192800Strasz return (ENOSPC); 6786192800Strasz 6787208030Strasz error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 6788208030Strasz if (error != 0) 6789208030Strasz return (error); 6790208030Strasz 6791192800Strasz vsecattr.vsa_mask = VSA_ACE; 6792192800Strasz aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t); 6793192800Strasz vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 6794192800Strasz aaclp = vsecattr.vsa_aclentp; 6795192800Strasz vsecattr.vsa_aclentsz = aclbsize; 6796192800Strasz 6797192800Strasz aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 6798192800Strasz error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL); 6799192800Strasz kmem_free(aaclp, aclbsize); 6800192800Strasz 6801192800Strasz return (error); 6802192800Strasz} 6803192800Strasz 6804192800Straszint 6805192800Straszzfs_freebsd_aclcheck(ap) 6806192800Strasz struct vop_aclcheck_args /* { 6807192800Strasz struct vnode *vp; 6808192800Strasz acl_type_t type; 6809192800Strasz struct acl *aclp; 6810192800Strasz struct ucred *cred; 6811192800Strasz struct thread *td; 6812192800Strasz } */ *ap; 6813192800Strasz{ 6814192800Strasz 6815192800Strasz return (EOPNOTSUPP); 6816192800Strasz} 6817192800Strasz 6818168404Spjdstruct vop_vector zfs_vnodeops; 6819168404Spjdstruct vop_vector zfs_fifoops; 6820209962Smmstruct vop_vector zfs_shareops; 6821168404Spjd 6822168404Spjdstruct vop_vector zfs_vnodeops = { 6823185029Spjd .vop_default = &default_vnodeops, 6824185029Spjd .vop_inactive = zfs_freebsd_inactive, 6825185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 6826185029Spjd .vop_access = zfs_freebsd_access, 6827168404Spjd#ifdef FREEBSD_NAMECACHE 6828185029Spjd .vop_lookup = vfs_cache_lookup, 6829185029Spjd .vop_cachedlookup = zfs_freebsd_lookup, 6830168404Spjd#else 6831185029Spjd .vop_lookup = zfs_freebsd_lookup, 6832168404Spjd#endif 6833185029Spjd .vop_getattr = 
zfs_freebsd_getattr, 6834185029Spjd .vop_setattr = zfs_freebsd_setattr, 6835185029Spjd .vop_create = zfs_freebsd_create, 6836185029Spjd .vop_mknod = zfs_freebsd_create, 6837185029Spjd .vop_mkdir = zfs_freebsd_mkdir, 6838185029Spjd .vop_readdir = zfs_freebsd_readdir, 6839185029Spjd .vop_fsync = zfs_freebsd_fsync, 6840185029Spjd .vop_open = zfs_freebsd_open, 6841185029Spjd .vop_close = zfs_freebsd_close, 6842185029Spjd .vop_rmdir = zfs_freebsd_rmdir, 6843185029Spjd .vop_ioctl = zfs_freebsd_ioctl, 6844185029Spjd .vop_link = zfs_freebsd_link, 6845185029Spjd .vop_symlink = zfs_freebsd_symlink, 6846185029Spjd .vop_readlink = zfs_freebsd_readlink, 6847185029Spjd .vop_read = zfs_freebsd_read, 6848185029Spjd .vop_write = zfs_freebsd_write, 6849185029Spjd .vop_remove = zfs_freebsd_remove, 6850185029Spjd .vop_rename = zfs_freebsd_rename, 6851185029Spjd .vop_pathconf = zfs_freebsd_pathconf, 6852243518Savg .vop_bmap = zfs_freebsd_bmap, 6853185029Spjd .vop_fid = zfs_freebsd_fid, 6854185029Spjd .vop_getextattr = zfs_getextattr, 6855185029Spjd .vop_deleteextattr = zfs_deleteextattr, 6856185029Spjd .vop_setextattr = zfs_setextattr, 6857185029Spjd .vop_listextattr = zfs_listextattr, 6858192800Strasz .vop_getacl = zfs_freebsd_getacl, 6859192800Strasz .vop_setacl = zfs_freebsd_setacl, 6860192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6861213937Savg .vop_getpages = zfs_freebsd_getpages, 6862168404Spjd}; 6863168404Spjd 6864169170Spjdstruct vop_vector zfs_fifoops = { 6865185029Spjd .vop_default = &fifo_specops, 6866200162Skib .vop_fsync = zfs_freebsd_fsync, 6867185029Spjd .vop_access = zfs_freebsd_access, 6868185029Spjd .vop_getattr = zfs_freebsd_getattr, 6869185029Spjd .vop_inactive = zfs_freebsd_inactive, 6870185029Spjd .vop_read = VOP_PANIC, 6871185029Spjd .vop_reclaim = zfs_freebsd_reclaim, 6872185029Spjd .vop_setattr = zfs_freebsd_setattr, 6873185029Spjd .vop_write = VOP_PANIC, 6874196949Strasz .vop_pathconf = zfs_freebsd_fifo_pathconf, 6875185029Spjd .vop_fid = 
zfs_freebsd_fid, 6876192800Strasz .vop_getacl = zfs_freebsd_getacl, 6877192800Strasz .vop_setacl = zfs_freebsd_setacl, 6878192800Strasz .vop_aclcheck = zfs_freebsd_aclcheck, 6879168404Spjd}; 6880209962Smm 6881209962Smm/* 6882209962Smm * special share hidden files vnode operations template 6883209962Smm */ 6884209962Smmstruct vop_vector zfs_shareops = { 6885209962Smm .vop_default = &default_vnodeops, 6886209962Smm .vop_access = zfs_freebsd_access, 6887209962Smm .vop_inactive = zfs_freebsd_inactive, 6888209962Smm .vop_reclaim = zfs_freebsd_reclaim, 6889209962Smm .vop_fid = zfs_freebsd_fid, 6890209962Smm .vop_pathconf = zfs_freebsd_pathconf, 6891209962Smm}; 6892