zfs_vnops.c revision 331017
1168404Spjd/*
2168404Spjd * CDDL HEADER START
3168404Spjd *
4168404Spjd * The contents of this file are subject to the terms of the
5168404Spjd * Common Development and Distribution License (the "License").
6168404Spjd * You may not use this file except in compliance with the License.
7168404Spjd *
8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9168404Spjd * or http://www.opensolaris.org/os/licensing.
10168404Spjd * See the License for the specific language governing permissions
11168404Spjd * and limitations under the License.
12168404Spjd *
13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each
14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15168404Spjd * If applicable, add the following below this CDDL HEADER, with the
16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18168404Spjd *
19168404Spjd * CDDL HEADER END
20168404Spjd */
21321545Smav
22168404Spjd/*
23212694Smm * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24289562Smav * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25296519Smav * Copyright (c) 2014 Integros [integros.com]
26321545Smav * Copyright 2017 Nexenta Systems, Inc.
27168404Spjd */
28168404Spjd
29169195Spjd/* Portions Copyright 2007 Jeremy Teo */
30219089Spjd/* Portions Copyright 2010 Robert Milkowski */
31169195Spjd
32168404Spjd#include <sys/types.h>
33168404Spjd#include <sys/param.h>
34168404Spjd#include <sys/time.h>
35168404Spjd#include <sys/systm.h>
36168404Spjd#include <sys/sysmacros.h>
37168404Spjd#include <sys/resource.h>
38168404Spjd#include <sys/vfs.h>
39248084Sattilio#include <sys/vm.h>
40168404Spjd#include <sys/vnode.h>
41168404Spjd#include <sys/file.h>
42168404Spjd#include <sys/stat.h>
43168404Spjd#include <sys/kmem.h>
44168404Spjd#include <sys/taskq.h>
45168404Spjd#include <sys/uio.h>
46168404Spjd#include <sys/atomic.h>
47168404Spjd#include <sys/namei.h>
48168404Spjd#include <sys/mman.h>
49168404Spjd#include <sys/cmn_err.h>
50168404Spjd#include <sys/errno.h>
51168404Spjd#include <sys/unistd.h>
52168404Spjd#include <sys/zfs_dir.h>
53168404Spjd#include <sys/zfs_ioctl.h>
54168404Spjd#include <sys/fs/zfs.h>
55168404Spjd#include <sys/dmu.h>
56219089Spjd#include <sys/dmu_objset.h>
57168404Spjd#include <sys/spa.h>
58168404Spjd#include <sys/txg.h>
59168404Spjd#include <sys/dbuf.h>
60168404Spjd#include <sys/zap.h>
61219089Spjd#include <sys/sa.h>
62168404Spjd#include <sys/dirent.h>
63168962Spjd#include <sys/policy.h>
64168962Spjd#include <sys/sunddi.h>
65168404Spjd#include <sys/filio.h>
66209962Smm#include <sys/sid.h>
67168404Spjd#include <sys/zfs_ctldir.h>
68185029Spjd#include <sys/zfs_fuid.h>
69219089Spjd#include <sys/zfs_sa.h>
70168404Spjd#include <sys/zfs_rlock.h>
71185029Spjd#include <sys/extdirent.h>
72185029Spjd#include <sys/kidmap.h>
73168404Spjd#include <sys/bio.h>
74168404Spjd#include <sys/buf.h>
75168404Spjd#include <sys/sched.h>
76192800Strasz#include <sys/acl.h>
77331017Skevans#include <sys/vmmeter.h>
78239077Smarius#include <vm/vm_param.h>
79325132Savg#include <sys/zil.h>
80168404Spjd
81168404Spjd/*
82168404Spjd * Programming rules.
83168404Spjd *
84168404Spjd * Each vnode op performs some logical unit of work.  To do this, the ZPL must
85168404Spjd * properly lock its in-core state, create a DMU transaction, do the work,
86168404Spjd * record this work in the intent log (ZIL), commit the DMU transaction,
87185029Spjd * and wait for the intent log to commit if it is a synchronous operation.
88185029Spjd * Moreover, the vnode ops must work in both normal and log replay context.
89168404Spjd * The ordering of events is important to avoid deadlocks and references
90168404Spjd * to freed memory.  The example below illustrates the following Big Rules:
91168404Spjd *
92251631Sdelphij *  (1)	A check must be made in each zfs thread for a mounted file system.
93168404Spjd *	This is done avoiding races using ZFS_ENTER(zfsvfs).
94251631Sdelphij *	A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
95251631Sdelphij *	must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
96251631Sdelphij *	can return EIO from the calling function.
97168404Spjd *
98168404Spjd *  (2)	VN_RELE() should always be the last thing except for zil_commit()
99168404Spjd *	(if necessary) and ZFS_EXIT(). This is for 3 reasons:
100168404Spjd *	First, if it's the last reference, the vnode/znode
101168404Spjd *	can be freed, so the zp may point to freed memory.  Second, the last
102168404Spjd *	reference will call zfs_zinactive(), which may induce a lot of work --
103168404Spjd *	pushing cached pages (which acquires range locks) and syncing out
104168404Spjd *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
105168404Spjd *	which could deadlock the system if you were already holding one.
106191900Skmacy *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
107168404Spjd *
108168404Spjd *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
109168404Spjd *	as they can span dmu_tx_assign() calls.
110168404Spjd *
111258720Savg *  (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
112258720Savg *      dmu_tx_assign().  This is critical because we don't want to block
113258720Savg *      while holding locks.
114168404Spjd *
115258720Savg *	If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
116258720Savg *	reduces lock contention and CPU usage when we must wait (note that if
117258720Savg *	throughput is constrained by the storage, nearly every transaction
118258720Savg *	must wait).
119258720Savg *
120258720Savg *      Note, in particular, that if a lock is sometimes acquired before
121258720Savg *      the tx assigns, and sometimes after (e.g. z_lock), then failing
122258720Savg *      to use a non-blocking assign can deadlock the system.  The scenario:
123258720Savg *
124168404Spjd *	Thread A has grabbed a lock before calling dmu_tx_assign().
125168404Spjd *	Thread B is in an already-assigned tx, and blocks for this lock.
126168404Spjd *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
127168404Spjd *	forever, because the previous txg can't quiesce until B's tx commits.
128168404Spjd *
129168404Spjd *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
130258632Savg *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
131330986Savg *	calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
132258632Savg *	to indicate that this operation has already called dmu_tx_wait().
133258632Savg *	This will ensure that we don't retry forever, waiting a short bit
134258632Savg *	each time.
135168404Spjd *
136168404Spjd *  (5)	If the operation succeeded, generate the intent log entry for it
137168404Spjd *	before dropping locks.  This ensures that the ordering of events
138168404Spjd *	in the intent log matches the order in which they actually occurred.
139251631Sdelphij *	During ZIL replay the zfs_log_* functions will update the sequence
140209962Smm *	number to indicate the zil transaction has replayed.
141168404Spjd *
142168404Spjd *  (6)	At the end of each vnode op, the DMU tx must always commit,
143168404Spjd *	regardless of whether there were any errors.
144168404Spjd *
145219089Spjd *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
146168404Spjd *	to ensure that synchronous semantics are provided when necessary.
147168404Spjd *
148168404Spjd * In general, this is how things should be ordered in each vnode op:
149168404Spjd *
150168404Spjd *	ZFS_ENTER(zfsvfs);		// exit if unmounted
151168404Spjd * top:
152303970Savg *	zfs_dirent_lookup(&dl, ...)	// lock directory entry (may VN_HOLD())
153168404Spjd *	rw_enter(...);			// grab any other locks you need
154168404Spjd *	tx = dmu_tx_create(...);	// get DMU tx
155168404Spjd *	dmu_tx_hold_*();		// hold each object you might modify
156330986Savg *	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
157168404Spjd *	if (error) {
158168404Spjd *		rw_exit(...);		// drop locks
159168404Spjd *		zfs_dirent_unlock(dl);	// unlock directory entry
160168404Spjd *		VN_RELE(...);		// release held vnodes
161209962Smm *		if (error == ERESTART) {
162258632Savg *			waited = B_TRUE;
163168404Spjd *			dmu_tx_wait(tx);
164168404Spjd *			dmu_tx_abort(tx);
165168404Spjd *			goto top;
166168404Spjd *		}
167168404Spjd *		dmu_tx_abort(tx);	// abort DMU tx
168168404Spjd *		ZFS_EXIT(zfsvfs);	// finished in zfs
169168404Spjd *		return (error);		// really out of space
170168404Spjd *	}
171168404Spjd *	error = do_real_work();		// do whatever this VOP does
172168404Spjd *	if (error == 0)
173168404Spjd *		zfs_log_*(...);		// on success, make ZIL entry
174168404Spjd *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
175168404Spjd *	rw_exit(...);			// drop locks
176168404Spjd *	zfs_dirent_unlock(dl);		// unlock directory entry
177168404Spjd *	VN_RELE(...);			// release held vnodes
178219089Spjd *	zil_commit(zilog, foid);	// synchronous when necessary
179168404Spjd *	ZFS_EXIT(zfsvfs);		// finished in zfs
180168404Spjd *	return (error);			// done, report error
181168404Spjd */
182185029Spjd
183168404Spjd/* ARGSUSED */
184168404Spjdstatic int
185185029Spjdzfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
186168404Spjd{
187168962Spjd	znode_t	*zp = VTOZ(*vpp);
188209962Smm	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
189168404Spjd
190209962Smm	ZFS_ENTER(zfsvfs);
191209962Smm	ZFS_VERIFY_ZP(zp);
192209962Smm
193219089Spjd	if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
194185029Spjd	    ((flag & FAPPEND) == 0)) {
195209962Smm		ZFS_EXIT(zfsvfs);
196249195Smm		return (SET_ERROR(EPERM));
197185029Spjd	}
198185029Spjd
199185029Spjd	if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
200185029Spjd	    ZTOV(zp)->v_type == VREG &&
201219089Spjd	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
202209962Smm		if (fs_vscan(*vpp, cr, 0) != 0) {
203209962Smm			ZFS_EXIT(zfsvfs);
204249195Smm			return (SET_ERROR(EACCES));
205209962Smm		}
206209962Smm	}
207185029Spjd
208168404Spjd	/* Keep a count of the synchronous opens in the znode */
209168962Spjd	if (flag & (FSYNC | FDSYNC))
210168404Spjd		atomic_inc_32(&zp->z_sync_cnt);
211185029Spjd
212209962Smm	ZFS_EXIT(zfsvfs);
213168404Spjd	return (0);
214168404Spjd}
215168404Spjd
216168404Spjd/* ARGSUSED */
217168404Spjdstatic int
218185029Spjdzfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
219185029Spjd    caller_context_t *ct)
220168404Spjd{
221168962Spjd	znode_t	*zp = VTOZ(vp);
222209962Smm	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
223168404Spjd
224210470Smm	/*
225210470Smm	 * Clean up any locks held by this process on the vp.
226210470Smm	 */
227210470Smm	cleanlocks(vp, ddi_get_pid(), 0);
228210470Smm	cleanshares(vp, ddi_get_pid());
229210470Smm
230209962Smm	ZFS_ENTER(zfsvfs);
231209962Smm	ZFS_VERIFY_ZP(zp);
232209962Smm
233168404Spjd	/* Decrement the synchronous opens in the znode */
234185029Spjd	if ((flag & (FSYNC | FDSYNC)) && (count == 1))
235168404Spjd		atomic_dec_32(&zp->z_sync_cnt);
236168404Spjd
237185029Spjd	if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
238185029Spjd	    ZTOV(zp)->v_type == VREG &&
239219089Spjd	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
240185029Spjd		VERIFY(fs_vscan(vp, cr, 1) == 0);
241185029Spjd
242209962Smm	ZFS_EXIT(zfsvfs);
243168404Spjd	return (0);
244168404Spjd}
245168404Spjd
246168404Spjd/*
247168404Spjd * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and
248168404Spjd * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter.
249168404Spjd */
250168404Spjdstatic int
251168978Spjdzfs_holey(vnode_t *vp, u_long cmd, offset_t *off)
252168404Spjd{
253168404Spjd	znode_t	*zp = VTOZ(vp);
254168404Spjd	uint64_t noff = (uint64_t)*off; /* new offset */
255168404Spjd	uint64_t file_sz;
256168404Spjd	int error;
257168404Spjd	boolean_t hole;
258168404Spjd
259219089Spjd	file_sz = zp->z_size;
260168404Spjd	if (noff >= file_sz)  {
261249195Smm		return (SET_ERROR(ENXIO));
262168404Spjd	}
263168404Spjd
264168962Spjd	if (cmd == _FIO_SEEK_HOLE)
265168404Spjd		hole = B_TRUE;
266168404Spjd	else
267168404Spjd		hole = B_FALSE;
268168404Spjd
269168404Spjd	error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff);
270168404Spjd
271271536Sdelphij	if (error == ESRCH)
272249195Smm		return (SET_ERROR(ENXIO));
273271536Sdelphij
274271536Sdelphij	/*
275271536Sdelphij	 * We could find a hole that begins after the logical end-of-file,
276271536Sdelphij	 * because dmu_offset_next() only works on whole blocks.  If the
277271536Sdelphij	 * EOF falls mid-block, then indicate that the "virtual hole"
278271536Sdelphij	 * at the end of the file begins at the logical EOF, rather than
279271536Sdelphij	 * at the end of the last block.
280271536Sdelphij	 */
281271536Sdelphij	if (noff > file_sz) {
282271536Sdelphij		ASSERT(hole);
283271536Sdelphij		noff = file_sz;
284168404Spjd	}
285168404Spjd
286168404Spjd	if (noff < *off)
287168404Spjd		return (error);
288168404Spjd	*off = noff;
289168404Spjd	return (error);
290168404Spjd}
291168404Spjd
292168404Spjd/* ARGSUSED */
293168404Spjdstatic int
294168978Spjdzfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred,
295185029Spjd    int *rvalp, caller_context_t *ct)
296168404Spjd{
297168962Spjd	offset_t off;
298287103Savg	offset_t ndata;
299287103Savg	dmu_object_info_t doi;
300168962Spjd	int error;
301168962Spjd	zfsvfs_t *zfsvfs;
302185029Spjd	znode_t *zp;
303168404Spjd
304168404Spjd	switch (com) {
305185029Spjd	case _FIOFFS:
306287103Savg	{
307168962Spjd		return (0);
308168404Spjd
309168962Spjd		/*
310168962Spjd		 * The following two ioctls are used by bfu.  Faking out,
311168962Spjd		 * necessary to avoid bfu errors.
312168962Spjd		 */
313287103Savg	}
314185029Spjd	case _FIOGDIO:
315185029Spjd	case _FIOSDIO:
316287103Savg	{
317168962Spjd		return (0);
318287103Savg	}
319168962Spjd
320185029Spjd	case _FIO_SEEK_DATA:
321185029Spjd	case _FIO_SEEK_HOLE:
322287103Savg	{
323277300Ssmh#ifdef illumos
324168962Spjd		if (ddi_copyin((void *)data, &off, sizeof (off), flag))
325249195Smm			return (SET_ERROR(EFAULT));
326233918Savg#else
327233918Savg		off = *(offset_t *)data;
328233918Savg#endif
329185029Spjd		zp = VTOZ(vp);
330185029Spjd		zfsvfs = zp->z_zfsvfs;
331168404Spjd		ZFS_ENTER(zfsvfs);
332185029Spjd		ZFS_VERIFY_ZP(zp);
333168404Spjd
334168404Spjd		/* offset parameter is in/out */
335168404Spjd		error = zfs_holey(vp, com, &off);
336168404Spjd		ZFS_EXIT(zfsvfs);
337168404Spjd		if (error)
338168404Spjd			return (error);
339277300Ssmh#ifdef illumos
340168962Spjd		if (ddi_copyout(&off, (void *)data, sizeof (off), flag))
341249195Smm			return (SET_ERROR(EFAULT));
342233918Savg#else
343233918Savg		*(offset_t *)data = off;
344233918Savg#endif
345168404Spjd		return (0);
346168404Spjd	}
347287103Savg#ifdef illumos
348287103Savg	case _FIO_COUNT_FILLED:
349287103Savg	{
350287103Savg		/*
351287103Savg		 * _FIO_COUNT_FILLED adds a new ioctl command which
352287103Savg		 * exposes the number of filled blocks in a
353287103Savg		 * ZFS object.
354287103Savg		 */
355287103Savg		zp = VTOZ(vp);
356287103Savg		zfsvfs = zp->z_zfsvfs;
357287103Savg		ZFS_ENTER(zfsvfs);
358287103Savg		ZFS_VERIFY_ZP(zp);
359287103Savg
360287103Savg		/*
361287103Savg		 * Wait for all dirty blocks for this object
362287103Savg		 * to get synced out to disk, and the DMU info
363287103Savg		 * updated.
364287103Savg		 */
365287103Savg		error = dmu_object_wait_synced(zfsvfs->z_os, zp->z_id);
366287103Savg		if (error) {
367287103Savg			ZFS_EXIT(zfsvfs);
368287103Savg			return (error);
369287103Savg		}
370287103Savg
371287103Savg		/*
372287103Savg		 * Retrieve fill count from DMU object.
373287103Savg		 */
374287103Savg		error = dmu_object_info(zfsvfs->z_os, zp->z_id, &doi);
375287103Savg		if (error) {
376287103Savg			ZFS_EXIT(zfsvfs);
377287103Savg			return (error);
378287103Savg		}
379287103Savg
380287103Savg		ndata = doi.doi_fill_count;
381287103Savg
382287103Savg		ZFS_EXIT(zfsvfs);
383287103Savg		if (ddi_copyout(&ndata, (void *)data, sizeof (ndata), flag))
384287103Savg			return (SET_ERROR(EFAULT));
385287103Savg		return (0);
386287103Savg	}
387287103Savg#endif
388287103Savg	}
389249195Smm	return (SET_ERROR(ENOTTY));
390168404Spjd}
391168404Spjd
392209962Smmstatic vm_page_t
393253953Sattiliopage_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
394209962Smm{
395209962Smm	vm_object_t obj;
396209962Smm	vm_page_t pp;
397258353Savg	int64_t end;
398209962Smm
399258353Savg	/*
400258353Savg	 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
401258353Savg	 * aligned boundaries, if the range is not aligned.  As a result a
402258353Savg	 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
403258353Savg	 * It may happen that all DEV_BSIZE subranges are marked clean and thus
404258353Savg	 * the whole page would be considred clean despite have some dirty data.
405258353Savg	 * For this reason we should shrink the range to DEV_BSIZE aligned
406258353Savg	 * boundaries before calling vm_page_clear_dirty.
407258353Savg	 */
408258353Savg	end = rounddown2(off + nbytes, DEV_BSIZE);
409258353Savg	off = roundup2(off, DEV_BSIZE);
410258353Savg	nbytes = end - off;
411258353Savg
412209962Smm	obj = vp->v_object;
413248084Sattilio	zfs_vmobject_assert_wlocked(obj);
414209962Smm
415209962Smm	for (;;) {
416209962Smm		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
417246293Savg		    pp->valid) {
418254138Sattilio			if (vm_page_xbusied(pp)) {
419212652Savg				/*
420212652Savg				 * Reference the page before unlocking and
421212652Savg				 * sleeping so that the page daemon is less
422212652Savg				 * likely to reclaim it.
423212652Savg				 */
424225418Skib				vm_page_reference(pp);
425254138Sattilio				vm_page_lock(pp);
426254138Sattilio				zfs_vmobject_wunlock(obj);
427307671Skib				vm_page_busy_sleep(pp, "zfsmwb", true);
428254138Sattilio				zfs_vmobject_wlock(obj);
429209962Smm				continue;
430212652Savg			}
431254138Sattilio			vm_page_sbusy(pp);
432319091Savg		} else if (pp != NULL) {
433319091Savg			ASSERT(!pp->valid);
434252337Sgavin			pp = NULL;
435209962Smm		}
436246293Savg
437246293Savg		if (pp != NULL) {
438246293Savg			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
439253953Sattilio			vm_object_pip_add(obj, 1);
440246293Savg			pmap_remove_write(pp);
441258353Savg			if (nbytes != 0)
442258353Savg				vm_page_clear_dirty(pp, off, nbytes);
443246293Savg		}
444209962Smm		break;
445209962Smm	}
446209962Smm	return (pp);
447209962Smm}
448209962Smm
449209962Smmstatic void
450253953Sattiliopage_unbusy(vm_page_t pp)
451209962Smm{
452209962Smm
453254138Sattilio	vm_page_sunbusy(pp);
454253953Sattilio	vm_object_pip_subtract(pp->object, 1);
455209962Smm}
456209962Smm
457253953Sattiliostatic vm_page_t
458253953Sattiliopage_hold(vnode_t *vp, int64_t start)
459253953Sattilio{
460253953Sattilio	vm_object_t obj;
461253953Sattilio	vm_page_t pp;
462253953Sattilio
463253953Sattilio	obj = vp->v_object;
464253953Sattilio	zfs_vmobject_assert_wlocked(obj);
465253953Sattilio
466253953Sattilio	for (;;) {
467253953Sattilio		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
468253953Sattilio		    pp->valid) {
469254138Sattilio			if (vm_page_xbusied(pp)) {
470253953Sattilio				/*
471253953Sattilio				 * Reference the page before unlocking and
472253953Sattilio				 * sleeping so that the page daemon is less
473253953Sattilio				 * likely to reclaim it.
474253953Sattilio				 */
475253953Sattilio				vm_page_reference(pp);
476254138Sattilio				vm_page_lock(pp);
477254138Sattilio				zfs_vmobject_wunlock(obj);
478307671Skib				vm_page_busy_sleep(pp, "zfsmwb", true);
479254138Sattilio				zfs_vmobject_wlock(obj);
480253953Sattilio				continue;
481253953Sattilio			}
482253953Sattilio
483253953Sattilio			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
484253953Sattilio			vm_page_lock(pp);
485253953Sattilio			vm_page_hold(pp);
486253953Sattilio			vm_page_unlock(pp);
487253953Sattilio
488253953Sattilio		} else
489253953Sattilio			pp = NULL;
490253953Sattilio		break;
491253953Sattilio	}
492253953Sattilio	return (pp);
493253953Sattilio}
494253953Sattilio
495253953Sattiliostatic void
496253953Sattiliopage_unhold(vm_page_t pp)
497253953Sattilio{
498253953Sattilio
499253953Sattilio	vm_page_lock(pp);
500253953Sattilio	vm_page_unhold(pp);
501253953Sattilio	vm_page_unlock(pp);
502253953Sattilio}
503253953Sattilio
504168404Spjd/*
505168404Spjd * When a file is memory mapped, we must keep the IO data synchronized
506168404Spjd * between the DMU cache and the memory mapped pages.  What this means:
507168404Spjd *
508168404Spjd * On Write:	If we find a memory mapped page, we write to *both*
509168404Spjd *		the page and the dmu buffer.
510168404Spjd */
511209962Smmstatic void
512209962Smmupdate_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid,
513209962Smm    int segflg, dmu_tx_t *tx)
514168404Spjd{
515168404Spjd	vm_object_t obj;
516168404Spjd	struct sf_buf *sf;
517246293Savg	caddr_t va;
518212655Savg	int off;
519168404Spjd
520258746Savg	ASSERT(segflg != UIO_NOCOPY);
521168404Spjd	ASSERT(vp->v_mount != NULL);
522168404Spjd	obj = vp->v_object;
523168404Spjd	ASSERT(obj != NULL);
524168404Spjd
525168404Spjd	off = start & PAGEOFFSET;
526248084Sattilio	zfs_vmobject_wlock(obj);
527168404Spjd	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
528209962Smm		vm_page_t pp;
529246293Savg		int nbytes = imin(PAGESIZE - off, len);
530168404Spjd
531258746Savg		if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
532248084Sattilio			zfs_vmobject_wunlock(obj);
533168404Spjd
534246293Savg			va = zfs_map_page(pp, &sf);
535246293Savg			(void) dmu_read(os, oid, start+off, nbytes,
536246293Savg			    va+off, DMU_READ_PREFETCH);;
537209962Smm			zfs_unmap_page(sf);
538246293Savg
539248084Sattilio			zfs_vmobject_wlock(obj);
540253953Sattilio			page_unbusy(pp);
541168404Spjd		}
542209962Smm		len -= nbytes;
543168404Spjd		off = 0;
544168404Spjd	}
545258746Savg	vm_object_pip_wakeupn(obj, 0);
546248084Sattilio	zfs_vmobject_wunlock(obj);
547168404Spjd}
548168404Spjd
549168404Spjd/*
550219089Spjd * Read with UIO_NOCOPY flag means that sendfile(2) requests
551219089Spjd * ZFS to populate a range of page cache pages with data.
552219089Spjd *
553219089Spjd * NOTE: this function could be optimized to pre-allocate
554254138Sattilio * all pages in advance, drain exclusive busy on all of them,
555219089Spjd * map them into contiguous KVA region and populate them
556219089Spjd * in one single dmu_read() call.
557219089Spjd */
558219089Spjdstatic int
559219089Spjdmappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
560219089Spjd{
561219089Spjd	znode_t *zp = VTOZ(vp);
562219089Spjd	objset_t *os = zp->z_zfsvfs->z_os;
563219089Spjd	struct sf_buf *sf;
564219089Spjd	vm_object_t obj;
565219089Spjd	vm_page_t pp;
566219089Spjd	int64_t start;
567219089Spjd	caddr_t va;
568219089Spjd	int len = nbytes;
569219089Spjd	int off;
570219089Spjd	int error = 0;
571219089Spjd
572219089Spjd	ASSERT(uio->uio_segflg == UIO_NOCOPY);
573219089Spjd	ASSERT(vp->v_mount != NULL);
574219089Spjd	obj = vp->v_object;
575219089Spjd	ASSERT(obj != NULL);
576219089Spjd	ASSERT((uio->uio_loffset & PAGEOFFSET) == 0);
577219089Spjd
578248084Sattilio	zfs_vmobject_wlock(obj);
579219089Spjd	for (start = uio->uio_loffset; len > 0; start += PAGESIZE) {
580219089Spjd		int bytes = MIN(PAGESIZE, len);
581219089Spjd
582254138Sattilio		pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY |
583254649Skib		    VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
584219089Spjd		if (pp->valid == 0) {
585248084Sattilio			zfs_vmobject_wunlock(obj);
586219089Spjd			va = zfs_map_page(pp, &sf);
587219089Spjd			error = dmu_read(os, zp->z_id, start, bytes, va,
588219089Spjd			    DMU_READ_PREFETCH);
589219089Spjd			if (bytes != PAGESIZE && error == 0)
590219089Spjd				bzero(va + bytes, PAGESIZE - bytes);
591219089Spjd			zfs_unmap_page(sf);
592248084Sattilio			zfs_vmobject_wlock(obj);
593254138Sattilio			vm_page_sunbusy(pp);
594219089Spjd			vm_page_lock(pp);
595219089Spjd			if (error) {
596253073Savg				if (pp->wire_count == 0 && pp->valid == 0 &&
597254138Sattilio				    !vm_page_busied(pp))
598253073Savg					vm_page_free(pp);
599219089Spjd			} else {
600219089Spjd				pp->valid = VM_PAGE_BITS_ALL;
601219089Spjd				vm_page_activate(pp);
602219089Spjd			}
603219089Spjd			vm_page_unlock(pp);
604258739Savg		} else {
605258739Savg			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
606254138Sattilio			vm_page_sunbusy(pp);
607258739Savg		}
608219089Spjd		if (error)
609219089Spjd			break;
610219089Spjd		uio->uio_resid -= bytes;
611219089Spjd		uio->uio_offset += bytes;
612219089Spjd		len -= bytes;
613219089Spjd	}
614248084Sattilio	zfs_vmobject_wunlock(obj);
615219089Spjd	return (error);
616219089Spjd}
617219089Spjd
618219089Spjd/*
619168404Spjd * When a file is memory mapped, we must keep the IO data synchronized
620168404Spjd * between the DMU cache and the memory mapped pages.  What this means:
621168404Spjd *
622168404Spjd * On Read:	We "read" preferentially from memory mapped pages,
623168404Spjd *		else we default from the dmu buffer.
624168404Spjd *
625168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
626251631Sdelphij *	 the file is memory mapped.
627168404Spjd */
628168404Spjdstatic int
629168404Spjdmappedread(vnode_t *vp, int nbytes, uio_t *uio)
630168404Spjd{
631168404Spjd	znode_t *zp = VTOZ(vp);
632168404Spjd	vm_object_t obj;
633212655Savg	int64_t start;
634168926Spjd	caddr_t va;
635168404Spjd	int len = nbytes;
636212655Savg	int off;
637168404Spjd	int error = 0;
638168404Spjd
639168404Spjd	ASSERT(vp->v_mount != NULL);
640168404Spjd	obj = vp->v_object;
641168404Spjd	ASSERT(obj != NULL);
642168404Spjd
643168404Spjd	start = uio->uio_loffset;
644168404Spjd	off = start & PAGEOFFSET;
645248084Sattilio	zfs_vmobject_wlock(obj);
646168404Spjd	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
647219089Spjd		vm_page_t pp;
648219089Spjd		uint64_t bytes = MIN(PAGESIZE - off, len);
649168404Spjd
650253953Sattilio		if (pp = page_hold(vp, start)) {
651219089Spjd			struct sf_buf *sf;
652219089Spjd			caddr_t va;
653212652Savg
654248084Sattilio			zfs_vmobject_wunlock(obj);
655219089Spjd			va = zfs_map_page(pp, &sf);
656298105Savg#ifdef illumos
657219089Spjd			error = uiomove(va + off, bytes, UIO_READ, uio);
658298105Savg#else
659298105Savg			error = vn_io_fault_uiomove(va + off, bytes, uio);
660298105Savg#endif
661219089Spjd			zfs_unmap_page(sf);
662248084Sattilio			zfs_vmobject_wlock(obj);
663253953Sattilio			page_unhold(pp);
664219089Spjd		} else {
665248084Sattilio			zfs_vmobject_wunlock(obj);
666272809Sdelphij			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
667272809Sdelphij			    uio, bytes);
668248084Sattilio			zfs_vmobject_wlock(obj);
669168404Spjd		}
670168404Spjd		len -= bytes;
671168404Spjd		off = 0;
672168404Spjd		if (error)
673168404Spjd			break;
674168404Spjd	}
675248084Sattilio	zfs_vmobject_wunlock(obj);
676168404Spjd	return (error);
677168404Spjd}
678168404Spjd
679168404Spjdoffset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */
680168404Spjd
681168404Spjd/*
682168404Spjd * Read bytes from specified file into supplied buffer.
683168404Spjd *
684168404Spjd *	IN:	vp	- vnode of file to be read from.
685168404Spjd *		uio	- structure supplying read location, range info,
686168404Spjd *			  and return buffer.
687168404Spjd *		ioflag	- SYNC flags; used to provide FRSYNC semantics.
688168404Spjd *		cr	- credentials of caller.
689185029Spjd *		ct	- caller context
690168404Spjd *
691168404Spjd *	OUT:	uio	- updated offset and range, buffer filled.
692168404Spjd *
693251631Sdelphij *	RETURN:	0 on success, error code on failure.
694168404Spjd *
695168404Spjd * Side Effects:
696168404Spjd *	vp - atime updated if byte count > 0
697168404Spjd */
698168404Spjd/* ARGSUSED */
699168404Spjdstatic int
700168962Spjdzfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
701168404Spjd{
702168404Spjd	znode_t		*zp = VTOZ(vp);
703168404Spjd	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
704168404Spjd	ssize_t		n, nbytes;
705247187Smm	int		error = 0;
706168404Spjd	rl_t		*rl;
707219089Spjd	xuio_t		*xuio = NULL;
708168404Spjd
709168404Spjd	ZFS_ENTER(zfsvfs);
710185029Spjd	ZFS_VERIFY_ZP(zp);
711168404Spjd
712219089Spjd	if (zp->z_pflags & ZFS_AV_QUARANTINED) {
713185029Spjd		ZFS_EXIT(zfsvfs);
714249195Smm		return (SET_ERROR(EACCES));
715185029Spjd	}
716185029Spjd
717168404Spjd	/*
718168404Spjd	 * Validate file offset
719168404Spjd	 */
720168404Spjd	if (uio->uio_loffset < (offset_t)0) {
721168404Spjd		ZFS_EXIT(zfsvfs);
722249195Smm		return (SET_ERROR(EINVAL));
723168404Spjd	}
724168404Spjd
725168404Spjd	/*
726168404Spjd	 * Fasttrack empty reads
727168404Spjd	 */
728168404Spjd	if (uio->uio_resid == 0) {
729168404Spjd		ZFS_EXIT(zfsvfs);
730168404Spjd		return (0);
731168404Spjd	}
732168404Spjd
733168404Spjd	/*
734168962Spjd	 * Check for mandatory locks
735168962Spjd	 */
736219089Spjd	if (MANDMODE(zp->z_mode)) {
737168962Spjd		if (error = chklock(vp, FREAD,
738168962Spjd		    uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) {
739168962Spjd			ZFS_EXIT(zfsvfs);
740168962Spjd			return (error);
741168962Spjd		}
742168962Spjd	}
743168962Spjd
744168962Spjd	/*
745168404Spjd	 * If we're in FRSYNC mode, sync out this znode before reading it.
746168404Spjd	 */
747224605Smm	if (zfsvfs->z_log &&
748224605Smm	    (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS))
749219089Spjd		zil_commit(zfsvfs->z_log, zp->z_id);
750168404Spjd
751168404Spjd	/*
752168404Spjd	 * Lock the range against changes.
753168404Spjd	 */
754168404Spjd	rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER);
755168404Spjd
756168404Spjd	/*
757168404Spjd	 * If we are reading past end-of-file we can skip
758168404Spjd	 * to the end; but we might still need to set atime.
759168404Spjd	 */
760219089Spjd	if (uio->uio_loffset >= zp->z_size) {
761168404Spjd		error = 0;
762168404Spjd		goto out;
763168404Spjd	}
764168404Spjd
765219089Spjd	ASSERT(uio->uio_loffset < zp->z_size);
766219089Spjd	n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset);
767168404Spjd
768277300Ssmh#ifdef illumos
769219089Spjd	if ((uio->uio_extflg == UIO_XUIO) &&
770219089Spjd	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) {
771219089Spjd		int nblk;
772219089Spjd		int blksz = zp->z_blksz;
773219089Spjd		uint64_t offset = uio->uio_loffset;
774219089Spjd
775219089Spjd		xuio = (xuio_t *)uio;
776219089Spjd		if ((ISP2(blksz))) {
777219089Spjd			nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset,
778219089Spjd			    blksz)) / blksz;
779219089Spjd		} else {
780219089Spjd			ASSERT(offset + n <= blksz);
781219089Spjd			nblk = 1;
782219089Spjd		}
783219089Spjd		(void) dmu_xuio_init(xuio, nblk);
784219089Spjd
785219089Spjd		if (vn_has_cached_data(vp)) {
786219089Spjd			/*
787219089Spjd			 * For simplicity, we always allocate a full buffer
788219089Spjd			 * even if we only expect to read a portion of a block.
789219089Spjd			 */
790219089Spjd			while (--nblk >= 0) {
791219089Spjd				(void) dmu_xuio_add(xuio,
792219089Spjd				    dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
793219089Spjd				    blksz), 0, blksz);
794219089Spjd			}
795219089Spjd		}
796219089Spjd	}
797277300Ssmh#endif	/* illumos */
798219089Spjd
799168404Spjd	while (n > 0) {
800168404Spjd		nbytes = MIN(n, zfs_read_chunk_size -
801168404Spjd		    P2PHASE(uio->uio_loffset, zfs_read_chunk_size));
802168404Spjd
803219089Spjd#ifdef __FreeBSD__
804219089Spjd		if (uio->uio_segflg == UIO_NOCOPY)
805219089Spjd			error = mappedread_sf(vp, nbytes, uio);
806219089Spjd		else
807219089Spjd#endif /* __FreeBSD__ */
808272809Sdelphij		if (vn_has_cached_data(vp)) {
809168404Spjd			error = mappedread(vp, nbytes, uio);
810272809Sdelphij		} else {
811272809Sdelphij			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
812272809Sdelphij			    uio, nbytes);
813272809Sdelphij		}
814185029Spjd		if (error) {
815185029Spjd			/* convert checksum errors into IO errors */
816185029Spjd			if (error == ECKSUM)
817249195Smm				error = SET_ERROR(EIO);
818168404Spjd			break;
819185029Spjd		}
820168962Spjd
821168404Spjd		n -= nbytes;
822168404Spjd	}
823168404Spjdout:
824168404Spjd	zfs_range_unlock(rl);
825168404Spjd
826168404Spjd	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
827168404Spjd	ZFS_EXIT(zfsvfs);
828168404Spjd	return (error);
829168404Spjd}
830168404Spjd
831168404Spjd/*
832168404Spjd * Write the bytes to a file.
833168404Spjd *
834168404Spjd *	IN:	vp	- vnode of file to be written to.
835168404Spjd *		uio	- structure supplying write location, range info,
836168404Spjd *			  and data buffer.
837251631Sdelphij *		ioflag	- FAPPEND, FSYNC, and/or FDSYNC.  FAPPEND is
838251631Sdelphij *			  set if in append mode.
839168404Spjd *		cr	- credentials of caller.
840185029Spjd *		ct	- caller context (NFS/CIFS fem monitor only)
841168404Spjd *
842168404Spjd *	OUT:	uio	- updated offset and range.
843168404Spjd *
844251631Sdelphij *	RETURN:	0 on success, error code on failure.
845168404Spjd *
846168404Spjd * Timestamps:
847168404Spjd *	vp - ctime|mtime updated if byte count > 0
848168404Spjd */
849219089Spjd
850168404Spjd/* ARGSUSED */
851168404Spjdstatic int
852168962Spjdzfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
853168404Spjd{
854168404Spjd	znode_t		*zp = VTOZ(vp);
855168962Spjd	rlim64_t	limit = MAXOFFSET_T;
856168404Spjd	ssize_t		start_resid = uio->uio_resid;
857168404Spjd	ssize_t		tx_bytes;
858168404Spjd	uint64_t	end_size;
859168404Spjd	dmu_tx_t	*tx;
860168404Spjd	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
861185029Spjd	zilog_t		*zilog;
862168404Spjd	offset_t	woff;
863168404Spjd	ssize_t		n, nbytes;
864168404Spjd	rl_t		*rl;
865168404Spjd	int		max_blksz = zfsvfs->z_max_blksz;
866247187Smm	int		error = 0;
867209962Smm	arc_buf_t	*abuf;
868247187Smm	iovec_t		*aiov = NULL;
869219089Spjd	xuio_t		*xuio = NULL;
870219089Spjd	int		i_iov = 0;
871219089Spjd	int		iovcnt = uio->uio_iovcnt;
872219089Spjd	iovec_t		*iovp = uio->uio_iov;
873219089Spjd	int		write_eof;
874219089Spjd	int		count = 0;
875219089Spjd	sa_bulk_attr_t	bulk[4];
876219089Spjd	uint64_t	mtime[2], ctime[2];
877168404Spjd
878168404Spjd	/*
879168404Spjd	 * Fasttrack empty write
880168404Spjd	 */
881168404Spjd	n = start_resid;
882168404Spjd	if (n == 0)
883168404Spjd		return (0);
884168404Spjd
885168962Spjd	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
886168962Spjd		limit = MAXOFFSET_T;
887168962Spjd
888168404Spjd	ZFS_ENTER(zfsvfs);
889185029Spjd	ZFS_VERIFY_ZP(zp);
890168404Spjd
891219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
892219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
893219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
894219089Spjd	    &zp->z_size, 8);
895219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
896219089Spjd	    &zp->z_pflags, 8);
897219089Spjd
898168404Spjd	/*
899262990Sdelphij	 * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our
900262990Sdelphij	 * callers might not be able to detect properly that we are read-only,
901262990Sdelphij	 * so check it explicitly here.
902262990Sdelphij	 */
903262990Sdelphij	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
904262990Sdelphij		ZFS_EXIT(zfsvfs);
905262990Sdelphij		return (SET_ERROR(EROFS));
906262990Sdelphij	}
907262990Sdelphij
908262990Sdelphij	/*
909321579Smav	 * If immutable or not appending then return EPERM.
910321579Smav	 * Intentionally allow ZFS_READONLY through here.
911321579Smav	 * See zfs_zaccess_common()
912185029Spjd	 */
913321579Smav	if ((zp->z_pflags & ZFS_IMMUTABLE) ||
914219089Spjd	    ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
915219089Spjd	    (uio->uio_loffset < zp->z_size))) {
916185029Spjd		ZFS_EXIT(zfsvfs);
917249195Smm		return (SET_ERROR(EPERM));
918185029Spjd	}
919185029Spjd
920185029Spjd	zilog = zfsvfs->z_log;
921185029Spjd
922185029Spjd	/*
923219089Spjd	 * Validate file offset
924219089Spjd	 */
925219089Spjd	woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
926219089Spjd	if (woff < 0) {
927219089Spjd		ZFS_EXIT(zfsvfs);
928249195Smm		return (SET_ERROR(EINVAL));
929219089Spjd	}
930219089Spjd
931219089Spjd	/*
932219089Spjd	 * Check for mandatory locks before calling zfs_range_lock()
933219089Spjd	 * in order to prevent a deadlock with locks set via fcntl().
934219089Spjd	 */
935219089Spjd	if (MANDMODE((mode_t)zp->z_mode) &&
936219089Spjd	    (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) {
937219089Spjd		ZFS_EXIT(zfsvfs);
938219089Spjd		return (error);
939219089Spjd	}
940219089Spjd
941277300Ssmh#ifdef illumos
942219089Spjd	/*
943168404Spjd	 * Pre-fault the pages to ensure slow (eg NFS) pages
944168404Spjd	 * don't hold up txg.
945219089Spjd	 * Skip this if uio contains loaned arc_buf.
946168404Spjd	 */
947219089Spjd	if ((uio->uio_extflg == UIO_XUIO) &&
948219089Spjd	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
949219089Spjd		xuio = (xuio_t *)uio;
950219089Spjd	else
951219089Spjd		uio_prefaultpages(MIN(n, max_blksz), uio);
952277300Ssmh#endif
953168404Spjd
954168404Spjd	/*
955168404Spjd	 * If in append mode, set the io offset pointer to eof.
956168404Spjd	 */
957213673Spjd	if (ioflag & FAPPEND) {
958168404Spjd		/*
959219089Spjd		 * Obtain an appending range lock to guarantee file append
960219089Spjd		 * semantics.  We reset the write offset once we have the lock.
961168404Spjd		 */
962168404Spjd		rl = zfs_range_lock(zp, 0, n, RL_APPEND);
963219089Spjd		woff = rl->r_off;
964168404Spjd		if (rl->r_len == UINT64_MAX) {
965219089Spjd			/*
966219089Spjd			 * We overlocked the file because this write will cause
967219089Spjd			 * the file block size to increase.
968219089Spjd			 * Note that zp_size cannot change with this lock held.
969219089Spjd			 */
970219089Spjd			woff = zp->z_size;
971168404Spjd		}
972219089Spjd		uio->uio_loffset = woff;
973168404Spjd	} else {
974168404Spjd		/*
975219089Spjd		 * Note that if the file block size will change as a result of
976219089Spjd		 * this write, then this range lock will lock the entire file
977219089Spjd		 * so that we can re-write the block safely.
978168404Spjd		 */
979168404Spjd		rl = zfs_range_lock(zp, woff, n, RL_WRITER);
980168404Spjd	}
981168404Spjd
982235781Strasz	if (vn_rlimit_fsize(vp, uio, uio->uio_td)) {
983235781Strasz		zfs_range_unlock(rl);
984235781Strasz		ZFS_EXIT(zfsvfs);
985235781Strasz		return (EFBIG);
986235781Strasz	}
987235781Strasz
988168962Spjd	if (woff >= limit) {
989168962Spjd		zfs_range_unlock(rl);
990168962Spjd		ZFS_EXIT(zfsvfs);
991249195Smm		return (SET_ERROR(EFBIG));
992168962Spjd	}
993168962Spjd
994168962Spjd	if ((woff + n) > limit || woff > (limit - n))
995168962Spjd		n = limit - woff;
996168962Spjd
997219089Spjd	/* Will this write extend the file length? */
998219089Spjd	write_eof = (woff + n > zp->z_size);
999168404Spjd
1000219089Spjd	end_size = MAX(zp->z_size, woff + n);
1001219089Spjd
1002168404Spjd	/*
1003168404Spjd	 * Write the file in reasonable size chunks.  Each chunk is written
1004168404Spjd	 * in a separate transaction; this keeps the intent log records small
1005168404Spjd	 * and allows us to do more fine-grained space accounting.
1006168404Spjd	 */
1007168404Spjd	while (n > 0) {
1008209962Smm		abuf = NULL;
1009209962Smm		woff = uio->uio_loffset;
1010219089Spjd		if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
1011219089Spjd		    zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
1012209962Smm			if (abuf != NULL)
1013209962Smm				dmu_return_arcbuf(abuf);
1014249195Smm			error = SET_ERROR(EDQUOT);
1015209962Smm			break;
1016209962Smm		}
1017209962Smm
1018219089Spjd		if (xuio && abuf == NULL) {
1019219089Spjd			ASSERT(i_iov < iovcnt);
1020219089Spjd			aiov = &iovp[i_iov];
1021219089Spjd			abuf = dmu_xuio_arcbuf(xuio, i_iov);
1022219089Spjd			dmu_xuio_clear(xuio, i_iov);
1023219089Spjd			DTRACE_PROBE3(zfs_cp_write, int, i_iov,
1024219089Spjd			    iovec_t *, aiov, arc_buf_t *, abuf);
1025219089Spjd			ASSERT((aiov->iov_base == abuf->b_data) ||
1026219089Spjd			    ((char *)aiov->iov_base - (char *)abuf->b_data +
1027219089Spjd			    aiov->iov_len == arc_buf_size(abuf)));
1028219089Spjd			i_iov++;
1029219089Spjd		} else if (abuf == NULL && n >= max_blksz &&
1030219089Spjd		    woff >= zp->z_size &&
1031209962Smm		    P2PHASE(woff, max_blksz) == 0 &&
1032209962Smm		    zp->z_blksz == max_blksz) {
1033219089Spjd			/*
1034219089Spjd			 * This write covers a full block.  "Borrow" a buffer
1035219089Spjd			 * from the dmu so that we can fill it before we enter
1036219089Spjd			 * a transaction.  This avoids the possibility of
1037219089Spjd			 * holding up the transaction if the data copy hangs
1038219089Spjd			 * up on a pagefault (e.g., from an NFS server mapping).
1039219089Spjd			 */
1040209962Smm			size_t cbytes;
1041209962Smm
1042219089Spjd			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
1043219089Spjd			    max_blksz);
1044209962Smm			ASSERT(abuf != NULL);
1045209962Smm			ASSERT(arc_buf_size(abuf) == max_blksz);
1046209962Smm			if (error = uiocopy(abuf->b_data, max_blksz,
1047209962Smm			    UIO_WRITE, uio, &cbytes)) {
1048209962Smm				dmu_return_arcbuf(abuf);
1049209962Smm				break;
1050209962Smm			}
1051209962Smm			ASSERT(cbytes == max_blksz);
1052209962Smm		}
1053209962Smm
1054209962Smm		/*
1055168404Spjd		 * Start a transaction.
1056168404Spjd		 */
1057168404Spjd		tx = dmu_tx_create(zfsvfs->z_os);
1058219089Spjd		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1059168404Spjd		dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
1060219089Spjd		zfs_sa_upgrade_txholds(tx, zp);
1061258720Savg		error = dmu_tx_assign(tx, TXG_WAIT);
1062168404Spjd		if (error) {
1063168404Spjd			dmu_tx_abort(tx);
1064209962Smm			if (abuf != NULL)
1065209962Smm				dmu_return_arcbuf(abuf);
1066168404Spjd			break;
1067168404Spjd		}
1068168404Spjd
1069168404Spjd		/*
1070168404Spjd		 * If zfs_range_lock() over-locked we grow the blocksize
1071168404Spjd		 * and then reduce the lock range.  This will only happen
1072168404Spjd		 * on the first iteration since zfs_range_reduce() will
1073168404Spjd		 * shrink down r_len to the appropriate size.
1074168404Spjd		 */
1075168404Spjd		if (rl->r_len == UINT64_MAX) {
1076168404Spjd			uint64_t new_blksz;
1077168404Spjd
1078168404Spjd			if (zp->z_blksz > max_blksz) {
1079274337Sdelphij				/*
1080274337Sdelphij				 * File's blocksize is already larger than the
1081274337Sdelphij				 * "recordsize" property.  Only let it grow to
1082274337Sdelphij				 * the next power of 2.
1083274337Sdelphij				 */
1084168404Spjd				ASSERT(!ISP2(zp->z_blksz));
1085274337Sdelphij				new_blksz = MIN(end_size,
1086274337Sdelphij				    1 << highbit64(zp->z_blksz));
1087168404Spjd			} else {
1088168404Spjd				new_blksz = MIN(end_size, max_blksz);
1089168404Spjd			}
1090168404Spjd			zfs_grow_blocksize(zp, new_blksz, tx);
1091168404Spjd			zfs_range_reduce(rl, woff, n);
1092168404Spjd		}
1093168404Spjd
1094168404Spjd		/*
1095168404Spjd		 * XXX - should we really limit each write to z_max_blksz?
1096168404Spjd		 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
1097168404Spjd		 */
1098168404Spjd		nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));
1099168404Spjd
1100219089Spjd		if (woff + nbytes > zp->z_size)
1101168404Spjd			vnode_pager_setsize(vp, woff + nbytes);
1102168404Spjd
1103209962Smm		if (abuf == NULL) {
1104209962Smm			tx_bytes = uio->uio_resid;
1105219089Spjd			error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
1106219089Spjd			    uio, nbytes, tx);
1107209962Smm			tx_bytes -= uio->uio_resid;
1108168404Spjd		} else {
1109209962Smm			tx_bytes = nbytes;
1110219089Spjd			ASSERT(xuio == NULL || tx_bytes == aiov->iov_len);
1111219089Spjd			/*
1112219089Spjd			 * If this is not a full block write, but we are
1113219089Spjd			 * extending the file past EOF and this data starts
1114219089Spjd			 * block-aligned, use assign_arcbuf().  Otherwise,
1115219089Spjd			 * write via dmu_write().
1116219089Spjd			 */
1117219089Spjd			if (tx_bytes < max_blksz && (!write_eof ||
1118219089Spjd			    aiov->iov_base != abuf->b_data)) {
1119219089Spjd				ASSERT(xuio);
1120219089Spjd				dmu_write(zfsvfs->z_os, zp->z_id, woff,
1121219089Spjd				    aiov->iov_len, aiov->iov_base, tx);
1122219089Spjd				dmu_return_arcbuf(abuf);
1123219089Spjd				xuio_stat_wbuf_copied();
1124219089Spjd			} else {
1125219089Spjd				ASSERT(xuio || tx_bytes == max_blksz);
1126219089Spjd				dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
1127219089Spjd				    woff, abuf, tx);
1128219089Spjd			}
1129209962Smm			ASSERT(tx_bytes <= uio->uio_resid);
1130209962Smm			uioskip(uio, tx_bytes);
1131168404Spjd		}
1132212657Savg		if (tx_bytes && vn_has_cached_data(vp)) {
1133209962Smm			update_pages(vp, woff, tx_bytes, zfsvfs->z_os,
1134209962Smm			    zp->z_id, uio->uio_segflg, tx);
1135209962Smm		}
1136209962Smm
1137209962Smm		/*
1138168404Spjd		 * If we made no progress, we're done.  If we made even
1139168404Spjd		 * partial progress, update the znode and ZIL accordingly.
1140168404Spjd		 */
1141168404Spjd		if (tx_bytes == 0) {
1142219089Spjd			(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
1143219089Spjd			    (void *)&zp->z_size, sizeof (uint64_t), tx);
1144168404Spjd			dmu_tx_commit(tx);
1145168404Spjd			ASSERT(error != 0);
1146168404Spjd			break;
1147168404Spjd		}
1148168404Spjd
1149168404Spjd		/*
1150168404Spjd		 * Clear Set-UID/Set-GID bits on successful write if not
1151168404Spjd		 * privileged and at least one of the excute bits is set.
1152168404Spjd		 *
1153168404Spjd		 * It would be nice to to this after all writes have
1154168404Spjd		 * been done, but that would still expose the ISUID/ISGID
1155168404Spjd		 * to another app after the partial write is committed.
1156185029Spjd		 *
1157185029Spjd		 * Note: we don't call zfs_fuid_map_id() here because
1158185029Spjd		 * user 0 is not an ephemeral uid.
1159168404Spjd		 */
1160168404Spjd		mutex_enter(&zp->z_acl_lock);
1161219089Spjd		if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) |
1162168404Spjd		    (S_IXUSR >> 6))) != 0 &&
1163219089Spjd		    (zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
1164185029Spjd		    secpolicy_vnode_setid_retain(vp, cr,
1165219089Spjd		    (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) {
1166219089Spjd			uint64_t newmode;
1167219089Spjd			zp->z_mode &= ~(S_ISUID | S_ISGID);
1168219089Spjd			newmode = zp->z_mode;
1169219089Spjd			(void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
1170219089Spjd			    (void *)&newmode, sizeof (uint64_t), tx);
1171168404Spjd		}
1172168404Spjd		mutex_exit(&zp->z_acl_lock);
1173168404Spjd
1174219089Spjd		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
1175219089Spjd		    B_TRUE);
1176168404Spjd
1177168404Spjd		/*
1178168404Spjd		 * Update the file size (zp_size) if it has changed;
1179168404Spjd		 * account for possible concurrent updates.
1180168404Spjd		 */
1181219089Spjd		while ((end_size = zp->z_size) < uio->uio_loffset) {
1182219089Spjd			(void) atomic_cas_64(&zp->z_size, end_size,
1183168404Spjd			    uio->uio_loffset);
1184298105Savg#ifdef illumos
1185219089Spjd			ASSERT(error == 0);
1186298105Savg#else
1187298105Savg			ASSERT(error == 0 || error == EFAULT);
1188298105Savg#endif
1189219089Spjd		}
1190219089Spjd		/*
1191219089Spjd		 * If we are replaying and eof is non zero then force
1192219089Spjd		 * the file size to the specified eof. Note, there's no
1193219089Spjd		 * concurrency during replay.
1194219089Spjd		 */
1195219089Spjd		if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
1196219089Spjd			zp->z_size = zfsvfs->z_replay_eof;
1197219089Spjd
1198298105Savg		if (error == 0)
1199298105Savg			error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
1200298105Savg		else
1201298105Savg			(void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
1202219089Spjd
1203168404Spjd		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
1204168404Spjd		dmu_tx_commit(tx);
1205168404Spjd
1206168404Spjd		if (error != 0)
1207168404Spjd			break;
1208168404Spjd		ASSERT(tx_bytes == nbytes);
1209168404Spjd		n -= nbytes;
1210219089Spjd
1211277300Ssmh#ifdef illumos
1212219089Spjd		if (!xuio && n > 0)
1213219089Spjd			uio_prefaultpages(MIN(n, max_blksz), uio);
1214277300Ssmh#endif
1215168404Spjd	}
1216168404Spjd
1217168404Spjd	zfs_range_unlock(rl);
1218168404Spjd
1219168404Spjd	/*
1220168404Spjd	 * If we're in replay mode, or we made no progress, return error.
1221168404Spjd	 * Otherwise, it's at least a partial write, so it's successful.
1222168404Spjd	 */
1223209962Smm	if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
1224168404Spjd		ZFS_EXIT(zfsvfs);
1225168404Spjd		return (error);
1226168404Spjd	}
1227168404Spjd
1228298105Savg#ifdef __FreeBSD__
1229298105Savg	/*
1230298105Savg	 * EFAULT means that at least one page of the source buffer was not
1231298105Savg	 * available.  VFS will re-try remaining I/O upon this error.
1232298105Savg	 */
1233298105Savg	if (error == EFAULT) {
1234298105Savg		ZFS_EXIT(zfsvfs);
1235298105Savg		return (error);
1236298105Savg	}
1237298105Savg#endif
1238298105Savg
1239219089Spjd	if (ioflag & (FSYNC | FDSYNC) ||
1240219089Spjd	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1241219089Spjd		zil_commit(zilog, zp->z_id);
1242168404Spjd
1243168404Spjd	ZFS_EXIT(zfsvfs);
1244168404Spjd	return (0);
1245168404Spjd}
1246168404Spjd
1247168404Spjdvoid
1248219089Spjdzfs_get_done(zgd_t *zgd, int error)
1249168404Spjd{
1250219089Spjd	znode_t *zp = zgd->zgd_private;
1251219089Spjd	objset_t *os = zp->z_zfsvfs->z_os;
1252168404Spjd
1253219089Spjd	if (zgd->zgd_db)
1254219089Spjd		dmu_buf_rele(zgd->zgd_db, zgd);
1255219089Spjd
1256219089Spjd	zfs_range_unlock(zgd->zgd_rl);
1257219089Spjd
1258191900Skmacy	/*
1259191900Skmacy	 * Release the vnode asynchronously as we currently have the
1260191900Skmacy	 * txg stopped from syncing.
1261191900Skmacy	 */
1262219089Spjd	VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
1263219089Spjd
1264219089Spjd	if (error == 0 && zgd->zgd_bp)
1265325132Savg		zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
1266219089Spjd
1267168404Spjd	kmem_free(zgd, sizeof (zgd_t));
1268168404Spjd}
1269168404Spjd
#ifdef DEBUG
/*
 * Debug-only fault-injection switch.  When it is nonzero, the next indirect
 * write handled by zfs_get_data() fails with EIO and the switch is reset
 * to 0, so each setting injects a single failure.
 */
static int zil_fault_io = 0;
#endif
1273214378Smm
/*
 * Get data to generate a TX_WRITE intent log record.
 *
 *	IN:	arg	- the zfsvfs_t of the file system being logged.
 *		lr	- write log record; lr_foid, lr_offset and lr_length
 *			  identify the object and byte range.
 *		buf	- destination for the data of an immediate write,
 *			  or NULL to request an indirect write.
 *		lwb	- log write block, recorded in the zgd for
 *			  zfs_get_done().
 *		zio	- parent zio handed to dmu_sync() for indirect writes.
 *
 *	OUT:	buf	- filled with the file data (immediate write).
 *		lr	- lr_blkptr is passed to dmu_sync() via zgd_bp
 *			  (indirect write); lrc_txtype becomes TX_WRITE2 when
 *			  dmu_sync() reports EALREADY.
 *
 *	RETURN:	0 on success, error code on failure.  ENOENT is returned
 *		when the object cannot be obtained, has been unlinked, or the
 *		offset lies at or beyond the current file size.
 *
 * When dmu_sync() is started successfully this function returns without
 * cleaning up; zfs_get_done() then runs as the dmu_sync() callback.  On every
 * other path past the zgd allocation, zfs_get_done() is called directly.
 */
int
zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
{
	zfsvfs_t *zfsvfs = arg;
	objset_t *os = zfsvfs->z_os;
	znode_t *zp;
	uint64_t object = lr->lr_foid;
	uint64_t offset = lr->lr_offset;
	uint64_t size = lr->lr_length;
	dmu_buf_t *db;
	zgd_t *zgd;
	int error = 0;

	ASSERT3P(lwb, !=, NULL);
	ASSERT3P(zio, !=, NULL);
	ASSERT3U(size, !=, 0);

	/*
	 * Nothing to do if the file has been removed
	 */
	if (zfs_zget(zfsvfs, object, &zp) != 0)
		return (SET_ERROR(ENOENT));
	if (zp->z_unlinked) {
		/*
		 * Release the vnode asynchronously as we currently have the
		 * txg stopped from syncing.
		 */
		VN_RELE_ASYNC(ZTOV(zp),
		    dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
		return (SET_ERROR(ENOENT));
	}

	/* From here on, zfs_get_done() owns the cleanup of zgd and zp. */
	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
	zgd->zgd_lwb = lwb;
	zgd->zgd_private = zp;

	/*
	 * Write records come in two flavors: immediate and indirect.
	 * For small writes it's cheaper to store the data with the
	 * log record (immediate); for large writes it's cheaper to
	 * sync the data and get a pointer to it (indirect) so that
	 * we don't have to write the data twice.
	 */
	if (buf != NULL) { /* immediate write */
		zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER);
		/* test for truncation needs to be done while range locked */
		if (offset >= zp->z_size) {
			error = SET_ERROR(ENOENT);
		} else {
			error = dmu_read(os, object, offset, size, buf,
			    DMU_READ_NO_PREFETCH);
		}
		ASSERT(error == 0 || error == ENOENT);
	} else { /* indirect write */
		/*
		 * Have to lock the whole block to ensure when it's
		 * written out and its checksum is being calculated
		 * that no one can change the data. We need to re-check
		 * blocksize after we get the lock in case it's changed!
		 */
		for (;;) {
			uint64_t blkoff;
			size = zp->z_blksz;
			/*
			 * Round the offset down to the start of its block.
			 * For a block size that is not a power of 2 the
			 * offset is reduced to 0 (presumably such a file
			 * consists of a single block -- TODO confirm).
			 */
			blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
			offset -= blkoff;
			zgd->zgd_rl = zfs_range_lock(zp, offset, size,
			    RL_READER);
			if (zp->z_blksz == size)
				break;
			/* Block size changed: undo the rounding and retry. */
			offset += blkoff;
			zfs_range_unlock(zgd->zgd_rl);
		}
		/* test for truncation needs to be done while range locked */
		if (lr->lr_offset >= zp->z_size)
			error = SET_ERROR(ENOENT);
#ifdef DEBUG
		/* One-shot fault injection; see zil_fault_io. */
		if (zil_fault_io) {
			error = SET_ERROR(EIO);
			zil_fault_io = 0;
		}
#endif
		if (error == 0)
			error = dmu_buf_hold(os, object, offset, zgd, &db,
			    DMU_READ_NO_PREFETCH);

		if (error == 0) {
			blkptr_t *bp = &lr->lr_blkptr;

			zgd->zgd_db = db;
			zgd->zgd_bp = bp;

			ASSERT(db->db_offset == offset);
			ASSERT(db->db_size == size);

			error = dmu_sync(zio, lr->lr_common.lrc_txg,
			    zfs_get_done, zgd);
			ASSERT(error || lr->lr_length <= size);

			/*
			 * On success, we need to wait for the write I/O
			 * initiated by dmu_sync() to complete before we can
			 * release this dbuf.  We will finish everything up
			 * in the zfs_get_done() callback.
			 */
			if (error == 0)
				return (0);

			/*
			 * EALREADY is not treated as a failure: the record
			 * type is switched to TX_WRITE2 and the error cleared.
			 */
			if (error == EALREADY) {
				lr->lr_common.lrc_txtype = TX_WRITE2;
				error = 0;
			}
		}
	}

	/* No callback is pending, so clean up here. */
	zfs_get_done(zgd, error);

	return (error);
}
1395168404Spjd
1396168404Spjd/*ARGSUSED*/
1397168404Spjdstatic int
1398185029Spjdzfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
1399185029Spjd    caller_context_t *ct)
1400168404Spjd{
1401168404Spjd	znode_t *zp = VTOZ(vp);
1402168404Spjd	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1403168404Spjd	int error;
1404168404Spjd
1405168404Spjd	ZFS_ENTER(zfsvfs);
1406185029Spjd	ZFS_VERIFY_ZP(zp);
1407185029Spjd
1408185029Spjd	if (flag & V_ACE_MASK)
1409185029Spjd		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
1410185029Spjd	else
1411185029Spjd		error = zfs_zaccess_rwx(zp, mode, flag, cr);
1412185029Spjd
1413168404Spjd	ZFS_EXIT(zfsvfs);
1414168404Spjd	return (error);
1415168404Spjd}
1416168404Spjd
/*
 * Callback passed to vn_vget_ino_gen() by zfs_lookup_lock() for "..":
 * "arg" is the already-referenced target vnode.  It is stored in *vpp and
 * locked with lkflags; if locking fails the reference is dropped and the
 * error is returned.
 */
static int
zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
	struct vnode *target = arg;
	int rc;

	*vpp = target;
	rc = vn_lock(target, lkflags);
	if (rc == 0)
		return (0);

	vrele(target);
	return (rc);
}
1428211932Smm
1429303970Savgstatic int
1430303970Savgzfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
1431303970Savg{
1432303970Savg	znode_t *zdp = VTOZ(dvp);
1433303970Savg	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
1434303970Savg	int error;
1435303970Savg	int ltype;
1436303970Savg
1437303970Savg	ASSERT_VOP_LOCKED(dvp, __func__);
1438303970Savg#ifdef DIAGNOSTIC
1439307142Savg	if ((zdp->z_pflags & ZFS_XATTR) == 0)
1440307142Savg		VERIFY(!RRM_LOCK_HELD(&zfsvfs->z_teardown_lock));
1441303970Savg#endif
1442303970Savg
1443303970Savg	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
1444303970Savg		ASSERT3P(dvp, ==, vp);
1445303970Savg		vref(dvp);
1446303970Savg		ltype = lkflags & LK_TYPE_MASK;
1447303970Savg		if (ltype != VOP_ISLOCKED(dvp)) {
1448303970Savg			if (ltype == LK_EXCLUSIVE)
1449303970Savg				vn_lock(dvp, LK_UPGRADE | LK_RETRY);
1450303970Savg			else /* if (ltype == LK_SHARED) */
1451303970Savg				vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
1452303970Savg
1453303970Savg			/*
1454303970Savg			 * Relock for the "." case could leave us with
1455303970Savg			 * reclaimed vnode.
1456303970Savg			 */
1457303970Savg			if (dvp->v_iflag & VI_DOOMED) {
1458303970Savg				vrele(dvp);
1459303970Savg				return (SET_ERROR(ENOENT));
1460303970Savg			}
1461303970Savg		}
1462303970Savg		return (0);
1463303970Savg	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
1464303970Savg		/*
1465303970Savg		 * Note that in this case, dvp is the child vnode, and we
1466303970Savg		 * are looking up the parent vnode - exactly reverse from
1467303970Savg		 * normal operation.  Unlocking dvp requires some rather
1468303970Savg		 * tricky unlock/relock dance to prevent mp from being freed;
1469303970Savg		 * use vn_vget_ino_gen() which takes care of all that.
1470303970Savg		 *
1471303970Savg		 * XXX Note that there is a time window when both vnodes are
1472303970Savg		 * unlocked.  It is possible, although highly unlikely, that
1473303970Savg		 * during that window the parent-child relationship between
1474303970Savg		 * the vnodes may change, for example, get reversed.
1475303970Savg		 * In that case we would have a wrong lock order for the vnodes.
1476303970Savg		 * All other filesystems seem to ignore this problem, so we
1477303970Savg		 * do the same here.
1478303970Savg		 * A potential solution could be implemented as follows:
1479303970Savg		 * - using LK_NOWAIT when locking the second vnode and retrying
1480303970Savg		 *   if necessary
1481303970Savg		 * - checking that the parent-child relationship still holds
1482303970Savg		 *   after locking both vnodes and retrying if it doesn't
1483303970Savg		 */
1484303970Savg		error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
1485303970Savg		return (error);
1486303970Savg	} else {
1487303970Savg		error = vn_lock(vp, lkflags);
1488303970Savg		if (error != 0)
1489303970Savg			vrele(vp);
1490303970Savg		return (error);
1491211932Smm	}
1492211932Smm}
1493211932Smm
1494211932Smm/*
1495168404Spjd * Lookup an entry in a directory, or an extended attribute directory.
1496168404Spjd * If it exists, return a held vnode reference for it.
1497168404Spjd *
1498168404Spjd *	IN:	dvp	- vnode of directory to search.
1499168404Spjd *		nm	- name of entry to lookup.
1500168404Spjd *		pnp	- full pathname to lookup [UNUSED].
1501168404Spjd *		flags	- LOOKUP_XATTR set if looking for an attribute.
1502168404Spjd *		rdir	- root directory vnode [UNUSED].
1503168404Spjd *		cr	- credentials of caller.
1504185029Spjd *		ct	- caller context
1505168404Spjd *
1506168404Spjd *	OUT:	vpp	- vnode of located entry, NULL if not found.
1507168404Spjd *
1508251631Sdelphij *	RETURN:	0 on success, error code on failure.
1509168404Spjd *
1510168404Spjd * Timestamps:
1511168404Spjd *	NA
1512168404Spjd */
1513168404Spjd/* ARGSUSED */
1514168962Spjdstatic int
1515168962Spjdzfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp,
1516185029Spjd    int nameiop, cred_t *cr, kthread_t *td, int flags)
1517168404Spjd{
1518168962Spjd	znode_t *zdp = VTOZ(dvp);
1519303970Savg	znode_t *zp;
1520168962Spjd	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
1521211932Smm	int	error = 0;
1522168404Spjd
1523321545Smav	/*
1524321545Smav	 * Fast path lookup, however we must skip DNLC lookup
1525321545Smav	 * for case folding or normalizing lookups because the
1526321545Smav	 * DNLC code only stores the passed in name.  This means
1527321545Smav	 * creating 'a' and removing 'A' on a case insensitive
1528321545Smav	 * file system would work, but DNLC still thinks 'a'
1529321545Smav	 * exists and won't let you create it again on the next
1530321545Smav	 * pass through fast path.
1531321545Smav	 */
1532303970Savg	if (!(flags & LOOKUP_XATTR)) {
1533211932Smm		if (dvp->v_type != VDIR) {
1534249195Smm			return (SET_ERROR(ENOTDIR));
1535219089Spjd		} else if (zdp->z_sa_hdl == NULL) {
1536249195Smm			return (SET_ERROR(EIO));
1537211932Smm		}
1538211932Smm	}
1539211932Smm
1540211932Smm	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm);
1541211932Smm
1542168404Spjd	ZFS_ENTER(zfsvfs);
1543185029Spjd	ZFS_VERIFY_ZP(zdp);
1544168404Spjd
1545168404Spjd	*vpp = NULL;
1546168404Spjd
1547185029Spjd	if (flags & LOOKUP_XATTR) {
1548168404Spjd#ifdef TODO
1549168404Spjd		/*
1550168404Spjd		 * If the xattr property is off, refuse the lookup request.
1551168404Spjd		 */
1552168404Spjd		if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) {
1553168404Spjd			ZFS_EXIT(zfsvfs);
1554249195Smm			return (SET_ERROR(EINVAL));
1555168404Spjd		}
1556185029Spjd#endif
1557168404Spjd
1558168404Spjd		/*
1559168404Spjd		 * We don't allow recursive attributes..
1560168404Spjd		 * Maybe someday we will.
1561168404Spjd		 */
1562219089Spjd		if (zdp->z_pflags & ZFS_XATTR) {
1563168404Spjd			ZFS_EXIT(zfsvfs);
1564249195Smm			return (SET_ERROR(EINVAL));
1565168404Spjd		}
1566168404Spjd
1567168404Spjd		if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) {
1568168404Spjd			ZFS_EXIT(zfsvfs);
1569168404Spjd			return (error);
1570168404Spjd		}
1571168404Spjd
1572168404Spjd		/*
1573168404Spjd		 * Do we have permission to get into attribute directory?
1574168404Spjd		 */
1575185029Spjd		if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0,
1576185029Spjd		    B_FALSE, cr)) {
1577303970Savg			vrele(*vpp);
1578185029Spjd			*vpp = NULL;
1579168404Spjd		}
1580168404Spjd
1581168404Spjd		ZFS_EXIT(zfsvfs);
1582168404Spjd		return (error);
1583168404Spjd	}
1584168404Spjd
1585168404Spjd	/*
1586168404Spjd	 * Check accessibility of directory.
1587168404Spjd	 */
1588185029Spjd	if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) {
1589168404Spjd		ZFS_EXIT(zfsvfs);
1590168404Spjd		return (error);
1591168404Spjd	}
1592168404Spjd
1593185029Spjd	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
1594185029Spjd	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1595185029Spjd		ZFS_EXIT(zfsvfs);
1596249195Smm		return (SET_ERROR(EILSEQ));
1597185029Spjd	}
1598168404Spjd
1599168962Spjd
1600303970Savg	/*
1601303970Savg	 * First handle the special cases.
1602303970Savg	 */
1603303970Savg	if ((cnp->cn_flags & ISDOTDOT) != 0) {
1604303970Savg		/*
1605303970Savg		 * If we are a snapshot mounted under .zfs, return
1606303970Savg		 * the vp for the snapshot directory.
1607303970Savg		 */
1608303970Savg		if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
1609315842Savg			struct componentname cn;
1610315842Savg			vnode_t *zfsctl_vp;
1611315842Savg			int ltype;
1612315842Savg
1613303970Savg			ZFS_EXIT(zfsvfs);
1614315842Savg			ltype = VOP_ISLOCKED(dvp);
1615315842Savg			VOP_UNLOCK(dvp, 0);
1616315842Savg			error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
1617315842Savg			    &zfsctl_vp);
1618303970Savg			if (error == 0) {
1619315842Savg				cn.cn_nameptr = "snapshot";
1620315842Savg				cn.cn_namelen = strlen(cn.cn_nameptr);
1621315842Savg				cn.cn_nameiop = cnp->cn_nameiop;
1622319415Savg				cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
1623315842Savg				cn.cn_lkflags = cnp->cn_lkflags;
1624315842Savg				error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
1625315842Savg				vput(zfsctl_vp);
1626303970Savg			}
1627315842Savg			vn_lock(dvp, ltype | LK_RETRY);
1628315842Savg			return (error);
1629303970Savg		}
1630303970Savg	}
1631303970Savg	if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
1632315842Savg		ZFS_EXIT(zfsvfs);
1633303970Savg		if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
1634315842Savg			return (SET_ERROR(ENOTSUP));
1635315842Savg		error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
1636315842Savg		return (error);
1637303970Savg	}
1638303970Savg
1639303970Savg	/*
1640303970Savg	 * The loop is retry the lookup if the parent-child relationship
1641303970Savg	 * changes during the dot-dot locking complexities.
1642303970Savg	 */
1643303970Savg	for (;;) {
1644303970Savg		uint64_t parent;
1645303970Savg
1646303970Savg		error = zfs_dirlook(zdp, nm, &zp);
1647303970Savg		if (error == 0)
1648303970Savg			*vpp = ZTOV(zp);
1649303970Savg
1650303970Savg		ZFS_EXIT(zfsvfs);
1651303970Savg		if (error != 0)
1652303970Savg			break;
1653303970Savg
1654303970Savg		error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
1655303970Savg		if (error != 0) {
1656303970Savg			/*
1657303970Savg			 * If we've got a locking error, then the vnode
1658303970Savg			 * got reclaimed because of a force unmount.
1659303970Savg			 * We never enter doomed vnodes into the name cache.
1660303970Savg			 */
1661303970Savg			*vpp = NULL;
1662303970Savg			return (error);
1663303970Savg		}
1664303970Savg
1665303970Savg		if ((cnp->cn_flags & ISDOTDOT) == 0)
1666303970Savg			break;
1667303970Savg
1668303970Savg		ZFS_ENTER(zfsvfs);
1669303970Savg		if (zdp->z_sa_hdl == NULL) {
1670303970Savg			error = SET_ERROR(EIO);
1671303970Savg		} else {
1672303970Savg			error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
1673303970Savg			    &parent, sizeof (parent));
1674303970Savg		}
1675303970Savg		if (error != 0) {
1676303970Savg			ZFS_EXIT(zfsvfs);
1677303970Savg			vput(ZTOV(zp));
1678303970Savg			break;
1679303970Savg		}
1680303970Savg		if (zp->z_id == parent) {
1681303970Savg			ZFS_EXIT(zfsvfs);
1682303970Savg			break;
1683303970Savg		}
1684303970Savg		vput(ZTOV(zp));
1685303970Savg	}
1686303970Savg
1687303970Savgout:
1688303970Savg	if (error != 0)
1689303970Savg		*vpp = NULL;
1690303970Savg
1691168404Spjd	/* Translate errors and add SAVENAME when needed. */
1692168404Spjd	if (cnp->cn_flags & ISLASTCN) {
1693168404Spjd		switch (nameiop) {
1694168404Spjd		case CREATE:
1695168404Spjd		case RENAME:
1696168404Spjd			if (error == ENOENT) {
1697168404Spjd				error = EJUSTRETURN;
1698168404Spjd				cnp->cn_flags |= SAVENAME;
1699168404Spjd				break;
1700168404Spjd			}
1701168404Spjd			/* FALLTHROUGH */
1702168404Spjd		case DELETE:
1703168404Spjd			if (error == 0)
1704168404Spjd				cnp->cn_flags |= SAVENAME;
1705168404Spjd			break;
1706168404Spjd		}
1707168404Spjd	}
1708169198Spjd
1709303970Savg	/* Insert name into cache (as non-existent) if appropriate. */
1710303970Savg	if (zfsvfs->z_use_namecache &&
1711303970Savg	    error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
1712303970Savg		cache_enter(dvp, NULL, cnp);
1713168404Spjd
1714303970Savg	/* Insert name into cache if appropriate. */
1715303970Savg	if (zfsvfs->z_use_namecache &&
1716303970Savg	    error == 0 && (cnp->cn_flags & MAKEENTRY)) {
1717168404Spjd		if (!(cnp->cn_flags & ISLASTCN) ||
1718168404Spjd		    (nameiop != DELETE && nameiop != RENAME)) {
1719168404Spjd			cache_enter(dvp, *vpp, cnp);
1720168404Spjd		}
1721168404Spjd	}
1722168404Spjd
1723168404Spjd	return (error);
1724168404Spjd}
1725168404Spjd
1726168404Spjd/*
1727168404Spjd * Attempt to create a new entry in a directory.  If the entry
1728168404Spjd * already exists, truncate the file if permissible, else return
1729168404Spjd * an error.  Return the vp of the created or trunc'd file.
1730168404Spjd *
1731168404Spjd *	IN:	dvp	- vnode of directory to put new file entry in.
1732168404Spjd *		name	- name of new file entry.
1733168404Spjd *		vap	- attributes of new file.
1734168404Spjd *		excl	- flag indicating exclusive or non-exclusive mode.
1735168404Spjd *		mode	- mode to open file with.
1736168404Spjd *		cr	- credentials of caller.
1737168404Spjd *		flag	- large file flag [UNUSED].
1738185029Spjd *		ct	- caller context
1739268464Sdelphij *		vsecp	- ACL to be set
1740168404Spjd *
1741168404Spjd *	OUT:	vpp	- vnode of created or trunc'd entry.
1742168404Spjd *
1743251631Sdelphij *	RETURN:	0 on success, error code on failure.
1744168404Spjd *
1745168404Spjd * Timestamps:
1746168404Spjd *	dvp - ctime|mtime updated if new entry created
1747168404Spjd *	 vp - ctime|mtime always, atime if new
1748168404Spjd */
1749185029Spjd
1750168404Spjd/* ARGSUSED */
1751168404Spjdstatic int
1752168962Spjdzfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode,
1753185029Spjd    vnode_t **vpp, cred_t *cr, kthread_t *td)
1754168404Spjd{
1755168404Spjd	znode_t		*zp, *dzp = VTOZ(dvp);
1756168404Spjd	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
1757185029Spjd	zilog_t		*zilog;
1758185029Spjd	objset_t	*os;
1759168404Spjd	dmu_tx_t	*tx;
1760168404Spjd	int		error;
1761209962Smm	ksid_t		*ksid;
1762209962Smm	uid_t		uid;
1763209962Smm	gid_t		gid = crgetgid(cr);
1764219089Spjd	zfs_acl_ids_t   acl_ids;
1765209962Smm	boolean_t	fuid_dirtied;
1766185029Spjd	void		*vsecp = NULL;
1767185029Spjd	int		flag = 0;
1768303970Savg	uint64_t	txtype;
1769168404Spjd
1770185029Spjd	/*
1771185029Spjd	 * If we have an ephemeral id, ACL, or XVATTR then
1772185029Spjd	 * make sure file system is at proper version
1773185029Spjd	 */
1774185029Spjd
1775209962Smm	ksid = crgetsid(cr, KSID_OWNER);
1776209962Smm	if (ksid)
1777209962Smm		uid = ksid_getid(ksid);
1778209962Smm	else
1779209962Smm		uid = crgetuid(cr);
1780219089Spjd
1781185029Spjd	if (zfsvfs->z_use_fuids == B_FALSE &&
1782185029Spjd	    (vsecp || (vap->va_mask & AT_XVATTR) ||
1783219089Spjd	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1784249195Smm		return (SET_ERROR(EINVAL));
1785185029Spjd
1786168404Spjd	ZFS_ENTER(zfsvfs);
1787185029Spjd	ZFS_VERIFY_ZP(dzp);
1788185029Spjd	os = zfsvfs->z_os;
1789185029Spjd	zilog = zfsvfs->z_log;
1790168404Spjd
1791185029Spjd	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
1792185029Spjd	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1793185029Spjd		ZFS_EXIT(zfsvfs);
1794249195Smm		return (SET_ERROR(EILSEQ));
1795185029Spjd	}
1796185029Spjd
1797185029Spjd	if (vap->va_mask & AT_XVATTR) {
1798197861Spjd		if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap,
1799185029Spjd		    crgetuid(cr), cr, vap->va_type)) != 0) {
1800185029Spjd			ZFS_EXIT(zfsvfs);
1801185029Spjd			return (error);
1802185029Spjd		}
1803185029Spjd	}
1804260704Savg
1805168404Spjd	*vpp = NULL;
1806168404Spjd
1807182905Strasz	if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
1808182905Strasz		vap->va_mode &= ~S_ISVTX;
1809168404Spjd
1810303970Savg	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
1811303970Savg	if (error) {
1812303970Savg		ZFS_EXIT(zfsvfs);
1813303970Savg		return (error);
1814303970Savg	}
1815303970Savg	ASSERT3P(zp, ==, NULL);
1816185029Spjd
1817303970Savg	/*
1818303970Savg	 * Create a new file object and update the directory
1819303970Savg	 * to reference it.
1820303970Savg	 */
1821303970Savg	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
1822303970Savg		goto out;
1823168404Spjd	}
1824219089Spjd
1825303970Savg	/*
1826303970Savg	 * We only support the creation of regular files in
1827303970Savg	 * extended attribute directories.
1828303970Savg	 */
1829168404Spjd
1830303970Savg	if ((dzp->z_pflags & ZFS_XATTR) &&
1831303970Savg	    (vap->va_type != VREG)) {
1832303970Savg		error = SET_ERROR(EINVAL);
1833303970Savg		goto out;
1834303970Savg	}
1835168404Spjd
1836303970Savg	if ((error = zfs_acl_ids_create(dzp, 0, vap,
1837303970Savg	    cr, vsecp, &acl_ids)) != 0)
1838303970Savg		goto out;
1839219089Spjd
1840303970Savg	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
1841303970Savg		zfs_acl_ids_free(&acl_ids);
1842303970Savg		error = SET_ERROR(EDQUOT);
1843303970Savg		goto out;
1844303970Savg	}
1845168404Spjd
1846303970Savg	getnewvnode_reserve(1);
1847209962Smm
1848303970Savg	tx = dmu_tx_create(os);
1849209962Smm
1850303970Savg	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1851303970Savg	    ZFS_SA_BASE_ATTR_SIZE);
1852219089Spjd
1853303970Savg	fuid_dirtied = zfsvfs->z_fuid_dirty;
1854303970Savg	if (fuid_dirtied)
1855303970Savg		zfs_fuid_txhold(zfsvfs, tx);
1856303970Savg	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
1857303970Savg	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
1858303970Savg	if (!zfsvfs->z_use_sa &&
1859303970Savg	    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1860303970Savg		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
1861303970Savg		    0, acl_ids.z_aclp->z_acl_bytes);
1862303970Savg	}
1863303970Savg	error = dmu_tx_assign(tx, TXG_WAIT);
1864303970Savg	if (error) {
1865209962Smm		zfs_acl_ids_free(&acl_ids);
1866303970Savg		dmu_tx_abort(tx);
1867303970Savg		getnewvnode_drop_reserve();
1868303970Savg		ZFS_EXIT(zfsvfs);
1869303970Savg		return (error);
1870303970Savg	}
1871303970Savg	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1872185029Spjd
1873303970Savg	if (fuid_dirtied)
1874303970Savg		zfs_fuid_sync(zfsvfs, tx);
1875219089Spjd
1876303970Savg	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);
1877303970Savg	txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
1878303970Savg	zfs_log_create(zilog, tx, txtype, dzp, zp, name,
1879303970Savg	    vsecp, acl_ids.z_fuidp, vap);
1880303970Savg	zfs_acl_ids_free(&acl_ids);
1881303970Savg	dmu_tx_commit(tx);
1882168404Spjd
1883303970Savg	getnewvnode_drop_reserve();
1884168404Spjd
1885168404Spjdout:
1886303970Savg	if (error == 0) {
1887168962Spjd		*vpp = ZTOV(zp);
1888168404Spjd	}
1889168404Spjd
1890219089Spjd	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1891219089Spjd		zil_commit(zilog, 0);
1892219089Spjd
1893168404Spjd	ZFS_EXIT(zfsvfs);
1894168404Spjd	return (error);
1895168404Spjd}
1896168404Spjd
1897168404Spjd/*
1898168404Spjd * Remove an entry from a directory.
1899168404Spjd *
1900168404Spjd *	IN:	dvp	- vnode of directory to remove entry from.
1901168404Spjd *		name	- name of entry to remove.
1902168404Spjd *		cr	- credentials of caller.
1903185029Spjd *		ct	- caller context
1904185029Spjd *		flags	- case flags
1905168404Spjd *
1906251631Sdelphij *	RETURN:	0 on success, error code on failure.
1907168404Spjd *
1908168404Spjd * Timestamps:
1909168404Spjd *	dvp - ctime|mtime
1910168404Spjd *	 vp - ctime (if nlink > 0)
1911168404Spjd */
1912219089Spjd
1913185029Spjd/*ARGSUSED*/
1914168404Spjdstatic int
1915303970Savgzfs_remove(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr)
1916168404Spjd{
1917303970Savg	znode_t		*dzp = VTOZ(dvp);
1918303970Savg	znode_t		*zp = VTOZ(vp);
1919219089Spjd	znode_t		*xzp;
1920168404Spjd	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
1921185029Spjd	zilog_t		*zilog;
1922168962Spjd	uint64_t	acl_obj, xattr_obj;
1923219089Spjd	uint64_t	obj = 0;
1924168404Spjd	dmu_tx_t	*tx;
1925185029Spjd	boolean_t	unlinked, toobig = FALSE;
1926185029Spjd	uint64_t	txtype;
1927168404Spjd	int		error;
1928168404Spjd
1929168404Spjd	ZFS_ENTER(zfsvfs);
1930185029Spjd	ZFS_VERIFY_ZP(dzp);
1931303970Savg	ZFS_VERIFY_ZP(zp);
1932185029Spjd	zilog = zfsvfs->z_log;
1933303970Savg	zp = VTOZ(vp);
1934168404Spjd
1935219089Spjd	xattr_obj = 0;
1936219089Spjd	xzp = NULL;
1937168404Spjd
1938168962Spjd	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
1939168404Spjd		goto out;
1940168962Spjd	}
1941168404Spjd
1942168962Spjd	/*
1943168962Spjd	 * Need to use rmdir for removing directories.
1944168962Spjd	 */
1945168962Spjd	if (vp->v_type == VDIR) {
1946249195Smm		error = SET_ERROR(EPERM);
1947168962Spjd		goto out;
1948168962Spjd	}
1949168962Spjd
1950185029Spjd	vnevent_remove(vp, dvp, name, ct);
1951168962Spjd
1952303970Savg	obj = zp->z_id;
1953168404Spjd
1954303970Savg	/* are there any extended attributes? */
1955303970Savg	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
1956303970Savg	    &xattr_obj, sizeof (xattr_obj));
1957303970Savg	if (error == 0 && xattr_obj) {
1958303970Savg		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
1959303970Savg		ASSERT0(error);
1960303970Savg	}
1961168962Spjd
1962168404Spjd	/*
1963168404Spjd	 * We may delete the znode now, or we may put it in the unlinked set;
1964168404Spjd	 * it depends on whether we're the last link, and on whether there are
1965168404Spjd	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
1966168404Spjd	 * allow for either case.
1967168404Spjd	 */
1968168404Spjd	tx = dmu_tx_create(zfsvfs->z_os);
1969168404Spjd	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1970219089Spjd	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1971219089Spjd	zfs_sa_upgrade_txholds(tx, zp);
1972219089Spjd	zfs_sa_upgrade_txholds(tx, dzp);
1973168404Spjd
1974303970Savg	if (xzp) {
1975219089Spjd		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1976219089Spjd		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
1977168404Spjd	}
1978168404Spjd
1979168404Spjd	/* charge as an update -- would be nice not to charge at all */
1980168404Spjd	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1981168404Spjd
1982268464Sdelphij	/*
1983294803Smav	 * Mark this transaction as typically resulting in a net free of space
1984268464Sdelphij	 */
1985294803Smav	dmu_tx_mark_netfree(tx);
1986268464Sdelphij
1987303970Savg	error = dmu_tx_assign(tx, TXG_WAIT);
1988168404Spjd	if (error) {
1989168404Spjd		dmu_tx_abort(tx);
1990168404Spjd		ZFS_EXIT(zfsvfs);
1991168404Spjd		return (error);
1992168404Spjd	}
1993168404Spjd
1994168404Spjd	/*
1995168404Spjd	 * Remove the directory entry.
1996168404Spjd	 */
1997303970Savg	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);
1998168404Spjd
1999168404Spjd	if (error) {
2000168404Spjd		dmu_tx_commit(tx);
2001168404Spjd		goto out;
2002168404Spjd	}
2003168404Spjd
2004219089Spjd	if (unlinked) {
2005168404Spjd		zfs_unlinked_add(zp, tx);
2006243268Savg		vp->v_vflag |= VV_NOSYNC;
2007168962Spjd	}
2008168404Spjd
2009185029Spjd	txtype = TX_REMOVE;
2010219089Spjd	zfs_log_remove(zilog, tx, txtype, dzp, name, obj);
2011168404Spjd
2012168404Spjd	dmu_tx_commit(tx);
2013168404Spjdout:
2014185029Spjd
2015219089Spjd	if (xzp)
2016303970Savg		vrele(ZTOV(xzp));
2017168962Spjd
2018219089Spjd	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
2019219089Spjd		zil_commit(zilog, 0);
2020219089Spjd
2021168404Spjd	ZFS_EXIT(zfsvfs);
2022168404Spjd	return (error);
2023168404Spjd}
2024168404Spjd
2025168404Spjd/*
2026168404Spjd * Create a new directory and insert it into dvp using the name
2027168404Spjd * provided.  Return a pointer to the inserted directory.
2028168404Spjd *
2029168404Spjd *	IN:	dvp	- vnode of directory to add subdir to.
2030168404Spjd *		dirname	- name of new directory.
2031168404Spjd *		vap	- attributes of new directory.
2032168404Spjd *		cr	- credentials of caller.
2033185029Spjd *		ct	- caller context
2034251631Sdelphij *		flags	- case flags
2035185029Spjd *		vsecp	- ACL to be set
2036168404Spjd *
2037168404Spjd *	OUT:	vpp	- vnode of created directory.
2038168404Spjd *
2039251631Sdelphij *	RETURN:	0 on success, error code on failure.
2040168404Spjd *
2041168404Spjd * Timestamps:
2042168404Spjd *	dvp - ctime|mtime updated
2043168404Spjd *	 vp - ctime|mtime|atime updated
2044168404Spjd */
2045185029Spjd/*ARGSUSED*/
2046168404Spjdstatic int
2047303970Savgzfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr)
2048168404Spjd{
2049168404Spjd	znode_t		*zp, *dzp = VTOZ(dvp);
2050168404Spjd	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
2051185029Spjd	zilog_t		*zilog;
2052185029Spjd	uint64_t	txtype;
2053168404Spjd	dmu_tx_t	*tx;
2054168404Spjd	int		error;
2055209962Smm	ksid_t		*ksid;
2056209962Smm	uid_t		uid;
2057209962Smm	gid_t		gid = crgetgid(cr);
2058219089Spjd	zfs_acl_ids_t   acl_ids;
2059209962Smm	boolean_t	fuid_dirtied;
2060168404Spjd
2061168404Spjd	ASSERT(vap->va_type == VDIR);
2062168404Spjd
2063185029Spjd	/*
2064185029Spjd	 * If we have an ephemeral id, ACL, or XVATTR then
2065185029Spjd	 * make sure file system is at proper version
2066185029Spjd	 */
2067185029Spjd
2068209962Smm	ksid = crgetsid(cr, KSID_OWNER);
2069209962Smm	if (ksid)
2070209962Smm		uid = ksid_getid(ksid);
2071209962Smm	else
2072209962Smm		uid = crgetuid(cr);
2073185029Spjd	if (zfsvfs->z_use_fuids == B_FALSE &&
2074303970Savg	    ((vap->va_mask & AT_XVATTR) ||
2075219089Spjd	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
2076249195Smm		return (SET_ERROR(EINVAL));
2077185029Spjd
2078168404Spjd	ZFS_ENTER(zfsvfs);
2079185029Spjd	ZFS_VERIFY_ZP(dzp);
2080185029Spjd	zilog = zfsvfs->z_log;
2081168404Spjd
2082219089Spjd	if (dzp->z_pflags & ZFS_XATTR) {
2083168404Spjd		ZFS_EXIT(zfsvfs);
2084249195Smm		return (SET_ERROR(EINVAL));
2085168404Spjd	}
2086168404Spjd
2087185029Spjd	if (zfsvfs->z_utf8 && u8_validate(dirname,
2088185029Spjd	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
2089185029Spjd		ZFS_EXIT(zfsvfs);
2090249195Smm		return (SET_ERROR(EILSEQ));
2091185029Spjd	}
2092185029Spjd
2093219089Spjd	if (vap->va_mask & AT_XVATTR) {
2094197861Spjd		if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap,
2095185029Spjd		    crgetuid(cr), cr, vap->va_type)) != 0) {
2096185029Spjd			ZFS_EXIT(zfsvfs);
2097185029Spjd			return (error);
2098185029Spjd		}
2099219089Spjd	}
2100185029Spjd
2101219089Spjd	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
2102303970Savg	    NULL, &acl_ids)) != 0) {
2103219089Spjd		ZFS_EXIT(zfsvfs);
2104219089Spjd		return (error);
2105219089Spjd	}
2106260704Savg
2107168404Spjd	/*
2108168404Spjd	 * First make sure the new directory doesn't exist.
2109219089Spjd	 *
2110219089Spjd	 * Existence is checked first to make sure we don't return
2111219089Spjd	 * EACCES instead of EEXIST which can cause some applications
2112219089Spjd	 * to fail.
2113168404Spjd	 */
2114185029Spjd	*vpp = NULL;
2115185029Spjd
2116303970Savg	if (error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW)) {
2117219089Spjd		zfs_acl_ids_free(&acl_ids);
2118168404Spjd		ZFS_EXIT(zfsvfs);
2119168404Spjd		return (error);
2120168404Spjd	}
2121303970Savg	ASSERT3P(zp, ==, NULL);
2122168404Spjd
2123185029Spjd	if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) {
2124219089Spjd		zfs_acl_ids_free(&acl_ids);
2125168404Spjd		ZFS_EXIT(zfsvfs);
2126168404Spjd		return (error);
2127168404Spjd	}
2128168404Spjd
2129209962Smm	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
2130211932Smm		zfs_acl_ids_free(&acl_ids);
2131209962Smm		ZFS_EXIT(zfsvfs);
2132249195Smm		return (SET_ERROR(EDQUOT));
2133209962Smm	}
2134209962Smm
2135168404Spjd	/*
2136168404Spjd	 * Add a new entry to the directory.
2137168404Spjd	 */
2138303970Savg	getnewvnode_reserve(1);
2139168404Spjd	tx = dmu_tx_create(zfsvfs->z_os);
2140168404Spjd	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
2141168404Spjd	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
2142209962Smm	fuid_dirtied = zfsvfs->z_fuid_dirty;
2143209962Smm	if (fuid_dirtied)
2144209962Smm		zfs_fuid_txhold(zfsvfs, tx);
2145219089Spjd	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
2146219089Spjd		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
2147219089Spjd		    acl_ids.z_aclp->z_acl_bytes);
2148219089Spjd	}
2149219089Spjd
2150219089Spjd	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
2151219089Spjd	    ZFS_SA_BASE_ATTR_SIZE);
2152219089Spjd
2153303970Savg	error = dmu_tx_assign(tx, TXG_WAIT);
2154168404Spjd	if (error) {
2155219089Spjd		zfs_acl_ids_free(&acl_ids);
2156168404Spjd		dmu_tx_abort(tx);
2157260704Savg		getnewvnode_drop_reserve();
2158168404Spjd		ZFS_EXIT(zfsvfs);
2159168404Spjd		return (error);
2160168404Spjd	}
2161168404Spjd
2162168404Spjd	/*
2163168404Spjd	 * Create new node.
2164168404Spjd	 */
2165219089Spjd	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
2166168404Spjd
2167209962Smm	if (fuid_dirtied)
2168209962Smm		zfs_fuid_sync(zfsvfs, tx);
2169219089Spjd
2170168404Spjd	/*
2171168404Spjd	 * Now put new name in parent dir.
2172168404Spjd	 */
2173303970Savg	(void) zfs_link_create(dzp, dirname, zp, tx, ZNEW);
2174168404Spjd
2175168404Spjd	*vpp = ZTOV(zp);
2176168404Spjd
2177303970Savg	txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
2178303970Savg	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
2179209962Smm	    acl_ids.z_fuidp, vap);
2180185029Spjd
2181209962Smm	zfs_acl_ids_free(&acl_ids);
2182219089Spjd
2183168404Spjd	dmu_tx_commit(tx);
2184168404Spjd
2185260704Savg	getnewvnode_drop_reserve();
2186260704Savg
2187219089Spjd	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
2188219089Spjd		zil_commit(zilog, 0);
2189219089Spjd
2190168404Spjd	ZFS_EXIT(zfsvfs);
2191168404Spjd	return (0);
2192168404Spjd}
2193168404Spjd
2194168404Spjd/*
2195168404Spjd * Remove a directory subdir entry.  If the current working
2196168404Spjd * directory is the same as the subdir to be removed, the
2197168404Spjd * remove will fail.
2198168404Spjd *
2199168404Spjd *	IN:	dvp	- vnode of directory to remove from.
2200168404Spjd *		name	- name of directory to be removed.
2201168404Spjd *		cwd	- vnode of current working directory.
2202168404Spjd *		cr	- credentials of caller.
2203185029Spjd *		ct	- caller context
2204185029Spjd *		flags	- case flags
2205168404Spjd *
2206251631Sdelphij *	RETURN:	0 on success, error code on failure.
2207168404Spjd *
2208168404Spjd * Timestamps:
2209168404Spjd *	dvp - ctime|mtime updated
2210168404Spjd */
2211185029Spjd/*ARGSUSED*/
2212168404Spjdstatic int
2213303970Savgzfs_rmdir(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr)
2214168404Spjd{
2215168404Spjd	znode_t		*dzp = VTOZ(dvp);
2216303970Savg	znode_t		*zp = VTOZ(vp);
2217168404Spjd	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
2218185029Spjd	zilog_t		*zilog;
2219168404Spjd	dmu_tx_t	*tx;
2220168404Spjd	int		error;
2221168404Spjd
2222168962Spjd	ZFS_ENTER(zfsvfs);
2223185029Spjd	ZFS_VERIFY_ZP(dzp);
2224303970Savg	ZFS_VERIFY_ZP(zp);
2225185029Spjd	zilog = zfsvfs->z_log;
2226168404Spjd
2227168404Spjd
2228168404Spjd	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
2229168404Spjd		goto out;
2230168404Spjd	}
2231168404Spjd
2232168962Spjd	if (vp->v_type != VDIR) {
2233249195Smm		error = SET_ERROR(ENOTDIR);
2234168962Spjd		goto out;
2235168962Spjd	}
2236168962Spjd
2237185029Spjd	vnevent_rmdir(vp, dvp, name, ct);
2238168962Spjd
2239168404Spjd	tx = dmu_tx_create(zfsvfs->z_os);
2240168404Spjd	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
2241219089Spjd	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
2242168404Spjd	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
2243219089Spjd	zfs_sa_upgrade_txholds(tx, zp);
2244219089Spjd	zfs_sa_upgrade_txholds(tx, dzp);
2245304122Savg	dmu_tx_mark_netfree(tx);
2246303970Savg	error = dmu_tx_assign(tx, TXG_WAIT);
2247168404Spjd	if (error) {
2248168404Spjd		dmu_tx_abort(tx);
2249168404Spjd		ZFS_EXIT(zfsvfs);
2250168404Spjd		return (error);
2251168404Spjd	}
2252168404Spjd
2253168404Spjd	cache_purge(dvp);
2254168404Spjd
2255303970Savg	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);
2256168404Spjd
2257185029Spjd	if (error == 0) {
2258185029Spjd		uint64_t txtype = TX_RMDIR;
2259219089Spjd		zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT);
2260185029Spjd	}
2261168404Spjd
2262168404Spjd	dmu_tx_commit(tx);
2263168404Spjd
2264168404Spjd	cache_purge(vp);
2265168404Spjdout:
2266219089Spjd	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
2267219089Spjd		zil_commit(zilog, 0);
2268219089Spjd
2269168404Spjd	ZFS_EXIT(zfsvfs);
2270168404Spjd	return (error);
2271168404Spjd}
2272168404Spjd
2273168404Spjd/*
2274168404Spjd * Read as many directory entries as will fit into the provided
2275168404Spjd * buffer from the given directory cursor position (specified in
2276251631Sdelphij * the uio structure).
2277168404Spjd *
2278168404Spjd *	IN:	vp	- vnode of directory to read.
2279168404Spjd *		uio	- structure supplying read location, range info,
2280168404Spjd *			  and return buffer.
2281168404Spjd *		cr	- credentials of caller.
2282185029Spjd *		ct	- caller context
2283185029Spjd *		flags	- case flags
2284168404Spjd *
2285168404Spjd *	OUT:	uio	- updated offset and range, buffer filled.
2286168404Spjd *		eofp	- set to true if end-of-file detected.
2287168404Spjd *
2288251631Sdelphij *	RETURN:	0 on success, error code on failure.
2289168404Spjd *
2290168404Spjd * Timestamps:
2291168404Spjd *	vp - atime updated
2292168404Spjd *
2293168404Spjd * Note that the low 4 bits of the cookie returned by zap is always zero.
2294168404Spjd * This allows us to use the low range for "special" directory entries:
2295168404Spjd * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
2296168404Spjd * we use the offset 2 for the '.zfs' directory.
2297168404Spjd */
2298168404Spjd/* ARGSUSED */
2299168404Spjdstatic int
2300168962Spjdzfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies)
2301168404Spjd{
2302168404Spjd	znode_t		*zp = VTOZ(vp);
2303168404Spjd	iovec_t		*iovp;
2304185029Spjd	edirent_t	*eodp;
2305168404Spjd	dirent64_t	*odp;
2306168404Spjd	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
2307168404Spjd	objset_t	*os;
2308168404Spjd	caddr_t		outbuf;
2309168404Spjd	size_t		bufsize;
2310168404Spjd	zap_cursor_t	zc;
2311168404Spjd	zap_attribute_t	zap;
2312168404Spjd	uint_t		bytes_wanted;
2313168404Spjd	uint64_t	offset; /* must be unsigned; checks for < 1 */
2314219089Spjd	uint64_t	parent;
2315168404Spjd	int		local_eof;
2316168404Spjd	int		outcount;
2317168404Spjd	int		error;
2318168404Spjd	uint8_t		prefetch;
2319185029Spjd	boolean_t	check_sysattrs;
2320168404Spjd	uint8_t		type;
2321168962Spjd	int		ncooks;
2322168962Spjd	u_long		*cooks = NULL;
2323185029Spjd	int		flags = 0;
2324168404Spjd
2325168404Spjd	ZFS_ENTER(zfsvfs);
2326185029Spjd	ZFS_VERIFY_ZP(zp);
2327168404Spjd
2328219089Spjd	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
2329219089Spjd	    &parent, sizeof (parent))) != 0) {
2330219089Spjd		ZFS_EXIT(zfsvfs);
2331219089Spjd		return (error);
2332219089Spjd	}
2333219089Spjd
2334168404Spjd	/*
2335168404Spjd	 * If we are not given an eof variable,
2336168404Spjd	 * use a local one.
2337168404Spjd	 */
2338168404Spjd	if (eofp == NULL)
2339168404Spjd		eofp = &local_eof;
2340168404Spjd
2341168404Spjd	/*
2342168404Spjd	 * Check for valid iov_len.
2343168404Spjd	 */
2344168404Spjd	if (uio->uio_iov->iov_len <= 0) {
2345168404Spjd		ZFS_EXIT(zfsvfs);
2346249195Smm		return (SET_ERROR(EINVAL));
2347168404Spjd	}
2348168404Spjd
2349168404Spjd	/*
2350168404Spjd	 * Quit if directory has been removed (posix)
2351168404Spjd	 */
2352168404Spjd	if ((*eofp = zp->z_unlinked) != 0) {
2353168404Spjd		ZFS_EXIT(zfsvfs);
2354168404Spjd		return (0);
2355168404Spjd	}
2356168404Spjd
2357168404Spjd	error = 0;
2358168404Spjd	os = zfsvfs->z_os;
2359168404Spjd	offset = uio->uio_loffset;
2360168404Spjd	prefetch = zp->z_zn_prefetch;
2361168404Spjd
2362168404Spjd	/*
2363168404Spjd	 * Initialize the iterator cursor.
2364168404Spjd	 */
2365168404Spjd	if (offset <= 3) {
2366168404Spjd		/*
2367168404Spjd		 * Start iteration from the beginning of the directory.
2368168404Spjd		 */
2369168404Spjd		zap_cursor_init(&zc, os, zp->z_id);
2370168404Spjd	} else {
2371168404Spjd		/*
2372168404Spjd		 * The offset is a serialized cursor.
2373168404Spjd		 */
2374168404Spjd		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
2375168404Spjd	}
2376168404Spjd
2377168404Spjd	/*
2378168404Spjd	 * Get space to change directory entries into fs independent format.
2379168404Spjd	 */
2380168404Spjd	iovp = uio->uio_iov;
2381168404Spjd	bytes_wanted = iovp->iov_len;
2382168404Spjd	if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) {
2383168404Spjd		bufsize = bytes_wanted;
2384168404Spjd		outbuf = kmem_alloc(bufsize, KM_SLEEP);
2385168404Spjd		odp = (struct dirent64 *)outbuf;
2386168404Spjd	} else {
2387168404Spjd		bufsize = bytes_wanted;
2388247187Smm		outbuf = NULL;
2389168404Spjd		odp = (struct dirent64 *)iovp->iov_base;
2390168404Spjd	}
2391185029Spjd	eodp = (struct edirent *)odp;
2392168404Spjd
2393169170Spjd	if (ncookies != NULL) {
2394168404Spjd		/*
2395168404Spjd		 * Minimum entry size is dirent size and 1 byte for a file name.
2396168404Spjd		 */
2397168962Spjd		ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1);
2398219404Spjd		cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK);
2399219404Spjd		*cookies = cooks;
2400168962Spjd		*ncookies = ncooks;
2401168404Spjd	}
2402185029Spjd	/*
2403185029Spjd	 * If this VFS supports the system attribute view interface; and
2404185029Spjd	 * we're looking at an extended attribute directory; and we care
2405185029Spjd	 * about normalization conflicts on this vfs; then we must check
2406185029Spjd	 * for normalization conflicts with the sysattr name space.
2407185029Spjd	 */
2408185029Spjd#ifdef TODO
2409185029Spjd	check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
2410185029Spjd	    (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm &&
2411185029Spjd	    (flags & V_RDDIR_ENTFLAGS);
2412185029Spjd#else
2413185029Spjd	check_sysattrs = 0;
2414185029Spjd#endif
2415168404Spjd
2416168404Spjd	/*
2417168404Spjd	 * Transform to file-system independent format
2418168404Spjd	 */
2419168404Spjd	outcount = 0;
2420168404Spjd	while (outcount < bytes_wanted) {
2421168404Spjd		ino64_t objnum;
2422168404Spjd		ushort_t reclen;
2423219089Spjd		off64_t *next = NULL;
2424168404Spjd
2425168404Spjd		/*
2426168404Spjd		 * Special case `.', `..', and `.zfs'.
2427168404Spjd		 */
2428168404Spjd		if (offset == 0) {
2429168404Spjd			(void) strcpy(zap.za_name, ".");
2430185029Spjd			zap.za_normalization_conflict = 0;
2431168404Spjd			objnum = zp->z_id;
2432169108Spjd			type = DT_DIR;
2433168404Spjd		} else if (offset == 1) {
2434168404Spjd			(void) strcpy(zap.za_name, "..");
2435185029Spjd			zap.za_normalization_conflict = 0;
2436219089Spjd			objnum = parent;
2437169108Spjd			type = DT_DIR;
2438168404Spjd		} else if (offset == 2 && zfs_show_ctldir(zp)) {
2439168404Spjd			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
2440185029Spjd			zap.za_normalization_conflict = 0;
2441168404Spjd			objnum = ZFSCTL_INO_ROOT;
2442169108Spjd			type = DT_DIR;
2443168404Spjd		} else {
2444168404Spjd			/*
2445168404Spjd			 * Grab next entry.
2446168404Spjd			 */
2447168404Spjd			if (error = zap_cursor_retrieve(&zc, &zap)) {
2448168404Spjd				if ((*eofp = (error == ENOENT)) != 0)
2449168404Spjd					break;
2450168404Spjd				else
2451168404Spjd					goto update;
2452168404Spjd			}
2453168404Spjd
2454168404Spjd			if (zap.za_integer_length != 8 ||
2455168404Spjd			    zap.za_num_integers != 1) {
2456168404Spjd				cmn_err(CE_WARN, "zap_readdir: bad directory "
2457168404Spjd				    "entry, obj = %lld, offset = %lld\n",
2458168404Spjd				    (u_longlong_t)zp->z_id,
2459168404Spjd				    (u_longlong_t)offset);
2460249195Smm				error = SET_ERROR(ENXIO);
2461168404Spjd				goto update;
2462168404Spjd			}
2463168404Spjd
2464168404Spjd			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
2465168404Spjd			/*
2466168404Spjd			 * MacOS X can extract the object type here such as:
2467168404Spjd			 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
2468168404Spjd			 */
2469168404Spjd			type = ZFS_DIRENT_TYPE(zap.za_first_integer);
2470185029Spjd
2471185029Spjd			if (check_sysattrs && !zap.za_normalization_conflict) {
2472185029Spjd#ifdef TODO
2473185029Spjd				zap.za_normalization_conflict =
2474185029Spjd				    xattr_sysattr_casechk(zap.za_name);
2475185029Spjd#else
2476185029Spjd				panic("%s:%u: TODO", __func__, __LINE__);
2477185029Spjd#endif
2478185029Spjd			}
2479168404Spjd		}
2480168404Spjd
2481211932Smm		if (flags & V_RDDIR_ACCFILTER) {
2482211932Smm			/*
2483211932Smm			 * If we have no access at all, don't include
2484211932Smm			 * this entry in the returned information
2485211932Smm			 */
2486211932Smm			znode_t	*ezp;
2487211932Smm			if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0)
2488211932Smm				goto skip_entry;
2489211932Smm			if (!zfs_has_access(ezp, cr)) {
2490303970Savg				vrele(ZTOV(ezp));
2491211932Smm				goto skip_entry;
2492211932Smm			}
2493303970Savg			vrele(ZTOV(ezp));
2494211932Smm		}
2495211932Smm
2496185029Spjd		if (flags & V_RDDIR_ENTFLAGS)
2497185029Spjd			reclen = EDIRENT_RECLEN(strlen(zap.za_name));
2498185029Spjd		else
2499185029Spjd			reclen = DIRENT64_RECLEN(strlen(zap.za_name));
2500185029Spjd
2501168404Spjd		/*
2502168404Spjd		 * Will this entry fit in the buffer?
2503168404Spjd		 */
2504168404Spjd		if (outcount + reclen > bufsize) {
2505168404Spjd			/*
2506168404Spjd			 * Did we manage to fit anything in the buffer?
2507168404Spjd			 */
2508168404Spjd			if (!outcount) {
2509249195Smm				error = SET_ERROR(EINVAL);
2510168404Spjd				goto update;
2511168404Spjd			}
2512168404Spjd			break;
2513168404Spjd		}
2514185029Spjd		if (flags & V_RDDIR_ENTFLAGS) {
2515185029Spjd			/*
2516185029Spjd			 * Add extended flag entry:
2517185029Spjd			 */
2518185029Spjd			eodp->ed_ino = objnum;
2519185029Spjd			eodp->ed_reclen = reclen;
2520185029Spjd			/* NOTE: ed_off is the offset for the *next* entry */
2521185029Spjd			next = &(eodp->ed_off);
2522185029Spjd			eodp->ed_eflags = zap.za_normalization_conflict ?
2523185029Spjd			    ED_CASE_CONFLICT : 0;
2524185029Spjd			(void) strncpy(eodp->ed_name, zap.za_name,
2525185029Spjd			    EDIRENT_NAMELEN(reclen));
2526185029Spjd			eodp = (edirent_t *)((intptr_t)eodp + reclen);
2527185029Spjd		} else {
2528185029Spjd			/*
2529185029Spjd			 * Add normal entry:
2530185029Spjd			 */
2531185029Spjd			odp->d_ino = objnum;
2532185029Spjd			odp->d_reclen = reclen;
2533185029Spjd			odp->d_namlen = strlen(zap.za_name);
2534185029Spjd			(void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
2535185029Spjd			odp->d_type = type;
2536185029Spjd			odp = (dirent64_t *)((intptr_t)odp + reclen);
2537185029Spjd		}
2538168404Spjd		outcount += reclen;
2539168404Spjd
2540168404Spjd		ASSERT(outcount <= bufsize);
2541168404Spjd
2542168404Spjd		/* Prefetch znode */
2543168404Spjd		if (prefetch)
2544286705Smav			dmu_prefetch(os, objnum, 0, 0, 0,
2545286705Smav			    ZIO_PRIORITY_SYNC_READ);
2546168404Spjd
2547211932Smm	skip_entry:
2548168404Spjd		/*
2549168404Spjd		 * Move to the next entry, fill in the previous offset.
2550168404Spjd		 */
2551168404Spjd		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
2552168404Spjd			zap_cursor_advance(&zc);
2553168404Spjd			offset = zap_cursor_serialize(&zc);
2554168404Spjd		} else {
2555168404Spjd			offset += 1;
2556168404Spjd		}
2557219404Spjd
2558219404Spjd		if (cooks != NULL) {
2559219404Spjd			*cooks++ = offset;
2560219404Spjd			ncooks--;
2561219404Spjd			KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
2562219404Spjd		}
2563168404Spjd	}
2564168404Spjd	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
2565168404Spjd
2566168404Spjd	/* Subtract unused cookies */
2567168962Spjd	if (ncookies != NULL)
2568168962Spjd		*ncookies -= ncooks;
2569168404Spjd
2570168404Spjd	if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) {
2571168404Spjd		iovp->iov_base += outcount;
2572168404Spjd		iovp->iov_len -= outcount;
2573168404Spjd		uio->uio_resid -= outcount;
2574168404Spjd	} else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) {
2575168404Spjd		/*
2576168404Spjd		 * Reset the pointer.
2577168404Spjd		 */
2578168404Spjd		offset = uio->uio_loffset;
2579168404Spjd	}
2580168404Spjd
2581168404Spjdupdate:
2582168404Spjd	zap_cursor_fini(&zc);
2583168404Spjd	if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
2584168404Spjd		kmem_free(outbuf, bufsize);
2585168404Spjd
2586168404Spjd	if (error == ENOENT)
2587168404Spjd		error = 0;
2588168404Spjd
2589168404Spjd	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
2590168404Spjd
2591168404Spjd	uio->uio_loffset = offset;
2592168404Spjd	ZFS_EXIT(zfsvfs);
2593169107Spjd	if (error != 0 && cookies != NULL) {
2594168962Spjd		free(*cookies, M_TEMP);
2595168962Spjd		*cookies = NULL;
2596168962Spjd		*ncookies = 0;
2597168404Spjd	}
2598168404Spjd	return (error);
2599168404Spjd}
2600168404Spjd
2601185029Spjdulong_t zfs_fsync_sync_cnt = 4;
2602185029Spjd
2603168404Spjdstatic int
2604185029Spjdzfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
2605168404Spjd{
2606168962Spjd	znode_t	*zp = VTOZ(vp);
2607168962Spjd	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2608168404Spjd
2609185029Spjd	(void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);
2610185029Spjd
2611219089Spjd	if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
2612219089Spjd		ZFS_ENTER(zfsvfs);
2613219089Spjd		ZFS_VERIFY_ZP(zp);
2614219089Spjd		zil_commit(zfsvfs->z_log, zp->z_id);
2615219089Spjd		ZFS_EXIT(zfsvfs);
2616219089Spjd	}
2617168404Spjd	return (0);
2618168404Spjd}
2619168404Spjd
2620185029Spjd
2621168404Spjd/*
2622168404Spjd * Get the requested file attributes and place them in the provided
2623168404Spjd * vattr structure.
2624168404Spjd *
2625168404Spjd *	IN:	vp	- vnode of file.
2626168404Spjd *		vap	- va_mask identifies requested attributes.
2627185029Spjd *			  If AT_XVATTR set, then optional attrs are requested
2628185029Spjd *		flags	- ATTR_NOACLCHECK (CIFS server context)
2629168404Spjd *		cr	- credentials of caller.
2630185029Spjd *		ct	- caller context
2631168404Spjd *
2632168404Spjd *	OUT:	vap	- attribute values.
2633168404Spjd *
2634251631Sdelphij *	RETURN:	0 (always succeeds).
2635168404Spjd */
2636168404Spjd/* ARGSUSED */
2637168404Spjdstatic int
2638185029Spjdzfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
2639185029Spjd    caller_context_t *ct)
2640168404Spjd{
2641168962Spjd	znode_t *zp = VTOZ(vp);
2642168962Spjd	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2643185029Spjd	int	error = 0;
2644168962Spjd	uint32_t blksize;
2645168962Spjd	u_longlong_t nblocks;
2646185029Spjd	uint64_t links;
2647224251Sdelphij	uint64_t mtime[2], ctime[2], crtime[2], rdev;
2648185029Spjd	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
2649185029Spjd	xoptattr_t *xoap = NULL;
2650185029Spjd	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
2651224251Sdelphij	sa_bulk_attr_t bulk[4];
2652219089Spjd	int count = 0;
2653168404Spjd
2654168404Spjd	ZFS_ENTER(zfsvfs);
2655185029Spjd	ZFS_VERIFY_ZP(zp);
2656168404Spjd
2657219089Spjd	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
2658219089Spjd
2659219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
2660219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
2661243807Sdelphij	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
2662224251Sdelphij	if (vp->v_type == VBLK || vp->v_type == VCHR)
2663224251Sdelphij		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
2664224251Sdelphij		    &rdev, 8);
2665219089Spjd
2666219089Spjd	if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
2667219089Spjd		ZFS_EXIT(zfsvfs);
2668219089Spjd		return (error);
2669219089Spjd	}
2670219089Spjd
2671168404Spjd	/*
2672185029Spjd	 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
2673185029Spjd	 * Also, if we are the owner don't bother, since owner should
2674185029Spjd	 * always be allowed to read basic attributes of file.
2675185029Spjd	 */
2676219089Spjd	if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
2677219089Spjd	    (vap->va_uid != crgetuid(cr))) {
2678185029Spjd		if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
2679185029Spjd		    skipaclchk, cr)) {
2680185029Spjd			ZFS_EXIT(zfsvfs);
2681185029Spjd			return (error);
2682185029Spjd		}
2683185029Spjd	}
2684185029Spjd
2685185029Spjd	/*
2686168404Spjd	 * Return all attributes.  It's cheaper to provide the answer
2687168404Spjd	 * than to determine whether we were asked the question.
2688168404Spjd	 */
2689168404Spjd
2690219089Spjd	vap->va_type = IFTOVT(zp->z_mode);
2691219089Spjd	vap->va_mode = zp->z_mode & ~S_IFMT;
2692277300Ssmh#ifdef illumos
2693224252Sdelphij	vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev;
2694224252Sdelphij#else
2695224252Sdelphij	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
2696224252Sdelphij#endif
2697168404Spjd	vap->va_nodeid = zp->z_id;
2698185029Spjd	if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp))
2699219089Spjd		links = zp->z_links + 1;
2700185029Spjd	else
2701219089Spjd		links = zp->z_links;
2702229425Sdim	vap->va_nlink = MIN(links, LINK_MAX);	/* nlink_t limit! */
2703219089Spjd	vap->va_size = zp->z_size;
2704277300Ssmh#ifdef illumos
2705224252Sdelphij	vap->va_rdev = vp->v_rdev;
2706224252Sdelphij#else
2707224251Sdelphij	if (vp->v_type == VBLK || vp->v_type == VCHR)
2708224251Sdelphij		vap->va_rdev = zfs_cmpldev(rdev);
2709224252Sdelphij#endif
2710168404Spjd	vap->va_seq = zp->z_seq;
2711168404Spjd	vap->va_flags = 0;	/* FreeBSD: Reset chflags(2) flags. */
2712272467Saraujo     	vap->va_filerev = zp->z_seq;
2713168404Spjd
2714185029Spjd	/*
2715185029Spjd	 * Add in any requested optional attributes and the create time.
2716185029Spjd	 * Also set the corresponding bits in the returned attribute bitmap.
2717185029Spjd	 */
2718185029Spjd	if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
2719185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
2720185029Spjd			xoap->xoa_archive =
2721219089Spjd			    ((zp->z_pflags & ZFS_ARCHIVE) != 0);
2722185029Spjd			XVA_SET_RTN(xvap, XAT_ARCHIVE);
2723185029Spjd		}
2724185029Spjd
2725185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
2726185029Spjd			xoap->xoa_readonly =
2727219089Spjd			    ((zp->z_pflags & ZFS_READONLY) != 0);
2728185029Spjd			XVA_SET_RTN(xvap, XAT_READONLY);
2729185029Spjd		}
2730185029Spjd
2731185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
2732185029Spjd			xoap->xoa_system =
2733219089Spjd			    ((zp->z_pflags & ZFS_SYSTEM) != 0);
2734185029Spjd			XVA_SET_RTN(xvap, XAT_SYSTEM);
2735185029Spjd		}
2736185029Spjd
2737185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
2738185029Spjd			xoap->xoa_hidden =
2739219089Spjd			    ((zp->z_pflags & ZFS_HIDDEN) != 0);
2740185029Spjd			XVA_SET_RTN(xvap, XAT_HIDDEN);
2741185029Spjd		}
2742185029Spjd
2743185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
2744185029Spjd			xoap->xoa_nounlink =
2745219089Spjd			    ((zp->z_pflags & ZFS_NOUNLINK) != 0);
2746185029Spjd			XVA_SET_RTN(xvap, XAT_NOUNLINK);
2747185029Spjd		}
2748185029Spjd
2749185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
2750185029Spjd			xoap->xoa_immutable =
2751219089Spjd			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
2752185029Spjd			XVA_SET_RTN(xvap, XAT_IMMUTABLE);
2753185029Spjd		}
2754185029Spjd
2755185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
2756185029Spjd			xoap->xoa_appendonly =
2757219089Spjd			    ((zp->z_pflags & ZFS_APPENDONLY) != 0);
2758185029Spjd			XVA_SET_RTN(xvap, XAT_APPENDONLY);
2759185029Spjd		}
2760185029Spjd
2761185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
2762185029Spjd			xoap->xoa_nodump =
2763219089Spjd			    ((zp->z_pflags & ZFS_NODUMP) != 0);
2764185029Spjd			XVA_SET_RTN(xvap, XAT_NODUMP);
2765185029Spjd		}
2766185029Spjd
2767185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
2768185029Spjd			xoap->xoa_opaque =
2769219089Spjd			    ((zp->z_pflags & ZFS_OPAQUE) != 0);
2770185029Spjd			XVA_SET_RTN(xvap, XAT_OPAQUE);
2771185029Spjd		}
2772185029Spjd
2773185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
2774185029Spjd			xoap->xoa_av_quarantined =
2775219089Spjd			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
2776185029Spjd			XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
2777185029Spjd		}
2778185029Spjd
2779185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
2780185029Spjd			xoap->xoa_av_modified =
2781219089Spjd			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
2782185029Spjd			XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
2783185029Spjd		}
2784185029Spjd
2785185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
2786219089Spjd		    vp->v_type == VREG) {
2787219089Spjd			zfs_sa_get_scanstamp(zp, xvap);
2788185029Spjd		}
2789185029Spjd
2790219089Spjd		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
2791219089Spjd			xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
2792219089Spjd			XVA_SET_RTN(xvap, XAT_REPARSE);
2793219089Spjd		}
2794219089Spjd		if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
2795219089Spjd			xoap->xoa_generation = zp->z_gen;
2796219089Spjd			XVA_SET_RTN(xvap, XAT_GEN);
2797219089Spjd		}
2798219089Spjd
2799219089Spjd		if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
2800219089Spjd			xoap->xoa_offline =
2801219089Spjd			    ((zp->z_pflags & ZFS_OFFLINE) != 0);
2802219089Spjd			XVA_SET_RTN(xvap, XAT_OFFLINE);
2803219089Spjd		}
2804219089Spjd
2805219089Spjd		if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
2806219089Spjd			xoap->xoa_sparse =
2807219089Spjd			    ((zp->z_pflags & ZFS_SPARSE) != 0);
2808219089Spjd			XVA_SET_RTN(xvap, XAT_SPARSE);
2809219089Spjd		}
2810185029Spjd	}
2811185029Spjd
2812219089Spjd	ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
2813219089Spjd	ZFS_TIME_DECODE(&vap->va_mtime, mtime);
2814219089Spjd	ZFS_TIME_DECODE(&vap->va_ctime, ctime);
2815219089Spjd	ZFS_TIME_DECODE(&vap->va_birthtime, crtime);
2816168404Spjd
2817168404Spjd
2818219089Spjd	sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
2819168404Spjd	vap->va_blksize = blksize;
2820168404Spjd	vap->va_bytes = nblocks << 9;	/* nblocks * 512 */
2821168404Spjd
2822168404Spjd	if (zp->z_blksz == 0) {
2823168404Spjd		/*
2824168404Spjd		 * Block size hasn't been set; suggest maximal I/O transfers.
2825168404Spjd		 */
2826168404Spjd		vap->va_blksize = zfsvfs->z_max_blksz;
2827168404Spjd	}
2828168404Spjd
2829168404Spjd	ZFS_EXIT(zfsvfs);
2830168404Spjd	return (0);
2831168404Spjd}
2832168404Spjd
2833168404Spjd/*
2834168404Spjd * Set the file attributes to the values contained in the
2835168404Spjd * vattr structure.
2836168404Spjd *
2837168404Spjd *	IN:	vp	- vnode of file to be modified.
2838168404Spjd *		vap	- new attribute values.
2839185029Spjd *			  If AT_XVATTR set, then optional attrs are being set
2840168404Spjd *		flags	- ATTR_UTIME set if non-default time values provided.
2841185029Spjd *			- ATTR_NOACLCHECK (CIFS context only).
2842168404Spjd *		cr	- credentials of caller.
2843185029Spjd *		ct	- caller context
2844168404Spjd *
2845251631Sdelphij *	RETURN:	0 on success, error code on failure.
2846168404Spjd *
2847168404Spjd * Timestamps:
2848168404Spjd *	vp - ctime updated, mtime updated if size changed.
2849168404Spjd */
2850168404Spjd/* ARGSUSED */
2851168404Spjdstatic int
2852168962Spjdzfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
2853251631Sdelphij    caller_context_t *ct)
2854168404Spjd{
2855185029Spjd	znode_t		*zp = VTOZ(vp);
2856168404Spjd	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
2857185029Spjd	zilog_t		*zilog;
2858168404Spjd	dmu_tx_t	*tx;
2859168404Spjd	vattr_t		oldva;
2860209962Smm	xvattr_t	tmpxvattr;
2861168962Spjd	uint_t		mask = vap->va_mask;
2862247187Smm	uint_t		saved_mask = 0;
2863197831Spjd	uint64_t	saved_mode;
2864168404Spjd	int		trim_mask = 0;
2865168404Spjd	uint64_t	new_mode;
2866209962Smm	uint64_t	new_uid, new_gid;
2867219089Spjd	uint64_t	xattr_obj;
2868219089Spjd	uint64_t	mtime[2], ctime[2];
2869168404Spjd	znode_t		*attrzp;
2870168404Spjd	int		need_policy = FALSE;
2871219089Spjd	int		err, err2;
2872185029Spjd	zfs_fuid_info_t *fuidp = NULL;
2873185029Spjd	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
2874185029Spjd	xoptattr_t	*xoap;
2875219089Spjd	zfs_acl_t	*aclp;
2876185029Spjd	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
2877219089Spjd	boolean_t	fuid_dirtied = B_FALSE;
2878219089Spjd	sa_bulk_attr_t	bulk[7], xattr_bulk[7];
2879219089Spjd	int		count = 0, xattr_count = 0;
2880168404Spjd
2881168404Spjd	if (mask == 0)
2882168404Spjd		return (0);
2883168404Spjd
2884168962Spjd	if (mask & AT_NOSET)
2885249195Smm		return (SET_ERROR(EINVAL));
2886168962Spjd
2887185029Spjd	ZFS_ENTER(zfsvfs);
2888185029Spjd	ZFS_VERIFY_ZP(zp);
2889185029Spjd
2890185029Spjd	zilog = zfsvfs->z_log;
2891185029Spjd
2892185029Spjd	/*
2893185029Spjd	 * Make sure that if we have ephemeral uid/gid or xvattr specified
2894185029Spjd	 * that file system is at proper version level
2895185029Spjd	 */
2896185029Spjd
2897185029Spjd	if (zfsvfs->z_use_fuids == B_FALSE &&
2898185029Spjd	    (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
2899185029Spjd	    ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
2900185029Spjd	    (mask & AT_XVATTR))) {
2901185029Spjd		ZFS_EXIT(zfsvfs);
2902249195Smm		return (SET_ERROR(EINVAL));
2903185029Spjd	}
2904185029Spjd
2905185029Spjd	if (mask & AT_SIZE && vp->v_type == VDIR) {
2906185029Spjd		ZFS_EXIT(zfsvfs);
2907249195Smm		return (SET_ERROR(EISDIR));
2908185029Spjd	}
2909168404Spjd
2910185029Spjd	if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
2911185029Spjd		ZFS_EXIT(zfsvfs);
2912249195Smm		return (SET_ERROR(EINVAL));
2913185029Spjd	}
2914168404Spjd
2915185029Spjd	/*
2916185029Spjd	 * If this is an xvattr_t, then get a pointer to the structure of
2917185029Spjd	 * optional attributes.  If this is NULL, then we have a vattr_t.
2918185029Spjd	 */
2919185029Spjd	xoap = xva_getxoptattr(xvap);
2920168404Spjd
2921209962Smm	xva_init(&tmpxvattr);
2922209962Smm
2923185029Spjd	/*
2924185029Spjd	 * Immutable files can only alter immutable bit and atime
2925185029Spjd	 */
2926219089Spjd	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
2927185029Spjd	    ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
2928185029Spjd	    ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
2929185029Spjd		ZFS_EXIT(zfsvfs);
2930249195Smm		return (SET_ERROR(EPERM));
2931185029Spjd	}
2932185029Spjd
2933321579Smav	/*
2934321579Smav	 * Note: ZFS_READONLY is handled in zfs_zaccess_common.
2935321579Smav	 */
2936185029Spjd
2937185029Spjd	/*
2938185029Spjd	 * Verify timestamps doesn't overflow 32 bits.
2939185029Spjd	 * ZFS can handle large timestamps, but 32bit syscalls can't
2940185029Spjd	 * handle times greater than 2039.  This check should be removed
2941185029Spjd	 * once large timestamps are fully supported.
2942185029Spjd	 */
2943185029Spjd	if (mask & (AT_ATIME | AT_MTIME)) {
2944185029Spjd		if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
2945185029Spjd		    ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
2946185029Spjd			ZFS_EXIT(zfsvfs);
2947249195Smm			return (SET_ERROR(EOVERFLOW));
2948185029Spjd		}
2949185029Spjd	}
2950316391Sasomers	if (xoap && (mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME) &&
2951316391Sasomers	    TIMESPEC_OVERFLOW(&vap->va_birthtime)) {
2952316391Sasomers		ZFS_EXIT(zfsvfs);
2953316391Sasomers		return (SET_ERROR(EOVERFLOW));
2954316391Sasomers	}
2955185029Spjd
2956168404Spjd	attrzp = NULL;
2957219089Spjd	aclp = NULL;
2958168404Spjd
2959211932Smm	/* Can this be moved to before the top label? */
2960168404Spjd	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
2961168404Spjd		ZFS_EXIT(zfsvfs);
2962249195Smm		return (SET_ERROR(EROFS));
2963168404Spjd	}
2964168404Spjd
2965168404Spjd	/*
2966168404Spjd	 * First validate permissions
2967168404Spjd	 */
2968168404Spjd
2969168404Spjd	if (mask & AT_SIZE) {
2970168404Spjd		/*
2971168404Spjd		 * XXX - Note, we are not providing any open
2972168404Spjd		 * mode flags here (like FNDELAY), so we may
2973168404Spjd		 * block if there are locks present... this
2974168404Spjd		 * should be addressed in openat().
2975168404Spjd		 */
2976185029Spjd		/* XXX - would it be OK to generate a log record here? */
2977185029Spjd		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
2978168404Spjd		if (err) {
2979168404Spjd			ZFS_EXIT(zfsvfs);
2980168404Spjd			return (err);
2981168404Spjd		}
2982168404Spjd	}
2983168404Spjd
2984185029Spjd	if (mask & (AT_ATIME|AT_MTIME) ||
2985185029Spjd	    ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
2986185029Spjd	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
2987185029Spjd	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
2988219089Spjd	    XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
2989219089Spjd	    XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
2990185029Spjd	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
2991219089Spjd	    XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
2992185029Spjd		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
2993185029Spjd		    skipaclchk, cr);
2994219089Spjd	}
2995168404Spjd
2996168404Spjd	if (mask & (AT_UID|AT_GID)) {
2997168404Spjd		int	idmask = (mask & (AT_UID|AT_GID));
2998168404Spjd		int	take_owner;
2999168404Spjd		int	take_group;
3000168404Spjd
3001168404Spjd		/*
3002168404Spjd		 * NOTE: even if a new mode is being set,
3003168404Spjd		 * we may clear S_ISUID/S_ISGID bits.
3004168404Spjd		 */
3005168404Spjd
3006168404Spjd		if (!(mask & AT_MODE))
3007219089Spjd			vap->va_mode = zp->z_mode;
3008168404Spjd
3009168404Spjd		/*
3010168404Spjd		 * Take ownership or chgrp to group we are a member of
3011168404Spjd		 */
3012168404Spjd
3013168404Spjd		take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
3014185029Spjd		take_group = (mask & AT_GID) &&
3015185029Spjd		    zfs_groupmember(zfsvfs, vap->va_gid, cr);
3016168404Spjd
3017168404Spjd		/*
3018168404Spjd		 * If both AT_UID and AT_GID are set then take_owner and
3019168404Spjd		 * take_group must both be set in order to allow taking
3020168404Spjd		 * ownership.
3021168404Spjd		 *
3022168404Spjd		 * Otherwise, send the check through secpolicy_vnode_setattr()
3023168404Spjd		 *
3024168404Spjd		 */
3025168404Spjd
3026168404Spjd		if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
3027168404Spjd		    ((idmask == AT_UID) && take_owner) ||
3028168404Spjd		    ((idmask == AT_GID) && take_group)) {
3029185029Spjd			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
3030185029Spjd			    skipaclchk, cr) == 0) {
3031168404Spjd				/*
3032168404Spjd				 * Remove setuid/setgid for non-privileged users
3033168404Spjd				 */
3034185029Spjd				secpolicy_setid_clear(vap, vp, cr);
3035168404Spjd				trim_mask = (mask & (AT_UID|AT_GID));
3036168404Spjd			} else {
3037168404Spjd				need_policy =  TRUE;
3038168404Spjd			}
3039168404Spjd		} else {
3040168404Spjd			need_policy =  TRUE;
3041168404Spjd		}
3042168404Spjd	}
3043168404Spjd
3044219089Spjd	oldva.va_mode = zp->z_mode;
3045185029Spjd	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
3046185029Spjd	if (mask & AT_XVATTR) {
3047209962Smm		/*
3048209962Smm		 * Update xvattr mask to include only those attributes
3049209962Smm		 * that are actually changing.
3050209962Smm		 *
3051209962Smm		 * the bits will be restored prior to actually setting
3052209962Smm		 * the attributes so the caller thinks they were set.
3053209962Smm		 */
3054209962Smm		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
3055209962Smm			if (xoap->xoa_appendonly !=
3056219089Spjd			    ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
3057209962Smm				need_policy = TRUE;
3058209962Smm			} else {
3059209962Smm				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
3060209962Smm				XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
3061209962Smm			}
3062209962Smm		}
3063209962Smm
3064209962Smm		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
3065209962Smm			if (xoap->xoa_nounlink !=
3066219089Spjd			    ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
3067209962Smm				need_policy = TRUE;
3068209962Smm			} else {
3069209962Smm				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
3070209962Smm				XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
3071209962Smm			}
3072209962Smm		}
3073209962Smm
3074209962Smm		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
3075209962Smm			if (xoap->xoa_immutable !=
3076219089Spjd			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
3077209962Smm				need_policy = TRUE;
3078209962Smm			} else {
3079209962Smm				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
3080209962Smm				XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
3081209962Smm			}
3082209962Smm		}
3083209962Smm
3084209962Smm		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
3085209962Smm			if (xoap->xoa_nodump !=
3086219089Spjd			    ((zp->z_pflags & ZFS_NODUMP) != 0)) {
3087209962Smm				need_policy = TRUE;
3088209962Smm			} else {
3089209962Smm				XVA_CLR_REQ(xvap, XAT_NODUMP);
3090209962Smm				XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
3091209962Smm			}
3092209962Smm		}
3093209962Smm
3094209962Smm		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
3095209962Smm			if (xoap->xoa_av_modified !=
3096219089Spjd			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
3097209962Smm				need_policy = TRUE;
3098209962Smm			} else {
3099209962Smm				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
3100209962Smm				XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
3101209962Smm			}
3102209962Smm		}
3103209962Smm
3104209962Smm		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
3105209962Smm			if ((vp->v_type != VREG &&
3106209962Smm			    xoap->xoa_av_quarantined) ||
3107209962Smm			    xoap->xoa_av_quarantined !=
3108219089Spjd			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
3109209962Smm				need_policy = TRUE;
3110209962Smm			} else {
3111209962Smm				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
3112209962Smm				XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
3113209962Smm			}
3114209962Smm		}
3115209962Smm
3116219089Spjd		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
3117219089Spjd			ZFS_EXIT(zfsvfs);
3118249195Smm			return (SET_ERROR(EPERM));
3119219089Spjd		}
3120219089Spjd
3121209962Smm		if (need_policy == FALSE &&
3122209962Smm		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
3123209962Smm		    XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
3124185029Spjd			need_policy = TRUE;
3125185029Spjd		}
3126185029Spjd	}
3127185029Spjd
3128168404Spjd	if (mask & AT_MODE) {
3129185029Spjd		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
3130168962Spjd			err = secpolicy_setid_setsticky_clear(vp, vap,
3131168962Spjd			    &oldva, cr);
3132168962Spjd			if (err) {
3133168962Spjd				ZFS_EXIT(zfsvfs);
3134168962Spjd				return (err);
3135168962Spjd			}
3136168404Spjd			trim_mask |= AT_MODE;
3137168404Spjd		} else {
3138168404Spjd			need_policy = TRUE;
3139168404Spjd		}
3140168404Spjd	}
3141168404Spjd
3142168404Spjd	if (need_policy) {
3143168404Spjd		/*
3144168404Spjd		 * If trim_mask is set then take ownership
3145168404Spjd		 * has been granted or write_acl is present and user
3146168404Spjd		 * has the ability to modify mode.  In that case remove
3147168404Spjd		 * UID|GID and or MODE from mask so that
3148168404Spjd		 * secpolicy_vnode_setattr() doesn't revoke it.
3149168404Spjd		 */
3150168404Spjd
3151168404Spjd		if (trim_mask) {
3152168404Spjd			saved_mask = vap->va_mask;
3153168404Spjd			vap->va_mask &= ~trim_mask;
3154197831Spjd			if (trim_mask & AT_MODE) {
3155197831Spjd				/*
3156197831Spjd				 * Save the mode, as secpolicy_vnode_setattr()
3157197831Spjd				 * will overwrite it with ova.va_mode.
3158197831Spjd				 */
3159197831Spjd				saved_mode = vap->va_mode;
3160197831Spjd			}
3161168404Spjd		}
3162168404Spjd		err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
3163185029Spjd		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
3164168404Spjd		if (err) {
3165168404Spjd			ZFS_EXIT(zfsvfs);
3166168404Spjd			return (err);
3167168404Spjd		}
3168168404Spjd
3169197831Spjd		if (trim_mask) {
3170168404Spjd			vap->va_mask |= saved_mask;
3171197831Spjd			if (trim_mask & AT_MODE) {
3172197831Spjd				/*
3173197831Spjd				 * Recover the mode after
3174197831Spjd				 * secpolicy_vnode_setattr().
3175197831Spjd				 */
3176197831Spjd				vap->va_mode = saved_mode;
3177197831Spjd			}
3178197831Spjd		}
3179168404Spjd	}
3180168404Spjd
3181168404Spjd	/*
3182168404Spjd	 * secpolicy_vnode_setattr, or take ownership may have
3183168404Spjd	 * changed va_mask
3184168404Spjd	 */
3185168404Spjd	mask = vap->va_mask;
3186168404Spjd
3187219089Spjd	if ((mask & (AT_UID | AT_GID))) {
3188219089Spjd		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
3189219089Spjd		    &xattr_obj, sizeof (xattr_obj));
3190168404Spjd
3191219089Spjd		if (err == 0 && xattr_obj) {
3192219089Spjd			err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
3193306818Savg			if (err == 0) {
3194306818Savg				err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE);
3195306818Savg				if (err != 0)
3196306818Savg					vrele(ZTOV(attrzp));
3197306818Savg			}
3198209962Smm			if (err)
3199219089Spjd				goto out2;
3200168404Spjd		}
3201209962Smm		if (mask & AT_UID) {
3202209962Smm			new_uid = zfs_fuid_create(zfsvfs,
3203209962Smm			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
3204219089Spjd			if (new_uid != zp->z_uid &&
3205219089Spjd			    zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) {
3206219089Spjd				if (attrzp)
3207306818Savg					vput(ZTOV(attrzp));
3208249195Smm				err = SET_ERROR(EDQUOT);
3209219089Spjd				goto out2;
3210209962Smm			}
3211209962Smm		}
3212209962Smm
3213209962Smm		if (mask & AT_GID) {
3214209962Smm			new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
3215209962Smm			    cr, ZFS_GROUP, &fuidp);
3216219089Spjd			if (new_gid != zp->z_gid &&
3217219089Spjd			    zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) {
3218219089Spjd				if (attrzp)
3219306818Savg					vput(ZTOV(attrzp));
3220249195Smm				err = SET_ERROR(EDQUOT);
3221219089Spjd				goto out2;
3222209962Smm			}
3223209962Smm		}
3224219089Spjd	}
3225219089Spjd	tx = dmu_tx_create(zfsvfs->z_os);
3226219089Spjd
3227219089Spjd	if (mask & AT_MODE) {
3228219089Spjd		uint64_t pmode = zp->z_mode;
3229219089Spjd		uint64_t acl_obj;
3230219089Spjd		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
3231219089Spjd
3232243560Smm		if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
3233243560Smm		    !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
3234249195Smm			err = SET_ERROR(EPERM);
3235243560Smm			goto out;
3236243560Smm		}
3237243560Smm
3238224174Smm		if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))
3239224174Smm			goto out;
3240219089Spjd
3241219089Spjd		if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
3242219089Spjd			/*
3243219089Spjd			 * Are we upgrading ACL from old V0 format
3244219089Spjd			 * to V1 format?
3245219089Spjd			 */
3246219089Spjd			if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
3247219089Spjd			    zfs_znode_acl_version(zp) ==
3248219089Spjd			    ZFS_ACL_VERSION_INITIAL) {
3249219089Spjd				dmu_tx_hold_free(tx, acl_obj, 0,
3250219089Spjd				    DMU_OBJECT_END);
3251219089Spjd				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
3252219089Spjd				    0, aclp->z_acl_bytes);
3253209962Smm			} else {
3254219089Spjd				dmu_tx_hold_write(tx, acl_obj, 0,
3255219089Spjd				    aclp->z_acl_bytes);
3256209962Smm			}
3257219089Spjd		} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
3258219089Spjd			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
3259219089Spjd			    0, aclp->z_acl_bytes);
3260209962Smm		}
3261219089Spjd		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
3262219089Spjd	} else {
3263219089Spjd		if ((mask & AT_XVATTR) &&
3264219089Spjd		    XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
3265219089Spjd			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
3266219089Spjd		else
3267219089Spjd			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
3268168404Spjd	}
3269168404Spjd
3270219089Spjd	if (attrzp) {
3271219089Spjd		dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
3272219089Spjd	}
3273219089Spjd
3274219089Spjd	fuid_dirtied = zfsvfs->z_fuid_dirty;
3275219089Spjd	if (fuid_dirtied)
3276219089Spjd		zfs_fuid_txhold(zfsvfs, tx);
3277219089Spjd
3278219089Spjd	zfs_sa_upgrade_txholds(tx, zp);
3279219089Spjd
3280258720Savg	err = dmu_tx_assign(tx, TXG_WAIT);
3281258720Savg	if (err)
3282209962Smm		goto out;
3283168404Spjd
3284219089Spjd	count = 0;
3285168404Spjd	/*
3286168404Spjd	 * Set each attribute requested.
3287168404Spjd	 * We group settings according to the locks they need to acquire.
3288168404Spjd	 *
3289168404Spjd	 * Note: you cannot set ctime directly, although it will be
3290168404Spjd	 * updated as a side-effect of calling this function.
3291168404Spjd	 */
3292168404Spjd
3293219089Spjd	if (mask & (AT_UID|AT_GID|AT_MODE))
3294219089Spjd		mutex_enter(&zp->z_acl_lock);
3295168404Spjd
3296219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
3297219089Spjd	    &zp->z_pflags, sizeof (zp->z_pflags));
3298219089Spjd
3299219089Spjd	if (attrzp) {
3300219089Spjd		if (mask & (AT_UID|AT_GID|AT_MODE))
3301219089Spjd			mutex_enter(&attrzp->z_acl_lock);
3302219089Spjd		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
3303219089Spjd		    SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
3304219089Spjd		    sizeof (attrzp->z_pflags));
3305219089Spjd	}
3306219089Spjd
3307219089Spjd	if (mask & (AT_UID|AT_GID)) {
3308219089Spjd
3309219089Spjd		if (mask & AT_UID) {
3310219089Spjd			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
3311219089Spjd			    &new_uid, sizeof (new_uid));
3312219089Spjd			zp->z_uid = new_uid;
3313219089Spjd			if (attrzp) {
3314219089Spjd				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
3315219089Spjd				    SA_ZPL_UID(zfsvfs), NULL, &new_uid,
3316219089Spjd				    sizeof (new_uid));
3317219089Spjd				attrzp->z_uid = new_uid;
3318219089Spjd			}
3319219089Spjd		}
3320219089Spjd
3321219089Spjd		if (mask & AT_GID) {
3322219089Spjd			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
3323219089Spjd			    NULL, &new_gid, sizeof (new_gid));
3324219089Spjd			zp->z_gid = new_gid;
3325219089Spjd			if (attrzp) {
3326219089Spjd				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
3327219089Spjd				    SA_ZPL_GID(zfsvfs), NULL, &new_gid,
3328219089Spjd				    sizeof (new_gid));
3329219089Spjd				attrzp->z_gid = new_gid;
3330219089Spjd			}
3331219089Spjd		}
3332219089Spjd		if (!(mask & AT_MODE)) {
3333219089Spjd			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
3334219089Spjd			    NULL, &new_mode, sizeof (new_mode));
3335219089Spjd			new_mode = zp->z_mode;
3336219089Spjd		}
3337219089Spjd		err = zfs_acl_chown_setattr(zp);
3338219089Spjd		ASSERT(err == 0);
3339219089Spjd		if (attrzp) {
3340219089Spjd			err = zfs_acl_chown_setattr(attrzp);
3341219089Spjd			ASSERT(err == 0);
3342219089Spjd		}
3343219089Spjd	}
3344219089Spjd
3345168404Spjd	if (mask & AT_MODE) {
3346219089Spjd		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
3347219089Spjd		    &new_mode, sizeof (new_mode));
3348219089Spjd		zp->z_mode = new_mode;
3349219089Spjd		ASSERT3U((uintptr_t)aclp, !=, 0);
3350209962Smm		err = zfs_aclset_common(zp, aclp, cr, tx);
3351240415Smm		ASSERT0(err);
3352219089Spjd		if (zp->z_acl_cached)
3353219089Spjd			zfs_acl_free(zp->z_acl_cached);
3354211932Smm		zp->z_acl_cached = aclp;
3355211932Smm		aclp = NULL;
3356168404Spjd	}
3357168404Spjd
3358168404Spjd
3359219089Spjd	if (mask & AT_ATIME) {
3360219089Spjd		ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
3361219089Spjd		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
3362219089Spjd		    &zp->z_atime, sizeof (zp->z_atime));
3363168404Spjd	}
3364168404Spjd
3365219089Spjd	if (mask & AT_MTIME) {
3366219089Spjd		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
3367219089Spjd		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
3368219089Spjd		    mtime, sizeof (mtime));
3369168404Spjd	}
3370168404Spjd
3371185029Spjd	/* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
3372219089Spjd	if (mask & AT_SIZE && !(mask & AT_MTIME)) {
3373219089Spjd		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
3374219089Spjd		    NULL, mtime, sizeof (mtime));
3375219089Spjd		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
3376219089Spjd		    &ctime, sizeof (ctime));
3377219089Spjd		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
3378219089Spjd		    B_TRUE);
3379219089Spjd	} else if (mask != 0) {
3380219089Spjd		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
3381219089Spjd		    &ctime, sizeof (ctime));
3382219089Spjd		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
3383219089Spjd		    B_TRUE);
3384219089Spjd		if (attrzp) {
3385219089Spjd			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
3386219089Spjd			    SA_ZPL_CTIME(zfsvfs), NULL,
3387219089Spjd			    &ctime, sizeof (ctime));
3388219089Spjd			zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
3389219089Spjd			    mtime, ctime, B_TRUE);
3390219089Spjd		}
3391219089Spjd	}
3392185029Spjd	/*
3393185029Spjd	 * Do this after setting timestamps to prevent timestamp
3394185029Spjd	 * update from toggling bit
3395185029Spjd	 */
3396168404Spjd
3397185029Spjd	if (xoap && (mask & AT_XVATTR)) {
3398209962Smm
3399316391Sasomers		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
3400316391Sasomers			xoap->xoa_createtime = vap->va_birthtime;
3401209962Smm		/*
3402209962Smm		 * restore trimmed off masks
3403209962Smm		 * so that return masks can be set for caller.
3404209962Smm		 */
3405209962Smm
3406209962Smm		if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
3407209962Smm			XVA_SET_REQ(xvap, XAT_APPENDONLY);
3408209962Smm		}
3409209962Smm		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
3410209962Smm			XVA_SET_REQ(xvap, XAT_NOUNLINK);
3411209962Smm		}
3412209962Smm		if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
3413209962Smm			XVA_SET_REQ(xvap, XAT_IMMUTABLE);
3414209962Smm		}
3415209962Smm		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
3416209962Smm			XVA_SET_REQ(xvap, XAT_NODUMP);
3417209962Smm		}
3418209962Smm		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
3419209962Smm			XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
3420209962Smm		}
3421209962Smm		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
3422209962Smm			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
3423209962Smm		}
3424209962Smm
3425219089Spjd		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
3426185029Spjd			ASSERT(vp->v_type == VREG);
3427185029Spjd
3428219089Spjd		zfs_xvattr_set(zp, xvap, tx);
3429185029Spjd	}
3430185029Spjd
3431209962Smm	if (fuid_dirtied)
3432209962Smm		zfs_fuid_sync(zfsvfs, tx);
3433209962Smm
3434168404Spjd	if (mask != 0)
3435185029Spjd		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
3436168404Spjd
3437219089Spjd	if (mask & (AT_UID|AT_GID|AT_MODE))
3438219089Spjd		mutex_exit(&zp->z_acl_lock);
3439168404Spjd
3440219089Spjd	if (attrzp) {
3441219089Spjd		if (mask & (AT_UID|AT_GID|AT_MODE))
3442219089Spjd			mutex_exit(&attrzp->z_acl_lock);
3443219089Spjd	}
3444209962Smmout:
3445219089Spjd	if (err == 0 && attrzp) {
3446219089Spjd		err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
3447219089Spjd		    xattr_count, tx);
3448219089Spjd		ASSERT(err2 == 0);
3449219089Spjd	}
3450219089Spjd
3451168404Spjd	if (attrzp)
3452306818Savg		vput(ZTOV(attrzp));
3453251631Sdelphij
3454211932Smm	if (aclp)
3455209962Smm		zfs_acl_free(aclp);
3456168404Spjd
3457209962Smm	if (fuidp) {
3458209962Smm		zfs_fuid_info_free(fuidp);
3459209962Smm		fuidp = NULL;
3460209962Smm	}
3461209962Smm
3462219089Spjd	if (err) {
3463209962Smm		dmu_tx_abort(tx);
3464219089Spjd	} else {
3465219089Spjd		err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
3466209962Smm		dmu_tx_commit(tx);
3467219089Spjd	}
3468209962Smm
3469219089Spjdout2:
3470219089Spjd	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3471219089Spjd		zil_commit(zilog, 0);
3472209962Smm
3473168404Spjd	ZFS_EXIT(zfsvfs);
3474168404Spjd	return (err);
3475168404Spjd}
3476168404Spjd
3477168404Spjd/*
3478303970Savg * We acquire all but fdvp locks using non-blocking acquisitions.  If we
3479303970Savg * fail to acquire any lock in the path we will drop all held locks,
3480303970Savg * acquire the new lock in a blocking fashion, and then release it and
3481303970Savg * restart the rename.  This acquire/release step ensures that we do not
3482303970Savg * spin on a lock waiting for release.  On error release all vnode locks
3483303970Savg * and decrement references the way tmpfs_rename() would do.
3484168404Spjd */
3485303970Savgstatic int
3486303970Savgzfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
3487303970Savg    struct vnode *tdvp, struct vnode **tvpp,
3488303970Savg    const struct componentname *scnp, const struct componentname *tcnp)
3489168404Spjd{
3490303970Savg	zfsvfs_t	*zfsvfs;
3491303970Savg	struct vnode	*nvp, *svp, *tvp;
3492303970Savg	znode_t		*sdzp, *tdzp, *szp, *tzp;
3493303970Savg	const char	*snm = scnp->cn_nameptr;
3494303970Savg	const char	*tnm = tcnp->cn_nameptr;
3495303970Savg	int error;
3496168404Spjd
3497303970Savg	VOP_UNLOCK(tdvp, 0);
3498303970Savg	if (*tvpp != NULL && *tvpp != tdvp)
3499303970Savg		VOP_UNLOCK(*tvpp, 0);
3500303970Savg
3501303970Savgrelock:
3502303970Savg	error = vn_lock(sdvp, LK_EXCLUSIVE);
3503303970Savg	if (error)
3504303970Savg		goto out;
3505303970Savg	sdzp = VTOZ(sdvp);
3506303970Savg
3507303970Savg	error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
3508303970Savg	if (error != 0) {
3509303970Savg		VOP_UNLOCK(sdvp, 0);
3510303970Savg		if (error != EBUSY)
3511303970Savg			goto out;
3512303970Savg		error = vn_lock(tdvp, LK_EXCLUSIVE);
3513303970Savg		if (error)
3514303970Savg			goto out;
3515303970Savg		VOP_UNLOCK(tdvp, 0);
3516303970Savg		goto relock;
3517168404Spjd	}
3518303970Savg	tdzp = VTOZ(tdvp);
3519168404Spjd
3520303970Savg	/*
3521303970Savg	 * Before using sdzp and tdzp we must ensure that they are live.
3522303970Savg	 * As a porting legacy from illumos we have two things to worry
3523303970Savg	 * about.  One is typical for FreeBSD and it is that the vnode is
3524303970Savg	 * not reclaimed (doomed).  The other is that the znode is live.
3525303970Savg	 * The current code can invalidate the znode without acquiring the
3526303970Savg	 * corresponding vnode lock if the object represented by the znode
3527303970Savg	 * and vnode is no longer valid after a rollback or receive operation.
3528303970Savg	 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock
3529303970Savg	 * that protects the znodes from the invalidation.
3530303970Savg	 */
3531303970Savg	zfsvfs = sdzp->z_zfsvfs;
3532303970Savg	ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
3533303970Savg	ZFS_ENTER(zfsvfs);
3534168404Spjd
3535168404Spjd	/*
3536303970Savg	 * We can not use ZFS_VERIFY_ZP() here because it could directly return
3537303970Savg	 * bypassing the cleanup code in the case of an error.
3538168404Spjd	 */
3539303970Savg	if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) {
3540303970Savg		ZFS_EXIT(zfsvfs);
3541303970Savg		VOP_UNLOCK(sdvp, 0);
3542303970Savg		VOP_UNLOCK(tdvp, 0);
3543303970Savg		error = SET_ERROR(EIO);
3544303970Savg		goto out;
3545303970Savg	}
3546303970Savg
3547303970Savg	/*
3548303970Savg	 * Re-resolve svp to be certain it still exists and fetch the
3549303970Savg	 * correct vnode.
3550303970Savg	 */
3551303970Savg	error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS);
3552303970Savg	if (error != 0) {
3553303970Savg		/* Source entry invalid or not there. */
3554303970Savg		ZFS_EXIT(zfsvfs);
3555303970Savg		VOP_UNLOCK(sdvp, 0);
3556303970Savg		VOP_UNLOCK(tdvp, 0);
3557303970Savg		if ((scnp->cn_flags & ISDOTDOT) != 0 ||
3558303970Savg		    (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
3559303970Savg			error = SET_ERROR(EINVAL);
3560303970Savg		goto out;
3561303970Savg	}
3562303970Savg	svp = ZTOV(szp);
3563303970Savg
3564303970Savg	/*
3565303970Savg	 * Re-resolve tvp, if it disappeared we just carry on.
3566303970Savg	 */
3567303970Savg	error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0);
3568303970Savg	if (error != 0) {
3569303970Savg		ZFS_EXIT(zfsvfs);
3570303970Savg		VOP_UNLOCK(sdvp, 0);
3571303970Savg		VOP_UNLOCK(tdvp, 0);
3572303970Savg		vrele(svp);
3573303970Savg		if ((tcnp->cn_flags & ISDOTDOT) != 0)
3574303970Savg			error = SET_ERROR(EINVAL);
3575303970Savg		goto out;
3576303970Savg	}
3577303970Savg	if (tzp != NULL)
3578303970Savg		tvp = ZTOV(tzp);
3579303970Savg	else
3580303970Savg		tvp = NULL;
3581303970Savg
3582303970Savg	/*
3583303970Savg	 * At present the vnode locks must be acquired before z_teardown_lock,
3584303970Savg	 * although it would be more logical to use the opposite order.
3585303970Savg	 */
3586303970Savg	ZFS_EXIT(zfsvfs);
3587303970Savg
3588303970Savg	/*
3589303970Savg	 * Now try acquire locks on svp and tvp.
3590303970Savg	 */
3591303970Savg	nvp = svp;
3592303970Savg	error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
3593303970Savg	if (error != 0) {
3594303970Savg		VOP_UNLOCK(sdvp, 0);
3595303970Savg		VOP_UNLOCK(tdvp, 0);
3596303970Savg		if (tvp != NULL)
3597303970Savg			vrele(tvp);
3598303970Savg		if (error != EBUSY) {
3599303970Savg			vrele(nvp);
3600303970Savg			goto out;
3601303970Savg		}
3602303970Savg		error = vn_lock(nvp, LK_EXCLUSIVE);
3603303970Savg		if (error != 0) {
3604303970Savg			vrele(nvp);
3605303970Savg			goto out;
3606303970Savg		}
3607303970Savg		VOP_UNLOCK(nvp, 0);
3608303970Savg		/*
3609303970Savg		 * Concurrent rename race.
3610303970Savg		 * XXX ?
3611303970Savg		 */
3612303970Savg		if (nvp == tdvp) {
3613303970Savg			vrele(nvp);
3614303970Savg			error = SET_ERROR(EINVAL);
3615303970Savg			goto out;
3616303970Savg		}
3617303970Savg		vrele(*svpp);
3618303970Savg		*svpp = nvp;
3619303970Savg		goto relock;
3620303970Savg	}
3621303970Savg	vrele(*svpp);
3622303970Savg	*svpp = nvp;
3623303970Savg
3624303970Savg	if (*tvpp != NULL)
3625303970Savg		vrele(*tvpp);
3626303970Savg	*tvpp = NULL;
3627303970Savg	if (tvp != NULL) {
3628303970Savg		nvp = tvp;
3629303970Savg		error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
3630303970Savg		if (error != 0) {
3631303970Savg			VOP_UNLOCK(sdvp, 0);
3632303970Savg			VOP_UNLOCK(tdvp, 0);
3633303970Savg			VOP_UNLOCK(*svpp, 0);
3634303970Savg			if (error != EBUSY) {
3635303970Savg				vrele(nvp);
3636303970Savg				goto out;
3637168404Spjd			}
3638303970Savg			error = vn_lock(nvp, LK_EXCLUSIVE);
3639303970Savg			if (error != 0) {
3640303970Savg				vrele(nvp);
3641303970Savg				goto out;
3642303970Savg			}
3643303970Savg			vput(nvp);
3644303970Savg			goto relock;
3645168404Spjd		}
3646303970Savg		*tvpp = nvp;
3647303970Savg	}
3648168404Spjd
3649303970Savg	return (0);
3650168404Spjd
3651303970Savgout:
3652303970Savg	return (error);
3653303970Savg}
3654168404Spjd
3655303970Savg/*
3656303970Savg * Note that we must use VRELE_ASYNC in this function as it walks
3657303970Savg * up the directory tree and vrele may need to acquire an exclusive
3658303970Savg * lock if a last reference to a vnode is dropped.
3659303970Savg */
3660303970Savgstatic int
3661303970Savgzfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp)
3662303970Savg{
3663303970Savg	zfsvfs_t	*zfsvfs;
3664303970Savg	znode_t		*zp, *zp1;
3665303970Savg	uint64_t	parent;
3666303970Savg	int		error;
3667168404Spjd
3668303970Savg	zfsvfs = tdzp->z_zfsvfs;
3669303970Savg	if (tdzp == szp)
3670303970Savg		return (SET_ERROR(EINVAL));
3671303970Savg	if (tdzp == sdzp)
3672303970Savg		return (0);
3673303970Savg	if (tdzp->z_id == zfsvfs->z_root)
3674303970Savg		return (0);
3675303970Savg	zp = tdzp;
3676303970Savg	for (;;) {
3677303970Savg		ASSERT(!zp->z_unlinked);
3678303970Savg		if ((error = sa_lookup(zp->z_sa_hdl,
3679303970Savg		    SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
3680303970Savg			break;
3681303970Savg
3682303970Savg		if (parent == szp->z_id) {
3683303970Savg			error = SET_ERROR(EINVAL);
3684303970Savg			break;
3685168404Spjd		}
3686303970Savg		if (parent == zfsvfs->z_root)
3687303970Savg			break;
3688303970Savg		if (parent == sdzp->z_id)
3689303970Savg			break;
3690168404Spjd
3691303970Savg		error = zfs_zget(zfsvfs, parent, &zp1);
3692303970Savg		if (error != 0)
3693303970Savg			break;
3694168404Spjd
3695303970Savg		if (zp != tdzp)
3696303970Savg			VN_RELE_ASYNC(ZTOV(zp),
3697303970Savg			    dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
3698303970Savg		zp = zp1;
3699303970Savg	}
3700303970Savg
3701303970Savg	if (error == ENOTDIR)
3702303970Savg		panic("checkpath: .. not a directory\n");
3703303970Savg	if (zp != tdzp)
3704303970Savg		VN_RELE_ASYNC(ZTOV(zp),
3705303970Savg		    dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
3706303970Savg	return (error);
3707168404Spjd}
3708168404Spjd
3709168404Spjd/*
3710168404Spjd * Move an entry from the provided source directory to the target
3711168404Spjd * directory.  Change the entry name as indicated.
3712168404Spjd *
3713168404Spjd *	IN:	sdvp	- Source directory containing the "old entry".
3714168404Spjd *		snm	- Old entry name.
3715168404Spjd *		tdvp	- Target directory to contain the "new entry".
3716168404Spjd *		tnm	- New entry name.
3717168404Spjd *		cr	- credentials of caller.
3718185029Spjd *		ct	- caller context
3719185029Spjd *		flags	- case flags
3720168404Spjd *
3721251631Sdelphij *	RETURN:	0 on success, error code on failure.
3722168404Spjd *
3723168404Spjd * Timestamps:
3724168404Spjd *	sdvp,tdvp - ctime|mtime updated
3725168404Spjd */
3726185029Spjd/*ARGSUSED*/
3727168404Spjdstatic int
3728303970Savgzfs_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3729303970Savg    vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3730303970Savg    cred_t *cr)
3731168404Spjd{
3732303970Savg	zfsvfs_t	*zfsvfs;
3733303970Savg	znode_t		*sdzp, *tdzp, *szp, *tzp;
3734303970Savg	zilog_t		*zilog = NULL;
3735168404Spjd	dmu_tx_t	*tx;
3736303970Savg	char		*snm = scnp->cn_nameptr;
3737303970Savg	char		*tnm = tcnp->cn_nameptr;
3738185029Spjd	int		error = 0;
3739168404Spjd
3740303970Savg	/* Reject renames across filesystems. */
3741303970Savg	if ((*svpp)->v_mount != tdvp->v_mount ||
3742303970Savg	    ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) {
3743303970Savg		error = SET_ERROR(EXDEV);
3744303970Savg		goto out;
3745303970Savg	}
3746168404Spjd
3747303970Savg	if (zfsctl_is_node(tdvp)) {
3748303970Savg		error = SET_ERROR(EXDEV);
3749303970Savg		goto out;
3750303970Savg	}
3751303970Savg
3752168962Spjd	/*
3753303970Savg	 * Lock all four vnodes to ensure safety and semantics of renaming.
3754168962Spjd	 */
3755303970Savg	error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp);
3756303970Savg	if (error != 0) {
3757303970Savg		/* no vnodes are locked in the case of error here */
3758303970Savg		return (error);
3759264392Sdavide	}
3760168962Spjd
3761303970Savg	tdzp = VTOZ(tdvp);
3762303970Savg	sdzp = VTOZ(sdvp);
3763303970Savg	zfsvfs = tdzp->z_zfsvfs;
3764303970Savg	zilog = zfsvfs->z_log;
3765303970Savg
3766254585Sdelphij	/*
3767303970Savg	 * After we re-enter ZFS_ENTER() we will have to revalidate all
3768303970Savg	 * znodes involved.
3769254585Sdelphij	 */
3770303970Savg	ZFS_ENTER(zfsvfs);
3771168404Spjd
3772185029Spjd	if (zfsvfs->z_utf8 && u8_validate(tnm,
3773185029Spjd	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3774303970Savg		error = SET_ERROR(EILSEQ);
3775303970Savg		goto unlockout;
3776185029Spjd	}
3777185029Spjd
3778303970Savg	/* If source and target are the same file, there is nothing to do. */
3779303970Savg	if ((*svpp) == (*tvpp)) {
3780303970Savg		error = 0;
3781303970Savg		goto unlockout;
3782303970Savg	}
3783185029Spjd
3784303970Savg	if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
3785303970Savg	    ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
3786303970Savg	    (*tvpp)->v_mountedhere != NULL)) {
3787303970Savg		error = SET_ERROR(EXDEV);
3788303970Savg		goto unlockout;
3789303970Savg	}
3790168404Spjd
3791168404Spjd	/*
3792303970Savg	 * We can not use ZFS_VERIFY_ZP() here because it could directly return
3793303970Savg	 * bypassing the cleanup code in the case of an error.
3794168404Spjd	 */
3795303970Savg	if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) {
3796303970Savg		error = SET_ERROR(EIO);
3797303970Savg		goto unlockout;
3798168404Spjd	}
3799168404Spjd
3800303970Savg	szp = VTOZ(*svpp);
3801303970Savg	tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
3802303970Savg	if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) {
3803303970Savg		error = SET_ERROR(EIO);
3804303970Savg		goto unlockout;
3805168962Spjd	}
3806185029Spjd
3807208131Smm	/*
3808303970Savg	 * This is to prevent the creation of links into attribute space
3809303970Savg	 * by renaming a linked file into/outof an attribute directory.
3810303970Savg	 * See the comment in zfs_link() for why this is considered bad.
3811208131Smm	 */
3812303970Savg	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
3813303970Savg		error = SET_ERROR(EINVAL);
3814303970Savg		goto unlockout;
3815208131Smm	}
3816208131Smm
3817168404Spjd	/*
3818168404Spjd	 * Must have write access at the source to remove the old entry
3819168404Spjd	 * and write access at the target to create the new entry.
3820168404Spjd	 * Note that if target and source are the same, this can be
3821168404Spjd	 * done in a single check.
3822168404Spjd	 */
3823168404Spjd	if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))
3824303970Savg		goto unlockout;
3825168404Spjd
3826303970Savg	if ((*svpp)->v_type == VDIR) {
3827168404Spjd		/*
3828303970Savg		 * Avoid ".", "..", and aliases of "." for obvious reasons.
3829303970Savg		 */
3830303970Savg		if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') ||
3831303970Savg		    sdzp == szp ||
3832303970Savg		    (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
3833303970Savg			error = EINVAL;
3834303970Savg			goto unlockout;
3835303970Savg		}
3836303970Savg
3837303970Savg		/*
3838168404Spjd		 * Check to make sure rename is valid.
3839168404Spjd		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
3840168404Spjd		 */
3841303970Savg		if (error = zfs_rename_check(szp, sdzp, tdzp))
3842303970Savg			goto unlockout;
3843168404Spjd	}
3844168404Spjd
3845168404Spjd	/*
3846168404Spjd	 * Does target exist?
3847168404Spjd	 */
3848168404Spjd	if (tzp) {
3849168404Spjd		/*
3850168404Spjd		 * Source and target must be the same type.
3851168404Spjd		 */
3852303970Savg		if ((*svpp)->v_type == VDIR) {
3853303970Savg			if ((*tvpp)->v_type != VDIR) {
3854249195Smm				error = SET_ERROR(ENOTDIR);
3855303970Savg				goto unlockout;
3856303970Savg			} else {
3857303970Savg				cache_purge(tdvp);
3858303970Savg				if (sdvp != tdvp)
3859303970Savg					cache_purge(sdvp);
3860168404Spjd			}
3861168404Spjd		} else {
3862303970Savg			if ((*tvpp)->v_type == VDIR) {
3863249195Smm				error = SET_ERROR(EISDIR);
3864303970Savg				goto unlockout;
3865168404Spjd			}
3866168404Spjd		}
3867168404Spjd	}
3868168404Spjd
3869303970Savg	vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
3870168962Spjd	if (tzp)
3871303970Savg		vnevent_rename_dest(*tvpp, tdvp, tnm, ct);
3872168962Spjd
3873185029Spjd	/*
3874185029Spjd	 * notify the target directory if it is not the same
3875185029Spjd	 * as source directory.
3876185029Spjd	 */
3877185029Spjd	if (tdvp != sdvp) {
3878185029Spjd		vnevent_rename_dest_dir(tdvp, ct);
3879185029Spjd	}
3880185029Spjd
3881168404Spjd	tx = dmu_tx_create(zfsvfs->z_os);
3882219089Spjd	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3883219089Spjd	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
3884168404Spjd	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
3885168404Spjd	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
3886219089Spjd	if (sdzp != tdzp) {
3887219089Spjd		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
3888219089Spjd		zfs_sa_upgrade_txholds(tx, tdzp);
3889219089Spjd	}
3890219089Spjd	if (tzp) {
3891219089Spjd		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
3892219089Spjd		zfs_sa_upgrade_txholds(tx, tzp);
3893219089Spjd	}
3894219089Spjd
3895219089Spjd	zfs_sa_upgrade_txholds(tx, szp);
3896168404Spjd	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
3897303970Savg	error = dmu_tx_assign(tx, TXG_WAIT);
3898168404Spjd	if (error) {
3899168404Spjd		dmu_tx_abort(tx);
3900303970Savg		goto unlockout;
3901168404Spjd	}
3902168404Spjd
3903303970Savg
3904168404Spjd	if (tzp)	/* Attempt to remove the existing target */
3905303970Savg		error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);
3906168404Spjd
3907168404Spjd	if (error == 0) {
3908303970Savg		error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING);
3909168404Spjd		if (error == 0) {
3910219089Spjd			szp->z_pflags |= ZFS_AV_MODIFIED;
3911185029Spjd
3912219089Spjd			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
3913219089Spjd			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
3914240415Smm			ASSERT0(error);
3915219089Spjd
3916303970Savg			error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING,
3917303970Savg			    NULL);
3918219089Spjd			if (error == 0) {
3919303970Savg				zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
3920303970Savg				    snm, tdzp, tnm, szp);
3921185029Spjd
3922219089Spjd				/*
3923219089Spjd				 * Update path information for the target vnode
3924219089Spjd				 */
3925303970Savg				vn_renamepath(tdvp, *svpp, tnm, strlen(tnm));
3926219089Spjd			} else {
3927219089Spjd				/*
3928219089Spjd				 * At this point, we have successfully created
3929219089Spjd				 * the target name, but have failed to remove
3930219089Spjd				 * the source name.  Since the create was done
3931219089Spjd				 * with the ZRENAMING flag, there are
3932219089Spjd				 * complications; for one, the link count is
3933219089Spjd				 * wrong.  The easiest way to deal with this
3934219089Spjd				 * is to remove the newly created target, and
3935219089Spjd				 * return the original error.  This must
3936219089Spjd				 * succeed; fortunately, it is very unlikely to
3937219089Spjd				 * fail, since we just created it.
3938219089Spjd				 */
3939303970Savg				VERIFY3U(zfs_link_destroy(tdzp, tnm, szp, tx,
3940219089Spjd				    ZRENAMING, NULL), ==, 0);
3941219089Spjd			}
3942168404Spjd		}
3943168404Spjd		if (error == 0) {
3944303970Savg			cache_purge(*svpp);
3945303970Savg			if (*tvpp != NULL)
3946303970Savg				cache_purge(*tvpp);
3947303970Savg			cache_purge_negative(tdvp);
3948168404Spjd		}
3949168404Spjd	}
3950168404Spjd
3951168404Spjd	dmu_tx_commit(tx);
3952168404Spjd
3953303970Savgunlockout:			/* all 4 vnodes are locked, ZFS_ENTER called */
3954303970Savg	ZFS_EXIT(zfsvfs);
3955303970Savg	VOP_UNLOCK(*svpp, 0);
3956303970Savg	VOP_UNLOCK(sdvp, 0);
3957168404Spjd
3958303970Savgout:				/* original two vnodes are locked */
3959303970Savg	if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3960219089Spjd		zil_commit(zilog, 0);
3961219089Spjd
3962303970Savg	if (*tvpp != NULL)
3963303970Savg		VOP_UNLOCK(*tvpp, 0);
3964303970Savg	if (tdvp != *tvpp)
3965303970Savg		VOP_UNLOCK(tdvp, 0);
3966168404Spjd	return (error);
3967168404Spjd}
3968168404Spjd
3969168404Spjd/*
3970168404Spjd * Insert the indicated symbolic reference entry into the directory.
3971168404Spjd *
3972168404Spjd *	IN:	dvp	- Directory to contain new symbolic link.
3973168404Spjd *		link	- Name for new symlink entry.
3974168404Spjd *		vap	- Attributes of new entry.
3975168404Spjd *		cr	- credentials of caller.
3976185029Spjd *		ct	- caller context
3977185029Spjd *		flags	- case flags
3978168404Spjd *
3979251631Sdelphij *	RETURN:	0 on success, error code on failure.
3980168404Spjd *
3981168404Spjd * Timestamps:
3982168404Spjd *	dvp - ctime|mtime updated
3983168404Spjd */
3984185029Spjd/*ARGSUSED*/
3985168404Spjdstatic int
3986185029Spjdzfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link,
3987185029Spjd    cred_t *cr, kthread_t *td)
3988168404Spjd{
3989168404Spjd	znode_t		*zp, *dzp = VTOZ(dvp);
3990168404Spjd	dmu_tx_t	*tx;
3991168404Spjd	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
3992185029Spjd	zilog_t		*zilog;
3993219089Spjd	uint64_t	len = strlen(link);
3994168404Spjd	int		error;
3995209962Smm	zfs_acl_ids_t	acl_ids;
3996209962Smm	boolean_t	fuid_dirtied;
3997219089Spjd	uint64_t	txtype = TX_SYMLINK;
3998185029Spjd	int		flags = 0;
3999168404Spjd
4000168962Spjd	ASSERT(vap->va_type == VLNK);
4001168404Spjd
4002168404Spjd	ZFS_ENTER(zfsvfs);
4003185029Spjd	ZFS_VERIFY_ZP(dzp);
4004185029Spjd	zilog = zfsvfs->z_log;
4005185029Spjd
4006185029Spjd	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
4007185029Spjd	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
4008185029Spjd		ZFS_EXIT(zfsvfs);
4009249195Smm		return (SET_ERROR(EILSEQ));
4010185029Spjd	}
4011168404Spjd
4012168404Spjd	if (len > MAXPATHLEN) {
4013168404Spjd		ZFS_EXIT(zfsvfs);
4014249195Smm		return (SET_ERROR(ENAMETOOLONG));
4015168404Spjd	}
4016168404Spjd
4017219089Spjd	if ((error = zfs_acl_ids_create(dzp, 0,
4018219089Spjd	    vap, cr, NULL, &acl_ids)) != 0) {
4019219089Spjd		ZFS_EXIT(zfsvfs);
4020219089Spjd		return (error);
4021219089Spjd	}
4022260704Savg
4023168404Spjd	/*
4024168404Spjd	 * Attempt to lock directory; fail if entry already exists.
4025168404Spjd	 */
4026303970Savg	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
4027185029Spjd	if (error) {
4028219089Spjd		zfs_acl_ids_free(&acl_ids);
4029168404Spjd		ZFS_EXIT(zfsvfs);
4030168404Spjd		return (error);
4031168404Spjd	}
4032168404Spjd
4033219089Spjd	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
4034219089Spjd		zfs_acl_ids_free(&acl_ids);
4035219089Spjd		ZFS_EXIT(zfsvfs);
4036219089Spjd		return (error);
4037219089Spjd	}
4038219089Spjd
4039209962Smm	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
4040209962Smm		zfs_acl_ids_free(&acl_ids);
4041209962Smm		ZFS_EXIT(zfsvfs);
4042249195Smm		return (SET_ERROR(EDQUOT));
4043209962Smm	}
4044303970Savg
4045303970Savg	getnewvnode_reserve(1);
4046168404Spjd	tx = dmu_tx_create(zfsvfs->z_os);
4047209962Smm	fuid_dirtied = zfsvfs->z_fuid_dirty;
4048168404Spjd	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
4049168404Spjd	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
4050219089Spjd	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
4051219089Spjd	    ZFS_SA_BASE_ATTR_SIZE + len);
4052219089Spjd	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
4053219089Spjd	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
4054219089Spjd		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
4055219089Spjd		    acl_ids.z_aclp->z_acl_bytes);
4056219089Spjd	}
4057209962Smm	if (fuid_dirtied)
4058209962Smm		zfs_fuid_txhold(zfsvfs, tx);
4059303970Savg	error = dmu_tx_assign(tx, TXG_WAIT);
4060168404Spjd	if (error) {
4061219089Spjd		zfs_acl_ids_free(&acl_ids);
4062168404Spjd		dmu_tx_abort(tx);
4063260704Savg		getnewvnode_drop_reserve();
4064168404Spjd		ZFS_EXIT(zfsvfs);
4065168404Spjd		return (error);
4066168404Spjd	}
4067168404Spjd
4068168404Spjd	/*
4069168404Spjd	 * Create a new object for the symlink.
4070219089Spjd	 * for version 4 ZPL datsets the symlink will be an SA attribute
4071168404Spjd	 */
4072219089Spjd	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
4073168404Spjd
4074219089Spjd	if (fuid_dirtied)
4075219089Spjd		zfs_fuid_sync(zfsvfs, tx);
4076209962Smm
4077219089Spjd	if (zp->z_is_sa)
4078219089Spjd		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
4079219089Spjd		    link, len, tx);
4080219089Spjd	else
4081219089Spjd		zfs_sa_symlink(zp, link, len, tx);
4082168404Spjd
4083219089Spjd	zp->z_size = len;
4084219089Spjd	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
4085219089Spjd	    &zp->z_size, sizeof (zp->z_size), tx);
4086168404Spjd	/*
4087168404Spjd	 * Insert the new object into the directory.
4088168404Spjd	 */
4089303970Savg	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);
4090168404Spjd
4091219089Spjd	zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
4092219089Spjd	*vpp = ZTOV(zp);
4093219089Spjd
4094209962Smm	zfs_acl_ids_free(&acl_ids);
4095209962Smm
4096168404Spjd	dmu_tx_commit(tx);
4097168404Spjd
4098260704Savg	getnewvnode_drop_reserve();
4099260704Savg
4100219089Spjd	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
4101219089Spjd		zil_commit(zilog, 0);
4102219089Spjd
4103168404Spjd	ZFS_EXIT(zfsvfs);
4104168404Spjd	return (error);
4105168404Spjd}
4106168404Spjd
4107168404Spjd/*
4108168404Spjd * Return, in the buffer contained in the provided uio structure,
4109168404Spjd * the symbolic path referred to by vp.
4110168404Spjd *
4111168404Spjd *	IN:	vp	- vnode of symbolic link.
4112251631Sdelphij *		uio	- structure to contain the link path.
4113168404Spjd *		cr	- credentials of caller.
4114185029Spjd *		ct	- caller context
4115168404Spjd *
4116251631Sdelphij *	OUT:	uio	- structure containing the link path.
4117168404Spjd *
4118251631Sdelphij *	RETURN:	0 on success, error code on failure.
4119168404Spjd *
4120168404Spjd * Timestamps:
4121168404Spjd *	vp - atime updated
4122168404Spjd */
4123168404Spjd/* ARGSUSED */
4124168404Spjdstatic int
4125185029Spjdzfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct)
4126168404Spjd{
4127168404Spjd	znode_t		*zp = VTOZ(vp);
4128168404Spjd	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
4129168404Spjd	int		error;
4130168404Spjd
4131168404Spjd	ZFS_ENTER(zfsvfs);
4132185029Spjd	ZFS_VERIFY_ZP(zp);
4133168404Spjd
4134219089Spjd	if (zp->z_is_sa)
4135219089Spjd		error = sa_lookup_uio(zp->z_sa_hdl,
4136219089Spjd		    SA_ZPL_SYMLINK(zfsvfs), uio);
4137219089Spjd	else
4138219089Spjd		error = zfs_sa_readlink(zp, uio);
4139168404Spjd
4140168404Spjd	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
4141219089Spjd
4142168404Spjd	ZFS_EXIT(zfsvfs);
4143168404Spjd	return (error);
4144168404Spjd}
4145168404Spjd
4146168404Spjd/*
4147168404Spjd * Insert a new entry into directory tdvp referencing svp.
4148168404Spjd *
4149168404Spjd *	IN:	tdvp	- Directory to contain new entry.
4150168404Spjd *		svp	- vnode of new entry.
4151168404Spjd *		name	- name of new entry.
4152168404Spjd *		cr	- credentials of caller.
4153185029Spjd *		ct	- caller context
4154168404Spjd *
4155251631Sdelphij *	RETURN:	0 on success, error code on failure.
4156168404Spjd *
4157168404Spjd * Timestamps:
4158168404Spjd *	tdvp - ctime|mtime updated
4159168404Spjd *	 svp - ctime updated
4160168404Spjd */
4161168404Spjd/* ARGSUSED */
4162168404Spjdstatic int
4163185029Spjdzfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
4164185029Spjd    caller_context_t *ct, int flags)
4165168404Spjd{
4166168404Spjd	znode_t		*dzp = VTOZ(tdvp);
4167168404Spjd	znode_t		*tzp, *szp;
4168168404Spjd	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
4169185029Spjd	zilog_t		*zilog;
4170168404Spjd	dmu_tx_t	*tx;
4171168404Spjd	int		error;
4172212694Smm	uint64_t	parent;
4173185029Spjd	uid_t		owner;
4174168404Spjd
4175168404Spjd	ASSERT(tdvp->v_type == VDIR);
4176168404Spjd
4177168404Spjd	ZFS_ENTER(zfsvfs);
4178185029Spjd	ZFS_VERIFY_ZP(dzp);
4179185029Spjd	zilog = zfsvfs->z_log;
4180168404Spjd
4181212694Smm	/*
4182212694Smm	 * POSIX dictates that we return EPERM here.
4183212694Smm	 * Better choices include ENOTSUP or EISDIR.
4184212694Smm	 */
4185212694Smm	if (svp->v_type == VDIR) {
4186168404Spjd		ZFS_EXIT(zfsvfs);
4187249195Smm		return (SET_ERROR(EPERM));
4188212694Smm	}
4189212694Smm
4190254585Sdelphij	szp = VTOZ(svp);
4191254585Sdelphij	ZFS_VERIFY_ZP(szp);
4192254585Sdelphij
4193258597Spjd	if (szp->z_pflags & (ZFS_APPENDONLY | ZFS_IMMUTABLE | ZFS_READONLY)) {
4194258597Spjd		ZFS_EXIT(zfsvfs);
4195258597Spjd		return (SET_ERROR(EPERM));
4196258597Spjd	}
4197258597Spjd
4198212694Smm	/* Prevent links to .zfs/shares files */
4199212694Smm
4200219089Spjd	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
4201219089Spjd	    &parent, sizeof (uint64_t))) != 0) {
4202212694Smm		ZFS_EXIT(zfsvfs);
4203219089Spjd		return (error);
4204219089Spjd	}
4205219089Spjd	if (parent == zfsvfs->z_shares_dir) {
4206219089Spjd		ZFS_EXIT(zfsvfs);
4207249195Smm		return (SET_ERROR(EPERM));
4208212694Smm	}
4209212694Smm
4210185029Spjd	if (zfsvfs->z_utf8 && u8_validate(name,
4211185029Spjd	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
4212185029Spjd		ZFS_EXIT(zfsvfs);
4213249195Smm		return (SET_ERROR(EILSEQ));
4214185029Spjd	}
4215185029Spjd
4216168404Spjd	/*
4217168404Spjd	 * We do not support links between attributes and non-attributes
4218168404Spjd	 * because of the potential security risk of creating links
4219168404Spjd	 * into "normal" file space in order to circumvent restrictions
4220168404Spjd	 * imposed in attribute space.
4221168404Spjd	 */
4222219089Spjd	if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) {
4223168404Spjd		ZFS_EXIT(zfsvfs);
4224249195Smm		return (SET_ERROR(EINVAL));
4225168404Spjd	}
4226168404Spjd
4227168404Spjd
4228219089Spjd	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
4229219089Spjd	if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) {
4230168404Spjd		ZFS_EXIT(zfsvfs);
4231249195Smm		return (SET_ERROR(EPERM));
4232168404Spjd	}
4233168404Spjd
4234185029Spjd	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
4235168404Spjd		ZFS_EXIT(zfsvfs);
4236168404Spjd		return (error);
4237168404Spjd	}
4238168404Spjd
4239168404Spjd	/*
4240168404Spjd	 * Attempt to lock directory; fail if entry already exists.
4241168404Spjd	 */
4242303970Savg	error = zfs_dirent_lookup(dzp, name, &tzp, ZNEW);
4243185029Spjd	if (error) {
4244168404Spjd		ZFS_EXIT(zfsvfs);
4245168404Spjd		return (error);
4246168404Spjd	}
4247168404Spjd
4248168404Spjd	tx = dmu_tx_create(zfsvfs->z_os);
4249219089Spjd	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
4250168404Spjd	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
4251219089Spjd	zfs_sa_upgrade_txholds(tx, szp);
4252219089Spjd	zfs_sa_upgrade_txholds(tx, dzp);
4253303970Savg	error = dmu_tx_assign(tx, TXG_WAIT);
4254168404Spjd	if (error) {
4255168404Spjd		dmu_tx_abort(tx);
4256168404Spjd		ZFS_EXIT(zfsvfs);
4257168404Spjd		return (error);
4258168404Spjd	}
4259168404Spjd
4260303970Savg	error = zfs_link_create(dzp, name, szp, tx, 0);
4261168404Spjd
4262185029Spjd	if (error == 0) {
4263185029Spjd		uint64_t txtype = TX_LINK;
4264185029Spjd		zfs_log_link(zilog, tx, txtype, dzp, szp, name);
4265185029Spjd	}
4266168404Spjd
4267168404Spjd	dmu_tx_commit(tx);
4268168404Spjd
4269185029Spjd	if (error == 0) {
4270185029Spjd		vnevent_link(svp, ct);
4271185029Spjd	}
4272185029Spjd
4273219089Spjd	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
4274219089Spjd		zil_commit(zilog, 0);
4275219089Spjd
4276168404Spjd	ZFS_EXIT(zfsvfs);
4277168404Spjd	return (error);
4278168404Spjd}
4279168404Spjd
4280219089Spjd
4281185029Spjd/*ARGSUSED*/
4282168962Spjdvoid
4283185029Spjdzfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
4284168404Spjd{
4285168962Spjd	znode_t	*zp = VTOZ(vp);
4286168962Spjd	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4287168962Spjd	int error;
4288168404Spjd
4289185029Spjd	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
4290219089Spjd	if (zp->z_sa_hdl == NULL) {
4291185029Spjd		/*
4292185029Spjd		 * The fs has been unmounted, or we did a
4293185029Spjd		 * suspend/resume and this file no longer exists.
4294185029Spjd		 */
4295243520Savg		rw_exit(&zfsvfs->z_teardown_inactive_lock);
4296234607Strasz		vrecycle(vp);
4297243520Savg		return;
4298243520Savg	}
4299243520Savg
4300243520Savg	if (zp->z_unlinked) {
4301243520Savg		/*
4302243520Savg		 * Fast path to recycle a vnode of a removed file.
4303243520Savg		 */
4304185029Spjd		rw_exit(&zfsvfs->z_teardown_inactive_lock);
4305243520Savg		vrecycle(vp);
4306168962Spjd		return;
4307168404Spjd	}
4308168404Spjd
4309168404Spjd	if (zp->z_atime_dirty && zp->z_unlinked == 0) {
4310168404Spjd		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
4311168404Spjd
4312219089Spjd		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
4313219089Spjd		zfs_sa_upgrade_txholds(tx, zp);
4314168404Spjd		error = dmu_tx_assign(tx, TXG_WAIT);
4315168404Spjd		if (error) {
4316168404Spjd			dmu_tx_abort(tx);
4317168404Spjd		} else {
4318219089Spjd			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
4319219089Spjd			    (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
4320168404Spjd			zp->z_atime_dirty = 0;
4321168404Spjd			dmu_tx_commit(tx);
4322168404Spjd		}
4323168404Spjd	}
4324185029Spjd	rw_exit(&zfsvfs->z_teardown_inactive_lock);
4325168404Spjd}
4326168404Spjd
4327219089Spjd
4328168404SpjdCTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid));
4329168404SpjdCTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid));
4330168404Spjd
4331185029Spjd/*ARGSUSED*/
4332168404Spjdstatic int
4333185029Spjdzfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
4334168404Spjd{
4335168404Spjd	znode_t		*zp = VTOZ(vp);
4336168404Spjd	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
4337185029Spjd	uint32_t	gen;
4338219089Spjd	uint64_t	gen64;
4339168404Spjd	uint64_t	object = zp->z_id;
4340168404Spjd	zfid_short_t	*zfid;
4341219089Spjd	int		size, i, error;
4342168404Spjd
4343168404Spjd	ZFS_ENTER(zfsvfs);
4344185029Spjd	ZFS_VERIFY_ZP(zp);
4345168404Spjd
4346219089Spjd	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
4347219089Spjd	    &gen64, sizeof (uint64_t))) != 0) {
4348219089Spjd		ZFS_EXIT(zfsvfs);
4349219089Spjd		return (error);
4350219089Spjd	}
4351219089Spjd
4352219089Spjd	gen = (uint32_t)gen64;
4353219089Spjd
4354168404Spjd	size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
4355249195Smm
4356249195Smm#ifdef illumos
4357249195Smm	if (fidp->fid_len < size) {
4358249195Smm		fidp->fid_len = size;
4359249195Smm		ZFS_EXIT(zfsvfs);
4360249195Smm		return (SET_ERROR(ENOSPC));
4361249195Smm	}
4362249195Smm#else
4363168404Spjd	fidp->fid_len = size;
4364249195Smm#endif
4365168404Spjd
4366168404Spjd	zfid = (zfid_short_t *)fidp;
4367168404Spjd
4368168404Spjd	zfid->zf_len = size;
4369168404Spjd
4370168404Spjd	for (i = 0; i < sizeof (zfid->zf_object); i++)
4371168404Spjd		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
4372168404Spjd
4373168404Spjd	/* Must have a non-zero generation number to distinguish from .zfs */
4374168404Spjd	if (gen == 0)
4375168404Spjd		gen = 1;
4376168404Spjd	for (i = 0; i < sizeof (zfid->zf_gen); i++)
4377168404Spjd		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
4378168404Spjd
4379168404Spjd	if (size == LONG_FID_LEN) {
4380168404Spjd		uint64_t	objsetid = dmu_objset_id(zfsvfs->z_os);
4381169023Spjd		zfid_long_t	*zlfid;
4382168404Spjd
4383168404Spjd		zlfid = (zfid_long_t *)fidp;
4384168404Spjd
4385168404Spjd		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
4386168404Spjd			zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
4387168404Spjd
4388168404Spjd		/* XXX - this should be the generation number for the objset */
4389168404Spjd		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
4390168404Spjd			zlfid->zf_setgen[i] = 0;
4391168404Spjd	}
4392168404Spjd
4393168404Spjd	ZFS_EXIT(zfsvfs);
4394168404Spjd	return (0);
4395168404Spjd}
4396168404Spjd
4397168404Spjdstatic int
4398185029Spjdzfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
4399185029Spjd    caller_context_t *ct)
4400168404Spjd{
4401168404Spjd	znode_t		*zp, *xzp;
4402168404Spjd	zfsvfs_t	*zfsvfs;
4403168404Spjd	int		error;
4404168404Spjd
4405168404Spjd	switch (cmd) {
4406168404Spjd	case _PC_LINK_MAX:
4407168404Spjd		*valp = INT_MAX;
4408168404Spjd		return (0);
4409168404Spjd
4410168404Spjd	case _PC_FILESIZEBITS:
4411168404Spjd		*valp = 64;
4412168404Spjd		return (0);
4413277300Ssmh#ifdef illumos
4414168404Spjd	case _PC_XATTR_EXISTS:
4415168404Spjd		zp = VTOZ(vp);
4416168404Spjd		zfsvfs = zp->z_zfsvfs;
4417168404Spjd		ZFS_ENTER(zfsvfs);
4418185029Spjd		ZFS_VERIFY_ZP(zp);
4419168404Spjd		*valp = 0;
4420303970Savg		error = zfs_dirent_lookup(zp, "", &xzp,
4421303970Savg		    ZXATTR | ZEXISTS | ZSHARED);
4422168404Spjd		if (error == 0) {
4423168404Spjd			if (!zfs_dirempty(xzp))
4424168404Spjd				*valp = 1;
4425303970Savg			vrele(ZTOV(xzp));
4426168404Spjd		} else if (error == ENOENT) {
4427168404Spjd			/*
4428168404Spjd			 * If there aren't extended attributes, it's the
4429168404Spjd			 * same as having zero of them.
4430168404Spjd			 */
4431168404Spjd			error = 0;
4432168404Spjd		}
4433168404Spjd		ZFS_EXIT(zfsvfs);
4434168404Spjd		return (error);
4435168404Spjd
4436219089Spjd	case _PC_SATTR_ENABLED:
4437219089Spjd	case _PC_SATTR_EXISTS:
4438219089Spjd		*valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
4439219089Spjd		    (vp->v_type == VREG || vp->v_type == VDIR);
4440219089Spjd		return (0);
4441219089Spjd
4442219089Spjd	case _PC_ACCESS_FILTERING:
4443219089Spjd		*valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) &&
4444219089Spjd		    vp->v_type == VDIR;
4445219089Spjd		return (0);
4446219089Spjd
4447219089Spjd	case _PC_ACL_ENABLED:
4448219089Spjd		*valp = _ACL_ACE_ENABLED;
4449219089Spjd		return (0);
4450277300Ssmh#endif	/* illumos */
4451219089Spjd	case _PC_MIN_HOLE_SIZE:
4452219089Spjd		*valp = (int)SPA_MINBLOCKSIZE;
4453219089Spjd		return (0);
4454277300Ssmh#ifdef illumos
4455219089Spjd	case _PC_TIMESTAMP_RESOLUTION:
4456219089Spjd		/* nanosecond timestamp resolution */
4457219089Spjd		*valp = 1L;
4458219089Spjd		return (0);
4459277300Ssmh#endif
4460168404Spjd	case _PC_ACL_EXTENDED:
4461196949Strasz		*valp = 0;
4462168404Spjd		return (0);
4463168404Spjd
4464196949Strasz	case _PC_ACL_NFS4:
4465196949Strasz		*valp = 1;
4466196949Strasz		return (0);
4467196949Strasz
4468196949Strasz	case _PC_ACL_PATH_MAX:
4469196949Strasz		*valp = ACL_MAX_ENTRIES;
4470196949Strasz		return (0);
4471196949Strasz
4472168404Spjd	default:
4473168962Spjd		return (EOPNOTSUPP);
4474168404Spjd	}
4475168404Spjd}
4476168404Spjd
4477168404Spjd/*ARGSUSED*/
4478168404Spjdstatic int
4479185029Spjdzfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr,
4480185029Spjd    caller_context_t *ct)
4481168404Spjd{
4482168404Spjd	znode_t *zp = VTOZ(vp);
4483168404Spjd	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4484168404Spjd	int error;
4485185029Spjd	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
4486168404Spjd
4487168404Spjd	ZFS_ENTER(zfsvfs);
4488185029Spjd	ZFS_VERIFY_ZP(zp);
4489185029Spjd	error = zfs_getacl(zp, vsecp, skipaclchk, cr);
4490168404Spjd	ZFS_EXIT(zfsvfs);
4491168404Spjd
4492168404Spjd	return (error);
4493168404Spjd}
4494168404Spjd
4495168404Spjd/*ARGSUSED*/
4496228685Spjdint
4497185029Spjdzfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr,
4498185029Spjd    caller_context_t *ct)
4499168404Spjd{
4500168404Spjd	znode_t *zp = VTOZ(vp);
4501168404Spjd	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4502168404Spjd	int error;
4503185029Spjd	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
4504219089Spjd	zilog_t	*zilog = zfsvfs->z_log;
4505168404Spjd
4506168404Spjd	ZFS_ENTER(zfsvfs);
4507185029Spjd	ZFS_VERIFY_ZP(zp);
4508219089Spjd
4509185029Spjd	error = zfs_setacl(zp, vsecp, skipaclchk, cr);
4510219089Spjd
4511219089Spjd	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
4512219089Spjd		zil_commit(zilog, 0);
4513219089Spjd
4514168404Spjd	ZFS_EXIT(zfsvfs);
4515168404Spjd	return (error);
4516168404Spjd}
4517168404Spjd
4518168962Spjdstatic int
4519330991Savgzfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
4520292373Sglebius    int *rahead)
4521213937Savg{
4522213937Savg	znode_t *zp = VTOZ(vp);
4523213937Savg	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4524213937Savg	objset_t *os = zp->z_zfsvfs->z_os;
4525330991Savg	rl_t *rl;
4526213937Savg	vm_object_t object;
4527330991Savg	off_t start, end, obj_size;
4528330991Savg	uint_t blksz;
4529330991Savg	int pgsin_b, pgsin_a;
4530330991Savg	int error;
4531213937Savg
4532213937Savg	ZFS_ENTER(zfsvfs);
4533213937Savg	ZFS_VERIFY_ZP(zp);
4534213937Savg
4535330991Savg	start = IDX_TO_OFF(ma[0]->pindex);
4536330991Savg	end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
4537330991Savg
4538330991Savg	/*
4539330991Savg	 * Lock a range covering all required and optional pages.
4540330991Savg	 * Note that we need to handle the case of the block size growing.
4541330991Savg	 */
4542330991Savg	for (;;) {
4543330991Savg		blksz = zp->z_blksz;
4544330991Savg		rl = zfs_range_lock(zp, rounddown(start, blksz),
4545330991Savg		    roundup(end, blksz) - rounddown(start, blksz), RL_READER);
4546330991Savg		if (blksz == zp->z_blksz)
4547330991Savg			break;
4548330991Savg		zfs_range_unlock(rl);
4549213937Savg	}
4550213937Savg
4551330991Savg	object = ma[0]->object;
4552330991Savg	zfs_vmobject_wlock(object);
4553330991Savg	obj_size = object->un_pager.vnp.vnp_size;
4554330991Savg	zfs_vmobject_wunlock(object);
4555330991Savg	if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
4556330991Savg		zfs_range_unlock(rl);
4557213937Savg		ZFS_EXIT(zfsvfs);
4558248084Sattilio		return (zfs_vm_pagerret_bad);
4559213937Savg	}
4560213937Savg
4561330991Savg	pgsin_b = 0;
4562330991Savg	if (rbehind != NULL) {
4563330991Savg		pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz));
4564330991Savg		pgsin_b = MIN(*rbehind, pgsin_b);
4565330991Savg	}
4566292373Sglebius
4567330991Savg	pgsin_a = 0;
4568330991Savg	if (rahead != NULL) {
4569330991Savg		pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end);
4570330991Savg		if (end + IDX_TO_OFF(pgsin_a) >= obj_size)
4571330991Savg			pgsin_a = OFF_TO_IDX(round_page(obj_size) - end);
4572330991Savg		pgsin_a = MIN(*rahead, pgsin_a);
4573243517Savg	}
4574243517Savg
4575330991Savg	/*
4576330991Savg	 * NB: we need to pass the exact byte size of the data that we expect
4577330991Savg	 * to read after accounting for the file size.  This is required because
4578330991Savg	 * ZFS will panic if we request DMU to read beyond the end of the last
4579330991Savg	 * allocated block.
4580330991Savg	 */
4581330991Savg	error = dmu_read_pages(os, zp->z_id, ma, count, &pgsin_b, &pgsin_a,
4582330991Savg	    MIN(end, obj_size) - (end - PAGE_SIZE));
4583213937Savg
4584330991Savg	zfs_range_unlock(rl);
4585213937Savg	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
4586213937Savg	ZFS_EXIT(zfsvfs);
4587330991Savg
4588330991Savg	if (error != 0)
4589292386Sglebius		return (zfs_vm_pagerret_error);
4590330991Savg
4591330991Savg	PCPU_INC(cnt.v_vnodein);
4592330991Savg	PCPU_ADD(cnt.v_vnodepgsin, count + pgsin_b + pgsin_a);
4593330991Savg	if (rbehind != NULL)
4594330991Savg		*rbehind = pgsin_b;
4595330991Savg	if (rahead != NULL)
4596330991Savg		*rahead = pgsin_a;
4597330991Savg	return (zfs_vm_pagerret_ok);
4598213937Savg}
4599213937Savg
4600213937Savgstatic int
4601213937Savgzfs_freebsd_getpages(ap)
4602213937Savg	struct vop_getpages_args /* {
4603213937Savg		struct vnode *a_vp;
4604213937Savg		vm_page_t *a_m;
4605213937Savg		int a_count;
4606292373Sglebius		int *a_rbehind;
4607292373Sglebius		int *a_rahead;
4608213937Savg	} */ *ap;
4609213937Savg{
4610213937Savg
4611292373Sglebius	return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
4612292373Sglebius	    ap->a_rahead));
4613213937Savg}
4614213937Savg
4615213937Savgstatic int
4616258746Savgzfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
4617258746Savg    int *rtvals)
4618258746Savg{
4619258746Savg	znode_t		*zp = VTOZ(vp);
4620258746Savg	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
4621258746Savg	rl_t		*rl;
4622258746Savg	dmu_tx_t	*tx;
4623258746Savg	struct sf_buf	*sf;
4624258746Savg	vm_object_t	object;
4625258746Savg	vm_page_t	m;
4626258746Savg	caddr_t		va;
4627258746Savg	size_t		tocopy;
4628258746Savg	size_t		lo_len;
4629258746Savg	vm_ooffset_t	lo_off;
4630258746Savg	vm_ooffset_t	off;
4631258746Savg	uint_t		blksz;
4632258746Savg	int		ncount;
4633258746Savg	int		pcount;
4634258746Savg	int		err;
4635258746Savg	int		i;
4636258746Savg
4637258746Savg	ZFS_ENTER(zfsvfs);
4638258746Savg	ZFS_VERIFY_ZP(zp);
4639258746Savg
4640258746Savg	object = vp->v_object;
4641258746Savg	pcount = btoc(len);
4642258746Savg	ncount = pcount;
4643258746Savg
4644258746Savg	KASSERT(ma[0]->object == object, ("mismatching object"));
4645258746Savg	KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length"));
4646258746Savg
4647258746Savg	for (i = 0; i < pcount; i++)
4648258746Savg		rtvals[i] = zfs_vm_pagerret_error;
4649258746Savg
4650258746Savg	off = IDX_TO_OFF(ma[0]->pindex);
4651258746Savg	blksz = zp->z_blksz;
4652258746Savg	lo_off = rounddown(off, blksz);
4653258746Savg	lo_len = roundup(len + (off - lo_off), blksz);
4654258746Savg	rl = zfs_range_lock(zp, lo_off, lo_len, RL_WRITER);
4655258746Savg
4656258746Savg	zfs_vmobject_wlock(object);
4657258746Savg	if (len + off > object->un_pager.vnp.vnp_size) {
4658258746Savg		if (object->un_pager.vnp.vnp_size > off) {
4659258746Savg			int pgoff;
4660258746Savg
4661258746Savg			len = object->un_pager.vnp.vnp_size - off;
4662258746Savg			ncount = btoc(len);
4663258746Savg			if ((pgoff = (int)len & PAGE_MASK) != 0) {
4664258746Savg				/*
4665258746Savg				 * If the object is locked and the following
4666258746Savg				 * conditions hold, then the page's dirty
4667258746Savg				 * field cannot be concurrently changed by a
4668258746Savg				 * pmap operation.
4669258746Savg				 */
4670258746Savg				m = ma[ncount - 1];
4671258746Savg				vm_page_assert_sbusied(m);
4672258746Savg				KASSERT(!pmap_page_is_write_mapped(m),
4673258746Savg				    ("zfs_putpages: page %p is not read-only", m));
4674258746Savg				vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
4675258746Savg				    pgoff);
4676258746Savg			}
4677258746Savg		} else {
4678258746Savg			len = 0;
4679258746Savg			ncount = 0;
4680258746Savg		}
4681258746Savg		if (ncount < pcount) {
4682258746Savg			for (i = ncount; i < pcount; i++) {
4683258746Savg				rtvals[i] = zfs_vm_pagerret_bad;
4684258746Savg			}
4685258746Savg		}
4686258746Savg	}
4687258746Savg	zfs_vmobject_wunlock(object);
4688258746Savg
4689258746Savg	if (ncount == 0)
4690258746Savg		goto out;
4691258746Savg
4692258746Savg	if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
4693258746Savg	    zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
4694258746Savg		goto out;
4695258746Savg	}
4696258746Savg
4697258746Savg	tx = dmu_tx_create(zfsvfs->z_os);
4698258746Savg	dmu_tx_hold_write(tx, zp->z_id, off, len);
4699258746Savg
4700258746Savg	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
4701258746Savg	zfs_sa_upgrade_txholds(tx, zp);
4702316847Savg	err = dmu_tx_assign(tx, TXG_WAIT);
4703258746Savg	if (err != 0) {
4704258746Savg		dmu_tx_abort(tx);
4705258746Savg		goto out;
4706258746Savg	}
4707258746Savg
4708258746Savg	if (zp->z_blksz < PAGE_SIZE) {
4709258746Savg		for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) {
4710258746Savg			tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
4711258746Savg			va = zfs_map_page(ma[i], &sf);
4712258746Savg			dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
4713258746Savg			zfs_unmap_page(sf);
4714258746Savg		}
4715258746Savg	} else {
4716258746Savg		err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx);
4717258746Savg	}
4718258746Savg
4719258746Savg	if (err == 0) {
4720258746Savg		uint64_t mtime[2], ctime[2];
4721258746Savg		sa_bulk_attr_t bulk[3];
4722258746Savg		int count = 0;
4723258746Savg
4724258746Savg		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
4725258746Savg		    &mtime, 16);
4726258746Savg		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
4727258746Savg		    &ctime, 16);
4728258746Savg		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
4729258746Savg		    &zp->z_pflags, 8);
4730258746Savg		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
4731258746Savg		    B_TRUE);
4732321561Smav		err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
4733321561Smav		ASSERT0(err);
4734258746Savg		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0);
4735258746Savg
4736258746Savg		zfs_vmobject_wlock(object);
4737258746Savg		for (i = 0; i < ncount; i++) {
4738258746Savg			rtvals[i] = zfs_vm_pagerret_ok;
4739258746Savg			vm_page_undirty(ma[i]);
4740258746Savg		}
4741258746Savg		zfs_vmobject_wunlock(object);
4742258746Savg		PCPU_INC(cnt.v_vnodeout);
4743258746Savg		PCPU_ADD(cnt.v_vnodepgsout, ncount);
4744258746Savg	}
4745258746Savg	dmu_tx_commit(tx);
4746258746Savg
4747258746Savgout:
4748258746Savg	zfs_range_unlock(rl);
4749258746Savg	if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 ||
4750258746Savg	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
4751258746Savg		zil_commit(zfsvfs->z_log, zp->z_id);
4752258746Savg	ZFS_EXIT(zfsvfs);
4753258746Savg	return (rtvals[0]);
4754258746Savg}
4755258746Savg
4756258746Savgint
4757258746Savgzfs_freebsd_putpages(ap)
4758258746Savg	struct vop_putpages_args /* {
4759258746Savg		struct vnode *a_vp;
4760258746Savg		vm_page_t *a_m;
4761258746Savg		int a_count;
4762258746Savg		int a_sync;
4763258746Savg		int *a_rtvals;
4764258746Savg	} */ *ap;
4765258746Savg{
4766258746Savg
4767258746Savg	return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync,
4768258746Savg	    ap->a_rtvals));
4769258746Savg}
4770258746Savg
4771258746Savgstatic int
4772243518Savgzfs_freebsd_bmap(ap)
4773243518Savg	struct vop_bmap_args /* {
4774243518Savg		struct vnode *a_vp;
4775243518Savg		daddr_t  a_bn;
4776243518Savg		struct bufobj **a_bop;
4777243518Savg		daddr_t *a_bnp;
4778243518Savg		int *a_runp;
4779243518Savg		int *a_runb;
4780243518Savg	} */ *ap;
4781243518Savg{
4782243518Savg
4783243518Savg	if (ap->a_bop != NULL)
4784243518Savg		*ap->a_bop = &ap->a_vp->v_bufobj;
4785243518Savg	if (ap->a_bnp != NULL)
4786243518Savg		*ap->a_bnp = ap->a_bn;
4787243518Savg	if (ap->a_runp != NULL)
4788243518Savg		*ap->a_runp = 0;
4789243518Savg	if (ap->a_runb != NULL)
4790243518Savg		*ap->a_runb = 0;
4791243518Savg
4792243518Savg	return (0);
4793243518Savg}
4794243518Savg
4795243518Savgstatic int
4796168962Spjdzfs_freebsd_open(ap)
4797168962Spjd	struct vop_open_args /* {
4798168962Spjd		struct vnode *a_vp;
4799168962Spjd		int a_mode;
4800168962Spjd		struct ucred *a_cred;
4801168962Spjd		struct thread *a_td;
4802168962Spjd	} */ *ap;
4803168962Spjd{
4804168962Spjd	vnode_t	*vp = ap->a_vp;
4805168962Spjd	znode_t *zp = VTOZ(vp);
4806168962Spjd	int error;
4807168962Spjd
4808185029Spjd	error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL);
4809168962Spjd	if (error == 0)
4810219089Spjd		vnode_create_vobject(vp, zp->z_size, ap->a_td);
4811168962Spjd	return (error);
4812168962Spjd}
4813168962Spjd
4814168962Spjdstatic int
4815168962Spjdzfs_freebsd_close(ap)
4816168962Spjd	struct vop_close_args /* {
4817168962Spjd		struct vnode *a_vp;
4818168962Spjd		int  a_fflag;
4819168962Spjd		struct ucred *a_cred;
4820168962Spjd		struct thread *a_td;
4821168962Spjd	} */ *ap;
4822168962Spjd{
4823168962Spjd
4824242566Savg	return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred, NULL));
4825168962Spjd}
4826168962Spjd
4827168962Spjdstatic int
4828168962Spjdzfs_freebsd_ioctl(ap)
4829168962Spjd	struct vop_ioctl_args /* {
4830168962Spjd		struct vnode *a_vp;
4831168962Spjd		u_long a_command;
4832168962Spjd		caddr_t a_data;
4833168962Spjd		int a_fflag;
4834168962Spjd		struct ucred *cred;
4835168962Spjd		struct thread *td;
4836168962Spjd	} */ *ap;
4837168962Spjd{
4838168962Spjd
4839168978Spjd	return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
4840185029Spjd	    ap->a_fflag, ap->a_cred, NULL, NULL));
4841168962Spjd}
4842168962Spjd
4843168962Spjdstatic int
4844330062Savgioflags(int ioflags)
4845330062Savg{
4846330062Savg	int flags = 0;
4847330062Savg
4848330062Savg	if (ioflags & IO_APPEND)
4849330062Savg		flags |= FAPPEND;
4850330062Savg	if (ioflags & IO_NDELAY)
4851330062Savg		flags |= FNONBLOCK;
4852330062Savg	if (ioflags & IO_SYNC)
4853330062Savg		flags |= (FSYNC | FDSYNC | FRSYNC);
4854330062Savg
4855330062Savg	return (flags);
4856330062Savg}
4857330062Savg
4858330062Savgstatic int
4859168962Spjdzfs_freebsd_read(ap)
4860168962Spjd	struct vop_read_args /* {
4861168962Spjd		struct vnode *a_vp;
4862168962Spjd		struct uio *a_uio;
4863168962Spjd		int a_ioflag;
4864168962Spjd		struct ucred *a_cred;
4865168962Spjd	} */ *ap;
4866168962Spjd{
4867168962Spjd
4868213673Spjd	return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag),
4869213673Spjd	    ap->a_cred, NULL));
4870168962Spjd}
4871168962Spjd
4872168962Spjdstatic int
4873168962Spjdzfs_freebsd_write(ap)
4874168962Spjd	struct vop_write_args /* {
4875168962Spjd		struct vnode *a_vp;
4876168962Spjd		struct uio *a_uio;
4877168962Spjd		int a_ioflag;
4878168962Spjd		struct ucred *a_cred;
4879168962Spjd	} */ *ap;
4880168962Spjd{
4881168962Spjd
4882213673Spjd	return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag),
4883213673Spjd	    ap->a_cred, NULL));
4884168962Spjd}
4885168962Spjd
4886168962Spjdstatic int
4887168962Spjdzfs_freebsd_access(ap)
4888168962Spjd	struct vop_access_args /* {
4889168962Spjd		struct vnode *a_vp;
4890192689Strasz		accmode_t a_accmode;
4891168962Spjd		struct ucred *a_cred;
4892168962Spjd		struct thread *a_td;
4893168962Spjd	} */ *ap;
4894168962Spjd{
4895212002Sjh	vnode_t *vp = ap->a_vp;
4896212002Sjh	znode_t *zp = VTOZ(vp);
4897198703Spjd	accmode_t accmode;
4898198703Spjd	int error = 0;
4899168962Spjd
4900185172Spjd	/*
4901198703Spjd	 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND,
4902185172Spjd	 */
4903198703Spjd	accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND);
4904198703Spjd	if (accmode != 0)
4905198703Spjd		error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL);
4906185172Spjd
4907198703Spjd	/*
4908198703Spjd	 * VADMIN has to be handled by vaccess().
4909198703Spjd	 */
4910198703Spjd	if (error == 0) {
4911198703Spjd		accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
4912198703Spjd		if (accmode != 0) {
4913219089Spjd			error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
4914219089Spjd			    zp->z_gid, accmode, ap->a_cred, NULL);
4915198703Spjd		}
4916185172Spjd	}
4917185172Spjd
4918212002Sjh	/*
4919212002Sjh	 * For VEXEC, ensure that at least one execute bit is set for
4920212002Sjh	 * non-directories.
4921212002Sjh	 */
4922212002Sjh	if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR &&
4923219089Spjd	    (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
4924212002Sjh		error = EACCES;
4925219089Spjd	}
4926212002Sjh
4927198703Spjd	return (error);
4928168962Spjd}
4929168962Spjd
4930168962Spjdstatic int
4931168962Spjdzfs_freebsd_lookup(ap)
4932168962Spjd	struct vop_lookup_args /* {
4933168962Spjd		struct vnode *a_dvp;
4934168962Spjd		struct vnode **a_vpp;
4935168962Spjd		struct componentname *a_cnp;
4936168962Spjd	} */ *ap;
4937168962Spjd{
4938168962Spjd	struct componentname *cnp = ap->a_cnp;
4939168962Spjd	char nm[NAME_MAX + 1];
4940168962Spjd
4941168962Spjd	ASSERT(cnp->cn_namelen < sizeof(nm));
4942168962Spjd	strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm)));
4943168962Spjd
4944168962Spjd	return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
4945185029Spjd	    cnp->cn_cred, cnp->cn_thread, 0));
4946168962Spjd}
4947168962Spjd
4948168962Spjdstatic int
4949303970Savgzfs_cache_lookup(ap)
4950303970Savg	struct vop_lookup_args /* {
4951303970Savg		struct vnode *a_dvp;
4952303970Savg		struct vnode **a_vpp;
4953303970Savg		struct componentname *a_cnp;
4954303970Savg	} */ *ap;
4955303970Savg{
4956303970Savg	zfsvfs_t *zfsvfs;
4957303970Savg
4958303970Savg	zfsvfs = ap->a_dvp->v_mount->mnt_data;
4959303970Savg	if (zfsvfs->z_use_namecache)
4960303970Savg		return (vfs_cache_lookup(ap));
4961303970Savg	else
4962303970Savg		return (zfs_freebsd_lookup(ap));
4963303970Savg}
4964303970Savg
4965303970Savgstatic int
4966168962Spjdzfs_freebsd_create(ap)
4967168962Spjd	struct vop_create_args /* {
4968168962Spjd		struct vnode *a_dvp;
4969168962Spjd		struct vnode **a_vpp;
4970168962Spjd		struct componentname *a_cnp;
4971168962Spjd		struct vattr *a_vap;
4972168962Spjd	} */ *ap;
4973168962Spjd{
4974303970Savg	zfsvfs_t *zfsvfs;
4975168962Spjd	struct componentname *cnp = ap->a_cnp;
4976168962Spjd	vattr_t *vap = ap->a_vap;
4977276007Skib	int error, mode;
4978168962Spjd
4979168962Spjd	ASSERT(cnp->cn_flags & SAVENAME);
4980168962Spjd
4981168962Spjd	vattr_init_mask(vap);
4982168962Spjd	mode = vap->va_mode & ALLPERMS;
4983303970Savg	zfsvfs = ap->a_dvp->v_mount->mnt_data;
4984168962Spjd
4985276007Skib	error = zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode,
4986276007Skib	    ap->a_vpp, cnp->cn_cred, cnp->cn_thread);
4987303970Savg	if (zfsvfs->z_use_namecache &&
4988303970Savg	    error == 0 && (cnp->cn_flags & MAKEENTRY) != 0)
4989276007Skib		cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
4990276007Skib	return (error);
4991168962Spjd}
4992168962Spjd
4993168962Spjdstatic int
4994168962Spjdzfs_freebsd_remove(ap)
4995168962Spjd	struct vop_remove_args /* {
4996168962Spjd		struct vnode *a_dvp;
4997168962Spjd		struct vnode *a_vp;
4998168962Spjd		struct componentname *a_cnp;
4999168962Spjd	} */ *ap;
5000168962Spjd{
5001168962Spjd
5002168962Spjd	ASSERT(ap->a_cnp->cn_flags & SAVENAME);
5003168962Spjd
5004303970Savg	return (zfs_remove(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr,
5005303970Savg	    ap->a_cnp->cn_cred));
5006168962Spjd}
5007168962Spjd
5008168962Spjdstatic int
5009168962Spjdzfs_freebsd_mkdir(ap)
5010168962Spjd	struct vop_mkdir_args /* {
5011168962Spjd		struct vnode *a_dvp;
5012168962Spjd		struct vnode **a_vpp;
5013168962Spjd		struct componentname *a_cnp;
5014168962Spjd		struct vattr *a_vap;
5015168962Spjd	} */ *ap;
5016168962Spjd{
5017168962Spjd	vattr_t *vap = ap->a_vap;
5018168962Spjd
5019168962Spjd	ASSERT(ap->a_cnp->cn_flags & SAVENAME);
5020168962Spjd
5021168962Spjd	vattr_init_mask(vap);
5022168962Spjd
5023168962Spjd	return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp,
5024303970Savg	    ap->a_cnp->cn_cred));
5025168962Spjd}
5026168962Spjd
5027168962Spjdstatic int
5028168962Spjdzfs_freebsd_rmdir(ap)
5029168962Spjd	struct vop_rmdir_args /* {
5030168962Spjd		struct vnode *a_dvp;
5031168962Spjd		struct vnode *a_vp;
5032168962Spjd		struct componentname *a_cnp;
5033168962Spjd	} */ *ap;
5034168962Spjd{
5035168962Spjd	struct componentname *cnp = ap->a_cnp;
5036168962Spjd
5037168962Spjd	ASSERT(cnp->cn_flags & SAVENAME);
5038168962Spjd
5039303970Savg	return (zfs_rmdir(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
5040168962Spjd}
5041168962Spjd
5042168962Spjdstatic int
5043168962Spjdzfs_freebsd_readdir(ap)
5044168962Spjd	struct vop_readdir_args /* {
5045168962Spjd		struct vnode *a_vp;
5046168962Spjd		struct uio *a_uio;
5047168962Spjd		struct ucred *a_cred;
5048168962Spjd		int *a_eofflag;
5049168962Spjd		int *a_ncookies;
5050168962Spjd		u_long **a_cookies;
5051168962Spjd	} */ *ap;
5052168962Spjd{
5053168962Spjd
5054168962Spjd	return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag,
5055168962Spjd	    ap->a_ncookies, ap->a_cookies));
5056168962Spjd}
5057168962Spjd
5058168962Spjdstatic int
5059168962Spjdzfs_freebsd_fsync(ap)
5060168962Spjd	struct vop_fsync_args /* {
5061168962Spjd		struct vnode *a_vp;
5062168962Spjd		int a_waitfor;
5063168962Spjd		struct thread *a_td;
5064168962Spjd	} */ *ap;
5065168962Spjd{
5066168962Spjd
5067168962Spjd	vop_stdfsync(ap);
5068185029Spjd	return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL));
5069168962Spjd}
5070168962Spjd
5071168962Spjdstatic int
5072168962Spjdzfs_freebsd_getattr(ap)
5073168962Spjd	struct vop_getattr_args /* {
5074168962Spjd		struct vnode *a_vp;
5075168962Spjd		struct vattr *a_vap;
5076168962Spjd		struct ucred *a_cred;
5077168962Spjd	} */ *ap;
5078168962Spjd{
5079185029Spjd	vattr_t *vap = ap->a_vap;
5080185029Spjd	xvattr_t xvap;
5081185029Spjd	u_long fflags = 0;
5082185029Spjd	int error;
5083168962Spjd
5084185029Spjd	xva_init(&xvap);
5085185029Spjd	xvap.xva_vattr = *vap;
5086185029Spjd	xvap.xva_vattr.va_mask |= AT_XVATTR;
5087185029Spjd
5088185029Spjd	/* Convert chflags into ZFS-type flags. */
5089185029Spjd	/* XXX: what about SF_SETTABLE?. */
5090185029Spjd	XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
5091185029Spjd	XVA_SET_REQ(&xvap, XAT_APPENDONLY);
5092185029Spjd	XVA_SET_REQ(&xvap, XAT_NOUNLINK);
5093185029Spjd	XVA_SET_REQ(&xvap, XAT_NODUMP);
5094254627Sken	XVA_SET_REQ(&xvap, XAT_READONLY);
5095254627Sken	XVA_SET_REQ(&xvap, XAT_ARCHIVE);
5096254627Sken	XVA_SET_REQ(&xvap, XAT_SYSTEM);
5097254627Sken	XVA_SET_REQ(&xvap, XAT_HIDDEN);
5098254627Sken	XVA_SET_REQ(&xvap, XAT_REPARSE);
5099254627Sken	XVA_SET_REQ(&xvap, XAT_OFFLINE);
5100254627Sken	XVA_SET_REQ(&xvap, XAT_SPARSE);
5101254627Sken
5102185029Spjd	error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL);
5103185029Spjd	if (error != 0)
5104185029Spjd		return (error);
5105185029Spjd
5106185029Spjd	/* Convert ZFS xattr into chflags. */
5107185029Spjd#define	FLAG_CHECK(fflag, xflag, xfield)	do {			\
5108185029Spjd	if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0)		\
5109185029Spjd		fflags |= (fflag);					\
5110185029Spjd} while (0)
5111185029Spjd	FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
5112185029Spjd	    xvap.xva_xoptattrs.xoa_immutable);
5113185029Spjd	FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
5114185029Spjd	    xvap.xva_xoptattrs.xoa_appendonly);
5115185029Spjd	FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
5116185029Spjd	    xvap.xva_xoptattrs.xoa_nounlink);
5117254627Sken	FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE,
5118254627Sken	    xvap.xva_xoptattrs.xoa_archive);
5119185029Spjd	FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
5120185029Spjd	    xvap.xva_xoptattrs.xoa_nodump);
5121254627Sken	FLAG_CHECK(UF_READONLY, XAT_READONLY,
5122254627Sken	    xvap.xva_xoptattrs.xoa_readonly);
5123254627Sken	FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM,
5124254627Sken	    xvap.xva_xoptattrs.xoa_system);
5125254627Sken	FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN,
5126254627Sken	    xvap.xva_xoptattrs.xoa_hidden);
5127254627Sken	FLAG_CHECK(UF_REPARSE, XAT_REPARSE,
5128254627Sken	    xvap.xva_xoptattrs.xoa_reparse);
5129254627Sken	FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE,
5130254627Sken	    xvap.xva_xoptattrs.xoa_offline);
5131254627Sken	FLAG_CHECK(UF_SPARSE, XAT_SPARSE,
5132254627Sken	    xvap.xva_xoptattrs.xoa_sparse);
5133254627Sken
5134185029Spjd#undef	FLAG_CHECK
5135185029Spjd	*vap = xvap.xva_vattr;
5136185029Spjd	vap->va_flags = fflags;
5137185029Spjd	return (0);
5138168962Spjd}
5139168962Spjd
5140168962Spjdstatic int
5141168962Spjdzfs_freebsd_setattr(ap)
5142168962Spjd	struct vop_setattr_args /* {
5143168962Spjd		struct vnode *a_vp;
5144168962Spjd		struct vattr *a_vap;
5145168962Spjd		struct ucred *a_cred;
5146168962Spjd	} */ *ap;
5147168962Spjd{
5148185172Spjd	vnode_t *vp = ap->a_vp;
5149168962Spjd	vattr_t *vap = ap->a_vap;
5150185172Spjd	cred_t *cred = ap->a_cred;
5151185029Spjd	xvattr_t xvap;
5152185029Spjd	u_long fflags;
5153185029Spjd	uint64_t zflags;
5154168962Spjd
5155168962Spjd	vattr_init_mask(vap);
5156170044Spjd	vap->va_mask &= ~AT_NOSET;
5157168962Spjd
5158185029Spjd	xva_init(&xvap);
5159185029Spjd	xvap.xva_vattr = *vap;
5160185029Spjd
5161219089Spjd	zflags = VTOZ(vp)->z_pflags;
5162185172Spjd
5163185029Spjd	if (vap->va_flags != VNOVAL) {
5164197683Sdelphij		zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
5165185172Spjd		int error;
5166185172Spjd
5167197683Sdelphij		if (zfsvfs->z_use_fuids == B_FALSE)
5168197683Sdelphij			return (EOPNOTSUPP);
5169197683Sdelphij
5170185029Spjd		fflags = vap->va_flags;
5171254627Sken		/*
5172254627Sken		 * XXX KDM
5173254627Sken		 * We need to figure out whether it makes sense to allow
5174254627Sken		 * UF_REPARSE through, since we don't really have other
5175254627Sken		 * facilities to handle reparse points and zfs_setattr()
5176254627Sken		 * doesn't currently allow setting that attribute anyway.
5177254627Sken		 */
5178254627Sken		if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE|
5179254627Sken		     UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE|
5180254627Sken		     UF_OFFLINE|UF_SPARSE)) != 0)
5181185029Spjd			return (EOPNOTSUPP);
5182185172Spjd		/*
5183185172Spjd		 * Unprivileged processes are not permitted to unset system
5184185172Spjd		 * flags, or modify flags if any system flags are set.
5185185172Spjd		 * Privileged non-jail processes may not modify system flags
5186185172Spjd		 * if securelevel > 0 and any existing system flags are set.
5187185172Spjd		 * Privileged jail processes behave like privileged non-jail
5188185172Spjd		 * processes if the security.jail.chflags_allowed sysctl is
5189185172Spjd		 * is non-zero; otherwise, they behave like unprivileged
5190185172Spjd		 * processes.
5191185172Spjd		 */
5192197861Spjd		if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
5193197861Spjd		    priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) {
5194185172Spjd			if (zflags &
5195185172Spjd			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
5196185172Spjd				error = securelevel_gt(cred, 0);
5197197861Spjd				if (error != 0)
5198185172Spjd					return (error);
5199185172Spjd			}
5200185172Spjd		} else {
5201197861Spjd			/*
5202197861Spjd			 * Callers may only modify the file flags on objects they
5203197861Spjd			 * have VADMIN rights for.
5204197861Spjd			 */
5205197861Spjd			if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0)
5206197861Spjd				return (error);
5207185172Spjd			if (zflags &
5208185172Spjd			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
5209185172Spjd				return (EPERM);
5210185172Spjd			}
5211185172Spjd			if (fflags &
5212185172Spjd			    (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
5213185172Spjd				return (EPERM);
5214185172Spjd			}
5215185172Spjd		}
5216185029Spjd
5217185029Spjd#define	FLAG_CHANGE(fflag, zflag, xflag, xfield)	do {		\
5218185029Spjd	if (((fflags & (fflag)) && !(zflags & (zflag))) ||		\
5219185029Spjd	    ((zflags & (zflag)) && !(fflags & (fflag)))) {		\
5220185029Spjd		XVA_SET_REQ(&xvap, (xflag));				\
5221185029Spjd		(xfield) = ((fflags & (fflag)) != 0);			\
5222185029Spjd	}								\
5223185029Spjd} while (0)
5224185029Spjd		/* Convert chflags into ZFS-type flags. */
5225185029Spjd		/* XXX: what about SF_SETTABLE?. */
5226185029Spjd		FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
5227185029Spjd		    xvap.xva_xoptattrs.xoa_immutable);
5228185029Spjd		FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
5229185029Spjd		    xvap.xva_xoptattrs.xoa_appendonly);
5230185029Spjd		FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
5231185029Spjd		    xvap.xva_xoptattrs.xoa_nounlink);
5232254627Sken		FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE,
5233254627Sken		    xvap.xva_xoptattrs.xoa_archive);
5234185029Spjd		FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
5235185172Spjd		    xvap.xva_xoptattrs.xoa_nodump);
5236254627Sken		FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY,
5237254627Sken		    xvap.xva_xoptattrs.xoa_readonly);
5238254627Sken		FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM,
5239254627Sken		    xvap.xva_xoptattrs.xoa_system);
5240254627Sken		FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN,
5241254627Sken		    xvap.xva_xoptattrs.xoa_hidden);
5242254627Sken		FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE,
5243254627Sken		    xvap.xva_xoptattrs.xoa_hidden);
5244254627Sken		FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE,
5245254627Sken		    xvap.xva_xoptattrs.xoa_offline);
5246254627Sken		FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE,
5247254627Sken		    xvap.xva_xoptattrs.xoa_sparse);
5248185029Spjd#undef	FLAG_CHANGE
5249185029Spjd	}
5250316391Sasomers	if (vap->va_birthtime.tv_sec != VNOVAL) {
5251316391Sasomers		xvap.xva_vattr.va_mask |= AT_XVATTR;
5252316391Sasomers		XVA_SET_REQ(&xvap, XAT_CREATETIME);
5253316391Sasomers	}
5254185172Spjd	return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL));
5255168962Spjd}
5256168962Spjd
5257168962Spjdstatic int
5258168962Spjdzfs_freebsd_rename(ap)
5259168962Spjd	struct vop_rename_args  /* {
5260168962Spjd		struct vnode *a_fdvp;
5261168962Spjd		struct vnode *a_fvp;
5262168962Spjd		struct componentname *a_fcnp;
5263168962Spjd		struct vnode *a_tdvp;
5264168962Spjd		struct vnode *a_tvp;
5265168962Spjd		struct componentname *a_tcnp;
5266168962Spjd	} */ *ap;
5267168962Spjd{
5268168962Spjd	vnode_t *fdvp = ap->a_fdvp;
5269168962Spjd	vnode_t *fvp = ap->a_fvp;
5270168962Spjd	vnode_t *tdvp = ap->a_tdvp;
5271168962Spjd	vnode_t *tvp = ap->a_tvp;
5272168962Spjd	int error;
5273168962Spjd
5274192237Skmacy	ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
5275192237Skmacy	ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));
5276168962Spjd
5277303970Savg	error = zfs_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
5278303970Savg	    ap->a_tcnp, ap->a_fcnp->cn_cred);
5279168962Spjd
5280303970Savg	vrele(fdvp);
5281303970Savg	vrele(fvp);
5282303970Savg	vrele(tdvp);
5283303970Savg	if (tvp != NULL)
5284303970Savg		vrele(tvp);
5285303970Savg
5286168962Spjd	return (error);
5287168962Spjd}
5288168962Spjd
5289168962Spjdstatic int
5290168962Spjdzfs_freebsd_symlink(ap)
5291168962Spjd	struct vop_symlink_args /* {
5292168962Spjd		struct vnode *a_dvp;
5293168962Spjd		struct vnode **a_vpp;
5294168962Spjd		struct componentname *a_cnp;
5295168962Spjd		struct vattr *a_vap;
5296168962Spjd		char *a_target;
5297168962Spjd	} */ *ap;
5298168962Spjd{
5299168962Spjd	struct componentname *cnp = ap->a_cnp;
5300168962Spjd	vattr_t *vap = ap->a_vap;
5301168962Spjd
5302168962Spjd	ASSERT(cnp->cn_flags & SAVENAME);
5303168962Spjd
5304168962Spjd	vap->va_type = VLNK;	/* FreeBSD: Syscall only sets va_mode. */
5305168962Spjd	vattr_init_mask(vap);
5306168962Spjd
5307168962Spjd	return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap,
5308168962Spjd	    ap->a_target, cnp->cn_cred, cnp->cn_thread));
5309168962Spjd}
5310168962Spjd
5311168962Spjdstatic int
5312168962Spjdzfs_freebsd_readlink(ap)
5313168962Spjd	struct vop_readlink_args /* {
5314168962Spjd		struct vnode *a_vp;
5315168962Spjd		struct uio *a_uio;
5316168962Spjd		struct ucred *a_cred;
5317168962Spjd	} */ *ap;
5318168962Spjd{
5319168962Spjd
5320185029Spjd	return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL));
5321168962Spjd}
5322168962Spjd
5323168962Spjdstatic int
5324168962Spjdzfs_freebsd_link(ap)
5325168962Spjd	struct vop_link_args /* {
5326168962Spjd		struct vnode *a_tdvp;
5327168962Spjd		struct vnode *a_vp;
5328168962Spjd		struct componentname *a_cnp;
5329168962Spjd	} */ *ap;
5330168962Spjd{
5331168962Spjd	struct componentname *cnp = ap->a_cnp;
5332254982Sdelphij	vnode_t *vp = ap->a_vp;
5333254982Sdelphij	vnode_t *tdvp = ap->a_tdvp;
5334168962Spjd
5335254982Sdelphij	if (tdvp->v_mount != vp->v_mount)
5336254982Sdelphij		return (EXDEV);
5337254982Sdelphij
5338168962Spjd	ASSERT(cnp->cn_flags & SAVENAME);
5339168962Spjd
5340254982Sdelphij	return (zfs_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0));
5341168962Spjd}
5342168962Spjd
5343168962Spjdstatic int
5344168962Spjdzfs_freebsd_inactive(ap)
5345169170Spjd	struct vop_inactive_args /* {
5346169170Spjd		struct vnode *a_vp;
5347169170Spjd		struct thread *a_td;
5348169170Spjd	} */ *ap;
5349168962Spjd{
5350168962Spjd	vnode_t *vp = ap->a_vp;
5351168962Spjd
5352185029Spjd	zfs_inactive(vp, ap->a_td->td_ucred, NULL);
5353168962Spjd	return (0);
5354168962Spjd}
5355168962Spjd
5356168962Spjdstatic int
5357168962Spjdzfs_freebsd_reclaim(ap)
5358168962Spjd	struct vop_reclaim_args /* {
5359168962Spjd		struct vnode *a_vp;
5360168962Spjd		struct thread *a_td;
5361168962Spjd	} */ *ap;
5362168962Spjd{
5363169170Spjd	vnode_t	*vp = ap->a_vp;
5364168962Spjd	znode_t	*zp = VTOZ(vp);
5365197133Spjd	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
5366168962Spjd
5367169025Spjd	ASSERT(zp != NULL);
5368169025Spjd
5369243520Savg	/* Destroy the vm object and flush associated pages. */
5370243520Savg	vnode_destroy_vobject(vp);
5371243520Savg
5372168962Spjd	/*
5373243520Savg	 * z_teardown_inactive_lock protects from a race with
5374243520Savg	 * zfs_znode_dmu_fini in zfsvfs_teardown during
5375243520Savg	 * force unmount.
5376168962Spjd	 */
5377243520Savg	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
5378243520Savg	if (zp->z_sa_hdl == NULL)
5379196301Spjd		zfs_znode_free(zp);
5380243520Savg	else
5381243520Savg		zfs_zinactive(zp);
5382243520Savg	rw_exit(&zfsvfs->z_teardown_inactive_lock);
5383185029Spjd
5384168962Spjd	vp->v_data = NULL;
5385168962Spjd	return (0);
5386168962Spjd}
5387168962Spjd
5388168962Spjdstatic int
5389168962Spjdzfs_freebsd_fid(ap)
5390168962Spjd	struct vop_fid_args /* {
5391168962Spjd		struct vnode *a_vp;
5392168962Spjd		struct fid *a_fid;
5393168962Spjd	} */ *ap;
5394168962Spjd{
5395168962Spjd
5396185029Spjd	return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL));
5397168962Spjd}
5398168962Spjd
5399168962Spjdstatic int
5400168962Spjdzfs_freebsd_pathconf(ap)
5401168962Spjd	struct vop_pathconf_args /* {
5402168962Spjd		struct vnode *a_vp;
5403168962Spjd		int a_name;
5404168962Spjd		register_t *a_retval;
5405168962Spjd	} */ *ap;
5406168962Spjd{
5407168962Spjd	ulong_t val;
5408168962Spjd	int error;
5409168962Spjd
5410185029Spjd	error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL);
5411328298Sjhb	if (error == 0) {
5412168962Spjd		*ap->a_retval = val;
5413328298Sjhb		return (error);
5414328298Sjhb	}
5415328298Sjhb	if (error != EOPNOTSUPP)
5416328298Sjhb		return (error);
5417168962Spjd
5418196949Strasz	switch (ap->a_name) {
5419328298Sjhb	case _PC_NAME_MAX:
5420328298Sjhb		*ap->a_retval = NAME_MAX;
5421328298Sjhb		return (0);
5422328298Sjhb	case _PC_PIPE_BUF:
5423328298Sjhb		if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) {
5424328298Sjhb			*ap->a_retval = PIPE_BUF;
5425328298Sjhb			return (0);
5426328298Sjhb		}
5427328298Sjhb		return (EINVAL);
5428196949Strasz	default:
5429328298Sjhb		return (vop_stdpathconf(ap));
5430196949Strasz	}
5431196949Strasz}
5432196949Strasz
5433185029Spjd/*
5434185029Spjd * FreeBSD's extended attributes namespace defines file name prefix for ZFS'
5435185029Spjd * extended attribute name:
5436185029Spjd *
5437185029Spjd *	NAMESPACE	PREFIX
5438185029Spjd *	system		freebsd:system:
5439185029Spjd *	user		(none, can be used to access ZFS fsattr(5) attributes
5440185029Spjd *			created on Solaris)
5441185029Spjd */
5442185029Spjdstatic int
5443185029Spjdzfs_create_attrname(int attrnamespace, const char *name, char *attrname,
5444185029Spjd    size_t size)
5445185029Spjd{
5446185029Spjd	const char *namespace, *prefix, *suffix;
5447185029Spjd
5448185029Spjd	/* We don't allow '/' character in attribute name. */
5449185029Spjd	if (strchr(name, '/') != NULL)
5450185029Spjd		return (EINVAL);
5451185029Spjd	/* We don't allow attribute names that start with "freebsd:" string. */
5452185029Spjd	if (strncmp(name, "freebsd:", 8) == 0)
5453185029Spjd		return (EINVAL);
5454185029Spjd
5455185029Spjd	bzero(attrname, size);
5456185029Spjd
5457185029Spjd	switch (attrnamespace) {
5458185029Spjd	case EXTATTR_NAMESPACE_USER:
5459185029Spjd#if 0
5460185029Spjd		prefix = "freebsd:";
5461185029Spjd		namespace = EXTATTR_NAMESPACE_USER_STRING;
5462185029Spjd		suffix = ":";
5463185029Spjd#else
5464185029Spjd		/*
5465185029Spjd		 * This is the default namespace by which we can access all
5466185029Spjd		 * attributes created on Solaris.
5467185029Spjd		 */
5468185029Spjd		prefix = namespace = suffix = "";
5469185029Spjd#endif
5470185029Spjd		break;
5471185029Spjd	case EXTATTR_NAMESPACE_SYSTEM:
5472185029Spjd		prefix = "freebsd:";
5473185029Spjd		namespace = EXTATTR_NAMESPACE_SYSTEM_STRING;
5474185029Spjd		suffix = ":";
5475185029Spjd		break;
5476185029Spjd	case EXTATTR_NAMESPACE_EMPTY:
5477185029Spjd	default:
5478185029Spjd		return (EINVAL);
5479185029Spjd	}
5480185029Spjd	if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix,
5481185029Spjd	    name) >= size) {
5482185029Spjd		return (ENAMETOOLONG);
5483185029Spjd	}
5484185029Spjd	return (0);
5485185029Spjd}
5486185029Spjd
5487185029Spjd/*
5488185029Spjd * Vnode operating to retrieve a named extended attribute.
5489185029Spjd */
5490185029Spjdstatic int
5491185029Spjdzfs_getextattr(struct vop_getextattr_args *ap)
5492185029Spjd/*
5493185029Spjdvop_getextattr {
5494185029Spjd	IN struct vnode *a_vp;
5495185029Spjd	IN int a_attrnamespace;
5496185029Spjd	IN const char *a_name;
5497185029Spjd	INOUT struct uio *a_uio;
5498185029Spjd	OUT size_t *a_size;
5499185029Spjd	IN struct ucred *a_cred;
5500185029Spjd	IN struct thread *a_td;
5501185029Spjd};
5502185029Spjd*/
5503185029Spjd{
5504185029Spjd	zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs;
5505185029Spjd	struct thread *td = ap->a_td;
5506185029Spjd	struct nameidata nd;
5507185029Spjd	char attrname[255];
5508185029Spjd	struct vattr va;
5509185029Spjd	vnode_t *xvp = NULL, *vp;
5510185029Spjd	int error, flags;
5511185029Spjd
5512195785Strasz	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5513195785Strasz	    ap->a_cred, ap->a_td, VREAD);
5514195785Strasz	if (error != 0)
5515195785Strasz		return (error);
5516195785Strasz
5517185029Spjd	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5518185029Spjd	    sizeof(attrname));
5519185029Spjd	if (error != 0)
5520185029Spjd		return (error);
5521185029Spjd
5522185029Spjd	ZFS_ENTER(zfsvfs);
5523185029Spjd
5524185029Spjd	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td,
5525185029Spjd	    LOOKUP_XATTR);
5526185029Spjd	if (error != 0) {
5527185029Spjd		ZFS_EXIT(zfsvfs);
5528185029Spjd		return (error);
5529185029Spjd	}
5530185029Spjd
5531185029Spjd	flags = FREAD;
5532241896Skib	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
5533185029Spjd	    xvp, td);
5534194586Skib	error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL);
5535185029Spjd	vp = nd.ni_vp;
5536185029Spjd	NDFREE(&nd, NDF_ONLY_PNBUF);
5537185029Spjd	if (error != 0) {
5538196303Spjd		ZFS_EXIT(zfsvfs);
5539195785Strasz		if (error == ENOENT)
5540195785Strasz			error = ENOATTR;
5541185029Spjd		return (error);
5542185029Spjd	}
5543185029Spjd
5544185029Spjd	if (ap->a_size != NULL) {
5545185029Spjd		error = VOP_GETATTR(vp, &va, ap->a_cred);
5546185029Spjd		if (error == 0)
5547185029Spjd			*ap->a_size = (size_t)va.va_size;
5548185029Spjd	} else if (ap->a_uio != NULL)
5549224605Smm		error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5550185029Spjd
5551185029Spjd	VOP_UNLOCK(vp, 0);
5552185029Spjd	vn_close(vp, flags, ap->a_cred, td);
5553185029Spjd	ZFS_EXIT(zfsvfs);
5554185029Spjd
5555185029Spjd	return (error);
5556185029Spjd}
5557185029Spjd
5558185029Spjd/*
5559185029Spjd * Vnode operation to remove a named attribute.
5560185029Spjd */
5561185029Spjdint
5562185029Spjdzfs_deleteextattr(struct vop_deleteextattr_args *ap)
5563185029Spjd/*
5564185029Spjdvop_deleteextattr {
5565185029Spjd	IN struct vnode *a_vp;
5566185029Spjd	IN int a_attrnamespace;
5567185029Spjd	IN const char *a_name;
5568185029Spjd	IN struct ucred *a_cred;
5569185029Spjd	IN struct thread *a_td;
5570185029Spjd};
5571185029Spjd*/
5572185029Spjd{
5573185029Spjd	zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs;
5574185029Spjd	struct thread *td = ap->a_td;
5575185029Spjd	struct nameidata nd;
5576185029Spjd	char attrname[255];
5577185029Spjd	struct vattr va;
5578185029Spjd	vnode_t *xvp = NULL, *vp;
5579185029Spjd	int error, flags;
5580185029Spjd
5581195785Strasz	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5582195785Strasz	    ap->a_cred, ap->a_td, VWRITE);
5583195785Strasz	if (error != 0)
5584195785Strasz		return (error);
5585195785Strasz
5586185029Spjd	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5587185029Spjd	    sizeof(attrname));
5588185029Spjd	if (error != 0)
5589185029Spjd		return (error);
5590185029Spjd
5591185029Spjd	ZFS_ENTER(zfsvfs);
5592185029Spjd
5593185029Spjd	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td,
5594185029Spjd	    LOOKUP_XATTR);
5595185029Spjd	if (error != 0) {
5596185029Spjd		ZFS_EXIT(zfsvfs);
5597185029Spjd		return (error);
5598185029Spjd	}
5599185029Spjd
5600241896Skib	NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
5601185029Spjd	    UIO_SYSSPACE, attrname, xvp, td);
5602185029Spjd	error = namei(&nd);
5603185029Spjd	vp = nd.ni_vp;
5604185029Spjd	if (error != 0) {
5605196303Spjd		ZFS_EXIT(zfsvfs);
5606260706Savg		NDFREE(&nd, NDF_ONLY_PNBUF);
5607195785Strasz		if (error == ENOENT)
5608195785Strasz			error = ENOATTR;
5609185029Spjd		return (error);
5610185029Spjd	}
5611260706Savg
5612185029Spjd	error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
5613260706Savg	NDFREE(&nd, NDF_ONLY_PNBUF);
5614185029Spjd
5615185029Spjd	vput(nd.ni_dvp);
5616185029Spjd	if (vp == nd.ni_dvp)
5617185029Spjd		vrele(vp);
5618185029Spjd	else
5619185029Spjd		vput(vp);
5620185029Spjd	ZFS_EXIT(zfsvfs);
5621185029Spjd
5622185029Spjd	return (error);
5623185029Spjd}
5624185029Spjd
5625185029Spjd/*
5626185029Spjd * Vnode operation to set a named attribute.
5627185029Spjd */
5628185029Spjdstatic int
5629185029Spjdzfs_setextattr(struct vop_setextattr_args *ap)
5630185029Spjd/*
5631185029Spjdvop_setextattr {
5632185029Spjd	IN struct vnode *a_vp;
5633185029Spjd	IN int a_attrnamespace;
5634185029Spjd	IN const char *a_name;
5635185029Spjd	INOUT struct uio *a_uio;
5636185029Spjd	IN struct ucred *a_cred;
5637185029Spjd	IN struct thread *a_td;
5638185029Spjd};
5639185029Spjd*/
5640185029Spjd{
5641185029Spjd	zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs;
5642185029Spjd	struct thread *td = ap->a_td;
5643185029Spjd	struct nameidata nd;
5644185029Spjd	char attrname[255];
5645185029Spjd	struct vattr va;
5646185029Spjd	vnode_t *xvp = NULL, *vp;
5647185029Spjd	int error, flags;
5648185029Spjd
5649195785Strasz	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5650195785Strasz	    ap->a_cred, ap->a_td, VWRITE);
5651195785Strasz	if (error != 0)
5652195785Strasz		return (error);
5653195785Strasz
5654185029Spjd	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5655185029Spjd	    sizeof(attrname));
5656185029Spjd	if (error != 0)
5657185029Spjd		return (error);
5658185029Spjd
5659185029Spjd	ZFS_ENTER(zfsvfs);
5660185029Spjd
5661185029Spjd	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td,
5662195785Strasz	    LOOKUP_XATTR | CREATE_XATTR_DIR);
5663185029Spjd	if (error != 0) {
5664185029Spjd		ZFS_EXIT(zfsvfs);
5665185029Spjd		return (error);
5666185029Spjd	}
5667185029Spjd
5668185029Spjd	flags = FFLAGS(O_WRONLY | O_CREAT);
5669241896Skib	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
5670185029Spjd	    xvp, td);
5671194586Skib	error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL);
5672185029Spjd	vp = nd.ni_vp;
5673185029Spjd	NDFREE(&nd, NDF_ONLY_PNBUF);
5674185029Spjd	if (error != 0) {
5675185029Spjd		ZFS_EXIT(zfsvfs);
5676185029Spjd		return (error);
5677185029Spjd	}
5678185029Spjd
5679185029Spjd	VATTR_NULL(&va);
5680185029Spjd	va.va_size = 0;
5681185029Spjd	error = VOP_SETATTR(vp, &va, ap->a_cred);
5682185029Spjd	if (error == 0)
5683268420Smav		VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5684185029Spjd
5685185029Spjd	VOP_UNLOCK(vp, 0);
5686185029Spjd	vn_close(vp, flags, ap->a_cred, td);
5687185029Spjd	ZFS_EXIT(zfsvfs);
5688185029Spjd
5689185029Spjd	return (error);
5690185029Spjd}
5691185029Spjd
5692185029Spjd/*
5693185029Spjd * Vnode operation to retrieve extended attributes on a vnode.
5694185029Spjd */
5695185029Spjdstatic int
5696185029Spjdzfs_listextattr(struct vop_listextattr_args *ap)
5697185029Spjd/*
5698185029Spjdvop_listextattr {
5699185029Spjd	IN struct vnode *a_vp;
5700185029Spjd	IN int a_attrnamespace;
5701185029Spjd	INOUT struct uio *a_uio;
5702185029Spjd	OUT size_t *a_size;
5703185029Spjd	IN struct ucred *a_cred;
5704185029Spjd	IN struct thread *a_td;
5705185029Spjd};
5706185029Spjd*/
5707185029Spjd{
5708185029Spjd	zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs;
5709185029Spjd	struct thread *td = ap->a_td;
5710185029Spjd	struct nameidata nd;
5711185029Spjd	char attrprefix[16];
5712185029Spjd	u_char dirbuf[sizeof(struct dirent)];
5713185029Spjd	struct dirent *dp;
5714185029Spjd	struct iovec aiov;
5715185029Spjd	struct uio auio, *uio = ap->a_uio;
5716185029Spjd	size_t *sizep = ap->a_size;
5717185029Spjd	size_t plen;
5718185029Spjd	vnode_t *xvp = NULL, *vp;
5719185029Spjd	int done, error, eof, pos;
5720185029Spjd
5721195785Strasz	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5722195785Strasz	    ap->a_cred, ap->a_td, VREAD);
5723196303Spjd	if (error != 0)
5724195785Strasz		return (error);
5725195785Strasz
5726185029Spjd	error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
5727185029Spjd	    sizeof(attrprefix));
5728185029Spjd	if (error != 0)
5729185029Spjd		return (error);
5730185029Spjd	plen = strlen(attrprefix);
5731185029Spjd
5732185029Spjd	ZFS_ENTER(zfsvfs);
5733185029Spjd
5734195822Strasz	if (sizep != NULL)
5735195822Strasz		*sizep = 0;
5736195822Strasz
5737185029Spjd	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td,
5738185029Spjd	    LOOKUP_XATTR);
5739185029Spjd	if (error != 0) {
5740196303Spjd		ZFS_EXIT(zfsvfs);
5741195785Strasz		/*
5742195785Strasz		 * ENOATTR means that the EA directory does not yet exist,
5743195785Strasz		 * i.e. there are no extended attributes there.
5744195785Strasz		 */
5745195785Strasz		if (error == ENOATTR)
5746195785Strasz			error = 0;
5747185029Spjd		return (error);
5748185029Spjd	}
5749185029Spjd
5750241896Skib	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
5751188588Sjhb	    UIO_SYSSPACE, ".", xvp, td);
5752185029Spjd	error = namei(&nd);
5753185029Spjd	vp = nd.ni_vp;
5754185029Spjd	NDFREE(&nd, NDF_ONLY_PNBUF);
5755185029Spjd	if (error != 0) {
5756185029Spjd		ZFS_EXIT(zfsvfs);
5757185029Spjd		return (error);
5758185029Spjd	}
5759185029Spjd
5760185029Spjd	auio.uio_iov = &aiov;
5761185029Spjd	auio.uio_iovcnt = 1;
5762185029Spjd	auio.uio_segflg = UIO_SYSSPACE;
5763185029Spjd	auio.uio_td = td;
5764185029Spjd	auio.uio_rw = UIO_READ;
5765185029Spjd	auio.uio_offset = 0;
5766185029Spjd
5767185029Spjd	do {
5768185029Spjd		u_char nlen;
5769185029Spjd
5770185029Spjd		aiov.iov_base = (void *)dirbuf;
5771185029Spjd		aiov.iov_len = sizeof(dirbuf);
5772185029Spjd		auio.uio_resid = sizeof(dirbuf);
5773185029Spjd		error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL);
5774185029Spjd		done = sizeof(dirbuf) - auio.uio_resid;
5775185029Spjd		if (error != 0)
5776185029Spjd			break;
5777185029Spjd		for (pos = 0; pos < done;) {
5778185029Spjd			dp = (struct dirent *)(dirbuf + pos);
5779185029Spjd			pos += dp->d_reclen;
5780185029Spjd			/*
5781185029Spjd			 * XXX: Temporarily we also accept DT_UNKNOWN, as this
5782185029Spjd			 * is what we get when attribute was created on Solaris.
5783185029Spjd			 */
5784185029Spjd			if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN)
5785185029Spjd				continue;
5786185029Spjd			if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0)
5787185029Spjd				continue;
5788185029Spjd			else if (strncmp(dp->d_name, attrprefix, plen) != 0)
5789185029Spjd				continue;
5790185029Spjd			nlen = dp->d_namlen - plen;
5791185029Spjd			if (sizep != NULL)
5792185029Spjd				*sizep += 1 + nlen;
5793185029Spjd			else if (uio != NULL) {
5794185029Spjd				/*
5795185029Spjd				 * Format of extattr name entry is one byte for
5796185029Spjd				 * length and the rest for name.
5797185029Spjd				 */
5798185029Spjd				error = uiomove(&nlen, 1, uio->uio_rw, uio);
5799185029Spjd				if (error == 0) {
5800185029Spjd					error = uiomove(dp->d_name + plen, nlen,
5801185029Spjd					    uio->uio_rw, uio);
5802185029Spjd				}
5803185029Spjd				if (error != 0)
5804185029Spjd					break;
5805185029Spjd			}
5806185029Spjd		}
5807185029Spjd	} while (!eof && error == 0);
5808185029Spjd
5809185029Spjd	vput(vp);
5810185029Spjd	ZFS_EXIT(zfsvfs);
5811185029Spjd
5812185029Spjd	return (error);
5813185029Spjd}
5814185029Spjd
5815192800Straszint
5816192800Straszzfs_freebsd_getacl(ap)
5817192800Strasz	struct vop_getacl_args /* {
5818192800Strasz		struct vnode *vp;
5819192800Strasz		acl_type_t type;
5820192800Strasz		struct acl *aclp;
5821192800Strasz		struct ucred *cred;
5822192800Strasz		struct thread *td;
5823192800Strasz	} */ *ap;
5824192800Strasz{
5825192800Strasz	int		error;
5826192800Strasz	vsecattr_t      vsecattr;
5827192800Strasz
5828192800Strasz	if (ap->a_type != ACL_TYPE_NFS4)
5829197435Strasz		return (EINVAL);
5830192800Strasz
5831192800Strasz	vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
5832192800Strasz	if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL))
5833192800Strasz		return (error);
5834192800Strasz
5835192800Strasz	error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt);
5836196303Spjd	if (vsecattr.vsa_aclentp != NULL)
5837196303Spjd		kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz);
5838192800Strasz
5839196303Spjd	return (error);
5840192800Strasz}
5841192800Strasz
5842192800Straszint
5843192800Straszzfs_freebsd_setacl(ap)
5844192800Strasz	struct vop_setacl_args /* {
5845192800Strasz		struct vnode *vp;
5846192800Strasz		acl_type_t type;
5847192800Strasz		struct acl *aclp;
5848192800Strasz		struct ucred *cred;
5849192800Strasz		struct thread *td;
5850192800Strasz	} */ *ap;
5851192800Strasz{
5852192800Strasz	int		error;
5853192800Strasz	vsecattr_t      vsecattr;
5854192800Strasz	int		aclbsize;	/* size of acl list in bytes */
5855192800Strasz	aclent_t	*aaclp;
5856192800Strasz
5857192800Strasz	if (ap->a_type != ACL_TYPE_NFS4)
5858197435Strasz		return (EINVAL);
5859192800Strasz
5860314710Smm	if (ap->a_aclp == NULL)
5861314710Smm		return (EINVAL);
5862314710Smm
5863192800Strasz	if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES)
5864192800Strasz		return (EINVAL);
5865192800Strasz
5866192800Strasz	/*
5867196949Strasz	 * With NFSv4 ACLs, chmod(2) may need to add additional entries,
5868192800Strasz	 * splitting every entry into two and appending "canonical six"
5869192800Strasz	 * entries at the end.  Don't allow for setting an ACL that would
5870192800Strasz	 * cause chmod(2) to run out of ACL entries.
5871192800Strasz	 */
5872192800Strasz	if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES)
5873192800Strasz		return (ENOSPC);
5874192800Strasz
5875208030Strasz	error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR);
5876208030Strasz	if (error != 0)
5877208030Strasz		return (error);
5878208030Strasz
5879192800Strasz	vsecattr.vsa_mask = VSA_ACE;
5880192800Strasz	aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t);
5881192800Strasz	vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP);
5882192800Strasz	aaclp = vsecattr.vsa_aclentp;
5883192800Strasz	vsecattr.vsa_aclentsz = aclbsize;
5884192800Strasz
5885192800Strasz	aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp);
5886192800Strasz	error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL);
5887192800Strasz	kmem_free(aaclp, aclbsize);
5888192800Strasz
5889192800Strasz	return (error);
5890192800Strasz}
5891192800Strasz
/*
 * VOP_ACLCHECK handler.
 *
 * The argument structure (vnode, ACL type, ACL, credentials, thread) is
 * not examined; the operation unconditionally fails with EOPNOTSUPP.
 */
int
zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap)
{

	return (EOPNOTSUPP);
}
5905192800Strasz
5906299906Savgstatic int
5907299906Savgzfs_vptocnp(struct vop_vptocnp_args *ap)
5908299906Savg{
5909299906Savg	vnode_t *covered_vp;
5910299906Savg	vnode_t *vp = ap->a_vp;;
5911299906Savg	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
5912299906Savg	znode_t *zp = VTOZ(vp);
5913299906Savg	int ltype;
5914299906Savg	int error;
5915299906Savg
5916301870Savg	ZFS_ENTER(zfsvfs);
5917301870Savg	ZFS_VERIFY_ZP(zp);
5918301870Savg
5919299906Savg	/*
5920299906Savg	 * If we are a snapshot mounted under .zfs, run the operation
5921299906Savg	 * on the covered vnode.
5922299906Savg	 */
5923324158Savg	if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) {
5924307995Savg		char name[MAXNAMLEN + 1];
5925307995Savg		znode_t *dzp;
5926307995Savg		size_t len;
5927307995Savg
5928307995Savg		error = zfs_znode_parent_and_name(zp, &dzp, name);
5929307995Savg		if (error == 0) {
5930307995Savg			len = strlen(name);
5931314030Savg			if (*ap->a_buflen < len)
5932314030Savg				error = SET_ERROR(ENOMEM);
5933314030Savg		}
5934314030Savg		if (error == 0) {
5935307995Savg			*ap->a_buflen -= len;
5936307995Savg			bcopy(name, ap->a_buf + *ap->a_buflen, len);
5937307995Savg			*ap->a_vpp = ZTOV(dzp);
5938307995Savg		}
5939301870Savg		ZFS_EXIT(zfsvfs);
5940307995Savg		return (error);
5941301870Savg	}
5942301870Savg	ZFS_EXIT(zfsvfs);
5943299906Savg
5944299906Savg	covered_vp = vp->v_mount->mnt_vnodecovered;
5945299906Savg	vhold(covered_vp);
5946299906Savg	ltype = VOP_ISLOCKED(vp);
5947299906Savg	VOP_UNLOCK(vp, 0);
5948315842Savg	error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread);
5949299906Savg	if (error == 0) {
5950299906Savg		error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred,
5951299906Savg		    ap->a_buf, ap->a_buflen);
5952299906Savg		vput(covered_vp);
5953299906Savg	}
5954299906Savg	vn_lock(vp, ltype | LK_RETRY);
5955299906Savg	if ((vp->v_iflag & VI_DOOMED) != 0)
5956299906Savg		error = SET_ERROR(ENOENT);
5957299906Savg	return (error);
5958299906Savg}
5959299906Savg
#ifdef DIAGNOSTIC
/*
 * Diagnostic-only VOP_LOCK1 handler.
 *
 * The argument structure carries the vnode (a_vp), the lock flags
 * (a_flags) and the caller's file/line.  Locking itself is delegated to
 * vop_stdlock(); its result is always what is returned.
 *
 * After a successful lock request made without LK_NOWAIT, and provided
 * the vnode is still mounted, is not doomed, has a znode attached and that
 * znode is not flagged ZFS_XATTR, verify that the filesystem's teardown
 * lock is not held.
 */
static int
zfs_lock(struct vop_lock1_args *ap)
{
	vnode_t *vp;
	znode_t *zp;
	int err;

	err = vop_stdlock(ap);
	if (err != 0 || (ap->a_flags & LK_NOWAIT) != 0)
		return (err);

	vp = ap->a_vp;
	zp = vp->v_data;

	/* Nothing to check on unmounted or doomed vnodes. */
	if (vp->v_mount == NULL || (vp->v_iflag & VI_DOOMED) != 0)
		return (err);

	/* Vnodes without a znode and ZFS_XATTR znodes are exempt. */
	if (zp == NULL || (zp->z_pflags & ZFS_XATTR) != 0)
		return (err);

	VERIFY(!RRM_LOCK_HELD(&zp->z_zfsvfs->z_teardown_lock));
	return (err);
}
#endif
5985303970Savg
5986168404Spjdstruct vop_vector zfs_vnodeops;
5987168404Spjdstruct vop_vector zfs_fifoops;
5988209962Smmstruct vop_vector zfs_shareops;
5989168404Spjd
5990168404Spjdstruct vop_vector zfs_vnodeops = {
5991185029Spjd	.vop_default =		&default_vnodeops,
5992185029Spjd	.vop_inactive =		zfs_freebsd_inactive,
5993185029Spjd	.vop_reclaim =		zfs_freebsd_reclaim,
5994185029Spjd	.vop_access =		zfs_freebsd_access,
5995303970Savg	.vop_lookup =		zfs_cache_lookup,
5996185029Spjd	.vop_cachedlookup =	zfs_freebsd_lookup,
5997185029Spjd	.vop_getattr =		zfs_freebsd_getattr,
5998185029Spjd	.vop_setattr =		zfs_freebsd_setattr,
5999185029Spjd	.vop_create =		zfs_freebsd_create,
6000185029Spjd	.vop_mknod =		zfs_freebsd_create,
6001185029Spjd	.vop_mkdir =		zfs_freebsd_mkdir,
6002185029Spjd	.vop_readdir =		zfs_freebsd_readdir,
6003185029Spjd	.vop_fsync =		zfs_freebsd_fsync,
6004185029Spjd	.vop_open =		zfs_freebsd_open,
6005185029Spjd	.vop_close =		zfs_freebsd_close,
6006185029Spjd	.vop_rmdir =		zfs_freebsd_rmdir,
6007185029Spjd	.vop_ioctl =		zfs_freebsd_ioctl,
6008185029Spjd	.vop_link =		zfs_freebsd_link,
6009185029Spjd	.vop_symlink =		zfs_freebsd_symlink,
6010185029Spjd	.vop_readlink =		zfs_freebsd_readlink,
6011185029Spjd	.vop_read =		zfs_freebsd_read,
6012185029Spjd	.vop_write =		zfs_freebsd_write,
6013185029Spjd	.vop_remove =		zfs_freebsd_remove,
6014185029Spjd	.vop_rename =		zfs_freebsd_rename,
6015185029Spjd	.vop_pathconf =		zfs_freebsd_pathconf,
6016243518Savg	.vop_bmap =		zfs_freebsd_bmap,
6017185029Spjd	.vop_fid =		zfs_freebsd_fid,
6018185029Spjd	.vop_getextattr =	zfs_getextattr,
6019185029Spjd	.vop_deleteextattr =	zfs_deleteextattr,
6020185029Spjd	.vop_setextattr =	zfs_setextattr,
6021185029Spjd	.vop_listextattr =	zfs_listextattr,
6022192800Strasz	.vop_getacl =		zfs_freebsd_getacl,
6023192800Strasz	.vop_setacl =		zfs_freebsd_setacl,
6024192800Strasz	.vop_aclcheck =		zfs_freebsd_aclcheck,
6025213937Savg	.vop_getpages =		zfs_freebsd_getpages,
6026258746Savg	.vop_putpages =		zfs_freebsd_putpages,
6027299906Savg	.vop_vptocnp =		zfs_vptocnp,
6028303970Savg#ifdef DIAGNOSTIC
6029303970Savg	.vop_lock1 =		zfs_lock,
6030303970Savg#endif
6031168404Spjd};
6032168404Spjd
6033169170Spjdstruct vop_vector zfs_fifoops = {
6034185029Spjd	.vop_default =		&fifo_specops,
6035200162Skib	.vop_fsync =		zfs_freebsd_fsync,
6036185029Spjd	.vop_access =		zfs_freebsd_access,
6037185029Spjd	.vop_getattr =		zfs_freebsd_getattr,
6038185029Spjd	.vop_inactive =		zfs_freebsd_inactive,
6039185029Spjd	.vop_read =		VOP_PANIC,
6040185029Spjd	.vop_reclaim =		zfs_freebsd_reclaim,
6041185029Spjd	.vop_setattr =		zfs_freebsd_setattr,
6042185029Spjd	.vop_write =		VOP_PANIC,
6043328298Sjhb	.vop_pathconf = 	zfs_freebsd_pathconf,
6044185029Spjd	.vop_fid =		zfs_freebsd_fid,
6045192800Strasz	.vop_getacl =		zfs_freebsd_getacl,
6046192800Strasz	.vop_setacl =		zfs_freebsd_setacl,
6047192800Strasz	.vop_aclcheck =		zfs_freebsd_aclcheck,
6048168404Spjd};
6049209962Smm
6050209962Smm/*
6051209962Smm * special share hidden files vnode operations template
6052209962Smm */
6053209962Smmstruct vop_vector zfs_shareops = {
6054209962Smm	.vop_default =		&default_vnodeops,
6055209962Smm	.vop_access =		zfs_freebsd_access,
6056209962Smm	.vop_inactive =		zfs_freebsd_inactive,
6057209962Smm	.vop_reclaim =		zfs_freebsd_reclaim,
6058209962Smm	.vop_fid =		zfs_freebsd_fid,
6059209962Smm	.vop_pathconf =		zfs_freebsd_pathconf,
6060209962Smm};
6061