zfs_vnops.c revision 330062
1168404Spjd/*
2168404Spjd * CDDL HEADER START
3168404Spjd *
4168404Spjd * The contents of this file are subject to the terms of the
5168404Spjd * Common Development and Distribution License (the "License").
6168404Spjd * You may not use this file except in compliance with the License.
7168404Spjd *
8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9168404Spjd * or http://www.opensolaris.org/os/licensing.
10168404Spjd * See the License for the specific language governing permissions
11168404Spjd * and limitations under the License.
12168404Spjd *
13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each
14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15168404Spjd * If applicable, add the following below this CDDL HEADER, with the
16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18168404Spjd *
19168404Spjd * CDDL HEADER END
20168404Spjd */
21321545Smav
22168404Spjd/*
23212694Smm * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24289562Smav * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25296519Smav * Copyright (c) 2014 Integros [integros.com]
26321545Smav * Copyright 2017 Nexenta Systems, Inc.
27168404Spjd */
28168404Spjd
29169195Spjd/* Portions Copyright 2007 Jeremy Teo */
30219089Spjd/* Portions Copyright 2010 Robert Milkowski */
31169195Spjd
32168404Spjd#include <sys/types.h>
33168404Spjd#include <sys/param.h>
34168404Spjd#include <sys/time.h>
35168404Spjd#include <sys/systm.h>
36168404Spjd#include <sys/sysmacros.h>
37168404Spjd#include <sys/resource.h>
38168404Spjd#include <sys/vfs.h>
39248084Sattilio#include <sys/vm.h>
40168404Spjd#include <sys/vnode.h>
41168404Spjd#include <sys/file.h>
42168404Spjd#include <sys/stat.h>
43168404Spjd#include <sys/kmem.h>
44168404Spjd#include <sys/taskq.h>
45168404Spjd#include <sys/uio.h>
46168404Spjd#include <sys/atomic.h>
47168404Spjd#include <sys/namei.h>
48168404Spjd#include <sys/mman.h>
49168404Spjd#include <sys/cmn_err.h>
50168404Spjd#include <sys/errno.h>
51168404Spjd#include <sys/unistd.h>
52168404Spjd#include <sys/zfs_dir.h>
53168404Spjd#include <sys/zfs_ioctl.h>
54168404Spjd#include <sys/fs/zfs.h>
55168404Spjd#include <sys/dmu.h>
56219089Spjd#include <sys/dmu_objset.h>
57168404Spjd#include <sys/spa.h>
58168404Spjd#include <sys/txg.h>
59168404Spjd#include <sys/dbuf.h>
60168404Spjd#include <sys/zap.h>
61219089Spjd#include <sys/sa.h>
62168404Spjd#include <sys/dirent.h>
63168962Spjd#include <sys/policy.h>
64168962Spjd#include <sys/sunddi.h>
65168404Spjd#include <sys/filio.h>
66209962Smm#include <sys/sid.h>
67168404Spjd#include <sys/zfs_ctldir.h>
68185029Spjd#include <sys/zfs_fuid.h>
69219089Spjd#include <sys/zfs_sa.h>
70168404Spjd#include <sys/zfs_rlock.h>
71185029Spjd#include <sys/extdirent.h>
72185029Spjd#include <sys/kidmap.h>
73168404Spjd#include <sys/bio.h>
74168404Spjd#include <sys/buf.h>
75168404Spjd#include <sys/sched.h>
76192800Strasz#include <sys/acl.h>
77239077Smarius#include <vm/vm_param.h>
78325132Savg#include <sys/zil.h>
79168404Spjd
80168404Spjd/*
81168404Spjd * Programming rules.
82168404Spjd *
83168404Spjd * Each vnode op performs some logical unit of work.  To do this, the ZPL must
84168404Spjd * properly lock its in-core state, create a DMU transaction, do the work,
85168404Spjd * record this work in the intent log (ZIL), commit the DMU transaction,
86185029Spjd * and wait for the intent log to commit if it is a synchronous operation.
87185029Spjd * Moreover, the vnode ops must work in both normal and log replay context.
88168404Spjd * The ordering of events is important to avoid deadlocks and references
89168404Spjd * to freed memory.  The example below illustrates the following Big Rules:
90168404Spjd *
91251631Sdelphij *  (1)	A check must be made in each zfs thread for a mounted file system.
92168404Spjd *	This is done avoiding races using ZFS_ENTER(zfsvfs).
93251631Sdelphij *	A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
94251631Sdelphij *	must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
95251631Sdelphij *	can return EIO from the calling function.
96168404Spjd *
97168404Spjd *  (2)	VN_RELE() should always be the last thing except for zil_commit()
98168404Spjd *	(if necessary) and ZFS_EXIT(). This is for 3 reasons:
99168404Spjd *	First, if it's the last reference, the vnode/znode
100168404Spjd *	can be freed, so the zp may point to freed memory.  Second, the last
101168404Spjd *	reference will call zfs_zinactive(), which may induce a lot of work --
102168404Spjd *	pushing cached pages (which acquires range locks) and syncing out
103168404Spjd *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
104168404Spjd *	which could deadlock the system if you were already holding one.
105191900Skmacy *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
106168404Spjd *
107168404Spjd *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
108168404Spjd *	as they can span dmu_tx_assign() calls.
109168404Spjd *
110258720Savg *  (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
111258720Savg *      dmu_tx_assign().  This is critical because we don't want to block
112258720Savg *      while holding locks.
113168404Spjd *
114258720Savg *	If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
115258720Savg *	reduces lock contention and CPU usage when we must wait (note that if
116258720Savg *	throughput is constrained by the storage, nearly every transaction
117258720Savg *	must wait).
118258720Savg *
119258720Savg *      Note, in particular, that if a lock is sometimes acquired before
120258720Savg *      the tx assigns, and sometimes after (e.g. z_lock), then failing
121258720Savg *      to use a non-blocking assign can deadlock the system.  The scenario:
122258720Savg *
123168404Spjd *	Thread A has grabbed a lock before calling dmu_tx_assign().
124168404Spjd *	Thread B is in an already-assigned tx, and blocks for this lock.
125168404Spjd *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
126168404Spjd *	forever, because the previous txg can't quiesce until B's tx commits.
127168404Spjd *
128168404Spjd *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
129258632Savg *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
130258632Savg *	calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT,
131258632Savg *	to indicate that this operation has already called dmu_tx_wait().
132258632Savg *	This will ensure that we don't retry forever, waiting a short bit
133258632Savg *	each time.
134168404Spjd *
135168404Spjd *  (5)	If the operation succeeded, generate the intent log entry for it
136168404Spjd *	before dropping locks.  This ensures that the ordering of events
137168404Spjd *	in the intent log matches the order in which they actually occurred.
138251631Sdelphij *	During ZIL replay the zfs_log_* functions will update the sequence
139209962Smm *	number to indicate the zil transaction has replayed.
140168404Spjd *
141168404Spjd *  (6)	At the end of each vnode op, the DMU tx must always commit,
142168404Spjd *	regardless of whether there were any errors.
143168404Spjd *
144219089Spjd *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
145168404Spjd *	to ensure that synchronous semantics are provided when necessary.
146168404Spjd *
147168404Spjd * In general, this is how things should be ordered in each vnode op:
148168404Spjd *
149168404Spjd *	ZFS_ENTER(zfsvfs);		// exit if unmounted
150168404Spjd * top:
151303970Savg *	zfs_dirent_lookup(&dl, ...)	// lock directory entry (may VN_HOLD())
152168404Spjd *	rw_enter(...);			// grab any other locks you need
153168404Spjd *	tx = dmu_tx_create(...);	// get DMU tx
154168404Spjd *	dmu_tx_hold_*();		// hold each object you might modify
155258632Savg *	error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
156168404Spjd *	if (error) {
157168404Spjd *		rw_exit(...);		// drop locks
158168404Spjd *		zfs_dirent_unlock(dl);	// unlock directory entry
159168404Spjd *		VN_RELE(...);		// release held vnodes
160209962Smm *		if (error == ERESTART) {
161258632Savg *			waited = B_TRUE;
162168404Spjd *			dmu_tx_wait(tx);
163168404Spjd *			dmu_tx_abort(tx);
164168404Spjd *			goto top;
165168404Spjd *		}
166168404Spjd *		dmu_tx_abort(tx);	// abort DMU tx
167168404Spjd *		ZFS_EXIT(zfsvfs);	// finished in zfs
168168404Spjd *		return (error);		// really out of space
169168404Spjd *	}
170168404Spjd *	error = do_real_work();		// do whatever this VOP does
171168404Spjd *	if (error == 0)
172168404Spjd *		zfs_log_*(...);		// on success, make ZIL entry
173168404Spjd *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
174168404Spjd *	rw_exit(...);			// drop locks
175168404Spjd *	zfs_dirent_unlock(dl);		// unlock directory entry
176168404Spjd *	VN_RELE(...);			// release held vnodes
177219089Spjd *	zil_commit(zilog, foid);	// synchronous when necessary
178168404Spjd *	ZFS_EXIT(zfsvfs);		// finished in zfs
179168404Spjd *	return (error);			// done, report error
180168404Spjd */
181185029Spjd
182168404Spjd/* ARGSUSED */
183168404Spjdstatic int
184185029Spjdzfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
185168404Spjd{
186168962Spjd	znode_t	*zp = VTOZ(*vpp);
187209962Smm	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
188168404Spjd
189209962Smm	ZFS_ENTER(zfsvfs);
190209962Smm	ZFS_VERIFY_ZP(zp);
191209962Smm
192219089Spjd	if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
193185029Spjd	    ((flag & FAPPEND) == 0)) {
194209962Smm		ZFS_EXIT(zfsvfs);
195249195Smm		return (SET_ERROR(EPERM));
196185029Spjd	}
197185029Spjd
198185029Spjd	if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
199185029Spjd	    ZTOV(zp)->v_type == VREG &&
200219089Spjd	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
201209962Smm		if (fs_vscan(*vpp, cr, 0) != 0) {
202209962Smm			ZFS_EXIT(zfsvfs);
203249195Smm			return (SET_ERROR(EACCES));
204209962Smm		}
205209962Smm	}
206185029Spjd
207168404Spjd	/* Keep a count of the synchronous opens in the znode */
208168962Spjd	if (flag & (FSYNC | FDSYNC))
209168404Spjd		atomic_inc_32(&zp->z_sync_cnt);
210185029Spjd
211209962Smm	ZFS_EXIT(zfsvfs);
212168404Spjd	return (0);
213168404Spjd}
214168404Spjd
215168404Spjd/* ARGSUSED */
216168404Spjdstatic int
217185029Spjdzfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
218185029Spjd    caller_context_t *ct)
219168404Spjd{
220168962Spjd	znode_t	*zp = VTOZ(vp);
221209962Smm	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
222168404Spjd
223210470Smm	/*
224210470Smm	 * Clean up any locks held by this process on the vp.
225210470Smm	 */
226210470Smm	cleanlocks(vp, ddi_get_pid(), 0);
227210470Smm	cleanshares(vp, ddi_get_pid());
228210470Smm
229209962Smm	ZFS_ENTER(zfsvfs);
230209962Smm	ZFS_VERIFY_ZP(zp);
231209962Smm
232168404Spjd	/* Decrement the synchronous opens in the znode */
233185029Spjd	if ((flag & (FSYNC | FDSYNC)) && (count == 1))
234168404Spjd		atomic_dec_32(&zp->z_sync_cnt);
235168404Spjd
236185029Spjd	if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
237185029Spjd	    ZTOV(zp)->v_type == VREG &&
238219089Spjd	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
239185029Spjd		VERIFY(fs_vscan(vp, cr, 1) == 0);
240185029Spjd
241209962Smm	ZFS_EXIT(zfsvfs);
242168404Spjd	return (0);
243168404Spjd}
244168404Spjd
245168404Spjd/*
246168404Spjd * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and
247168404Spjd * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter.
248168404Spjd */
249168404Spjdstatic int
250168978Spjdzfs_holey(vnode_t *vp, u_long cmd, offset_t *off)
251168404Spjd{
252168404Spjd	znode_t	*zp = VTOZ(vp);
253168404Spjd	uint64_t noff = (uint64_t)*off; /* new offset */
254168404Spjd	uint64_t file_sz;
255168404Spjd	int error;
256168404Spjd	boolean_t hole;
257168404Spjd
258219089Spjd	file_sz = zp->z_size;
259168404Spjd	if (noff >= file_sz)  {
260249195Smm		return (SET_ERROR(ENXIO));
261168404Spjd	}
262168404Spjd
263168962Spjd	if (cmd == _FIO_SEEK_HOLE)
264168404Spjd		hole = B_TRUE;
265168404Spjd	else
266168404Spjd		hole = B_FALSE;
267168404Spjd
268168404Spjd	error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff);
269168404Spjd
270271536Sdelphij	if (error == ESRCH)
271249195Smm		return (SET_ERROR(ENXIO));
272271536Sdelphij
273271536Sdelphij	/*
274271536Sdelphij	 * We could find a hole that begins after the logical end-of-file,
275271536Sdelphij	 * because dmu_offset_next() only works on whole blocks.  If the
276271536Sdelphij	 * EOF falls mid-block, then indicate that the "virtual hole"
277271536Sdelphij	 * at the end of the file begins at the logical EOF, rather than
278271536Sdelphij	 * at the end of the last block.
279271536Sdelphij	 */
280271536Sdelphij	if (noff > file_sz) {
281271536Sdelphij		ASSERT(hole);
282271536Sdelphij		noff = file_sz;
283168404Spjd	}
284168404Spjd
285168404Spjd	if (noff < *off)
286168404Spjd		return (error);
287168404Spjd	*off = noff;
288168404Spjd	return (error);
289168404Spjd}
290168404Spjd
291168404Spjd/* ARGSUSED */
292168404Spjdstatic int
293168978Spjdzfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred,
294185029Spjd    int *rvalp, caller_context_t *ct)
295168404Spjd{
296168962Spjd	offset_t off;
297287103Savg	offset_t ndata;
298287103Savg	dmu_object_info_t doi;
299168962Spjd	int error;
300168962Spjd	zfsvfs_t *zfsvfs;
301185029Spjd	znode_t *zp;
302168404Spjd
303168404Spjd	switch (com) {
304185029Spjd	case _FIOFFS:
305287103Savg	{
306168962Spjd		return (0);
307168404Spjd
308168962Spjd		/*
309168962Spjd		 * The following two ioctls are used by bfu.  Faking out,
310168962Spjd		 * necessary to avoid bfu errors.
311168962Spjd		 */
312287103Savg	}
313185029Spjd	case _FIOGDIO:
314185029Spjd	case _FIOSDIO:
315287103Savg	{
316168962Spjd		return (0);
317287103Savg	}
318168962Spjd
319185029Spjd	case _FIO_SEEK_DATA:
320185029Spjd	case _FIO_SEEK_HOLE:
321287103Savg	{
322277300Ssmh#ifdef illumos
323168962Spjd		if (ddi_copyin((void *)data, &off, sizeof (off), flag))
324249195Smm			return (SET_ERROR(EFAULT));
325233918Savg#else
326233918Savg		off = *(offset_t *)data;
327233918Savg#endif
328185029Spjd		zp = VTOZ(vp);
329185029Spjd		zfsvfs = zp->z_zfsvfs;
330168404Spjd		ZFS_ENTER(zfsvfs);
331185029Spjd		ZFS_VERIFY_ZP(zp);
332168404Spjd
333168404Spjd		/* offset parameter is in/out */
334168404Spjd		error = zfs_holey(vp, com, &off);
335168404Spjd		ZFS_EXIT(zfsvfs);
336168404Spjd		if (error)
337168404Spjd			return (error);
338277300Ssmh#ifdef illumos
339168962Spjd		if (ddi_copyout(&off, (void *)data, sizeof (off), flag))
340249195Smm			return (SET_ERROR(EFAULT));
341233918Savg#else
342233918Savg		*(offset_t *)data = off;
343233918Savg#endif
344168404Spjd		return (0);
345168404Spjd	}
346287103Savg#ifdef illumos
347287103Savg	case _FIO_COUNT_FILLED:
348287103Savg	{
349287103Savg		/*
350287103Savg		 * _FIO_COUNT_FILLED adds a new ioctl command which
351287103Savg		 * exposes the number of filled blocks in a
352287103Savg		 * ZFS object.
353287103Savg		 */
354287103Savg		zp = VTOZ(vp);
355287103Savg		zfsvfs = zp->z_zfsvfs;
356287103Savg		ZFS_ENTER(zfsvfs);
357287103Savg		ZFS_VERIFY_ZP(zp);
358287103Savg
359287103Savg		/*
360287103Savg		 * Wait for all dirty blocks for this object
361287103Savg		 * to get synced out to disk, and the DMU info
362287103Savg		 * updated.
363287103Savg		 */
364287103Savg		error = dmu_object_wait_synced(zfsvfs->z_os, zp->z_id);
365287103Savg		if (error) {
366287103Savg			ZFS_EXIT(zfsvfs);
367287103Savg			return (error);
368287103Savg		}
369287103Savg
370287103Savg		/*
371287103Savg		 * Retrieve fill count from DMU object.
372287103Savg		 */
373287103Savg		error = dmu_object_info(zfsvfs->z_os, zp->z_id, &doi);
374287103Savg		if (error) {
375287103Savg			ZFS_EXIT(zfsvfs);
376287103Savg			return (error);
377287103Savg		}
378287103Savg
379287103Savg		ndata = doi.doi_fill_count;
380287103Savg
381287103Savg		ZFS_EXIT(zfsvfs);
382287103Savg		if (ddi_copyout(&ndata, (void *)data, sizeof (ndata), flag))
383287103Savg			return (SET_ERROR(EFAULT));
384287103Savg		return (0);
385287103Savg	}
386287103Savg#endif
387287103Savg	}
388249195Smm	return (SET_ERROR(ENOTTY));
389168404Spjd}
390168404Spjd
391209962Smmstatic vm_page_t
392253953Sattiliopage_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
393209962Smm{
394209962Smm	vm_object_t obj;
395209962Smm	vm_page_t pp;
396258353Savg	int64_t end;
397209962Smm
398258353Savg	/*
399258353Savg	 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
400258353Savg	 * aligned boundaries, if the range is not aligned.  As a result a
401258353Savg	 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
402258353Savg	 * It may happen that all DEV_BSIZE subranges are marked clean and thus
403258353Savg	 * the whole page would be considred clean despite have some dirty data.
404258353Savg	 * For this reason we should shrink the range to DEV_BSIZE aligned
405258353Savg	 * boundaries before calling vm_page_clear_dirty.
406258353Savg	 */
407258353Savg	end = rounddown2(off + nbytes, DEV_BSIZE);
408258353Savg	off = roundup2(off, DEV_BSIZE);
409258353Savg	nbytes = end - off;
410258353Savg
411209962Smm	obj = vp->v_object;
412248084Sattilio	zfs_vmobject_assert_wlocked(obj);
413209962Smm
414209962Smm	for (;;) {
415209962Smm		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
416246293Savg		    pp->valid) {
417254138Sattilio			if (vm_page_xbusied(pp)) {
418212652Savg				/*
419212652Savg				 * Reference the page before unlocking and
420212652Savg				 * sleeping so that the page daemon is less
421212652Savg				 * likely to reclaim it.
422212652Savg				 */
423225418Skib				vm_page_reference(pp);
424254138Sattilio				vm_page_lock(pp);
425254138Sattilio				zfs_vmobject_wunlock(obj);
426307671Skib				vm_page_busy_sleep(pp, "zfsmwb", true);
427254138Sattilio				zfs_vmobject_wlock(obj);
428209962Smm				continue;
429212652Savg			}
430254138Sattilio			vm_page_sbusy(pp);
431319091Savg		} else if (pp != NULL) {
432319091Savg			ASSERT(!pp->valid);
433252337Sgavin			pp = NULL;
434209962Smm		}
435246293Savg
436246293Savg		if (pp != NULL) {
437246293Savg			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
438253953Sattilio			vm_object_pip_add(obj, 1);
439246293Savg			pmap_remove_write(pp);
440258353Savg			if (nbytes != 0)
441258353Savg				vm_page_clear_dirty(pp, off, nbytes);
442246293Savg		}
443209962Smm		break;
444209962Smm	}
445209962Smm	return (pp);
446209962Smm}
447209962Smm
448209962Smmstatic void
449253953Sattiliopage_unbusy(vm_page_t pp)
450209962Smm{
451209962Smm
452254138Sattilio	vm_page_sunbusy(pp);
453253953Sattilio	vm_object_pip_subtract(pp->object, 1);
454209962Smm}
455209962Smm
456253953Sattiliostatic vm_page_t
457253953Sattiliopage_hold(vnode_t *vp, int64_t start)
458253953Sattilio{
459253953Sattilio	vm_object_t obj;
460253953Sattilio	vm_page_t pp;
461253953Sattilio
462253953Sattilio	obj = vp->v_object;
463253953Sattilio	zfs_vmobject_assert_wlocked(obj);
464253953Sattilio
465253953Sattilio	for (;;) {
466253953Sattilio		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
467253953Sattilio		    pp->valid) {
468254138Sattilio			if (vm_page_xbusied(pp)) {
469253953Sattilio				/*
470253953Sattilio				 * Reference the page before unlocking and
471253953Sattilio				 * sleeping so that the page daemon is less
472253953Sattilio				 * likely to reclaim it.
473253953Sattilio				 */
474253953Sattilio				vm_page_reference(pp);
475254138Sattilio				vm_page_lock(pp);
476254138Sattilio				zfs_vmobject_wunlock(obj);
477307671Skib				vm_page_busy_sleep(pp, "zfsmwb", true);
478254138Sattilio				zfs_vmobject_wlock(obj);
479253953Sattilio				continue;
480253953Sattilio			}
481253953Sattilio
482253953Sattilio			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
483253953Sattilio			vm_page_lock(pp);
484253953Sattilio			vm_page_hold(pp);
485253953Sattilio			vm_page_unlock(pp);
486253953Sattilio
487253953Sattilio		} else
488253953Sattilio			pp = NULL;
489253953Sattilio		break;
490253953Sattilio	}
491253953Sattilio	return (pp);
492253953Sattilio}
493253953Sattilio
494253953Sattiliostatic void
495253953Sattiliopage_unhold(vm_page_t pp)
496253953Sattilio{
497253953Sattilio
498253953Sattilio	vm_page_lock(pp);
499253953Sattilio	vm_page_unhold(pp);
500253953Sattilio	vm_page_unlock(pp);
501253953Sattilio}
502253953Sattilio
503168404Spjd/*
504168404Spjd * When a file is memory mapped, we must keep the IO data synchronized
505168404Spjd * between the DMU cache and the memory mapped pages.  What this means:
506168404Spjd *
507168404Spjd * On Write:	If we find a memory mapped page, we write to *both*
508168404Spjd *		the page and the dmu buffer.
509168404Spjd */
510209962Smmstatic void
511209962Smmupdate_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid,
512209962Smm    int segflg, dmu_tx_t *tx)
513168404Spjd{
514168404Spjd	vm_object_t obj;
515168404Spjd	struct sf_buf *sf;
516246293Savg	caddr_t va;
517212655Savg	int off;
518168404Spjd
519258746Savg	ASSERT(segflg != UIO_NOCOPY);
520168404Spjd	ASSERT(vp->v_mount != NULL);
521168404Spjd	obj = vp->v_object;
522168404Spjd	ASSERT(obj != NULL);
523168404Spjd
524168404Spjd	off = start & PAGEOFFSET;
525248084Sattilio	zfs_vmobject_wlock(obj);
526168404Spjd	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
527209962Smm		vm_page_t pp;
528246293Savg		int nbytes = imin(PAGESIZE - off, len);
529168404Spjd
530258746Savg		if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
531248084Sattilio			zfs_vmobject_wunlock(obj);
532168404Spjd
533246293Savg			va = zfs_map_page(pp, &sf);
534246293Savg			(void) dmu_read(os, oid, start+off, nbytes,
535246293Savg			    va+off, DMU_READ_PREFETCH);;
536209962Smm			zfs_unmap_page(sf);
537246293Savg
538248084Sattilio			zfs_vmobject_wlock(obj);
539253953Sattilio			page_unbusy(pp);
540168404Spjd		}
541209962Smm		len -= nbytes;
542168404Spjd		off = 0;
543168404Spjd	}
544258746Savg	vm_object_pip_wakeupn(obj, 0);
545248084Sattilio	zfs_vmobject_wunlock(obj);
546168404Spjd}
547168404Spjd
548168404Spjd/*
549219089Spjd * Read with UIO_NOCOPY flag means that sendfile(2) requests
550219089Spjd * ZFS to populate a range of page cache pages with data.
551219089Spjd *
552219089Spjd * NOTE: this function could be optimized to pre-allocate
553254138Sattilio * all pages in advance, drain exclusive busy on all of them,
554219089Spjd * map them into contiguous KVA region and populate them
555219089Spjd * in one single dmu_read() call.
556219089Spjd */
557219089Spjdstatic int
558219089Spjdmappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
559219089Spjd{
560219089Spjd	znode_t *zp = VTOZ(vp);
561219089Spjd	objset_t *os = zp->z_zfsvfs->z_os;
562219089Spjd	struct sf_buf *sf;
563219089Spjd	vm_object_t obj;
564219089Spjd	vm_page_t pp;
565219089Spjd	int64_t start;
566219089Spjd	caddr_t va;
567219089Spjd	int len = nbytes;
568219089Spjd	int off;
569219089Spjd	int error = 0;
570219089Spjd
571219089Spjd	ASSERT(uio->uio_segflg == UIO_NOCOPY);
572219089Spjd	ASSERT(vp->v_mount != NULL);
573219089Spjd	obj = vp->v_object;
574219089Spjd	ASSERT(obj != NULL);
575219089Spjd	ASSERT((uio->uio_loffset & PAGEOFFSET) == 0);
576219089Spjd
577248084Sattilio	zfs_vmobject_wlock(obj);
578219089Spjd	for (start = uio->uio_loffset; len > 0; start += PAGESIZE) {
579219089Spjd		int bytes = MIN(PAGESIZE, len);
580219089Spjd
581254138Sattilio		pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY |
582254649Skib		    VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
583219089Spjd		if (pp->valid == 0) {
584248084Sattilio			zfs_vmobject_wunlock(obj);
585219089Spjd			va = zfs_map_page(pp, &sf);
586219089Spjd			error = dmu_read(os, zp->z_id, start, bytes, va,
587219089Spjd			    DMU_READ_PREFETCH);
588219089Spjd			if (bytes != PAGESIZE && error == 0)
589219089Spjd				bzero(va + bytes, PAGESIZE - bytes);
590219089Spjd			zfs_unmap_page(sf);
591248084Sattilio			zfs_vmobject_wlock(obj);
592254138Sattilio			vm_page_sunbusy(pp);
593219089Spjd			vm_page_lock(pp);
594219089Spjd			if (error) {
595253073Savg				if (pp->wire_count == 0 && pp->valid == 0 &&
596254138Sattilio				    !vm_page_busied(pp))
597253073Savg					vm_page_free(pp);
598219089Spjd			} else {
599219089Spjd				pp->valid = VM_PAGE_BITS_ALL;
600219089Spjd				vm_page_activate(pp);
601219089Spjd			}
602219089Spjd			vm_page_unlock(pp);
603258739Savg		} else {
604258739Savg			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
605254138Sattilio			vm_page_sunbusy(pp);
606258739Savg		}
607219089Spjd		if (error)
608219089Spjd			break;
609219089Spjd		uio->uio_resid -= bytes;
610219089Spjd		uio->uio_offset += bytes;
611219089Spjd		len -= bytes;
612219089Spjd	}
613248084Sattilio	zfs_vmobject_wunlock(obj);
614219089Spjd	return (error);
615219089Spjd}
616219089Spjd
617219089Spjd/*
618168404Spjd * When a file is memory mapped, we must keep the IO data synchronized
619168404Spjd * between the DMU cache and the memory mapped pages.  What this means:
620168404Spjd *
621168404Spjd * On Read:	We "read" preferentially from memory mapped pages,
622168404Spjd *		else we default from the dmu buffer.
623168404Spjd *
624168404Spjd * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
625251631Sdelphij *	 the file is memory mapped.
626168404Spjd */
627168404Spjdstatic int
628168404Spjdmappedread(vnode_t *vp, int nbytes, uio_t *uio)
629168404Spjd{
630168404Spjd	znode_t *zp = VTOZ(vp);
631168404Spjd	vm_object_t obj;
632212655Savg	int64_t start;
633168926Spjd	caddr_t va;
634168404Spjd	int len = nbytes;
635212655Savg	int off;
636168404Spjd	int error = 0;
637168404Spjd
638168404Spjd	ASSERT(vp->v_mount != NULL);
639168404Spjd	obj = vp->v_object;
640168404Spjd	ASSERT(obj != NULL);
641168404Spjd
642168404Spjd	start = uio->uio_loffset;
643168404Spjd	off = start & PAGEOFFSET;
644248084Sattilio	zfs_vmobject_wlock(obj);
645168404Spjd	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
646219089Spjd		vm_page_t pp;
647219089Spjd		uint64_t bytes = MIN(PAGESIZE - off, len);
648168404Spjd
649253953Sattilio		if (pp = page_hold(vp, start)) {
650219089Spjd			struct sf_buf *sf;
651219089Spjd			caddr_t va;
652212652Savg
653248084Sattilio			zfs_vmobject_wunlock(obj);
654219089Spjd			va = zfs_map_page(pp, &sf);
655298105Savg#ifdef illumos
656219089Spjd			error = uiomove(va + off, bytes, UIO_READ, uio);
657298105Savg#else
658298105Savg			error = vn_io_fault_uiomove(va + off, bytes, uio);
659298105Savg#endif
660219089Spjd			zfs_unmap_page(sf);
661248084Sattilio			zfs_vmobject_wlock(obj);
662253953Sattilio			page_unhold(pp);
663219089Spjd		} else {
664248084Sattilio			zfs_vmobject_wunlock(obj);
665272809Sdelphij			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
666272809Sdelphij			    uio, bytes);
667248084Sattilio			zfs_vmobject_wlock(obj);
668168404Spjd		}
669168404Spjd		len -= bytes;
670168404Spjd		off = 0;
671168404Spjd		if (error)
672168404Spjd			break;
673168404Spjd	}
674248084Sattilio	zfs_vmobject_wunlock(obj);
675168404Spjd	return (error);
676168404Spjd}
677168404Spjd
678168404Spjdoffset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */
679168404Spjd
680168404Spjd/*
681168404Spjd * Read bytes from specified file into supplied buffer.
682168404Spjd *
683168404Spjd *	IN:	vp	- vnode of file to be read from.
684168404Spjd *		uio	- structure supplying read location, range info,
685168404Spjd *			  and return buffer.
686168404Spjd *		ioflag	- SYNC flags; used to provide FRSYNC semantics.
687168404Spjd *		cr	- credentials of caller.
688185029Spjd *		ct	- caller context
689168404Spjd *
690168404Spjd *	OUT:	uio	- updated offset and range, buffer filled.
691168404Spjd *
692251631Sdelphij *	RETURN:	0 on success, error code on failure.
693168404Spjd *
694168404Spjd * Side Effects:
695168404Spjd *	vp - atime updated if byte count > 0
696168404Spjd */
697168404Spjd/* ARGSUSED */
698168404Spjdstatic int
699168962Spjdzfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
700168404Spjd{
701168404Spjd	znode_t		*zp = VTOZ(vp);
702168404Spjd	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
703168404Spjd	ssize_t		n, nbytes;
704247187Smm	int		error = 0;
705168404Spjd	rl_t		*rl;
706219089Spjd	xuio_t		*xuio = NULL;
707168404Spjd
708168404Spjd	ZFS_ENTER(zfsvfs);
709185029Spjd	ZFS_VERIFY_ZP(zp);
710168404Spjd
711219089Spjd	if (zp->z_pflags & ZFS_AV_QUARANTINED) {
712185029Spjd		ZFS_EXIT(zfsvfs);
713249195Smm		return (SET_ERROR(EACCES));
714185029Spjd	}
715185029Spjd
716168404Spjd	/*
717168404Spjd	 * Validate file offset
718168404Spjd	 */
719168404Spjd	if (uio->uio_loffset < (offset_t)0) {
720168404Spjd		ZFS_EXIT(zfsvfs);
721249195Smm		return (SET_ERROR(EINVAL));
722168404Spjd	}
723168404Spjd
724168404Spjd	/*
725168404Spjd	 * Fasttrack empty reads
726168404Spjd	 */
727168404Spjd	if (uio->uio_resid == 0) {
728168404Spjd		ZFS_EXIT(zfsvfs);
729168404Spjd		return (0);
730168404Spjd	}
731168404Spjd
732168404Spjd	/*
733168962Spjd	 * Check for mandatory locks
734168962Spjd	 */
735219089Spjd	if (MANDMODE(zp->z_mode)) {
736168962Spjd		if (error = chklock(vp, FREAD,
737168962Spjd		    uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) {
738168962Spjd			ZFS_EXIT(zfsvfs);
739168962Spjd			return (error);
740168962Spjd		}
741168962Spjd	}
742168962Spjd
743168962Spjd	/*
744168404Spjd	 * If we're in FRSYNC mode, sync out this znode before reading it.
745168404Spjd	 */
746224605Smm	if (zfsvfs->z_log &&
747224605Smm	    (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS))
748219089Spjd		zil_commit(zfsvfs->z_log, zp->z_id);
749168404Spjd
750168404Spjd	/*
751168404Spjd	 * Lock the range against changes.
752168404Spjd	 */
753168404Spjd	rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER);
754168404Spjd
755168404Spjd	/*
756168404Spjd	 * If we are reading past end-of-file we can skip
757168404Spjd	 * to the end; but we might still need to set atime.
758168404Spjd	 */
759219089Spjd	if (uio->uio_loffset >= zp->z_size) {
760168404Spjd		error = 0;
761168404Spjd		goto out;
762168404Spjd	}
763168404Spjd
764219089Spjd	ASSERT(uio->uio_loffset < zp->z_size);
765219089Spjd	n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset);
766168404Spjd
767277300Ssmh#ifdef illumos
768219089Spjd	if ((uio->uio_extflg == UIO_XUIO) &&
769219089Spjd	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) {
770219089Spjd		int nblk;
771219089Spjd		int blksz = zp->z_blksz;
772219089Spjd		uint64_t offset = uio->uio_loffset;
773219089Spjd
774219089Spjd		xuio = (xuio_t *)uio;
775219089Spjd		if ((ISP2(blksz))) {
776219089Spjd			nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset,
777219089Spjd			    blksz)) / blksz;
778219089Spjd		} else {
779219089Spjd			ASSERT(offset + n <= blksz);
780219089Spjd			nblk = 1;
781219089Spjd		}
782219089Spjd		(void) dmu_xuio_init(xuio, nblk);
783219089Spjd
784219089Spjd		if (vn_has_cached_data(vp)) {
785219089Spjd			/*
786219089Spjd			 * For simplicity, we always allocate a full buffer
787219089Spjd			 * even if we only expect to read a portion of a block.
788219089Spjd			 */
789219089Spjd			while (--nblk >= 0) {
790219089Spjd				(void) dmu_xuio_add(xuio,
791219089Spjd				    dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
792219089Spjd				    blksz), 0, blksz);
793219089Spjd			}
794219089Spjd		}
795219089Spjd	}
796277300Ssmh#endif	/* illumos */
797219089Spjd
798168404Spjd	while (n > 0) {
799168404Spjd		nbytes = MIN(n, zfs_read_chunk_size -
800168404Spjd		    P2PHASE(uio->uio_loffset, zfs_read_chunk_size));
801168404Spjd
802219089Spjd#ifdef __FreeBSD__
803219089Spjd		if (uio->uio_segflg == UIO_NOCOPY)
804219089Spjd			error = mappedread_sf(vp, nbytes, uio);
805219089Spjd		else
806219089Spjd#endif /* __FreeBSD__ */
807272809Sdelphij		if (vn_has_cached_data(vp)) {
808168404Spjd			error = mappedread(vp, nbytes, uio);
809272809Sdelphij		} else {
810272809Sdelphij			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
811272809Sdelphij			    uio, nbytes);
812272809Sdelphij		}
813185029Spjd		if (error) {
814185029Spjd			/* convert checksum errors into IO errors */
815185029Spjd			if (error == ECKSUM)
816249195Smm				error = SET_ERROR(EIO);
817168404Spjd			break;
818185029Spjd		}
819168962Spjd
820168404Spjd		n -= nbytes;
821168404Spjd	}
822168404Spjdout:
823168404Spjd	zfs_range_unlock(rl);
824168404Spjd
825168404Spjd	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
826168404Spjd	ZFS_EXIT(zfsvfs);
827168404Spjd	return (error);
828168404Spjd}
829168404Spjd
830168404Spjd/*
831168404Spjd * Write the bytes to a file.
832168404Spjd *
833168404Spjd *	IN:	vp	- vnode of file to be written to.
834168404Spjd *		uio	- structure supplying write location, range info,
835168404Spjd *			  and data buffer.
836251631Sdelphij *		ioflag	- FAPPEND, FSYNC, and/or FDSYNC.  FAPPEND is
837251631Sdelphij *			  set if in append mode.
838168404Spjd *		cr	- credentials of caller.
839185029Spjd *		ct	- caller context (NFS/CIFS fem monitor only)
840168404Spjd *
841168404Spjd *	OUT:	uio	- updated offset and range.
842168404Spjd *
843251631Sdelphij *	RETURN:	0 on success, error code on failure.
844168404Spjd *
845168404Spjd * Timestamps:
846168404Spjd *	vp - ctime|mtime updated if byte count > 0
847168404Spjd */
848219089Spjd
849168404Spjd/* ARGSUSED */
850168404Spjdstatic int
851168962Spjdzfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
852168404Spjd{
853168404Spjd	znode_t		*zp = VTOZ(vp);
854168962Spjd	rlim64_t	limit = MAXOFFSET_T;
855168404Spjd	ssize_t		start_resid = uio->uio_resid;
856168404Spjd	ssize_t		tx_bytes;
857168404Spjd	uint64_t	end_size;
858168404Spjd	dmu_tx_t	*tx;
859168404Spjd	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
860185029Spjd	zilog_t		*zilog;
861168404Spjd	offset_t	woff;
862168404Spjd	ssize_t		n, nbytes;
863168404Spjd	rl_t		*rl;
864168404Spjd	int		max_blksz = zfsvfs->z_max_blksz;
865247187Smm	int		error = 0;
866209962Smm	arc_buf_t	*abuf;
867247187Smm	iovec_t		*aiov = NULL;
868219089Spjd	xuio_t		*xuio = NULL;
869219089Spjd	int		i_iov = 0;
870219089Spjd	int		iovcnt = uio->uio_iovcnt;
871219089Spjd	iovec_t		*iovp = uio->uio_iov;
872219089Spjd	int		write_eof;
873219089Spjd	int		count = 0;
874219089Spjd	sa_bulk_attr_t	bulk[4];
875219089Spjd	uint64_t	mtime[2], ctime[2];
876168404Spjd
877168404Spjd	/*
878168404Spjd	 * Fasttrack empty write
879168404Spjd	 */
880168404Spjd	n = start_resid;
881168404Spjd	if (n == 0)
882168404Spjd		return (0);
883168404Spjd
884168962Spjd	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
885168962Spjd		limit = MAXOFFSET_T;
886168962Spjd
887168404Spjd	ZFS_ENTER(zfsvfs);
888185029Spjd	ZFS_VERIFY_ZP(zp);
889168404Spjd
890219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
891219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
892219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
893219089Spjd	    &zp->z_size, 8);
894219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
895219089Spjd	    &zp->z_pflags, 8);
896219089Spjd
897168404Spjd	/*
898262990Sdelphij	 * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our
899262990Sdelphij	 * callers might not be able to detect properly that we are read-only,
900262990Sdelphij	 * so check it explicitly here.
901262990Sdelphij	 */
902262990Sdelphij	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
903262990Sdelphij		ZFS_EXIT(zfsvfs);
904262990Sdelphij		return (SET_ERROR(EROFS));
905262990Sdelphij	}
906262990Sdelphij
907262990Sdelphij	/*
908321579Smav	 * If immutable or not appending then return EPERM.
909321579Smav	 * Intentionally allow ZFS_READONLY through here.
910321579Smav	 * See zfs_zaccess_common()
911185029Spjd	 */
912321579Smav	if ((zp->z_pflags & ZFS_IMMUTABLE) ||
913219089Spjd	    ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
914219089Spjd	    (uio->uio_loffset < zp->z_size))) {
915185029Spjd		ZFS_EXIT(zfsvfs);
916249195Smm		return (SET_ERROR(EPERM));
917185029Spjd	}
918185029Spjd
919185029Spjd	zilog = zfsvfs->z_log;
920185029Spjd
921185029Spjd	/*
922219089Spjd	 * Validate file offset
923219089Spjd	 */
924219089Spjd	woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
925219089Spjd	if (woff < 0) {
926219089Spjd		ZFS_EXIT(zfsvfs);
927249195Smm		return (SET_ERROR(EINVAL));
928219089Spjd	}
929219089Spjd
930219089Spjd	/*
931219089Spjd	 * Check for mandatory locks before calling zfs_range_lock()
932219089Spjd	 * in order to prevent a deadlock with locks set via fcntl().
933219089Spjd	 */
934219089Spjd	if (MANDMODE((mode_t)zp->z_mode) &&
935219089Spjd	    (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) {
936219089Spjd		ZFS_EXIT(zfsvfs);
937219089Spjd		return (error);
938219089Spjd	}
939219089Spjd
940277300Ssmh#ifdef illumos
941219089Spjd	/*
942168404Spjd	 * Pre-fault the pages to ensure slow (eg NFS) pages
943168404Spjd	 * don't hold up txg.
944219089Spjd	 * Skip this if uio contains loaned arc_buf.
945168404Spjd	 */
946219089Spjd	if ((uio->uio_extflg == UIO_XUIO) &&
947219089Spjd	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
948219089Spjd		xuio = (xuio_t *)uio;
949219089Spjd	else
950219089Spjd		uio_prefaultpages(MIN(n, max_blksz), uio);
951277300Ssmh#endif
952168404Spjd
953168404Spjd	/*
954168404Spjd	 * If in append mode, set the io offset pointer to eof.
955168404Spjd	 */
956213673Spjd	if (ioflag & FAPPEND) {
957168404Spjd		/*
958219089Spjd		 * Obtain an appending range lock to guarantee file append
959219089Spjd		 * semantics.  We reset the write offset once we have the lock.
960168404Spjd		 */
961168404Spjd		rl = zfs_range_lock(zp, 0, n, RL_APPEND);
962219089Spjd		woff = rl->r_off;
963168404Spjd		if (rl->r_len == UINT64_MAX) {
964219089Spjd			/*
965219089Spjd			 * We overlocked the file because this write will cause
966219089Spjd			 * the file block size to increase.
967219089Spjd			 * Note that zp_size cannot change with this lock held.
968219089Spjd			 */
969219089Spjd			woff = zp->z_size;
970168404Spjd		}
971219089Spjd		uio->uio_loffset = woff;
972168404Spjd	} else {
973168404Spjd		/*
974219089Spjd		 * Note that if the file block size will change as a result of
975219089Spjd		 * this write, then this range lock will lock the entire file
976219089Spjd		 * so that we can re-write the block safely.
977168404Spjd		 */
978168404Spjd		rl = zfs_range_lock(zp, woff, n, RL_WRITER);
979168404Spjd	}
980168404Spjd
981235781Strasz	if (vn_rlimit_fsize(vp, uio, uio->uio_td)) {
982235781Strasz		zfs_range_unlock(rl);
983235781Strasz		ZFS_EXIT(zfsvfs);
984235781Strasz		return (EFBIG);
985235781Strasz	}
986235781Strasz
987168962Spjd	if (woff >= limit) {
988168962Spjd		zfs_range_unlock(rl);
989168962Spjd		ZFS_EXIT(zfsvfs);
990249195Smm		return (SET_ERROR(EFBIG));
991168962Spjd	}
992168962Spjd
993168962Spjd	if ((woff + n) > limit || woff > (limit - n))
994168962Spjd		n = limit - woff;
995168962Spjd
996219089Spjd	/* Will this write extend the file length? */
997219089Spjd	write_eof = (woff + n > zp->z_size);
998168404Spjd
999219089Spjd	end_size = MAX(zp->z_size, woff + n);
1000219089Spjd
1001168404Spjd	/*
1002168404Spjd	 * Write the file in reasonable size chunks.  Each chunk is written
1003168404Spjd	 * in a separate transaction; this keeps the intent log records small
1004168404Spjd	 * and allows us to do more fine-grained space accounting.
1005168404Spjd	 */
1006168404Spjd	while (n > 0) {
1007209962Smm		abuf = NULL;
1008209962Smm		woff = uio->uio_loffset;
1009219089Spjd		if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
1010219089Spjd		    zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
1011209962Smm			if (abuf != NULL)
1012209962Smm				dmu_return_arcbuf(abuf);
1013249195Smm			error = SET_ERROR(EDQUOT);
1014209962Smm			break;
1015209962Smm		}
1016209962Smm
1017219089Spjd		if (xuio && abuf == NULL) {
1018219089Spjd			ASSERT(i_iov < iovcnt);
1019219089Spjd			aiov = &iovp[i_iov];
1020219089Spjd			abuf = dmu_xuio_arcbuf(xuio, i_iov);
1021219089Spjd			dmu_xuio_clear(xuio, i_iov);
1022219089Spjd			DTRACE_PROBE3(zfs_cp_write, int, i_iov,
1023219089Spjd			    iovec_t *, aiov, arc_buf_t *, abuf);
1024219089Spjd			ASSERT((aiov->iov_base == abuf->b_data) ||
1025219089Spjd			    ((char *)aiov->iov_base - (char *)abuf->b_data +
1026219089Spjd			    aiov->iov_len == arc_buf_size(abuf)));
1027219089Spjd			i_iov++;
1028219089Spjd		} else if (abuf == NULL && n >= max_blksz &&
1029219089Spjd		    woff >= zp->z_size &&
1030209962Smm		    P2PHASE(woff, max_blksz) == 0 &&
1031209962Smm		    zp->z_blksz == max_blksz) {
1032219089Spjd			/*
1033219089Spjd			 * This write covers a full block.  "Borrow" a buffer
1034219089Spjd			 * from the dmu so that we can fill it before we enter
1035219089Spjd			 * a transaction.  This avoids the possibility of
1036219089Spjd			 * holding up the transaction if the data copy hangs
1037219089Spjd			 * up on a pagefault (e.g., from an NFS server mapping).
1038219089Spjd			 */
1039209962Smm			size_t cbytes;
1040209962Smm
1041219089Spjd			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
1042219089Spjd			    max_blksz);
1043209962Smm			ASSERT(abuf != NULL);
1044209962Smm			ASSERT(arc_buf_size(abuf) == max_blksz);
1045209962Smm			if (error = uiocopy(abuf->b_data, max_blksz,
1046209962Smm			    UIO_WRITE, uio, &cbytes)) {
1047209962Smm				dmu_return_arcbuf(abuf);
1048209962Smm				break;
1049209962Smm			}
1050209962Smm			ASSERT(cbytes == max_blksz);
1051209962Smm		}
1052209962Smm
1053209962Smm		/*
1054168404Spjd		 * Start a transaction.
1055168404Spjd		 */
1056168404Spjd		tx = dmu_tx_create(zfsvfs->z_os);
1057219089Spjd		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1058168404Spjd		dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
1059219089Spjd		zfs_sa_upgrade_txholds(tx, zp);
1060258720Savg		error = dmu_tx_assign(tx, TXG_WAIT);
1061168404Spjd		if (error) {
1062168404Spjd			dmu_tx_abort(tx);
1063209962Smm			if (abuf != NULL)
1064209962Smm				dmu_return_arcbuf(abuf);
1065168404Spjd			break;
1066168404Spjd		}
1067168404Spjd
1068168404Spjd		/*
1069168404Spjd		 * If zfs_range_lock() over-locked we grow the blocksize
1070168404Spjd		 * and then reduce the lock range.  This will only happen
1071168404Spjd		 * on the first iteration since zfs_range_reduce() will
1072168404Spjd		 * shrink down r_len to the appropriate size.
1073168404Spjd		 */
1074168404Spjd		if (rl->r_len == UINT64_MAX) {
1075168404Spjd			uint64_t new_blksz;
1076168404Spjd
1077168404Spjd			if (zp->z_blksz > max_blksz) {
1078274337Sdelphij				/*
1079274337Sdelphij				 * File's blocksize is already larger than the
1080274337Sdelphij				 * "recordsize" property.  Only let it grow to
1081274337Sdelphij				 * the next power of 2.
1082274337Sdelphij				 */
1083168404Spjd				ASSERT(!ISP2(zp->z_blksz));
1084274337Sdelphij				new_blksz = MIN(end_size,
1085274337Sdelphij				    1 << highbit64(zp->z_blksz));
1086168404Spjd			} else {
1087168404Spjd				new_blksz = MIN(end_size, max_blksz);
1088168404Spjd			}
1089168404Spjd			zfs_grow_blocksize(zp, new_blksz, tx);
1090168404Spjd			zfs_range_reduce(rl, woff, n);
1091168404Spjd		}
1092168404Spjd
1093168404Spjd		/*
1094168404Spjd		 * XXX - should we really limit each write to z_max_blksz?
1095168404Spjd		 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
1096168404Spjd		 */
1097168404Spjd		nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));
1098168404Spjd
1099219089Spjd		if (woff + nbytes > zp->z_size)
1100168404Spjd			vnode_pager_setsize(vp, woff + nbytes);
1101168404Spjd
1102209962Smm		if (abuf == NULL) {
1103209962Smm			tx_bytes = uio->uio_resid;
1104219089Spjd			error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
1105219089Spjd			    uio, nbytes, tx);
1106209962Smm			tx_bytes -= uio->uio_resid;
1107168404Spjd		} else {
1108209962Smm			tx_bytes = nbytes;
1109219089Spjd			ASSERT(xuio == NULL || tx_bytes == aiov->iov_len);
1110219089Spjd			/*
1111219089Spjd			 * If this is not a full block write, but we are
1112219089Spjd			 * extending the file past EOF and this data starts
1113219089Spjd			 * block-aligned, use assign_arcbuf().  Otherwise,
1114219089Spjd			 * write via dmu_write().
1115219089Spjd			 */
1116219089Spjd			if (tx_bytes < max_blksz && (!write_eof ||
1117219089Spjd			    aiov->iov_base != abuf->b_data)) {
1118219089Spjd				ASSERT(xuio);
1119219089Spjd				dmu_write(zfsvfs->z_os, zp->z_id, woff,
1120219089Spjd				    aiov->iov_len, aiov->iov_base, tx);
1121219089Spjd				dmu_return_arcbuf(abuf);
1122219089Spjd				xuio_stat_wbuf_copied();
1123219089Spjd			} else {
1124219089Spjd				ASSERT(xuio || tx_bytes == max_blksz);
1125219089Spjd				dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
1126219089Spjd				    woff, abuf, tx);
1127219089Spjd			}
1128209962Smm			ASSERT(tx_bytes <= uio->uio_resid);
1129209962Smm			uioskip(uio, tx_bytes);
1130168404Spjd		}
1131212657Savg		if (tx_bytes && vn_has_cached_data(vp)) {
1132209962Smm			update_pages(vp, woff, tx_bytes, zfsvfs->z_os,
1133209962Smm			    zp->z_id, uio->uio_segflg, tx);
1134209962Smm		}
1135209962Smm
1136209962Smm		/*
1137168404Spjd		 * If we made no progress, we're done.  If we made even
1138168404Spjd		 * partial progress, update the znode and ZIL accordingly.
1139168404Spjd		 */
1140168404Spjd		if (tx_bytes == 0) {
1141219089Spjd			(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
1142219089Spjd			    (void *)&zp->z_size, sizeof (uint64_t), tx);
1143168404Spjd			dmu_tx_commit(tx);
1144168404Spjd			ASSERT(error != 0);
1145168404Spjd			break;
1146168404Spjd		}
1147168404Spjd
1148168404Spjd		/*
1149168404Spjd		 * Clear Set-UID/Set-GID bits on successful write if not
1150168404Spjd		 * privileged and at least one of the excute bits is set.
1151168404Spjd		 *
1152168404Spjd		 * It would be nice to to this after all writes have
1153168404Spjd		 * been done, but that would still expose the ISUID/ISGID
1154168404Spjd		 * to another app after the partial write is committed.
1155185029Spjd		 *
1156185029Spjd		 * Note: we don't call zfs_fuid_map_id() here because
1157185029Spjd		 * user 0 is not an ephemeral uid.
1158168404Spjd		 */
1159168404Spjd		mutex_enter(&zp->z_acl_lock);
1160219089Spjd		if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) |
1161168404Spjd		    (S_IXUSR >> 6))) != 0 &&
1162219089Spjd		    (zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
1163185029Spjd		    secpolicy_vnode_setid_retain(vp, cr,
1164219089Spjd		    (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) {
1165219089Spjd			uint64_t newmode;
1166219089Spjd			zp->z_mode &= ~(S_ISUID | S_ISGID);
1167219089Spjd			newmode = zp->z_mode;
1168219089Spjd			(void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
1169219089Spjd			    (void *)&newmode, sizeof (uint64_t), tx);
1170168404Spjd		}
1171168404Spjd		mutex_exit(&zp->z_acl_lock);
1172168404Spjd
1173219089Spjd		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
1174219089Spjd		    B_TRUE);
1175168404Spjd
1176168404Spjd		/*
1177168404Spjd		 * Update the file size (zp_size) if it has changed;
1178168404Spjd		 * account for possible concurrent updates.
1179168404Spjd		 */
1180219089Spjd		while ((end_size = zp->z_size) < uio->uio_loffset) {
1181219089Spjd			(void) atomic_cas_64(&zp->z_size, end_size,
1182168404Spjd			    uio->uio_loffset);
1183298105Savg#ifdef illumos
1184219089Spjd			ASSERT(error == 0);
1185298105Savg#else
1186298105Savg			ASSERT(error == 0 || error == EFAULT);
1187298105Savg#endif
1188219089Spjd		}
1189219089Spjd		/*
1190219089Spjd		 * If we are replaying and eof is non zero then force
1191219089Spjd		 * the file size to the specified eof. Note, there's no
1192219089Spjd		 * concurrency during replay.
1193219089Spjd		 */
1194219089Spjd		if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
1195219089Spjd			zp->z_size = zfsvfs->z_replay_eof;
1196219089Spjd
1197298105Savg		if (error == 0)
1198298105Savg			error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
1199298105Savg		else
1200298105Savg			(void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
1201219089Spjd
1202168404Spjd		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
1203168404Spjd		dmu_tx_commit(tx);
1204168404Spjd
1205168404Spjd		if (error != 0)
1206168404Spjd			break;
1207168404Spjd		ASSERT(tx_bytes == nbytes);
1208168404Spjd		n -= nbytes;
1209219089Spjd
1210277300Ssmh#ifdef illumos
1211219089Spjd		if (!xuio && n > 0)
1212219089Spjd			uio_prefaultpages(MIN(n, max_blksz), uio);
1213277300Ssmh#endif
1214168404Spjd	}
1215168404Spjd
1216168404Spjd	zfs_range_unlock(rl);
1217168404Spjd
1218168404Spjd	/*
1219168404Spjd	 * If we're in replay mode, or we made no progress, return error.
1220168404Spjd	 * Otherwise, it's at least a partial write, so it's successful.
1221168404Spjd	 */
1222209962Smm	if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
1223168404Spjd		ZFS_EXIT(zfsvfs);
1224168404Spjd		return (error);
1225168404Spjd	}
1226168404Spjd
1227298105Savg#ifdef __FreeBSD__
1228298105Savg	/*
1229298105Savg	 * EFAULT means that at least one page of the source buffer was not
1230298105Savg	 * available.  VFS will re-try remaining I/O upon this error.
1231298105Savg	 */
1232298105Savg	if (error == EFAULT) {
1233298105Savg		ZFS_EXIT(zfsvfs);
1234298105Savg		return (error);
1235298105Savg	}
1236298105Savg#endif
1237298105Savg
1238219089Spjd	if (ioflag & (FSYNC | FDSYNC) ||
1239219089Spjd	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1240219089Spjd		zil_commit(zilog, zp->z_id);
1241168404Spjd
1242168404Spjd	ZFS_EXIT(zfsvfs);
1243168404Spjd	return (0);
1244168404Spjd}
1245168404Spjd
1246168404Spjdvoid
1247219089Spjdzfs_get_done(zgd_t *zgd, int error)
1248168404Spjd{
1249219089Spjd	znode_t *zp = zgd->zgd_private;
1250219089Spjd	objset_t *os = zp->z_zfsvfs->z_os;
1251168404Spjd
1252219089Spjd	if (zgd->zgd_db)
1253219089Spjd		dmu_buf_rele(zgd->zgd_db, zgd);
1254219089Spjd
1255219089Spjd	zfs_range_unlock(zgd->zgd_rl);
1256219089Spjd
1257191900Skmacy	/*
1258191900Skmacy	 * Release the vnode asynchronously as we currently have the
1259191900Skmacy	 * txg stopped from syncing.
1260191900Skmacy	 */
1261219089Spjd	VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
1262219089Spjd
1263219089Spjd	if (error == 0 && zgd->zgd_bp)
1264325132Savg		zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
1265219089Spjd
1266168404Spjd	kmem_free(zgd, sizeof (zgd_t));
1267168404Spjd}
1268168404Spjd
1269214378Smm#ifdef DEBUG
1270214378Smmstatic int zil_fault_io = 0;
1271214378Smm#endif
1272214378Smm
1273168404Spjd/*
1274168404Spjd * Get data to generate a TX_WRITE intent log record.
1275168404Spjd */
1276168404Spjdint
1277325132Savgzfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
1278168404Spjd{
1279168404Spjd	zfsvfs_t *zfsvfs = arg;
1280168404Spjd	objset_t *os = zfsvfs->z_os;
1281168404Spjd	znode_t *zp;
1282219089Spjd	uint64_t object = lr->lr_foid;
1283219089Spjd	uint64_t offset = lr->lr_offset;
1284219089Spjd	uint64_t size = lr->lr_length;
1285168404Spjd	dmu_buf_t *db;
1286168404Spjd	zgd_t *zgd;
1287168404Spjd	int error = 0;
1288168404Spjd
1289325132Savg	ASSERT3P(lwb, !=, NULL);
1290325132Savg	ASSERT3P(zio, !=, NULL);
1291325132Savg	ASSERT3U(size, !=, 0);
1292168404Spjd
1293168404Spjd	/*
1294168404Spjd	 * Nothing to do if the file has been removed
1295168404Spjd	 */
1296219089Spjd	if (zfs_zget(zfsvfs, object, &zp) != 0)
1297249195Smm		return (SET_ERROR(ENOENT));
1298168404Spjd	if (zp->z_unlinked) {
1299191900Skmacy		/*
1300191900Skmacy		 * Release the vnode asynchronously as we currently have the
1301191900Skmacy		 * txg stopped from syncing.
1302191900Skmacy		 */
1303196307Spjd		VN_RELE_ASYNC(ZTOV(zp),
1304196307Spjd		    dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
1305249195Smm		return (SET_ERROR(ENOENT));
1306168404Spjd	}
1307168404Spjd
1308219089Spjd	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
1309325132Savg	zgd->zgd_lwb = lwb;
1310219089Spjd	zgd->zgd_private = zp;
1311219089Spjd
1312168404Spjd	/*
1313168404Spjd	 * Write records come in two flavors: immediate and indirect.
1314168404Spjd	 * For small writes it's cheaper to store the data with the
1315168404Spjd	 * log record (immediate); for large writes it's cheaper to
1316168404Spjd	 * sync the data and get a pointer to it (indirect) so that
1317168404Spjd	 * we don't have to write the data twice.
1318168404Spjd	 */
1319168404Spjd	if (buf != NULL) { /* immediate write */
1320219089Spjd		zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER);
1321168404Spjd		/* test for truncation needs to be done while range locked */
1322219089Spjd		if (offset >= zp->z_size) {
1323249195Smm			error = SET_ERROR(ENOENT);
1324219089Spjd		} else {
1325219089Spjd			error = dmu_read(os, object, offset, size, buf,
1326219089Spjd			    DMU_READ_NO_PREFETCH);
1327168404Spjd		}
1328219089Spjd		ASSERT(error == 0 || error == ENOENT);
1329168404Spjd	} else { /* indirect write */
1330168404Spjd		/*
1331168404Spjd		 * Have to lock the whole block to ensure when it's
1332324203Savg		 * written out and its checksum is being calculated
1333168404Spjd		 * that no one can change the data. We need to re-check
1334168404Spjd		 * blocksize after we get the lock in case it's changed!
1335168404Spjd		 */
1336168404Spjd		for (;;) {
1337219089Spjd			uint64_t blkoff;
1338219089Spjd			size = zp->z_blksz;
1339219089Spjd			blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
1340219089Spjd			offset -= blkoff;
1341219089Spjd			zgd->zgd_rl = zfs_range_lock(zp, offset, size,
1342219089Spjd			    RL_READER);
1343219089Spjd			if (zp->z_blksz == size)
1344168404Spjd				break;
1345219089Spjd			offset += blkoff;
1346219089Spjd			zfs_range_unlock(zgd->zgd_rl);
1347168404Spjd		}
1348168404Spjd		/* test for truncation needs to be done while range locked */
1349219089Spjd		if (lr->lr_offset >= zp->z_size)
1350249195Smm			error = SET_ERROR(ENOENT);
1351214378Smm#ifdef DEBUG
1352214378Smm		if (zil_fault_io) {
1353249195Smm			error = SET_ERROR(EIO);
1354214378Smm			zil_fault_io = 0;
1355214378Smm		}
1356214378Smm#endif
1357219089Spjd		if (error == 0)
1358219089Spjd			error = dmu_buf_hold(os, object, offset, zgd, &db,
1359219089Spjd			    DMU_READ_NO_PREFETCH);
1360214378Smm
1361209962Smm		if (error == 0) {
1362323748Savg			blkptr_t *bp = &lr->lr_blkptr;
1363243524Smm
1364219089Spjd			zgd->zgd_db = db;
1365219089Spjd			zgd->zgd_bp = bp;
1366219089Spjd
1367219089Spjd			ASSERT(db->db_offset == offset);
1368219089Spjd			ASSERT(db->db_size == size);
1369219089Spjd
1370219089Spjd			error = dmu_sync(zio, lr->lr_common.lrc_txg,
1371219089Spjd			    zfs_get_done, zgd);
1372321559Smav			ASSERT(error || lr->lr_length <= size);
1373219089Spjd
1374209962Smm			/*
1375219089Spjd			 * On success, we need to wait for the write I/O
1376219089Spjd			 * initiated by dmu_sync() to complete before we can
1377219089Spjd			 * release this dbuf.  We will finish everything up
1378219089Spjd			 * in the zfs_get_done() callback.
1379209962Smm			 */
1380219089Spjd			if (error == 0)
1381219089Spjd				return (0);
1382209962Smm
1383219089Spjd			if (error == EALREADY) {
1384219089Spjd				lr->lr_common.lrc_txtype = TX_WRITE2;
1385219089Spjd				error = 0;
1386219089Spjd			}
1387209962Smm		}
1388168404Spjd	}
1389219089Spjd
1390219089Spjd	zfs_get_done(zgd, error);
1391219089Spjd
1392168404Spjd	return (error);
1393168404Spjd}
1394168404Spjd
1395168404Spjd/*ARGSUSED*/
1396168404Spjdstatic int
1397185029Spjdzfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
1398185029Spjd    caller_context_t *ct)
1399168404Spjd{
1400168404Spjd	znode_t *zp = VTOZ(vp);
1401168404Spjd	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1402168404Spjd	int error;
1403168404Spjd
1404168404Spjd	ZFS_ENTER(zfsvfs);
1405185029Spjd	ZFS_VERIFY_ZP(zp);
1406185029Spjd
1407185029Spjd	if (flag & V_ACE_MASK)
1408185029Spjd		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
1409185029Spjd	else
1410185029Spjd		error = zfs_zaccess_rwx(zp, mode, flag, cr);
1411185029Spjd
1412168404Spjd	ZFS_EXIT(zfsvfs);
1413168404Spjd	return (error);
1414168404Spjd}
1415168404Spjd
/*
 * Callback handed to vn_vget_ino_gen() by zfs_lookup_lock() for the ".."
 * case.  arg is the already-referenced target vnode: it is stored in *vpp
 * and locked with lkflags.  If locking fails the reference is dropped.
 */
static int
zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
	struct vnode *target = arg;
	int rc;

	*vpp = target;
	rc = vn_lock(target, lkflags);
	if (rc != 0)
		vrele(target);
	return (rc);
}
1427211932Smm
1428303970Savgstatic int
1429303970Savgzfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
1430303970Savg{
1431303970Savg	znode_t *zdp = VTOZ(dvp);
1432303970Savg	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
1433303970Savg	int error;
1434303970Savg	int ltype;
1435303970Savg
1436303970Savg	ASSERT_VOP_LOCKED(dvp, __func__);
1437303970Savg#ifdef DIAGNOSTIC
1438307142Savg	if ((zdp->z_pflags & ZFS_XATTR) == 0)
1439307142Savg		VERIFY(!RRM_LOCK_HELD(&zfsvfs->z_teardown_lock));
1440303970Savg#endif
1441303970Savg
1442303970Savg	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
1443303970Savg		ASSERT3P(dvp, ==, vp);
1444303970Savg		vref(dvp);
1445303970Savg		ltype = lkflags & LK_TYPE_MASK;
1446303970Savg		if (ltype != VOP_ISLOCKED(dvp)) {
1447303970Savg			if (ltype == LK_EXCLUSIVE)
1448303970Savg				vn_lock(dvp, LK_UPGRADE | LK_RETRY);
1449303970Savg			else /* if (ltype == LK_SHARED) */
1450303970Savg				vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
1451303970Savg
1452303970Savg			/*
1453303970Savg			 * Relock for the "." case could leave us with
1454303970Savg			 * reclaimed vnode.
1455303970Savg			 */
1456303970Savg			if (dvp->v_iflag & VI_DOOMED) {
1457303970Savg				vrele(dvp);
1458303970Savg				return (SET_ERROR(ENOENT));
1459303970Savg			}
1460303970Savg		}
1461303970Savg		return (0);
1462303970Savg	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
1463303970Savg		/*
1464303970Savg		 * Note that in this case, dvp is the child vnode, and we
1465303970Savg		 * are looking up the parent vnode - exactly reverse from
1466303970Savg		 * normal operation.  Unlocking dvp requires some rather
1467303970Savg		 * tricky unlock/relock dance to prevent mp from being freed;
1468303970Savg		 * use vn_vget_ino_gen() which takes care of all that.
1469303970Savg		 *
1470303970Savg		 * XXX Note that there is a time window when both vnodes are
1471303970Savg		 * unlocked.  It is possible, although highly unlikely, that
1472303970Savg		 * during that window the parent-child relationship between
1473303970Savg		 * the vnodes may change, for example, get reversed.
1474303970Savg		 * In that case we would have a wrong lock order for the vnodes.
1475303970Savg		 * All other filesystems seem to ignore this problem, so we
1476303970Savg		 * do the same here.
1477303970Savg		 * A potential solution could be implemented as follows:
1478303970Savg		 * - using LK_NOWAIT when locking the second vnode and retrying
1479303970Savg		 *   if necessary
1480303970Savg		 * - checking that the parent-child relationship still holds
1481303970Savg		 *   after locking both vnodes and retrying if it doesn't
1482303970Savg		 */
1483303970Savg		error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
1484303970Savg		return (error);
1485303970Savg	} else {
1486303970Savg		error = vn_lock(vp, lkflags);
1487303970Savg		if (error != 0)
1488303970Savg			vrele(vp);
1489303970Savg		return (error);
1490211932Smm	}
1491211932Smm}
1492211932Smm
1493211932Smm/*
1494168404Spjd * Lookup an entry in a directory, or an extended attribute directory.
1495168404Spjd * If it exists, return a held vnode reference for it.
1496168404Spjd *
1497168404Spjd *	IN:	dvp	- vnode of directory to search.
1498168404Spjd *		nm	- name of entry to lookup.
1499168404Spjd *		pnp	- full pathname to lookup [UNUSED].
1500168404Spjd *		flags	- LOOKUP_XATTR set if looking for an attribute.
1501168404Spjd *		rdir	- root directory vnode [UNUSED].
1502168404Spjd *		cr	- credentials of caller.
1503185029Spjd *		ct	- caller context
1504168404Spjd *
1505168404Spjd *	OUT:	vpp	- vnode of located entry, NULL if not found.
1506168404Spjd *
1507251631Sdelphij *	RETURN:	0 on success, error code on failure.
1508168404Spjd *
1509168404Spjd * Timestamps:
1510168404Spjd *	NA
1511168404Spjd */
1512168404Spjd/* ARGSUSED */
1513168962Spjdstatic int
1514168962Spjdzfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp,
1515185029Spjd    int nameiop, cred_t *cr, kthread_t *td, int flags)
1516168404Spjd{
1517168962Spjd	znode_t *zdp = VTOZ(dvp);
1518303970Savg	znode_t *zp;
1519168962Spjd	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
1520211932Smm	int	error = 0;
1521168404Spjd
1522321545Smav	/*
1523321545Smav	 * Fast path lookup, however we must skip DNLC lookup
1524321545Smav	 * for case folding or normalizing lookups because the
1525321545Smav	 * DNLC code only stores the passed in name.  This means
1526321545Smav	 * creating 'a' and removing 'A' on a case insensitive
1527321545Smav	 * file system would work, but DNLC still thinks 'a'
1528321545Smav	 * exists and won't let you create it again on the next
1529321545Smav	 * pass through fast path.
1530321545Smav	 */
1531303970Savg	if (!(flags & LOOKUP_XATTR)) {
1532211932Smm		if (dvp->v_type != VDIR) {
1533249195Smm			return (SET_ERROR(ENOTDIR));
1534219089Spjd		} else if (zdp->z_sa_hdl == NULL) {
1535249195Smm			return (SET_ERROR(EIO));
1536211932Smm		}
1537211932Smm	}
1538211932Smm
1539211932Smm	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm);
1540211932Smm
1541168404Spjd	ZFS_ENTER(zfsvfs);
1542185029Spjd	ZFS_VERIFY_ZP(zdp);
1543168404Spjd
1544168404Spjd	*vpp = NULL;
1545168404Spjd
1546185029Spjd	if (flags & LOOKUP_XATTR) {
1547168404Spjd#ifdef TODO
1548168404Spjd		/*
1549168404Spjd		 * If the xattr property is off, refuse the lookup request.
1550168404Spjd		 */
1551168404Spjd		if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) {
1552168404Spjd			ZFS_EXIT(zfsvfs);
1553249195Smm			return (SET_ERROR(EINVAL));
1554168404Spjd		}
1555185029Spjd#endif
1556168404Spjd
1557168404Spjd		/*
1558168404Spjd		 * We don't allow recursive attributes..
1559168404Spjd		 * Maybe someday we will.
1560168404Spjd		 */
1561219089Spjd		if (zdp->z_pflags & ZFS_XATTR) {
1562168404Spjd			ZFS_EXIT(zfsvfs);
1563249195Smm			return (SET_ERROR(EINVAL));
1564168404Spjd		}
1565168404Spjd
1566168404Spjd		if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) {
1567168404Spjd			ZFS_EXIT(zfsvfs);
1568168404Spjd			return (error);
1569168404Spjd		}
1570168404Spjd
1571168404Spjd		/*
1572168404Spjd		 * Do we have permission to get into attribute directory?
1573168404Spjd		 */
1574185029Spjd		if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0,
1575185029Spjd		    B_FALSE, cr)) {
1576303970Savg			vrele(*vpp);
1577185029Spjd			*vpp = NULL;
1578168404Spjd		}
1579168404Spjd
1580168404Spjd		ZFS_EXIT(zfsvfs);
1581168404Spjd		return (error);
1582168404Spjd	}
1583168404Spjd
1584168404Spjd	/*
1585168404Spjd	 * Check accessibility of directory.
1586168404Spjd	 */
1587185029Spjd	if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) {
1588168404Spjd		ZFS_EXIT(zfsvfs);
1589168404Spjd		return (error);
1590168404Spjd	}
1591168404Spjd
1592185029Spjd	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
1593185029Spjd	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1594185029Spjd		ZFS_EXIT(zfsvfs);
1595249195Smm		return (SET_ERROR(EILSEQ));
1596185029Spjd	}
1597168404Spjd
1598168962Spjd
1599303970Savg	/*
1600303970Savg	 * First handle the special cases.
1601303970Savg	 */
1602303970Savg	if ((cnp->cn_flags & ISDOTDOT) != 0) {
1603303970Savg		/*
1604303970Savg		 * If we are a snapshot mounted under .zfs, return
1605303970Savg		 * the vp for the snapshot directory.
1606303970Savg		 */
1607303970Savg		if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
1608315842Savg			struct componentname cn;
1609315842Savg			vnode_t *zfsctl_vp;
1610315842Savg			int ltype;
1611315842Savg
1612303970Savg			ZFS_EXIT(zfsvfs);
1613315842Savg			ltype = VOP_ISLOCKED(dvp);
1614315842Savg			VOP_UNLOCK(dvp, 0);
1615315842Savg			error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
1616315842Savg			    &zfsctl_vp);
1617303970Savg			if (error == 0) {
1618315842Savg				cn.cn_nameptr = "snapshot";
1619315842Savg				cn.cn_namelen = strlen(cn.cn_nameptr);
1620315842Savg				cn.cn_nameiop = cnp->cn_nameiop;
1621319415Savg				cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
1622315842Savg				cn.cn_lkflags = cnp->cn_lkflags;
1623315842Savg				error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
1624315842Savg				vput(zfsctl_vp);
1625303970Savg			}
1626315842Savg			vn_lock(dvp, ltype | LK_RETRY);
1627315842Savg			return (error);
1628303970Savg		}
1629303970Savg	}
1630303970Savg	if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
1631315842Savg		ZFS_EXIT(zfsvfs);
1632303970Savg		if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
1633315842Savg			return (SET_ERROR(ENOTSUP));
1634315842Savg		error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
1635315842Savg		return (error);
1636303970Savg	}
1637303970Savg
1638303970Savg	/*
1639303970Savg	 * The loop is retry the lookup if the parent-child relationship
1640303970Savg	 * changes during the dot-dot locking complexities.
1641303970Savg	 */
1642303970Savg	for (;;) {
1643303970Savg		uint64_t parent;
1644303970Savg
1645303970Savg		error = zfs_dirlook(zdp, nm, &zp);
1646303970Savg		if (error == 0)
1647303970Savg			*vpp = ZTOV(zp);
1648303970Savg
1649303970Savg		ZFS_EXIT(zfsvfs);
1650303970Savg		if (error != 0)
1651303970Savg			break;
1652303970Savg
1653303970Savg		error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
1654303970Savg		if (error != 0) {
1655303970Savg			/*
1656303970Savg			 * If we've got a locking error, then the vnode
1657303970Savg			 * got reclaimed because of a force unmount.
1658303970Savg			 * We never enter doomed vnodes into the name cache.
1659303970Savg			 */
1660303970Savg			*vpp = NULL;
1661303970Savg			return (error);
1662303970Savg		}
1663303970Savg
1664303970Savg		if ((cnp->cn_flags & ISDOTDOT) == 0)
1665303970Savg			break;
1666303970Savg
1667303970Savg		ZFS_ENTER(zfsvfs);
1668303970Savg		if (zdp->z_sa_hdl == NULL) {
1669303970Savg			error = SET_ERROR(EIO);
1670303970Savg		} else {
1671303970Savg			error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
1672303970Savg			    &parent, sizeof (parent));
1673303970Savg		}
1674303970Savg		if (error != 0) {
1675303970Savg			ZFS_EXIT(zfsvfs);
1676303970Savg			vput(ZTOV(zp));
1677303970Savg			break;
1678303970Savg		}
1679303970Savg		if (zp->z_id == parent) {
1680303970Savg			ZFS_EXIT(zfsvfs);
1681303970Savg			break;
1682303970Savg		}
1683303970Savg		vput(ZTOV(zp));
1684303970Savg	}
1685303970Savg
1686303970Savgout:
1687303970Savg	if (error != 0)
1688303970Savg		*vpp = NULL;
1689303970Savg
1690168404Spjd	/* Translate errors and add SAVENAME when needed. */
1691168404Spjd	if (cnp->cn_flags & ISLASTCN) {
1692168404Spjd		switch (nameiop) {
1693168404Spjd		case CREATE:
1694168404Spjd		case RENAME:
1695168404Spjd			if (error == ENOENT) {
1696168404Spjd				error = EJUSTRETURN;
1697168404Spjd				cnp->cn_flags |= SAVENAME;
1698168404Spjd				break;
1699168404Spjd			}
1700168404Spjd			/* FALLTHROUGH */
1701168404Spjd		case DELETE:
1702168404Spjd			if (error == 0)
1703168404Spjd				cnp->cn_flags |= SAVENAME;
1704168404Spjd			break;
1705168404Spjd		}
1706168404Spjd	}
1707169198Spjd
1708303970Savg	/* Insert name into cache (as non-existent) if appropriate. */
1709303970Savg	if (zfsvfs->z_use_namecache &&
1710303970Savg	    error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
1711303970Savg		cache_enter(dvp, NULL, cnp);
1712168404Spjd
1713303970Savg	/* Insert name into cache if appropriate. */
1714303970Savg	if (zfsvfs->z_use_namecache &&
1715303970Savg	    error == 0 && (cnp->cn_flags & MAKEENTRY)) {
1716168404Spjd		if (!(cnp->cn_flags & ISLASTCN) ||
1717168404Spjd		    (nameiop != DELETE && nameiop != RENAME)) {
1718168404Spjd			cache_enter(dvp, *vpp, cnp);
1719168404Spjd		}
1720168404Spjd	}
1721168404Spjd
1722168404Spjd	return (error);
1723168404Spjd}
1724168404Spjd
1725168404Spjd/*
1726168404Spjd * Attempt to create a new entry in a directory.  If the entry
1727168404Spjd * already exists, truncate the file if permissible, else return
1728168404Spjd * an error.  Return the vp of the created or trunc'd file.
1729168404Spjd *
1730168404Spjd *	IN:	dvp	- vnode of directory to put new file entry in.
1731168404Spjd *		name	- name of new file entry.
1732168404Spjd *		vap	- attributes of new file.
1733168404Spjd *		excl	- flag indicating exclusive or non-exclusive mode.
1734168404Spjd *		mode	- mode to open file with.
1735168404Spjd *		cr	- credentials of caller.
1736168404Spjd *		flag	- large file flag [UNUSED].
1737185029Spjd *		ct	- caller context
1738268464Sdelphij *		vsecp	- ACL to be set
1739168404Spjd *
1740168404Spjd *	OUT:	vpp	- vnode of created or trunc'd entry.
1741168404Spjd *
1742251631Sdelphij *	RETURN:	0 on success, error code on failure.
1743168404Spjd *
1744168404Spjd * Timestamps:
1745168404Spjd *	dvp - ctime|mtime updated if new entry created
1746168404Spjd *	 vp - ctime|mtime always, atime if new
1747168404Spjd */
1748185029Spjd
1749168404Spjd/* ARGSUSED */
1750168404Spjdstatic int
1751168962Spjdzfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode,
1752185029Spjd    vnode_t **vpp, cred_t *cr, kthread_t *td)
1753168404Spjd{
1754168404Spjd	znode_t		*zp, *dzp = VTOZ(dvp);
1755168404Spjd	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
1756185029Spjd	zilog_t		*zilog;
1757185029Spjd	objset_t	*os;
1758168404Spjd	dmu_tx_t	*tx;
1759168404Spjd	int		error;
1760209962Smm	ksid_t		*ksid;
1761209962Smm	uid_t		uid;
1762209962Smm	gid_t		gid = crgetgid(cr);
1763219089Spjd	zfs_acl_ids_t   acl_ids;
1764209962Smm	boolean_t	fuid_dirtied;
1765185029Spjd	void		*vsecp = NULL;
1766185029Spjd	int		flag = 0;
1767303970Savg	uint64_t	txtype;
1768168404Spjd
1769185029Spjd	/*
1770185029Spjd	 * If we have an ephemeral id, ACL, or XVATTR then
1771185029Spjd	 * make sure file system is at proper version
1772185029Spjd	 */
1773185029Spjd
1774209962Smm	ksid = crgetsid(cr, KSID_OWNER);
1775209962Smm	if (ksid)
1776209962Smm		uid = ksid_getid(ksid);
1777209962Smm	else
1778209962Smm		uid = crgetuid(cr);
1779219089Spjd
1780185029Spjd	if (zfsvfs->z_use_fuids == B_FALSE &&
1781185029Spjd	    (vsecp || (vap->va_mask & AT_XVATTR) ||
1782219089Spjd	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1783249195Smm		return (SET_ERROR(EINVAL));
1784185029Spjd
1785168404Spjd	ZFS_ENTER(zfsvfs);
1786185029Spjd	ZFS_VERIFY_ZP(dzp);
1787185029Spjd	os = zfsvfs->z_os;
1788185029Spjd	zilog = zfsvfs->z_log;
1789168404Spjd
1790185029Spjd	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
1791185029Spjd	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1792185029Spjd		ZFS_EXIT(zfsvfs);
1793249195Smm		return (SET_ERROR(EILSEQ));
1794185029Spjd	}
1795185029Spjd
1796185029Spjd	if (vap->va_mask & AT_XVATTR) {
1797197861Spjd		if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap,
1798185029Spjd		    crgetuid(cr), cr, vap->va_type)) != 0) {
1799185029Spjd			ZFS_EXIT(zfsvfs);
1800185029Spjd			return (error);
1801185029Spjd		}
1802185029Spjd	}
1803260704Savg
1804168404Spjd	*vpp = NULL;
1805168404Spjd
1806182905Strasz	if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
1807182905Strasz		vap->va_mode &= ~S_ISVTX;
1808168404Spjd
1809303970Savg	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
1810303970Savg	if (error) {
1811303970Savg		ZFS_EXIT(zfsvfs);
1812303970Savg		return (error);
1813303970Savg	}
1814303970Savg	ASSERT3P(zp, ==, NULL);
1815185029Spjd
1816303970Savg	/*
1817303970Savg	 * Create a new file object and update the directory
1818303970Savg	 * to reference it.
1819303970Savg	 */
1820303970Savg	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
1821303970Savg		goto out;
1822168404Spjd	}
1823219089Spjd
1824303970Savg	/*
1825303970Savg	 * We only support the creation of regular files in
1826303970Savg	 * extended attribute directories.
1827303970Savg	 */
1828168404Spjd
1829303970Savg	if ((dzp->z_pflags & ZFS_XATTR) &&
1830303970Savg	    (vap->va_type != VREG)) {
1831303970Savg		error = SET_ERROR(EINVAL);
1832303970Savg		goto out;
1833303970Savg	}
1834168404Spjd
1835303970Savg	if ((error = zfs_acl_ids_create(dzp, 0, vap,
1836303970Savg	    cr, vsecp, &acl_ids)) != 0)
1837303970Savg		goto out;
1838219089Spjd
1839303970Savg	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
1840303970Savg		zfs_acl_ids_free(&acl_ids);
1841303970Savg		error = SET_ERROR(EDQUOT);
1842303970Savg		goto out;
1843303970Savg	}
1844168404Spjd
1845303970Savg	getnewvnode_reserve(1);
1846209962Smm
1847303970Savg	tx = dmu_tx_create(os);
1848209962Smm
1849303970Savg	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1850303970Savg	    ZFS_SA_BASE_ATTR_SIZE);
1851219089Spjd
1852303970Savg	fuid_dirtied = zfsvfs->z_fuid_dirty;
1853303970Savg	if (fuid_dirtied)
1854303970Savg		zfs_fuid_txhold(zfsvfs, tx);
1855303970Savg	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
1856303970Savg	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
1857303970Savg	if (!zfsvfs->z_use_sa &&
1858303970Savg	    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1859303970Savg		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
1860303970Savg		    0, acl_ids.z_aclp->z_acl_bytes);
1861303970Savg	}
1862303970Savg	error = dmu_tx_assign(tx, TXG_WAIT);
1863303970Savg	if (error) {
1864209962Smm		zfs_acl_ids_free(&acl_ids);
1865303970Savg		dmu_tx_abort(tx);
1866303970Savg		getnewvnode_drop_reserve();
1867303970Savg		ZFS_EXIT(zfsvfs);
1868303970Savg		return (error);
1869303970Savg	}
1870303970Savg	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1871185029Spjd
1872303970Savg	if (fuid_dirtied)
1873303970Savg		zfs_fuid_sync(zfsvfs, tx);
1874219089Spjd
1875303970Savg	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);
1876303970Savg	txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
1877303970Savg	zfs_log_create(zilog, tx, txtype, dzp, zp, name,
1878303970Savg	    vsecp, acl_ids.z_fuidp, vap);
1879303970Savg	zfs_acl_ids_free(&acl_ids);
1880303970Savg	dmu_tx_commit(tx);
1881168404Spjd
1882303970Savg	getnewvnode_drop_reserve();
1883168404Spjd
1884168404Spjdout:
1885303970Savg	if (error == 0) {
1886168962Spjd		*vpp = ZTOV(zp);
1887168404Spjd	}
1888168404Spjd
1889219089Spjd	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1890219089Spjd		zil_commit(zilog, 0);
1891219089Spjd
1892168404Spjd	ZFS_EXIT(zfsvfs);
1893168404Spjd	return (error);
1894168404Spjd}
1895168404Spjd
1896168404Spjd/*
1897168404Spjd * Remove an entry from a directory.
1898168404Spjd *
1899168404Spjd *	IN:	dvp	- vnode of directory to remove entry from.
1900168404Spjd *		name	- name of entry to remove.
1901168404Spjd *		cr	- credentials of caller.
1902185029Spjd *		ct	- caller context
1903185029Spjd *		flags	- case flags
1904168404Spjd *
1905251631Sdelphij *	RETURN:	0 on success, error code on failure.
1906168404Spjd *
1907168404Spjd * Timestamps:
1908168404Spjd *	dvp - ctime|mtime
1909168404Spjd *	 vp - ctime (if nlink > 0)
1910168404Spjd */
1911219089Spjd
1912185029Spjd/*ARGSUSED*/
1913168404Spjdstatic int
1914303970Savgzfs_remove(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr)
1915168404Spjd{
1916303970Savg	znode_t		*dzp = VTOZ(dvp);
1917303970Savg	znode_t		*zp = VTOZ(vp);
1918219089Spjd	znode_t		*xzp;
1919168404Spjd	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
1920185029Spjd	zilog_t		*zilog;
1921168962Spjd	uint64_t	acl_obj, xattr_obj;
1922219089Spjd	uint64_t	obj = 0;
1923168404Spjd	dmu_tx_t	*tx;
1924185029Spjd	boolean_t	unlinked, toobig = FALSE;
1925185029Spjd	uint64_t	txtype;
1926168404Spjd	int		error;
1927168404Spjd
1928168404Spjd	ZFS_ENTER(zfsvfs);
1929185029Spjd	ZFS_VERIFY_ZP(dzp);
1930303970Savg	ZFS_VERIFY_ZP(zp);
1931185029Spjd	zilog = zfsvfs->z_log;
1932303970Savg	zp = VTOZ(vp);
1933168404Spjd
1934219089Spjd	xattr_obj = 0;
1935219089Spjd	xzp = NULL;
1936168404Spjd
1937168962Spjd	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
1938168404Spjd		goto out;
1939168962Spjd	}
1940168404Spjd
1941168962Spjd	/*
1942168962Spjd	 * Need to use rmdir for removing directories.
1943168962Spjd	 */
1944168962Spjd	if (vp->v_type == VDIR) {
1945249195Smm		error = SET_ERROR(EPERM);
1946168962Spjd		goto out;
1947168962Spjd	}
1948168962Spjd
1949185029Spjd	vnevent_remove(vp, dvp, name, ct);
1950168962Spjd
1951303970Savg	obj = zp->z_id;
1952168404Spjd
1953303970Savg	/* are there any extended attributes? */
1954303970Savg	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
1955303970Savg	    &xattr_obj, sizeof (xattr_obj));
1956303970Savg	if (error == 0 && xattr_obj) {
1957303970Savg		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
1958303970Savg		ASSERT0(error);
1959303970Savg	}
1960168962Spjd
1961168404Spjd	/*
1962168404Spjd	 * We may delete the znode now, or we may put it in the unlinked set;
1963168404Spjd	 * it depends on whether we're the last link, and on whether there are
1964168404Spjd	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
1965168404Spjd	 * allow for either case.
1966168404Spjd	 */
1967168404Spjd	tx = dmu_tx_create(zfsvfs->z_os);
1968168404Spjd	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1969219089Spjd	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1970219089Spjd	zfs_sa_upgrade_txholds(tx, zp);
1971219089Spjd	zfs_sa_upgrade_txholds(tx, dzp);
1972168404Spjd
1973303970Savg	if (xzp) {
1974219089Spjd		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1975219089Spjd		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
1976168404Spjd	}
1977168404Spjd
1978168404Spjd	/* charge as an update -- would be nice not to charge at all */
1979168404Spjd	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1980168404Spjd
1981268464Sdelphij	/*
1982294803Smav	 * Mark this transaction as typically resulting in a net free of space
1983268464Sdelphij	 */
1984294803Smav	dmu_tx_mark_netfree(tx);
1985268464Sdelphij
1986303970Savg	error = dmu_tx_assign(tx, TXG_WAIT);
1987168404Spjd	if (error) {
1988168404Spjd		dmu_tx_abort(tx);
1989168404Spjd		ZFS_EXIT(zfsvfs);
1990168404Spjd		return (error);
1991168404Spjd	}
1992168404Spjd
1993168404Spjd	/*
1994168404Spjd	 * Remove the directory entry.
1995168404Spjd	 */
1996303970Savg	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);
1997168404Spjd
1998168404Spjd	if (error) {
1999168404Spjd		dmu_tx_commit(tx);
2000168404Spjd		goto out;
2001168404Spjd	}
2002168404Spjd
2003219089Spjd	if (unlinked) {
2004168404Spjd		zfs_unlinked_add(zp, tx);
2005243268Savg		vp->v_vflag |= VV_NOSYNC;
2006168962Spjd	}
2007168404Spjd
2008185029Spjd	txtype = TX_REMOVE;
2009219089Spjd	zfs_log_remove(zilog, tx, txtype, dzp, name, obj);
2010168404Spjd
2011168404Spjd	dmu_tx_commit(tx);
2012168404Spjdout:
2013185029Spjd
2014219089Spjd	if (xzp)
2015303970Savg		vrele(ZTOV(xzp));
2016168962Spjd
2017219089Spjd	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
2018219089Spjd		zil_commit(zilog, 0);
2019219089Spjd
2020168404Spjd	ZFS_EXIT(zfsvfs);
2021168404Spjd	return (error);
2022168404Spjd}
2023168404Spjd
2024168404Spjd/*
2025168404Spjd * Create a new directory and insert it into dvp using the name
2026168404Spjd * provided.  Return a pointer to the inserted directory.
2027168404Spjd *
2028168404Spjd *	IN:	dvp	- vnode of directory to add subdir to.
2029168404Spjd *		dirname	- name of new directory.
2030168404Spjd *		vap	- attributes of new directory.
2031168404Spjd *		cr	- credentials of caller.
2032185029Spjd *		ct	- caller context
2033251631Sdelphij *		flags	- case flags
2034185029Spjd *		vsecp	- ACL to be set
2035168404Spjd *
2036168404Spjd *	OUT:	vpp	- vnode of created directory.
2037168404Spjd *
2038251631Sdelphij *	RETURN:	0 on success, error code on failure.
2039168404Spjd *
2040168404Spjd * Timestamps:
2041168404Spjd *	dvp - ctime|mtime updated
2042168404Spjd *	 vp - ctime|mtime|atime updated
2043168404Spjd */
2044185029Spjd/*ARGSUSED*/
2045168404Spjdstatic int
2046303970Savgzfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr)
2047168404Spjd{
2048168404Spjd	znode_t		*zp, *dzp = VTOZ(dvp);
2049168404Spjd	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
2050185029Spjd	zilog_t		*zilog;
2051185029Spjd	uint64_t	txtype;
2052168404Spjd	dmu_tx_t	*tx;
2053168404Spjd	int		error;
2054209962Smm	ksid_t		*ksid;
2055209962Smm	uid_t		uid;
2056209962Smm	gid_t		gid = crgetgid(cr);
2057219089Spjd	zfs_acl_ids_t   acl_ids;
2058209962Smm	boolean_t	fuid_dirtied;
2059168404Spjd
2060168404Spjd	ASSERT(vap->va_type == VDIR);
2061168404Spjd
2062185029Spjd	/*
2063185029Spjd	 * If we have an ephemeral id, ACL, or XVATTR then
2064185029Spjd	 * make sure file system is at proper version
2065185029Spjd	 */
2066185029Spjd
2067209962Smm	ksid = crgetsid(cr, KSID_OWNER);
2068209962Smm	if (ksid)
2069209962Smm		uid = ksid_getid(ksid);
2070209962Smm	else
2071209962Smm		uid = crgetuid(cr);
2072185029Spjd	if (zfsvfs->z_use_fuids == B_FALSE &&
2073303970Savg	    ((vap->va_mask & AT_XVATTR) ||
2074219089Spjd	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
2075249195Smm		return (SET_ERROR(EINVAL));
2076185029Spjd
2077168404Spjd	ZFS_ENTER(zfsvfs);
2078185029Spjd	ZFS_VERIFY_ZP(dzp);
2079185029Spjd	zilog = zfsvfs->z_log;
2080168404Spjd
2081219089Spjd	if (dzp->z_pflags & ZFS_XATTR) {
2082168404Spjd		ZFS_EXIT(zfsvfs);
2083249195Smm		return (SET_ERROR(EINVAL));
2084168404Spjd	}
2085168404Spjd
2086185029Spjd	if (zfsvfs->z_utf8 && u8_validate(dirname,
2087185029Spjd	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
2088185029Spjd		ZFS_EXIT(zfsvfs);
2089249195Smm		return (SET_ERROR(EILSEQ));
2090185029Spjd	}
2091185029Spjd
2092219089Spjd	if (vap->va_mask & AT_XVATTR) {
2093197861Spjd		if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap,
2094185029Spjd		    crgetuid(cr), cr, vap->va_type)) != 0) {
2095185029Spjd			ZFS_EXIT(zfsvfs);
2096185029Spjd			return (error);
2097185029Spjd		}
2098219089Spjd	}
2099185029Spjd
2100219089Spjd	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
2101303970Savg	    NULL, &acl_ids)) != 0) {
2102219089Spjd		ZFS_EXIT(zfsvfs);
2103219089Spjd		return (error);
2104219089Spjd	}
2105260704Savg
2106168404Spjd	/*
2107168404Spjd	 * First make sure the new directory doesn't exist.
2108219089Spjd	 *
2109219089Spjd	 * Existence is checked first to make sure we don't return
2110219089Spjd	 * EACCES instead of EEXIST which can cause some applications
2111219089Spjd	 * to fail.
2112168404Spjd	 */
2113185029Spjd	*vpp = NULL;
2114185029Spjd
2115303970Savg	if (error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW)) {
2116219089Spjd		zfs_acl_ids_free(&acl_ids);
2117168404Spjd		ZFS_EXIT(zfsvfs);
2118168404Spjd		return (error);
2119168404Spjd	}
2120303970Savg	ASSERT3P(zp, ==, NULL);
2121168404Spjd
2122185029Spjd	if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) {
2123219089Spjd		zfs_acl_ids_free(&acl_ids);
2124168404Spjd		ZFS_EXIT(zfsvfs);
2125168404Spjd		return (error);
2126168404Spjd	}
2127168404Spjd
2128209962Smm	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
2129211932Smm		zfs_acl_ids_free(&acl_ids);
2130209962Smm		ZFS_EXIT(zfsvfs);
2131249195Smm		return (SET_ERROR(EDQUOT));
2132209962Smm	}
2133209962Smm
2134168404Spjd	/*
2135168404Spjd	 * Add a new entry to the directory.
2136168404Spjd	 */
2137303970Savg	getnewvnode_reserve(1);
2138168404Spjd	tx = dmu_tx_create(zfsvfs->z_os);
2139168404Spjd	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
2140168404Spjd	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
2141209962Smm	fuid_dirtied = zfsvfs->z_fuid_dirty;
2142209962Smm	if (fuid_dirtied)
2143209962Smm		zfs_fuid_txhold(zfsvfs, tx);
2144219089Spjd	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
2145219089Spjd		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
2146219089Spjd		    acl_ids.z_aclp->z_acl_bytes);
2147219089Spjd	}
2148219089Spjd
2149219089Spjd	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
2150219089Spjd	    ZFS_SA_BASE_ATTR_SIZE);
2151219089Spjd
2152303970Savg	error = dmu_tx_assign(tx, TXG_WAIT);
2153168404Spjd	if (error) {
2154219089Spjd		zfs_acl_ids_free(&acl_ids);
2155168404Spjd		dmu_tx_abort(tx);
2156260704Savg		getnewvnode_drop_reserve();
2157168404Spjd		ZFS_EXIT(zfsvfs);
2158168404Spjd		return (error);
2159168404Spjd	}
2160168404Spjd
2161168404Spjd	/*
2162168404Spjd	 * Create new node.
2163168404Spjd	 */
2164219089Spjd	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
2165168404Spjd
2166209962Smm	if (fuid_dirtied)
2167209962Smm		zfs_fuid_sync(zfsvfs, tx);
2168219089Spjd
2169168404Spjd	/*
2170168404Spjd	 * Now put new name in parent dir.
2171168404Spjd	 */
2172303970Savg	(void) zfs_link_create(dzp, dirname, zp, tx, ZNEW);
2173168404Spjd
2174168404Spjd	*vpp = ZTOV(zp);
2175168404Spjd
2176303970Savg	txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
2177303970Savg	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
2178209962Smm	    acl_ids.z_fuidp, vap);
2179185029Spjd
2180209962Smm	zfs_acl_ids_free(&acl_ids);
2181219089Spjd
2182168404Spjd	dmu_tx_commit(tx);
2183168404Spjd
2184260704Savg	getnewvnode_drop_reserve();
2185260704Savg
2186219089Spjd	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
2187219089Spjd		zil_commit(zilog, 0);
2188219089Spjd
2189168404Spjd	ZFS_EXIT(zfsvfs);
2190168404Spjd	return (0);
2191168404Spjd}
2192168404Spjd
2193168404Spjd/*
2194168404Spjd * Remove a directory subdir entry.  If the current working
2195168404Spjd * directory is the same as the subdir to be removed, the
2196168404Spjd * remove will fail.
2197168404Spjd *
2198168404Spjd *	IN:	dvp	- vnode of directory to remove from.
2199168404Spjd *		name	- name of directory to be removed.
2200168404Spjd *		cwd	- vnode of current working directory.
2201168404Spjd *		cr	- credentials of caller.
2202185029Spjd *		ct	- caller context
2203185029Spjd *		flags	- case flags
2204168404Spjd *
2205251631Sdelphij *	RETURN:	0 on success, error code on failure.
2206168404Spjd *
2207168404Spjd * Timestamps:
2208168404Spjd *	dvp - ctime|mtime updated
2209168404Spjd */
2210185029Spjd/*ARGSUSED*/
2211168404Spjdstatic int
2212303970Savgzfs_rmdir(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr)
2213168404Spjd{
2214168404Spjd	znode_t		*dzp = VTOZ(dvp);
2215303970Savg	znode_t		*zp = VTOZ(vp);
2216168404Spjd	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
2217185029Spjd	zilog_t		*zilog;
2218168404Spjd	dmu_tx_t	*tx;
2219168404Spjd	int		error;
2220168404Spjd
2221168962Spjd	ZFS_ENTER(zfsvfs);
2222185029Spjd	ZFS_VERIFY_ZP(dzp);
2223303970Savg	ZFS_VERIFY_ZP(zp);
2224185029Spjd	zilog = zfsvfs->z_log;
2225168404Spjd
2226168404Spjd
2227168404Spjd	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
2228168404Spjd		goto out;
2229168404Spjd	}
2230168404Spjd
2231168962Spjd	if (vp->v_type != VDIR) {
2232249195Smm		error = SET_ERROR(ENOTDIR);
2233168962Spjd		goto out;
2234168962Spjd	}
2235168962Spjd
2236185029Spjd	vnevent_rmdir(vp, dvp, name, ct);
2237168962Spjd
2238168404Spjd	tx = dmu_tx_create(zfsvfs->z_os);
2239168404Spjd	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
2240219089Spjd	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
2241168404Spjd	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
2242219089Spjd	zfs_sa_upgrade_txholds(tx, zp);
2243219089Spjd	zfs_sa_upgrade_txholds(tx, dzp);
2244304122Savg	dmu_tx_mark_netfree(tx);
2245303970Savg	error = dmu_tx_assign(tx, TXG_WAIT);
2246168404Spjd	if (error) {
2247168404Spjd		dmu_tx_abort(tx);
2248168404Spjd		ZFS_EXIT(zfsvfs);
2249168404Spjd		return (error);
2250168404Spjd	}
2251168404Spjd
2252168404Spjd	cache_purge(dvp);
2253168404Spjd
2254303970Savg	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);
2255168404Spjd
2256185029Spjd	if (error == 0) {
2257185029Spjd		uint64_t txtype = TX_RMDIR;
2258219089Spjd		zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT);
2259185029Spjd	}
2260168404Spjd
2261168404Spjd	dmu_tx_commit(tx);
2262168404Spjd
2263168404Spjd	cache_purge(vp);
2264168404Spjdout:
2265219089Spjd	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
2266219089Spjd		zil_commit(zilog, 0);
2267219089Spjd
2268168404Spjd	ZFS_EXIT(zfsvfs);
2269168404Spjd	return (error);
2270168404Spjd}
2271168404Spjd
2272168404Spjd/*
2273168404Spjd * Read as many directory entries as will fit into the provided
2274168404Spjd * buffer from the given directory cursor position (specified in
2275251631Sdelphij * the uio structure).
2276168404Spjd *
2277168404Spjd *	IN:	vp	- vnode of directory to read.
2278168404Spjd *		uio	- structure supplying read location, range info,
2279168404Spjd *			  and return buffer.
2280168404Spjd *		cr	- credentials of caller.
2281185029Spjd *		ct	- caller context
2282185029Spjd *		flags	- case flags
2283168404Spjd *
2284168404Spjd *	OUT:	uio	- updated offset and range, buffer filled.
2285168404Spjd *		eofp	- set to true if end-of-file detected.
2286168404Spjd *
2287251631Sdelphij *	RETURN:	0 on success, error code on failure.
2288168404Spjd *
2289168404Spjd * Timestamps:
2290168404Spjd *	vp - atime updated
2291168404Spjd *
2292168404Spjd * Note that the low 4 bits of the cookie returned by zap is always zero.
2293168404Spjd * This allows us to use the low range for "special" directory entries:
2294168404Spjd * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
2295168404Spjd * we use the offset 2 for the '.zfs' directory.
2296168404Spjd */
2297168404Spjd/* ARGSUSED */
2298168404Spjdstatic int
2299168962Spjdzfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies)
2300168404Spjd{
2301168404Spjd	znode_t		*zp = VTOZ(vp);
2302168404Spjd	iovec_t		*iovp;
2303185029Spjd	edirent_t	*eodp;
2304168404Spjd	dirent64_t	*odp;
2305168404Spjd	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
2306168404Spjd	objset_t	*os;
2307168404Spjd	caddr_t		outbuf;
2308168404Spjd	size_t		bufsize;
2309168404Spjd	zap_cursor_t	zc;
2310168404Spjd	zap_attribute_t	zap;
2311168404Spjd	uint_t		bytes_wanted;
2312168404Spjd	uint64_t	offset; /* must be unsigned; checks for < 1 */
2313219089Spjd	uint64_t	parent;
2314168404Spjd	int		local_eof;
2315168404Spjd	int		outcount;
2316168404Spjd	int		error;
2317168404Spjd	uint8_t		prefetch;
2318185029Spjd	boolean_t	check_sysattrs;
2319168404Spjd	uint8_t		type;
2320168962Spjd	int		ncooks;
2321168962Spjd	u_long		*cooks = NULL;
2322185029Spjd	int		flags = 0;
2323168404Spjd
2324168404Spjd	ZFS_ENTER(zfsvfs);
2325185029Spjd	ZFS_VERIFY_ZP(zp);
2326168404Spjd
2327219089Spjd	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
2328219089Spjd	    &parent, sizeof (parent))) != 0) {
2329219089Spjd		ZFS_EXIT(zfsvfs);
2330219089Spjd		return (error);
2331219089Spjd	}
2332219089Spjd
2333168404Spjd	/*
2334168404Spjd	 * If we are not given an eof variable,
2335168404Spjd	 * use a local one.
2336168404Spjd	 */
2337168404Spjd	if (eofp == NULL)
2338168404Spjd		eofp = &local_eof;
2339168404Spjd
2340168404Spjd	/*
2341168404Spjd	 * Check for valid iov_len.
2342168404Spjd	 */
2343168404Spjd	if (uio->uio_iov->iov_len <= 0) {
2344168404Spjd		ZFS_EXIT(zfsvfs);
2345249195Smm		return (SET_ERROR(EINVAL));
2346168404Spjd	}
2347168404Spjd
2348168404Spjd	/*
2349168404Spjd	 * Quit if directory has been removed (posix)
2350168404Spjd	 */
2351168404Spjd	if ((*eofp = zp->z_unlinked) != 0) {
2352168404Spjd		ZFS_EXIT(zfsvfs);
2353168404Spjd		return (0);
2354168404Spjd	}
2355168404Spjd
2356168404Spjd	error = 0;
2357168404Spjd	os = zfsvfs->z_os;
2358168404Spjd	offset = uio->uio_loffset;
2359168404Spjd	prefetch = zp->z_zn_prefetch;
2360168404Spjd
2361168404Spjd	/*
2362168404Spjd	 * Initialize the iterator cursor.
2363168404Spjd	 */
2364168404Spjd	if (offset <= 3) {
2365168404Spjd		/*
2366168404Spjd		 * Start iteration from the beginning of the directory.
2367168404Spjd		 */
2368168404Spjd		zap_cursor_init(&zc, os, zp->z_id);
2369168404Spjd	} else {
2370168404Spjd		/*
2371168404Spjd		 * The offset is a serialized cursor.
2372168404Spjd		 */
2373168404Spjd		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
2374168404Spjd	}
2375168404Spjd
2376168404Spjd	/*
2377168404Spjd	 * Get space to change directory entries into fs independent format.
2378168404Spjd	 */
2379168404Spjd	iovp = uio->uio_iov;
2380168404Spjd	bytes_wanted = iovp->iov_len;
2381168404Spjd	if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) {
2382168404Spjd		bufsize = bytes_wanted;
2383168404Spjd		outbuf = kmem_alloc(bufsize, KM_SLEEP);
2384168404Spjd		odp = (struct dirent64 *)outbuf;
2385168404Spjd	} else {
2386168404Spjd		bufsize = bytes_wanted;
2387247187Smm		outbuf = NULL;
2388168404Spjd		odp = (struct dirent64 *)iovp->iov_base;
2389168404Spjd	}
2390185029Spjd	eodp = (struct edirent *)odp;
2391168404Spjd
2392169170Spjd	if (ncookies != NULL) {
2393168404Spjd		/*
2394168404Spjd		 * Minimum entry size is dirent size and 1 byte for a file name.
2395168404Spjd		 */
2396168962Spjd		ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1);
2397219404Spjd		cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK);
2398219404Spjd		*cookies = cooks;
2399168962Spjd		*ncookies = ncooks;
2400168404Spjd	}
2401185029Spjd	/*
2402185029Spjd	 * If this VFS supports the system attribute view interface; and
2403185029Spjd	 * we're looking at an extended attribute directory; and we care
2404185029Spjd	 * about normalization conflicts on this vfs; then we must check
2405185029Spjd	 * for normalization conflicts with the sysattr name space.
2406185029Spjd	 */
2407185029Spjd#ifdef TODO
2408185029Spjd	check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
2409185029Spjd	    (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm &&
2410185029Spjd	    (flags & V_RDDIR_ENTFLAGS);
2411185029Spjd#else
2412185029Spjd	check_sysattrs = 0;
2413185029Spjd#endif
2414168404Spjd
2415168404Spjd	/*
2416168404Spjd	 * Transform to file-system independent format
2417168404Spjd	 */
2418168404Spjd	outcount = 0;
2419168404Spjd	while (outcount < bytes_wanted) {
2420168404Spjd		ino64_t objnum;
2421168404Spjd		ushort_t reclen;
2422219089Spjd		off64_t *next = NULL;
2423168404Spjd
2424168404Spjd		/*
2425168404Spjd		 * Special case `.', `..', and `.zfs'.
2426168404Spjd		 */
2427168404Spjd		if (offset == 0) {
2428168404Spjd			(void) strcpy(zap.za_name, ".");
2429185029Spjd			zap.za_normalization_conflict = 0;
2430168404Spjd			objnum = zp->z_id;
2431169108Spjd			type = DT_DIR;
2432168404Spjd		} else if (offset == 1) {
2433168404Spjd			(void) strcpy(zap.za_name, "..");
2434185029Spjd			zap.za_normalization_conflict = 0;
2435219089Spjd			objnum = parent;
2436169108Spjd			type = DT_DIR;
2437168404Spjd		} else if (offset == 2 && zfs_show_ctldir(zp)) {
2438168404Spjd			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
2439185029Spjd			zap.za_normalization_conflict = 0;
2440168404Spjd			objnum = ZFSCTL_INO_ROOT;
2441169108Spjd			type = DT_DIR;
2442168404Spjd		} else {
2443168404Spjd			/*
2444168404Spjd			 * Grab next entry.
2445168404Spjd			 */
2446168404Spjd			if (error = zap_cursor_retrieve(&zc, &zap)) {
2447168404Spjd				if ((*eofp = (error == ENOENT)) != 0)
2448168404Spjd					break;
2449168404Spjd				else
2450168404Spjd					goto update;
2451168404Spjd			}
2452168404Spjd
2453168404Spjd			if (zap.za_integer_length != 8 ||
2454168404Spjd			    zap.za_num_integers != 1) {
2455168404Spjd				cmn_err(CE_WARN, "zap_readdir: bad directory "
2456168404Spjd				    "entry, obj = %lld, offset = %lld\n",
2457168404Spjd				    (u_longlong_t)zp->z_id,
2458168404Spjd				    (u_longlong_t)offset);
2459249195Smm				error = SET_ERROR(ENXIO);
2460168404Spjd				goto update;
2461168404Spjd			}
2462168404Spjd
2463168404Spjd			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
2464168404Spjd			/*
2465168404Spjd			 * MacOS X can extract the object type here such as:
2466168404Spjd			 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
2467168404Spjd			 */
2468168404Spjd			type = ZFS_DIRENT_TYPE(zap.za_first_integer);
2469185029Spjd
2470185029Spjd			if (check_sysattrs && !zap.za_normalization_conflict) {
2471185029Spjd#ifdef TODO
2472185029Spjd				zap.za_normalization_conflict =
2473185029Spjd				    xattr_sysattr_casechk(zap.za_name);
2474185029Spjd#else
2475185029Spjd				panic("%s:%u: TODO", __func__, __LINE__);
2476185029Spjd#endif
2477185029Spjd			}
2478168404Spjd		}
2479168404Spjd
2480211932Smm		if (flags & V_RDDIR_ACCFILTER) {
2481211932Smm			/*
2482211932Smm			 * If we have no access at all, don't include
2483211932Smm			 * this entry in the returned information
2484211932Smm			 */
2485211932Smm			znode_t	*ezp;
2486211932Smm			if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0)
2487211932Smm				goto skip_entry;
2488211932Smm			if (!zfs_has_access(ezp, cr)) {
2489303970Savg				vrele(ZTOV(ezp));
2490211932Smm				goto skip_entry;
2491211932Smm			}
2492303970Savg			vrele(ZTOV(ezp));
2493211932Smm		}
2494211932Smm
2495185029Spjd		if (flags & V_RDDIR_ENTFLAGS)
2496185029Spjd			reclen = EDIRENT_RECLEN(strlen(zap.za_name));
2497185029Spjd		else
2498185029Spjd			reclen = DIRENT64_RECLEN(strlen(zap.za_name));
2499185029Spjd
2500168404Spjd		/*
2501168404Spjd		 * Will this entry fit in the buffer?
2502168404Spjd		 */
2503168404Spjd		if (outcount + reclen > bufsize) {
2504168404Spjd			/*
2505168404Spjd			 * Did we manage to fit anything in the buffer?
2506168404Spjd			 */
2507168404Spjd			if (!outcount) {
2508249195Smm				error = SET_ERROR(EINVAL);
2509168404Spjd				goto update;
2510168404Spjd			}
2511168404Spjd			break;
2512168404Spjd		}
2513185029Spjd		if (flags & V_RDDIR_ENTFLAGS) {
2514185029Spjd			/*
2515185029Spjd			 * Add extended flag entry:
2516185029Spjd			 */
2517185029Spjd			eodp->ed_ino = objnum;
2518185029Spjd			eodp->ed_reclen = reclen;
2519185029Spjd			/* NOTE: ed_off is the offset for the *next* entry */
2520185029Spjd			next = &(eodp->ed_off);
2521185029Spjd			eodp->ed_eflags = zap.za_normalization_conflict ?
2522185029Spjd			    ED_CASE_CONFLICT : 0;
2523185029Spjd			(void) strncpy(eodp->ed_name, zap.za_name,
2524185029Spjd			    EDIRENT_NAMELEN(reclen));
2525185029Spjd			eodp = (edirent_t *)((intptr_t)eodp + reclen);
2526185029Spjd		} else {
2527185029Spjd			/*
2528185029Spjd			 * Add normal entry:
2529185029Spjd			 */
2530185029Spjd			odp->d_ino = objnum;
2531185029Spjd			odp->d_reclen = reclen;
2532185029Spjd			odp->d_namlen = strlen(zap.za_name);
2533185029Spjd			(void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
2534185029Spjd			odp->d_type = type;
2535185029Spjd			odp = (dirent64_t *)((intptr_t)odp + reclen);
2536185029Spjd		}
2537168404Spjd		outcount += reclen;
2538168404Spjd
2539168404Spjd		ASSERT(outcount <= bufsize);
2540168404Spjd
2541168404Spjd		/* Prefetch znode */
2542168404Spjd		if (prefetch)
2543286705Smav			dmu_prefetch(os, objnum, 0, 0, 0,
2544286705Smav			    ZIO_PRIORITY_SYNC_READ);
2545168404Spjd
2546211932Smm	skip_entry:
2547168404Spjd		/*
2548168404Spjd		 * Move to the next entry, fill in the previous offset.
2549168404Spjd		 */
2550168404Spjd		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
2551168404Spjd			zap_cursor_advance(&zc);
2552168404Spjd			offset = zap_cursor_serialize(&zc);
2553168404Spjd		} else {
2554168404Spjd			offset += 1;
2555168404Spjd		}
2556219404Spjd
2557219404Spjd		if (cooks != NULL) {
2558219404Spjd			*cooks++ = offset;
2559219404Spjd			ncooks--;
2560219404Spjd			KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
2561219404Spjd		}
2562168404Spjd	}
2563168404Spjd	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
2564168404Spjd
2565168404Spjd	/* Subtract unused cookies */
2566168962Spjd	if (ncookies != NULL)
2567168962Spjd		*ncookies -= ncooks;
2568168404Spjd
2569168404Spjd	if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) {
2570168404Spjd		iovp->iov_base += outcount;
2571168404Spjd		iovp->iov_len -= outcount;
2572168404Spjd		uio->uio_resid -= outcount;
2573168404Spjd	} else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) {
2574168404Spjd		/*
2575168404Spjd		 * Reset the pointer.
2576168404Spjd		 */
2577168404Spjd		offset = uio->uio_loffset;
2578168404Spjd	}
2579168404Spjd
2580168404Spjdupdate:
2581168404Spjd	zap_cursor_fini(&zc);
2582168404Spjd	if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
2583168404Spjd		kmem_free(outbuf, bufsize);
2584168404Spjd
2585168404Spjd	if (error == ENOENT)
2586168404Spjd		error = 0;
2587168404Spjd
2588168404Spjd	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
2589168404Spjd
2590168404Spjd	uio->uio_loffset = offset;
2591168404Spjd	ZFS_EXIT(zfsvfs);
2592169107Spjd	if (error != 0 && cookies != NULL) {
2593168962Spjd		free(*cookies, M_TEMP);
2594168962Spjd		*cookies = NULL;
2595168962Spjd		*ncookies = 0;
2596168404Spjd	}
2597168404Spjd	return (error);
2598168404Spjd}
2599168404Spjd
2600185029Spjdulong_t zfs_fsync_sync_cnt = 4;
2601185029Spjd
2602168404Spjdstatic int
2603185029Spjdzfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
2604168404Spjd{
2605168962Spjd	znode_t	*zp = VTOZ(vp);
2606168962Spjd	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2607168404Spjd
2608185029Spjd	(void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);
2609185029Spjd
2610219089Spjd	if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
2611219089Spjd		ZFS_ENTER(zfsvfs);
2612219089Spjd		ZFS_VERIFY_ZP(zp);
2613219089Spjd		zil_commit(zfsvfs->z_log, zp->z_id);
2614219089Spjd		ZFS_EXIT(zfsvfs);
2615219089Spjd	}
2616168404Spjd	return (0);
2617168404Spjd}
2618168404Spjd
2619185029Spjd
2620168404Spjd/*
2621168404Spjd * Get the requested file attributes and place them in the provided
2622168404Spjd * vattr structure.
2623168404Spjd *
2624168404Spjd *	IN:	vp	- vnode of file.
2625168404Spjd *		vap	- va_mask identifies requested attributes.
2626185029Spjd *			  If AT_XVATTR set, then optional attrs are requested
2627185029Spjd *		flags	- ATTR_NOACLCHECK (CIFS server context)
2628168404Spjd *		cr	- credentials of caller.
2629185029Spjd *		ct	- caller context
2630168404Spjd *
2631168404Spjd *	OUT:	vap	- attribute values.
2632168404Spjd *
2633251631Sdelphij *	RETURN:	0 (always succeeds).
2634168404Spjd */
2635168404Spjd/* ARGSUSED */
2636168404Spjdstatic int
2637185029Spjdzfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
2638185029Spjd    caller_context_t *ct)
2639168404Spjd{
2640168962Spjd	znode_t *zp = VTOZ(vp);
2641168962Spjd	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2642185029Spjd	int	error = 0;
2643168962Spjd	uint32_t blksize;
2644168962Spjd	u_longlong_t nblocks;
2645185029Spjd	uint64_t links;
2646224251Sdelphij	uint64_t mtime[2], ctime[2], crtime[2], rdev;
2647185029Spjd	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
2648185029Spjd	xoptattr_t *xoap = NULL;
2649185029Spjd	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
2650224251Sdelphij	sa_bulk_attr_t bulk[4];
2651219089Spjd	int count = 0;
2652168404Spjd
2653168404Spjd	ZFS_ENTER(zfsvfs);
2654185029Spjd	ZFS_VERIFY_ZP(zp);
2655168404Spjd
2656219089Spjd	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
2657219089Spjd
2658219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
2659219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
2660243807Sdelphij	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
2661224251Sdelphij	if (vp->v_type == VBLK || vp->v_type == VCHR)
2662224251Sdelphij		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
2663224251Sdelphij		    &rdev, 8);
2664219089Spjd
2665219089Spjd	if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
2666219089Spjd		ZFS_EXIT(zfsvfs);
2667219089Spjd		return (error);
2668219089Spjd	}
2669219089Spjd
2670168404Spjd	/*
2671185029Spjd	 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
2672185029Spjd	 * Also, if we are the owner don't bother, since owner should
2673185029Spjd	 * always be allowed to read basic attributes of file.
2674185029Spjd	 */
2675219089Spjd	if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
2676219089Spjd	    (vap->va_uid != crgetuid(cr))) {
2677185029Spjd		if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
2678185029Spjd		    skipaclchk, cr)) {
2679185029Spjd			ZFS_EXIT(zfsvfs);
2680185029Spjd			return (error);
2681185029Spjd		}
2682185029Spjd	}
2683185029Spjd
2684185029Spjd	/*
2685168404Spjd	 * Return all attributes.  It's cheaper to provide the answer
2686168404Spjd	 * than to determine whether we were asked the question.
2687168404Spjd	 */
2688168404Spjd
2689219089Spjd	vap->va_type = IFTOVT(zp->z_mode);
2690219089Spjd	vap->va_mode = zp->z_mode & ~S_IFMT;
2691277300Ssmh#ifdef illumos
2692224252Sdelphij	vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev;
2693224252Sdelphij#else
2694224252Sdelphij	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
2695224252Sdelphij#endif
2696168404Spjd	vap->va_nodeid = zp->z_id;
2697185029Spjd	if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp))
2698219089Spjd		links = zp->z_links + 1;
2699185029Spjd	else
2700219089Spjd		links = zp->z_links;
2701229425Sdim	vap->va_nlink = MIN(links, LINK_MAX);	/* nlink_t limit! */
2702219089Spjd	vap->va_size = zp->z_size;
2703277300Ssmh#ifdef illumos
2704224252Sdelphij	vap->va_rdev = vp->v_rdev;
2705224252Sdelphij#else
2706224251Sdelphij	if (vp->v_type == VBLK || vp->v_type == VCHR)
2707224251Sdelphij		vap->va_rdev = zfs_cmpldev(rdev);
2708224252Sdelphij#endif
2709168404Spjd	vap->va_seq = zp->z_seq;
2710168404Spjd	vap->va_flags = 0;	/* FreeBSD: Reset chflags(2) flags. */
2711272467Saraujo     	vap->va_filerev = zp->z_seq;
2712168404Spjd
2713185029Spjd	/*
2714185029Spjd	 * Add in any requested optional attributes and the create time.
2715185029Spjd	 * Also set the corresponding bits in the returned attribute bitmap.
2716185029Spjd	 */
2717185029Spjd	if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
2718185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
2719185029Spjd			xoap->xoa_archive =
2720219089Spjd			    ((zp->z_pflags & ZFS_ARCHIVE) != 0);
2721185029Spjd			XVA_SET_RTN(xvap, XAT_ARCHIVE);
2722185029Spjd		}
2723185029Spjd
2724185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
2725185029Spjd			xoap->xoa_readonly =
2726219089Spjd			    ((zp->z_pflags & ZFS_READONLY) != 0);
2727185029Spjd			XVA_SET_RTN(xvap, XAT_READONLY);
2728185029Spjd		}
2729185029Spjd
2730185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
2731185029Spjd			xoap->xoa_system =
2732219089Spjd			    ((zp->z_pflags & ZFS_SYSTEM) != 0);
2733185029Spjd			XVA_SET_RTN(xvap, XAT_SYSTEM);
2734185029Spjd		}
2735185029Spjd
2736185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
2737185029Spjd			xoap->xoa_hidden =
2738219089Spjd			    ((zp->z_pflags & ZFS_HIDDEN) != 0);
2739185029Spjd			XVA_SET_RTN(xvap, XAT_HIDDEN);
2740185029Spjd		}
2741185029Spjd
2742185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
2743185029Spjd			xoap->xoa_nounlink =
2744219089Spjd			    ((zp->z_pflags & ZFS_NOUNLINK) != 0);
2745185029Spjd			XVA_SET_RTN(xvap, XAT_NOUNLINK);
2746185029Spjd		}
2747185029Spjd
2748185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
2749185029Spjd			xoap->xoa_immutable =
2750219089Spjd			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
2751185029Spjd			XVA_SET_RTN(xvap, XAT_IMMUTABLE);
2752185029Spjd		}
2753185029Spjd
2754185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
2755185029Spjd			xoap->xoa_appendonly =
2756219089Spjd			    ((zp->z_pflags & ZFS_APPENDONLY) != 0);
2757185029Spjd			XVA_SET_RTN(xvap, XAT_APPENDONLY);
2758185029Spjd		}
2759185029Spjd
2760185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
2761185029Spjd			xoap->xoa_nodump =
2762219089Spjd			    ((zp->z_pflags & ZFS_NODUMP) != 0);
2763185029Spjd			XVA_SET_RTN(xvap, XAT_NODUMP);
2764185029Spjd		}
2765185029Spjd
2766185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
2767185029Spjd			xoap->xoa_opaque =
2768219089Spjd			    ((zp->z_pflags & ZFS_OPAQUE) != 0);
2769185029Spjd			XVA_SET_RTN(xvap, XAT_OPAQUE);
2770185029Spjd		}
2771185029Spjd
2772185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
2773185029Spjd			xoap->xoa_av_quarantined =
2774219089Spjd			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
2775185029Spjd			XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
2776185029Spjd		}
2777185029Spjd
2778185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
2779185029Spjd			xoap->xoa_av_modified =
2780219089Spjd			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
2781185029Spjd			XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
2782185029Spjd		}
2783185029Spjd
2784185029Spjd		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
2785219089Spjd		    vp->v_type == VREG) {
2786219089Spjd			zfs_sa_get_scanstamp(zp, xvap);
2787185029Spjd		}
2788185029Spjd
2789219089Spjd		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
2790219089Spjd			xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
2791219089Spjd			XVA_SET_RTN(xvap, XAT_REPARSE);
2792219089Spjd		}
2793219089Spjd		if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
2794219089Spjd			xoap->xoa_generation = zp->z_gen;
2795219089Spjd			XVA_SET_RTN(xvap, XAT_GEN);
2796219089Spjd		}
2797219089Spjd
2798219089Spjd		if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
2799219089Spjd			xoap->xoa_offline =
2800219089Spjd			    ((zp->z_pflags & ZFS_OFFLINE) != 0);
2801219089Spjd			XVA_SET_RTN(xvap, XAT_OFFLINE);
2802219089Spjd		}
2803219089Spjd
2804219089Spjd		if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
2805219089Spjd			xoap->xoa_sparse =
2806219089Spjd			    ((zp->z_pflags & ZFS_SPARSE) != 0);
2807219089Spjd			XVA_SET_RTN(xvap, XAT_SPARSE);
2808219089Spjd		}
2809185029Spjd	}
2810185029Spjd
2811219089Spjd	ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
2812219089Spjd	ZFS_TIME_DECODE(&vap->va_mtime, mtime);
2813219089Spjd	ZFS_TIME_DECODE(&vap->va_ctime, ctime);
2814219089Spjd	ZFS_TIME_DECODE(&vap->va_birthtime, crtime);
2815168404Spjd
2816168404Spjd
2817219089Spjd	sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
2818168404Spjd	vap->va_blksize = blksize;
2819168404Spjd	vap->va_bytes = nblocks << 9;	/* nblocks * 512 */
2820168404Spjd
2821168404Spjd	if (zp->z_blksz == 0) {
2822168404Spjd		/*
2823168404Spjd		 * Block size hasn't been set; suggest maximal I/O transfers.
2824168404Spjd		 */
2825168404Spjd		vap->va_blksize = zfsvfs->z_max_blksz;
2826168404Spjd	}
2827168404Spjd
2828168404Spjd	ZFS_EXIT(zfsvfs);
2829168404Spjd	return (0);
2830168404Spjd}
2831168404Spjd
2832168404Spjd/*
2833168404Spjd * Set the file attributes to the values contained in the
2834168404Spjd * vattr structure.
2835168404Spjd *
2836168404Spjd *	IN:	vp	- vnode of file to be modified.
2837168404Spjd *		vap	- new attribute values.
2838185029Spjd *			  If AT_XVATTR set, then optional attrs are being set
2839168404Spjd *		flags	- ATTR_UTIME set if non-default time values provided.
2840185029Spjd *			- ATTR_NOACLCHECK (CIFS context only).
2841168404Spjd *		cr	- credentials of caller.
2842185029Spjd *		ct	- caller context
2843168404Spjd *
2844251631Sdelphij *	RETURN:	0 on success, error code on failure.
2845168404Spjd *
2846168404Spjd * Timestamps:
2847168404Spjd *	vp - ctime updated, mtime updated if size changed.
2848168404Spjd */
2849168404Spjd/* ARGSUSED */
2850168404Spjdstatic int
2851168962Spjdzfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
2852251631Sdelphij    caller_context_t *ct)
2853168404Spjd{
2854185029Spjd	znode_t		*zp = VTOZ(vp);
2855168404Spjd	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
2856185029Spjd	zilog_t		*zilog;
2857168404Spjd	dmu_tx_t	*tx;
2858168404Spjd	vattr_t		oldva;
2859209962Smm	xvattr_t	tmpxvattr;
2860168962Spjd	uint_t		mask = vap->va_mask;
2861247187Smm	uint_t		saved_mask = 0;
2862197831Spjd	uint64_t	saved_mode;
2863168404Spjd	int		trim_mask = 0;
2864168404Spjd	uint64_t	new_mode;
2865209962Smm	uint64_t	new_uid, new_gid;
2866219089Spjd	uint64_t	xattr_obj;
2867219089Spjd	uint64_t	mtime[2], ctime[2];
2868168404Spjd	znode_t		*attrzp;
2869168404Spjd	int		need_policy = FALSE;
2870219089Spjd	int		err, err2;
2871185029Spjd	zfs_fuid_info_t *fuidp = NULL;
2872185029Spjd	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
2873185029Spjd	xoptattr_t	*xoap;
2874219089Spjd	zfs_acl_t	*aclp;
2875185029Spjd	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
2876219089Spjd	boolean_t	fuid_dirtied = B_FALSE;
2877219089Spjd	sa_bulk_attr_t	bulk[7], xattr_bulk[7];
2878219089Spjd	int		count = 0, xattr_count = 0;
2879168404Spjd
2880168404Spjd	if (mask == 0)
2881168404Spjd		return (0);
2882168404Spjd
2883168962Spjd	if (mask & AT_NOSET)
2884249195Smm		return (SET_ERROR(EINVAL));
2885168962Spjd
2886185029Spjd	ZFS_ENTER(zfsvfs);
2887185029Spjd	ZFS_VERIFY_ZP(zp);
2888185029Spjd
2889185029Spjd	zilog = zfsvfs->z_log;
2890185029Spjd
2891185029Spjd	/*
2892185029Spjd	 * Make sure that if we have ephemeral uid/gid or xvattr specified
2893185029Spjd	 * that file system is at proper version level
2894185029Spjd	 */
2895185029Spjd
2896185029Spjd	if (zfsvfs->z_use_fuids == B_FALSE &&
2897185029Spjd	    (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
2898185029Spjd	    ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
2899185029Spjd	    (mask & AT_XVATTR))) {
2900185029Spjd		ZFS_EXIT(zfsvfs);
2901249195Smm		return (SET_ERROR(EINVAL));
2902185029Spjd	}
2903185029Spjd
2904185029Spjd	if (mask & AT_SIZE && vp->v_type == VDIR) {
2905185029Spjd		ZFS_EXIT(zfsvfs);
2906249195Smm		return (SET_ERROR(EISDIR));
2907185029Spjd	}
2908168404Spjd
2909185029Spjd	if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
2910185029Spjd		ZFS_EXIT(zfsvfs);
2911249195Smm		return (SET_ERROR(EINVAL));
2912185029Spjd	}
2913168404Spjd
2914185029Spjd	/*
2915185029Spjd	 * If this is an xvattr_t, then get a pointer to the structure of
2916185029Spjd	 * optional attributes.  If this is NULL, then we have a vattr_t.
2917185029Spjd	 */
2918185029Spjd	xoap = xva_getxoptattr(xvap);
2919168404Spjd
2920209962Smm	xva_init(&tmpxvattr);
2921209962Smm
2922185029Spjd	/*
2923185029Spjd	 * Immutable files can only alter immutable bit and atime
2924185029Spjd	 */
2925219089Spjd	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
2926185029Spjd	    ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
2927185029Spjd	    ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
2928185029Spjd		ZFS_EXIT(zfsvfs);
2929249195Smm		return (SET_ERROR(EPERM));
2930185029Spjd	}
2931185029Spjd
2932321579Smav	/*
2933321579Smav	 * Note: ZFS_READONLY is handled in zfs_zaccess_common.
2934321579Smav	 */
2935185029Spjd
2936185029Spjd	/*
2937185029Spjd	 * Verify timestamps doesn't overflow 32 bits.
2938185029Spjd	 * ZFS can handle large timestamps, but 32bit syscalls can't
2939185029Spjd	 * handle times greater than 2039.  This check should be removed
2940185029Spjd	 * once large timestamps are fully supported.
2941185029Spjd	 */
2942185029Spjd	if (mask & (AT_ATIME | AT_MTIME)) {
2943185029Spjd		if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
2944185029Spjd		    ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
2945185029Spjd			ZFS_EXIT(zfsvfs);
2946249195Smm			return (SET_ERROR(EOVERFLOW));
2947185029Spjd		}
2948185029Spjd	}
2949316391Sasomers	if (xoap && (mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME) &&
2950316391Sasomers	    TIMESPEC_OVERFLOW(&vap->va_birthtime)) {
2951316391Sasomers		ZFS_EXIT(zfsvfs);
2952316391Sasomers		return (SET_ERROR(EOVERFLOW));
2953316391Sasomers	}
2954185029Spjd
2955168404Spjd	attrzp = NULL;
2956219089Spjd	aclp = NULL;
2957168404Spjd
2958211932Smm	/* Can this be moved to before the top label? */
2959168404Spjd	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
2960168404Spjd		ZFS_EXIT(zfsvfs);
2961249195Smm		return (SET_ERROR(EROFS));
2962168404Spjd	}
2963168404Spjd
2964168404Spjd	/*
2965168404Spjd	 * First validate permissions
2966168404Spjd	 */
2967168404Spjd
2968168404Spjd	if (mask & AT_SIZE) {
2969168404Spjd		/*
2970168404Spjd		 * XXX - Note, we are not providing any open
2971168404Spjd		 * mode flags here (like FNDELAY), so we may
2972168404Spjd		 * block if there are locks present... this
2973168404Spjd		 * should be addressed in openat().
2974168404Spjd		 */
2975185029Spjd		/* XXX - would it be OK to generate a log record here? */
2976185029Spjd		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
2977168404Spjd		if (err) {
2978168404Spjd			ZFS_EXIT(zfsvfs);
2979168404Spjd			return (err);
2980168404Spjd		}
2981168404Spjd	}
2982168404Spjd
2983185029Spjd	if (mask & (AT_ATIME|AT_MTIME) ||
2984185029Spjd	    ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
2985185029Spjd	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
2986185029Spjd	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
2987219089Spjd	    XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
2988219089Spjd	    XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
2989185029Spjd	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
2990219089Spjd	    XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
2991185029Spjd		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
2992185029Spjd		    skipaclchk, cr);
2993219089Spjd	}
2994168404Spjd
2995168404Spjd	if (mask & (AT_UID|AT_GID)) {
2996168404Spjd		int	idmask = (mask & (AT_UID|AT_GID));
2997168404Spjd		int	take_owner;
2998168404Spjd		int	take_group;
2999168404Spjd
3000168404Spjd		/*
3001168404Spjd		 * NOTE: even if a new mode is being set,
3002168404Spjd		 * we may clear S_ISUID/S_ISGID bits.
3003168404Spjd		 */
3004168404Spjd
3005168404Spjd		if (!(mask & AT_MODE))
3006219089Spjd			vap->va_mode = zp->z_mode;
3007168404Spjd
3008168404Spjd		/*
3009168404Spjd		 * Take ownership or chgrp to group we are a member of
3010168404Spjd		 */
3011168404Spjd
3012168404Spjd		take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
3013185029Spjd		take_group = (mask & AT_GID) &&
3014185029Spjd		    zfs_groupmember(zfsvfs, vap->va_gid, cr);
3015168404Spjd
3016168404Spjd		/*
3017168404Spjd		 * If both AT_UID and AT_GID are set then take_owner and
3018168404Spjd		 * take_group must both be set in order to allow taking
3019168404Spjd		 * ownership.
3020168404Spjd		 *
3021168404Spjd		 * Otherwise, send the check through secpolicy_vnode_setattr()
3022168404Spjd		 *
3023168404Spjd		 */
3024168404Spjd
3025168404Spjd		if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
3026168404Spjd		    ((idmask == AT_UID) && take_owner) ||
3027168404Spjd		    ((idmask == AT_GID) && take_group)) {
3028185029Spjd			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
3029185029Spjd			    skipaclchk, cr) == 0) {
3030168404Spjd				/*
3031168404Spjd				 * Remove setuid/setgid for non-privileged users
3032168404Spjd				 */
3033185029Spjd				secpolicy_setid_clear(vap, vp, cr);
3034168404Spjd				trim_mask = (mask & (AT_UID|AT_GID));
3035168404Spjd			} else {
3036168404Spjd				need_policy =  TRUE;
3037168404Spjd			}
3038168404Spjd		} else {
3039168404Spjd			need_policy =  TRUE;
3040168404Spjd		}
3041168404Spjd	}
3042168404Spjd
3043219089Spjd	oldva.va_mode = zp->z_mode;
3044185029Spjd	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
3045185029Spjd	if (mask & AT_XVATTR) {
3046209962Smm		/*
3047209962Smm		 * Update xvattr mask to include only those attributes
3048209962Smm		 * that are actually changing.
3049209962Smm		 *
3050209962Smm		 * the bits will be restored prior to actually setting
3051209962Smm		 * the attributes so the caller thinks they were set.
3052209962Smm		 */
3053209962Smm		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
3054209962Smm			if (xoap->xoa_appendonly !=
3055219089Spjd			    ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
3056209962Smm				need_policy = TRUE;
3057209962Smm			} else {
3058209962Smm				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
3059209962Smm				XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
3060209962Smm			}
3061209962Smm		}
3062209962Smm
3063209962Smm		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
3064209962Smm			if (xoap->xoa_nounlink !=
3065219089Spjd			    ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
3066209962Smm				need_policy = TRUE;
3067209962Smm			} else {
3068209962Smm				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
3069209962Smm				XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
3070209962Smm			}
3071209962Smm		}
3072209962Smm
3073209962Smm		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
3074209962Smm			if (xoap->xoa_immutable !=
3075219089Spjd			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
3076209962Smm				need_policy = TRUE;
3077209962Smm			} else {
3078209962Smm				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
3079209962Smm				XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
3080209962Smm			}
3081209962Smm		}
3082209962Smm
3083209962Smm		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
3084209962Smm			if (xoap->xoa_nodump !=
3085219089Spjd			    ((zp->z_pflags & ZFS_NODUMP) != 0)) {
3086209962Smm				need_policy = TRUE;
3087209962Smm			} else {
3088209962Smm				XVA_CLR_REQ(xvap, XAT_NODUMP);
3089209962Smm				XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
3090209962Smm			}
3091209962Smm		}
3092209962Smm
3093209962Smm		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
3094209962Smm			if (xoap->xoa_av_modified !=
3095219089Spjd			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
3096209962Smm				need_policy = TRUE;
3097209962Smm			} else {
3098209962Smm				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
3099209962Smm				XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
3100209962Smm			}
3101209962Smm		}
3102209962Smm
3103209962Smm		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
3104209962Smm			if ((vp->v_type != VREG &&
3105209962Smm			    xoap->xoa_av_quarantined) ||
3106209962Smm			    xoap->xoa_av_quarantined !=
3107219089Spjd			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
3108209962Smm				need_policy = TRUE;
3109209962Smm			} else {
3110209962Smm				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
3111209962Smm				XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
3112209962Smm			}
3113209962Smm		}
3114209962Smm
3115219089Spjd		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
3116219089Spjd			ZFS_EXIT(zfsvfs);
3117249195Smm			return (SET_ERROR(EPERM));
3118219089Spjd		}
3119219089Spjd
3120209962Smm		if (need_policy == FALSE &&
3121209962Smm		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
3122209962Smm		    XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
3123185029Spjd			need_policy = TRUE;
3124185029Spjd		}
3125185029Spjd	}
3126185029Spjd
3127168404Spjd	if (mask & AT_MODE) {
3128185029Spjd		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
3129168962Spjd			err = secpolicy_setid_setsticky_clear(vp, vap,
3130168962Spjd			    &oldva, cr);
3131168962Spjd			if (err) {
3132168962Spjd				ZFS_EXIT(zfsvfs);
3133168962Spjd				return (err);
3134168962Spjd			}
3135168404Spjd			trim_mask |= AT_MODE;
3136168404Spjd		} else {
3137168404Spjd			need_policy = TRUE;
3138168404Spjd		}
3139168404Spjd	}
3140168404Spjd
3141168404Spjd	if (need_policy) {
3142168404Spjd		/*
3143168404Spjd		 * If trim_mask is set then take ownership
3144168404Spjd		 * has been granted or write_acl is present and user
3145168404Spjd		 * has the ability to modify mode.  In that case remove
3146168404Spjd		 * UID|GID and or MODE from mask so that
3147168404Spjd		 * secpolicy_vnode_setattr() doesn't revoke it.
3148168404Spjd		 */
3149168404Spjd
3150168404Spjd		if (trim_mask) {
3151168404Spjd			saved_mask = vap->va_mask;
3152168404Spjd			vap->va_mask &= ~trim_mask;
3153197831Spjd			if (trim_mask & AT_MODE) {
3154197831Spjd				/*
3155197831Spjd				 * Save the mode, as secpolicy_vnode_setattr()
3156197831Spjd				 * will overwrite it with ova.va_mode.
3157197831Spjd				 */
3158197831Spjd				saved_mode = vap->va_mode;
3159197831Spjd			}
3160168404Spjd		}
3161168404Spjd		err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
3162185029Spjd		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
3163168404Spjd		if (err) {
3164168404Spjd			ZFS_EXIT(zfsvfs);
3165168404Spjd			return (err);
3166168404Spjd		}
3167168404Spjd
3168197831Spjd		if (trim_mask) {
3169168404Spjd			vap->va_mask |= saved_mask;
3170197831Spjd			if (trim_mask & AT_MODE) {
3171197831Spjd				/*
3172197831Spjd				 * Recover the mode after
3173197831Spjd				 * secpolicy_vnode_setattr().
3174197831Spjd				 */
3175197831Spjd				vap->va_mode = saved_mode;
3176197831Spjd			}
3177197831Spjd		}
3178168404Spjd	}
3179168404Spjd
3180168404Spjd	/*
3181168404Spjd	 * secpolicy_vnode_setattr, or take ownership may have
3182168404Spjd	 * changed va_mask
3183168404Spjd	 */
3184168404Spjd	mask = vap->va_mask;
3185168404Spjd
3186219089Spjd	if ((mask & (AT_UID | AT_GID))) {
3187219089Spjd		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
3188219089Spjd		    &xattr_obj, sizeof (xattr_obj));
3189168404Spjd
3190219089Spjd		if (err == 0 && xattr_obj) {
3191219089Spjd			err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
3192306818Savg			if (err == 0) {
3193306818Savg				err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE);
3194306818Savg				if (err != 0)
3195306818Savg					vrele(ZTOV(attrzp));
3196306818Savg			}
3197209962Smm			if (err)
3198219089Spjd				goto out2;
3199168404Spjd		}
3200209962Smm		if (mask & AT_UID) {
3201209962Smm			new_uid = zfs_fuid_create(zfsvfs,
3202209962Smm			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
3203219089Spjd			if (new_uid != zp->z_uid &&
3204219089Spjd			    zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) {
3205219089Spjd				if (attrzp)
3206306818Savg					vput(ZTOV(attrzp));
3207249195Smm				err = SET_ERROR(EDQUOT);
3208219089Spjd				goto out2;
3209209962Smm			}
3210209962Smm		}
3211209962Smm
3212209962Smm		if (mask & AT_GID) {
3213209962Smm			new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
3214209962Smm			    cr, ZFS_GROUP, &fuidp);
3215219089Spjd			if (new_gid != zp->z_gid &&
3216219089Spjd			    zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) {
3217219089Spjd				if (attrzp)
3218306818Savg					vput(ZTOV(attrzp));
3219249195Smm				err = SET_ERROR(EDQUOT);
3220219089Spjd				goto out2;
3221209962Smm			}
3222209962Smm		}
3223219089Spjd	}
3224219089Spjd	tx = dmu_tx_create(zfsvfs->z_os);
3225219089Spjd
3226219089Spjd	if (mask & AT_MODE) {
3227219089Spjd		uint64_t pmode = zp->z_mode;
3228219089Spjd		uint64_t acl_obj;
3229219089Spjd		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
3230219089Spjd
3231243560Smm		if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
3232243560Smm		    !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
3233249195Smm			err = SET_ERROR(EPERM);
3234243560Smm			goto out;
3235243560Smm		}
3236243560Smm
3237224174Smm		if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))
3238224174Smm			goto out;
3239219089Spjd
3240219089Spjd		if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
3241219089Spjd			/*
3242219089Spjd			 * Are we upgrading ACL from old V0 format
3243219089Spjd			 * to V1 format?
3244219089Spjd			 */
3245219089Spjd			if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
3246219089Spjd			    zfs_znode_acl_version(zp) ==
3247219089Spjd			    ZFS_ACL_VERSION_INITIAL) {
3248219089Spjd				dmu_tx_hold_free(tx, acl_obj, 0,
3249219089Spjd				    DMU_OBJECT_END);
3250219089Spjd				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
3251219089Spjd				    0, aclp->z_acl_bytes);
3252209962Smm			} else {
3253219089Spjd				dmu_tx_hold_write(tx, acl_obj, 0,
3254219089Spjd				    aclp->z_acl_bytes);
3255209962Smm			}
3256219089Spjd		} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
3257219089Spjd			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
3258219089Spjd			    0, aclp->z_acl_bytes);
3259209962Smm		}
3260219089Spjd		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
3261219089Spjd	} else {
3262219089Spjd		if ((mask & AT_XVATTR) &&
3263219089Spjd		    XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
3264219089Spjd			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
3265219089Spjd		else
3266219089Spjd			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
3267168404Spjd	}
3268168404Spjd
3269219089Spjd	if (attrzp) {
3270219089Spjd		dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
3271219089Spjd	}
3272219089Spjd
3273219089Spjd	fuid_dirtied = zfsvfs->z_fuid_dirty;
3274219089Spjd	if (fuid_dirtied)
3275219089Spjd		zfs_fuid_txhold(zfsvfs, tx);
3276219089Spjd
3277219089Spjd	zfs_sa_upgrade_txholds(tx, zp);
3278219089Spjd
3279258720Savg	err = dmu_tx_assign(tx, TXG_WAIT);
3280258720Savg	if (err)
3281209962Smm		goto out;
3282168404Spjd
3283219089Spjd	count = 0;
3284168404Spjd	/*
3285168404Spjd	 * Set each attribute requested.
3286168404Spjd	 * We group settings according to the locks they need to acquire.
3287168404Spjd	 *
3288168404Spjd	 * Note: you cannot set ctime directly, although it will be
3289168404Spjd	 * updated as a side-effect of calling this function.
3290168404Spjd	 */
3291168404Spjd
3292219089Spjd	if (mask & (AT_UID|AT_GID|AT_MODE))
3293219089Spjd		mutex_enter(&zp->z_acl_lock);
3294168404Spjd
3295219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
3296219089Spjd	    &zp->z_pflags, sizeof (zp->z_pflags));
3297219089Spjd
3298219089Spjd	if (attrzp) {
3299219089Spjd		if (mask & (AT_UID|AT_GID|AT_MODE))
3300219089Spjd			mutex_enter(&attrzp->z_acl_lock);
3301219089Spjd		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
3302219089Spjd		    SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
3303219089Spjd		    sizeof (attrzp->z_pflags));
3304219089Spjd	}
3305219089Spjd
3306219089Spjd	if (mask & (AT_UID|AT_GID)) {
3307219089Spjd
3308219089Spjd		if (mask & AT_UID) {
3309219089Spjd			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
3310219089Spjd			    &new_uid, sizeof (new_uid));
3311219089Spjd			zp->z_uid = new_uid;
3312219089Spjd			if (attrzp) {
3313219089Spjd				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
3314219089Spjd				    SA_ZPL_UID(zfsvfs), NULL, &new_uid,
3315219089Spjd				    sizeof (new_uid));
3316219089Spjd				attrzp->z_uid = new_uid;
3317219089Spjd			}
3318219089Spjd		}
3319219089Spjd
3320219089Spjd		if (mask & AT_GID) {
3321219089Spjd			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
3322219089Spjd			    NULL, &new_gid, sizeof (new_gid));
3323219089Spjd			zp->z_gid = new_gid;
3324219089Spjd			if (attrzp) {
3325219089Spjd				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
3326219089Spjd				    SA_ZPL_GID(zfsvfs), NULL, &new_gid,
3327219089Spjd				    sizeof (new_gid));
3328219089Spjd				attrzp->z_gid = new_gid;
3329219089Spjd			}
3330219089Spjd		}
3331219089Spjd		if (!(mask & AT_MODE)) {
3332219089Spjd			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
3333219089Spjd			    NULL, &new_mode, sizeof (new_mode));
3334219089Spjd			new_mode = zp->z_mode;
3335219089Spjd		}
3336219089Spjd		err = zfs_acl_chown_setattr(zp);
3337219089Spjd		ASSERT(err == 0);
3338219089Spjd		if (attrzp) {
3339219089Spjd			err = zfs_acl_chown_setattr(attrzp);
3340219089Spjd			ASSERT(err == 0);
3341219089Spjd		}
3342219089Spjd	}
3343219089Spjd
3344168404Spjd	if (mask & AT_MODE) {
3345219089Spjd		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
3346219089Spjd		    &new_mode, sizeof (new_mode));
3347219089Spjd		zp->z_mode = new_mode;
3348219089Spjd		ASSERT3U((uintptr_t)aclp, !=, 0);
3349209962Smm		err = zfs_aclset_common(zp, aclp, cr, tx);
3350240415Smm		ASSERT0(err);
3351219089Spjd		if (zp->z_acl_cached)
3352219089Spjd			zfs_acl_free(zp->z_acl_cached);
3353211932Smm		zp->z_acl_cached = aclp;
3354211932Smm		aclp = NULL;
3355168404Spjd	}
3356168404Spjd
3357168404Spjd
3358219089Spjd	if (mask & AT_ATIME) {
3359219089Spjd		ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
3360219089Spjd		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
3361219089Spjd		    &zp->z_atime, sizeof (zp->z_atime));
3362168404Spjd	}
3363168404Spjd
3364219089Spjd	if (mask & AT_MTIME) {
3365219089Spjd		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
3366219089Spjd		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
3367219089Spjd		    mtime, sizeof (mtime));
3368168404Spjd	}
3369168404Spjd
3370185029Spjd	/* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
3371219089Spjd	if (mask & AT_SIZE && !(mask & AT_MTIME)) {
3372219089Spjd		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
3373219089Spjd		    NULL, mtime, sizeof (mtime));
3374219089Spjd		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
3375219089Spjd		    &ctime, sizeof (ctime));
3376219089Spjd		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
3377219089Spjd		    B_TRUE);
3378219089Spjd	} else if (mask != 0) {
3379219089Spjd		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
3380219089Spjd		    &ctime, sizeof (ctime));
3381219089Spjd		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
3382219089Spjd		    B_TRUE);
3383219089Spjd		if (attrzp) {
3384219089Spjd			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
3385219089Spjd			    SA_ZPL_CTIME(zfsvfs), NULL,
3386219089Spjd			    &ctime, sizeof (ctime));
3387219089Spjd			zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
3388219089Spjd			    mtime, ctime, B_TRUE);
3389219089Spjd		}
3390219089Spjd	}
3391185029Spjd	/*
3392185029Spjd	 * Do this after setting timestamps to prevent timestamp
3393185029Spjd	 * update from toggling bit
3394185029Spjd	 */
3395168404Spjd
3396185029Spjd	if (xoap && (mask & AT_XVATTR)) {
3397209962Smm
3398316391Sasomers		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
3399316391Sasomers			xoap->xoa_createtime = vap->va_birthtime;
3400209962Smm		/*
3401209962Smm		 * restore trimmed off masks
3402209962Smm		 * so that return masks can be set for caller.
3403209962Smm		 */
3404209962Smm
3405209962Smm		if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
3406209962Smm			XVA_SET_REQ(xvap, XAT_APPENDONLY);
3407209962Smm		}
3408209962Smm		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
3409209962Smm			XVA_SET_REQ(xvap, XAT_NOUNLINK);
3410209962Smm		}
3411209962Smm		if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
3412209962Smm			XVA_SET_REQ(xvap, XAT_IMMUTABLE);
3413209962Smm		}
3414209962Smm		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
3415209962Smm			XVA_SET_REQ(xvap, XAT_NODUMP);
3416209962Smm		}
3417209962Smm		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
3418209962Smm			XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
3419209962Smm		}
3420209962Smm		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
3421209962Smm			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
3422209962Smm		}
3423209962Smm
3424219089Spjd		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
3425185029Spjd			ASSERT(vp->v_type == VREG);
3426185029Spjd
3427219089Spjd		zfs_xvattr_set(zp, xvap, tx);
3428185029Spjd	}
3429185029Spjd
3430209962Smm	if (fuid_dirtied)
3431209962Smm		zfs_fuid_sync(zfsvfs, tx);
3432209962Smm
3433168404Spjd	if (mask != 0)
3434185029Spjd		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
3435168404Spjd
3436219089Spjd	if (mask & (AT_UID|AT_GID|AT_MODE))
3437219089Spjd		mutex_exit(&zp->z_acl_lock);
3438168404Spjd
3439219089Spjd	if (attrzp) {
3440219089Spjd		if (mask & (AT_UID|AT_GID|AT_MODE))
3441219089Spjd			mutex_exit(&attrzp->z_acl_lock);
3442219089Spjd	}
3443209962Smmout:
3444219089Spjd	if (err == 0 && attrzp) {
3445219089Spjd		err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
3446219089Spjd		    xattr_count, tx);
3447219089Spjd		ASSERT(err2 == 0);
3448219089Spjd	}
3449219089Spjd
3450168404Spjd	if (attrzp)
3451306818Savg		vput(ZTOV(attrzp));
3452251631Sdelphij
3453211932Smm	if (aclp)
3454209962Smm		zfs_acl_free(aclp);
3455168404Spjd
3456209962Smm	if (fuidp) {
3457209962Smm		zfs_fuid_info_free(fuidp);
3458209962Smm		fuidp = NULL;
3459209962Smm	}
3460209962Smm
3461219089Spjd	if (err) {
3462209962Smm		dmu_tx_abort(tx);
3463219089Spjd	} else {
3464219089Spjd		err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
3465209962Smm		dmu_tx_commit(tx);
3466219089Spjd	}
3467209962Smm
3468219089Spjdout2:
3469219089Spjd	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3470219089Spjd		zil_commit(zilog, 0);
3471209962Smm
3472168404Spjd	ZFS_EXIT(zfsvfs);
3473168404Spjd	return (err);
3474168404Spjd}
3475168404Spjd
3476168404Spjd/*
3477303970Savg * We acquire all but fdvp locks using non-blocking acquisitions.  If we
3478303970Savg * fail to acquire any lock in the path we will drop all held locks,
3479303970Savg * acquire the new lock in a blocking fashion, and then release it and
3480303970Savg * restart the rename.  This acquire/release step ensures that we do not
3481303970Savg * spin on a lock waiting for release.  On error release all vnode locks
3482303970Savg * and decrement references the way tmpfs_rename() would do.
3483168404Spjd */
3484303970Savgstatic int
3485303970Savgzfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
3486303970Savg    struct vnode *tdvp, struct vnode **tvpp,
3487303970Savg    const struct componentname *scnp, const struct componentname *tcnp)
3488168404Spjd{
3489303970Savg	zfsvfs_t	*zfsvfs;
3490303970Savg	struct vnode	*nvp, *svp, *tvp;
3491303970Savg	znode_t		*sdzp, *tdzp, *szp, *tzp;
3492303970Savg	const char	*snm = scnp->cn_nameptr;
3493303970Savg	const char	*tnm = tcnp->cn_nameptr;
3494303970Savg	int error;
3495168404Spjd
3496303970Savg	VOP_UNLOCK(tdvp, 0);
3497303970Savg	if (*tvpp != NULL && *tvpp != tdvp)
3498303970Savg		VOP_UNLOCK(*tvpp, 0);
3499303970Savg
3500303970Savgrelock:
3501303970Savg	error = vn_lock(sdvp, LK_EXCLUSIVE);
3502303970Savg	if (error)
3503303970Savg		goto out;
3504303970Savg	sdzp = VTOZ(sdvp);
3505303970Savg
3506303970Savg	error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
3507303970Savg	if (error != 0) {
3508303970Savg		VOP_UNLOCK(sdvp, 0);
3509303970Savg		if (error != EBUSY)
3510303970Savg			goto out;
3511303970Savg		error = vn_lock(tdvp, LK_EXCLUSIVE);
3512303970Savg		if (error)
3513303970Savg			goto out;
3514303970Savg		VOP_UNLOCK(tdvp, 0);
3515303970Savg		goto relock;
3516168404Spjd	}
3517303970Savg	tdzp = VTOZ(tdvp);
3518168404Spjd
3519303970Savg	/*
3520303970Savg	 * Before using sdzp and tdzp we must ensure that they are live.
3521303970Savg	 * As a porting legacy from illumos we have two things to worry
3522303970Savg	 * about.  One is typical for FreeBSD and it is that the vnode is
3523303970Savg	 * not reclaimed (doomed).  The other is that the znode is live.
3524303970Savg	 * The current code can invalidate the znode without acquiring the
3525303970Savg	 * corresponding vnode lock if the object represented by the znode
3526303970Savg	 * and vnode is no longer valid after a rollback or receive operation.
3527303970Savg	 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock
3528303970Savg	 * that protects the znodes from the invalidation.
3529303970Savg	 */
3530303970Savg	zfsvfs = sdzp->z_zfsvfs;
3531303970Savg	ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
3532303970Savg	ZFS_ENTER(zfsvfs);
3533168404Spjd
3534168404Spjd	/*
3535303970Savg	 * We can not use ZFS_VERIFY_ZP() here because it could directly return
3536303970Savg	 * bypassing the cleanup code in the case of an error.
3537168404Spjd	 */
3538303970Savg	if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) {
3539303970Savg		ZFS_EXIT(zfsvfs);
3540303970Savg		VOP_UNLOCK(sdvp, 0);
3541303970Savg		VOP_UNLOCK(tdvp, 0);
3542303970Savg		error = SET_ERROR(EIO);
3543303970Savg		goto out;
3544303970Savg	}
3545303970Savg
3546303970Savg	/*
3547303970Savg	 * Re-resolve svp to be certain it still exists and fetch the
3548303970Savg	 * correct vnode.
3549303970Savg	 */
3550303970Savg	error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS);
3551303970Savg	if (error != 0) {
3552303970Savg		/* Source entry invalid or not there. */
3553303970Savg		ZFS_EXIT(zfsvfs);
3554303970Savg		VOP_UNLOCK(sdvp, 0);
3555303970Savg		VOP_UNLOCK(tdvp, 0);
3556303970Savg		if ((scnp->cn_flags & ISDOTDOT) != 0 ||
3557303970Savg		    (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
3558303970Savg			error = SET_ERROR(EINVAL);
3559303970Savg		goto out;
3560303970Savg	}
3561303970Savg	svp = ZTOV(szp);
3562303970Savg
3563303970Savg	/*
3564303970Savg	 * Re-resolve tvp, if it disappeared we just carry on.
3565303970Savg	 */
3566303970Savg	error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0);
3567303970Savg	if (error != 0) {
3568303970Savg		ZFS_EXIT(zfsvfs);
3569303970Savg		VOP_UNLOCK(sdvp, 0);
3570303970Savg		VOP_UNLOCK(tdvp, 0);
3571303970Savg		vrele(svp);
3572303970Savg		if ((tcnp->cn_flags & ISDOTDOT) != 0)
3573303970Savg			error = SET_ERROR(EINVAL);
3574303970Savg		goto out;
3575303970Savg	}
3576303970Savg	if (tzp != NULL)
3577303970Savg		tvp = ZTOV(tzp);
3578303970Savg	else
3579303970Savg		tvp = NULL;
3580303970Savg
3581303970Savg	/*
3582303970Savg	 * At present the vnode locks must be acquired before z_teardown_lock,
3583303970Savg	 * although it would be more logical to use the opposite order.
3584303970Savg	 */
3585303970Savg	ZFS_EXIT(zfsvfs);
3586303970Savg
3587303970Savg	/*
3588303970Savg	 * Now try acquire locks on svp and tvp.
3589303970Savg	 */
3590303970Savg	nvp = svp;
3591303970Savg	error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
3592303970Savg	if (error != 0) {
3593303970Savg		VOP_UNLOCK(sdvp, 0);
3594303970Savg		VOP_UNLOCK(tdvp, 0);
3595303970Savg		if (tvp != NULL)
3596303970Savg			vrele(tvp);
3597303970Savg		if (error != EBUSY) {
3598303970Savg			vrele(nvp);
3599303970Savg			goto out;
3600303970Savg		}
3601303970Savg		error = vn_lock(nvp, LK_EXCLUSIVE);
3602303970Savg		if (error != 0) {
3603303970Savg			vrele(nvp);
3604303970Savg			goto out;
3605303970Savg		}
3606303970Savg		VOP_UNLOCK(nvp, 0);
3607303970Savg		/*
3608303970Savg		 * Concurrent rename race.
3609303970Savg		 * XXX ?
3610303970Savg		 */
3611303970Savg		if (nvp == tdvp) {
3612303970Savg			vrele(nvp);
3613303970Savg			error = SET_ERROR(EINVAL);
3614303970Savg			goto out;
3615303970Savg		}
3616303970Savg		vrele(*svpp);
3617303970Savg		*svpp = nvp;
3618303970Savg		goto relock;
3619303970Savg	}
3620303970Savg	vrele(*svpp);
3621303970Savg	*svpp = nvp;
3622303970Savg
3623303970Savg	if (*tvpp != NULL)
3624303970Savg		vrele(*tvpp);
3625303970Savg	*tvpp = NULL;
3626303970Savg	if (tvp != NULL) {
3627303970Savg		nvp = tvp;
3628303970Savg		error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
3629303970Savg		if (error != 0) {
3630303970Savg			VOP_UNLOCK(sdvp, 0);
3631303970Savg			VOP_UNLOCK(tdvp, 0);
3632303970Savg			VOP_UNLOCK(*svpp, 0);
3633303970Savg			if (error != EBUSY) {
3634303970Savg				vrele(nvp);
3635303970Savg				goto out;
3636168404Spjd			}
3637303970Savg			error = vn_lock(nvp, LK_EXCLUSIVE);
3638303970Savg			if (error != 0) {
3639303970Savg				vrele(nvp);
3640303970Savg				goto out;
3641303970Savg			}
3642303970Savg			vput(nvp);
3643303970Savg			goto relock;
3644168404Spjd		}
3645303970Savg		*tvpp = nvp;
3646303970Savg	}
3647168404Spjd
3648303970Savg	return (0);
3649168404Spjd
3650303970Savgout:
3651303970Savg	return (error);
3652303970Savg}
3653168404Spjd
3654303970Savg/*
3655303970Savg * Note that we must use VRELE_ASYNC in this function as it walks
3656303970Savg * up the directory tree and vrele may need to acquire an exclusive
3657303970Savg * lock if a last reference to a vnode is dropped.
3658303970Savg */
3659303970Savgstatic int
3660303970Savgzfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp)
3661303970Savg{
3662303970Savg	zfsvfs_t	*zfsvfs;
3663303970Savg	znode_t		*zp, *zp1;
3664303970Savg	uint64_t	parent;
3665303970Savg	int		error;
3666168404Spjd
3667303970Savg	zfsvfs = tdzp->z_zfsvfs;
3668303970Savg	if (tdzp == szp)
3669303970Savg		return (SET_ERROR(EINVAL));
3670303970Savg	if (tdzp == sdzp)
3671303970Savg		return (0);
3672303970Savg	if (tdzp->z_id == zfsvfs->z_root)
3673303970Savg		return (0);
3674303970Savg	zp = tdzp;
3675303970Savg	for (;;) {
3676303970Savg		ASSERT(!zp->z_unlinked);
3677303970Savg		if ((error = sa_lookup(zp->z_sa_hdl,
3678303970Savg		    SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
3679303970Savg			break;
3680303970Savg
3681303970Savg		if (parent == szp->z_id) {
3682303970Savg			error = SET_ERROR(EINVAL);
3683303970Savg			break;
3684168404Spjd		}
3685303970Savg		if (parent == zfsvfs->z_root)
3686303970Savg			break;
3687303970Savg		if (parent == sdzp->z_id)
3688303970Savg			break;
3689168404Spjd
3690303970Savg		error = zfs_zget(zfsvfs, parent, &zp1);
3691303970Savg		if (error != 0)
3692303970Savg			break;
3693168404Spjd
3694303970Savg		if (zp != tdzp)
3695303970Savg			VN_RELE_ASYNC(ZTOV(zp),
3696303970Savg			    dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
3697303970Savg		zp = zp1;
3698303970Savg	}
3699303970Savg
3700303970Savg	if (error == ENOTDIR)
3701303970Savg		panic("checkpath: .. not a directory\n");
3702303970Savg	if (zp != tdzp)
3703303970Savg		VN_RELE_ASYNC(ZTOV(zp),
3704303970Savg		    dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
3705303970Savg	return (error);
3706168404Spjd}
3707168404Spjd
3708168404Spjd/*
3709168404Spjd * Move an entry from the provided source directory to the target
3710168404Spjd * directory.  Change the entry name as indicated.
3711168404Spjd *
3712168404Spjd *	IN:	sdvp	- Source directory containing the "old entry".
3713168404Spjd *		snm	- Old entry name.
3714168404Spjd *		tdvp	- Target directory to contain the "new entry".
3715168404Spjd *		tnm	- New entry name.
3716168404Spjd *		cr	- credentials of caller.
3717185029Spjd *		ct	- caller context
3718185029Spjd *		flags	- case flags
3719168404Spjd *
3720251631Sdelphij *	RETURN:	0 on success, error code on failure.
3721168404Spjd *
3722168404Spjd * Timestamps:
3723168404Spjd *	sdvp,tdvp - ctime|mtime updated
3724168404Spjd */
3725185029Spjd/*ARGSUSED*/
3726168404Spjdstatic int
3727303970Savgzfs_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3728303970Savg    vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3729303970Savg    cred_t *cr)
3730168404Spjd{
3731303970Savg	zfsvfs_t	*zfsvfs;
3732303970Savg	znode_t		*sdzp, *tdzp, *szp, *tzp;
3733303970Savg	zilog_t		*zilog = NULL;
3734168404Spjd	dmu_tx_t	*tx;
3735303970Savg	char		*snm = scnp->cn_nameptr;
3736303970Savg	char		*tnm = tcnp->cn_nameptr;
3737185029Spjd	int		error = 0;
3738168404Spjd
3739303970Savg	/* Reject renames across filesystems. */
3740303970Savg	if ((*svpp)->v_mount != tdvp->v_mount ||
3741303970Savg	    ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) {
3742303970Savg		error = SET_ERROR(EXDEV);
3743303970Savg		goto out;
3744303970Savg	}
3745168404Spjd
3746303970Savg	if (zfsctl_is_node(tdvp)) {
3747303970Savg		error = SET_ERROR(EXDEV);
3748303970Savg		goto out;
3749303970Savg	}
3750303970Savg
3751168962Spjd	/*
3752303970Savg	 * Lock all four vnodes to ensure safety and semantics of renaming.
3753168962Spjd	 */
3754303970Savg	error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp);
3755303970Savg	if (error != 0) {
3756303970Savg		/* no vnodes are locked in the case of error here */
3757303970Savg		return (error);
3758264392Sdavide	}
3759168962Spjd
3760303970Savg	tdzp = VTOZ(tdvp);
3761303970Savg	sdzp = VTOZ(sdvp);
3762303970Savg	zfsvfs = tdzp->z_zfsvfs;
3763303970Savg	zilog = zfsvfs->z_log;
3764303970Savg
3765254585Sdelphij	/*
3766303970Savg	 * After we re-enter ZFS_ENTER() we will have to revalidate all
3767303970Savg	 * znodes involved.
3768254585Sdelphij	 */
3769303970Savg	ZFS_ENTER(zfsvfs);
3770168404Spjd
3771185029Spjd	if (zfsvfs->z_utf8 && u8_validate(tnm,
3772185029Spjd	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3773303970Savg		error = SET_ERROR(EILSEQ);
3774303970Savg		goto unlockout;
3775185029Spjd	}
3776185029Spjd
3777303970Savg	/* If source and target are the same file, there is nothing to do. */
3778303970Savg	if ((*svpp) == (*tvpp)) {
3779303970Savg		error = 0;
3780303970Savg		goto unlockout;
3781303970Savg	}
3782185029Spjd
3783303970Savg	if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
3784303970Savg	    ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
3785303970Savg	    (*tvpp)->v_mountedhere != NULL)) {
3786303970Savg		error = SET_ERROR(EXDEV);
3787303970Savg		goto unlockout;
3788303970Savg	}
3789168404Spjd
3790168404Spjd	/*
3791303970Savg	 * We can not use ZFS_VERIFY_ZP() here because it could directly return
3792303970Savg	 * bypassing the cleanup code in the case of an error.
3793168404Spjd	 */
3794303970Savg	if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) {
3795303970Savg		error = SET_ERROR(EIO);
3796303970Savg		goto unlockout;
3797168404Spjd	}
3798168404Spjd
3799303970Savg	szp = VTOZ(*svpp);
3800303970Savg	tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
3801303970Savg	if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) {
3802303970Savg		error = SET_ERROR(EIO);
3803303970Savg		goto unlockout;
3804168962Spjd	}
3805185029Spjd
3806208131Smm	/*
3807303970Savg	 * This is to prevent the creation of links into attribute space
3808303970Savg	 * by renaming a linked file into/outof an attribute directory.
3809303970Savg	 * See the comment in zfs_link() for why this is considered bad.
3810208131Smm	 */
3811303970Savg	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
3812303970Savg		error = SET_ERROR(EINVAL);
3813303970Savg		goto unlockout;
3814208131Smm	}
3815208131Smm
3816168404Spjd	/*
3817168404Spjd	 * Must have write access at the source to remove the old entry
3818168404Spjd	 * and write access at the target to create the new entry.
3819168404Spjd	 * Note that if target and source are the same, this can be
3820168404Spjd	 * done in a single check.
3821168404Spjd	 */
3822168404Spjd	if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))
3823303970Savg		goto unlockout;
3824168404Spjd
3825303970Savg	if ((*svpp)->v_type == VDIR) {
3826168404Spjd		/*
3827303970Savg		 * Avoid ".", "..", and aliases of "." for obvious reasons.
3828303970Savg		 */
3829303970Savg		if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') ||
3830303970Savg		    sdzp == szp ||
3831303970Savg		    (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
3832303970Savg			error = EINVAL;
3833303970Savg			goto unlockout;
3834303970Savg		}
3835303970Savg
3836303970Savg		/*
3837168404Spjd		 * Check to make sure rename is valid.
3838168404Spjd		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
3839168404Spjd		 */
3840303970Savg		if (error = zfs_rename_check(szp, sdzp, tdzp))
3841303970Savg			goto unlockout;
3842168404Spjd	}
3843168404Spjd
3844168404Spjd	/*
3845168404Spjd	 * Does target exist?
3846168404Spjd	 */
3847168404Spjd	if (tzp) {
3848168404Spjd		/*
3849168404Spjd		 * Source and target must be the same type.
3850168404Spjd		 */
3851303970Savg		if ((*svpp)->v_type == VDIR) {
3852303970Savg			if ((*tvpp)->v_type != VDIR) {
3853249195Smm				error = SET_ERROR(ENOTDIR);
3854303970Savg				goto unlockout;
3855303970Savg			} else {
3856303970Savg				cache_purge(tdvp);
3857303970Savg				if (sdvp != tdvp)
3858303970Savg					cache_purge(sdvp);
3859168404Spjd			}
3860168404Spjd		} else {
3861303970Savg			if ((*tvpp)->v_type == VDIR) {
3862249195Smm				error = SET_ERROR(EISDIR);
3863303970Savg				goto unlockout;
3864168404Spjd			}
3865168404Spjd		}
3866168404Spjd	}
3867168404Spjd
3868303970Savg	vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
3869168962Spjd	if (tzp)
3870303970Savg		vnevent_rename_dest(*tvpp, tdvp, tnm, ct);
3871168962Spjd
3872185029Spjd	/*
3873185029Spjd	 * notify the target directory if it is not the same
3874185029Spjd	 * as source directory.
3875185029Spjd	 */
3876185029Spjd	if (tdvp != sdvp) {
3877185029Spjd		vnevent_rename_dest_dir(tdvp, ct);
3878185029Spjd	}
3879185029Spjd
3880168404Spjd	tx = dmu_tx_create(zfsvfs->z_os);
3881219089Spjd	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3882219089Spjd	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
3883168404Spjd	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
3884168404Spjd	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
3885219089Spjd	if (sdzp != tdzp) {
3886219089Spjd		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
3887219089Spjd		zfs_sa_upgrade_txholds(tx, tdzp);
3888219089Spjd	}
3889219089Spjd	if (tzp) {
3890219089Spjd		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
3891219089Spjd		zfs_sa_upgrade_txholds(tx, tzp);
3892219089Spjd	}
3893219089Spjd
3894219089Spjd	zfs_sa_upgrade_txholds(tx, szp);
3895168404Spjd	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
3896303970Savg	error = dmu_tx_assign(tx, TXG_WAIT);
3897168404Spjd	if (error) {
3898168404Spjd		dmu_tx_abort(tx);
3899303970Savg		goto unlockout;
3900168404Spjd	}
3901168404Spjd
3902303970Savg
3903168404Spjd	if (tzp)	/* Attempt to remove the existing target */
3904303970Savg		error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);
3905168404Spjd
3906168404Spjd	if (error == 0) {
3907303970Savg		error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING);
3908168404Spjd		if (error == 0) {
3909219089Spjd			szp->z_pflags |= ZFS_AV_MODIFIED;
3910185029Spjd
3911219089Spjd			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
3912219089Spjd			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
3913240415Smm			ASSERT0(error);
3914219089Spjd
3915303970Savg			error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING,
3916303970Savg			    NULL);
3917219089Spjd			if (error == 0) {
3918303970Savg				zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
3919303970Savg				    snm, tdzp, tnm, szp);
3920185029Spjd
3921219089Spjd				/*
3922219089Spjd				 * Update path information for the target vnode
3923219089Spjd				 */
3924303970Savg				vn_renamepath(tdvp, *svpp, tnm, strlen(tnm));
3925219089Spjd			} else {
3926219089Spjd				/*
3927219089Spjd				 * At this point, we have successfully created
3928219089Spjd				 * the target name, but have failed to remove
3929219089Spjd				 * the source name.  Since the create was done
3930219089Spjd				 * with the ZRENAMING flag, there are
3931219089Spjd				 * complications; for one, the link count is
3932219089Spjd				 * wrong.  The easiest way to deal with this
3933219089Spjd				 * is to remove the newly created target, and
3934219089Spjd				 * return the original error.  This must
3935219089Spjd				 * succeed; fortunately, it is very unlikely to
3936219089Spjd				 * fail, since we just created it.
3937219089Spjd				 */
3938303970Savg				VERIFY3U(zfs_link_destroy(tdzp, tnm, szp, tx,
3939219089Spjd				    ZRENAMING, NULL), ==, 0);
3940219089Spjd			}
3941168404Spjd		}
3942168404Spjd		if (error == 0) {
3943303970Savg			cache_purge(*svpp);
3944303970Savg			if (*tvpp != NULL)
3945303970Savg				cache_purge(*tvpp);
3946303970Savg			cache_purge_negative(tdvp);
3947168404Spjd		}
3948168404Spjd	}
3949168404Spjd
3950168404Spjd	dmu_tx_commit(tx);
3951168404Spjd
3952303970Savgunlockout:			/* all 4 vnodes are locked, ZFS_ENTER called */
3953303970Savg	ZFS_EXIT(zfsvfs);
3954303970Savg	VOP_UNLOCK(*svpp, 0);
3955303970Savg	VOP_UNLOCK(sdvp, 0);
3956168404Spjd
3957303970Savgout:				/* original two vnodes are locked */
3958303970Savg	if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3959219089Spjd		zil_commit(zilog, 0);
3960219089Spjd
3961303970Savg	if (*tvpp != NULL)
3962303970Savg		VOP_UNLOCK(*tvpp, 0);
3963303970Savg	if (tdvp != *tvpp)
3964303970Savg		VOP_UNLOCK(tdvp, 0);
3965168404Spjd	return (error);
3966168404Spjd}
3967168404Spjd
3968168404Spjd/*
3969168404Spjd * Insert the indicated symbolic reference entry into the directory.
3970168404Spjd *
3971168404Spjd *	IN:	dvp	- Directory to contain new symbolic link.
3972168404Spjd *		link	- Name for new symlink entry.
3973168404Spjd *		vap	- Attributes of new entry.
3974168404Spjd *		cr	- credentials of caller.
3975185029Spjd *		ct	- caller context
3976185029Spjd *		flags	- case flags
3977168404Spjd *
3978251631Sdelphij *	RETURN:	0 on success, error code on failure.
3979168404Spjd *
3980168404Spjd * Timestamps:
3981168404Spjd *	dvp - ctime|mtime updated
3982168404Spjd */
3983185029Spjd/*ARGSUSED*/
3984168404Spjdstatic int
3985185029Spjdzfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link,
3986185029Spjd    cred_t *cr, kthread_t *td)
3987168404Spjd{
3988168404Spjd	znode_t		*zp, *dzp = VTOZ(dvp);
3989168404Spjd	dmu_tx_t	*tx;
3990168404Spjd	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
3991185029Spjd	zilog_t		*zilog;
3992219089Spjd	uint64_t	len = strlen(link);
3993168404Spjd	int		error;
3994209962Smm	zfs_acl_ids_t	acl_ids;
3995209962Smm	boolean_t	fuid_dirtied;
3996219089Spjd	uint64_t	txtype = TX_SYMLINK;
3997185029Spjd	int		flags = 0;
3998168404Spjd
3999168962Spjd	ASSERT(vap->va_type == VLNK);
4000168404Spjd
4001168404Spjd	ZFS_ENTER(zfsvfs);
4002185029Spjd	ZFS_VERIFY_ZP(dzp);
4003185029Spjd	zilog = zfsvfs->z_log;
4004185029Spjd
4005185029Spjd	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
4006185029Spjd	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
4007185029Spjd		ZFS_EXIT(zfsvfs);
4008249195Smm		return (SET_ERROR(EILSEQ));
4009185029Spjd	}
4010168404Spjd
4011168404Spjd	if (len > MAXPATHLEN) {
4012168404Spjd		ZFS_EXIT(zfsvfs);
4013249195Smm		return (SET_ERROR(ENAMETOOLONG));
4014168404Spjd	}
4015168404Spjd
4016219089Spjd	if ((error = zfs_acl_ids_create(dzp, 0,
4017219089Spjd	    vap, cr, NULL, &acl_ids)) != 0) {
4018219089Spjd		ZFS_EXIT(zfsvfs);
4019219089Spjd		return (error);
4020219089Spjd	}
4021260704Savg
4022168404Spjd	/*
4023168404Spjd	 * Attempt to lock directory; fail if entry already exists.
4024168404Spjd	 */
4025303970Savg	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
4026185029Spjd	if (error) {
4027219089Spjd		zfs_acl_ids_free(&acl_ids);
4028168404Spjd		ZFS_EXIT(zfsvfs);
4029168404Spjd		return (error);
4030168404Spjd	}
4031168404Spjd
4032219089Spjd	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
4033219089Spjd		zfs_acl_ids_free(&acl_ids);
4034219089Spjd		ZFS_EXIT(zfsvfs);
4035219089Spjd		return (error);
4036219089Spjd	}
4037219089Spjd
4038209962Smm	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
4039209962Smm		zfs_acl_ids_free(&acl_ids);
4040209962Smm		ZFS_EXIT(zfsvfs);
4041249195Smm		return (SET_ERROR(EDQUOT));
4042209962Smm	}
4043303970Savg
4044303970Savg	getnewvnode_reserve(1);
4045168404Spjd	tx = dmu_tx_create(zfsvfs->z_os);
4046209962Smm	fuid_dirtied = zfsvfs->z_fuid_dirty;
4047168404Spjd	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
4048168404Spjd	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
4049219089Spjd	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
4050219089Spjd	    ZFS_SA_BASE_ATTR_SIZE + len);
4051219089Spjd	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
4052219089Spjd	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
4053219089Spjd		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
4054219089Spjd		    acl_ids.z_aclp->z_acl_bytes);
4055219089Spjd	}
4056209962Smm	if (fuid_dirtied)
4057209962Smm		zfs_fuid_txhold(zfsvfs, tx);
4058303970Savg	error = dmu_tx_assign(tx, TXG_WAIT);
4059168404Spjd	if (error) {
4060219089Spjd		zfs_acl_ids_free(&acl_ids);
4061168404Spjd		dmu_tx_abort(tx);
4062260704Savg		getnewvnode_drop_reserve();
4063168404Spjd		ZFS_EXIT(zfsvfs);
4064168404Spjd		return (error);
4065168404Spjd	}
4066168404Spjd
4067168404Spjd	/*
4068168404Spjd	 * Create a new object for the symlink.
4069219089Spjd	 * for version 4 ZPL datsets the symlink will be an SA attribute
4070168404Spjd	 */
4071219089Spjd	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
4072168404Spjd
4073219089Spjd	if (fuid_dirtied)
4074219089Spjd		zfs_fuid_sync(zfsvfs, tx);
4075209962Smm
4076219089Spjd	if (zp->z_is_sa)
4077219089Spjd		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
4078219089Spjd		    link, len, tx);
4079219089Spjd	else
4080219089Spjd		zfs_sa_symlink(zp, link, len, tx);
4081168404Spjd
4082219089Spjd	zp->z_size = len;
4083219089Spjd	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
4084219089Spjd	    &zp->z_size, sizeof (zp->z_size), tx);
4085168404Spjd	/*
4086168404Spjd	 * Insert the new object into the directory.
4087168404Spjd	 */
4088303970Savg	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);
4089168404Spjd
4090219089Spjd	zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
4091219089Spjd	*vpp = ZTOV(zp);
4092219089Spjd
4093209962Smm	zfs_acl_ids_free(&acl_ids);
4094209962Smm
4095168404Spjd	dmu_tx_commit(tx);
4096168404Spjd
4097260704Savg	getnewvnode_drop_reserve();
4098260704Savg
4099219089Spjd	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
4100219089Spjd		zil_commit(zilog, 0);
4101219089Spjd
4102168404Spjd	ZFS_EXIT(zfsvfs);
4103168404Spjd	return (error);
4104168404Spjd}
4105168404Spjd
4106168404Spjd/*
4107168404Spjd * Return, in the buffer contained in the provided uio structure,
4108168404Spjd * the symbolic path referred to by vp.
4109168404Spjd *
4110168404Spjd *	IN:	vp	- vnode of symbolic link.
4111251631Sdelphij *		uio	- structure to contain the link path.
4112168404Spjd *		cr	- credentials of caller.
4113185029Spjd *		ct	- caller context
4114168404Spjd *
4115251631Sdelphij *	OUT:	uio	- structure containing the link path.
4116168404Spjd *
4117251631Sdelphij *	RETURN:	0 on success, error code on failure.
4118168404Spjd *
4119168404Spjd * Timestamps:
4120168404Spjd *	vp - atime updated
4121168404Spjd */
4122168404Spjd/* ARGSUSED */
4123168404Spjdstatic int
4124185029Spjdzfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct)
4125168404Spjd{
4126168404Spjd	znode_t		*zp = VTOZ(vp);
4127168404Spjd	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
4128168404Spjd	int		error;
4129168404Spjd
4130168404Spjd	ZFS_ENTER(zfsvfs);
4131185029Spjd	ZFS_VERIFY_ZP(zp);
4132168404Spjd
4133219089Spjd	if (zp->z_is_sa)
4134219089Spjd		error = sa_lookup_uio(zp->z_sa_hdl,
4135219089Spjd		    SA_ZPL_SYMLINK(zfsvfs), uio);
4136219089Spjd	else
4137219089Spjd		error = zfs_sa_readlink(zp, uio);
4138168404Spjd
4139168404Spjd	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
4140219089Spjd
4141168404Spjd	ZFS_EXIT(zfsvfs);
4142168404Spjd	return (error);
4143168404Spjd}
4144168404Spjd
4145168404Spjd/*
4146168404Spjd * Insert a new entry into directory tdvp referencing svp.
4147168404Spjd *
4148168404Spjd *	IN:	tdvp	- Directory to contain new entry.
4149168404Spjd *		svp	- vnode of new entry.
4150168404Spjd *		name	- name of new entry.
4151168404Spjd *		cr	- credentials of caller.
4152185029Spjd *		ct	- caller context
4153168404Spjd *
4154251631Sdelphij *	RETURN:	0 on success, error code on failure.
4155168404Spjd *
4156168404Spjd * Timestamps:
4157168404Spjd *	tdvp - ctime|mtime updated
4158168404Spjd *	 svp - ctime updated
4159168404Spjd */
4160168404Spjd/* ARGSUSED */
4161168404Spjdstatic int
4162185029Spjdzfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
4163185029Spjd    caller_context_t *ct, int flags)
4164168404Spjd{
4165168404Spjd	znode_t		*dzp = VTOZ(tdvp);
4166168404Spjd	znode_t		*tzp, *szp;
4167168404Spjd	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
4168185029Spjd	zilog_t		*zilog;
4169168404Spjd	dmu_tx_t	*tx;
4170168404Spjd	int		error;
4171212694Smm	uint64_t	parent;
4172185029Spjd	uid_t		owner;
4173168404Spjd
4174168404Spjd	ASSERT(tdvp->v_type == VDIR);
4175168404Spjd
4176168404Spjd	ZFS_ENTER(zfsvfs);
4177185029Spjd	ZFS_VERIFY_ZP(dzp);
4178185029Spjd	zilog = zfsvfs->z_log;
4179168404Spjd
4180212694Smm	/*
4181212694Smm	 * POSIX dictates that we return EPERM here.
4182212694Smm	 * Better choices include ENOTSUP or EISDIR.
4183212694Smm	 */
4184212694Smm	if (svp->v_type == VDIR) {
4185168404Spjd		ZFS_EXIT(zfsvfs);
4186249195Smm		return (SET_ERROR(EPERM));
4187212694Smm	}
4188212694Smm
4189254585Sdelphij	szp = VTOZ(svp);
4190254585Sdelphij	ZFS_VERIFY_ZP(szp);
4191254585Sdelphij
4192258597Spjd	if (szp->z_pflags & (ZFS_APPENDONLY | ZFS_IMMUTABLE | ZFS_READONLY)) {
4193258597Spjd		ZFS_EXIT(zfsvfs);
4194258597Spjd		return (SET_ERROR(EPERM));
4195258597Spjd	}
4196258597Spjd
4197212694Smm	/* Prevent links to .zfs/shares files */
4198212694Smm
4199219089Spjd	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
4200219089Spjd	    &parent, sizeof (uint64_t))) != 0) {
4201212694Smm		ZFS_EXIT(zfsvfs);
4202219089Spjd		return (error);
4203219089Spjd	}
4204219089Spjd	if (parent == zfsvfs->z_shares_dir) {
4205219089Spjd		ZFS_EXIT(zfsvfs);
4206249195Smm		return (SET_ERROR(EPERM));
4207212694Smm	}
4208212694Smm
4209185029Spjd	if (zfsvfs->z_utf8 && u8_validate(name,
4210185029Spjd	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
4211185029Spjd		ZFS_EXIT(zfsvfs);
4212249195Smm		return (SET_ERROR(EILSEQ));
4213185029Spjd	}
4214185029Spjd
4215168404Spjd	/*
4216168404Spjd	 * We do not support links between attributes and non-attributes
4217168404Spjd	 * because of the potential security risk of creating links
4218168404Spjd	 * into "normal" file space in order to circumvent restrictions
4219168404Spjd	 * imposed in attribute space.
4220168404Spjd	 */
4221219089Spjd	if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) {
4222168404Spjd		ZFS_EXIT(zfsvfs);
4223249195Smm		return (SET_ERROR(EINVAL));
4224168404Spjd	}
4225168404Spjd
4226168404Spjd
4227219089Spjd	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
4228219089Spjd	if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) {
4229168404Spjd		ZFS_EXIT(zfsvfs);
4230249195Smm		return (SET_ERROR(EPERM));
4231168404Spjd	}
4232168404Spjd
4233185029Spjd	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
4234168404Spjd		ZFS_EXIT(zfsvfs);
4235168404Spjd		return (error);
4236168404Spjd	}
4237168404Spjd
4238168404Spjd	/*
4239168404Spjd	 * Attempt to lock directory; fail if entry already exists.
4240168404Spjd	 */
4241303970Savg	error = zfs_dirent_lookup(dzp, name, &tzp, ZNEW);
4242185029Spjd	if (error) {
4243168404Spjd		ZFS_EXIT(zfsvfs);
4244168404Spjd		return (error);
4245168404Spjd	}
4246168404Spjd
4247168404Spjd	tx = dmu_tx_create(zfsvfs->z_os);
4248219089Spjd	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
4249168404Spjd	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
4250219089Spjd	zfs_sa_upgrade_txholds(tx, szp);
4251219089Spjd	zfs_sa_upgrade_txholds(tx, dzp);
4252303970Savg	error = dmu_tx_assign(tx, TXG_WAIT);
4253168404Spjd	if (error) {
4254168404Spjd		dmu_tx_abort(tx);
4255168404Spjd		ZFS_EXIT(zfsvfs);
4256168404Spjd		return (error);
4257168404Spjd	}
4258168404Spjd
4259303970Savg	error = zfs_link_create(dzp, name, szp, tx, 0);
4260168404Spjd
4261185029Spjd	if (error == 0) {
4262185029Spjd		uint64_t txtype = TX_LINK;
4263185029Spjd		zfs_log_link(zilog, tx, txtype, dzp, szp, name);
4264185029Spjd	}
4265168404Spjd
4266168404Spjd	dmu_tx_commit(tx);
4267168404Spjd
4268185029Spjd	if (error == 0) {
4269185029Spjd		vnevent_link(svp, ct);
4270185029Spjd	}
4271185029Spjd
4272219089Spjd	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
4273219089Spjd		zil_commit(zilog, 0);
4274219089Spjd
4275168404Spjd	ZFS_EXIT(zfsvfs);
4276168404Spjd	return (error);
4277168404Spjd}
4278168404Spjd
4279219089Spjd
4280185029Spjd/*ARGSUSED*/
4281168962Spjdvoid
4282185029Spjdzfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
4283168404Spjd{
4284168962Spjd	znode_t	*zp = VTOZ(vp);
4285168962Spjd	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4286168962Spjd	int error;
4287168404Spjd
4288185029Spjd	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
4289219089Spjd	if (zp->z_sa_hdl == NULL) {
4290185029Spjd		/*
4291185029Spjd		 * The fs has been unmounted, or we did a
4292185029Spjd		 * suspend/resume and this file no longer exists.
4293185029Spjd		 */
4294243520Savg		rw_exit(&zfsvfs->z_teardown_inactive_lock);
4295234607Strasz		vrecycle(vp);
4296243520Savg		return;
4297243520Savg	}
4298243520Savg
4299243520Savg	if (zp->z_unlinked) {
4300243520Savg		/*
4301243520Savg		 * Fast path to recycle a vnode of a removed file.
4302243520Savg		 */
4303185029Spjd		rw_exit(&zfsvfs->z_teardown_inactive_lock);
4304243520Savg		vrecycle(vp);
4305168962Spjd		return;
4306168404Spjd	}
4307168404Spjd
4308168404Spjd	if (zp->z_atime_dirty && zp->z_unlinked == 0) {
4309168404Spjd		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
4310168404Spjd
4311219089Spjd		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
4312219089Spjd		zfs_sa_upgrade_txholds(tx, zp);
4313168404Spjd		error = dmu_tx_assign(tx, TXG_WAIT);
4314168404Spjd		if (error) {
4315168404Spjd			dmu_tx_abort(tx);
4316168404Spjd		} else {
4317219089Spjd			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
4318219089Spjd			    (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
4319168404Spjd			zp->z_atime_dirty = 0;
4320168404Spjd			dmu_tx_commit(tx);
4321168404Spjd		}
4322168404Spjd	}
4323185029Spjd	rw_exit(&zfsvfs->z_teardown_inactive_lock);
4324168404Spjd}
4325168404Spjd
4326219089Spjd
4327168404SpjdCTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid));
4328168404SpjdCTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid));
4329168404Spjd
4330185029Spjd/*ARGSUSED*/
4331168404Spjdstatic int
4332185029Spjdzfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
4333168404Spjd{
4334168404Spjd	znode_t		*zp = VTOZ(vp);
4335168404Spjd	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
4336185029Spjd	uint32_t	gen;
4337219089Spjd	uint64_t	gen64;
4338168404Spjd	uint64_t	object = zp->z_id;
4339168404Spjd	zfid_short_t	*zfid;
4340219089Spjd	int		size, i, error;
4341168404Spjd
4342168404Spjd	ZFS_ENTER(zfsvfs);
4343185029Spjd	ZFS_VERIFY_ZP(zp);
4344168404Spjd
4345219089Spjd	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
4346219089Spjd	    &gen64, sizeof (uint64_t))) != 0) {
4347219089Spjd		ZFS_EXIT(zfsvfs);
4348219089Spjd		return (error);
4349219089Spjd	}
4350219089Spjd
4351219089Spjd	gen = (uint32_t)gen64;
4352219089Spjd
4353168404Spjd	size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
4354249195Smm
4355249195Smm#ifdef illumos
4356249195Smm	if (fidp->fid_len < size) {
4357249195Smm		fidp->fid_len = size;
4358249195Smm		ZFS_EXIT(zfsvfs);
4359249195Smm		return (SET_ERROR(ENOSPC));
4360249195Smm	}
4361249195Smm#else
4362168404Spjd	fidp->fid_len = size;
4363249195Smm#endif
4364168404Spjd
4365168404Spjd	zfid = (zfid_short_t *)fidp;
4366168404Spjd
4367168404Spjd	zfid->zf_len = size;
4368168404Spjd
4369168404Spjd	for (i = 0; i < sizeof (zfid->zf_object); i++)
4370168404Spjd		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
4371168404Spjd
4372168404Spjd	/* Must have a non-zero generation number to distinguish from .zfs */
4373168404Spjd	if (gen == 0)
4374168404Spjd		gen = 1;
4375168404Spjd	for (i = 0; i < sizeof (zfid->zf_gen); i++)
4376168404Spjd		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
4377168404Spjd
4378168404Spjd	if (size == LONG_FID_LEN) {
4379168404Spjd		uint64_t	objsetid = dmu_objset_id(zfsvfs->z_os);
4380169023Spjd		zfid_long_t	*zlfid;
4381168404Spjd
4382168404Spjd		zlfid = (zfid_long_t *)fidp;
4383168404Spjd
4384168404Spjd		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
4385168404Spjd			zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
4386168404Spjd
4387168404Spjd		/* XXX - this should be the generation number for the objset */
4388168404Spjd		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
4389168404Spjd			zlfid->zf_setgen[i] = 0;
4390168404Spjd	}
4391168404Spjd
4392168404Spjd	ZFS_EXIT(zfsvfs);
4393168404Spjd	return (0);
4394168404Spjd}
4395168404Spjd
4396168404Spjdstatic int
4397185029Spjdzfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
4398185029Spjd    caller_context_t *ct)
4399168404Spjd{
4400168404Spjd	znode_t		*zp, *xzp;
4401168404Spjd	zfsvfs_t	*zfsvfs;
4402168404Spjd	int		error;
4403168404Spjd
4404168404Spjd	switch (cmd) {
4405168404Spjd	case _PC_LINK_MAX:
4406168404Spjd		*valp = INT_MAX;
4407168404Spjd		return (0);
4408168404Spjd
4409168404Spjd	case _PC_FILESIZEBITS:
4410168404Spjd		*valp = 64;
4411168404Spjd		return (0);
4412277300Ssmh#ifdef illumos
4413168404Spjd	case _PC_XATTR_EXISTS:
4414168404Spjd		zp = VTOZ(vp);
4415168404Spjd		zfsvfs = zp->z_zfsvfs;
4416168404Spjd		ZFS_ENTER(zfsvfs);
4417185029Spjd		ZFS_VERIFY_ZP(zp);
4418168404Spjd		*valp = 0;
4419303970Savg		error = zfs_dirent_lookup(zp, "", &xzp,
4420303970Savg		    ZXATTR | ZEXISTS | ZSHARED);
4421168404Spjd		if (error == 0) {
4422168404Spjd			if (!zfs_dirempty(xzp))
4423168404Spjd				*valp = 1;
4424303970Savg			vrele(ZTOV(xzp));
4425168404Spjd		} else if (error == ENOENT) {
4426168404Spjd			/*
4427168404Spjd			 * If there aren't extended attributes, it's the
4428168404Spjd			 * same as having zero of them.
4429168404Spjd			 */
4430168404Spjd			error = 0;
4431168404Spjd		}
4432168404Spjd		ZFS_EXIT(zfsvfs);
4433168404Spjd		return (error);
4434168404Spjd
4435219089Spjd	case _PC_SATTR_ENABLED:
4436219089Spjd	case _PC_SATTR_EXISTS:
4437219089Spjd		*valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
4438219089Spjd		    (vp->v_type == VREG || vp->v_type == VDIR);
4439219089Spjd		return (0);
4440219089Spjd
4441219089Spjd	case _PC_ACCESS_FILTERING:
4442219089Spjd		*valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) &&
4443219089Spjd		    vp->v_type == VDIR;
4444219089Spjd		return (0);
4445219089Spjd
4446219089Spjd	case _PC_ACL_ENABLED:
4447219089Spjd		*valp = _ACL_ACE_ENABLED;
4448219089Spjd		return (0);
4449277300Ssmh#endif	/* illumos */
4450219089Spjd	case _PC_MIN_HOLE_SIZE:
4451219089Spjd		*valp = (int)SPA_MINBLOCKSIZE;
4452219089Spjd		return (0);
4453277300Ssmh#ifdef illumos
4454219089Spjd	case _PC_TIMESTAMP_RESOLUTION:
4455219089Spjd		/* nanosecond timestamp resolution */
4456219089Spjd		*valp = 1L;
4457219089Spjd		return (0);
4458277300Ssmh#endif
4459168404Spjd	case _PC_ACL_EXTENDED:
4460196949Strasz		*valp = 0;
4461168404Spjd		return (0);
4462168404Spjd
4463196949Strasz	case _PC_ACL_NFS4:
4464196949Strasz		*valp = 1;
4465196949Strasz		return (0);
4466196949Strasz
4467196949Strasz	case _PC_ACL_PATH_MAX:
4468196949Strasz		*valp = ACL_MAX_ENTRIES;
4469196949Strasz		return (0);
4470196949Strasz
4471168404Spjd	default:
4472168962Spjd		return (EOPNOTSUPP);
4473168404Spjd	}
4474168404Spjd}
4475168404Spjd
4476168404Spjd/*ARGSUSED*/
4477168404Spjdstatic int
4478185029Spjdzfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr,
4479185029Spjd    caller_context_t *ct)
4480168404Spjd{
4481168404Spjd	znode_t *zp = VTOZ(vp);
4482168404Spjd	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4483168404Spjd	int error;
4484185029Spjd	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
4485168404Spjd
4486168404Spjd	ZFS_ENTER(zfsvfs);
4487185029Spjd	ZFS_VERIFY_ZP(zp);
4488185029Spjd	error = zfs_getacl(zp, vsecp, skipaclchk, cr);
4489168404Spjd	ZFS_EXIT(zfsvfs);
4490168404Spjd
4491168404Spjd	return (error);
4492168404Spjd}
4493168404Spjd
4494168404Spjd/*ARGSUSED*/
4495228685Spjdint
4496185029Spjdzfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr,
4497185029Spjd    caller_context_t *ct)
4498168404Spjd{
4499168404Spjd	znode_t *zp = VTOZ(vp);
4500168404Spjd	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4501168404Spjd	int error;
4502185029Spjd	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
4503219089Spjd	zilog_t	*zilog = zfsvfs->z_log;
4504168404Spjd
4505168404Spjd	ZFS_ENTER(zfsvfs);
4506185029Spjd	ZFS_VERIFY_ZP(zp);
4507219089Spjd
4508185029Spjd	error = zfs_setacl(zp, vsecp, skipaclchk, cr);
4509219089Spjd
4510219089Spjd	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
4511219089Spjd		zil_commit(zilog, 0);
4512219089Spjd
4513168404Spjd	ZFS_EXIT(zfsvfs);
4514168404Spjd	return (error);
4515168404Spjd}
4516168404Spjd
4517168962Spjdstatic int
4518292373Sglebiuszfs_getpages(struct vnode *vp, vm_page_t *m, int count, int *rbehind,
4519292373Sglebius    int *rahead)
4520213937Savg{
4521213937Savg	znode_t *zp = VTOZ(vp);
4522213937Savg	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4523213937Savg	objset_t *os = zp->z_zfsvfs->z_os;
4524292373Sglebius	vm_page_t mlast;
4525213937Savg	vm_object_t object;
4526213937Savg	caddr_t va;
4527213937Savg	struct sf_buf *sf;
4528243517Savg	off_t startoff, endoff;
4529213937Savg	int i, error;
4530243517Savg	vm_pindex_t reqstart, reqend;
4531297473Sglebius	int lsize, size;
4532213937Savg
4533292386Sglebius	object = m[0]->object;
4534292386Sglebius	error = 0;
4535292373Sglebius
4536213937Savg	ZFS_ENTER(zfsvfs);
4537213937Savg	ZFS_VERIFY_ZP(zp);
4538213937Savg
4539248084Sattilio	zfs_vmobject_wlock(object);
4540292386Sglebius	if (m[count - 1]->valid != 0 && --count == 0) {
4541248084Sattilio		zfs_vmobject_wunlock(object);
4542292386Sglebius		goto out;
4543213937Savg	}
4544213937Savg
4545292386Sglebius	mlast = m[count - 1];
4546213937Savg
4547292373Sglebius	if (IDX_TO_OFF(mlast->pindex) >=
4548292373Sglebius	    object->un_pager.vnp.vnp_size) {
4549248084Sattilio		zfs_vmobject_wunlock(object);
4550213937Savg		ZFS_EXIT(zfsvfs);
4551248084Sattilio		return (zfs_vm_pagerret_bad);
4552213937Savg	}
4553213937Savg
4554292373Sglebius	PCPU_INC(cnt.v_vnodein);
4555297473Sglebius	PCPU_ADD(cnt.v_vnodepgsin, count);
4556292373Sglebius
4557243517Savg	lsize = PAGE_SIZE;
4558243517Savg	if (IDX_TO_OFF(mlast->pindex) + lsize > object->un_pager.vnp.vnp_size)
4559292373Sglebius		lsize = object->un_pager.vnp.vnp_size -
4560292373Sglebius		    IDX_TO_OFF(mlast->pindex);
4561248084Sattilio	zfs_vmobject_wunlock(object);
4562243517Savg
4563292386Sglebius	for (i = 0; i < count; i++) {
4564243517Savg		size = PAGE_SIZE;
4565292386Sglebius		if (i == count - 1)
4566243517Savg			size = lsize;
4567243517Savg		va = zfs_map_page(m[i], &sf);
4568243517Savg		error = dmu_read(os, zp->z_id, IDX_TO_OFF(m[i]->pindex),
4569243517Savg		    size, va, DMU_READ_PREFETCH);
4570243517Savg		if (size != PAGE_SIZE)
4571243517Savg			bzero(va + size, PAGE_SIZE - size);
4572243517Savg		zfs_unmap_page(sf);
4573243517Savg		if (error != 0)
4574292373Sglebius			goto out;
4575243517Savg	}
4576243517Savg
4577248084Sattilio	zfs_vmobject_wlock(object);
4578292386Sglebius	for (i = 0; i < count; i++)
4579292373Sglebius		m[i]->valid = VM_PAGE_BITS_ALL;
4580248084Sattilio	zfs_vmobject_wunlock(object);
4581213937Savg
4582292373Sglebiusout:
4583213937Savg	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
4584213937Savg	ZFS_EXIT(zfsvfs);
4585292386Sglebius	if (error == 0) {
4586292386Sglebius		if (rbehind)
4587292386Sglebius			*rbehind = 0;
4588292386Sglebius		if (rahead)
4589292386Sglebius			*rahead = 0;
4590292386Sglebius		return (zfs_vm_pagerret_ok);
4591292386Sglebius	} else
4592292386Sglebius		return (zfs_vm_pagerret_error);
4593213937Savg}
4594213937Savg
4595213937Savgstatic int
4596213937Savgzfs_freebsd_getpages(ap)
4597213937Savg	struct vop_getpages_args /* {
4598213937Savg		struct vnode *a_vp;
4599213937Savg		vm_page_t *a_m;
4600213937Savg		int a_count;
4601292373Sglebius		int *a_rbehind;
4602292373Sglebius		int *a_rahead;
4603213937Savg	} */ *ap;
4604213937Savg{
4605213937Savg
4606292373Sglebius	return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
4607292373Sglebius	    ap->a_rahead));
4608213937Savg}
4609213937Savg
4610213937Savgstatic int
4611258746Savgzfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
4612258746Savg    int *rtvals)
4613258746Savg{
4614258746Savg	znode_t		*zp = VTOZ(vp);
4615258746Savg	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
4616258746Savg	rl_t		*rl;
4617258746Savg	dmu_tx_t	*tx;
4618258746Savg	struct sf_buf	*sf;
4619258746Savg	vm_object_t	object;
4620258746Savg	vm_page_t	m;
4621258746Savg	caddr_t		va;
4622258746Savg	size_t		tocopy;
4623258746Savg	size_t		lo_len;
4624258746Savg	vm_ooffset_t	lo_off;
4625258746Savg	vm_ooffset_t	off;
4626258746Savg	uint_t		blksz;
4627258746Savg	int		ncount;
4628258746Savg	int		pcount;
4629258746Savg	int		err;
4630258746Savg	int		i;
4631258746Savg
4632258746Savg	ZFS_ENTER(zfsvfs);
4633258746Savg	ZFS_VERIFY_ZP(zp);
4634258746Savg
4635258746Savg	object = vp->v_object;
4636258746Savg	pcount = btoc(len);
4637258746Savg	ncount = pcount;
4638258746Savg
4639258746Savg	KASSERT(ma[0]->object == object, ("mismatching object"));
4640258746Savg	KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length"));
4641258746Savg
4642258746Savg	for (i = 0; i < pcount; i++)
4643258746Savg		rtvals[i] = zfs_vm_pagerret_error;
4644258746Savg
4645258746Savg	off = IDX_TO_OFF(ma[0]->pindex);
4646258746Savg	blksz = zp->z_blksz;
4647258746Savg	lo_off = rounddown(off, blksz);
4648258746Savg	lo_len = roundup(len + (off - lo_off), blksz);
4649258746Savg	rl = zfs_range_lock(zp, lo_off, lo_len, RL_WRITER);
4650258746Savg
4651258746Savg	zfs_vmobject_wlock(object);
4652258746Savg	if (len + off > object->un_pager.vnp.vnp_size) {
4653258746Savg		if (object->un_pager.vnp.vnp_size > off) {
4654258746Savg			int pgoff;
4655258746Savg
4656258746Savg			len = object->un_pager.vnp.vnp_size - off;
4657258746Savg			ncount = btoc(len);
4658258746Savg			if ((pgoff = (int)len & PAGE_MASK) != 0) {
4659258746Savg				/*
4660258746Savg				 * If the object is locked and the following
4661258746Savg				 * conditions hold, then the page's dirty
4662258746Savg				 * field cannot be concurrently changed by a
4663258746Savg				 * pmap operation.
4664258746Savg				 */
4665258746Savg				m = ma[ncount - 1];
4666258746Savg				vm_page_assert_sbusied(m);
4667258746Savg				KASSERT(!pmap_page_is_write_mapped(m),
4668258746Savg				    ("zfs_putpages: page %p is not read-only", m));
4669258746Savg				vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
4670258746Savg				    pgoff);
4671258746Savg			}
4672258746Savg		} else {
4673258746Savg			len = 0;
4674258746Savg			ncount = 0;
4675258746Savg		}
4676258746Savg		if (ncount < pcount) {
4677258746Savg			for (i = ncount; i < pcount; i++) {
4678258746Savg				rtvals[i] = zfs_vm_pagerret_bad;
4679258746Savg			}
4680258746Savg		}
4681258746Savg	}
4682258746Savg	zfs_vmobject_wunlock(object);
4683258746Savg
4684258746Savg	if (ncount == 0)
4685258746Savg		goto out;
4686258746Savg
4687258746Savg	if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
4688258746Savg	    zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
4689258746Savg		goto out;
4690258746Savg	}
4691258746Savg
4692258746Savg	tx = dmu_tx_create(zfsvfs->z_os);
4693258746Savg	dmu_tx_hold_write(tx, zp->z_id, off, len);
4694258746Savg
4695258746Savg	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
4696258746Savg	zfs_sa_upgrade_txholds(tx, zp);
4697316847Savg	err = dmu_tx_assign(tx, TXG_WAIT);
4698258746Savg	if (err != 0) {
4699258746Savg		dmu_tx_abort(tx);
4700258746Savg		goto out;
4701258746Savg	}
4702258746Savg
4703258746Savg	if (zp->z_blksz < PAGE_SIZE) {
4704258746Savg		i = 0;
4705258746Savg		for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) {
4706258746Savg			tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
4707258746Savg			va = zfs_map_page(ma[i], &sf);
4708258746Savg			dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
4709258746Savg			zfs_unmap_page(sf);
4710258746Savg		}
4711258746Savg	} else {
4712258746Savg		err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx);
4713258746Savg	}
4714258746Savg
4715258746Savg	if (err == 0) {
4716258746Savg		uint64_t mtime[2], ctime[2];
4717258746Savg		sa_bulk_attr_t bulk[3];
4718258746Savg		int count = 0;
4719258746Savg
4720258746Savg		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
4721258746Savg		    &mtime, 16);
4722258746Savg		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
4723258746Savg		    &ctime, 16);
4724258746Savg		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
4725258746Savg		    &zp->z_pflags, 8);
4726258746Savg		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
4727258746Savg		    B_TRUE);
4728321561Smav		err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
4729321561Smav		ASSERT0(err);
4730258746Savg		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0);
4731258746Savg
4732258746Savg		zfs_vmobject_wlock(object);
4733258746Savg		for (i = 0; i < ncount; i++) {
4734258746Savg			rtvals[i] = zfs_vm_pagerret_ok;
4735258746Savg			vm_page_undirty(ma[i]);
4736258746Savg		}
4737258746Savg		zfs_vmobject_wunlock(object);
4738258746Savg		PCPU_INC(cnt.v_vnodeout);
4739258746Savg		PCPU_ADD(cnt.v_vnodepgsout, ncount);
4740258746Savg	}
4741258746Savg	dmu_tx_commit(tx);
4742258746Savg
4743258746Savgout:
4744258746Savg	zfs_range_unlock(rl);
4745258746Savg	if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 ||
4746258746Savg	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
4747258746Savg		zil_commit(zfsvfs->z_log, zp->z_id);
4748258746Savg	ZFS_EXIT(zfsvfs);
4749258746Savg	return (rtvals[0]);
4750258746Savg}
4751258746Savg
4752258746Savgint
4753258746Savgzfs_freebsd_putpages(ap)
4754258746Savg	struct vop_putpages_args /* {
4755258746Savg		struct vnode *a_vp;
4756258746Savg		vm_page_t *a_m;
4757258746Savg		int a_count;
4758258746Savg		int a_sync;
4759258746Savg		int *a_rtvals;
4760258746Savg	} */ *ap;
4761258746Savg{
4762258746Savg
4763258746Savg	return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync,
4764258746Savg	    ap->a_rtvals));
4765258746Savg}
4766258746Savg
4767258746Savgstatic int
4768243518Savgzfs_freebsd_bmap(ap)
4769243518Savg	struct vop_bmap_args /* {
4770243518Savg		struct vnode *a_vp;
4771243518Savg		daddr_t  a_bn;
4772243518Savg		struct bufobj **a_bop;
4773243518Savg		daddr_t *a_bnp;
4774243518Savg		int *a_runp;
4775243518Savg		int *a_runb;
4776243518Savg	} */ *ap;
4777243518Savg{
4778243518Savg
4779243518Savg	if (ap->a_bop != NULL)
4780243518Savg		*ap->a_bop = &ap->a_vp->v_bufobj;
4781243518Savg	if (ap->a_bnp != NULL)
4782243518Savg		*ap->a_bnp = ap->a_bn;
4783243518Savg	if (ap->a_runp != NULL)
4784243518Savg		*ap->a_runp = 0;
4785243518Savg	if (ap->a_runb != NULL)
4786243518Savg		*ap->a_runb = 0;
4787243518Savg
4788243518Savg	return (0);
4789243518Savg}
4790243518Savg
4791243518Savgstatic int
4792168962Spjdzfs_freebsd_open(ap)
4793168962Spjd	struct vop_open_args /* {
4794168962Spjd		struct vnode *a_vp;
4795168962Spjd		int a_mode;
4796168962Spjd		struct ucred *a_cred;
4797168962Spjd		struct thread *a_td;
4798168962Spjd	} */ *ap;
4799168962Spjd{
4800168962Spjd	vnode_t	*vp = ap->a_vp;
4801168962Spjd	znode_t *zp = VTOZ(vp);
4802168962Spjd	int error;
4803168962Spjd
4804185029Spjd	error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL);
4805168962Spjd	if (error == 0)
4806219089Spjd		vnode_create_vobject(vp, zp->z_size, ap->a_td);
4807168962Spjd	return (error);
4808168962Spjd}
4809168962Spjd
4810168962Spjdstatic int
4811168962Spjdzfs_freebsd_close(ap)
4812168962Spjd	struct vop_close_args /* {
4813168962Spjd		struct vnode *a_vp;
4814168962Spjd		int  a_fflag;
4815168962Spjd		struct ucred *a_cred;
4816168962Spjd		struct thread *a_td;
4817168962Spjd	} */ *ap;
4818168962Spjd{
4819168962Spjd
4820242566Savg	return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred, NULL));
4821168962Spjd}
4822168962Spjd
4823168962Spjdstatic int
4824168962Spjdzfs_freebsd_ioctl(ap)
4825168962Spjd	struct vop_ioctl_args /* {
4826168962Spjd		struct vnode *a_vp;
4827168962Spjd		u_long a_command;
4828168962Spjd		caddr_t a_data;
4829168962Spjd		int a_fflag;
4830168962Spjd		struct ucred *cred;
4831168962Spjd		struct thread *td;
4832168962Spjd	} */ *ap;
4833168962Spjd{
4834168962Spjd
4835168978Spjd	return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
4836185029Spjd	    ap->a_fflag, ap->a_cred, NULL, NULL));
4837168962Spjd}
4838168962Spjd
4839168962Spjdstatic int
4840330062Savgioflags(int ioflags)
4841330062Savg{
4842330062Savg	int flags = 0;
4843330062Savg
4844330062Savg	if (ioflags & IO_APPEND)
4845330062Savg		flags |= FAPPEND;
4846330062Savg	if (ioflags & IO_NDELAY)
4847330062Savg		flags |= FNONBLOCK;
4848330062Savg	if (ioflags & IO_SYNC)
4849330062Savg		flags |= (FSYNC | FDSYNC | FRSYNC);
4850330062Savg
4851330062Savg	return (flags);
4852330062Savg}
4853330062Savg
4854330062Savgstatic int
4855168962Spjdzfs_freebsd_read(ap)
4856168962Spjd	struct vop_read_args /* {
4857168962Spjd		struct vnode *a_vp;
4858168962Spjd		struct uio *a_uio;
4859168962Spjd		int a_ioflag;
4860168962Spjd		struct ucred *a_cred;
4861168962Spjd	} */ *ap;
4862168962Spjd{
4863168962Spjd
4864213673Spjd	return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag),
4865213673Spjd	    ap->a_cred, NULL));
4866168962Spjd}
4867168962Spjd
4868168962Spjdstatic int
4869168962Spjdzfs_freebsd_write(ap)
4870168962Spjd	struct vop_write_args /* {
4871168962Spjd		struct vnode *a_vp;
4872168962Spjd		struct uio *a_uio;
4873168962Spjd		int a_ioflag;
4874168962Spjd		struct ucred *a_cred;
4875168962Spjd	} */ *ap;
4876168962Spjd{
4877168962Spjd
4878213673Spjd	return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag),
4879213673Spjd	    ap->a_cred, NULL));
4880168962Spjd}
4881168962Spjd
4882168962Spjdstatic int
4883168962Spjdzfs_freebsd_access(ap)
4884168962Spjd	struct vop_access_args /* {
4885168962Spjd		struct vnode *a_vp;
4886192689Strasz		accmode_t a_accmode;
4887168962Spjd		struct ucred *a_cred;
4888168962Spjd		struct thread *a_td;
4889168962Spjd	} */ *ap;
4890168962Spjd{
4891212002Sjh	vnode_t *vp = ap->a_vp;
4892212002Sjh	znode_t *zp = VTOZ(vp);
4893198703Spjd	accmode_t accmode;
4894198703Spjd	int error = 0;
4895168962Spjd
4896185172Spjd	/*
4897198703Spjd	 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND,
4898185172Spjd	 */
4899198703Spjd	accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND);
4900198703Spjd	if (accmode != 0)
4901198703Spjd		error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL);
4902185172Spjd
4903198703Spjd	/*
4904198703Spjd	 * VADMIN has to be handled by vaccess().
4905198703Spjd	 */
4906198703Spjd	if (error == 0) {
4907198703Spjd		accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
4908198703Spjd		if (accmode != 0) {
4909219089Spjd			error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
4910219089Spjd			    zp->z_gid, accmode, ap->a_cred, NULL);
4911198703Spjd		}
4912185172Spjd	}
4913185172Spjd
4914212002Sjh	/*
4915212002Sjh	 * For VEXEC, ensure that at least one execute bit is set for
4916212002Sjh	 * non-directories.
4917212002Sjh	 */
4918212002Sjh	if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR &&
4919219089Spjd	    (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
4920212002Sjh		error = EACCES;
4921219089Spjd	}
4922212002Sjh
4923198703Spjd	return (error);
4924168962Spjd}
4925168962Spjd
4926168962Spjdstatic int
4927168962Spjdzfs_freebsd_lookup(ap)
4928168962Spjd	struct vop_lookup_args /* {
4929168962Spjd		struct vnode *a_dvp;
4930168962Spjd		struct vnode **a_vpp;
4931168962Spjd		struct componentname *a_cnp;
4932168962Spjd	} */ *ap;
4933168962Spjd{
4934168962Spjd	struct componentname *cnp = ap->a_cnp;
4935168962Spjd	char nm[NAME_MAX + 1];
4936168962Spjd
4937168962Spjd	ASSERT(cnp->cn_namelen < sizeof(nm));
4938168962Spjd	strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm)));
4939168962Spjd
4940168962Spjd	return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
4941185029Spjd	    cnp->cn_cred, cnp->cn_thread, 0));
4942168962Spjd}
4943168962Spjd
4944168962Spjdstatic int
4945303970Savgzfs_cache_lookup(ap)
4946303970Savg	struct vop_lookup_args /* {
4947303970Savg		struct vnode *a_dvp;
4948303970Savg		struct vnode **a_vpp;
4949303970Savg		struct componentname *a_cnp;
4950303970Savg	} */ *ap;
4951303970Savg{
4952303970Savg	zfsvfs_t *zfsvfs;
4953303970Savg
4954303970Savg	zfsvfs = ap->a_dvp->v_mount->mnt_data;
4955303970Savg	if (zfsvfs->z_use_namecache)
4956303970Savg		return (vfs_cache_lookup(ap));
4957303970Savg	else
4958303970Savg		return (zfs_freebsd_lookup(ap));
4959303970Savg}
4960303970Savg
4961303970Savgstatic int
4962168962Spjdzfs_freebsd_create(ap)
4963168962Spjd	struct vop_create_args /* {
4964168962Spjd		struct vnode *a_dvp;
4965168962Spjd		struct vnode **a_vpp;
4966168962Spjd		struct componentname *a_cnp;
4967168962Spjd		struct vattr *a_vap;
4968168962Spjd	} */ *ap;
4969168962Spjd{
4970303970Savg	zfsvfs_t *zfsvfs;
4971168962Spjd	struct componentname *cnp = ap->a_cnp;
4972168962Spjd	vattr_t *vap = ap->a_vap;
4973276007Skib	int error, mode;
4974168962Spjd
4975168962Spjd	ASSERT(cnp->cn_flags & SAVENAME);
4976168962Spjd
4977168962Spjd	vattr_init_mask(vap);
4978168962Spjd	mode = vap->va_mode & ALLPERMS;
4979303970Savg	zfsvfs = ap->a_dvp->v_mount->mnt_data;
4980168962Spjd
4981276007Skib	error = zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode,
4982276007Skib	    ap->a_vpp, cnp->cn_cred, cnp->cn_thread);
4983303970Savg	if (zfsvfs->z_use_namecache &&
4984303970Savg	    error == 0 && (cnp->cn_flags & MAKEENTRY) != 0)
4985276007Skib		cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
4986276007Skib	return (error);
4987168962Spjd}
4988168962Spjd
4989168962Spjdstatic int
4990168962Spjdzfs_freebsd_remove(ap)
4991168962Spjd	struct vop_remove_args /* {
4992168962Spjd		struct vnode *a_dvp;
4993168962Spjd		struct vnode *a_vp;
4994168962Spjd		struct componentname *a_cnp;
4995168962Spjd	} */ *ap;
4996168962Spjd{
4997168962Spjd
4998168962Spjd	ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4999168962Spjd
5000303970Savg	return (zfs_remove(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr,
5001303970Savg	    ap->a_cnp->cn_cred));
5002168962Spjd}
5003168962Spjd
5004168962Spjdstatic int
5005168962Spjdzfs_freebsd_mkdir(ap)
5006168962Spjd	struct vop_mkdir_args /* {
5007168962Spjd		struct vnode *a_dvp;
5008168962Spjd		struct vnode **a_vpp;
5009168962Spjd		struct componentname *a_cnp;
5010168962Spjd		struct vattr *a_vap;
5011168962Spjd	} */ *ap;
5012168962Spjd{
5013168962Spjd	vattr_t *vap = ap->a_vap;
5014168962Spjd
5015168962Spjd	ASSERT(ap->a_cnp->cn_flags & SAVENAME);
5016168962Spjd
5017168962Spjd	vattr_init_mask(vap);
5018168962Spjd
5019168962Spjd	return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp,
5020303970Savg	    ap->a_cnp->cn_cred));
5021168962Spjd}
5022168962Spjd
5023168962Spjdstatic int
5024168962Spjdzfs_freebsd_rmdir(ap)
5025168962Spjd	struct vop_rmdir_args /* {
5026168962Spjd		struct vnode *a_dvp;
5027168962Spjd		struct vnode *a_vp;
5028168962Spjd		struct componentname *a_cnp;
5029168962Spjd	} */ *ap;
5030168962Spjd{
5031168962Spjd	struct componentname *cnp = ap->a_cnp;
5032168962Spjd
5033168962Spjd	ASSERT(cnp->cn_flags & SAVENAME);
5034168962Spjd
5035303970Savg	return (zfs_rmdir(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
5036168962Spjd}
5037168962Spjd
5038168962Spjdstatic int
5039168962Spjdzfs_freebsd_readdir(ap)
5040168962Spjd	struct vop_readdir_args /* {
5041168962Spjd		struct vnode *a_vp;
5042168962Spjd		struct uio *a_uio;
5043168962Spjd		struct ucred *a_cred;
5044168962Spjd		int *a_eofflag;
5045168962Spjd		int *a_ncookies;
5046168962Spjd		u_long **a_cookies;
5047168962Spjd	} */ *ap;
5048168962Spjd{
5049168962Spjd
5050168962Spjd	return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag,
5051168962Spjd	    ap->a_ncookies, ap->a_cookies));
5052168962Spjd}
5053168962Spjd
5054168962Spjdstatic int
5055168962Spjdzfs_freebsd_fsync(ap)
5056168962Spjd	struct vop_fsync_args /* {
5057168962Spjd		struct vnode *a_vp;
5058168962Spjd		int a_waitfor;
5059168962Spjd		struct thread *a_td;
5060168962Spjd	} */ *ap;
5061168962Spjd{
5062168962Spjd
5063168962Spjd	vop_stdfsync(ap);
5064185029Spjd	return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL));
5065168962Spjd}
5066168962Spjd
5067168962Spjdstatic int
5068168962Spjdzfs_freebsd_getattr(ap)
5069168962Spjd	struct vop_getattr_args /* {
5070168962Spjd		struct vnode *a_vp;
5071168962Spjd		struct vattr *a_vap;
5072168962Spjd		struct ucred *a_cred;
5073168962Spjd	} */ *ap;
5074168962Spjd{
5075185029Spjd	vattr_t *vap = ap->a_vap;
5076185029Spjd	xvattr_t xvap;
5077185029Spjd	u_long fflags = 0;
5078185029Spjd	int error;
5079168962Spjd
5080185029Spjd	xva_init(&xvap);
5081185029Spjd	xvap.xva_vattr = *vap;
5082185029Spjd	xvap.xva_vattr.va_mask |= AT_XVATTR;
5083185029Spjd
5084185029Spjd	/* Convert chflags into ZFS-type flags. */
5085185029Spjd	/* XXX: what about SF_SETTABLE?. */
5086185029Spjd	XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
5087185029Spjd	XVA_SET_REQ(&xvap, XAT_APPENDONLY);
5088185029Spjd	XVA_SET_REQ(&xvap, XAT_NOUNLINK);
5089185029Spjd	XVA_SET_REQ(&xvap, XAT_NODUMP);
5090254627Sken	XVA_SET_REQ(&xvap, XAT_READONLY);
5091254627Sken	XVA_SET_REQ(&xvap, XAT_ARCHIVE);
5092254627Sken	XVA_SET_REQ(&xvap, XAT_SYSTEM);
5093254627Sken	XVA_SET_REQ(&xvap, XAT_HIDDEN);
5094254627Sken	XVA_SET_REQ(&xvap, XAT_REPARSE);
5095254627Sken	XVA_SET_REQ(&xvap, XAT_OFFLINE);
5096254627Sken	XVA_SET_REQ(&xvap, XAT_SPARSE);
5097254627Sken
5098185029Spjd	error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL);
5099185029Spjd	if (error != 0)
5100185029Spjd		return (error);
5101185029Spjd
5102185029Spjd	/* Convert ZFS xattr into chflags. */
5103185029Spjd#define	FLAG_CHECK(fflag, xflag, xfield)	do {			\
5104185029Spjd	if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0)		\
5105185029Spjd		fflags |= (fflag);					\
5106185029Spjd} while (0)
5107185029Spjd	FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
5108185029Spjd	    xvap.xva_xoptattrs.xoa_immutable);
5109185029Spjd	FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
5110185029Spjd	    xvap.xva_xoptattrs.xoa_appendonly);
5111185029Spjd	FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
5112185029Spjd	    xvap.xva_xoptattrs.xoa_nounlink);
5113254627Sken	FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE,
5114254627Sken	    xvap.xva_xoptattrs.xoa_archive);
5115185029Spjd	FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
5116185029Spjd	    xvap.xva_xoptattrs.xoa_nodump);
5117254627Sken	FLAG_CHECK(UF_READONLY, XAT_READONLY,
5118254627Sken	    xvap.xva_xoptattrs.xoa_readonly);
5119254627Sken	FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM,
5120254627Sken	    xvap.xva_xoptattrs.xoa_system);
5121254627Sken	FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN,
5122254627Sken	    xvap.xva_xoptattrs.xoa_hidden);
5123254627Sken	FLAG_CHECK(UF_REPARSE, XAT_REPARSE,
5124254627Sken	    xvap.xva_xoptattrs.xoa_reparse);
5125254627Sken	FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE,
5126254627Sken	    xvap.xva_xoptattrs.xoa_offline);
5127254627Sken	FLAG_CHECK(UF_SPARSE, XAT_SPARSE,
5128254627Sken	    xvap.xva_xoptattrs.xoa_sparse);
5129254627Sken
5130185029Spjd#undef	FLAG_CHECK
5131185029Spjd	*vap = xvap.xva_vattr;
5132185029Spjd	vap->va_flags = fflags;
5133185029Spjd	return (0);
5134168962Spjd}
5135168962Spjd
5136168962Spjdstatic int
5137168962Spjdzfs_freebsd_setattr(ap)
5138168962Spjd	struct vop_setattr_args /* {
5139168962Spjd		struct vnode *a_vp;
5140168962Spjd		struct vattr *a_vap;
5141168962Spjd		struct ucred *a_cred;
5142168962Spjd	} */ *ap;
5143168962Spjd{
5144185172Spjd	vnode_t *vp = ap->a_vp;
5145168962Spjd	vattr_t *vap = ap->a_vap;
5146185172Spjd	cred_t *cred = ap->a_cred;
5147185029Spjd	xvattr_t xvap;
5148185029Spjd	u_long fflags;
5149185029Spjd	uint64_t zflags;
5150168962Spjd
5151168962Spjd	vattr_init_mask(vap);
5152170044Spjd	vap->va_mask &= ~AT_NOSET;
5153168962Spjd
5154185029Spjd	xva_init(&xvap);
5155185029Spjd	xvap.xva_vattr = *vap;
5156185029Spjd
5157219089Spjd	zflags = VTOZ(vp)->z_pflags;
5158185172Spjd
5159185029Spjd	if (vap->va_flags != VNOVAL) {
5160197683Sdelphij		zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
5161185172Spjd		int error;
5162185172Spjd
5163197683Sdelphij		if (zfsvfs->z_use_fuids == B_FALSE)
5164197683Sdelphij			return (EOPNOTSUPP);
5165197683Sdelphij
5166185029Spjd		fflags = vap->va_flags;
5167254627Sken		/*
5168254627Sken		 * XXX KDM
5169254627Sken		 * We need to figure out whether it makes sense to allow
5170254627Sken		 * UF_REPARSE through, since we don't really have other
5171254627Sken		 * facilities to handle reparse points and zfs_setattr()
5172254627Sken		 * doesn't currently allow setting that attribute anyway.
5173254627Sken		 */
5174254627Sken		if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE|
5175254627Sken		     UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE|
5176254627Sken		     UF_OFFLINE|UF_SPARSE)) != 0)
5177185029Spjd			return (EOPNOTSUPP);
5178185172Spjd		/*
5179185172Spjd		 * Unprivileged processes are not permitted to unset system
5180185172Spjd		 * flags, or modify flags if any system flags are set.
5181185172Spjd		 * Privileged non-jail processes may not modify system flags
5182185172Spjd		 * if securelevel > 0 and any existing system flags are set.
5183185172Spjd		 * Privileged jail processes behave like privileged non-jail
5184185172Spjd		 * processes if the security.jail.chflags_allowed sysctl is
5185185172Spjd		 * is non-zero; otherwise, they behave like unprivileged
5186185172Spjd		 * processes.
5187185172Spjd		 */
5188197861Spjd		if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
5189197861Spjd		    priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) {
5190185172Spjd			if (zflags &
5191185172Spjd			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
5192185172Spjd				error = securelevel_gt(cred, 0);
5193197861Spjd				if (error != 0)
5194185172Spjd					return (error);
5195185172Spjd			}
5196185172Spjd		} else {
5197197861Spjd			/*
5198197861Spjd			 * Callers may only modify the file flags on objects they
5199197861Spjd			 * have VADMIN rights for.
5200197861Spjd			 */
5201197861Spjd			if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0)
5202197861Spjd				return (error);
5203185172Spjd			if (zflags &
5204185172Spjd			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
5205185172Spjd				return (EPERM);
5206185172Spjd			}
5207185172Spjd			if (fflags &
5208185172Spjd			    (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
5209185172Spjd				return (EPERM);
5210185172Spjd			}
5211185172Spjd		}
5212185029Spjd
5213185029Spjd#define	FLAG_CHANGE(fflag, zflag, xflag, xfield)	do {		\
5214185029Spjd	if (((fflags & (fflag)) && !(zflags & (zflag))) ||		\
5215185029Spjd	    ((zflags & (zflag)) && !(fflags & (fflag)))) {		\
5216185029Spjd		XVA_SET_REQ(&xvap, (xflag));				\
5217185029Spjd		(xfield) = ((fflags & (fflag)) != 0);			\
5218185029Spjd	}								\
5219185029Spjd} while (0)
5220185029Spjd		/* Convert chflags into ZFS-type flags. */
5221185029Spjd		/* XXX: what about SF_SETTABLE?. */
5222185029Spjd		FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
5223185029Spjd		    xvap.xva_xoptattrs.xoa_immutable);
5224185029Spjd		FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
5225185029Spjd		    xvap.xva_xoptattrs.xoa_appendonly);
5226185029Spjd		FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
5227185029Spjd		    xvap.xva_xoptattrs.xoa_nounlink);
5228254627Sken		FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE,
5229254627Sken		    xvap.xva_xoptattrs.xoa_archive);
5230185029Spjd		FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
5231185172Spjd		    xvap.xva_xoptattrs.xoa_nodump);
5232254627Sken		FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY,
5233254627Sken		    xvap.xva_xoptattrs.xoa_readonly);
5234254627Sken		FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM,
5235254627Sken		    xvap.xva_xoptattrs.xoa_system);
5236254627Sken		FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN,
5237254627Sken		    xvap.xva_xoptattrs.xoa_hidden);
5238254627Sken		FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE,
5239254627Sken		    xvap.xva_xoptattrs.xoa_hidden);
5240254627Sken		FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE,
5241254627Sken		    xvap.xva_xoptattrs.xoa_offline);
5242254627Sken		FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE,
5243254627Sken		    xvap.xva_xoptattrs.xoa_sparse);
5244185029Spjd#undef	FLAG_CHANGE
5245185029Spjd	}
5246316391Sasomers	if (vap->va_birthtime.tv_sec != VNOVAL) {
5247316391Sasomers		xvap.xva_vattr.va_mask |= AT_XVATTR;
5248316391Sasomers		XVA_SET_REQ(&xvap, XAT_CREATETIME);
5249316391Sasomers	}
5250185172Spjd	return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL));
5251168962Spjd}
5252168962Spjd
5253168962Spjdstatic int
5254168962Spjdzfs_freebsd_rename(ap)
5255168962Spjd	struct vop_rename_args  /* {
5256168962Spjd		struct vnode *a_fdvp;
5257168962Spjd		struct vnode *a_fvp;
5258168962Spjd		struct componentname *a_fcnp;
5259168962Spjd		struct vnode *a_tdvp;
5260168962Spjd		struct vnode *a_tvp;
5261168962Spjd		struct componentname *a_tcnp;
5262168962Spjd	} */ *ap;
5263168962Spjd{
5264168962Spjd	vnode_t *fdvp = ap->a_fdvp;
5265168962Spjd	vnode_t *fvp = ap->a_fvp;
5266168962Spjd	vnode_t *tdvp = ap->a_tdvp;
5267168962Spjd	vnode_t *tvp = ap->a_tvp;
5268168962Spjd	int error;
5269168962Spjd
5270192237Skmacy	ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
5271192237Skmacy	ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));
5272168962Spjd
5273303970Savg	error = zfs_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
5274303970Savg	    ap->a_tcnp, ap->a_fcnp->cn_cred);
5275168962Spjd
5276303970Savg	vrele(fdvp);
5277303970Savg	vrele(fvp);
5278303970Savg	vrele(tdvp);
5279303970Savg	if (tvp != NULL)
5280303970Savg		vrele(tvp);
5281303970Savg
5282168962Spjd	return (error);
5283168962Spjd}
5284168962Spjd
5285168962Spjdstatic int
5286168962Spjdzfs_freebsd_symlink(ap)
5287168962Spjd	struct vop_symlink_args /* {
5288168962Spjd		struct vnode *a_dvp;
5289168962Spjd		struct vnode **a_vpp;
5290168962Spjd		struct componentname *a_cnp;
5291168962Spjd		struct vattr *a_vap;
5292168962Spjd		char *a_target;
5293168962Spjd	} */ *ap;
5294168962Spjd{
5295168962Spjd	struct componentname *cnp = ap->a_cnp;
5296168962Spjd	vattr_t *vap = ap->a_vap;
5297168962Spjd
5298168962Spjd	ASSERT(cnp->cn_flags & SAVENAME);
5299168962Spjd
5300168962Spjd	vap->va_type = VLNK;	/* FreeBSD: Syscall only sets va_mode. */
5301168962Spjd	vattr_init_mask(vap);
5302168962Spjd
5303168962Spjd	return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap,
5304168962Spjd	    ap->a_target, cnp->cn_cred, cnp->cn_thread));
5305168962Spjd}
5306168962Spjd
5307168962Spjdstatic int
5308168962Spjdzfs_freebsd_readlink(ap)
5309168962Spjd	struct vop_readlink_args /* {
5310168962Spjd		struct vnode *a_vp;
5311168962Spjd		struct uio *a_uio;
5312168962Spjd		struct ucred *a_cred;
5313168962Spjd	} */ *ap;
5314168962Spjd{
5315168962Spjd
5316185029Spjd	return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL));
5317168962Spjd}
5318168962Spjd
5319168962Spjdstatic int
5320168962Spjdzfs_freebsd_link(ap)
5321168962Spjd	struct vop_link_args /* {
5322168962Spjd		struct vnode *a_tdvp;
5323168962Spjd		struct vnode *a_vp;
5324168962Spjd		struct componentname *a_cnp;
5325168962Spjd	} */ *ap;
5326168962Spjd{
5327168962Spjd	struct componentname *cnp = ap->a_cnp;
5328254982Sdelphij	vnode_t *vp = ap->a_vp;
5329254982Sdelphij	vnode_t *tdvp = ap->a_tdvp;
5330168962Spjd
5331254982Sdelphij	if (tdvp->v_mount != vp->v_mount)
5332254982Sdelphij		return (EXDEV);
5333254982Sdelphij
5334168962Spjd	ASSERT(cnp->cn_flags & SAVENAME);
5335168962Spjd
5336254982Sdelphij	return (zfs_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0));
5337168962Spjd}
5338168962Spjd
5339168962Spjdstatic int
5340168962Spjdzfs_freebsd_inactive(ap)
5341169170Spjd	struct vop_inactive_args /* {
5342169170Spjd		struct vnode *a_vp;
5343169170Spjd		struct thread *a_td;
5344169170Spjd	} */ *ap;
5345168962Spjd{
5346168962Spjd	vnode_t *vp = ap->a_vp;
5347168962Spjd
5348185029Spjd	zfs_inactive(vp, ap->a_td->td_ucred, NULL);
5349168962Spjd	return (0);
5350168962Spjd}
5351168962Spjd
5352168962Spjdstatic int
5353168962Spjdzfs_freebsd_reclaim(ap)
5354168962Spjd	struct vop_reclaim_args /* {
5355168962Spjd		struct vnode *a_vp;
5356168962Spjd		struct thread *a_td;
5357168962Spjd	} */ *ap;
5358168962Spjd{
5359169170Spjd	vnode_t	*vp = ap->a_vp;
5360168962Spjd	znode_t	*zp = VTOZ(vp);
5361197133Spjd	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
5362168962Spjd
5363169025Spjd	ASSERT(zp != NULL);
5364169025Spjd
5365243520Savg	/* Destroy the vm object and flush associated pages. */
5366243520Savg	vnode_destroy_vobject(vp);
5367243520Savg
5368168962Spjd	/*
5369243520Savg	 * z_teardown_inactive_lock protects from a race with
5370243520Savg	 * zfs_znode_dmu_fini in zfsvfs_teardown during
5371243520Savg	 * force unmount.
5372168962Spjd	 */
5373243520Savg	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
5374243520Savg	if (zp->z_sa_hdl == NULL)
5375196301Spjd		zfs_znode_free(zp);
5376243520Savg	else
5377243520Savg		zfs_zinactive(zp);
5378243520Savg	rw_exit(&zfsvfs->z_teardown_inactive_lock);
5379185029Spjd
5380168962Spjd	vp->v_data = NULL;
5381168962Spjd	return (0);
5382168962Spjd}
5383168962Spjd
5384168962Spjdstatic int
5385168962Spjdzfs_freebsd_fid(ap)
5386168962Spjd	struct vop_fid_args /* {
5387168962Spjd		struct vnode *a_vp;
5388168962Spjd		struct fid *a_fid;
5389168962Spjd	} */ *ap;
5390168962Spjd{
5391168962Spjd
5392185029Spjd	return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL));
5393168962Spjd}
5394168962Spjd
5395168962Spjdstatic int
5396168962Spjdzfs_freebsd_pathconf(ap)
5397168962Spjd	struct vop_pathconf_args /* {
5398168962Spjd		struct vnode *a_vp;
5399168962Spjd		int a_name;
5400168962Spjd		register_t *a_retval;
5401168962Spjd	} */ *ap;
5402168962Spjd{
5403168962Spjd	ulong_t val;
5404168962Spjd	int error;
5405168962Spjd
5406185029Spjd	error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL);
5407328298Sjhb	if (error == 0) {
5408168962Spjd		*ap->a_retval = val;
5409328298Sjhb		return (error);
5410328298Sjhb	}
5411328298Sjhb	if (error != EOPNOTSUPP)
5412328298Sjhb		return (error);
5413168962Spjd
5414196949Strasz	switch (ap->a_name) {
5415328298Sjhb	case _PC_NAME_MAX:
5416328298Sjhb		*ap->a_retval = NAME_MAX;
5417328298Sjhb		return (0);
5418328298Sjhb	case _PC_PIPE_BUF:
5419328298Sjhb		if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) {
5420328298Sjhb			*ap->a_retval = PIPE_BUF;
5421328298Sjhb			return (0);
5422328298Sjhb		}
5423328298Sjhb		return (EINVAL);
5424196949Strasz	default:
5425328298Sjhb		return (vop_stdpathconf(ap));
5426196949Strasz	}
5427196949Strasz}
5428196949Strasz
5429185029Spjd/*
5430185029Spjd * FreeBSD's extended attributes namespace defines file name prefix for ZFS'
5431185029Spjd * extended attribute name:
5432185029Spjd *
5433185029Spjd *	NAMESPACE	PREFIX
5434185029Spjd *	system		freebsd:system:
5435185029Spjd *	user		(none, can be used to access ZFS fsattr(5) attributes
5436185029Spjd *			created on Solaris)
5437185029Spjd */
5438185029Spjdstatic int
5439185029Spjdzfs_create_attrname(int attrnamespace, const char *name, char *attrname,
5440185029Spjd    size_t size)
5441185029Spjd{
5442185029Spjd	const char *namespace, *prefix, *suffix;
5443185029Spjd
5444185029Spjd	/* We don't allow '/' character in attribute name. */
5445185029Spjd	if (strchr(name, '/') != NULL)
5446185029Spjd		return (EINVAL);
5447185029Spjd	/* We don't allow attribute names that start with "freebsd:" string. */
5448185029Spjd	if (strncmp(name, "freebsd:", 8) == 0)
5449185029Spjd		return (EINVAL);
5450185029Spjd
5451185029Spjd	bzero(attrname, size);
5452185029Spjd
5453185029Spjd	switch (attrnamespace) {
5454185029Spjd	case EXTATTR_NAMESPACE_USER:
5455185029Spjd#if 0
5456185029Spjd		prefix = "freebsd:";
5457185029Spjd		namespace = EXTATTR_NAMESPACE_USER_STRING;
5458185029Spjd		suffix = ":";
5459185029Spjd#else
5460185029Spjd		/*
5461185029Spjd		 * This is the default namespace by which we can access all
5462185029Spjd		 * attributes created on Solaris.
5463185029Spjd		 */
5464185029Spjd		prefix = namespace = suffix = "";
5465185029Spjd#endif
5466185029Spjd		break;
5467185029Spjd	case EXTATTR_NAMESPACE_SYSTEM:
5468185029Spjd		prefix = "freebsd:";
5469185029Spjd		namespace = EXTATTR_NAMESPACE_SYSTEM_STRING;
5470185029Spjd		suffix = ":";
5471185029Spjd		break;
5472185029Spjd	case EXTATTR_NAMESPACE_EMPTY:
5473185029Spjd	default:
5474185029Spjd		return (EINVAL);
5475185029Spjd	}
5476185029Spjd	if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix,
5477185029Spjd	    name) >= size) {
5478185029Spjd		return (ENAMETOOLONG);
5479185029Spjd	}
5480185029Spjd	return (0);
5481185029Spjd}
5482185029Spjd
5483185029Spjd/*
5484185029Spjd * Vnode operating to retrieve a named extended attribute.
5485185029Spjd */
5486185029Spjdstatic int
5487185029Spjdzfs_getextattr(struct vop_getextattr_args *ap)
5488185029Spjd/*
5489185029Spjdvop_getextattr {
5490185029Spjd	IN struct vnode *a_vp;
5491185029Spjd	IN int a_attrnamespace;
5492185029Spjd	IN const char *a_name;
5493185029Spjd	INOUT struct uio *a_uio;
5494185029Spjd	OUT size_t *a_size;
5495185029Spjd	IN struct ucred *a_cred;
5496185029Spjd	IN struct thread *a_td;
5497185029Spjd};
5498185029Spjd*/
5499185029Spjd{
5500185029Spjd	zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs;
5501185029Spjd	struct thread *td = ap->a_td;
5502185029Spjd	struct nameidata nd;
5503185029Spjd	char attrname[255];
5504185029Spjd	struct vattr va;
5505185029Spjd	vnode_t *xvp = NULL, *vp;
5506185029Spjd	int error, flags;
5507185029Spjd
5508195785Strasz	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5509195785Strasz	    ap->a_cred, ap->a_td, VREAD);
5510195785Strasz	if (error != 0)
5511195785Strasz		return (error);
5512195785Strasz
5513185029Spjd	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5514185029Spjd	    sizeof(attrname));
5515185029Spjd	if (error != 0)
5516185029Spjd		return (error);
5517185029Spjd
5518185029Spjd	ZFS_ENTER(zfsvfs);
5519185029Spjd
5520185029Spjd	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td,
5521185029Spjd	    LOOKUP_XATTR);
5522185029Spjd	if (error != 0) {
5523185029Spjd		ZFS_EXIT(zfsvfs);
5524185029Spjd		return (error);
5525185029Spjd	}
5526185029Spjd
5527185029Spjd	flags = FREAD;
5528241896Skib	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
5529185029Spjd	    xvp, td);
5530194586Skib	error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL);
5531185029Spjd	vp = nd.ni_vp;
5532185029Spjd	NDFREE(&nd, NDF_ONLY_PNBUF);
5533185029Spjd	if (error != 0) {
5534196303Spjd		ZFS_EXIT(zfsvfs);
5535195785Strasz		if (error == ENOENT)
5536195785Strasz			error = ENOATTR;
5537185029Spjd		return (error);
5538185029Spjd	}
5539185029Spjd
5540185029Spjd	if (ap->a_size != NULL) {
5541185029Spjd		error = VOP_GETATTR(vp, &va, ap->a_cred);
5542185029Spjd		if (error == 0)
5543185029Spjd			*ap->a_size = (size_t)va.va_size;
5544185029Spjd	} else if (ap->a_uio != NULL)
5545224605Smm		error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5546185029Spjd
5547185029Spjd	VOP_UNLOCK(vp, 0);
5548185029Spjd	vn_close(vp, flags, ap->a_cred, td);
5549185029Spjd	ZFS_EXIT(zfsvfs);
5550185029Spjd
5551185029Spjd	return (error);
5552185029Spjd}
5553185029Spjd
5554185029Spjd/*
5555185029Spjd * Vnode operation to remove a named attribute.
5556185029Spjd */
5557185029Spjdint
5558185029Spjdzfs_deleteextattr(struct vop_deleteextattr_args *ap)
5559185029Spjd/*
5560185029Spjdvop_deleteextattr {
5561185029Spjd	IN struct vnode *a_vp;
5562185029Spjd	IN int a_attrnamespace;
5563185029Spjd	IN const char *a_name;
5564185029Spjd	IN struct ucred *a_cred;
5565185029Spjd	IN struct thread *a_td;
5566185029Spjd};
5567185029Spjd*/
5568185029Spjd{
5569185029Spjd	zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs;
5570185029Spjd	struct thread *td = ap->a_td;
5571185029Spjd	struct nameidata nd;
5572185029Spjd	char attrname[255];
5573185029Spjd	struct vattr va;
5574185029Spjd	vnode_t *xvp = NULL, *vp;
5575185029Spjd	int error, flags;
5576185029Spjd
5577195785Strasz	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5578195785Strasz	    ap->a_cred, ap->a_td, VWRITE);
5579195785Strasz	if (error != 0)
5580195785Strasz		return (error);
5581195785Strasz
5582185029Spjd	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5583185029Spjd	    sizeof(attrname));
5584185029Spjd	if (error != 0)
5585185029Spjd		return (error);
5586185029Spjd
5587185029Spjd	ZFS_ENTER(zfsvfs);
5588185029Spjd
5589185029Spjd	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td,
5590185029Spjd	    LOOKUP_XATTR);
5591185029Spjd	if (error != 0) {
5592185029Spjd		ZFS_EXIT(zfsvfs);
5593185029Spjd		return (error);
5594185029Spjd	}
5595185029Spjd
5596241896Skib	NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
5597185029Spjd	    UIO_SYSSPACE, attrname, xvp, td);
5598185029Spjd	error = namei(&nd);
5599185029Spjd	vp = nd.ni_vp;
5600185029Spjd	if (error != 0) {
5601196303Spjd		ZFS_EXIT(zfsvfs);
5602260706Savg		NDFREE(&nd, NDF_ONLY_PNBUF);
5603195785Strasz		if (error == ENOENT)
5604195785Strasz			error = ENOATTR;
5605185029Spjd		return (error);
5606185029Spjd	}
5607260706Savg
5608185029Spjd	error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
5609260706Savg	NDFREE(&nd, NDF_ONLY_PNBUF);
5610185029Spjd
5611185029Spjd	vput(nd.ni_dvp);
5612185029Spjd	if (vp == nd.ni_dvp)
5613185029Spjd		vrele(vp);
5614185029Spjd	else
5615185029Spjd		vput(vp);
5616185029Spjd	ZFS_EXIT(zfsvfs);
5617185029Spjd
5618185029Spjd	return (error);
5619185029Spjd}
5620185029Spjd
5621185029Spjd/*
5622185029Spjd * Vnode operation to set a named attribute.
5623185029Spjd */
5624185029Spjdstatic int
5625185029Spjdzfs_setextattr(struct vop_setextattr_args *ap)
5626185029Spjd/*
5627185029Spjdvop_setextattr {
5628185029Spjd	IN struct vnode *a_vp;
5629185029Spjd	IN int a_attrnamespace;
5630185029Spjd	IN const char *a_name;
5631185029Spjd	INOUT struct uio *a_uio;
5632185029Spjd	IN struct ucred *a_cred;
5633185029Spjd	IN struct thread *a_td;
5634185029Spjd};
5635185029Spjd*/
5636185029Spjd{
5637185029Spjd	zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs;
5638185029Spjd	struct thread *td = ap->a_td;
5639185029Spjd	struct nameidata nd;
5640185029Spjd	char attrname[255];
5641185029Spjd	struct vattr va;
5642185029Spjd	vnode_t *xvp = NULL, *vp;
5643185029Spjd	int error, flags;
5644185029Spjd
5645195785Strasz	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5646195785Strasz	    ap->a_cred, ap->a_td, VWRITE);
5647195785Strasz	if (error != 0)
5648195785Strasz		return (error);
5649195785Strasz
5650185029Spjd	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5651185029Spjd	    sizeof(attrname));
5652185029Spjd	if (error != 0)
5653185029Spjd		return (error);
5654185029Spjd
5655185029Spjd	ZFS_ENTER(zfsvfs);
5656185029Spjd
5657185029Spjd	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td,
5658195785Strasz	    LOOKUP_XATTR | CREATE_XATTR_DIR);
5659185029Spjd	if (error != 0) {
5660185029Spjd		ZFS_EXIT(zfsvfs);
5661185029Spjd		return (error);
5662185029Spjd	}
5663185029Spjd
5664185029Spjd	flags = FFLAGS(O_WRONLY | O_CREAT);
5665241896Skib	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
5666185029Spjd	    xvp, td);
5667194586Skib	error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL);
5668185029Spjd	vp = nd.ni_vp;
5669185029Spjd	NDFREE(&nd, NDF_ONLY_PNBUF);
5670185029Spjd	if (error != 0) {
5671185029Spjd		ZFS_EXIT(zfsvfs);
5672185029Spjd		return (error);
5673185029Spjd	}
5674185029Spjd
5675185029Spjd	VATTR_NULL(&va);
5676185029Spjd	va.va_size = 0;
5677185029Spjd	error = VOP_SETATTR(vp, &va, ap->a_cred);
5678185029Spjd	if (error == 0)
5679268420Smav		VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5680185029Spjd
5681185029Spjd	VOP_UNLOCK(vp, 0);
5682185029Spjd	vn_close(vp, flags, ap->a_cred, td);
5683185029Spjd	ZFS_EXIT(zfsvfs);
5684185029Spjd
5685185029Spjd	return (error);
5686185029Spjd}
5687185029Spjd
5688185029Spjd/*
5689185029Spjd * Vnode operation to retrieve extended attributes on a vnode.
5690185029Spjd */
5691185029Spjdstatic int
5692185029Spjdzfs_listextattr(struct vop_listextattr_args *ap)
5693185029Spjd/*
5694185029Spjdvop_listextattr {
5695185029Spjd	IN struct vnode *a_vp;
5696185029Spjd	IN int a_attrnamespace;
5697185029Spjd	INOUT struct uio *a_uio;
5698185029Spjd	OUT size_t *a_size;
5699185029Spjd	IN struct ucred *a_cred;
5700185029Spjd	IN struct thread *a_td;
5701185029Spjd};
5702185029Spjd*/
5703185029Spjd{
5704185029Spjd	zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs;
5705185029Spjd	struct thread *td = ap->a_td;
5706185029Spjd	struct nameidata nd;
5707185029Spjd	char attrprefix[16];
5708185029Spjd	u_char dirbuf[sizeof(struct dirent)];
5709185029Spjd	struct dirent *dp;
5710185029Spjd	struct iovec aiov;
5711185029Spjd	struct uio auio, *uio = ap->a_uio;
5712185029Spjd	size_t *sizep = ap->a_size;
5713185029Spjd	size_t plen;
5714185029Spjd	vnode_t *xvp = NULL, *vp;
5715185029Spjd	int done, error, eof, pos;
5716185029Spjd
5717195785Strasz	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5718195785Strasz	    ap->a_cred, ap->a_td, VREAD);
5719196303Spjd	if (error != 0)
5720195785Strasz		return (error);
5721195785Strasz
5722185029Spjd	error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
5723185029Spjd	    sizeof(attrprefix));
5724185029Spjd	if (error != 0)
5725185029Spjd		return (error);
5726185029Spjd	plen = strlen(attrprefix);
5727185029Spjd
5728185029Spjd	ZFS_ENTER(zfsvfs);
5729185029Spjd
5730195822Strasz	if (sizep != NULL)
5731195822Strasz		*sizep = 0;
5732195822Strasz
5733185029Spjd	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td,
5734185029Spjd	    LOOKUP_XATTR);
5735185029Spjd	if (error != 0) {
5736196303Spjd		ZFS_EXIT(zfsvfs);
5737195785Strasz		/*
5738195785Strasz		 * ENOATTR means that the EA directory does not yet exist,
5739195785Strasz		 * i.e. there are no extended attributes there.
5740195785Strasz		 */
5741195785Strasz		if (error == ENOATTR)
5742195785Strasz			error = 0;
5743185029Spjd		return (error);
5744185029Spjd	}
5745185029Spjd
5746241896Skib	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
5747188588Sjhb	    UIO_SYSSPACE, ".", xvp, td);
5748185029Spjd	error = namei(&nd);
5749185029Spjd	vp = nd.ni_vp;
5750185029Spjd	NDFREE(&nd, NDF_ONLY_PNBUF);
5751185029Spjd	if (error != 0) {
5752185029Spjd		ZFS_EXIT(zfsvfs);
5753185029Spjd		return (error);
5754185029Spjd	}
5755185029Spjd
5756185029Spjd	auio.uio_iov = &aiov;
5757185029Spjd	auio.uio_iovcnt = 1;
5758185029Spjd	auio.uio_segflg = UIO_SYSSPACE;
5759185029Spjd	auio.uio_td = td;
5760185029Spjd	auio.uio_rw = UIO_READ;
5761185029Spjd	auio.uio_offset = 0;
5762185029Spjd
5763185029Spjd	do {
5764185029Spjd		u_char nlen;
5765185029Spjd
5766185029Spjd		aiov.iov_base = (void *)dirbuf;
5767185029Spjd		aiov.iov_len = sizeof(dirbuf);
5768185029Spjd		auio.uio_resid = sizeof(dirbuf);
5769185029Spjd		error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL);
5770185029Spjd		done = sizeof(dirbuf) - auio.uio_resid;
5771185029Spjd		if (error != 0)
5772185029Spjd			break;
5773185029Spjd		for (pos = 0; pos < done;) {
5774185029Spjd			dp = (struct dirent *)(dirbuf + pos);
5775185029Spjd			pos += dp->d_reclen;
5776185029Spjd			/*
5777185029Spjd			 * XXX: Temporarily we also accept DT_UNKNOWN, as this
5778185029Spjd			 * is what we get when attribute was created on Solaris.
5779185029Spjd			 */
5780185029Spjd			if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN)
5781185029Spjd				continue;
5782185029Spjd			if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0)
5783185029Spjd				continue;
5784185029Spjd			else if (strncmp(dp->d_name, attrprefix, plen) != 0)
5785185029Spjd				continue;
5786185029Spjd			nlen = dp->d_namlen - plen;
5787185029Spjd			if (sizep != NULL)
5788185029Spjd				*sizep += 1 + nlen;
5789185029Spjd			else if (uio != NULL) {
5790185029Spjd				/*
5791185029Spjd				 * Format of extattr name entry is one byte for
5792185029Spjd				 * length and the rest for name.
5793185029Spjd				 */
5794185029Spjd				error = uiomove(&nlen, 1, uio->uio_rw, uio);
5795185029Spjd				if (error == 0) {
5796185029Spjd					error = uiomove(dp->d_name + plen, nlen,
5797185029Spjd					    uio->uio_rw, uio);
5798185029Spjd				}
5799185029Spjd				if (error != 0)
5800185029Spjd					break;
5801185029Spjd			}
5802185029Spjd		}
5803185029Spjd	} while (!eof && error == 0);
5804185029Spjd
5805185029Spjd	vput(vp);
5806185029Spjd	ZFS_EXIT(zfsvfs);
5807185029Spjd
5808185029Spjd	return (error);
5809185029Spjd}
5810185029Spjd
5811192800Straszint
5812192800Straszzfs_freebsd_getacl(ap)
5813192800Strasz	struct vop_getacl_args /* {
5814192800Strasz		struct vnode *vp;
5815192800Strasz		acl_type_t type;
5816192800Strasz		struct acl *aclp;
5817192800Strasz		struct ucred *cred;
5818192800Strasz		struct thread *td;
5819192800Strasz	} */ *ap;
5820192800Strasz{
5821192800Strasz	int		error;
5822192800Strasz	vsecattr_t      vsecattr;
5823192800Strasz
5824192800Strasz	if (ap->a_type != ACL_TYPE_NFS4)
5825197435Strasz		return (EINVAL);
5826192800Strasz
5827192800Strasz	vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
5828192800Strasz	if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL))
5829192800Strasz		return (error);
5830192800Strasz
5831192800Strasz	error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt);
5832196303Spjd	if (vsecattr.vsa_aclentp != NULL)
5833196303Spjd		kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz);
5834192800Strasz
5835196303Spjd	return (error);
5836192800Strasz}
5837192800Strasz
5838192800Straszint
5839192800Straszzfs_freebsd_setacl(ap)
5840192800Strasz	struct vop_setacl_args /* {
5841192800Strasz		struct vnode *vp;
5842192800Strasz		acl_type_t type;
5843192800Strasz		struct acl *aclp;
5844192800Strasz		struct ucred *cred;
5845192800Strasz		struct thread *td;
5846192800Strasz	} */ *ap;
5847192800Strasz{
5848192800Strasz	int		error;
5849192800Strasz	vsecattr_t      vsecattr;
5850192800Strasz	int		aclbsize;	/* size of acl list in bytes */
5851192800Strasz	aclent_t	*aaclp;
5852192800Strasz
5853192800Strasz	if (ap->a_type != ACL_TYPE_NFS4)
5854197435Strasz		return (EINVAL);
5855192800Strasz
5856314710Smm	if (ap->a_aclp == NULL)
5857314710Smm		return (EINVAL);
5858314710Smm
5859192800Strasz	if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES)
5860192800Strasz		return (EINVAL);
5861192800Strasz
5862192800Strasz	/*
5863196949Strasz	 * With NFSv4 ACLs, chmod(2) may need to add additional entries,
5864192800Strasz	 * splitting every entry into two and appending "canonical six"
5865192800Strasz	 * entries at the end.  Don't allow for setting an ACL that would
5866192800Strasz	 * cause chmod(2) to run out of ACL entries.
5867192800Strasz	 */
5868192800Strasz	if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES)
5869192800Strasz		return (ENOSPC);
5870192800Strasz
5871208030Strasz	error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR);
5872208030Strasz	if (error != 0)
5873208030Strasz		return (error);
5874208030Strasz
5875192800Strasz	vsecattr.vsa_mask = VSA_ACE;
5876192800Strasz	aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t);
5877192800Strasz	vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP);
5878192800Strasz	aaclp = vsecattr.vsa_aclentp;
5879192800Strasz	vsecattr.vsa_aclentsz = aclbsize;
5880192800Strasz
5881192800Strasz	aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp);
5882192800Strasz	error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL);
5883192800Strasz	kmem_free(aaclp, aclbsize);
5884192800Strasz
5885192800Strasz	return (error);
5886192800Strasz}
5887192800Strasz
/*
 * VOP_ACLCHECK entry point.  The arguments (a_vp, a_type, a_aclp, a_cred,
 * a_td) are not examined: the operation is always refused with
 * EOPNOTSUPP.
 */
int
zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap)
{

	return (EOPNOTSUPP);
}
5901192800Strasz
/*
 * vop_vptocnp implementation: report the name of ap->a_vp and hand back a
 * parent vnode through ap->a_vpp.
 *
 * The name is written at the tail of the free part of ap->a_buf and
 * *ap->a_buflen is reduced by its length (no terminating NUL is copied).
 *
 * Two cases are handled:
 *  - the vnode is resolved locally with zfs_znode_parent_and_name();
 *  - the request is forwarded to the vnode covered by this mount.
 *
 * Returns 0 on success, ENOMEM when the name does not fit in the remaining
 * buffer, ENOENT when vp was doomed while it was unlocked, or whatever
 * error the helpers called below report.
 */
static int
zfs_vptocnp(struct vop_vptocnp_args *ap)
{
	vnode_t *covered_vp;
	vnode_t *vp = ap->a_vp;;
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	znode_t *zp = VTOZ(vp);
	int ltype;
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/*
	 * Only the root of a snapshot mounted under .zfs is forwarded to
	 * the covered vnode (further below).  Every other vnode -- one that
	 * is not the filesystem root, or one whose zfsvfs is its own
	 * z_parent (presumably meaning "not a snapshot"; confirm) -- is
	 * resolved right here.
	 */
	if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) {
		char name[MAXNAMLEN + 1];
		znode_t *dzp;
		size_t len;

		error = zfs_znode_parent_and_name(zp, &dzp, name);
		if (error == 0) {
			len = strlen(name);
			/* The name must fit in what is left of the buffer. */
			if (*ap->a_buflen < len)
				error = SET_ERROR(ENOMEM);
		}
		/*
		 * NOTE(review): on the ENOMEM path dzp is not released here.
		 * If zfs_znode_parent_and_name() returns a referenced znode
		 * this would leak that reference -- verify against the
		 * helper.
		 */
		if (error == 0) {
			/* Prepend: the name goes at the end of the free space. */
			*ap->a_buflen -= len;
			bcopy(name, ap->a_buf + *ap->a_buflen, len);
			*ap->a_vpp = ZTOV(dzp);
		}
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	ZFS_EXIT(zfsvfs);

	/*
	 * Forward to the covered vnode.  It is held before vp is unlocked,
	 * and vp's original lock type is remembered so that it can be
	 * re-acquired in the same mode afterwards.
	 */
	covered_vp = vp->v_mount->mnt_vnodecovered;
	vhold(covered_vp);
	ltype = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);
	error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread);
	if (error == 0) {
		error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred,
		    ap->a_buf, ap->a_buflen);
		vput(covered_vp);
	}
	vn_lock(vp, ltype | LK_RETRY);
	/* vp was unlocked for a while; report ENOENT if it got doomed. */
	if ((vp->v_iflag & VI_DOOMED) != 0)
		error = SET_ERROR(ENOENT);
	return (error);
}
5955299906Savg
#ifdef DIAGNOSTIC
/*
 * vop_lock1 wrapper compiled only under DIAGNOSTIC.
 *
 * The lock itself is taken by vop_stdlock().  After a successful lock
 * request that did not carry LK_NOWAIT, and provided the vnode is still
 * attached to a mount, is not doomed, has a znode and that znode does not
 * have ZFS_XATTR set, VERIFY that the filesystem's z_teardown_lock is not
 * held.
 *
 * Returns the result of vop_stdlock().
 */
static int
zfs_lock(struct vop_lock1_args *ap)
{
	vnode_t *vp;
	znode_t *zp;
	int err;

	err = vop_stdlock(ap);

	/* Nothing to verify for failed or LK_NOWAIT requests. */
	if (err != 0 || (ap->a_flags & LK_NOWAIT) != 0)
		return (err);

	vp = ap->a_vp;
	zp = vp->v_data;

	/* Skip vnodes that are detached from their mount or doomed. */
	if (vp->v_mount == NULL || (vp->v_iflag & VI_DOOMED) != 0)
		return (err);

	/* Skip vnodes without a znode, and znodes flagged ZFS_XATTR. */
	if (zp == NULL || (zp->z_pflags & ZFS_XATTR) != 0)
		return (err);

	VERIFY(!RRM_LOCK_HELD(&zp->z_zfsvfs->z_teardown_lock));
	return (err);
}
#endif
5981303970Savg
/*
 * Tentative definitions of the three vnode operation vectors; each one
 * receives its initializer further down in this file.
 */
struct vop_vector zfs_vnodeops;
struct vop_vector zfs_fifoops;
struct vop_vector zfs_shareops;
5985168404Spjd
5986168404Spjdstruct vop_vector zfs_vnodeops = {
5987185029Spjd	.vop_default =		&default_vnodeops,
5988185029Spjd	.vop_inactive =		zfs_freebsd_inactive,
5989185029Spjd	.vop_reclaim =		zfs_freebsd_reclaim,
5990185029Spjd	.vop_access =		zfs_freebsd_access,
5991303970Savg	.vop_lookup =		zfs_cache_lookup,
5992185029Spjd	.vop_cachedlookup =	zfs_freebsd_lookup,
5993185029Spjd	.vop_getattr =		zfs_freebsd_getattr,
5994185029Spjd	.vop_setattr =		zfs_freebsd_setattr,
5995185029Spjd	.vop_create =		zfs_freebsd_create,
5996185029Spjd	.vop_mknod =		zfs_freebsd_create,
5997185029Spjd	.vop_mkdir =		zfs_freebsd_mkdir,
5998185029Spjd	.vop_readdir =		zfs_freebsd_readdir,
5999185029Spjd	.vop_fsync =		zfs_freebsd_fsync,
6000185029Spjd	.vop_open =		zfs_freebsd_open,
6001185029Spjd	.vop_close =		zfs_freebsd_close,
6002185029Spjd	.vop_rmdir =		zfs_freebsd_rmdir,
6003185029Spjd	.vop_ioctl =		zfs_freebsd_ioctl,
6004185029Spjd	.vop_link =		zfs_freebsd_link,
6005185029Spjd	.vop_symlink =		zfs_freebsd_symlink,
6006185029Spjd	.vop_readlink =		zfs_freebsd_readlink,
6007185029Spjd	.vop_read =		zfs_freebsd_read,
6008185029Spjd	.vop_write =		zfs_freebsd_write,
6009185029Spjd	.vop_remove =		zfs_freebsd_remove,
6010185029Spjd	.vop_rename =		zfs_freebsd_rename,
6011185029Spjd	.vop_pathconf =		zfs_freebsd_pathconf,
6012243518Savg	.vop_bmap =		zfs_freebsd_bmap,
6013185029Spjd	.vop_fid =		zfs_freebsd_fid,
6014185029Spjd	.vop_getextattr =	zfs_getextattr,
6015185029Spjd	.vop_deleteextattr =	zfs_deleteextattr,
6016185029Spjd	.vop_setextattr =	zfs_setextattr,
6017185029Spjd	.vop_listextattr =	zfs_listextattr,
6018192800Strasz	.vop_getacl =		zfs_freebsd_getacl,
6019192800Strasz	.vop_setacl =		zfs_freebsd_setacl,
6020192800Strasz	.vop_aclcheck =		zfs_freebsd_aclcheck,
6021213937Savg	.vop_getpages =		zfs_freebsd_getpages,
6022258746Savg	.vop_putpages =		zfs_freebsd_putpages,
6023299906Savg	.vop_vptocnp =		zfs_vptocnp,
6024303970Savg#ifdef DIAGNOSTIC
6025303970Savg	.vop_lock1 =		zfs_lock,
6026303970Savg#endif
6027168404Spjd};
6028168404Spjd
6029169170Spjdstruct vop_vector zfs_fifoops = {
6030185029Spjd	.vop_default =		&fifo_specops,
6031200162Skib	.vop_fsync =		zfs_freebsd_fsync,
6032185029Spjd	.vop_access =		zfs_freebsd_access,
6033185029Spjd	.vop_getattr =		zfs_freebsd_getattr,
6034185029Spjd	.vop_inactive =		zfs_freebsd_inactive,
6035185029Spjd	.vop_read =		VOP_PANIC,
6036185029Spjd	.vop_reclaim =		zfs_freebsd_reclaim,
6037185029Spjd	.vop_setattr =		zfs_freebsd_setattr,
6038185029Spjd	.vop_write =		VOP_PANIC,
6039328298Sjhb	.vop_pathconf = 	zfs_freebsd_pathconf,
6040185029Spjd	.vop_fid =		zfs_freebsd_fid,
6041192800Strasz	.vop_getacl =		zfs_freebsd_getacl,
6042192800Strasz	.vop_setacl =		zfs_freebsd_setacl,
6043192800Strasz	.vop_aclcheck =		zfs_freebsd_aclcheck,
6044168404Spjd};
6045209962Smm
6046209962Smm/*
6047209962Smm * special share hidden files vnode operations template
6048209962Smm */
6049209962Smmstruct vop_vector zfs_shareops = {
6050209962Smm	.vop_default =		&default_vnodeops,
6051209962Smm	.vop_access =		zfs_freebsd_access,
6052209962Smm	.vop_inactive =		zfs_freebsd_inactive,
6053209962Smm	.vop_reclaim =		zfs_freebsd_reclaim,
6054209962Smm	.vop_fid =		zfs_freebsd_fid,
6055209962Smm	.vop_pathconf =		zfs_freebsd_pathconf,
6056209962Smm};
6057