1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24 * Copyright (c) 2013 by Delphix. All rights reserved.
25 */
26
27#include <sys/types.h>
28#include <sys/param.h>
29#include <sys/time.h>
30#include <sys/systm.h>
31#include <sys/sysmacros.h>
32#include <sys/resource.h>
33#include <sys/vfs.h>
34#include <sys/vnode.h>
35#include <sys/file.h>
36#include <sys/stat.h>
37#include <sys/kmem.h>
38#include <sys/cmn_err.h>
39#include <sys/errno.h>
40#include <sys/unistd.h>
41#include <sys/sdt.h>
42#include <sys/fs/zfs.h>
43#include <sys/policy.h>
44#include <sys/zfs_znode.h>
45#include <sys/zfs_fuid.h>
46#include <sys/zfs_acl.h>
47#include <sys/zfs_dir.h>
48#include <sys/zfs_vfsops.h>
49#include <sys/dmu.h>
50#include <sys/dnode.h>
51#include <sys/zap.h>
52#include <sys/sa.h>
53#include <acl/acl_common.h>
54
/* Short aliases for the plain allow/deny ACE types. */
#define	ALLOW	ACE_ACCESS_ALLOWED_ACE_TYPE
#define	DENY	ACE_ACCESS_DENIED_ACE_TYPE
/* Inclusive bounds of the ACE type range checked by zfs_acl_valid_ace_type(). */
#define	MAX_ACE_TYPE	ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE
#define	MIN_ACE_TYPE	ALLOW

/*
 * Value of (flags & ACE_TYPE_FLAGS) that this file treats as the
 * owning-group entry (compared alongside ACE_OWNER and ACE_EVERYONE).
 */
#define	OWNING_GROUP		(ACE_GROUP|ACE_IDENTIFIER_GROUP)
/*
 * Access-mask bit sets named after the owner/everyone ALLOW and DENY
 * entries.  Note that EVERYONE_DENY_MASK and OWNER_ALLOW_MASK expand to
 * the same four write-type bits.
 */
#define	EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \
    ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE)
#define	EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \
    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
#define	OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \
    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)

/* Union of the access-mask bits named by the "checked" mask. */
#define	ZFS_CHECKED_MASKS (ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_DATA| \
    ACE_READ_NAMED_ATTRS|ACE_WRITE_DATA|ACE_WRITE_ATTRIBUTES| \
    ACE_WRITE_NAMED_ATTRS|ACE_APPEND_DATA|ACE_EXECUTE|ACE_WRITE_OWNER| \
    ACE_WRITE_ACL|ACE_DELETE|ACE_DELETE_CHILD|ACE_SYNCHRONIZE)

/*
 * Write-type access bits, split into a data group and an attribute /
 * namespace group; WRITE_MASK is the union of the two.
 */
#define	WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS)
#define	WRITE_MASK_ATTRS (ACE_WRITE_ACL|ACE_WRITE_OWNER|ACE_WRITE_ATTRIBUTES| \
    ACE_DELETE|ACE_DELETE_CHILD)
#define	WRITE_MASK (WRITE_MASK_DATA|WRITE_MASK_ATTRS)

/*
 * Read/write/append/execute data bits (with their directory aliases).
 * OGE_CLEAR and OKAY_MASK_BITS currently expand to the same set.
 */
#define	OGE_CLEAR	(ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
    ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)

#define	OKAY_MASK_BITS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
    ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)

/* Every inheritance-related ACE flag. */
#define	ALL_INHERIT	(ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \
    ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE|ACE_INHERITED_ACE)

/* ACL-write and owner-write bits. */
#define	RESTRICTED_CLEAR	(ACE_WRITE_ACL|ACE_WRITE_OWNER)

/* ACL-wide flag sets: the V4 subset, and that subset plus the ZFS hints. */
#define	V4_ACL_WIDE_FLAGS (ZFS_ACL_AUTO_INHERIT|ZFS_ACL_DEFAULTED|\
    ZFS_ACL_PROTECTED)

#define	ZFS_ACL_WIDE_FLAGS (V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\
    ZFS_ACL_OBJ_ACE)

/* All three execute bits of a mode; used by zfs_mode_compute(). */
#define	ALL_MODE_EXECS (S_IXUSR | S_IXGRP | S_IXOTH)
96
97static uint16_t
98zfs_ace_v0_get_type(void *acep)
99{
100	return (((zfs_oldace_t *)acep)->z_type);
101}
102
103static uint16_t
104zfs_ace_v0_get_flags(void *acep)
105{
106	return (((zfs_oldace_t *)acep)->z_flags);
107}
108
109static uint32_t
110zfs_ace_v0_get_mask(void *acep)
111{
112	return (((zfs_oldace_t *)acep)->z_access_mask);
113}
114
115static uint64_t
116zfs_ace_v0_get_who(void *acep)
117{
118	return (((zfs_oldace_t *)acep)->z_fuid);
119}
120
121static void
122zfs_ace_v0_set_type(void *acep, uint16_t type)
123{
124	((zfs_oldace_t *)acep)->z_type = type;
125}
126
127static void
128zfs_ace_v0_set_flags(void *acep, uint16_t flags)
129{
130	((zfs_oldace_t *)acep)->z_flags = flags;
131}
132
133static void
134zfs_ace_v0_set_mask(void *acep, uint32_t mask)
135{
136	((zfs_oldace_t *)acep)->z_access_mask = mask;
137}
138
139static void
140zfs_ace_v0_set_who(void *acep, uint64_t who)
141{
142	((zfs_oldace_t *)acep)->z_fuid = who;
143}
144
145/*ARGSUSED*/
146static size_t
147zfs_ace_v0_size(void *acep)
148{
149	return (sizeof (zfs_oldace_t));
150}
151
152static size_t
153zfs_ace_v0_abstract_size(void)
154{
155	return (sizeof (zfs_oldace_t));
156}
157
158static int
159zfs_ace_v0_mask_off(void)
160{
161	return (offsetof(zfs_oldace_t, z_access_mask));
162}
163
/*
 * Old-format ACEs carry no extra payload: report a NULL data pointer
 * and a length of zero.
 */
/*ARGSUSED*/
static int
zfs_ace_v0_data(void *acep, void **datap)
{
	int data_len = 0;

	*datap = NULL;
	return (data_len);
}
171
/*
 * Accessor table for old-format (zfs_oldace_t) ACEs.  zfs_acl_alloc()
 * copies it into the ACL's z_ops for any version other than
 * ZFS_ACL_VERSION_FUID.  The initializer is positional, so the entries
 * must stay in the member order of acl_ops_t.
 */
static acl_ops_t zfs_acl_v0_ops = {
	zfs_ace_v0_get_mask,		/* read access mask */
	zfs_ace_v0_set_mask,		/* write access mask */
	zfs_ace_v0_get_flags,		/* read flags */
	zfs_ace_v0_set_flags,		/* write flags */
	zfs_ace_v0_get_type,		/* read ACE type */
	zfs_ace_v0_set_type,		/* write ACE type */
	zfs_ace_v0_get_who,		/* read who (z_fuid) */
	zfs_ace_v0_set_who,		/* write who (z_fuid) */
	zfs_ace_v0_size,		/* size of a given ACE */
	zfs_ace_v0_abstract_size,	/* size of an abstract ACE */
	zfs_ace_v0_mask_off,		/* offset of the access mask */
	zfs_ace_v0_data			/* extra payload (always none) */
};
186
187static uint16_t
188zfs_ace_fuid_get_type(void *acep)
189{
190	return (((zfs_ace_hdr_t *)acep)->z_type);
191}
192
193static uint16_t
194zfs_ace_fuid_get_flags(void *acep)
195{
196	return (((zfs_ace_hdr_t *)acep)->z_flags);
197}
198
199static uint32_t
200zfs_ace_fuid_get_mask(void *acep)
201{
202	return (((zfs_ace_hdr_t *)acep)->z_access_mask);
203}
204
205static uint64_t
206zfs_ace_fuid_get_who(void *args)
207{
208	uint16_t entry_type;
209	zfs_ace_t *acep = args;
210
211	entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS;
212
213	if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP ||
214	    entry_type == ACE_EVERYONE)
215		return (-1);
216	return (((zfs_ace_t *)acep)->z_fuid);
217}
218
219static void
220zfs_ace_fuid_set_type(void *acep, uint16_t type)
221{
222	((zfs_ace_hdr_t *)acep)->z_type = type;
223}
224
225static void
226zfs_ace_fuid_set_flags(void *acep, uint16_t flags)
227{
228	((zfs_ace_hdr_t *)acep)->z_flags = flags;
229}
230
231static void
232zfs_ace_fuid_set_mask(void *acep, uint32_t mask)
233{
234	((zfs_ace_hdr_t *)acep)->z_access_mask = mask;
235}
236
237static void
238zfs_ace_fuid_set_who(void *arg, uint64_t who)
239{
240	zfs_ace_t *acep = arg;
241
242	uint16_t entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS;
243
244	if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP ||
245	    entry_type == ACE_EVERYONE)
246		return;
247	acep->z_fuid = who;
248}
249
250static size_t
251zfs_ace_fuid_size(void *acep)
252{
253	zfs_ace_hdr_t *zacep = acep;
254	uint16_t entry_type;
255
256	switch (zacep->z_type) {
257	case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
258	case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
259	case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
260	case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
261		return (sizeof (zfs_object_ace_t));
262	case ALLOW:
263	case DENY:
264		entry_type =
265		    (((zfs_ace_hdr_t *)acep)->z_flags & ACE_TYPE_FLAGS);
266		if (entry_type == ACE_OWNER ||
267		    entry_type == OWNING_GROUP ||
268		    entry_type == ACE_EVERYONE)
269			return (sizeof (zfs_ace_hdr_t));
270		/*FALLTHROUGH*/
271	default:
272		return (sizeof (zfs_ace_t));
273	}
274}
275
276static size_t
277zfs_ace_fuid_abstract_size(void)
278{
279	return (sizeof (zfs_ace_hdr_t));
280}
281
282static int
283zfs_ace_fuid_mask_off(void)
284{
285	return (offsetof(zfs_ace_hdr_t, z_access_mask));
286}
287
288static int
289zfs_ace_fuid_data(void *acep, void **datap)
290{
291	zfs_ace_t *zacep = acep;
292	zfs_object_ace_t *zobjp;
293
294	switch (zacep->z_hdr.z_type) {
295	case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
296	case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
297	case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
298	case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
299		zobjp = acep;
300		*datap = (caddr_t)zobjp + sizeof (zfs_ace_t);
301		return (sizeof (zfs_object_ace_t) - sizeof (zfs_ace_t));
302	default:
303		*datap = NULL;
304		return (0);
305	}
306}
307
/*
 * Accessor table for FUID-format ACEs (zfs_ace_hdr_t / zfs_ace_t /
 * zfs_object_ace_t).  zfs_acl_alloc() copies it into the ACL's z_ops
 * when the version is ZFS_ACL_VERSION_FUID, and zfs_acl_xform() installs
 * it when converting an old ACL.  The initializer is positional, so the
 * entries must stay in the member order of acl_ops_t.
 */
static acl_ops_t zfs_acl_fuid_ops = {
	zfs_ace_fuid_get_mask,		/* read access mask */
	zfs_ace_fuid_set_mask,		/* write access mask */
	zfs_ace_fuid_get_flags,		/* read flags */
	zfs_ace_fuid_set_flags,		/* write flags */
	zfs_ace_fuid_get_type,		/* read ACE type */
	zfs_ace_fuid_set_type,		/* write ACE type */
	zfs_ace_fuid_get_who,		/* read who (FUID) */
	zfs_ace_fuid_set_who,		/* write who (FUID) */
	zfs_ace_fuid_size,		/* size of a given ACE */
	zfs_ace_fuid_abstract_size,	/* size of a header-only ACE */
	zfs_ace_fuid_mask_off,		/* offset of the access mask */
	zfs_ace_fuid_data		/* object-ACE payload, if any */
};
322
323/*
324 * The following three functions are provided for compatibility with
325 * older ZPL version in order to determine if the file use to have
326 * an external ACL and what version of ACL previously existed on the
327 * file.  Would really be nice to not need this, sigh.
328 */
329uint64_t
330zfs_external_acl(znode_t *zp)
331{
332	zfs_acl_phys_t acl_phys;
333	int error;
334
335	if (zp->z_is_sa)
336		return (0);
337
338	/*
339	 * Need to deal with a potential
340	 * race where zfs_sa_upgrade could cause
341	 * z_isa_sa to change.
342	 *
343	 * If the lookup fails then the state of z_is_sa should have
344	 * changed.
345	 */
346
347	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zp->z_zfsvfs),
348	    &acl_phys, sizeof (acl_phys))) == 0)
349		return (acl_phys.z_acl_extern_obj);
350	else {
351		/*
352		 * after upgrade the SA_ZPL_ZNODE_ACL should have been
353		 * removed
354		 */
355		VERIFY(zp->z_is_sa && error == ENOENT);
356		return (0);
357	}
358}
359
360/*
361 * Determine size of ACL in bytes
362 *
363 * This is more complicated than it should be since we have to deal
364 * with old external ACLs.
365 */
366static int
367zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount,
368    zfs_acl_phys_t *aclphys)
369{
370	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
371	uint64_t acl_count;
372	int size;
373	int error;
374
375	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
376	if (zp->z_is_sa) {
377		if ((error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zfsvfs),
378		    &size)) != 0)
379			return (error);
380		*aclsize = size;
381		if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zfsvfs),
382		    &acl_count, sizeof (acl_count))) != 0)
383			return (error);
384		*aclcount = acl_count;
385	} else {
386		if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
387		    aclphys, sizeof (*aclphys))) != 0)
388			return (error);
389
390		if (aclphys->z_acl_version == ZFS_ACL_VERSION_INITIAL) {
391			*aclsize = ZFS_ACL_SIZE(aclphys->z_acl_size);
392			*aclcount = aclphys->z_acl_size;
393		} else {
394			*aclsize = aclphys->z_acl_size;
395			*aclcount = aclphys->z_acl_count;
396		}
397	}
398	return (0);
399}
400
401int
402zfs_znode_acl_version(znode_t *zp)
403{
404	zfs_acl_phys_t acl_phys;
405
406	if (zp->z_is_sa)
407		return (ZFS_ACL_VERSION_FUID);
408	else {
409		int error;
410
411		/*
412		 * Need to deal with a potential
413		 * race where zfs_sa_upgrade could cause
414		 * z_isa_sa to change.
415		 *
416		 * If the lookup fails then the state of z_is_sa should have
417		 * changed.
418		 */
419		if ((error = sa_lookup(zp->z_sa_hdl,
420		    SA_ZPL_ZNODE_ACL(zp->z_zfsvfs),
421		    &acl_phys, sizeof (acl_phys))) == 0)
422			return (acl_phys.z_acl_version);
423		else {
424			/*
425			 * After upgrade SA_ZPL_ZNODE_ACL should have
426			 * been removed.
427			 */
428			VERIFY(zp->z_is_sa && error == ENOENT);
429			return (ZFS_ACL_VERSION_FUID);
430		}
431	}
432}
433
434static int
435zfs_acl_version(int version)
436{
437	if (version < ZPL_VERSION_FUID)
438		return (ZFS_ACL_VERSION_INITIAL);
439	else
440		return (ZFS_ACL_VERSION_FUID);
441}
442
443static int
444zfs_acl_version_zp(znode_t *zp)
445{
446	return (zfs_acl_version(zp->z_zfsvfs->z_version));
447}
448
449zfs_acl_t *
450zfs_acl_alloc(int vers)
451{
452	zfs_acl_t *aclp;
453
454	aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP);
455	list_create(&aclp->z_acl, sizeof (zfs_acl_node_t),
456	    offsetof(zfs_acl_node_t, z_next));
457	aclp->z_version = vers;
458	if (vers == ZFS_ACL_VERSION_FUID)
459		aclp->z_ops = zfs_acl_fuid_ops;
460	else
461		aclp->z_ops = zfs_acl_v0_ops;
462	return (aclp);
463}
464
465zfs_acl_node_t *
466zfs_acl_node_alloc(size_t bytes)
467{
468	zfs_acl_node_t *aclnode;
469
470	aclnode = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP);
471	if (bytes) {
472		aclnode->z_acldata = kmem_alloc(bytes, KM_SLEEP);
473		aclnode->z_allocdata = aclnode->z_acldata;
474		aclnode->z_allocsize = bytes;
475		aclnode->z_size = bytes;
476	}
477
478	return (aclnode);
479}
480
481static void
482zfs_acl_node_free(zfs_acl_node_t *aclnode)
483{
484	if (aclnode->z_allocsize)
485		kmem_free(aclnode->z_allocdata, aclnode->z_allocsize);
486	kmem_free(aclnode, sizeof (zfs_acl_node_t));
487}
488
489static void
490zfs_acl_release_nodes(zfs_acl_t *aclp)
491{
492	zfs_acl_node_t *aclnode;
493
494	while (aclnode = list_head(&aclp->z_acl)) {
495		list_remove(&aclp->z_acl, aclnode);
496		zfs_acl_node_free(aclnode);
497	}
498	aclp->z_acl_count = 0;
499	aclp->z_acl_bytes = 0;
500}
501
502void
503zfs_acl_free(zfs_acl_t *aclp)
504{
505	zfs_acl_release_nodes(aclp);
506	list_destroy(&aclp->z_acl);
507	kmem_free(aclp, sizeof (zfs_acl_t));
508}
509
510static boolean_t
511zfs_acl_valid_ace_type(uint_t type, uint_t flags)
512{
513	uint16_t entry_type;
514
515	switch (type) {
516	case ALLOW:
517	case DENY:
518	case ACE_SYSTEM_AUDIT_ACE_TYPE:
519	case ACE_SYSTEM_ALARM_ACE_TYPE:
520		entry_type = flags & ACE_TYPE_FLAGS;
521		return (entry_type == ACE_OWNER ||
522		    entry_type == OWNING_GROUP ||
523		    entry_type == ACE_EVERYONE || entry_type == 0 ||
524		    entry_type == ACE_IDENTIFIER_GROUP);
525	default:
526		if (type >= MIN_ACE_TYPE && type <= MAX_ACE_TYPE)
527			return (B_TRUE);
528	}
529	return (B_FALSE);
530}
531
532static boolean_t
533zfs_ace_valid(vtype_t obj_type, zfs_acl_t *aclp, uint16_t type, uint16_t iflags)
534{
535	/*
536	 * first check type of entry
537	 */
538
539	if (!zfs_acl_valid_ace_type(type, iflags))
540		return (B_FALSE);
541
542	switch (type) {
543	case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
544	case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
545	case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
546	case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
547		if (aclp->z_version < ZFS_ACL_VERSION_FUID)
548			return (B_FALSE);
549		aclp->z_hints |= ZFS_ACL_OBJ_ACE;
550	}
551
552	/*
553	 * next check inheritance level flags
554	 */
555
556	if (obj_type == VDIR &&
557	    (iflags & (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
558		aclp->z_hints |= ZFS_INHERIT_ACE;
559
560	if (iflags & (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) {
561		if ((iflags & (ACE_FILE_INHERIT_ACE|
562		    ACE_DIRECTORY_INHERIT_ACE)) == 0) {
563			return (B_FALSE);
564		}
565	}
566
567	return (B_TRUE);
568}
569
570static void *
571zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who,
572    uint32_t *access_mask, uint16_t *iflags, uint16_t *type)
573{
574	zfs_acl_node_t *aclnode;
575
576	ASSERT(aclp);
577
578	if (start == NULL) {
579		aclnode = list_head(&aclp->z_acl);
580		if (aclnode == NULL)
581			return (NULL);
582
583		aclp->z_next_ace = aclnode->z_acldata;
584		aclp->z_curr_node = aclnode;
585		aclnode->z_ace_idx = 0;
586	}
587
588	aclnode = aclp->z_curr_node;
589
590	if (aclnode == NULL)
591		return (NULL);
592
593	if (aclnode->z_ace_idx >= aclnode->z_ace_count) {
594		aclnode = list_next(&aclp->z_acl, aclnode);
595		if (aclnode == NULL)
596			return (NULL);
597		else {
598			aclp->z_curr_node = aclnode;
599			aclnode->z_ace_idx = 0;
600			aclp->z_next_ace = aclnode->z_acldata;
601		}
602	}
603
604	if (aclnode->z_ace_idx < aclnode->z_ace_count) {
605		void *acep = aclp->z_next_ace;
606		size_t ace_size;
607
608		/*
609		 * Make sure we don't overstep our bounds
610		 */
611		ace_size = aclp->z_ops.ace_size(acep);
612
613		if (((caddr_t)acep + ace_size) >
614		    ((caddr_t)aclnode->z_acldata + aclnode->z_size)) {
615			return (NULL);
616		}
617
618		*iflags = aclp->z_ops.ace_flags_get(acep);
619		*type = aclp->z_ops.ace_type_get(acep);
620		*access_mask = aclp->z_ops.ace_mask_get(acep);
621		*who = aclp->z_ops.ace_who_get(acep);
622		aclp->z_next_ace = (caddr_t)aclp->z_next_ace + ace_size;
623		aclnode->z_ace_idx++;
624
625		return ((void *)acep);
626	}
627	return (NULL);
628}
629
630/*ARGSUSED*/
631static uint64_t
632zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt,
633    uint16_t *flags, uint16_t *type, uint32_t *mask)
634{
635	zfs_acl_t *aclp = datap;
636	zfs_ace_hdr_t *acep = (zfs_ace_hdr_t *)(uintptr_t)cookie;
637	uint64_t who;
638
639	acep = zfs_acl_next_ace(aclp, acep, &who, mask,
640	    flags, type);
641	return ((uint64_t)(uintptr_t)acep);
642}
643
644static zfs_acl_node_t *
645zfs_acl_curr_node(zfs_acl_t *aclp)
646{
647	ASSERT(aclp->z_curr_node);
648	return (aclp->z_curr_node);
649}
650
651/*
652 * Copy ACE to internal ZFS format.
653 * While processing the ACL each ACE will be validated for correctness.
654 * ACE FUIDs will be created later.
655 */
656int
657zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, vtype_t obj_type, zfs_acl_t *aclp,
658    void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size,
659    zfs_fuid_info_t **fuidp, cred_t *cr)
660{
661	int i;
662	uint16_t entry_type;
663	zfs_ace_t *aceptr = z_acl;
664	ace_t *acep = datap;
665	zfs_object_ace_t *zobjacep;
666	ace_object_t *aceobjp;
667
668	for (i = 0; i != aclcnt; i++) {
669		aceptr->z_hdr.z_access_mask = acep->a_access_mask;
670		aceptr->z_hdr.z_flags = acep->a_flags;
671		aceptr->z_hdr.z_type = acep->a_type;
672		entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS;
673		if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP &&
674		    entry_type != ACE_EVERYONE) {
675			aceptr->z_fuid = zfs_fuid_create(zfsvfs, acep->a_who,
676			    cr, (entry_type == 0) ?
677			    ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp);
678		}
679
680		/*
681		 * Make sure ACE is valid
682		 */
683		if (zfs_ace_valid(obj_type, aclp, aceptr->z_hdr.z_type,
684		    aceptr->z_hdr.z_flags) != B_TRUE)
685			return (SET_ERROR(EINVAL));
686
687		switch (acep->a_type) {
688		case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
689		case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
690		case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
691		case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
692			zobjacep = (zfs_object_ace_t *)aceptr;
693			aceobjp = (ace_object_t *)acep;
694
695			bcopy(aceobjp->a_obj_type, zobjacep->z_object_type,
696			    sizeof (aceobjp->a_obj_type));
697			bcopy(aceobjp->a_inherit_obj_type,
698			    zobjacep->z_inherit_type,
699			    sizeof (aceobjp->a_inherit_obj_type));
700			acep = (ace_t *)((caddr_t)acep + sizeof (ace_object_t));
701			break;
702		default:
703			acep = (ace_t *)((caddr_t)acep + sizeof (ace_t));
704		}
705
706		aceptr = (zfs_ace_t *)((caddr_t)aceptr +
707		    aclp->z_ops.ace_size(aceptr));
708	}
709
710	*size = (caddr_t)aceptr - (caddr_t)z_acl;
711
712	return (0);
713}
714
715/*
716 * Copy ZFS ACEs to fixed size ace_t layout
717 */
718static void
719zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr,
720    void *datap, int filter)
721{
722	uint64_t who;
723	uint32_t access_mask;
724	uint16_t iflags, type;
725	zfs_ace_hdr_t *zacep = NULL;
726	ace_t *acep = datap;
727	ace_object_t *objacep;
728	zfs_object_ace_t *zobjacep;
729	size_t ace_size;
730	uint16_t entry_type;
731
732	while (zacep = zfs_acl_next_ace(aclp, zacep,
733	    &who, &access_mask, &iflags, &type)) {
734
735		switch (type) {
736		case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
737		case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
738		case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
739		case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
740			if (filter) {
741				continue;
742			}
743			zobjacep = (zfs_object_ace_t *)zacep;
744			objacep = (ace_object_t *)acep;
745			bcopy(zobjacep->z_object_type,
746			    objacep->a_obj_type,
747			    sizeof (zobjacep->z_object_type));
748			bcopy(zobjacep->z_inherit_type,
749			    objacep->a_inherit_obj_type,
750			    sizeof (zobjacep->z_inherit_type));
751			ace_size = sizeof (ace_object_t);
752			break;
753		default:
754			ace_size = sizeof (ace_t);
755			break;
756		}
757
758		entry_type = (iflags & ACE_TYPE_FLAGS);
759		if ((entry_type != ACE_OWNER &&
760		    entry_type != OWNING_GROUP &&
761		    entry_type != ACE_EVERYONE)) {
762			acep->a_who = zfs_fuid_map_id(zfsvfs, who,
763			    cr, (entry_type & ACE_IDENTIFIER_GROUP) ?
764			    ZFS_ACE_GROUP : ZFS_ACE_USER);
765		} else {
766			acep->a_who = (uid_t)(int64_t)who;
767		}
768		acep->a_access_mask = access_mask;
769		acep->a_flags = iflags;
770		acep->a_type = type;
771		acep = (ace_t *)((caddr_t)acep + ace_size);
772	}
773}
774
775static int
776zfs_copy_ace_2_oldace(vtype_t obj_type, zfs_acl_t *aclp, ace_t *acep,
777    zfs_oldace_t *z_acl, int aclcnt, size_t *size)
778{
779	int i;
780	zfs_oldace_t *aceptr = z_acl;
781
782	for (i = 0; i != aclcnt; i++, aceptr++) {
783		aceptr->z_access_mask = acep[i].a_access_mask;
784		aceptr->z_type = acep[i].a_type;
785		aceptr->z_flags = acep[i].a_flags;
786		aceptr->z_fuid = acep[i].a_who;
787		/*
788		 * Make sure ACE is valid
789		 */
790		if (zfs_ace_valid(obj_type, aclp, aceptr->z_type,
791		    aceptr->z_flags) != B_TRUE)
792			return (SET_ERROR(EINVAL));
793	}
794	*size = (caddr_t)aceptr - (caddr_t)z_acl;
795	return (0);
796}
797
798/*
799 * convert old ACL format to new
800 */
801void
802zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr)
803{
804	zfs_oldace_t *oldaclp;
805	int i;
806	uint16_t type, iflags;
807	uint32_t access_mask;
808	uint64_t who;
809	void *cookie = NULL;
810	zfs_acl_node_t *newaclnode;
811
812	ASSERT(aclp->z_version == ZFS_ACL_VERSION_INITIAL);
813	/*
814	 * First create the ACE in a contiguous piece of memory
815	 * for zfs_copy_ace_2_fuid().
816	 *
817	 * We only convert an ACL once, so this won't happen
818	 * everytime.
819	 */
820	oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count,
821	    KM_SLEEP);
822	i = 0;
823	while (cookie = zfs_acl_next_ace(aclp, cookie, &who,
824	    &access_mask, &iflags, &type)) {
825		oldaclp[i].z_flags = iflags;
826		oldaclp[i].z_type = type;
827		oldaclp[i].z_fuid = who;
828		oldaclp[i++].z_access_mask = access_mask;
829	}
830
831	newaclnode = zfs_acl_node_alloc(aclp->z_acl_count *
832	    sizeof (zfs_object_ace_t));
833	aclp->z_ops = zfs_acl_fuid_ops;
834	VERIFY(zfs_copy_ace_2_fuid(zp->z_zfsvfs, ZTOV(zp)->v_type, aclp,
835	    oldaclp, newaclnode->z_acldata, aclp->z_acl_count,
836	    &newaclnode->z_size, NULL, cr) == 0);
837	newaclnode->z_ace_count = aclp->z_acl_count;
838	aclp->z_version = ZFS_ACL_VERSION;
839	kmem_free(oldaclp, aclp->z_acl_count * sizeof (zfs_oldace_t));
840
841	/*
842	 * Release all previous ACL nodes
843	 */
844
845	zfs_acl_release_nodes(aclp);
846
847	list_insert_head(&aclp->z_acl, newaclnode);
848
849	aclp->z_acl_bytes = newaclnode->z_size;
850	aclp->z_acl_count = newaclnode->z_ace_count;
851
852}
853
854/*
855 * Convert unix access mask to v4 access mask
856 */
857static uint32_t
858zfs_unix_to_v4(uint32_t access_mask)
859{
860	uint32_t new_mask = 0;
861
862	if (access_mask & S_IXOTH)
863		new_mask |= ACE_EXECUTE;
864	if (access_mask & S_IWOTH)
865		new_mask |= ACE_WRITE_DATA;
866	if (access_mask & S_IROTH)
867		new_mask |= ACE_READ_DATA;
868	return (new_mask);
869}
870
871static void
872zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask,
873    uint16_t access_type, uint64_t fuid, uint16_t entry_type)
874{
875	uint16_t type = entry_type & ACE_TYPE_FLAGS;
876
877	aclp->z_ops.ace_mask_set(acep, access_mask);
878	aclp->z_ops.ace_type_set(acep, access_type);
879	aclp->z_ops.ace_flags_set(acep, entry_type);
880	if ((type != ACE_OWNER && type != OWNING_GROUP &&
881	    type != ACE_EVERYONE))
882		aclp->z_ops.ace_who_set(acep, fuid);
883}
884
885/*
886 * Determine mode of file based on ACL.
887 */
888uint64_t
889zfs_mode_compute(uint64_t fmode, zfs_acl_t *aclp,
890    uint64_t *pflags, uint64_t fuid, uint64_t fgid)
891{
892	int		entry_type;
893	mode_t		mode;
894	mode_t		seen = 0;
895	zfs_ace_hdr_t 	*acep = NULL;
896	uint64_t	who;
897	uint16_t	iflags, type;
898	uint32_t	access_mask;
899	boolean_t	an_exec_denied = B_FALSE;
900
901	mode = (fmode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
902
903	while (acep = zfs_acl_next_ace(aclp, acep, &who,
904	    &access_mask, &iflags, &type)) {
905
906		if (!zfs_acl_valid_ace_type(type, iflags))
907			continue;
908
909		entry_type = (iflags & ACE_TYPE_FLAGS);
910
911		/*
912		 * Skip over any inherit_only ACEs
913		 */
914		if (iflags & ACE_INHERIT_ONLY_ACE)
915			continue;
916
917		if (entry_type == ACE_OWNER || (entry_type == 0 &&
918		    who == fuid)) {
919			if ((access_mask & ACE_READ_DATA) &&
920			    (!(seen & S_IRUSR))) {
921				seen |= S_IRUSR;
922				if (type == ALLOW) {
923					mode |= S_IRUSR;
924				}
925			}
926			if ((access_mask & ACE_WRITE_DATA) &&
927			    (!(seen & S_IWUSR))) {
928				seen |= S_IWUSR;
929				if (type == ALLOW) {
930					mode |= S_IWUSR;
931				}
932			}
933			if ((access_mask & ACE_EXECUTE) &&
934			    (!(seen & S_IXUSR))) {
935				seen |= S_IXUSR;
936				if (type == ALLOW) {
937					mode |= S_IXUSR;
938				}
939			}
940		} else if (entry_type == OWNING_GROUP ||
941		    (entry_type == ACE_IDENTIFIER_GROUP && who == fgid)) {
942			if ((access_mask & ACE_READ_DATA) &&
943			    (!(seen & S_IRGRP))) {
944				seen |= S_IRGRP;
945				if (type == ALLOW) {
946					mode |= S_IRGRP;
947				}
948			}
949			if ((access_mask & ACE_WRITE_DATA) &&
950			    (!(seen & S_IWGRP))) {
951				seen |= S_IWGRP;
952				if (type == ALLOW) {
953					mode |= S_IWGRP;
954				}
955			}
956			if ((access_mask & ACE_EXECUTE) &&
957			    (!(seen & S_IXGRP))) {
958				seen |= S_IXGRP;
959				if (type == ALLOW) {
960					mode |= S_IXGRP;
961				}
962			}
963		} else if (entry_type == ACE_EVERYONE) {
964			if ((access_mask & ACE_READ_DATA)) {
965				if (!(seen & S_IRUSR)) {
966					seen |= S_IRUSR;
967					if (type == ALLOW) {
968						mode |= S_IRUSR;
969					}
970				}
971				if (!(seen & S_IRGRP)) {
972					seen |= S_IRGRP;
973					if (type == ALLOW) {
974						mode |= S_IRGRP;
975					}
976				}
977				if (!(seen & S_IROTH)) {
978					seen |= S_IROTH;
979					if (type == ALLOW) {
980						mode |= S_IROTH;
981					}
982				}
983			}
984			if ((access_mask & ACE_WRITE_DATA)) {
985				if (!(seen & S_IWUSR)) {
986					seen |= S_IWUSR;
987					if (type == ALLOW) {
988						mode |= S_IWUSR;
989					}
990				}
991				if (!(seen & S_IWGRP)) {
992					seen |= S_IWGRP;
993					if (type == ALLOW) {
994						mode |= S_IWGRP;
995					}
996				}
997				if (!(seen & S_IWOTH)) {
998					seen |= S_IWOTH;
999					if (type == ALLOW) {
1000						mode |= S_IWOTH;
1001					}
1002				}
1003			}
1004			if ((access_mask & ACE_EXECUTE)) {
1005				if (!(seen & S_IXUSR)) {
1006					seen |= S_IXUSR;
1007					if (type == ALLOW) {
1008						mode |= S_IXUSR;
1009					}
1010				}
1011				if (!(seen & S_IXGRP)) {
1012					seen |= S_IXGRP;
1013					if (type == ALLOW) {
1014						mode |= S_IXGRP;
1015					}
1016				}
1017				if (!(seen & S_IXOTH)) {
1018					seen |= S_IXOTH;
1019					if (type == ALLOW) {
1020						mode |= S_IXOTH;
1021					}
1022				}
1023			}
1024		} else {
1025			/*
1026			 * Only care if this IDENTIFIER_GROUP or
1027			 * USER ACE denies execute access to someone,
1028			 * mode is not affected
1029			 */
1030			if ((access_mask & ACE_EXECUTE) && type == DENY)
1031				an_exec_denied = B_TRUE;
1032		}
1033	}
1034
1035	/*
1036	 * Failure to allow is effectively a deny, so execute permission
1037	 * is denied if it was never mentioned or if we explicitly
1038	 * weren't allowed it.
1039	 */
1040	if (!an_exec_denied &&
1041	    ((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS ||
1042	    (mode & ALL_MODE_EXECS) != ALL_MODE_EXECS))
1043		an_exec_denied = B_TRUE;
1044
1045	if (an_exec_denied)
1046		*pflags &= ~ZFS_NO_EXECS_DENIED;
1047	else
1048		*pflags |= ZFS_NO_EXECS_DENIED;
1049
1050	return (mode);
1051}
1052
1053/*
1054 * Read an external acl object.  If the intent is to modify, always
1055 * create a new acl and leave any cached acl in place.
1056 */
1057static int
1058zfs_acl_node_read(znode_t *zp, zfs_acl_t **aclpp, boolean_t will_modify)
1059{
1060	zfs_acl_t	*aclp;
1061	int		aclsize;
1062	int		acl_count;
1063	zfs_acl_node_t	*aclnode;
1064	zfs_acl_phys_t	znode_acl;
1065	int		version;
1066	int		error;
1067
1068	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
1069	ASSERT_VOP_LOCKED(ZTOV(zp), __func__);
1070
1071	if (zp->z_acl_cached && !will_modify) {
1072		*aclpp = zp->z_acl_cached;
1073		return (0);
1074	}
1075
1076	version = zfs_znode_acl_version(zp);
1077
1078	if ((error = zfs_acl_znode_info(zp, &aclsize,
1079	    &acl_count, &znode_acl)) != 0) {
1080		goto done;
1081	}
1082
1083	aclp = zfs_acl_alloc(version);
1084
1085	aclp->z_acl_count = acl_count;
1086	aclp->z_acl_bytes = aclsize;
1087
1088	aclnode = zfs_acl_node_alloc(aclsize);
1089	aclnode->z_ace_count = aclp->z_acl_count;
1090	aclnode->z_size = aclsize;
1091
1092	if (!zp->z_is_sa) {
1093		if (znode_acl.z_acl_extern_obj) {
1094			error = dmu_read(zp->z_zfsvfs->z_os,
1095			    znode_acl.z_acl_extern_obj, 0, aclnode->z_size,
1096			    aclnode->z_acldata, DMU_READ_PREFETCH);
1097		} else {
1098			bcopy(znode_acl.z_ace_data, aclnode->z_acldata,
1099			    aclnode->z_size);
1100		}
1101	} else {
1102		error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zp->z_zfsvfs),
1103		    aclnode->z_acldata, aclnode->z_size);
1104	}
1105
1106	if (error != 0) {
1107		zfs_acl_free(aclp);
1108		zfs_acl_node_free(aclnode);
1109		/* convert checksum errors into IO errors */
1110		if (error == ECKSUM)
1111			error = SET_ERROR(EIO);
1112		goto done;
1113	}
1114
1115	list_insert_head(&aclp->z_acl, aclnode);
1116
1117	*aclpp = aclp;
1118	if (!will_modify)
1119		zp->z_acl_cached = aclp;
1120done:
1121	return (error);
1122}
1123
1124/*ARGSUSED*/
1125void
1126zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen,
1127    boolean_t start, void *userdata)
1128{
1129	zfs_acl_locator_cb_t *cb = (zfs_acl_locator_cb_t *)userdata;
1130
1131	if (start) {
1132		cb->cb_acl_node = list_head(&cb->cb_aclp->z_acl);
1133	} else {
1134		cb->cb_acl_node = list_next(&cb->cb_aclp->z_acl,
1135		    cb->cb_acl_node);
1136	}
1137	*dataptr = cb->cb_acl_node->z_acldata;
1138	*length = cb->cb_acl_node->z_size;
1139}
1140
1141int
1142zfs_acl_chown_setattr(znode_t *zp)
1143{
1144	int error;
1145	zfs_acl_t *aclp;
1146
1147	ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
1148	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
1149
1150	if ((error = zfs_acl_node_read(zp, &aclp, B_FALSE)) == 0)
1151		zp->z_mode = zfs_mode_compute(zp->z_mode, aclp,
1152		    &zp->z_pflags, zp->z_uid, zp->z_gid);
1153	return (error);
1154}
1155
/*
 * Common code for setting ACLs.
 *
 * Recomputes the mode bits from aclp and then stores the mode, pflags,
 * ctime and the ACL itself on zp, inside transaction tx, with a single
 * sa_bulk_update().  Any ACL cached on the znode is freed first; the new
 * aclp is NOT cached here (zfs_setacl(), one of the callers, does that
 * itself after this function returns).
 *
 *	zp	znode whose ACL is being replaced
 *	aclp	the new ACL; may be transformed in place by zfs_acl_xform()
 *	cr	credentials, only passed on to zfs_acl_xform()
 *	tx	transaction used for all DMU/SA writes
 *
 * Returns 0 on success, otherwise the error from sa_lookup(),
 * dmu_object_free() or sa_bulk_update().  On the early error returns
 * zp->z_mode has already been replaced and the cached ACL already dropped.
 */
int
zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
{
	int			error;
	zfsvfs_t		*zfsvfs = zp->z_zfsvfs;
	dmu_object_type_t	otype;
	zfs_acl_locator_cb_t	locate = { 0 };
	uint64_t		mode;
	/*
	 * At most five attributes are queued: mode, flags, ctime, plus
	 * either DACL_ACES + DACL_COUNT (SA) or ZNODE_ACL (legacy).
	 */
	sa_bulk_attr_t		bulk[5];
	uint64_t		ctime[2];
	int			count = 0;

	mode = zp->z_mode;

	mode = zfs_mode_compute(mode, aclp, &zp->z_pflags,
	    zp->z_uid, zp->z_gid);

	zp->z_mode = mode;
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
	    &mode, sizeof (mode));
	/*
	 * The flags entry references zp->z_pflags by address; z_pflags is
	 * modified again further down, before the bulk update is issued.
	 */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));
	/* ctime[] is filled in by zfs_tstamp_update_setup() below. */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
	    &ctime, sizeof (ctime));

	/* The cached copy describes the old ACL; drop it. */
	if (zp->z_acl_cached) {
		zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = NULL;
	}

	/*
	 * Upgrade needed?
	 */
	if (!zfsvfs->z_use_fuids) {
		otype = DMU_OT_OLDACL;
	} else {
		if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) &&
		    (zfsvfs->z_version >= ZPL_VERSION_FUID))
			zfs_acl_xform(zp, aclp, cr);
		ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID);
		otype = DMU_OT_ACL;
	}

	/*
	 * Arrgh, we have to handle old on disk format
	 * as well as newer (preferred) SA format.
	 */

	if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */
		/*
		 * The ACE data is supplied through the zfs_acl_data_locator
		 * callback, with 'locate' as its user data.
		 */
		locate.cb_aclp = aclp;
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zfsvfs),
		    zfs_acl_data_locator, &locate, aclp->z_acl_bytes);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zfsvfs),
		    NULL, &aclp->z_acl_count, sizeof (uint64_t));
	} else { /* Painful legacy way */
		zfs_acl_node_t *aclnode;
		uint64_t off = 0;
		zfs_acl_phys_t acl_phys;
		uint64_t aoid;

		/* Start from the ACL header currently stored on the znode. */
		if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
		    &acl_phys, sizeof (acl_phys))) != 0)
			return (error);

		aoid = acl_phys.z_acl_extern_obj;

		if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
			/*
			 * If ACL was previously external and we are now
			 * converting to new ACL format then release old
			 * ACL object and create a new one.
			 */
			if (aoid &&
			    aclp->z_version != acl_phys.z_acl_version) {
				error = dmu_object_free(zfsvfs->z_os, aoid, tx);
				if (error)
					return (error);
				aoid = 0;
			}
			if (aoid == 0) {
				aoid = dmu_object_alloc(zfsvfs->z_os,
				    otype, aclp->z_acl_bytes,
				    otype == DMU_OT_ACL ?
				    DMU_OT_SYSACL : DMU_OT_NONE,
				    otype == DMU_OT_ACL ?
				    DN_MAX_BONUSLEN : 0, tx);
			} else {
				/* Reusing the object; result is ignored. */
				(void) dmu_object_set_blocksize(zfsvfs->z_os,
				    aoid, aclp->z_acl_bytes, 0, tx);
			}
			acl_phys.z_acl_extern_obj = aoid;
			/*
			 * Write the non-empty ACL nodes back to back into
			 * the external object.
			 */
			for (aclnode = list_head(&aclp->z_acl); aclnode;
			    aclnode = list_next(&aclp->z_acl, aclnode)) {
				if (aclnode->z_ace_count == 0)
					continue;
				dmu_write(zfsvfs->z_os, aoid, off,
				    aclnode->z_size, aclnode->z_acldata, tx);
				off += aclnode->z_size;
			}
		} else {
			void *start = acl_phys.z_ace_data;
			/*
			 * Migrating back embedded?
			 */
			if (acl_phys.z_acl_extern_obj) {
				error = dmu_object_free(zfsvfs->z_os,
				    acl_phys.z_acl_extern_obj, tx);
				if (error)
					return (error);
				acl_phys.z_acl_extern_obj = 0;
			}

			/*
			 * Copy the non-empty ACL nodes back to back into
			 * the embedded ACE area of acl_phys.
			 */
			for (aclnode = list_head(&aclp->z_acl); aclnode;
			    aclnode = list_next(&aclp->z_acl, aclnode)) {
				if (aclnode->z_ace_count == 0)
					continue;
				bcopy(aclnode->z_acldata, start,
				    aclnode->z_size);
				start = (caddr_t)start + aclnode->z_size;
			}
		}
		/*
		 * If Old version then swap count/bytes to match old
		 * layout of znode_acl_phys_t.
		 */
		if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
			acl_phys.z_acl_size = aclp->z_acl_count;
			acl_phys.z_acl_count = aclp->z_acl_bytes;
		} else {
			acl_phys.z_acl_size = aclp->z_acl_bytes;
			acl_phys.z_acl_count = aclp->z_acl_count;
		}
		acl_phys.z_acl_version = aclp->z_version;

		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
		    &acl_phys, sizeof (acl_phys));
	}

	/*
	 * Replace ACL wide bits, but first clear them.
	 */
	zp->z_pflags &= ~ZFS_ACL_WIDE_FLAGS;

	zp->z_pflags |= aclp->z_hints;

	/* ace_trivial_common() returning 0 is treated as "ACL is trivial". */
	if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0)
		zp->z_pflags |= ZFS_ACL_TRIVIAL;

	zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime, B_TRUE);
	return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
}
1314
1315static void
1316zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t split, boolean_t trim,
1317    zfs_acl_t *aclp)
1318{
1319	void		*acep = NULL;
1320	uint64_t	who;
1321	int		new_count, new_bytes;
1322	int		ace_size;
1323	int 		entry_type;
1324	uint16_t	iflags, type;
1325	uint32_t	access_mask;
1326	zfs_acl_node_t	*newnode;
1327	size_t 		abstract_size = aclp->z_ops.ace_abstract_size();
1328	void 		*zacep;
1329	boolean_t	isdir;
1330	trivial_acl_t	masks;
1331
1332	new_count = new_bytes = 0;
1333
1334	isdir = (vtype == VDIR);
1335
1336	acl_trivial_access_masks((mode_t)mode, isdir, &masks);
1337
1338	newnode = zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes);
1339
1340	zacep = newnode->z_acldata;
1341	if (masks.allow0) {
1342		zfs_set_ace(aclp, zacep, masks.allow0, ALLOW, -1, ACE_OWNER);
1343		zacep = (void *)((uintptr_t)zacep + abstract_size);
1344		new_count++;
1345		new_bytes += abstract_size;
1346	}
1347	if (masks.deny1) {
1348		zfs_set_ace(aclp, zacep, masks.deny1, DENY, -1, ACE_OWNER);
1349		zacep = (void *)((uintptr_t)zacep + abstract_size);
1350		new_count++;
1351		new_bytes += abstract_size;
1352	}
1353	if (masks.deny2) {
1354		zfs_set_ace(aclp, zacep, masks.deny2, DENY, -1, OWNING_GROUP);
1355		zacep = (void *)((uintptr_t)zacep + abstract_size);
1356		new_count++;
1357		new_bytes += abstract_size;
1358	}
1359
1360	while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
1361	    &iflags, &type)) {
1362		entry_type = (iflags & ACE_TYPE_FLAGS);
1363		/*
1364		 * ACEs used to represent the file mode may be divided
1365		 * into an equivalent pair of inherit-only and regular
1366		 * ACEs, if they are inheritable.
1367		 * Skip regular ACEs, which are replaced by the new mode.
1368		 */
1369		if (split && (entry_type == ACE_OWNER ||
1370		    entry_type == OWNING_GROUP ||
1371		    entry_type == ACE_EVERYONE)) {
1372			if (!isdir || !(iflags &
1373			    (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
1374				continue;
1375			/*
1376			 * We preserve owner@, group@, or @everyone
1377			 * permissions, if they are inheritable, by
1378			 * copying them to inherit_only ACEs. This
1379			 * prevents inheritable permissions from being
1380			 * altered along with the file mode.
1381			 */
1382			iflags |= ACE_INHERIT_ONLY_ACE;
1383		}
1384
1385		/*
1386		 * If this ACL has any inheritable ACEs, mark that in
1387		 * the hints (which are later masked into the pflags)
1388		 * so create knows to do inheritance.
1389		 */
1390		if (isdir && (iflags &
1391		    (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
1392			aclp->z_hints |= ZFS_INHERIT_ACE;
1393
1394		if ((type != ALLOW && type != DENY) ||
1395		    (iflags & ACE_INHERIT_ONLY_ACE)) {
1396			switch (type) {
1397			case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
1398			case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
1399			case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
1400			case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
1401				aclp->z_hints |= ZFS_ACL_OBJ_ACE;
1402				break;
1403			}
1404		} else {
1405			/*
1406			 * Limit permissions granted by ACEs to be no greater
1407			 * than permissions of the requested group mode.
1408			 * Applies when the "aclmode" property is set to
1409			 * "groupmask".
1410			 */
1411			if ((type == ALLOW) && trim)
1412				access_mask &= masks.group;
1413		}
1414		zfs_set_ace(aclp, zacep, access_mask, type, who, iflags);
1415		ace_size = aclp->z_ops.ace_size(acep);
1416		zacep = (void *)((uintptr_t)zacep + ace_size);
1417		new_count++;
1418		new_bytes += ace_size;
1419	}
1420	zfs_set_ace(aclp, zacep, masks.owner, ALLOW, -1, ACE_OWNER);
1421	zacep = (void *)((uintptr_t)zacep + abstract_size);
1422	zfs_set_ace(aclp, zacep, masks.group, ALLOW, -1, OWNING_GROUP);
1423	zacep = (void *)((uintptr_t)zacep + abstract_size);
1424	zfs_set_ace(aclp, zacep, masks.everyone, ALLOW, -1, ACE_EVERYONE);
1425
1426	new_count += 3;
1427	new_bytes += abstract_size * 3;
1428	zfs_acl_release_nodes(aclp);
1429	aclp->z_acl_count = new_count;
1430	aclp->z_acl_bytes = new_bytes;
1431	newnode->z_ace_count = new_count;
1432	newnode->z_size = new_bytes;
1433	list_insert_tail(&aclp->z_acl, newnode);
1434}
1435
1436int
1437zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode)
1438{
1439	int error = 0;
1440
1441	mutex_enter(&zp->z_acl_lock);
1442	ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
1443	if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_DISCARD)
1444		*aclp = zfs_acl_alloc(zfs_acl_version_zp(zp));
1445	else
1446		error = zfs_acl_node_read(zp, aclp, B_TRUE);
1447
1448	if (error == 0) {
1449		(*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS;
1450		zfs_acl_chmod(ZTOV(zp)->v_type, mode, B_TRUE,
1451		    (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK), *aclp);
1452	}
1453	mutex_exit(&zp->z_acl_lock);
1454
1455	return (error);
1456}
1457
1458/*
1459 * Should ACE be inherited?
1460 */
1461static int
1462zfs_ace_can_use(vtype_t vtype, uint16_t acep_flags)
1463{
1464	int	iflags = (acep_flags & 0xf);
1465
1466	if ((vtype == VDIR) && (iflags & ACE_DIRECTORY_INHERIT_ACE))
1467		return (1);
1468	else if (iflags & ACE_FILE_INHERIT_ACE)
1469		return (!((vtype == VDIR) &&
1470		    (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)));
1471	return (0);
1472}
1473
1474/*
1475 * inherit inheritable ACEs from parent
1476 */
1477static zfs_acl_t *
1478zfs_acl_inherit(zfsvfs_t *zfsvfs, vtype_t vtype, zfs_acl_t *paclp,
1479    uint64_t mode)
1480{
1481	void		*pacep = NULL;
1482	void		*acep;
1483	zfs_acl_node_t  *aclnode;
1484	zfs_acl_t	*aclp = NULL;
1485	uint64_t	who;
1486	uint32_t	access_mask;
1487	uint16_t	iflags, newflags, type;
1488	size_t		ace_size;
1489	void		*data1, *data2;
1490	size_t		data1sz, data2sz;
1491	uint_t		aclinherit;
1492	boolean_t	isdir = (vtype == VDIR);
1493
1494	aclp = zfs_acl_alloc(paclp->z_version);
1495	aclinherit = zfsvfs->z_acl_inherit;
1496	if (aclinherit == ZFS_ACL_DISCARD || vtype == VLNK)
1497		return (aclp);
1498
1499	while (pacep = zfs_acl_next_ace(paclp, pacep, &who,
1500	    &access_mask, &iflags, &type)) {
1501
1502		/*
1503		 * don't inherit bogus ACEs
1504		 */
1505		if (!zfs_acl_valid_ace_type(type, iflags))
1506			continue;
1507
1508		/*
1509		 * Check if ACE is inheritable by this vnode
1510		 */
1511		if ((aclinherit == ZFS_ACL_NOALLOW && type == ALLOW) ||
1512		    !zfs_ace_can_use(vtype, iflags))
1513			continue;
1514
1515		/*
1516		 * Strip inherited execute permission from file if
1517		 * not in mode
1518		 */
1519		if (aclinherit == ZFS_ACL_PASSTHROUGH_X && type == ALLOW &&
1520		    !isdir && ((mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)) {
1521			access_mask &= ~ACE_EXECUTE;
1522		}
1523
1524		/*
1525		 * Strip write_acl and write_owner from permissions
1526		 * when inheriting an ACE
1527		 */
1528		if (aclinherit == ZFS_ACL_RESTRICTED && type == ALLOW) {
1529			access_mask &= ~RESTRICTED_CLEAR;
1530		}
1531
1532		ace_size = aclp->z_ops.ace_size(pacep);
1533		aclnode = zfs_acl_node_alloc(ace_size);
1534		list_insert_tail(&aclp->z_acl, aclnode);
1535		acep = aclnode->z_acldata;
1536
1537		zfs_set_ace(aclp, acep, access_mask, type,
1538		    who, iflags|ACE_INHERITED_ACE);
1539
1540		/*
1541		 * Copy special opaque data if any
1542		 */
1543		if ((data1sz = paclp->z_ops.ace_data(pacep, &data1)) != 0) {
1544			VERIFY((data2sz = aclp->z_ops.ace_data(acep,
1545			    &data2)) == data1sz);
1546			bcopy(data1, data2, data2sz);
1547		}
1548
1549		aclp->z_acl_count++;
1550		aclnode->z_ace_count++;
1551		aclp->z_acl_bytes += aclnode->z_size;
1552		newflags = aclp->z_ops.ace_flags_get(acep);
1553
1554		/*
1555		 * If ACE is not to be inherited further, or if the vnode is
1556		 * not a directory, remove all inheritance flags
1557		 */
1558		if (!isdir || (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)) {
1559			newflags &= ~ALL_INHERIT;
1560			aclp->z_ops.ace_flags_set(acep,
1561			    newflags|ACE_INHERITED_ACE);
1562			continue;
1563		}
1564
1565		/*
1566		 * This directory has an inheritable ACE
1567		 */
1568		aclp->z_hints |= ZFS_INHERIT_ACE;
1569
1570		/*
1571		 * If only FILE_INHERIT is set then turn on
1572		 * inherit_only
1573		 */
1574		if ((iflags & (ACE_FILE_INHERIT_ACE |
1575		    ACE_DIRECTORY_INHERIT_ACE)) == ACE_FILE_INHERIT_ACE) {
1576			newflags |= ACE_INHERIT_ONLY_ACE;
1577			aclp->z_ops.ace_flags_set(acep,
1578			    newflags|ACE_INHERITED_ACE);
1579		} else {
1580			newflags &= ~ACE_INHERIT_ONLY_ACE;
1581			aclp->z_ops.ace_flags_set(acep,
1582			    newflags|ACE_INHERITED_ACE);
1583		}
1584	}
1585
1586	return (aclp);
1587}
1588
/*
 * Create file system object initial permissions
 * including inheritable ACEs.
 * Also, create FUIDs for owner and group.
 *
 *	dzp	parent directory znode
 *	flag	IS_ROOT_NODE and IS_XATTR are the bits examined here
 *	vap	requested attributes (type, mode, uid, gid, mask)
 *	cr	credentials of the creator
 *	vsecp	optional explicit ACL; when NULL the ACL is inherited from
 *		dzp or starts out empty
 *	acl_ids	output; zeroed first, then z_mode, z_fuid, z_fgid, z_aclp
 *		and (possibly) z_fuidp are filled in
 *
 * Returns 0 on success, or the error from zfs_vsec_2_aclp() when the
 * supplied ACL cannot be converted.
 */
int
zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
    vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids)
{
	int		error;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zfs_acl_t	*paclp;
	/* Plain gid of the new object, used for the set-GID policy check. */
	gid_t		gid = vap->va_gid;
	boolean_t	trim = B_FALSE;
	boolean_t	inherited = B_FALSE;

#ifndef __NetBSD__
	ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
#endif
	bzero(acl_ids, sizeof (zfs_acl_ids_t));
	acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode);

	/* An explicitly supplied ACL is converted up front. */
	if (vsecp)
		if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, cr,
		    &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0)
			return (error);
	/*
	 * Determine uid and gid.
	 */
	if ((flag & IS_ROOT_NODE) || zfsvfs->z_replay ||
	    ((flag & IS_XATTR) && (vap->va_type == VDIR))) {
		/*
		 * Root node, replay, or an xattr directory: take uid and
		 * gid straight from vap.
		 */
		acl_ids->z_fuid = zfs_fuid_create(zfsvfs,
		    (uint64_t)vap->va_uid, cr,
		    ZFS_OWNER, &acl_ids->z_fuidp);
		acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
		    (uint64_t)vap->va_gid, cr,
		    ZFS_GROUP, &acl_ids->z_fuidp);
		gid = vap->va_gid;
	} else {
		/* Owner comes from the credentials. */
		acl_ids->z_fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER,
		    cr, &acl_ids->z_fuidp);
		acl_ids->z_fgid = 0;
		if (vap->va_mask & AT_GID)  {
			acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
			    (uint64_t)vap->va_gid,
			    cr, ZFS_GROUP, &acl_ids->z_fuidp);
			gid = vap->va_gid;
			/*
			 * The requested gid is discarded (z_fgid reset to
			 * 0) unless it equals the parent's group, the
			 * caller is a member of it, or policy allows it.
			 */
			if (acl_ids->z_fgid != dzp->z_gid &&
			    !groupmember(vap->va_gid, cr) &&
			    secpolicy_vnode_create_gid(cr) != 0)
				acl_ids->z_fgid = 0;
		}
		/* No usable gid so far: pick a default. */
		if (acl_ids->z_fgid == 0) {
			if (dzp->z_mode & S_ISGID) {
				char		*domain;
				uint32_t	rid;

				/* set-GID parent: use the parent's group. */
				acl_ids->z_fgid = dzp->z_gid;
				gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid,
				    cr, ZFS_GROUP);

				/*
				 * For an ephemeral group, add its
				 * domain/rid to the fuid info list.
				 */
				if (zfsvfs->z_use_fuids &&
				    IS_EPHEMERAL(acl_ids->z_fgid)) {
					domain = zfs_fuid_idx_domain(
					    &zfsvfs->z_fuid_idx,
					    FUID_INDEX(acl_ids->z_fgid));
					rid = FUID_RID(acl_ids->z_fgid);
					zfs_fuid_node_add(&acl_ids->z_fuidp,
					    domain, rid,
					    FUID_INDEX(acl_ids->z_fgid),
					    acl_ids->z_fgid, ZFS_GROUP);
				}
			} else {
				acl_ids->z_fgid = zfs_fuid_create_cred(zfsvfs,
				    ZFS_GROUP, cr, &acl_ids->z_fuidp);
#if defined(__FreeBSD_kernel__) || defined(__NetBSD__)
				/*
				 * On FreeBSD/NetBSD the value computed just
				 * above is overwritten: the parent's group
				 * is used.
				 */
				gid = acl_ids->z_fgid = dzp->z_gid;
#else
				gid = crgetgid(cr);
#endif
			}
		}
	}

	/*
	 * If we're creating a directory, and the parent directory has the
	 * set-GID bit set, set in on the new directory.
	 * Otherwise, if the user is neither privileged nor a member of the
	 * file's new group, clear the file's set-GID bit.
	 */

	if (!(flag & IS_ROOT_NODE) && (dzp->z_mode & S_ISGID) &&
	    (vap->va_type == VDIR)) {
		acl_ids->z_mode |= S_ISGID;
	} else {
		if ((acl_ids->z_mode & S_ISGID) &&
		    secpolicy_vnode_setids_setgids(ZTOV(dzp), cr, gid) != 0)
			acl_ids->z_mode &= ~S_ISGID;
	}

	/* No explicit ACL was given: inherit one or start empty. */
	if (acl_ids->z_aclp == NULL) {
		mutex_enter(&dzp->z_acl_lock);
		if (!(flag & IS_ROOT_NODE) &&
		    (dzp->z_pflags & ZFS_INHERIT_ACE) &&
		    !(dzp->z_pflags & ZFS_XATTR)) {
			/* A failure to read the parent ACL panics here. */
			VERIFY(0 == zfs_acl_node_read(dzp, &paclp, B_FALSE));
			acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
			    vap->va_type, paclp, acl_ids->z_mode);
			inherited = B_TRUE;
		} else {
			acl_ids->z_aclp =
			    zfs_acl_alloc(zfs_acl_version_zp(dzp));
			acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
		}
		mutex_exit(&dzp->z_acl_lock);

		if (vap->va_type == VDIR)
			acl_ids->z_aclp->z_hints |= ZFS_ACL_AUTO_INHERIT;

		/*
		 * Trim to the group mask only when aclmode is groupmask
		 * and aclinherit is neither passthrough variant.
		 */
		if (zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK &&
		    zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH &&
		    zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH_X)
			trim = B_TRUE;
		zfs_acl_chmod(vap->va_type, acl_ids->z_mode, B_FALSE, trim,
		    acl_ids->z_aclp);
	}

	/*
	 * With an inherited or caller-supplied ACL, recompute the mode
	 * from the ACL and re-evaluate whether the ACL is trivial.
	 */
	if (inherited || vsecp) {
		acl_ids->z_mode = zfs_mode_compute(acl_ids->z_mode,
		    acl_ids->z_aclp, &acl_ids->z_aclp->z_hints,
		    acl_ids->z_fuid, acl_ids->z_fgid);
		if (ace_trivial_common(acl_ids->z_aclp, 0, zfs_ace_walk) == 0)
			acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
	}

	return (0);
}
1726
1727/*
1728 * Free ACL and fuid_infop, but not the acl_ids structure
1729 */
1730void
1731zfs_acl_ids_free(zfs_acl_ids_t *acl_ids)
1732{
1733	if (acl_ids->z_aclp)
1734		zfs_acl_free(acl_ids->z_aclp);
1735	if (acl_ids->z_fuidp)
1736		zfs_fuid_info_free(acl_ids->z_fuidp);
1737	acl_ids->z_aclp = NULL;
1738	acl_ids->z_fuidp = NULL;
1739}
1740
1741boolean_t
1742zfs_acl_ids_overquota(zfsvfs_t *zfsvfs, zfs_acl_ids_t *acl_ids)
1743{
1744	return (zfs_fuid_overquota(zfsvfs, B_FALSE, acl_ids->z_fuid) ||
1745	    zfs_fuid_overquota(zfsvfs, B_TRUE, acl_ids->z_fgid));
1746}
1747
1748/*
1749 * Retrieve a file's ACL
1750 */
1751int
1752zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
1753{
1754	zfs_acl_t	*aclp;
1755	ulong_t		mask;
1756	int		error;
1757	int 		count = 0;
1758	int		largeace = 0;
1759
1760	mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT |
1761	    VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES);
1762
1763	if (mask == 0)
1764		return (SET_ERROR(ENOSYS));
1765
1766	if (error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr))
1767		return (error);
1768
1769	mutex_enter(&zp->z_acl_lock);
1770
1771	ASSERT_VOP_LOCKED(ZTOV(zp), __func__);
1772	error = zfs_acl_node_read(zp, &aclp, B_FALSE);
1773	if (error != 0) {
1774		mutex_exit(&zp->z_acl_lock);
1775		return (error);
1776	}
1777
1778	/*
1779	 * Scan ACL to determine number of ACEs
1780	 */
1781	if ((zp->z_pflags & ZFS_ACL_OBJ_ACE) && !(mask & VSA_ACE_ALLTYPES)) {
1782		void *zacep = NULL;
1783		uint64_t who;
1784		uint32_t access_mask;
1785		uint16_t type, iflags;
1786
1787		while (zacep = zfs_acl_next_ace(aclp, zacep,
1788		    &who, &access_mask, &iflags, &type)) {
1789			switch (type) {
1790			case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
1791			case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
1792			case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
1793			case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
1794				largeace++;
1795				continue;
1796			default:
1797				count++;
1798			}
1799		}
1800		vsecp->vsa_aclcnt = count;
1801	} else
1802		count = (int)aclp->z_acl_count;
1803
1804	if (mask & VSA_ACECNT) {
1805		vsecp->vsa_aclcnt = count;
1806	}
1807
1808	if (mask & VSA_ACE) {
1809		size_t aclsz;
1810
1811		aclsz = count * sizeof (ace_t) +
1812		    sizeof (ace_object_t) * largeace;
1813
1814		vsecp->vsa_aclentp = kmem_alloc(aclsz, KM_SLEEP);
1815		vsecp->vsa_aclentsz = aclsz;
1816
1817		if (aclp->z_version == ZFS_ACL_VERSION_FUID)
1818			zfs_copy_fuid_2_ace(zp->z_zfsvfs, aclp, cr,
1819			    vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES));
1820		else {
1821			zfs_acl_node_t *aclnode;
1822			void *start = vsecp->vsa_aclentp;
1823
1824			for (aclnode = list_head(&aclp->z_acl); aclnode;
1825			    aclnode = list_next(&aclp->z_acl, aclnode)) {
1826				bcopy(aclnode->z_acldata, start,
1827				    aclnode->z_size);
1828				start = (caddr_t)start + aclnode->z_size;
1829			}
1830			ASSERT((caddr_t)start - (caddr_t)vsecp->vsa_aclentp ==
1831			    aclp->z_acl_bytes);
1832		}
1833	}
1834	if (mask & VSA_ACE_ACLFLAGS) {
1835		vsecp->vsa_aclflags = 0;
1836		if (zp->z_pflags & ZFS_ACL_DEFAULTED)
1837			vsecp->vsa_aclflags |= ACL_DEFAULTED;
1838		if (zp->z_pflags & ZFS_ACL_PROTECTED)
1839			vsecp->vsa_aclflags |= ACL_PROTECTED;
1840		if (zp->z_pflags & ZFS_ACL_AUTO_INHERIT)
1841			vsecp->vsa_aclflags |= ACL_AUTO_INHERIT;
1842	}
1843
1844	mutex_exit(&zp->z_acl_lock);
1845
1846	return (0);
1847}
1848
1849int
1850zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, vtype_t obj_type,
1851    vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp)
1852{
1853	zfs_acl_t *aclp;
1854	zfs_acl_node_t *aclnode;
1855	int aclcnt = vsecp->vsa_aclcnt;
1856	int error;
1857
1858	if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0)
1859		return (SET_ERROR(EINVAL));
1860
1861	aclp = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version));
1862
1863	aclp->z_hints = 0;
1864	aclnode = zfs_acl_node_alloc(aclcnt * sizeof (zfs_object_ace_t));
1865	if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
1866		if ((error = zfs_copy_ace_2_oldace(obj_type, aclp,
1867		    (ace_t *)vsecp->vsa_aclentp, aclnode->z_acldata,
1868		    aclcnt, &aclnode->z_size)) != 0) {
1869			zfs_acl_free(aclp);
1870			zfs_acl_node_free(aclnode);
1871			return (error);
1872		}
1873	} else {
1874		if ((error = zfs_copy_ace_2_fuid(zfsvfs, obj_type, aclp,
1875		    vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt,
1876		    &aclnode->z_size, fuidp, cr)) != 0) {
1877			zfs_acl_free(aclp);
1878			zfs_acl_node_free(aclnode);
1879			return (error);
1880		}
1881	}
1882	aclp->z_acl_bytes = aclnode->z_size;
1883	aclnode->z_ace_count = aclcnt;
1884	aclp->z_acl_count = aclcnt;
1885	list_insert_head(&aclp->z_acl, aclnode);
1886
1887	/*
1888	 * If flags are being set then add them to z_hints
1889	 */
1890	if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) {
1891		if (vsecp->vsa_aclflags & ACL_PROTECTED)
1892			aclp->z_hints |= ZFS_ACL_PROTECTED;
1893		if (vsecp->vsa_aclflags & ACL_DEFAULTED)
1894			aclp->z_hints |= ZFS_ACL_DEFAULTED;
1895		if (vsecp->vsa_aclflags & ACL_AUTO_INHERIT)
1896			aclp->z_hints |= ZFS_ACL_AUTO_INHERIT;
1897	}
1898
1899	*zaclp = aclp;
1900
1901	return (0);
1902}
1903
1904/*
1905 * Set a file's ACL
1906 */
1907int
1908zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
1909{
1910	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
1911	zilog_t		*zilog = zfsvfs->z_log;
1912	ulong_t		mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);
1913	dmu_tx_t	*tx;
1914	int		error;
1915	zfs_acl_t	*aclp;
1916	zfs_fuid_info_t	*fuidp = NULL;
1917	boolean_t	fuid_dirtied;
1918	uint64_t	acl_obj;
1919
1920	ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
1921	if (mask == 0)
1922		return (SET_ERROR(ENOSYS));
1923
1924	if (zp->z_pflags & ZFS_IMMUTABLE)
1925		return (SET_ERROR(EPERM));
1926
1927	if (error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr))
1928		return (error);
1929
1930	error = zfs_vsec_2_aclp(zfsvfs, ZTOV(zp)->v_type, vsecp, cr, &fuidp,
1931	    &aclp);
1932	if (error)
1933		return (error);
1934
1935	/*
1936	 * If ACL wide flags aren't being set then preserve any
1937	 * existing flags.
1938	 */
1939	if (!(vsecp->vsa_mask & VSA_ACE_ACLFLAGS)) {
1940		aclp->z_hints |=
1941		    (zp->z_pflags & V4_ACL_WIDE_FLAGS);
1942	}
1943top:
1944	mutex_enter(&zp->z_acl_lock);
1945
1946	tx = dmu_tx_create(zfsvfs->z_os);
1947
1948	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1949
1950	fuid_dirtied = zfsvfs->z_fuid_dirty;
1951	if (fuid_dirtied)
1952		zfs_fuid_txhold(zfsvfs, tx);
1953
1954	/*
1955	 * If old version and ACL won't fit in bonus and we aren't
1956	 * upgrading then take out necessary DMU holds
1957	 */
1958
1959	if ((acl_obj = zfs_external_acl(zp)) != 0) {
1960		if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
1961		    zfs_znode_acl_version(zp) <= ZFS_ACL_VERSION_INITIAL) {
1962			dmu_tx_hold_free(tx, acl_obj, 0,
1963			    DMU_OBJECT_END);
1964			dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
1965			    aclp->z_acl_bytes);
1966		} else {
1967			dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes);
1968		}
1969	} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1970		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes);
1971	}
1972
1973	zfs_sa_upgrade_txholds(tx, zp);
1974	error = dmu_tx_assign(tx, TXG_NOWAIT);
1975	if (error) {
1976		mutex_exit(&zp->z_acl_lock);
1977
1978		if (error == ERESTART) {
1979			dmu_tx_wait(tx);
1980			dmu_tx_abort(tx);
1981			goto top;
1982		}
1983		dmu_tx_abort(tx);
1984		zfs_acl_free(aclp);
1985		return (error);
1986	}
1987
1988	error = zfs_aclset_common(zp, aclp, cr, tx);
1989	ASSERT(error == 0);
1990	ASSERT(zp->z_acl_cached == NULL);
1991	zp->z_acl_cached = aclp;
1992
1993	if (fuid_dirtied)
1994		zfs_fuid_sync(zfsvfs, tx);
1995
1996	zfs_log_acl(zilog, tx, zp, vsecp, fuidp);
1997
1998	if (fuidp)
1999		zfs_fuid_info_free(fuidp);
2000	dmu_tx_commit(tx);
2001	mutex_exit(&zp->z_acl_lock);
2002
2003	return (error);
2004}
2005
2006/*
2007 * Check accesses of interest (AoI) against attributes of the dataset
2008 * such as read-only.  Returns zero if no AoI conflict with dataset
2009 * attributes, otherwise an appropriate errno is returned.
2010 */
2011static int
2012zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)
2013{
2014	if ((v4_mode & WRITE_MASK) &&
2015	    (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
2016	    (!IS_DEVVP(ZTOV(zp)) ||
2017	    (IS_DEVVP(ZTOV(zp)) && (v4_mode & WRITE_MASK_ATTRS)))) {
2018		return (SET_ERROR(EROFS));
2019	}
2020
2021	/*
2022	 * Only check for READONLY on non-directories.
2023	 */
2024	if ((v4_mode & WRITE_MASK_DATA) &&
2025	    (((ZTOV(zp)->v_type != VDIR) &&
2026	    (zp->z_pflags & (ZFS_READONLY | ZFS_IMMUTABLE))) ||
2027	    (ZTOV(zp)->v_type == VDIR &&
2028	    (zp->z_pflags & ZFS_IMMUTABLE)))) {
2029		return (SET_ERROR(EPERM));
2030	}
2031
2032#ifdef illumos
2033	if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) &&
2034	    (zp->z_pflags & ZFS_NOUNLINK)) {
2035		return (SET_ERROR(EPERM));
2036	}
2037#else
2038	/*
2039	 * In FreeBSD we allow to modify directory's content is ZFS_NOUNLINK
2040	 * (sunlnk) is set. We just don't allow directory removal, which is
2041	 * handled in zfs_zaccess_delete().
2042	 */
2043	if ((v4_mode & ACE_DELETE) &&
2044	    (zp->z_pflags & ZFS_NOUNLINK)) {
2045		return (EPERM);
2046	}
2047#endif
2048
2049	if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) &&
2050	    (zp->z_pflags & ZFS_AV_QUARANTINED))) {
2051		return (SET_ERROR(EACCES));
2052	}
2053
2054	return (0);
2055}
2056
2057/*
2058 * The primary usage of this function is to loop through all of the
2059 * ACEs in the znode, determining what accesses of interest (AoI) to
2060 * the caller are allowed or denied.  The AoI are expressed as bits in
2061 * the working_mode parameter.  As each ACE is processed, bits covered
2062 * by that ACE are removed from the working_mode.  This removal
2063 * facilitates two things.  The first is that when the working mode is
2064 * empty (= 0), we know we've looked at all the AoI. The second is
2065 * that the ACE interpretation rules don't allow a later ACE to undo
2066 * something granted or denied by an earlier ACE.  Removing the
2067 * discovered access or denial enforces this rule.  At the end of
2068 * processing the ACEs, all AoI that were found to be denied are
2069 * placed into the working_mode, giving the caller a mask of denied
2070 * accesses.  Returns:
2071 *	0		if all AoI granted
2072 *	EACCESS 	if the denied mask is non-zero
2073 *	other error	if abnormal failure (e.g., IO error)
2074 *
2075 * A secondary usage of the function is to determine if any of the
2076 * AoI are granted.  If an ACE grants any access in
2077 * the working_mode, we immediately short circuit out of the function.
2078 * This mode is chosen by setting anyaccess to B_TRUE.  The
2079 * working_mode is not a denied access mask upon exit if the function
2080 * is used in this manner.
2081 */
2082static int
2083zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
2084    boolean_t anyaccess, cred_t *cr)
2085{
2086	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
2087	zfs_acl_t	*aclp;
2088	int		error;
2089	uid_t		uid = crgetuid(cr);
2090	uint64_t 	who;
2091	uint16_t	type, iflags;
2092	uint16_t	entry_type;
2093	uint32_t	access_mask;
2094	uint32_t	deny_mask = 0;
2095	zfs_ace_hdr_t	*acep = NULL;
2096	boolean_t	checkit;
2097	uid_t		gowner;
2098	uid_t		fowner;
2099
2100	zfs_fuid_map_ids(zp, cr, &fowner, &gowner);
2101
2102	mutex_enter(&zp->z_acl_lock);
2103
2104	ASSERT_VOP_LOCKED(ZTOV(zp), __func__);
2105	error = zfs_acl_node_read(zp, &aclp, B_FALSE);
2106	if (error != 0) {
2107		mutex_exit(&zp->z_acl_lock);
2108		return (error);
2109	}
2110
2111	ASSERT(zp->z_acl_cached);
2112
2113	while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
2114	    &iflags, &type)) {
2115		uint32_t mask_matched;
2116
2117		if (!zfs_acl_valid_ace_type(type, iflags))
2118			continue;
2119
2120		if (ZTOV(zp)->v_type == VDIR && (iflags & ACE_INHERIT_ONLY_ACE))
2121			continue;
2122
2123		/* Skip ACE if it does not affect any AoI */
2124		mask_matched = (access_mask & *working_mode);
2125		if (!mask_matched)
2126			continue;
2127
2128		entry_type = (iflags & ACE_TYPE_FLAGS);
2129
2130		checkit = B_FALSE;
2131
2132		switch (entry_type) {
2133		case ACE_OWNER:
2134			if (uid == fowner)
2135				checkit = B_TRUE;
2136			break;
2137		case OWNING_GROUP:
2138			who = gowner;
2139			/*FALLTHROUGH*/
2140		case ACE_IDENTIFIER_GROUP:
2141			checkit = zfs_groupmember(zfsvfs, who, cr);
2142			break;
2143		case ACE_EVERYONE:
2144			checkit = B_TRUE;
2145			break;
2146
2147		/* USER Entry */
2148		default:
2149			if (entry_type == 0) {
2150				uid_t newid;
2151
2152				newid = zfs_fuid_map_id(zfsvfs, who, cr,
2153				    ZFS_ACE_USER);
2154				if (newid != IDMAP_WK_CREATOR_OWNER_UID &&
2155				    uid == newid)
2156					checkit = B_TRUE;
2157				break;
2158			} else {
2159				mutex_exit(&zp->z_acl_lock);
2160				return (SET_ERROR(EIO));
2161			}
2162		}
2163
2164		if (checkit) {
2165			if (type == DENY) {
2166				DTRACE_PROBE3(zfs__ace__denies,
2167				    znode_t *, zp,
2168				    zfs_ace_hdr_t *, acep,
2169				    uint32_t, mask_matched);
2170				deny_mask |= mask_matched;
2171			} else {
2172				DTRACE_PROBE3(zfs__ace__allows,
2173				    znode_t *, zp,
2174				    zfs_ace_hdr_t *, acep,
2175				    uint32_t, mask_matched);
2176				if (anyaccess) {
2177					mutex_exit(&zp->z_acl_lock);
2178					return (0);
2179				}
2180			}
2181			*working_mode &= ~mask_matched;
2182		}
2183
2184		/* Are we done? */
2185		if (*working_mode == 0)
2186			break;
2187	}
2188
2189	mutex_exit(&zp->z_acl_lock);
2190
2191	/* Put the found 'denies' back on the working mode */
2192	if (deny_mask) {
2193		*working_mode |= deny_mask;
2194		return (SET_ERROR(EACCES));
2195	} else if (*working_mode) {
2196		return (-1);
2197	}
2198
2199	return (0);
2200}
2201
2202/*
2203 * Return true if any access whatsoever granted, we don't actually
2204 * care what access is granted.
2205 */
2206boolean_t
2207zfs_has_access(znode_t *zp, cred_t *cr)
2208{
2209	uint32_t have = ACE_ALL_PERMS;
2210
2211	if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) {
2212		uid_t owner;
2213
2214		owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
2215		return (secpolicy_vnode_any_access(cr, ZTOV(zp), owner) == 0);
2216	}
2217	return (B_TRUE);
2218}
2219
2220static int
2221zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
2222    boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
2223{
2224	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2225	int err;
2226
2227	*working_mode = v4_mode;
2228	*check_privs = B_TRUE;
2229
2230	/*
2231	 * Short circuit empty requests
2232	 */
2233	if (v4_mode == 0 || zfsvfs->z_replay) {
2234		*working_mode = 0;
2235		return (0);
2236	}
2237
2238	if ((err = zfs_zaccess_dataset_check(zp, v4_mode)) != 0) {
2239		*check_privs = B_FALSE;
2240		return (err);
2241	}
2242
2243	/*
2244	 * The caller requested that the ACL check be skipped.  This
2245	 * would only happen if the caller checked VOP_ACCESS() with a
2246	 * 32 bit ACE mask and already had the appropriate permissions.
2247	 */
2248	if (skipaclchk) {
2249		*working_mode = 0;
2250		return (0);
2251	}
2252
2253	return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr));
2254}
2255
2256static int
2257zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,
2258    cred_t *cr)
2259{
2260	if (*working_mode != ACE_WRITE_DATA)
2261		return (SET_ERROR(EACCES));
2262
2263	return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode,
2264	    check_privs, B_FALSE, cr));
2265}
2266
2267int
2268zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
2269{
2270	boolean_t owner = B_FALSE;
2271	boolean_t groupmbr = B_FALSE;
2272	boolean_t is_attr;
2273	uid_t uid = crgetuid(cr);
2274	int error;
2275
2276	if (zdp->z_pflags & ZFS_AV_QUARANTINED)
2277		return (SET_ERROR(EACCES));
2278
2279	is_attr = ((zdp->z_pflags & ZFS_XATTR) &&
2280	    (ZTOV(zdp)->v_type == VDIR));
2281	if (is_attr)
2282		goto slow;
2283
2284
2285	mutex_enter(&zdp->z_acl_lock);
2286
2287	if (zdp->z_pflags & ZFS_NO_EXECS_DENIED) {
2288		mutex_exit(&zdp->z_acl_lock);
2289		return (0);
2290	}
2291
2292	if (FUID_INDEX(zdp->z_uid) != 0 || FUID_INDEX(zdp->z_gid) != 0) {
2293		mutex_exit(&zdp->z_acl_lock);
2294		goto slow;
2295	}
2296
2297	if (uid == zdp->z_uid) {
2298		owner = B_TRUE;
2299		if (zdp->z_mode & S_IXUSR) {
2300			mutex_exit(&zdp->z_acl_lock);
2301			return (0);
2302		} else {
2303			mutex_exit(&zdp->z_acl_lock);
2304			goto slow;
2305		}
2306	}
2307	if (groupmember(zdp->z_gid, cr)) {
2308		groupmbr = B_TRUE;
2309		if (zdp->z_mode & S_IXGRP) {
2310			mutex_exit(&zdp->z_acl_lock);
2311			return (0);
2312		} else {
2313			mutex_exit(&zdp->z_acl_lock);
2314			goto slow;
2315		}
2316	}
2317	if (!owner && !groupmbr) {
2318		if (zdp->z_mode & S_IXOTH) {
2319			mutex_exit(&zdp->z_acl_lock);
2320			return (0);
2321		}
2322	}
2323
2324	mutex_exit(&zdp->z_acl_lock);
2325
2326slow:
2327	DTRACE_PROBE(zfs__fastpath__execute__access__miss);
2328	ZFS_ENTER(zdp->z_zfsvfs);
2329	error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr);
2330	ZFS_EXIT(zdp->z_zfsvfs);
2331	return (error);
2332}
2333
2334/*
2335 * Determine whether Access should be granted/denied.
2336 *
2337 * The least priv subsytem is always consulted as a basic privilege
2338 * can define any form of access.
2339 */
2340int
2341zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
2342{
2343	uint32_t	working_mode;
2344	int		error;
2345	int		is_attr;
2346	boolean_t 	check_privs;
2347	znode_t		*xzp;
2348	znode_t 	*check_zp = zp;
2349	mode_t		needed_bits;
2350	uid_t		owner;
2351
2352	is_attr = ((zp->z_pflags & ZFS_XATTR) && (ZTOV(zp)->v_type == VDIR));
2353
2354#ifndef illumos
2355	/*
2356	 * In FreeBSD, we don't care about permissions of individual ADS.
2357	 * Note that not checking them is not just an optimization - without
2358	 * this shortcut, EA operations may bogusly fail with EACCES.
2359	 */
2360	if (zp->z_pflags & ZFS_XATTR)
2361		return (0);
2362	xzp = NULL;	// XXX: hello clang is_attr is false here.
2363#else
2364	/*
2365	 * If attribute then validate against base file
2366	 */
2367	if (is_attr) {
2368		uint64_t	parent;
2369
2370		if ((error = sa_lookup(zp->z_sa_hdl,
2371		    SA_ZPL_PARENT(zp->z_zfsvfs), &parent,
2372		    sizeof (parent))) != 0)
2373			return (error);
2374
2375		if ((error = zfs_zget(zp->z_zfsvfs,
2376		    parent, &xzp)) != 0)	{
2377			return (error);
2378		}
2379
2380		check_zp = xzp;
2381
2382		/*
2383		 * fixup mode to map to xattr perms
2384		 */
2385
2386		if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) {
2387			mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
2388			mode |= ACE_WRITE_NAMED_ATTRS;
2389		}
2390
2391		if (mode & (ACE_READ_DATA|ACE_EXECUTE)) {
2392			mode &= ~(ACE_READ_DATA|ACE_EXECUTE);
2393			mode |= ACE_READ_NAMED_ATTRS;
2394		}
2395	}
2396#endif
2397
2398	owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
2399	/*
2400	 * Map the bits required to the standard vnode flags VREAD|VWRITE|VEXEC
2401	 * in needed_bits.  Map the bits mapped by working_mode (currently
2402	 * missing) in missing_bits.
2403	 * Call secpolicy_vnode_access2() with (needed_bits & ~checkmode),
2404	 * needed_bits.
2405	 */
2406	needed_bits = 0;
2407
2408	working_mode = mode;
2409	if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) &&
2410	    owner == crgetuid(cr))
2411		working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
2412
2413	if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
2414	    ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
2415		needed_bits |= VREAD;
2416	if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
2417	    ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
2418		needed_bits |= VWRITE;
2419	if (working_mode & ACE_EXECUTE)
2420		needed_bits |= VEXEC;
2421
2422	if ((error = zfs_zaccess_common(check_zp, mode, &working_mode,
2423	    &check_privs, skipaclchk, cr)) == 0) {
2424		if (is_attr)
2425			VN_RELE(ZTOV(xzp));
2426		return (secpolicy_vnode_access2(cr, ZTOV(zp), owner,
2427		    needed_bits, needed_bits));
2428	}
2429
2430	if (error && !check_privs) {
2431		if (is_attr)
2432			VN_RELE(ZTOV(xzp));
2433		return (error);
2434	}
2435
2436	if (error && (flags & V_APPEND)) {
2437		error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr);
2438	}
2439
2440	if (error && check_privs) {
2441		mode_t		checkmode = 0;
2442
2443		/*
2444		 * First check for implicit owner permission on
2445		 * read_acl/read_attributes
2446		 */
2447
2448		error = 0;
2449		ASSERT(working_mode != 0);
2450
2451		if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) &&
2452		    owner == crgetuid(cr)))
2453			working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
2454
2455		if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
2456		    ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
2457			checkmode |= VREAD;
2458		if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
2459		    ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
2460			checkmode |= VWRITE;
2461		if (working_mode & ACE_EXECUTE)
2462			checkmode |= VEXEC;
2463
2464		error = secpolicy_vnode_access2(cr, ZTOV(check_zp), owner,
2465		    needed_bits & ~checkmode, needed_bits);
2466
2467		if (error == 0 && (working_mode & ACE_WRITE_OWNER))
2468			error = secpolicy_vnode_chown(ZTOV(check_zp), cr, owner);
2469		if (error == 0 && (working_mode & ACE_WRITE_ACL))
2470			error = secpolicy_vnode_setdac(ZTOV(check_zp), cr, owner);
2471
2472		if (error == 0 && (working_mode &
2473		    (ACE_DELETE|ACE_DELETE_CHILD)))
2474			error = secpolicy_vnode_remove(ZTOV(check_zp), cr);
2475
2476		if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) {
2477			error = secpolicy_vnode_chown(ZTOV(check_zp), cr, owner);
2478		}
2479		if (error == 0) {
2480			/*
2481			 * See if any bits other than those already checked
2482			 * for are still present.  If so then return EACCES
2483			 */
2484			if (working_mode & ~(ZFS_CHECKED_MASKS)) {
2485				error = SET_ERROR(EACCES);
2486			}
2487		}
2488	} else if (error == 0) {
2489		error = secpolicy_vnode_access2(cr, ZTOV(zp), owner,
2490		    needed_bits, needed_bits);
2491	}
2492
2493
2494	if (is_attr)
2495		VN_RELE(ZTOV(xzp));
2496
2497	return (error);
2498}
2499
2500/*
2501 * Translate traditional unix VREAD/VWRITE/VEXEC mode into
2502 * native ACL format and call zfs_zaccess()
2503 */
2504int
2505zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr)
2506{
2507	return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr));
2508}
2509
2510/*
2511 * Access function for secpolicy_vnode_setattr
2512 */
2513int
2514zfs_zaccess_unix(znode_t *zp, mode_t mode, cred_t *cr)
2515{
2516	int v4_mode = zfs_unix_to_v4(mode >> 6);
2517
2518	return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr));
2519}
2520
2521static int
2522zfs_delete_final_check(znode_t *zp, znode_t *dzp,
2523    mode_t available_perms, cred_t *cr)
2524{
2525	int error;
2526	uid_t downer;
2527
2528	downer = zfs_fuid_map_id(dzp->z_zfsvfs, dzp->z_uid, cr, ZFS_OWNER);
2529
2530	error = secpolicy_vnode_access2(cr, ZTOV(dzp),
2531	    downer, available_perms, VWRITE|VEXEC);
2532
2533	if (error == 0)
2534		error = zfs_sticky_remove_access(dzp, zp, cr);
2535
2536	return (error);
2537}
2538
2539/*
 * Determine whether access should be granted or denied, without
 * consulting least priv subsystem.
2542 *
2543 * The following chart is the recommended NFSv4 enforcement for
2544 * ability to delete an object.
2545 *
2546 *      -------------------------------------------------------
2547 *      |   Parent Dir  |           Target Object Permissions |
2548 *      |  permissions  |                                     |
2549 *      -------------------------------------------------------
2550 *      |               | ACL Allows | ACL Denies| Delete     |
2551 *      |               |  Delete    |  Delete   | unspecified|
2552 *      -------------------------------------------------------
2553 *      |  ACL Allows   | Permit     | Permit    | Permit     |
2554 *      |  DELETE_CHILD |                                     |
2555 *      -------------------------------------------------------
2556 *      |  ACL Denies   | Permit     | Deny      | Deny       |
2557 *      |  DELETE_CHILD |            |           |            |
2558 *      -------------------------------------------------------
2559 *      | ACL specifies |            |           |            |
2560 *      | only allow    | Permit     | Permit    | Permit     |
2561 *      | write and     |            |           |            |
2562 *      | execute       |            |           |            |
2563 *      -------------------------------------------------------
2564 *      | ACL denies    |            |           |            |
2565 *      | write and     | Permit     | Deny      | Deny       |
2566 *      | execute       |            |           |            |
2567 *      -------------------------------------------------------
2568 *         ^
2569 *         |
2570 *         No search privilege, can't even look up file?
2571 *
2572 */
2573int
2574zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
2575{
2576	uint32_t dzp_working_mode = 0;
2577	uint32_t zp_working_mode = 0;
2578	int dzp_error, zp_error;
2579	mode_t available_perms;
2580	boolean_t dzpcheck_privs = B_TRUE;
2581	boolean_t zpcheck_privs = B_TRUE;
2582
2583	/*
2584	 * We want specific DELETE permissions to
2585	 * take precedence over WRITE/EXECUTE.  We don't
2586	 * want an ACL such as this to mess us up.
2587	 * user:joe:write_data:deny,user:joe:delete:allow
2588	 *
2589	 * However, deny permissions may ultimately be overridden
2590	 * by secpolicy_vnode_access().
2591	 *
2592	 * We will ask for all of the necessary permissions and then
2593	 * look at the working modes from the directory and target object
2594	 * to determine what was found.
2595	 */
2596
2597	if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK))
2598		return (SET_ERROR(EPERM));
2599
2600	/*
2601	 * First row
2602	 * If the directory permissions allow the delete, we are done.
2603	 */
2604	if ((dzp_error = zfs_zaccess_common(dzp, ACE_DELETE_CHILD,
2605	    &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr)) == 0)
2606		return (0);
2607
2608	/*
2609	 * If target object has delete permission then we are done
2610	 */
2611	if ((zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode,
2612	    &zpcheck_privs, B_FALSE, cr)) == 0)
2613		return (0);
2614
2615	ASSERT(dzp_error && zp_error);
2616
2617	if (!dzpcheck_privs)
2618		return (dzp_error);
2619	if (!zpcheck_privs)
2620		return (zp_error);
2621
2622	/*
2623	 * Second row
2624	 *
2625	 * If directory returns EACCES then delete_child was denied
2626	 * due to deny delete_child.  In this case send the request through
2627	 * secpolicy_vnode_remove().  We don't use zfs_delete_final_check()
2628	 * since that *could* allow the delete based on write/execute permission
2629	 * and we want delete permissions to override write/execute.
2630	 */
2631
2632	if (dzp_error == EACCES)
2633		return (secpolicy_vnode_remove(ZTOV(dzp), cr));	/* XXXPJD: s/dzp/zp/ ? */
2634
2635	/*
2636	 * Third Row
2637	 * only need to see if we have write/execute on directory.
2638	 */
2639
2640	dzp_error = zfs_zaccess_common(dzp, ACE_EXECUTE|ACE_WRITE_DATA,
2641	    &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr);
2642
2643	if (dzp_error != 0 && !dzpcheck_privs)
2644		return (dzp_error);
2645
2646	/*
2647	 * Fourth row
2648	 */
2649
2650	available_perms = (dzp_working_mode & ACE_WRITE_DATA) ? 0 : VWRITE;
2651	available_perms |= (dzp_working_mode & ACE_EXECUTE) ? 0 : VEXEC;
2652
2653	return (zfs_delete_final_check(zp, dzp, available_perms, cr));
2654
2655}
2656
2657int
2658zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
2659    znode_t *tzp, cred_t *cr)
2660{
2661	int add_perm;
2662	int error;
2663
2664	if (szp->z_pflags & ZFS_AV_QUARANTINED)
2665		return (SET_ERROR(EACCES));
2666
2667	add_perm = (ZTOV(szp)->v_type == VDIR) ?
2668	    ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE;
2669
2670	/*
2671	 * Rename permissions are combination of delete permission +
2672	 * add file/subdir permission.
2673	 *
2674	 * BSD operating systems also require write permission
2675	 * on the directory being moved from one parent directory
2676	 * to another.
2677	 */
2678	if (ZTOV(szp)->v_type == VDIR && ZTOV(sdzp) != ZTOV(tdzp)) {
2679		if (error = zfs_zaccess(szp, ACE_WRITE_DATA, 0, B_FALSE, cr))
2680			return (error);
2681	}
2682
2683	/*
2684	 * first make sure we do the delete portion.
2685	 *
2686	 * If that succeeds then check for add_file/add_subdir permissions
2687	 */
2688
2689	if (error = zfs_zaccess_delete(sdzp, szp, cr))
2690		return (error);
2691
2692	/*
2693	 * If we have a tzp, see if we can delete it?
2694	 */
2695	if (tzp) {
2696		if (error = zfs_zaccess_delete(tdzp, tzp, cr))
2697			return (error);
2698	}
2699
2700	/*
2701	 * Now check for add permissions
2702	 */
2703	error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr);
2704
2705	return (error);
2706}
2707