1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <sys/types.h>
27#include <sys/param.h>
28#include <sys/time.h>
29#include <sys/systm.h>
30#include <sys/sysmacros.h>
31#include <sys/resource.h>
32#include <sys/vfs.h>
33#include <sys/vnode.h>
34#include <sys/sid.h>
35#include <sys/file.h>
36#include <sys/stat.h>
37#include <sys/kmem.h>
38#include <sys/cmn_err.h>
39#include <sys/errno.h>
40#include <sys/unistd.h>
41#include <sys/sdt.h>
42#include <sys/fs/zfs.h>
43#include <sys/mode.h>
44#include <sys/policy.h>
45#include <sys/zfs_znode.h>
46#include <sys/zfs_fuid.h>
47#include <sys/zfs_acl.h>
48#include <sys/zfs_dir.h>
49#include <sys/zfs_vfsops.h>
50#include <sys/dmu.h>
51#include <sys/dnode.h>
52#include <sys/zap.h>
53#include <sys/fs/fs_subr.h>
54#include <sys/acl/acl_common.h>
55
/* Short names for the two access-control ACE types. */
#define	ALLOW	ACE_ACCESS_ALLOWED_ACE_TYPE
#define	DENY	ACE_ACCESS_DENIED_ACE_TYPE
/* Inclusive bounds of the type range tested by zfs_acl_valid_ace_type(). */
#define	MAX_ACE_TYPE	ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE
#define	MIN_ACE_TYPE	ALLOW

/* Who-type flag combination that identifies a group@ (owning group) entry. */
#define	OWNING_GROUP		(ACE_GROUP|ACE_IDENTIFIER_GROUP)
/* Named groups of access-mask bits for everyone@ and owner@ entries. */
#define	EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \
    ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE)
#define	EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \
    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
#define	OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \
    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)

/* Union of the access-mask bits named by this macro as "checked". */
#define	ZFS_CHECKED_MASKS (ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_DATA| \
    ACE_READ_NAMED_ATTRS|ACE_WRITE_DATA|ACE_WRITE_ATTRIBUTES| \
    ACE_WRITE_NAMED_ATTRS|ACE_APPEND_DATA|ACE_EXECUTE|ACE_WRITE_OWNER| \
    ACE_WRITE_ACL|ACE_DELETE|ACE_DELETE_CHILD|ACE_SYNCHRONIZE)

/* Write-type access bits, split into data and attribute groups. */
#define	WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS)
#define	WRITE_MASK_ATTRS (ACE_WRITE_ACL|ACE_WRITE_OWNER|ACE_WRITE_ATTRIBUTES| \
    ACE_DELETE|ACE_DELETE_CHILD)
#define	WRITE_MASK (WRITE_MASK_DATA|WRITE_MASK_ATTRS)

/* Read/write/append/execute data bits (file and directory spellings). */
#define	OGE_CLEAR	(ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
    ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)

/* Expands to exactly the same bit set as OGE_CLEAR. */
#define	OKAY_MASK_BITS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
    ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)

/* Every inheritance-related ACE flag. */
#define	ALL_INHERIT	(ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \
    ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE|ACE_INHERITED_ACE)

#define	RESTRICTED_CLEAR	(ACE_WRITE_ACL|ACE_WRITE_OWNER)

#define	V4_ACL_WIDE_FLAGS (ZFS_ACL_AUTO_INHERIT|ZFS_ACL_DEFAULTED|\
    ZFS_ACL_PROTECTED)

/*
 * ACL-wide znode flag bits; zfs_aclset_common() clears all of these in
 * zp_flags before OR-ing in the hints of the ACL being stored.
 */
#define	ZFS_ACL_WIDE_FLAGS (V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\
    ZFS_ACL_OBJ_ACE)

/* All three execute mode bits; tested as a set by zfs_mode_compute(). */
#define	ALL_MODE_EXECS (S_IXUSR | S_IXGRP | S_IXOTH)
97
98static uint16_t
99zfs_ace_v0_get_type(void *acep)
100{
101	return (((zfs_oldace_t *)acep)->z_type);
102}
103
104static uint16_t
105zfs_ace_v0_get_flags(void *acep)
106{
107	return (((zfs_oldace_t *)acep)->z_flags);
108}
109
110static uint32_t
111zfs_ace_v0_get_mask(void *acep)
112{
113	return (((zfs_oldace_t *)acep)->z_access_mask);
114}
115
116static uint64_t
117zfs_ace_v0_get_who(void *acep)
118{
119	return (((zfs_oldace_t *)acep)->z_fuid);
120}
121
122static void
123zfs_ace_v0_set_type(void *acep, uint16_t type)
124{
125	((zfs_oldace_t *)acep)->z_type = type;
126}
127
128static void
129zfs_ace_v0_set_flags(void *acep, uint16_t flags)
130{
131	((zfs_oldace_t *)acep)->z_flags = flags;
132}
133
134static void
135zfs_ace_v0_set_mask(void *acep, uint32_t mask)
136{
137	((zfs_oldace_t *)acep)->z_access_mask = mask;
138}
139
140static void
141zfs_ace_v0_set_who(void *acep, uint64_t who)
142{
143	((zfs_oldace_t *)acep)->z_fuid = who;
144}
145
146/*ARGSUSED*/
147static size_t
148zfs_ace_v0_size(void *acep)
149{
150	return (sizeof (zfs_oldace_t));
151}
152
153static size_t
154zfs_ace_v0_abstract_size(void)
155{
156	return (sizeof (zfs_oldace_t));
157}
158
159static int
160zfs_ace_v0_mask_off(void)
161{
162	return (offsetof(zfs_oldace_t, z_access_mask));
163}
164
/*
 * Old-format ACEs have no extra payload: *datap is set to NULL and the
 * reported payload length is 0.  acep is not examined.
 */
/*ARGSUSED*/
static int
zfs_ace_v0_data(void *acep, void **datap)
{
	int extra_len = 0;

	*datap = NULL;
	return (extra_len);
}
172
/*
 * Operations vector for old-format (zfs_oldace_t) ACEs; installed by
 * zfs_acl_alloc() for ACLs that are not ZFS_ACL_VERSION_FUID.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of acl_ops_t (declared outside this part of the file).
 */
static acl_ops_t zfs_acl_v0_ops = {
	zfs_ace_v0_get_mask,
	zfs_ace_v0_set_mask,
	zfs_ace_v0_get_flags,
	zfs_ace_v0_set_flags,
	zfs_ace_v0_get_type,
	zfs_ace_v0_set_type,
	zfs_ace_v0_get_who,
	zfs_ace_v0_set_who,
	zfs_ace_v0_size,
	zfs_ace_v0_abstract_size,
	zfs_ace_v0_mask_off,
	zfs_ace_v0_data
};
187
188static uint16_t
189zfs_ace_fuid_get_type(void *acep)
190{
191	return (((zfs_ace_hdr_t *)acep)->z_type);
192}
193
194static uint16_t
195zfs_ace_fuid_get_flags(void *acep)
196{
197	return (((zfs_ace_hdr_t *)acep)->z_flags);
198}
199
200static uint32_t
201zfs_ace_fuid_get_mask(void *acep)
202{
203	return (((zfs_ace_hdr_t *)acep)->z_access_mask);
204}
205
206static uint64_t
207zfs_ace_fuid_get_who(void *args)
208{
209	uint16_t entry_type;
210	zfs_ace_t *acep = args;
211
212	entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS;
213
214	if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP ||
215	    entry_type == ACE_EVERYONE)
216		return (-1);
217	return (((zfs_ace_t *)acep)->z_fuid);
218}
219
220static void
221zfs_ace_fuid_set_type(void *acep, uint16_t type)
222{
223	((zfs_ace_hdr_t *)acep)->z_type = type;
224}
225
226static void
227zfs_ace_fuid_set_flags(void *acep, uint16_t flags)
228{
229	((zfs_ace_hdr_t *)acep)->z_flags = flags;
230}
231
232static void
233zfs_ace_fuid_set_mask(void *acep, uint32_t mask)
234{
235	((zfs_ace_hdr_t *)acep)->z_access_mask = mask;
236}
237
238static void
239zfs_ace_fuid_set_who(void *arg, uint64_t who)
240{
241	zfs_ace_t *acep = arg;
242
243	uint16_t entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS;
244
245	if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP ||
246	    entry_type == ACE_EVERYONE)
247		return;
248	acep->z_fuid = who;
249}
250
251static size_t
252zfs_ace_fuid_size(void *acep)
253{
254	zfs_ace_hdr_t *zacep = acep;
255	uint16_t entry_type;
256
257	switch (zacep->z_type) {
258	case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
259	case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
260	case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
261	case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
262		return (sizeof (zfs_object_ace_t));
263	case ALLOW:
264	case DENY:
265		entry_type =
266		    (((zfs_ace_hdr_t *)acep)->z_flags & ACE_TYPE_FLAGS);
267		if (entry_type == ACE_OWNER ||
268		    entry_type == OWNING_GROUP ||
269		    entry_type == ACE_EVERYONE)
270			return (sizeof (zfs_ace_hdr_t));
271		/*FALLTHROUGH*/
272	default:
273		return (sizeof (zfs_ace_t));
274	}
275}
276
277static size_t
278zfs_ace_fuid_abstract_size(void)
279{
280	return (sizeof (zfs_ace_hdr_t));
281}
282
283static int
284zfs_ace_fuid_mask_off(void)
285{
286	return (offsetof(zfs_ace_hdr_t, z_access_mask));
287}
288
289static int
290zfs_ace_fuid_data(void *acep, void **datap)
291{
292	zfs_ace_t *zacep = acep;
293	zfs_object_ace_t *zobjp;
294
295	switch (zacep->z_hdr.z_type) {
296	case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
297	case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
298	case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
299	case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
300		zobjp = acep;
301		*datap = (caddr_t)zobjp + sizeof (zfs_ace_t);
302		return (sizeof (zfs_object_ace_t) - sizeof (zfs_ace_t));
303	default:
304		*datap = NULL;
305		return (0);
306	}
307}
308
/*
 * Operations vector for FUID-format ACEs (zfs_ace_hdr_t / zfs_ace_t /
 * zfs_object_ace_t); installed by zfs_acl_alloc() for
 * ZFS_ACL_VERSION_FUID ACLs and by zfs_acl_xform() on conversion.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of acl_ops_t (declared outside this part of the file).
 */
static acl_ops_t zfs_acl_fuid_ops = {
	zfs_ace_fuid_get_mask,
	zfs_ace_fuid_set_mask,
	zfs_ace_fuid_get_flags,
	zfs_ace_fuid_set_flags,
	zfs_ace_fuid_get_type,
	zfs_ace_fuid_set_type,
	zfs_ace_fuid_get_who,
	zfs_ace_fuid_set_who,
	zfs_ace_fuid_size,
	zfs_ace_fuid_abstract_size,
	zfs_ace_fuid_mask_off,
	zfs_ace_fuid_data
};
323
324static int
325zfs_acl_version(int version)
326{
327	if (version < ZPL_VERSION_FUID)
328		return (ZFS_ACL_VERSION_INITIAL);
329	else
330		return (ZFS_ACL_VERSION_FUID);
331}
332
333static int
334zfs_acl_version_zp(znode_t *zp)
335{
336	return (zfs_acl_version(zp->z_zfsvfs->z_version));
337}
338
339static zfs_acl_t *
340zfs_acl_alloc(int vers)
341{
342	zfs_acl_t *aclp;
343
344	aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP);
345	list_create(&aclp->z_acl, sizeof (zfs_acl_node_t),
346	    offsetof(zfs_acl_node_t, z_next));
347	aclp->z_version = vers;
348	if (vers == ZFS_ACL_VERSION_FUID)
349		aclp->z_ops = zfs_acl_fuid_ops;
350	else
351		aclp->z_ops = zfs_acl_v0_ops;
352	return (aclp);
353}
354
355static zfs_acl_node_t *
356zfs_acl_node_alloc(size_t bytes)
357{
358	zfs_acl_node_t *aclnode;
359
360	aclnode = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP);
361	if (bytes) {
362		aclnode->z_acldata = kmem_alloc(bytes, KM_SLEEP);
363		aclnode->z_allocdata = aclnode->z_acldata;
364		aclnode->z_allocsize = bytes;
365		aclnode->z_size = bytes;
366	}
367
368	return (aclnode);
369}
370
371static void
372zfs_acl_node_free(zfs_acl_node_t *aclnode)
373{
374	if (aclnode->z_allocsize)
375		kmem_free(aclnode->z_allocdata, aclnode->z_allocsize);
376	kmem_free(aclnode, sizeof (zfs_acl_node_t));
377}
378
379static void
380zfs_acl_release_nodes(zfs_acl_t *aclp)
381{
382	zfs_acl_node_t *aclnode;
383
384	while (aclnode = list_head(&aclp->z_acl)) {
385		list_remove(&aclp->z_acl, aclnode);
386		zfs_acl_node_free(aclnode);
387	}
388	aclp->z_acl_count = 0;
389	aclp->z_acl_bytes = 0;
390}
391
392void
393zfs_acl_free(zfs_acl_t *aclp)
394{
395	zfs_acl_release_nodes(aclp);
396	list_destroy(&aclp->z_acl);
397	kmem_free(aclp, sizeof (zfs_acl_t));
398}
399
400static boolean_t
401zfs_acl_valid_ace_type(uint_t type, uint_t flags)
402{
403	uint16_t entry_type;
404
405	switch (type) {
406	case ALLOW:
407	case DENY:
408	case ACE_SYSTEM_AUDIT_ACE_TYPE:
409	case ACE_SYSTEM_ALARM_ACE_TYPE:
410		entry_type = flags & ACE_TYPE_FLAGS;
411		return (entry_type == ACE_OWNER ||
412		    entry_type == OWNING_GROUP ||
413		    entry_type == ACE_EVERYONE || entry_type == 0 ||
414		    entry_type == ACE_IDENTIFIER_GROUP);
415	default:
416		if (type >= MIN_ACE_TYPE && type <= MAX_ACE_TYPE)
417			return (B_TRUE);
418	}
419	return (B_FALSE);
420}
421
422static boolean_t
423zfs_ace_valid(vtype_t obj_type, zfs_acl_t *aclp, uint16_t type, uint16_t iflags)
424{
425	/*
426	 * first check type of entry
427	 */
428
429	if (!zfs_acl_valid_ace_type(type, iflags))
430		return (B_FALSE);
431
432	switch (type) {
433	case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
434	case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
435	case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
436	case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
437		if (aclp->z_version < ZFS_ACL_VERSION_FUID)
438			return (B_FALSE);
439		aclp->z_hints |= ZFS_ACL_OBJ_ACE;
440	}
441
442	/*
443	 * next check inheritance level flags
444	 */
445
446	if (obj_type == VDIR &&
447	    (iflags & (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
448		aclp->z_hints |= ZFS_INHERIT_ACE;
449
450	if (iflags & (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) {
451		if ((iflags & (ACE_FILE_INHERIT_ACE|
452		    ACE_DIRECTORY_INHERIT_ACE)) == 0) {
453			return (B_FALSE);
454		}
455	}
456
457	return (B_TRUE);
458}
459
460static void *
461zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who,
462    uint32_t *access_mask, uint16_t *iflags, uint16_t *type)
463{
464	zfs_acl_node_t *aclnode;
465
466	if (start == NULL) {
467		aclnode = list_head(&aclp->z_acl);
468		if (aclnode == NULL)
469			return (NULL);
470
471		aclp->z_next_ace = aclnode->z_acldata;
472		aclp->z_curr_node = aclnode;
473		aclnode->z_ace_idx = 0;
474	}
475
476	aclnode = aclp->z_curr_node;
477
478	if (aclnode == NULL)
479		return (NULL);
480
481	if (aclnode->z_ace_idx >= aclnode->z_ace_count) {
482		aclnode = list_next(&aclp->z_acl, aclnode);
483		if (aclnode == NULL)
484			return (NULL);
485		else {
486			aclp->z_curr_node = aclnode;
487			aclnode->z_ace_idx = 0;
488			aclp->z_next_ace = aclnode->z_acldata;
489		}
490	}
491
492	if (aclnode->z_ace_idx < aclnode->z_ace_count) {
493		void *acep = aclp->z_next_ace;
494		size_t ace_size;
495
496		/*
497		 * Make sure we don't overstep our bounds
498		 */
499		ace_size = aclp->z_ops.ace_size(acep);
500
501		if (((caddr_t)acep + ace_size) >
502		    ((caddr_t)aclnode->z_acldata + aclnode->z_size)) {
503			return (NULL);
504		}
505
506		*iflags = aclp->z_ops.ace_flags_get(acep);
507		*type = aclp->z_ops.ace_type_get(acep);
508		*access_mask = aclp->z_ops.ace_mask_get(acep);
509		*who = aclp->z_ops.ace_who_get(acep);
510		aclp->z_next_ace = (caddr_t)aclp->z_next_ace + ace_size;
511		aclnode->z_ace_idx++;
512		return ((void *)acep);
513	}
514	return (NULL);
515}
516
517/*ARGSUSED*/
518static uint64_t
519zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt,
520    uint16_t *flags, uint16_t *type, uint32_t *mask)
521{
522	zfs_acl_t *aclp = datap;
523	zfs_ace_hdr_t *acep = (zfs_ace_hdr_t *)(uintptr_t)cookie;
524	uint64_t who;
525
526	acep = zfs_acl_next_ace(aclp, acep, &who, mask,
527	    flags, type);
528	return ((uint64_t)(uintptr_t)acep);
529}
530
531static zfs_acl_node_t *
532zfs_acl_curr_node(zfs_acl_t *aclp)
533{
534	ASSERT(aclp->z_curr_node);
535	return (aclp->z_curr_node);
536}
537
538/*
539 * Copy ACE to internal ZFS format.
540 * While processing the ACL each ACE will be validated for correctness.
541 * ACE FUIDs will be created later.
542 */
543int
544zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, vtype_t obj_type, zfs_acl_t *aclp,
545    void *datap, zfs_ace_t *z_acl, int aclcnt, size_t *size,
546    zfs_fuid_info_t **fuidp, cred_t *cr)
547{
548	int i;
549	uint16_t entry_type;
550	zfs_ace_t *aceptr = z_acl;
551	ace_t *acep = datap;
552	zfs_object_ace_t *zobjacep;
553	ace_object_t *aceobjp;
554
555	for (i = 0; i != aclcnt; i++) {
556		aceptr->z_hdr.z_access_mask = acep->a_access_mask;
557		aceptr->z_hdr.z_flags = acep->a_flags;
558		aceptr->z_hdr.z_type = acep->a_type;
559		entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS;
560		if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP &&
561		    entry_type != ACE_EVERYONE) {
562			aceptr->z_fuid = zfs_fuid_create(zfsvfs, acep->a_who,
563			    cr, (entry_type == 0) ?
564			    ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp);
565		}
566
567		/*
568		 * Make sure ACE is valid
569		 */
570		if (zfs_ace_valid(obj_type, aclp, aceptr->z_hdr.z_type,
571		    aceptr->z_hdr.z_flags) != B_TRUE)
572			return (EINVAL);
573
574		switch (acep->a_type) {
575		case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
576		case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
577		case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
578		case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
579			zobjacep = (zfs_object_ace_t *)aceptr;
580			aceobjp = (ace_object_t *)acep;
581
582			bcopy(aceobjp->a_obj_type, zobjacep->z_object_type,
583			    sizeof (aceobjp->a_obj_type));
584			bcopy(aceobjp->a_inherit_obj_type,
585			    zobjacep->z_inherit_type,
586			    sizeof (aceobjp->a_inherit_obj_type));
587			acep = (ace_t *)((caddr_t)acep + sizeof (ace_object_t));
588			break;
589		default:
590			acep = (ace_t *)((caddr_t)acep + sizeof (ace_t));
591		}
592
593		aceptr = (zfs_ace_t *)((caddr_t)aceptr +
594		    aclp->z_ops.ace_size(aceptr));
595	}
596
597	*size = (caddr_t)aceptr - (caddr_t)z_acl;
598
599	return (0);
600}
601
602/*
603 * Copy ZFS ACEs to fixed size ace_t layout
604 */
605static void
606zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr,
607    void *datap, int filter)
608{
609	uint64_t who;
610	uint32_t access_mask;
611	uint16_t iflags, type;
612	zfs_ace_hdr_t *zacep = NULL;
613	ace_t *acep = datap;
614	ace_object_t *objacep;
615	zfs_object_ace_t *zobjacep;
616	size_t ace_size;
617	uint16_t entry_type;
618
619	while (zacep = zfs_acl_next_ace(aclp, zacep,
620	    &who, &access_mask, &iflags, &type)) {
621
622		switch (type) {
623		case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
624		case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
625		case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
626		case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
627			if (filter) {
628				continue;
629			}
630			zobjacep = (zfs_object_ace_t *)zacep;
631			objacep = (ace_object_t *)acep;
632			bcopy(zobjacep->z_object_type,
633			    objacep->a_obj_type,
634			    sizeof (zobjacep->z_object_type));
635			bcopy(zobjacep->z_inherit_type,
636			    objacep->a_inherit_obj_type,
637			    sizeof (zobjacep->z_inherit_type));
638			ace_size = sizeof (ace_object_t);
639			break;
640		default:
641			ace_size = sizeof (ace_t);
642			break;
643		}
644
645		entry_type = (iflags & ACE_TYPE_FLAGS);
646		if ((entry_type != ACE_OWNER &&
647		    entry_type != OWNING_GROUP &&
648		    entry_type != ACE_EVERYONE)) {
649			acep->a_who = zfs_fuid_map_id(zfsvfs, who,
650			    cr, (entry_type & ACE_IDENTIFIER_GROUP) ?
651			    ZFS_ACE_GROUP : ZFS_ACE_USER);
652		} else {
653			acep->a_who = (uid_t)(int64_t)who;
654		}
655		acep->a_access_mask = access_mask;
656		acep->a_flags = iflags;
657		acep->a_type = type;
658		acep = (ace_t *)((caddr_t)acep + ace_size);
659	}
660}
661
662static int
663zfs_copy_ace_2_oldace(vtype_t obj_type, zfs_acl_t *aclp, ace_t *acep,
664    zfs_oldace_t *z_acl, int aclcnt, size_t *size)
665{
666	int i;
667	zfs_oldace_t *aceptr = z_acl;
668
669	for (i = 0; i != aclcnt; i++, aceptr++) {
670		aceptr->z_access_mask = acep[i].a_access_mask;
671		aceptr->z_type = acep[i].a_type;
672		aceptr->z_flags = acep[i].a_flags;
673		aceptr->z_fuid = acep[i].a_who;
674		/*
675		 * Make sure ACE is valid
676		 */
677		if (zfs_ace_valid(obj_type, aclp, aceptr->z_type,
678		    aceptr->z_flags) != B_TRUE)
679			return (EINVAL);
680	}
681	*size = (caddr_t)aceptr - (caddr_t)z_acl;
682	return (0);
683}
684
/*
 * convert old ACL format to new
 *
 * aclp must currently be a ZFS_ACL_VERSION_INITIAL ACL.  Its ACEs are
 * gathered into one temporary zfs_oldace_t array, re-encoded through
 * zfs_copy_ace_2_fuid() into a single freshly allocated node, and that
 * node then replaces every node previously on aclp->z_acl.  Along the
 * way aclp's ops vector is switched to zfs_acl_fuid_ops and its version
 * is set to ZFS_ACL_VERSION.
 */
void
zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr)
{
	zfs_oldace_t *oldaclp;
	int i;
	uint16_t type, iflags;
	uint32_t access_mask;
	uint64_t who;
	void *cookie = NULL;
	zfs_acl_node_t *newaclnode;

	ASSERT(aclp->z_version == ZFS_ACL_VERSION_INITIAL);
	/*
	 * First create the ACE in a contiguous piece of memory
	 * for zfs_copy_ace_2_fuid().
	 *
	 * We only convert an ACL once, so this won't happen
	 * everytime.
	 */
	oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count,
	    KM_SLEEP);
	i = 0;
	/*
	 * The walk still uses the old-format ops here; z_ops is only
	 * switched after the loop.  The array was sized from z_acl_count,
	 * so the nodes are expected to hold exactly that many ACEs.
	 */
	while (cookie = zfs_acl_next_ace(aclp, cookie, &who,
	    &access_mask, &iflags, &type)) {
		oldaclp[i].z_flags = iflags;
		oldaclp[i].z_type = type;
		oldaclp[i].z_fuid = who;
		oldaclp[i++].z_access_mask = access_mask;
	}

	/*
	 * Size the new node for a zfs_object_ace_t per entry;
	 * zfs_copy_ace_2_fuid() stores the number of bytes it actually
	 * produced in newaclnode->z_size.
	 */
	newaclnode = zfs_acl_node_alloc(aclp->z_acl_count *
	    sizeof (zfs_object_ace_t));
	/* zfs_copy_ace_2_fuid() sizes its output through aclp->z_ops. */
	aclp->z_ops = zfs_acl_fuid_ops;
	/*
	 * NOTE(review): zfs_copy_ace_2_fuid() reads its source through
	 * ace_t pointers, while oldaclp holds zfs_oldace_t entries; this
	 * relies on the two layouts being compatible -- confirm.
	 */
	VERIFY(zfs_copy_ace_2_fuid(zp->z_zfsvfs, ZTOV(zp)->v_type, aclp,
	    oldaclp, newaclnode->z_acldata, aclp->z_acl_count,
	    &newaclnode->z_size, NULL, cr) == 0);
	newaclnode->z_ace_count = aclp->z_acl_count;
	aclp->z_version = ZFS_ACL_VERSION;
	/* Must happen before z_acl_count is zeroed by the release below. */
	kmem_free(oldaclp, aclp->z_acl_count * sizeof (zfs_oldace_t));

	/*
	 * Release all previous ACL nodes
	 */

	zfs_acl_release_nodes(aclp);

	list_insert_head(&aclp->z_acl, newaclnode);

	/* Restore the totals that zfs_acl_release_nodes() reset to zero. */
	aclp->z_acl_bytes = newaclnode->z_size;
	aclp->z_acl_count = newaclnode->z_ace_count;

}
740
741/*
742 * Convert unix access mask to v4 access mask
743 */
744static uint32_t
745zfs_unix_to_v4(uint32_t access_mask)
746{
747	uint32_t new_mask = 0;
748
749	if (access_mask & S_IXOTH)
750		new_mask |= ACE_EXECUTE;
751	if (access_mask & S_IWOTH)
752		new_mask |= ACE_WRITE_DATA;
753	if (access_mask & S_IROTH)
754		new_mask |= ACE_READ_DATA;
755	return (new_mask);
756}
757
758static void
759zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask,
760    uint16_t access_type, uint64_t fuid, uint16_t entry_type)
761{
762	uint16_t type = entry_type & ACE_TYPE_FLAGS;
763
764	aclp->z_ops.ace_mask_set(acep, access_mask);
765	aclp->z_ops.ace_type_set(acep, access_type);
766	aclp->z_ops.ace_flags_set(acep, entry_type);
767	if ((type != ACE_OWNER && type != OWNING_GROUP &&
768	    type != ACE_EVERYONE))
769		aclp->z_ops.ace_who_set(acep, fuid);
770}
771
772/*
773 * Determine mode of file based on ACL.
774 * Also, create FUIDs for any User/Group ACEs
775 */
776static uint64_t
777zfs_mode_compute(znode_t *zp, zfs_acl_t *aclp)
778{
779	int		entry_type;
780	mode_t		mode;
781	mode_t		seen = 0;
782	zfs_ace_hdr_t 	*acep = NULL;
783	uint64_t	who;
784	uint16_t	iflags, type;
785	uint32_t	access_mask;
786	boolean_t	an_exec_denied = B_FALSE;
787
788	mode = (zp->z_phys->zp_mode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
789
790	while (acep = zfs_acl_next_ace(aclp, acep, &who,
791	    &access_mask, &iflags, &type)) {
792
793		if (!zfs_acl_valid_ace_type(type, iflags))
794			continue;
795
796		entry_type = (iflags & ACE_TYPE_FLAGS);
797
798		/*
799		 * Skip over owner@, group@ or everyone@ inherit only ACEs
800		 */
801		if ((iflags & ACE_INHERIT_ONLY_ACE) &&
802		    (entry_type == ACE_OWNER || entry_type == ACE_EVERYONE ||
803		    entry_type == OWNING_GROUP))
804			continue;
805
806		if (entry_type == ACE_OWNER) {
807			if ((access_mask & ACE_READ_DATA) &&
808			    (!(seen & S_IRUSR))) {
809				seen |= S_IRUSR;
810				if (type == ALLOW) {
811					mode |= S_IRUSR;
812				}
813			}
814			if ((access_mask & ACE_WRITE_DATA) &&
815			    (!(seen & S_IWUSR))) {
816				seen |= S_IWUSR;
817				if (type == ALLOW) {
818					mode |= S_IWUSR;
819				}
820			}
821			if ((access_mask & ACE_EXECUTE) &&
822			    (!(seen & S_IXUSR))) {
823				seen |= S_IXUSR;
824				if (type == ALLOW) {
825					mode |= S_IXUSR;
826				}
827			}
828		} else if (entry_type == OWNING_GROUP) {
829			if ((access_mask & ACE_READ_DATA) &&
830			    (!(seen & S_IRGRP))) {
831				seen |= S_IRGRP;
832				if (type == ALLOW) {
833					mode |= S_IRGRP;
834				}
835			}
836			if ((access_mask & ACE_WRITE_DATA) &&
837			    (!(seen & S_IWGRP))) {
838				seen |= S_IWGRP;
839				if (type == ALLOW) {
840					mode |= S_IWGRP;
841				}
842			}
843			if ((access_mask & ACE_EXECUTE) &&
844			    (!(seen & S_IXGRP))) {
845				seen |= S_IXGRP;
846				if (type == ALLOW) {
847					mode |= S_IXGRP;
848				}
849			}
850		} else if (entry_type == ACE_EVERYONE) {
851			if ((access_mask & ACE_READ_DATA)) {
852				if (!(seen & S_IRUSR)) {
853					seen |= S_IRUSR;
854					if (type == ALLOW) {
855						mode |= S_IRUSR;
856					}
857				}
858				if (!(seen & S_IRGRP)) {
859					seen |= S_IRGRP;
860					if (type == ALLOW) {
861						mode |= S_IRGRP;
862					}
863				}
864				if (!(seen & S_IROTH)) {
865					seen |= S_IROTH;
866					if (type == ALLOW) {
867						mode |= S_IROTH;
868					}
869				}
870			}
871			if ((access_mask & ACE_WRITE_DATA)) {
872				if (!(seen & S_IWUSR)) {
873					seen |= S_IWUSR;
874					if (type == ALLOW) {
875						mode |= S_IWUSR;
876					}
877				}
878				if (!(seen & S_IWGRP)) {
879					seen |= S_IWGRP;
880					if (type == ALLOW) {
881						mode |= S_IWGRP;
882					}
883				}
884				if (!(seen & S_IWOTH)) {
885					seen |= S_IWOTH;
886					if (type == ALLOW) {
887						mode |= S_IWOTH;
888					}
889				}
890			}
891			if ((access_mask & ACE_EXECUTE)) {
892				if (!(seen & S_IXUSR)) {
893					seen |= S_IXUSR;
894					if (type == ALLOW) {
895						mode |= S_IXUSR;
896					}
897				}
898				if (!(seen & S_IXGRP)) {
899					seen |= S_IXGRP;
900					if (type == ALLOW) {
901						mode |= S_IXGRP;
902					}
903				}
904				if (!(seen & S_IXOTH)) {
905					seen |= S_IXOTH;
906					if (type == ALLOW) {
907						mode |= S_IXOTH;
908					}
909				}
910			}
911		} else {
912			/*
913			 * Only care if this IDENTIFIER_GROUP or
914			 * USER ACE denies execute access to someone,
915			 * mode is not affected
916			 */
917			if ((access_mask & ACE_EXECUTE) && type == DENY)
918				an_exec_denied = B_TRUE;
919		}
920	}
921
922	/*
923	 * Failure to allow is effectively a deny, so execute permission
924	 * is denied if it was never mentioned or if we explicitly
925	 * weren't allowed it.
926	 */
927	if (!an_exec_denied &&
928	    ((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS ||
929	    (mode & ALL_MODE_EXECS) != ALL_MODE_EXECS))
930		an_exec_denied = B_TRUE;
931
932	if (an_exec_denied)
933		zp->z_phys->zp_flags &= ~ZFS_NO_EXECS_DENIED;
934	else
935		zp->z_phys->zp_flags |= ZFS_NO_EXECS_DENIED;
936
937	return (mode);
938}
939
940static zfs_acl_t *
941zfs_acl_node_read_internal(znode_t *zp, boolean_t will_modify)
942{
943	zfs_acl_t	*aclp;
944	zfs_acl_node_t	*aclnode;
945
946	aclp = zfs_acl_alloc(zp->z_phys->zp_acl.z_acl_version);
947
948	/*
949	 * Version 0 to 1 znode_acl_phys has the size/count fields swapped.
950	 * Version 0 didn't have a size field, only a count.
951	 */
952	if (zp->z_phys->zp_acl.z_acl_version == ZFS_ACL_VERSION_INITIAL) {
953		aclp->z_acl_count = zp->z_phys->zp_acl.z_acl_size;
954		aclp->z_acl_bytes = ZFS_ACL_SIZE(aclp->z_acl_count);
955	} else {
956		aclp->z_acl_count = zp->z_phys->zp_acl.z_acl_count;
957		aclp->z_acl_bytes = zp->z_phys->zp_acl.z_acl_size;
958	}
959
960	aclnode = zfs_acl_node_alloc(will_modify ? aclp->z_acl_bytes : 0);
961	aclnode->z_ace_count = aclp->z_acl_count;
962	if (will_modify) {
963		bcopy(zp->z_phys->zp_acl.z_ace_data, aclnode->z_acldata,
964		    aclp->z_acl_bytes);
965	} else {
966		aclnode->z_size = aclp->z_acl_bytes;
967		aclnode->z_acldata = &zp->z_phys->zp_acl.z_ace_data[0];
968	}
969
970	list_insert_head(&aclp->z_acl, aclnode);
971
972	return (aclp);
973}
974
975/*
976 * Read an external acl object.  If the intent is to modify, always
977 * create a new acl and leave any cached acl in place.
978 */
979static int
980zfs_acl_node_read(znode_t *zp, zfs_acl_t **aclpp, boolean_t will_modify)
981{
982	uint64_t extacl = zp->z_phys->zp_acl.z_acl_extern_obj;
983	zfs_acl_t	*aclp;
984	size_t		aclsize;
985	size_t		acl_count;
986	zfs_acl_node_t	*aclnode;
987	int error;
988
989	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
990
991	if (zp->z_acl_cached && !will_modify) {
992		*aclpp = zp->z_acl_cached;
993		return (0);
994	}
995
996	if (zp->z_phys->zp_acl.z_acl_extern_obj == 0) {
997		*aclpp = zfs_acl_node_read_internal(zp, will_modify);
998		if (!will_modify)
999			zp->z_acl_cached = *aclpp;
1000		return (0);
1001	}
1002
1003	aclp = zfs_acl_alloc(zp->z_phys->zp_acl.z_acl_version);
1004	if (zp->z_phys->zp_acl.z_acl_version == ZFS_ACL_VERSION_INITIAL) {
1005		zfs_acl_phys_v0_t *zacl0 =
1006		    (zfs_acl_phys_v0_t *)&zp->z_phys->zp_acl;
1007
1008		aclsize = ZFS_ACL_SIZE(zacl0->z_acl_count);
1009		acl_count = zacl0->z_acl_count;
1010	} else {
1011		aclsize = zp->z_phys->zp_acl.z_acl_size;
1012		acl_count = zp->z_phys->zp_acl.z_acl_count;
1013		if (aclsize == 0)
1014			aclsize = acl_count * sizeof (zfs_ace_t);
1015	}
1016	aclnode = zfs_acl_node_alloc(aclsize);
1017	list_insert_head(&aclp->z_acl, aclnode);
1018	error = dmu_read(zp->z_zfsvfs->z_os, extacl, 0,
1019	    aclsize, aclnode->z_acldata, DMU_READ_PREFETCH);
1020	aclnode->z_ace_count = acl_count;
1021	aclp->z_acl_count = acl_count;
1022	aclp->z_acl_bytes = aclsize;
1023
1024	if (error != 0) {
1025		zfs_acl_free(aclp);
1026		/* convert checksum errors into IO errors */
1027		if (error == ECKSUM)
1028			error = EIO;
1029		return (error);
1030	}
1031
1032	*aclpp = aclp;
1033	if (!will_modify)
1034		zp->z_acl_cached = aclp;
1035	return (0);
1036}
1037
/*
 * common code for setting ACLs.
 *
 * Stores aclp as the ACL of zp within transaction tx:
 *   - drops any cached ACL of the znode;
 *   - recomputes zp_mode from the ACL (zfs_mode_compute());
 *   - converts an old-format ACL to the FUID format when the file
 *     system uses FUIDs and its version allows it;
 *   - writes the ACE data either into an external ACL object (when it
 *     is larger than ZFS_ACE_SPACE) or into the znode itself, freeing
 *     an external object that is no longer needed;
 *   - updates the count/size/version fields and the ACL-wide flags.
 *
 * Returns 0 on success or the error from dmu_object_free().  Note that
 * on such an error the cached ACL has already been dropped and zp_mode
 * already updated.
 *
 * NOTE(review): the callers are outside this part of the file; the
 * previous version of this comment named zfs_mode_update, zfs_perm_init
 * and zfs_setacl, and described an inherit pointer argument that this
 * function's signature does not have.
 */
int
zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
{
	int		error;
	znode_phys_t	*zphys = zp->z_phys;
	zfs_acl_phys_t	*zacl = &zphys->zp_acl;
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	uint64_t	aoid = zphys->zp_acl.z_acl_extern_obj;
	uint64_t	off = 0;
	dmu_object_type_t otype;
	zfs_acl_node_t	*aclnode;

	dmu_buf_will_dirty(zp->z_dbuf, tx);

	/* The cached ACL is about to become stale. */
	if (zp->z_acl_cached) {
		zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = NULL;
	}

	zphys->zp_mode = zfs_mode_compute(zp, aclp);

	/*
	 * Decide which object type to use.  If we are forced to
	 * use old ACL format then transform ACL into zfs_oldace_t
	 * layout.
	 */
	if (!zfsvfs->z_use_fuids) {
		otype = DMU_OT_OLDACL;
	} else {
		if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) &&
		    (zfsvfs->z_version >= ZPL_VERSION_FUID))
			zfs_acl_xform(zp, aclp, cr);
		ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID);
		otype = DMU_OT_ACL;
	}

	/* Too large to embed in the znode: use an external ACL object. */
	if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		/*
		 * If ACL was previously external and we are now
		 * converting to new ACL format then release old
		 * ACL object and create a new one.
		 */
		if (aoid && aclp->z_version != zacl->z_acl_version) {
			error = dmu_object_free(zfsvfs->z_os,
			    zp->z_phys->zp_acl.z_acl_extern_obj, tx);
			if (error)
				return (error);
			aoid = 0;
		}
		if (aoid == 0) {
			aoid = dmu_object_alloc(zfsvfs->z_os,
			    otype, aclp->z_acl_bytes,
			    otype == DMU_OT_ACL ? DMU_OT_SYSACL : DMU_OT_NONE,
			    otype == DMU_OT_ACL ? DN_MAX_BONUSLEN : 0, tx);
		} else {
			/* Reuse the existing object, resized for the new ACL. */
			(void) dmu_object_set_blocksize(zfsvfs->z_os, aoid,
			    aclp->z_acl_bytes, 0, tx);
		}
		zphys->zp_acl.z_acl_extern_obj = aoid;
		/* Write the nodes back to back, skipping empty ones. */
		for (aclnode = list_head(&aclp->z_acl); aclnode;
		    aclnode = list_next(&aclp->z_acl, aclnode)) {
			if (aclnode->z_ace_count == 0)
				continue;
			dmu_write(zfsvfs->z_os, aoid, off,
			    aclnode->z_size, aclnode->z_acldata, tx);
			off += aclnode->z_size;
		}
	} else {
		void *start = zacl->z_ace_data;
		/*
		 * Migrating back embedded?
		 */
		if (zphys->zp_acl.z_acl_extern_obj) {
			error = dmu_object_free(zfsvfs->z_os,
			    zp->z_phys->zp_acl.z_acl_extern_obj, tx);
			if (error)
				return (error);
			zphys->zp_acl.z_acl_extern_obj = 0;
		}

		/* Copy the nodes back to back into the embedded area. */
		for (aclnode = list_head(&aclp->z_acl); aclnode;
		    aclnode = list_next(&aclp->z_acl, aclnode)) {
			if (aclnode->z_ace_count == 0)
				continue;
			bcopy(aclnode->z_acldata, start, aclnode->z_size);
			start = (caddr_t)start + aclnode->z_size;
		}
	}

	/*
	 * If Old version then swap count/bytes to match old
	 * layout of znode_acl_phys_t.
	 */
	if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
		zphys->zp_acl.z_acl_size = aclp->z_acl_count;
		zphys->zp_acl.z_acl_count = aclp->z_acl_bytes;
	} else {
		zphys->zp_acl.z_acl_size = aclp->z_acl_bytes;
		zphys->zp_acl.z_acl_count = aclp->z_acl_count;
	}

	zphys->zp_acl.z_acl_version = aclp->z_version;

	/*
	 * Replace ACL wide bits, but first clear them.
	 */
	zp->z_phys->zp_flags &= ~ZFS_ACL_WIDE_FLAGS;

	zp->z_phys->zp_flags |= aclp->z_hints;

	/* ace_trivial_common() returning 0 is taken to mean "trivial". */
	if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0)
		zp->z_phys->zp_flags |= ZFS_ACL_TRIVIAL;

	return (0);
}
1160
1161/*
1162 * Update access mask for prepended ACE
1163 *
1164 * This applies the "groupmask" value for aclmode property.
1165 */
1166static void
1167zfs_acl_prepend_fixup(zfs_acl_t *aclp, void  *acep, void  *origacep,
1168    mode_t mode, uint64_t owner)
1169{
1170	int	rmask, wmask, xmask;
1171	int	user_ace;
1172	uint16_t aceflags;
1173	uint32_t origmask, acepmask;
1174	uint64_t fuid;
1175
1176	aceflags = aclp->z_ops.ace_flags_get(acep);
1177	fuid = aclp->z_ops.ace_who_get(acep);
1178	origmask = aclp->z_ops.ace_mask_get(origacep);
1179	acepmask = aclp->z_ops.ace_mask_get(acep);
1180
1181	user_ace = (!(aceflags &
1182	    (ACE_OWNER|ACE_GROUP|ACE_IDENTIFIER_GROUP)));
1183
1184	if (user_ace && (fuid == owner)) {
1185		rmask = S_IRUSR;
1186		wmask = S_IWUSR;
1187		xmask = S_IXUSR;
1188	} else {
1189		rmask = S_IRGRP;
1190		wmask = S_IWGRP;
1191		xmask = S_IXGRP;
1192	}
1193
1194	if (origmask & ACE_READ_DATA) {
1195		if (mode & rmask) {
1196			acepmask &= ~ACE_READ_DATA;
1197		} else {
1198			acepmask |= ACE_READ_DATA;
1199		}
1200	}
1201
1202	if (origmask & ACE_WRITE_DATA) {
1203		if (mode & wmask) {
1204			acepmask &= ~ACE_WRITE_DATA;
1205		} else {
1206			acepmask |= ACE_WRITE_DATA;
1207		}
1208	}
1209
1210	if (origmask & ACE_APPEND_DATA) {
1211		if (mode & wmask) {
1212			acepmask &= ~ACE_APPEND_DATA;
1213		} else {
1214			acepmask |= ACE_APPEND_DATA;
1215		}
1216	}
1217
1218	if (origmask & ACE_EXECUTE) {
1219		if (mode & xmask) {
1220			acepmask &= ~ACE_EXECUTE;
1221		} else {
1222			acepmask |= ACE_EXECUTE;
1223		}
1224	}
1225	aclp->z_ops.ace_mask_set(acep, acepmask);
1226}
1227
1228/*
1229 * Apply mode to canonical six ACEs.
1230 */
1231static void
1232zfs_acl_fixup_canonical_six(zfs_acl_t *aclp, mode_t mode)
1233{
1234	zfs_acl_node_t *aclnode = list_tail(&aclp->z_acl);
1235	void	*acep;
1236	int	maskoff = aclp->z_ops.ace_mask_off();
1237	size_t abstract_size = aclp->z_ops.ace_abstract_size();
1238
1239	ASSERT(aclnode != NULL);
1240
1241	acep = (void *)((caddr_t)aclnode->z_acldata +
1242	    aclnode->z_size - (abstract_size * 6));
1243
1244	/*
1245	 * Fixup final ACEs to match the mode
1246	 */
1247
1248	adjust_ace_pair_common(acep, maskoff, abstract_size,
1249	    (mode & 0700) >> 6);	/* owner@ */
1250
1251	acep = (caddr_t)acep + (abstract_size * 2);
1252
1253	adjust_ace_pair_common(acep, maskoff, abstract_size,
1254	    (mode & 0070) >> 3);	/* group@ */
1255
1256	acep = (caddr_t)acep + (abstract_size * 2);
1257	adjust_ace_pair_common(acep, maskoff,
1258	    abstract_size, mode);	/* everyone@ */
1259}
1260
1261
1262static int
1263zfs_acl_ace_match(zfs_acl_t *aclp, void *acep, int allow_deny,
1264    int entry_type, int accessmask)
1265{
1266	uint32_t mask = aclp->z_ops.ace_mask_get(acep);
1267	uint16_t type = aclp->z_ops.ace_type_get(acep);
1268	uint16_t flags = aclp->z_ops.ace_flags_get(acep);
1269
1270	return (mask == accessmask && type == allow_deny &&
1271	    ((flags & ACE_TYPE_FLAGS) == entry_type));
1272}
1273
1274/*
1275 * Can prepended ACE be reused?
1276 */
1277static int
1278zfs_reuse_deny(zfs_acl_t *aclp, void *acep, void *prevacep)
1279{
1280	int okay_masks;
1281	uint16_t prevtype;
1282	uint16_t prevflags;
1283	uint16_t flags;
1284	uint32_t mask, prevmask;
1285
1286	if (prevacep == NULL)
1287		return (B_FALSE);
1288
1289	prevtype = aclp->z_ops.ace_type_get(prevacep);
1290	prevflags = aclp->z_ops.ace_flags_get(prevacep);
1291	flags = aclp->z_ops.ace_flags_get(acep);
1292	mask = aclp->z_ops.ace_mask_get(acep);
1293	prevmask = aclp->z_ops.ace_mask_get(prevacep);
1294
1295	if (prevtype != DENY)
1296		return (B_FALSE);
1297
1298	if (prevflags != (flags & ACE_IDENTIFIER_GROUP))
1299		return (B_FALSE);
1300
1301	okay_masks = (mask & OKAY_MASK_BITS);
1302
1303	if (prevmask & ~okay_masks)
1304		return (B_FALSE);
1305
1306	return (B_TRUE);
1307}
1308
1309
1310/*
1311 * Insert new ACL node into chain of zfs_acl_node_t's
1312 *
1313 * This will result in two possible results.
1314 * 1. If the ACL is currently just a single zfs_acl_node and
1315 *    we are prepending the entry then current acl node will have
1316 *    a new node inserted above it.
1317 *
1318 * 2. If we are inserting in the middle of current acl node then
1319 *    the current node will be split in two and new node will be inserted
1320 *    in between the two split nodes.
1321 */
1322static zfs_acl_node_t *
1323zfs_acl_ace_insert(zfs_acl_t *aclp, void  *acep)
1324{
1325	zfs_acl_node_t 	*newnode;
1326	zfs_acl_node_t 	*trailernode = NULL;
1327	zfs_acl_node_t 	*currnode = zfs_acl_curr_node(aclp);
1328	int		curr_idx = aclp->z_curr_node->z_ace_idx;
1329	int		trailer_count;
1330	size_t		oldsize;
1331
1332	newnode = zfs_acl_node_alloc(aclp->z_ops.ace_size(acep));
1333	newnode->z_ace_count = 1;
1334
1335	oldsize = currnode->z_size;
1336
1337	if (curr_idx != 1) {
1338		trailernode = zfs_acl_node_alloc(0);
1339		trailernode->z_acldata = acep;
1340
1341		trailer_count = currnode->z_ace_count - curr_idx + 1;
1342		currnode->z_ace_count = curr_idx - 1;
1343		currnode->z_size = (caddr_t)acep - (caddr_t)currnode->z_acldata;
1344		trailernode->z_size = oldsize - currnode->z_size;
1345		trailernode->z_ace_count = trailer_count;
1346	}
1347
1348	aclp->z_acl_count += 1;
1349	aclp->z_acl_bytes += aclp->z_ops.ace_size(acep);
1350
1351	if (curr_idx == 1)
1352		list_insert_before(&aclp->z_acl, currnode, newnode);
1353	else
1354		list_insert_after(&aclp->z_acl, currnode, newnode);
1355	if (trailernode) {
1356		list_insert_after(&aclp->z_acl, newnode, trailernode);
1357		aclp->z_curr_node = trailernode;
1358		trailernode->z_ace_idx = 1;
1359	}
1360
1361	return (newnode);
1362}
1363
1364/*
1365 * Prepend deny ACE
1366 */
1367static void *
1368zfs_acl_prepend_deny(uint64_t uid, zfs_acl_t *aclp, void *acep,
1369    mode_t mode)
1370{
1371	zfs_acl_node_t *aclnode;
1372	void  *newacep;
1373	uint64_t fuid;
1374	uint16_t flags;
1375
1376	aclnode = zfs_acl_ace_insert(aclp, acep);
1377	newacep = aclnode->z_acldata;
1378	fuid = aclp->z_ops.ace_who_get(acep);
1379	flags = aclp->z_ops.ace_flags_get(acep);
1380	zfs_set_ace(aclp, newacep, 0, DENY, fuid, (flags & ACE_TYPE_FLAGS));
1381	zfs_acl_prepend_fixup(aclp, newacep, acep, mode, uid);
1382
1383	return (newacep);
1384}
1385
1386/*
1387 * Split an inherited ACE into inherit_only ACE
1388 * and original ACE with inheritance flags stripped off.
1389 */
1390static void
1391zfs_acl_split_ace(zfs_acl_t *aclp, zfs_ace_hdr_t *acep)
1392{
1393	zfs_acl_node_t *aclnode;
1394	zfs_acl_node_t *currnode;
1395	void  *newacep;
1396	uint16_t type, flags;
1397	uint32_t mask;
1398	uint64_t fuid;
1399
1400	type = aclp->z_ops.ace_type_get(acep);
1401	flags = aclp->z_ops.ace_flags_get(acep);
1402	mask = aclp->z_ops.ace_mask_get(acep);
1403	fuid = aclp->z_ops.ace_who_get(acep);
1404
1405	aclnode = zfs_acl_ace_insert(aclp, acep);
1406	newacep = aclnode->z_acldata;
1407
1408	aclp->z_ops.ace_type_set(newacep, type);
1409	aclp->z_ops.ace_flags_set(newacep, flags | ACE_INHERIT_ONLY_ACE);
1410	aclp->z_ops.ace_mask_set(newacep, mask);
1411	aclp->z_ops.ace_type_set(newacep, type);
1412	aclp->z_ops.ace_who_set(newacep, fuid);
1413	aclp->z_next_ace = acep;
1414	flags &= ~ALL_INHERIT;
1415	aclp->z_ops.ace_flags_set(acep, flags);
1416	currnode = zfs_acl_curr_node(aclp);
1417	ASSERT(currnode->z_ace_idx >= 1);
1418	currnode->z_ace_idx -= 1;
1419}
1420
1421/*
1422 * Are ACES started at index i, the canonical six ACES?
1423 */
1424static int
1425zfs_have_canonical_six(zfs_acl_t *aclp)
1426{
1427	void *acep;
1428	zfs_acl_node_t *aclnode = list_tail(&aclp->z_acl);
1429	int		i = 0;
1430	size_t abstract_size = aclp->z_ops.ace_abstract_size();
1431
1432	ASSERT(aclnode != NULL);
1433
1434	if (aclnode->z_ace_count < 6)
1435		return (0);
1436
1437	acep = (void *)((caddr_t)aclnode->z_acldata +
1438	    aclnode->z_size - (aclp->z_ops.ace_abstract_size() * 6));
1439
1440	if ((zfs_acl_ace_match(aclp, (caddr_t)acep + (abstract_size * i++),
1441	    DENY, ACE_OWNER, 0) &&
1442	    zfs_acl_ace_match(aclp, (caddr_t)acep + (abstract_size * i++),
1443	    ALLOW, ACE_OWNER, OWNER_ALLOW_MASK) &&
1444	    zfs_acl_ace_match(aclp, (caddr_t)acep + (abstract_size * i++), DENY,
1445	    OWNING_GROUP, 0) && zfs_acl_ace_match(aclp, (caddr_t)acep +
1446	    (abstract_size * i++),
1447	    ALLOW, OWNING_GROUP, 0) &&
1448	    zfs_acl_ace_match(aclp, (caddr_t)acep + (abstract_size * i++),
1449	    DENY, ACE_EVERYONE, EVERYONE_DENY_MASK) &&
1450	    zfs_acl_ace_match(aclp, (caddr_t)acep + (abstract_size * i++),
1451	    ALLOW, ACE_EVERYONE, EVERYONE_ALLOW_MASK))) {
1452		return (1);
1453	} else {
1454		return (0);
1455	}
1456}
1457
1458
1459/*
1460 * Apply step 1g, to group entries
1461 *
1462 * Need to deal with corner case where group may have
1463 * greater permissions than owner.  If so then limit
1464 * group permissions, based on what extra permissions
1465 * group has.
1466 */
1467static void
1468zfs_fixup_group_entries(zfs_acl_t *aclp, void *acep, void *prevacep,
1469    mode_t mode)
1470{
1471	uint32_t prevmask = aclp->z_ops.ace_mask_get(prevacep);
1472	uint32_t mask = aclp->z_ops.ace_mask_get(acep);
1473	uint16_t prevflags = aclp->z_ops.ace_flags_get(prevacep);
1474	mode_t extramode = (mode >> 3) & 07;
1475	mode_t ownermode = (mode >> 6);
1476
1477	if (prevflags & ACE_IDENTIFIER_GROUP) {
1478
1479		extramode &= ~ownermode;
1480
1481		if (extramode) {
1482			if (extramode & S_IROTH) {
1483				prevmask &= ~ACE_READ_DATA;
1484				mask &= ~ACE_READ_DATA;
1485			}
1486			if (extramode & S_IWOTH) {
1487				prevmask &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
1488				mask &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
1489			}
1490			if (extramode & S_IXOTH) {
1491				prevmask  &= ~ACE_EXECUTE;
1492				mask &= ~ACE_EXECUTE;
1493			}
1494		}
1495	}
1496	aclp->z_ops.ace_mask_set(acep, mask);
1497	aclp->z_ops.ace_mask_set(prevacep, prevmask);
1498}
1499
/*
 * Apply the chmod algorithm as described
 * in PSARC/2002/240
 *
 * Rewrites the in-memory ACL aclp so that it agrees with mode:
 *
 *  - When the dataset's z_acl_mode is ZFS_ACL_DISCARD, all existing ACL
 *    nodes are released first.
 *  - Every remaining ACE is visited.  ACEs that are neither ALLOW nor
 *    DENY, or that are inherit-only, are left untouched (only z_hints is
 *    updated).  ACEs that carry file/directory inherit flags are split
 *    with zfs_acl_split_ace().  owner@, group@ and everyone@ ACEs get the
 *    OGE_CLEAR bits removed from their mask.  For any other ALLOW ACE,
 *    when z_acl_mode is ZFS_ACL_GROUPMASK, a preceding DENY ACE is either
 *    reused or newly prepended and both masks are fixed up from mode.
 *  - If the ACL does not already end in the canonical six ACEs, a node
 *    holding them is appended.
 *  - Finally the canonical six are adjusted to match mode.
 *
 * uid is passed through to the prepend helpers as the owner id to compare
 * user entries against.
 */
static void
zfs_acl_chmod(zfsvfs_t *zfsvfs, uint64_t uid,
    uint64_t mode, zfs_acl_t *aclp)
{
	void		*acep = NULL, *prevacep = NULL;
	uint64_t	who;
	int 		i;
	int 		entry_type;
	int 		reuse_deny;
	int 		need_canonical_six = 1;
	uint16_t	iflags, type;
	uint32_t	access_mask;

	/*
	 * If discard then just discard all ACL nodes which
	 * represent the ACEs.
	 *
	 * New owner@/group@/everyone@ ACEs will be added
	 * later.
	 */
	if (zfsvfs->z_acl_mode == ZFS_ACL_DISCARD)
		zfs_acl_release_nodes(aclp);

	while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
	    &iflags, &type)) {

		/*
		 * Separate the "who kind" bits from the inheritance bits;
		 * from here on iflags holds inheritance flags only.
		 */
		entry_type = (iflags & ACE_TYPE_FLAGS);
		iflags = (iflags & ALL_INHERIT);

		/*
		 * Non ALLOW/DENY ACEs and inherit-only ACEs are not
		 * modified; just record the hints they imply.
		 */
		if ((type != ALLOW && type != DENY) ||
		    (iflags & ACE_INHERIT_ONLY_ACE)) {
			if (iflags)
				aclp->z_hints |= ZFS_INHERIT_ACE;
			switch (type) {
			case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
			case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
			case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
			case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
				aclp->z_hints |= ZFS_ACL_OBJ_ACE;
				break;
			}
			goto nextace;
		}

		/*
		 * Need to split ace into two?
		 */
		if ((iflags & (ACE_FILE_INHERIT_ACE|
		    ACE_DIRECTORY_INHERIT_ACE)) &&
		    (!(iflags & ACE_INHERIT_ONLY_ACE))) {
			zfs_acl_split_ace(aclp, acep);
			aclp->z_hints |= ZFS_INHERIT_ACE;
			goto nextace;
		}

		if (entry_type == ACE_OWNER || entry_type == ACE_EVERYONE ||
		    (entry_type == OWNING_GROUP)) {
			/* owner@/group@/everyone@: drop the OGE_CLEAR bits */
			access_mask &= ~OGE_CLEAR;
			aclp->z_ops.ace_mask_set(acep, access_mask);
			goto nextace;
		} else {
			reuse_deny = B_TRUE;
			if (type == ALLOW) {

				/*
				 * Check preceding ACE if any, to see
				 * if we need to prepend a DENY ACE.
				 * This is only applicable when the acl_mode
				 * property == groupmask.
				 */
				if (zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK) {

					reuse_deny = zfs_reuse_deny(aclp, acep,
					    prevacep);

					if (!reuse_deny) {
						prevacep =
						    zfs_acl_prepend_deny(uid,
						    aclp, acep, mode);
					} else {
						zfs_acl_prepend_fixup(
						    aclp, prevacep,
						    acep, mode, uid);
					}
					zfs_fixup_group_entries(aclp, acep,
					    prevacep, mode);
				}
			}
		}
nextace:
		/* Remember this ACE as the predecessor of the next one. */
		prevacep = acep;
	}

	/*
	 * Check out last six aces, if we have six.
	 */

	if (aclp->z_acl_count >= 6) {
		if (zfs_have_canonical_six(aclp)) {
			need_canonical_six = 0;
		}
	}

	if (need_canonical_six) {
		/*
		 * Append one node holding the six canonical ACEs:
		 * owner@ deny/allow, group@ deny/allow, everyone@ deny/allow.
		 */
		size_t abstract_size = aclp->z_ops.ace_abstract_size();
		void *zacep;
		zfs_acl_node_t *aclnode =
		    zfs_acl_node_alloc(abstract_size * 6);

		aclnode->z_size = abstract_size * 6;
		aclnode->z_ace_count = 6;
		aclp->z_acl_bytes += aclnode->z_size;
		list_insert_tail(&aclp->z_acl, aclnode);

		zacep = aclnode->z_acldata;

		i = 0;
		zfs_set_ace(aclp, (caddr_t)zacep + (abstract_size * i++),
		    0, DENY, -1, ACE_OWNER);
		zfs_set_ace(aclp, (caddr_t)zacep + (abstract_size * i++),
		    OWNER_ALLOW_MASK, ALLOW, -1, ACE_OWNER);
		zfs_set_ace(aclp, (caddr_t)zacep + (abstract_size * i++), 0,
		    DENY, -1, OWNING_GROUP);
		zfs_set_ace(aclp, (caddr_t)zacep + (abstract_size * i++), 0,
		    ALLOW, -1, OWNING_GROUP);
		zfs_set_ace(aclp, (caddr_t)zacep + (abstract_size * i++),
		    EVERYONE_DENY_MASK, DENY, -1, ACE_EVERYONE);
		zfs_set_ace(aclp, (caddr_t)zacep + (abstract_size * i++),
		    EVERYONE_ALLOW_MASK, ALLOW, -1, ACE_EVERYONE);
		aclp->z_acl_count += 6;
	}

	/* Fold the requested mode into the trailing canonical six. */
	zfs_acl_fixup_canonical_six(aclp, mode);
}
1638
1639int
1640zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode)
1641{
1642	int error;
1643
1644	mutex_enter(&zp->z_lock);
1645	mutex_enter(&zp->z_acl_lock);
1646	*aclp = NULL;
1647	error = zfs_acl_node_read(zp, aclp, B_TRUE);
1648	if (error == 0) {
1649		(*aclp)->z_hints = zp->z_phys->zp_flags & V4_ACL_WIDE_FLAGS;
1650		zfs_acl_chmod(zp->z_zfsvfs, zp->z_phys->zp_uid, mode, *aclp);
1651	}
1652	mutex_exit(&zp->z_acl_lock);
1653	mutex_exit(&zp->z_lock);
1654	return (error);
1655}
1656
1657/*
1658 * strip off write_owner and write_acl
1659 */
1660static void
1661zfs_restricted_update(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, void *acep)
1662{
1663	uint32_t mask = aclp->z_ops.ace_mask_get(acep);
1664
1665	if ((zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED) &&
1666	    (aclp->z_ops.ace_type_get(acep) == ALLOW)) {
1667		mask &= ~RESTRICTED_CLEAR;
1668		aclp->z_ops.ace_mask_set(acep, mask);
1669	}
1670}
1671
1672/*
1673 * Should ACE be inherited?
1674 */
1675static int
1676zfs_ace_can_use(vtype_t vtype, uint16_t acep_flags)
1677{
1678	int	iflags = (acep_flags & 0xf);
1679
1680	if ((vtype == VDIR) && (iflags & ACE_DIRECTORY_INHERIT_ACE))
1681		return (1);
1682	else if (iflags & ACE_FILE_INHERIT_ACE)
1683		return (!((vtype == VDIR) &&
1684		    (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)));
1685	return (0);
1686}
1687
1688/*
1689 * inherit inheritable ACEs from parent
1690 */
1691static zfs_acl_t *
1692zfs_acl_inherit(zfsvfs_t *zfsvfs, vtype_t vtype, zfs_acl_t *paclp,
1693    uint64_t mode, boolean_t *need_chmod)
1694{
1695	void		*pacep;
1696	void		*acep, *acep2;
1697	zfs_acl_node_t  *aclnode, *aclnode2;
1698	zfs_acl_t	*aclp = NULL;
1699	uint64_t	who;
1700	uint32_t	access_mask;
1701	uint16_t	iflags, newflags, type;
1702	size_t		ace_size;
1703	void		*data1, *data2;
1704	size_t		data1sz, data2sz;
1705	boolean_t	vdir = vtype == VDIR;
1706	boolean_t	vreg = vtype == VREG;
1707	boolean_t	passthrough, passthrough_x, noallow;
1708
1709	passthrough_x =
1710	    zfsvfs->z_acl_inherit == ZFS_ACL_PASSTHROUGH_X;
1711	passthrough = passthrough_x ||
1712	    zfsvfs->z_acl_inherit == ZFS_ACL_PASSTHROUGH;
1713	noallow =
1714	    zfsvfs->z_acl_inherit == ZFS_ACL_NOALLOW;
1715
1716	*need_chmod = B_TRUE;
1717	pacep = NULL;
1718	aclp = zfs_acl_alloc(paclp->z_version);
1719	if (zfsvfs->z_acl_inherit == ZFS_ACL_DISCARD)
1720		return (aclp);
1721	while (pacep = zfs_acl_next_ace(paclp, pacep, &who,
1722	    &access_mask, &iflags, &type)) {
1723
1724		/*
1725		 * don't inherit bogus ACEs
1726		 */
1727		if (!zfs_acl_valid_ace_type(type, iflags))
1728			continue;
1729
1730		if (noallow && type == ALLOW)
1731			continue;
1732
1733		ace_size = aclp->z_ops.ace_size(pacep);
1734
1735		if (!zfs_ace_can_use(vtype, iflags))
1736			continue;
1737
1738		/*
1739		 * If owner@, group@, or everyone@ inheritable
1740		 * then zfs_acl_chmod() isn't needed.
1741		 */
1742		if (passthrough &&
1743		    ((iflags & (ACE_OWNER|ACE_EVERYONE)) ||
1744		    ((iflags & OWNING_GROUP) ==
1745		    OWNING_GROUP)) && (vreg || (vdir && (iflags &
1746		    ACE_DIRECTORY_INHERIT_ACE)))) {
1747			*need_chmod = B_FALSE;
1748
1749			if (!vdir && passthrough_x &&
1750			    ((mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)) {
1751				access_mask &= ~ACE_EXECUTE;
1752			}
1753		}
1754
1755		aclnode = zfs_acl_node_alloc(ace_size);
1756		list_insert_tail(&aclp->z_acl, aclnode);
1757		acep = aclnode->z_acldata;
1758
1759		zfs_set_ace(aclp, acep, access_mask, type,
1760		    who, iflags|ACE_INHERITED_ACE);
1761
1762		/*
1763		 * Copy special opaque data if any
1764		 */
1765		if ((data1sz = paclp->z_ops.ace_data(pacep, &data1)) != 0) {
1766			VERIFY((data2sz = aclp->z_ops.ace_data(acep,
1767			    &data2)) == data1sz);
1768			bcopy(data1, data2, data2sz);
1769		}
1770		aclp->z_acl_count++;
1771		aclnode->z_ace_count++;
1772		aclp->z_acl_bytes += aclnode->z_size;
1773		newflags = aclp->z_ops.ace_flags_get(acep);
1774
1775		if (vdir)
1776			aclp->z_hints |= ZFS_INHERIT_ACE;
1777
1778		if ((iflags & ACE_NO_PROPAGATE_INHERIT_ACE) || !vdir) {
1779			newflags &= ~ALL_INHERIT;
1780			aclp->z_ops.ace_flags_set(acep,
1781			    newflags|ACE_INHERITED_ACE);
1782			zfs_restricted_update(zfsvfs, aclp, acep);
1783			continue;
1784		}
1785
1786		ASSERT(vdir);
1787
1788		newflags = aclp->z_ops.ace_flags_get(acep);
1789		if ((iflags & (ACE_FILE_INHERIT_ACE |
1790		    ACE_DIRECTORY_INHERIT_ACE)) !=
1791		    ACE_FILE_INHERIT_ACE) {
1792			aclnode2 = zfs_acl_node_alloc(ace_size);
1793			list_insert_tail(&aclp->z_acl, aclnode2);
1794			acep2 = aclnode2->z_acldata;
1795			zfs_set_ace(aclp, acep2,
1796			    access_mask, type, who,
1797			    iflags|ACE_INHERITED_ACE);
1798			newflags |= ACE_INHERIT_ONLY_ACE;
1799			aclp->z_ops.ace_flags_set(acep, newflags);
1800			newflags &= ~ALL_INHERIT;
1801			aclp->z_ops.ace_flags_set(acep2,
1802			    newflags|ACE_INHERITED_ACE);
1803
1804			/*
1805			 * Copy special opaque data if any
1806			 */
1807			if ((data1sz = aclp->z_ops.ace_data(acep,
1808			    &data1)) != 0) {
1809				VERIFY((data2sz =
1810				    aclp->z_ops.ace_data(acep2,
1811				    &data2)) == data1sz);
1812				bcopy(data1, data2, data1sz);
1813			}
1814			aclp->z_acl_count++;
1815			aclnode2->z_ace_count++;
1816			aclp->z_acl_bytes += aclnode->z_size;
1817			zfs_restricted_update(zfsvfs, aclp, acep2);
1818		} else {
1819			newflags |= ACE_INHERIT_ONLY_ACE;
1820			aclp->z_ops.ace_flags_set(acep,
1821			    newflags|ACE_INHERITED_ACE);
1822		}
1823	}
1824	return (aclp);
1825}
1826
1827/*
1828 * Create file system object initial permissions
1829 * including inheritable ACEs.
1830 */
1831int
1832zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
1833    vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids)
1834{
1835	int		error;
1836	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
1837	zfs_acl_t	*paclp;
1838	gid_t		gid;
1839	boolean_t	need_chmod = B_TRUE;
1840
1841	bzero(acl_ids, sizeof (zfs_acl_ids_t));
1842	acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode);
1843
1844	if (vsecp)
1845		if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, cr,
1846		    &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0)
1847			return (error);
1848
1849	/*
1850	 * Determine uid and gid.
1851	 */
1852	if ((flag & IS_ROOT_NODE) || zfsvfs->z_replay ||
1853	    ((flag & IS_XATTR) && (vap->va_type == VDIR))) {
1854		acl_ids->z_fuid = zfs_fuid_create(zfsvfs,
1855		    (uint64_t)vap->va_uid, cr,
1856		    ZFS_OWNER, &acl_ids->z_fuidp);
1857		acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
1858		    (uint64_t)vap->va_gid, cr,
1859		    ZFS_GROUP, &acl_ids->z_fuidp);
1860		gid = vap->va_gid;
1861	} else {
1862		acl_ids->z_fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER,
1863		    cr, &acl_ids->z_fuidp);
1864		acl_ids->z_fgid = 0;
1865		if (vap->va_mask & AT_GID)  {
1866			acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
1867			    (uint64_t)vap->va_gid,
1868			    cr, ZFS_GROUP, &acl_ids->z_fuidp);
1869			gid = vap->va_gid;
1870			if (acl_ids->z_fgid != dzp->z_phys->zp_gid &&
1871			    !groupmember(vap->va_gid, cr) &&
1872			    secpolicy_vnode_create_gid(cr) != 0)
1873				acl_ids->z_fgid = 0;
1874		}
1875		if (acl_ids->z_fgid == 0) {
1876			if (dzp->z_phys->zp_mode & S_ISGID) {
1877				char		*domain;
1878				uint32_t	rid;
1879
1880				acl_ids->z_fgid = dzp->z_phys->zp_gid;
1881				gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid,
1882				    cr, ZFS_GROUP);
1883
1884				if (zfsvfs->z_use_fuids &&
1885				    IS_EPHEMERAL(acl_ids->z_fgid)) {
1886					domain = zfs_fuid_idx_domain(
1887					    &zfsvfs->z_fuid_idx,
1888					    FUID_INDEX(acl_ids->z_fgid));
1889					rid = FUID_RID(acl_ids->z_fgid);
1890					zfs_fuid_node_add(&acl_ids->z_fuidp,
1891					    domain, rid,
1892					    FUID_INDEX(acl_ids->z_fgid),
1893					    acl_ids->z_fgid, ZFS_GROUP);
1894				}
1895			} else {
1896				acl_ids->z_fgid = zfs_fuid_create_cred(zfsvfs,
1897				    ZFS_GROUP, cr, &acl_ids->z_fuidp);
1898				gid = crgetgid(cr);
1899			}
1900		}
1901	}
1902
1903	/*
1904	 * If we're creating a directory, and the parent directory has the
1905	 * set-GID bit set, set in on the new directory.
1906	 * Otherwise, if the user is neither privileged nor a member of the
1907	 * file's new group, clear the file's set-GID bit.
1908	 */
1909
1910	if (!(flag & IS_ROOT_NODE) && (dzp->z_phys->zp_mode & S_ISGID) &&
1911	    (vap->va_type == VDIR)) {
1912		acl_ids->z_mode |= S_ISGID;
1913	} else {
1914		if ((acl_ids->z_mode & S_ISGID) &&
1915		    secpolicy_vnode_setids_setgids(cr, gid) != 0)
1916			acl_ids->z_mode &= ~S_ISGID;
1917	}
1918
1919	if (acl_ids->z_aclp == NULL) {
1920		mutex_enter(&dzp->z_lock);
1921		if (!(flag & IS_ROOT_NODE) && (ZTOV(dzp)->v_type == VDIR &&
1922		    (dzp->z_phys->zp_flags & ZFS_INHERIT_ACE)) &&
1923		    !(dzp->z_phys->zp_flags & ZFS_XATTR)) {
1924			mutex_enter(&dzp->z_acl_lock);
1925			VERIFY(0 == zfs_acl_node_read(dzp, &paclp, B_FALSE));
1926			mutex_exit(&dzp->z_acl_lock);
1927			acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
1928			    vap->va_type, paclp, acl_ids->z_mode, &need_chmod);
1929		} else {
1930			acl_ids->z_aclp =
1931			    zfs_acl_alloc(zfs_acl_version_zp(dzp));
1932		}
1933		mutex_exit(&dzp->z_lock);
1934		if (need_chmod) {
1935			acl_ids->z_aclp->z_hints = (vap->va_type == VDIR) ?
1936			    ZFS_ACL_AUTO_INHERIT : 0;
1937			zfs_acl_chmod(zfsvfs, acl_ids->z_fuid,
1938			    acl_ids->z_mode, acl_ids->z_aclp);
1939		}
1940	}
1941
1942	return (0);
1943}
1944
1945/*
1946 * Free ACL and fuid_infop, but not the acl_ids structure
1947 */
1948void
1949zfs_acl_ids_free(zfs_acl_ids_t *acl_ids)
1950{
1951	if (acl_ids->z_aclp)
1952		zfs_acl_free(acl_ids->z_aclp);
1953	if (acl_ids->z_fuidp)
1954		zfs_fuid_info_free(acl_ids->z_fuidp);
1955	acl_ids->z_aclp = NULL;
1956	acl_ids->z_fuidp = NULL;
1957}
1958
1959boolean_t
1960zfs_acl_ids_overquota(zfsvfs_t *zfsvfs, zfs_acl_ids_t *acl_ids)
1961{
1962	return (zfs_usergroup_overquota(zfsvfs, B_FALSE, acl_ids->z_fuid) ||
1963	    zfs_usergroup_overquota(zfsvfs, B_TRUE, acl_ids->z_fgid));
1964}
1965
1966/*
1967 * Retrieve a files ACL
1968 */
1969int
1970zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
1971{
1972	zfs_acl_t	*aclp;
1973	ulong_t		mask;
1974	int		error;
1975	int 		count = 0;
1976	int		largeace = 0;
1977
1978	mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT |
1979	    VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES);
1980
1981	if (error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr))
1982		return (error);
1983
1984	if (mask == 0)
1985		return (ENOSYS);
1986
1987	mutex_enter(&zp->z_acl_lock);
1988
1989	error = zfs_acl_node_read(zp, &aclp, B_FALSE);
1990	if (error != 0) {
1991		mutex_exit(&zp->z_acl_lock);
1992		return (error);
1993	}
1994
1995	/*
1996	 * Scan ACL to determine number of ACEs
1997	 */
1998	if ((zp->z_phys->zp_flags & ZFS_ACL_OBJ_ACE) &&
1999	    !(mask & VSA_ACE_ALLTYPES)) {
2000		void *zacep = NULL;
2001		uint64_t who;
2002		uint32_t access_mask;
2003		uint16_t type, iflags;
2004
2005		while (zacep = zfs_acl_next_ace(aclp, zacep,
2006		    &who, &access_mask, &iflags, &type)) {
2007			switch (type) {
2008			case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
2009			case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
2010			case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
2011			case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
2012				largeace++;
2013				continue;
2014			default:
2015				count++;
2016			}
2017		}
2018		vsecp->vsa_aclcnt = count;
2019	} else
2020		count = aclp->z_acl_count;
2021
2022	if (mask & VSA_ACECNT) {
2023		vsecp->vsa_aclcnt = count;
2024	}
2025
2026	if (mask & VSA_ACE) {
2027		size_t aclsz;
2028
2029		aclsz = count * sizeof (ace_t) +
2030		    sizeof (ace_object_t) * largeace;
2031
2032		vsecp->vsa_aclentp = kmem_alloc(aclsz, KM_SLEEP);
2033		vsecp->vsa_aclentsz = aclsz;
2034
2035		if (aclp->z_version == ZFS_ACL_VERSION_FUID)
2036			zfs_copy_fuid_2_ace(zp->z_zfsvfs, aclp, cr,
2037			    vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES));
2038		else {
2039			zfs_acl_node_t *aclnode;
2040			void *start = vsecp->vsa_aclentp;
2041
2042			for (aclnode = list_head(&aclp->z_acl); aclnode;
2043			    aclnode = list_next(&aclp->z_acl, aclnode)) {
2044				bcopy(aclnode->z_acldata, start,
2045				    aclnode->z_size);
2046				start = (caddr_t)start + aclnode->z_size;
2047			}
2048			ASSERT((caddr_t)start - (caddr_t)vsecp->vsa_aclentp ==
2049			    aclp->z_acl_bytes);
2050		}
2051	}
2052	if (mask & VSA_ACE_ACLFLAGS) {
2053		vsecp->vsa_aclflags = 0;
2054		if (zp->z_phys->zp_flags & ZFS_ACL_DEFAULTED)
2055			vsecp->vsa_aclflags |= ACL_DEFAULTED;
2056		if (zp->z_phys->zp_flags & ZFS_ACL_PROTECTED)
2057			vsecp->vsa_aclflags |= ACL_PROTECTED;
2058		if (zp->z_phys->zp_flags & ZFS_ACL_AUTO_INHERIT)
2059			vsecp->vsa_aclflags |= ACL_AUTO_INHERIT;
2060	}
2061
2062	mutex_exit(&zp->z_acl_lock);
2063
2064	return (0);
2065}
2066
2067int
2068zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, vtype_t obj_type,
2069    vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp)
2070{
2071	zfs_acl_t *aclp;
2072	zfs_acl_node_t *aclnode;
2073	int aclcnt = vsecp->vsa_aclcnt;
2074	int error;
2075
2076	if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0)
2077		return (EINVAL);
2078
2079	aclp = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version));
2080
2081	aclp->z_hints = 0;
2082	aclnode = zfs_acl_node_alloc(aclcnt * sizeof (zfs_object_ace_t));
2083	if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
2084		if ((error = zfs_copy_ace_2_oldace(obj_type, aclp,
2085		    (ace_t *)vsecp->vsa_aclentp, aclnode->z_acldata,
2086		    aclcnt, &aclnode->z_size)) != 0) {
2087			zfs_acl_free(aclp);
2088			zfs_acl_node_free(aclnode);
2089			return (error);
2090		}
2091	} else {
2092		if ((error = zfs_copy_ace_2_fuid(zfsvfs, obj_type, aclp,
2093		    vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt,
2094		    &aclnode->z_size, fuidp, cr)) != 0) {
2095			zfs_acl_free(aclp);
2096			zfs_acl_node_free(aclnode);
2097			return (error);
2098		}
2099	}
2100	aclp->z_acl_bytes = aclnode->z_size;
2101	aclnode->z_ace_count = aclcnt;
2102	aclp->z_acl_count = aclcnt;
2103	list_insert_head(&aclp->z_acl, aclnode);
2104
2105	/*
2106	 * If flags are being set then add them to z_hints
2107	 */
2108	if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) {
2109		if (vsecp->vsa_aclflags & ACL_PROTECTED)
2110			aclp->z_hints |= ZFS_ACL_PROTECTED;
2111		if (vsecp->vsa_aclflags & ACL_DEFAULTED)
2112			aclp->z_hints |= ZFS_ACL_DEFAULTED;
2113		if (vsecp->vsa_aclflags & ACL_AUTO_INHERIT)
2114			aclp->z_hints |= ZFS_ACL_AUTO_INHERIT;
2115	}
2116
2117	*zaclp = aclp;
2118
2119	return (0);
2120}
2121
2122/*
2123 * Set a files ACL
2124 */
2125int
2126zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
2127{
2128	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
2129	zilog_t		*zilog = zfsvfs->z_log;
2130	ulong_t		mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);
2131	dmu_tx_t	*tx;
2132	int		error;
2133	zfs_acl_t	*aclp;
2134	zfs_fuid_info_t	*fuidp = NULL;
2135	boolean_t	fuid_dirtied;
2136
2137	if (mask == 0)
2138		return (ENOSYS);
2139
2140	if (zp->z_phys->zp_flags & ZFS_IMMUTABLE)
2141		return (EPERM);
2142
2143	if (error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr))
2144		return (error);
2145
2146	error = zfs_vsec_2_aclp(zfsvfs, ZTOV(zp)->v_type, vsecp, cr, &fuidp,
2147	    &aclp);
2148	if (error)
2149		return (error);
2150
2151	/*
2152	 * If ACL wide flags aren't being set then preserve any
2153	 * existing flags.
2154	 */
2155	if (!(vsecp->vsa_mask & VSA_ACE_ACLFLAGS)) {
2156		aclp->z_hints |= (zp->z_phys->zp_flags & V4_ACL_WIDE_FLAGS);
2157	}
2158top:
2159	mutex_enter(&zp->z_lock);
2160	mutex_enter(&zp->z_acl_lock);
2161
2162	tx = dmu_tx_create(zfsvfs->z_os);
2163	dmu_tx_hold_bonus(tx, zp->z_id);
2164
2165	if (zp->z_phys->zp_acl.z_acl_extern_obj) {
2166		/* Are we upgrading ACL? */
2167		if (zfsvfs->z_version <= ZPL_VERSION_FUID &&
2168		    zp->z_phys->zp_acl.z_acl_version ==
2169		    ZFS_ACL_VERSION_INITIAL) {
2170			dmu_tx_hold_free(tx,
2171			    zp->z_phys->zp_acl.z_acl_extern_obj,
2172			    0, DMU_OBJECT_END);
2173			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
2174			    0, aclp->z_acl_bytes);
2175		} else {
2176			dmu_tx_hold_write(tx,
2177			    zp->z_phys->zp_acl.z_acl_extern_obj,
2178			    0, aclp->z_acl_bytes);
2179		}
2180	} else if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
2181		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes);
2182	}
2183	fuid_dirtied = zfsvfs->z_fuid_dirty;
2184	if (fuid_dirtied)
2185		zfs_fuid_txhold(zfsvfs, tx);
2186
2187	error = dmu_tx_assign(tx, TXG_NOWAIT);
2188	if (error) {
2189		mutex_exit(&zp->z_acl_lock);
2190		mutex_exit(&zp->z_lock);
2191
2192		if (error == ERESTART) {
2193			dmu_tx_wait(tx);
2194			dmu_tx_abort(tx);
2195			goto top;
2196		}
2197		dmu_tx_abort(tx);
2198		zfs_acl_free(aclp);
2199		return (error);
2200	}
2201
2202	error = zfs_aclset_common(zp, aclp, cr, tx);
2203	ASSERT(error == 0);
2204	zp->z_acl_cached = aclp;
2205
2206	if (fuid_dirtied)
2207		zfs_fuid_sync(zfsvfs, tx);
2208
2209	zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
2210	zfs_log_acl(zilog, tx, zp, vsecp, fuidp);
2211
2212	if (fuidp)
2213		zfs_fuid_info_free(fuidp);
2214	dmu_tx_commit(tx);
2215done:
2216	mutex_exit(&zp->z_acl_lock);
2217	mutex_exit(&zp->z_lock);
2218
2219	return (error);
2220}
2221
2222/*
2223 * Check accesses of interest (AoI) against attributes of the dataset
2224 * such as read-only.  Returns zero if no AoI conflict with dataset
2225 * attributes, otherwise an appropriate errno is returned.
2226 */
2227static int
2228zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)
2229{
2230	if ((v4_mode & WRITE_MASK) &&
2231	    (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
2232	    (!IS_DEVVP(ZTOV(zp)) ||
2233	    (IS_DEVVP(ZTOV(zp)) && (v4_mode & WRITE_MASK_ATTRS)))) {
2234		return (EROFS);
2235	}
2236
2237	/*
2238	 * Only check for READONLY on non-directories.
2239	 */
2240	if ((v4_mode & WRITE_MASK_DATA) &&
2241	    (((ZTOV(zp)->v_type != VDIR) &&
2242	    (zp->z_phys->zp_flags & (ZFS_READONLY | ZFS_IMMUTABLE))) ||
2243	    (ZTOV(zp)->v_type == VDIR &&
2244	    (zp->z_phys->zp_flags & ZFS_IMMUTABLE)))) {
2245		return (EPERM);
2246	}
2247
2248	if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) &&
2249	    (zp->z_phys->zp_flags & ZFS_NOUNLINK)) {
2250		return (EPERM);
2251	}
2252
2253	if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) &&
2254	    (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED))) {
2255		return (EACCES);
2256	}
2257
2258	return (0);
2259}
2260
2261/*
2262 * The primary usage of this function is to loop through all of the
2263 * ACEs in the znode, determining what accesses of interest (AoI) to
2264 * the caller are allowed or denied.  The AoI are expressed as bits in
2265 * the working_mode parameter.  As each ACE is processed, bits covered
2266 * by that ACE are removed from the working_mode.  This removal
2267 * facilitates two things.  The first is that when the working mode is
2268 * empty (= 0), we know we've looked at all the AoI. The second is
2269 * that the ACE interpretation rules don't allow a later ACE to undo
2270 * something granted or denied by an earlier ACE.  Removing the
2271 * discovered access or denial enforces this rule.  At the end of
2272 * processing the ACEs, all AoI that were found to be denied are
2273 * placed into the working_mode, giving the caller a mask of denied
2274 * accesses.  Returns:
2275 *	0		if all AoI granted
2276 *	EACCESS 	if the denied mask is non-zero
2277 *	other error	if abnormal failure (e.g., IO error)
2278 *
2279 * A secondary usage of the function is to determine if any of the
2280 * AoI are granted.  If an ACE grants any access in
2281 * the working_mode, we immediately short circuit out of the function.
2282 * This mode is chosen by setting anyaccess to B_TRUE.  The
2283 * working_mode is not a denied access mask upon exit if the function
2284 * is used in this manner.
2285 */
2286static int
2287zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
2288    boolean_t anyaccess, cred_t *cr)
2289{
2290	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
2291	zfs_acl_t	*aclp;
2292	int		error;
2293	uid_t		uid = crgetuid(cr);
2294	uint64_t 	who;
2295	uint16_t	type, iflags;
2296	uint16_t	entry_type;
2297	uint32_t	access_mask;
2298	uint32_t	deny_mask = 0;
2299	zfs_ace_hdr_t	*acep = NULL;
2300	boolean_t	checkit;
2301	uid_t		fowner;
2302	uid_t		gowner;
2303
2304	zfs_fuid_map_ids(zp, cr, &fowner, &gowner);
2305
2306	mutex_enter(&zp->z_acl_lock);
2307
2308	error = zfs_acl_node_read(zp, &aclp, B_FALSE);
2309	if (error != 0) {
2310		mutex_exit(&zp->z_acl_lock);
2311		return (error);
2312	}
2313
2314	while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
2315	    &iflags, &type)) {
2316		uint32_t mask_matched;
2317
2318		if (!zfs_acl_valid_ace_type(type, iflags))
2319			continue;
2320
2321		if (ZTOV(zp)->v_type == VDIR && (iflags & ACE_INHERIT_ONLY_ACE))
2322			continue;
2323
2324		/* Skip ACE if it does not affect any AoI */
2325		mask_matched = (access_mask & *working_mode);
2326		if (!mask_matched)
2327			continue;
2328
2329		entry_type = (iflags & ACE_TYPE_FLAGS);
2330
2331		checkit = B_FALSE;
2332
2333		switch (entry_type) {
2334		case ACE_OWNER:
2335			if (uid == fowner)
2336				checkit = B_TRUE;
2337			break;
2338		case OWNING_GROUP:
2339			who = gowner;
2340			/*FALLTHROUGH*/
2341		case ACE_IDENTIFIER_GROUP:
2342			checkit = zfs_groupmember(zfsvfs, who, cr);
2343			break;
2344		case ACE_EVERYONE:
2345			checkit = B_TRUE;
2346			break;
2347
2348		/* USER Entry */
2349		default:
2350			if (entry_type == 0) {
2351				uid_t newid;
2352
2353				newid = zfs_fuid_map_id(zfsvfs, who, cr,
2354				    ZFS_ACE_USER);
2355				if (newid != IDMAP_WK_CREATOR_OWNER_UID &&
2356				    uid == newid)
2357					checkit = B_TRUE;
2358				break;
2359			} else {
2360				mutex_exit(&zp->z_acl_lock);
2361				return (EIO);
2362			}
2363		}
2364
2365		if (checkit) {
2366			if (type == DENY) {
2367				DTRACE_PROBE3(zfs__ace__denies,
2368				    znode_t *, zp,
2369				    zfs_ace_hdr_t *, acep,
2370				    uint32_t, mask_matched);
2371				deny_mask |= mask_matched;
2372			} else {
2373				DTRACE_PROBE3(zfs__ace__allows,
2374				    znode_t *, zp,
2375				    zfs_ace_hdr_t *, acep,
2376				    uint32_t, mask_matched);
2377				if (anyaccess) {
2378					mutex_exit(&zp->z_acl_lock);
2379					return (0);
2380				}
2381			}
2382			*working_mode &= ~mask_matched;
2383		}
2384
2385		/* Are we done? */
2386		if (*working_mode == 0)
2387			break;
2388	}
2389
2390	mutex_exit(&zp->z_acl_lock);
2391
2392	/* Put the found 'denies' back on the working mode */
2393	if (deny_mask) {
2394		*working_mode |= deny_mask;
2395		return (EACCES);
2396	} else if (*working_mode) {
2397		return (-1);
2398	}
2399
2400	return (0);
2401}
2402
2403/*
2404 * Return true if any access whatsoever granted, we don't actually
2405 * care what access is granted.
2406 */
2407boolean_t
2408zfs_has_access(znode_t *zp, cred_t *cr)
2409{
2410	uint32_t have = ACE_ALL_PERMS;
2411
2412	if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) {
2413		uid_t		owner;
2414
2415		owner = zfs_fuid_map_id(zp->z_zfsvfs,
2416		    zp->z_phys->zp_uid, cr, ZFS_OWNER);
2417
2418		return (
2419		    secpolicy_vnode_access(cr, ZTOV(zp), owner, VREAD) == 0 ||
2420		    secpolicy_vnode_access(cr, ZTOV(zp), owner, VWRITE) == 0 ||
2421		    secpolicy_vnode_access(cr, ZTOV(zp), owner, VEXEC) == 0 ||
2422		    secpolicy_vnode_chown(cr, owner) == 0 ||
2423		    secpolicy_vnode_setdac(cr, owner) == 0 ||
2424		    secpolicy_vnode_remove(cr) == 0);
2425	}
2426	return (B_TRUE);
2427}
2428
2429static int
2430zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
2431    boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
2432{
2433	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2434	int err;
2435
2436	*working_mode = v4_mode;
2437	*check_privs = B_TRUE;
2438
2439	/*
2440	 * Short circuit empty requests
2441	 */
2442	if (v4_mode == 0 || zfsvfs->z_replay) {
2443		*working_mode = 0;
2444		return (0);
2445	}
2446
2447	if ((err = zfs_zaccess_dataset_check(zp, v4_mode)) != 0) {
2448		*check_privs = B_FALSE;
2449		return (err);
2450	}
2451
2452	/*
2453	 * The caller requested that the ACL check be skipped.  This
2454	 * would only happen if the caller checked VOP_ACCESS() with a
2455	 * 32 bit ACE mask and already had the appropriate permissions.
2456	 */
2457	if (skipaclchk) {
2458		*working_mode = 0;
2459		return (0);
2460	}
2461
2462	return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr));
2463}
2464
2465static int
2466zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,
2467    cred_t *cr)
2468{
2469	if (*working_mode != ACE_WRITE_DATA)
2470		return (EACCES);
2471
2472	return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode,
2473	    check_privs, B_FALSE, cr));
2474}
2475
2476int
2477zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
2478{
2479	boolean_t owner = B_FALSE;
2480	boolean_t groupmbr = B_FALSE;
2481	boolean_t is_attr;
2482	uid_t fowner;
2483	uid_t gowner;
2484	uid_t uid = crgetuid(cr);
2485	int error;
2486
2487	if (zdp->z_phys->zp_flags & ZFS_AV_QUARANTINED)
2488		return (EACCES);
2489
2490	is_attr = ((zdp->z_phys->zp_flags & ZFS_XATTR) &&
2491	    (ZTOV(zdp)->v_type == VDIR));
2492	if (is_attr)
2493		goto slow;
2494
2495	mutex_enter(&zdp->z_acl_lock);
2496
2497	if (zdp->z_phys->zp_flags & ZFS_NO_EXECS_DENIED) {
2498		mutex_exit(&zdp->z_acl_lock);
2499		return (0);
2500	}
2501
2502	if (FUID_INDEX(zdp->z_phys->zp_uid) != 0 ||
2503	    FUID_INDEX(zdp->z_phys->zp_gid) != 0) {
2504		mutex_exit(&zdp->z_acl_lock);
2505		goto slow;
2506	}
2507
2508	fowner = (uid_t)zdp->z_phys->zp_uid;
2509	gowner = (uid_t)zdp->z_phys->zp_gid;
2510
2511	if (uid == fowner) {
2512		owner = B_TRUE;
2513		if (zdp->z_phys->zp_mode & S_IXUSR) {
2514			mutex_exit(&zdp->z_acl_lock);
2515			return (0);
2516		} else {
2517			mutex_exit(&zdp->z_acl_lock);
2518			goto slow;
2519		}
2520	}
2521	if (groupmember(gowner, cr)) {
2522		groupmbr = B_TRUE;
2523		if (zdp->z_phys->zp_mode & S_IXGRP) {
2524			mutex_exit(&zdp->z_acl_lock);
2525			return (0);
2526		} else {
2527			mutex_exit(&zdp->z_acl_lock);
2528			goto slow;
2529		}
2530	}
2531	if (!owner && !groupmbr) {
2532		if (zdp->z_phys->zp_mode & S_IXOTH) {
2533			mutex_exit(&zdp->z_acl_lock);
2534			return (0);
2535		}
2536	}
2537
2538	mutex_exit(&zdp->z_acl_lock);
2539
2540slow:
2541	DTRACE_PROBE(zfs__fastpath__execute__access__miss);
2542	ZFS_ENTER(zdp->z_zfsvfs);
2543	error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr);
2544	ZFS_EXIT(zdp->z_zfsvfs);
2545	return (error);
2546}
2547
2548/*
2549 * Determine whether Access should be granted/denied, invoking least
2550 * priv subsytem when a deny is determined.
2551 */
2552int
2553zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
2554{
2555	uint32_t	working_mode;
2556	int		error;
2557	int		is_attr;
2558	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
2559	boolean_t 	check_privs;
2560	znode_t		*xzp;
2561	znode_t 	*check_zp = zp;
2562
2563	is_attr = ((zp->z_phys->zp_flags & ZFS_XATTR) &&
2564	    (ZTOV(zp)->v_type == VDIR));
2565
2566	/*
2567	 * If attribute then validate against base file
2568	 */
2569	if (is_attr) {
2570		if ((error = zfs_zget(zp->z_zfsvfs,
2571		    zp->z_phys->zp_parent, &xzp)) != 0)	{
2572			return (error);
2573		}
2574
2575		check_zp = xzp;
2576
2577		/*
2578		 * fixup mode to map to xattr perms
2579		 */
2580
2581		if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) {
2582			mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
2583			mode |= ACE_WRITE_NAMED_ATTRS;
2584		}
2585
2586		if (mode & (ACE_READ_DATA|ACE_EXECUTE)) {
2587			mode &= ~(ACE_READ_DATA|ACE_EXECUTE);
2588			mode |= ACE_READ_NAMED_ATTRS;
2589		}
2590	}
2591
2592	if ((error = zfs_zaccess_common(check_zp, mode, &working_mode,
2593	    &check_privs, skipaclchk, cr)) == 0) {
2594		if (is_attr)
2595			VN_RELE(ZTOV(xzp));
2596		return (0);
2597	}
2598
2599	if (error && !check_privs) {
2600		if (is_attr)
2601			VN_RELE(ZTOV(xzp));
2602		return (error);
2603	}
2604
2605	if (error && (flags & V_APPEND)) {
2606		error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr);
2607	}
2608
2609	if (error && check_privs) {
2610		uid_t		owner;
2611		mode_t		checkmode = 0;
2612
2613		owner = zfs_fuid_map_id(zfsvfs, check_zp->z_phys->zp_uid, cr,
2614		    ZFS_OWNER);
2615
2616		/*
2617		 * First check for implicit owner permission on
2618		 * read_acl/read_attributes
2619		 */
2620
2621		error = 0;
2622		ASSERT(working_mode != 0);
2623
2624		if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) &&
2625		    owner == crgetuid(cr)))
2626			working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
2627
2628		if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
2629		    ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
2630			checkmode |= VREAD;
2631		if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
2632		    ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
2633			checkmode |= VWRITE;
2634		if (working_mode & ACE_EXECUTE)
2635			checkmode |= VEXEC;
2636
2637		if (checkmode)
2638			error = secpolicy_vnode_access(cr, ZTOV(check_zp),
2639			    owner, checkmode);
2640
2641		if (error == 0 && (working_mode & ACE_WRITE_OWNER))
2642			error = secpolicy_vnode_chown(cr, owner);
2643		if (error == 0 && (working_mode & ACE_WRITE_ACL))
2644			error = secpolicy_vnode_setdac(cr, owner);
2645
2646		if (error == 0 && (working_mode &
2647		    (ACE_DELETE|ACE_DELETE_CHILD)))
2648			error = secpolicy_vnode_remove(cr);
2649
2650		if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) {
2651			error = secpolicy_vnode_chown(cr, owner);
2652		}
2653		if (error == 0) {
2654			/*
2655			 * See if any bits other than those already checked
2656			 * for are still present.  If so then return EACCES
2657			 */
2658			if (working_mode & ~(ZFS_CHECKED_MASKS)) {
2659				error = EACCES;
2660			}
2661		}
2662	}
2663
2664	if (is_attr)
2665		VN_RELE(ZTOV(xzp));
2666
2667	return (error);
2668}
2669
2670/*
2671 * Translate traditional unix VREAD/VWRITE/VEXEC mode into
2672 * native ACL format and call zfs_zaccess()
2673 */
2674int
2675zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr)
2676{
2677	return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr));
2678}
2679
2680/*
2681 * Access function for secpolicy_vnode_setattr
2682 */
2683int
2684zfs_zaccess_unix(znode_t *zp, mode_t mode, cred_t *cr)
2685{
2686	int v4_mode = zfs_unix_to_v4(mode >> 6);
2687
2688	return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr));
2689}
2690
2691static int
2692zfs_delete_final_check(znode_t *zp, znode_t *dzp,
2693    mode_t missing_perms, cred_t *cr)
2694{
2695	int error;
2696	uid_t downer;
2697	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2698
2699	downer = zfs_fuid_map_id(zfsvfs, dzp->z_phys->zp_uid, cr, ZFS_OWNER);
2700
2701	error = secpolicy_vnode_access(cr, ZTOV(dzp), downer, missing_perms);
2702
2703	if (error == 0)
2704		error = zfs_sticky_remove_access(dzp, zp, cr);
2705
2706	return (error);
2707}
2708
2709/*
2710 * Determine whether Access should be granted/deny, without
2711 * consulting least priv subsystem.
2712 *
2713 *
2714 * The following chart is the recommended NFSv4 enforcement for
2715 * ability to delete an object.
2716 *
2717 *      -------------------------------------------------------
2718 *      |   Parent Dir  |           Target Object Permissions |
2719 *      |  permissions  |                                     |
2720 *      -------------------------------------------------------
2721 *      |               | ACL Allows | ACL Denies| Delete     |
2722 *      |               |  Delete    |  Delete   | unspecified|
2723 *      -------------------------------------------------------
2724 *      |  ACL Allows   | Permit     | Permit    | Permit     |
2725 *      |  DELETE_CHILD |                                     |
2726 *      -------------------------------------------------------
2727 *      |  ACL Denies   | Permit     | Deny      | Deny       |
2728 *      |  DELETE_CHILD |            |           |            |
2729 *      -------------------------------------------------------
2730 *      | ACL specifies |            |           |            |
2731 *      | only allow    | Permit     | Permit    | Permit     |
2732 *      | write and     |            |           |            |
2733 *      | execute       |            |           |            |
2734 *      -------------------------------------------------------
2735 *      | ACL denies    |            |           |            |
2736 *      | write and     | Permit     | Deny      | Deny       |
2737 *      | execute       |            |           |            |
2738 *      -------------------------------------------------------
2739 *         ^
2740 *         |
2741 *         No search privilege, can't even look up file?
2742 *
2743 */
2744int
2745zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
2746{
2747	uint32_t dzp_working_mode = 0;
2748	uint32_t zp_working_mode = 0;
2749	int dzp_error, zp_error;
2750	mode_t missing_perms;
2751	boolean_t dzpcheck_privs = B_TRUE;
2752	boolean_t zpcheck_privs = B_TRUE;
2753
2754	/*
2755	 * We want specific DELETE permissions to
2756	 * take precedence over WRITE/EXECUTE.  We don't
2757	 * want an ACL such as this to mess us up.
2758	 * user:joe:write_data:deny,user:joe:delete:allow
2759	 *
2760	 * However, deny permissions may ultimately be overridden
2761	 * by secpolicy_vnode_access().
2762	 *
2763	 * We will ask for all of the necessary permissions and then
2764	 * look at the working modes from the directory and target object
2765	 * to determine what was found.
2766	 */
2767
2768	if (zp->z_phys->zp_flags & (ZFS_IMMUTABLE | ZFS_NOUNLINK))
2769		return (EPERM);
2770
2771	/*
2772	 * First row
2773	 * If the directory permissions allow the delete, we are done.
2774	 */
2775	if ((dzp_error = zfs_zaccess_common(dzp, ACE_DELETE_CHILD,
2776	    &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr)) == 0)
2777		return (0);
2778
2779	/*
2780	 * If target object has delete permission then we are done
2781	 */
2782	if ((zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode,
2783	    &zpcheck_privs, B_FALSE, cr)) == 0)
2784		return (0);
2785
2786	ASSERT(dzp_error && zp_error);
2787
2788	if (!dzpcheck_privs)
2789		return (dzp_error);
2790	if (!zpcheck_privs)
2791		return (zp_error);
2792
2793	/*
2794	 * Second row
2795	 *
2796	 * If directory returns EACCES then delete_child was denied
2797	 * due to deny delete_child.  In this case send the request through
2798	 * secpolicy_vnode_remove().  We don't use zfs_delete_final_check()
2799	 * since that *could* allow the delete based on write/execute permission
2800	 * and we want delete permissions to override write/execute.
2801	 */
2802
2803	if (dzp_error == EACCES)
2804		return (secpolicy_vnode_remove(cr));
2805
2806	/*
2807	 * Third Row
2808	 * only need to see if we have write/execute on directory.
2809	 */
2810
2811	if ((dzp_error = zfs_zaccess_common(dzp, ACE_EXECUTE|ACE_WRITE_DATA,
2812	    &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr)) == 0)
2813		return (zfs_sticky_remove_access(dzp, zp, cr));
2814
2815	if (!dzpcheck_privs)
2816		return (dzp_error);
2817
2818	/*
2819	 * Fourth row
2820	 */
2821
2822	missing_perms = (dzp_working_mode & ACE_WRITE_DATA) ? VWRITE : 0;
2823	missing_perms |= (dzp_working_mode & ACE_EXECUTE) ? VEXEC : 0;
2824
2825	ASSERT(missing_perms);
2826
2827	return (zfs_delete_final_check(zp, dzp, missing_perms, cr));
2828
2829}
2830
2831int
2832zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
2833    znode_t *tzp, cred_t *cr)
2834{
2835	int add_perm;
2836	int error;
2837
2838	if (szp->z_phys->zp_flags & ZFS_AV_QUARANTINED)
2839		return (EACCES);
2840
2841	add_perm = (ZTOV(szp)->v_type == VDIR) ?
2842	    ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE;
2843
2844	/*
2845	 * Rename permissions are combination of delete permission +
2846	 * add file/subdir permission.
2847	 */
2848
2849	/*
2850	 * first make sure we do the delete portion.
2851	 *
2852	 * If that succeeds then check for add_file/add_subdir permissions
2853	 */
2854
2855	if (error = zfs_zaccess_delete(sdzp, szp, cr))
2856		return (error);
2857
2858	/*
2859	 * If we have a tzp, see if we can delete it?
2860	 */
2861	if (tzp) {
2862		if (error = zfs_zaccess_delete(tdzp, tzp, cr))
2863			return (error);
2864	}
2865
2866	/*
2867	 * Now check for add permissions
2868	 */
2869	error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr);
2870
2871	return (error);
2872}
2873