1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2002,2003 by Andreas Gruenbacher <a.gruenbacher@computer.org>
4 *
5 * Fixes from William Schumacher incorporated on 15 March 2001.
6 *    (Reported by Charles Bertsch, <CBertsch@microtest.com>).
7 */
8
9/*
10 *  This file contains generic functions for manipulating
11 *  POSIX 1003.1e draft standard 17 ACLs.
12 */
13
14#include <linux/kernel.h>
15#include <linux/slab.h>
16#include <linux/atomic.h>
17#include <linux/fs.h>
18#include <linux/sched.h>
19#include <linux/cred.h>
20#include <linux/posix_acl.h>
21#include <linux/posix_acl_xattr.h>
22#include <linux/xattr.h>
23#include <linux/export.h>
24#include <linux/user_namespace.h>
25#include <linux/namei.h>
26#include <linux/mnt_idmapping.h>
27#include <linux/iversion.h>
28#include <linux/security.h>
29#include <linux/fsnotify.h>
30#include <linux/filelock.h>
31
32#include "internal.h"
33
34static struct posix_acl **acl_by_type(struct inode *inode, int type)
35{
36	switch (type) {
37	case ACL_TYPE_ACCESS:
38		return &inode->i_acl;
39	case ACL_TYPE_DEFAULT:
40		return &inode->i_default_acl;
41	default:
42		BUG();
43	}
44}
45
46struct posix_acl *get_cached_acl(struct inode *inode, int type)
47{
48	struct posix_acl **p = acl_by_type(inode, type);
49	struct posix_acl *acl;
50
51	for (;;) {
52		rcu_read_lock();
53		acl = rcu_dereference(*p);
54		if (!acl || is_uncached_acl(acl) ||
55		    refcount_inc_not_zero(&acl->a_refcount))
56			break;
57		rcu_read_unlock();
58		cpu_relax();
59	}
60	rcu_read_unlock();
61	return acl;
62}
63EXPORT_SYMBOL(get_cached_acl);
64
65struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type)
66{
67	struct posix_acl *acl = rcu_dereference(*acl_by_type(inode, type));
68
69	if (acl == ACL_DONT_CACHE) {
70		struct posix_acl *ret;
71
72		ret = inode->i_op->get_inode_acl(inode, type, LOOKUP_RCU);
73		if (!IS_ERR(ret))
74			acl = ret;
75	}
76
77	return acl;
78}
79EXPORT_SYMBOL(get_cached_acl_rcu);
80
/*
 * Store posix_acl_dup(@acl) in the cache slot for the given type using
 * an atomic exchange.  Whatever the slot held before is released,
 * unless it was one of the "uncached" markers.
 */
void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl)
{
	struct posix_acl **slot = acl_by_type(inode, type);
	struct posix_acl *previous = xchg(slot, posix_acl_dup(acl));

	if (is_uncached_acl(previous))
		return;
	posix_acl_release(previous);
}
EXPORT_SYMBOL(set_cached_acl);
91
92static void __forget_cached_acl(struct posix_acl **p)
93{
94	struct posix_acl *old;
95
96	old = xchg(p, ACL_NOT_CACHED);
97	if (!is_uncached_acl(old))
98		posix_acl_release(old);
99}
100
/*
 * Reset the cache slot of the given ACL type to ACL_NOT_CACHED,
 * releasing any ACL that was cached there.
 */
void forget_cached_acl(struct inode *inode, int type)
{
	struct posix_acl **slot = acl_by_type(inode, type);

	__forget_cached_acl(slot);
}
EXPORT_SYMBOL(forget_cached_acl);
106
107void forget_all_cached_acls(struct inode *inode)
108{
109	__forget_cached_acl(&inode->i_acl);
110	__forget_cached_acl(&inode->i_default_acl);
111}
112EXPORT_SYMBOL(forget_all_cached_acls);
113
114static struct posix_acl *__get_acl(struct mnt_idmap *idmap,
115				   struct dentry *dentry, struct inode *inode,
116				   int type)
117{
118	struct posix_acl *sentinel;
119	struct posix_acl **p;
120	struct posix_acl *acl;
121
122	/*
123	 * The sentinel is used to detect when another operation like
124	 * set_cached_acl() or forget_cached_acl() races with get_inode_acl().
125	 * It is guaranteed that is_uncached_acl(sentinel) is true.
126	 */
127
128	acl = get_cached_acl(inode, type);
129	if (!is_uncached_acl(acl))
130		return acl;
131
132	if (!IS_POSIXACL(inode))
133		return NULL;
134
135	sentinel = uncached_acl_sentinel(current);
136	p = acl_by_type(inode, type);
137
138	/*
139	 * If the ACL isn't being read yet, set our sentinel.  Otherwise, the
140	 * current value of the ACL will not be ACL_NOT_CACHED and so our own
141	 * sentinel will not be set; another task will update the cache.  We
142	 * could wait for that other task to complete its job, but it's easier
143	 * to just call ->get_inode_acl to fetch the ACL ourself.  (This is
144	 * going to be an unlikely race.)
145	 */
146	cmpxchg(p, ACL_NOT_CACHED, sentinel);
147
148	/*
149	 * Normally, the ACL returned by ->get{_inode}_acl will be cached.
150	 * A filesystem can prevent that by calling
151	 * forget_cached_acl(inode, type) in ->get{_inode}_acl.
152	 *
153	 * If the filesystem doesn't have a get{_inode}_ acl() function at all,
154	 * we'll just create the negative cache entry.
155	 */
156	if (dentry && inode->i_op->get_acl) {
157		acl = inode->i_op->get_acl(idmap, dentry, type);
158	} else if (inode->i_op->get_inode_acl) {
159		acl = inode->i_op->get_inode_acl(inode, type, false);
160	} else {
161		set_cached_acl(inode, type, NULL);
162		return NULL;
163	}
164	if (IS_ERR(acl)) {
165		/*
166		 * Remove our sentinel so that we don't block future attempts
167		 * to cache the ACL.
168		 */
169		cmpxchg(p, sentinel, ACL_NOT_CACHED);
170		return acl;
171	}
172
173	/*
174	 * Cache the result, but only if our sentinel is still in place.
175	 */
176	posix_acl_dup(acl);
177	if (unlikely(!try_cmpxchg(p, &sentinel, acl)))
178		posix_acl_release(acl);
179	return acl;
180}
181
182struct posix_acl *get_inode_acl(struct inode *inode, int type)
183{
184	return __get_acl(&nop_mnt_idmap, NULL, inode, type);
185}
186EXPORT_SYMBOL(get_inode_acl);
187
188/*
189 * Init a fresh posix_acl
190 */
191void
192posix_acl_init(struct posix_acl *acl, int count)
193{
194	refcount_set(&acl->a_refcount, 1);
195	acl->a_count = count;
196}
197EXPORT_SYMBOL(posix_acl_init);
198
199/*
200 * Allocate a new ACL with the specified number of entries.
201 */
202struct posix_acl *
203posix_acl_alloc(int count, gfp_t flags)
204{
205	const size_t size = sizeof(struct posix_acl) +
206	                    count * sizeof(struct posix_acl_entry);
207	struct posix_acl *acl = kmalloc(size, flags);
208	if (acl)
209		posix_acl_init(acl, count);
210	return acl;
211}
212EXPORT_SYMBOL(posix_acl_alloc);
213
214/*
215 * Clone an ACL.
216 */
217struct posix_acl *
218posix_acl_clone(const struct posix_acl *acl, gfp_t flags)
219{
220	struct posix_acl *clone = NULL;
221
222	if (acl) {
223		int size = sizeof(struct posix_acl) + acl->a_count *
224		           sizeof(struct posix_acl_entry);
225		clone = kmemdup(acl, size, flags);
226		if (clone)
227			refcount_set(&clone->a_refcount, 1);
228	}
229	return clone;
230}
231EXPORT_SYMBOL_GPL(posix_acl_clone);
232
233/*
234 * Check if an acl is valid. Returns 0 if it is, or -E... otherwise.
235 */
236int
237posix_acl_valid(struct user_namespace *user_ns, const struct posix_acl *acl)
238{
239	const struct posix_acl_entry *pa, *pe;
240	int state = ACL_USER_OBJ;
241	int needs_mask = 0;
242
243	FOREACH_ACL_ENTRY(pa, acl, pe) {
244		if (pa->e_perm & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
245			return -EINVAL;
246		switch (pa->e_tag) {
247			case ACL_USER_OBJ:
248				if (state == ACL_USER_OBJ) {
249					state = ACL_USER;
250					break;
251				}
252				return -EINVAL;
253
254			case ACL_USER:
255				if (state != ACL_USER)
256					return -EINVAL;
257				if (!kuid_has_mapping(user_ns, pa->e_uid))
258					return -EINVAL;
259				needs_mask = 1;
260				break;
261
262			case ACL_GROUP_OBJ:
263				if (state == ACL_USER) {
264					state = ACL_GROUP;
265					break;
266				}
267				return -EINVAL;
268
269			case ACL_GROUP:
270				if (state != ACL_GROUP)
271					return -EINVAL;
272				if (!kgid_has_mapping(user_ns, pa->e_gid))
273					return -EINVAL;
274				needs_mask = 1;
275				break;
276
277			case ACL_MASK:
278				if (state != ACL_GROUP)
279					return -EINVAL;
280				state = ACL_OTHER;
281				break;
282
283			case ACL_OTHER:
284				if (state == ACL_OTHER ||
285				    (state == ACL_GROUP && !needs_mask)) {
286					state = 0;
287					break;
288				}
289				return -EINVAL;
290
291			default:
292				return -EINVAL;
293		}
294	}
295	if (state == 0)
296		return 0;
297	return -EINVAL;
298}
299EXPORT_SYMBOL(posix_acl_valid);
300
301/*
302 * Returns 0 if the acl can be exactly represented in the traditional
303 * file mode permission bits, or else 1. Returns -E... on error.
304 */
305int
306posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p)
307{
308	const struct posix_acl_entry *pa, *pe;
309	umode_t mode = 0;
310	int not_equiv = 0;
311
312	/*
313	 * A null ACL can always be presented as mode bits.
314	 */
315	if (!acl)
316		return 0;
317
318	FOREACH_ACL_ENTRY(pa, acl, pe) {
319		switch (pa->e_tag) {
320			case ACL_USER_OBJ:
321				mode |= (pa->e_perm & S_IRWXO) << 6;
322				break;
323			case ACL_GROUP_OBJ:
324				mode |= (pa->e_perm & S_IRWXO) << 3;
325				break;
326			case ACL_OTHER:
327				mode |= pa->e_perm & S_IRWXO;
328				break;
329			case ACL_MASK:
330				mode = (mode & ~S_IRWXG) |
331				       ((pa->e_perm & S_IRWXO) << 3);
332				not_equiv = 1;
333				break;
334			case ACL_USER:
335			case ACL_GROUP:
336				not_equiv = 1;
337				break;
338			default:
339				return -EINVAL;
340		}
341	}
342        if (mode_p)
343                *mode_p = (*mode_p & ~S_IRWXUGO) | mode;
344        return not_equiv;
345}
346EXPORT_SYMBOL(posix_acl_equiv_mode);
347
348/*
349 * Create an ACL representing the file mode permission bits of an inode.
350 */
351struct posix_acl *
352posix_acl_from_mode(umode_t mode, gfp_t flags)
353{
354	struct posix_acl *acl = posix_acl_alloc(3, flags);
355	if (!acl)
356		return ERR_PTR(-ENOMEM);
357
358	acl->a_entries[0].e_tag  = ACL_USER_OBJ;
359	acl->a_entries[0].e_perm = (mode & S_IRWXU) >> 6;
360
361	acl->a_entries[1].e_tag  = ACL_GROUP_OBJ;
362	acl->a_entries[1].e_perm = (mode & S_IRWXG) >> 3;
363
364	acl->a_entries[2].e_tag  = ACL_OTHER;
365	acl->a_entries[2].e_perm = (mode & S_IRWXO);
366	return acl;
367}
368EXPORT_SYMBOL(posix_acl_from_mode);
369
370/*
371 * Return 0 if current is granted want access to the inode
372 * by the acl. Returns -E... otherwise.
373 */
374int
375posix_acl_permission(struct mnt_idmap *idmap, struct inode *inode,
376		     const struct posix_acl *acl, int want)
377{
378	const struct posix_acl_entry *pa, *pe, *mask_obj;
379	struct user_namespace *fs_userns = i_user_ns(inode);
380	int found = 0;
381	vfsuid_t vfsuid;
382	vfsgid_t vfsgid;
383
384	want &= MAY_READ | MAY_WRITE | MAY_EXEC;
385
386	FOREACH_ACL_ENTRY(pa, acl, pe) {
387                switch(pa->e_tag) {
388                        case ACL_USER_OBJ:
389				/* (May have been checked already) */
390				vfsuid = i_uid_into_vfsuid(idmap, inode);
391				if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
392                                        goto check_perm;
393                                break;
394                        case ACL_USER:
395				vfsuid = make_vfsuid(idmap, fs_userns,
396						     pa->e_uid);
397				if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
398                                        goto mask;
399				break;
400                        case ACL_GROUP_OBJ:
401				vfsgid = i_gid_into_vfsgid(idmap, inode);
402				if (vfsgid_in_group_p(vfsgid)) {
403					found = 1;
404					if ((pa->e_perm & want) == want)
405						goto mask;
406                                }
407				break;
408                        case ACL_GROUP:
409				vfsgid = make_vfsgid(idmap, fs_userns,
410						     pa->e_gid);
411				if (vfsgid_in_group_p(vfsgid)) {
412					found = 1;
413					if ((pa->e_perm & want) == want)
414						goto mask;
415                                }
416                                break;
417                        case ACL_MASK:
418                                break;
419                        case ACL_OTHER:
420				if (found)
421					return -EACCES;
422				else
423					goto check_perm;
424			default:
425				return -EIO;
426                }
427        }
428	return -EIO;
429
430mask:
431	for (mask_obj = pa+1; mask_obj != pe; mask_obj++) {
432		if (mask_obj->e_tag == ACL_MASK) {
433			if ((pa->e_perm & mask_obj->e_perm & want) == want)
434				return 0;
435			return -EACCES;
436		}
437	}
438
439check_perm:
440	if ((pa->e_perm & want) == want)
441		return 0;
442	return -EACCES;
443}
444
445/*
446 * Modify acl when creating a new inode. The caller must ensure the acl is
447 * only referenced once.
448 *
449 * mode_p initially must contain the mode parameter to the open() / creat()
450 * system calls. All permissions that are not granted by the acl are removed.
451 * The permissions in the acl are changed to reflect the mode_p parameter.
452 */
453static int posix_acl_create_masq(struct posix_acl *acl, umode_t *mode_p)
454{
455	struct posix_acl_entry *pa, *pe;
456	struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL;
457	umode_t mode = *mode_p;
458	int not_equiv = 0;
459
460	/* assert(atomic_read(acl->a_refcount) == 1); */
461
462	FOREACH_ACL_ENTRY(pa, acl, pe) {
463                switch(pa->e_tag) {
464                        case ACL_USER_OBJ:
465				pa->e_perm &= (mode >> 6) | ~S_IRWXO;
466				mode &= (pa->e_perm << 6) | ~S_IRWXU;
467				break;
468
469			case ACL_USER:
470			case ACL_GROUP:
471				not_equiv = 1;
472				break;
473
474                        case ACL_GROUP_OBJ:
475				group_obj = pa;
476                                break;
477
478                        case ACL_OTHER:
479				pa->e_perm &= mode | ~S_IRWXO;
480				mode &= pa->e_perm | ~S_IRWXO;
481                                break;
482
483                        case ACL_MASK:
484				mask_obj = pa;
485				not_equiv = 1;
486                                break;
487
488			default:
489				return -EIO;
490                }
491        }
492
493	if (mask_obj) {
494		mask_obj->e_perm &= (mode >> 3) | ~S_IRWXO;
495		mode &= (mask_obj->e_perm << 3) | ~S_IRWXG;
496	} else {
497		if (!group_obj)
498			return -EIO;
499		group_obj->e_perm &= (mode >> 3) | ~S_IRWXO;
500		mode &= (group_obj->e_perm << 3) | ~S_IRWXG;
501	}
502
503	*mode_p = (*mode_p & ~S_IRWXUGO) | mode;
504        return not_equiv;
505}
506
507/*
508 * Modify the ACL for the chmod syscall.
509 */
510static int __posix_acl_chmod_masq(struct posix_acl *acl, umode_t mode)
511{
512	struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL;
513	struct posix_acl_entry *pa, *pe;
514
515	/* assert(atomic_read(acl->a_refcount) == 1); */
516
517	FOREACH_ACL_ENTRY(pa, acl, pe) {
518		switch(pa->e_tag) {
519			case ACL_USER_OBJ:
520				pa->e_perm = (mode & S_IRWXU) >> 6;
521				break;
522
523			case ACL_USER:
524			case ACL_GROUP:
525				break;
526
527			case ACL_GROUP_OBJ:
528				group_obj = pa;
529				break;
530
531			case ACL_MASK:
532				mask_obj = pa;
533				break;
534
535			case ACL_OTHER:
536				pa->e_perm = (mode & S_IRWXO);
537				break;
538
539			default:
540				return -EIO;
541		}
542	}
543
544	if (mask_obj) {
545		mask_obj->e_perm = (mode & S_IRWXG) >> 3;
546	} else {
547		if (!group_obj)
548			return -EIO;
549		group_obj->e_perm = (mode & S_IRWXG) >> 3;
550	}
551
552	return 0;
553}
554
555int
556__posix_acl_create(struct posix_acl **acl, gfp_t gfp, umode_t *mode_p)
557{
558	struct posix_acl *clone = posix_acl_clone(*acl, gfp);
559	int err = -ENOMEM;
560	if (clone) {
561		err = posix_acl_create_masq(clone, mode_p);
562		if (err < 0) {
563			posix_acl_release(clone);
564			clone = NULL;
565		}
566	}
567	posix_acl_release(*acl);
568	*acl = clone;
569	return err;
570}
571EXPORT_SYMBOL(__posix_acl_create);
572
573int
574__posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, umode_t mode)
575{
576	struct posix_acl *clone = posix_acl_clone(*acl, gfp);
577	int err = -ENOMEM;
578	if (clone) {
579		err = __posix_acl_chmod_masq(clone, mode);
580		if (err) {
581			posix_acl_release(clone);
582			clone = NULL;
583		}
584	}
585	posix_acl_release(*acl);
586	*acl = clone;
587	return err;
588}
589EXPORT_SYMBOL(__posix_acl_chmod);
590
591/**
592 * posix_acl_chmod - chmod a posix acl
593 *
594 * @idmap:	idmap of the mount @inode was found from
595 * @dentry:	dentry to check permissions on
596 * @mode:	the new mode of @inode
597 *
598 * If the dentry has been found through an idmapped mount the idmap of
599 * the vfsmount must be passed through @idmap. This function will then
600 * take care to map the inode according to @idmap before checking
601 * permissions. On non-idmapped mounts or if permission checking is to be
602 * performed on the raw inode simply pass @nop_mnt_idmap.
603 */
604int
605 posix_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry,
606		    umode_t mode)
607{
608	struct inode *inode = d_inode(dentry);
609	struct posix_acl *acl;
610	int ret = 0;
611
612	if (!IS_POSIXACL(inode))
613		return 0;
614	if (!inode->i_op->set_acl)
615		return -EOPNOTSUPP;
616
617	acl = get_inode_acl(inode, ACL_TYPE_ACCESS);
618	if (IS_ERR_OR_NULL(acl)) {
619		if (acl == ERR_PTR(-EOPNOTSUPP))
620			return 0;
621		return PTR_ERR(acl);
622	}
623
624	ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode);
625	if (ret)
626		return ret;
627	ret = inode->i_op->set_acl(idmap, dentry, acl, ACL_TYPE_ACCESS);
628	posix_acl_release(acl);
629	return ret;
630}
631EXPORT_SYMBOL(posix_acl_chmod);
632
633int
634posix_acl_create(struct inode *dir, umode_t *mode,
635		struct posix_acl **default_acl, struct posix_acl **acl)
636{
637	struct posix_acl *p;
638	struct posix_acl *clone;
639	int ret;
640
641	*acl = NULL;
642	*default_acl = NULL;
643
644	if (S_ISLNK(*mode) || !IS_POSIXACL(dir))
645		return 0;
646
647	p = get_inode_acl(dir, ACL_TYPE_DEFAULT);
648	if (!p || p == ERR_PTR(-EOPNOTSUPP)) {
649		*mode &= ~current_umask();
650		return 0;
651	}
652	if (IS_ERR(p))
653		return PTR_ERR(p);
654
655	ret = -ENOMEM;
656	clone = posix_acl_clone(p, GFP_NOFS);
657	if (!clone)
658		goto err_release;
659
660	ret = posix_acl_create_masq(clone, mode);
661	if (ret < 0)
662		goto err_release_clone;
663
664	if (ret == 0)
665		posix_acl_release(clone);
666	else
667		*acl = clone;
668
669	if (!S_ISDIR(*mode))
670		posix_acl_release(p);
671	else
672		*default_acl = p;
673
674	return 0;
675
676err_release_clone:
677	posix_acl_release(clone);
678err_release:
679	posix_acl_release(p);
680	return ret;
681}
682EXPORT_SYMBOL_GPL(posix_acl_create);
683
684/**
685 * posix_acl_update_mode  -  update mode in set_acl
686 * @idmap:	idmap of the mount @inode was found from
687 * @inode:	target inode
688 * @mode_p:	mode (pointer) for update
689 * @acl:	acl pointer
690 *
691 * Update the file mode when setting an ACL: compute the new file permission
692 * bits based on the ACL.  In addition, if the ACL is equivalent to the new
693 * file mode, set *@acl to NULL to indicate that no ACL should be set.
694 *
695 * As with chmod, clear the setgid bit if the caller is not in the owning group
696 * or capable of CAP_FSETID (see inode_change_ok).
697 *
698 * If the inode has been found through an idmapped mount the idmap of
699 * the vfsmount must be passed through @idmap. This function will then
700 * take care to map the inode according to @idmap before checking
701 * permissions. On non-idmapped mounts or if permission checking is to be
702 * performed on the raw inode simply pass @nop_mnt_idmap.
703 *
704 * Called from set_acl inode operations.
705 */
706int posix_acl_update_mode(struct mnt_idmap *idmap,
707			  struct inode *inode, umode_t *mode_p,
708			  struct posix_acl **acl)
709{
710	umode_t mode = inode->i_mode;
711	int error;
712
713	error = posix_acl_equiv_mode(*acl, &mode);
714	if (error < 0)
715		return error;
716	if (error == 0)
717		*acl = NULL;
718	if (!vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)) &&
719	    !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
720		mode &= ~S_ISGID;
721	*mode_p = mode;
722	return 0;
723}
724EXPORT_SYMBOL(posix_acl_update_mode);
725
726/*
727 * Fix up the uids and gids in posix acl extended attributes in place.
728 */
729static int posix_acl_fix_xattr_common(const void *value, size_t size)
730{
731	const struct posix_acl_xattr_header *header = value;
732	int count;
733
734	if (!header)
735		return -EINVAL;
736	if (size < sizeof(struct posix_acl_xattr_header))
737		return -EINVAL;
738	if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
739		return -EOPNOTSUPP;
740
741	count = posix_acl_xattr_count(size);
742	if (count < 0)
743		return -EINVAL;
744	if (count == 0)
745		return 0;
746
747	return count;
748}
749
750/**
751 * posix_acl_from_xattr - convert POSIX ACLs from backing store to VFS format
752 * @userns: the filesystem's idmapping
753 * @value: the uapi representation of POSIX ACLs
754 * @size: the size of @void
755 *
756 * Filesystems that store POSIX ACLs in the unaltered uapi format should use
757 * posix_acl_from_xattr() when reading them from the backing store and
758 * converting them into the struct posix_acl VFS format. The helper is
759 * specifically intended to be called from the acl inode operation.
760 *
761 * The posix_acl_from_xattr() function will map the raw {g,u}id values stored
762 * in ACL_{GROUP,USER} entries into idmapping in @userns.
763 *
764 * Note that posix_acl_from_xattr() does not take idmapped mounts into account.
765 * If it did it calling it from the get acl inode operation would return POSIX
766 * ACLs mapped according to an idmapped mount which would mean that the value
767 * couldn't be cached for the filesystem. Idmapped mounts are taken into
768 * account on the fly during permission checking or right at the VFS -
769 * userspace boundary before reporting them to the user.
770 *
771 * Return: Allocated struct posix_acl on success, NULL for a valid header but
772 *         without actual POSIX ACL entries, or ERR_PTR() encoded error code.
773 */
774struct posix_acl *posix_acl_from_xattr(struct user_namespace *userns,
775				       const void *value, size_t size)
776{
777	const struct posix_acl_xattr_header *header = value;
778	const struct posix_acl_xattr_entry *entry = (const void *)(header + 1), *end;
779	int count;
780	struct posix_acl *acl;
781	struct posix_acl_entry *acl_e;
782
783	count = posix_acl_fix_xattr_common(value, size);
784	if (count < 0)
785		return ERR_PTR(count);
786	if (count == 0)
787		return NULL;
788
789	acl = posix_acl_alloc(count, GFP_NOFS);
790	if (!acl)
791		return ERR_PTR(-ENOMEM);
792	acl_e = acl->a_entries;
793
794	for (end = entry + count; entry != end; acl_e++, entry++) {
795		acl_e->e_tag  = le16_to_cpu(entry->e_tag);
796		acl_e->e_perm = le16_to_cpu(entry->e_perm);
797
798		switch(acl_e->e_tag) {
799			case ACL_USER_OBJ:
800			case ACL_GROUP_OBJ:
801			case ACL_MASK:
802			case ACL_OTHER:
803				break;
804
805			case ACL_USER:
806				acl_e->e_uid = make_kuid(userns,
807						le32_to_cpu(entry->e_id));
808				if (!uid_valid(acl_e->e_uid))
809					goto fail;
810				break;
811			case ACL_GROUP:
812				acl_e->e_gid = make_kgid(userns,
813						le32_to_cpu(entry->e_id));
814				if (!gid_valid(acl_e->e_gid))
815					goto fail;
816				break;
817
818			default:
819				goto fail;
820		}
821	}
822	return acl;
823
824fail:
825	posix_acl_release(acl);
826	return ERR_PTR(-EINVAL);
827}
828EXPORT_SYMBOL (posix_acl_from_xattr);
829
830/*
831 * Convert from in-memory to extended attribute representation.
832 */
833int
834posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl,
835		   void *buffer, size_t size)
836{
837	struct posix_acl_xattr_header *ext_acl = buffer;
838	struct posix_acl_xattr_entry *ext_entry;
839	int real_size, n;
840
841	real_size = posix_acl_xattr_size(acl->a_count);
842	if (!buffer)
843		return real_size;
844	if (real_size > size)
845		return -ERANGE;
846
847	ext_entry = (void *)(ext_acl + 1);
848	ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION);
849
850	for (n=0; n < acl->a_count; n++, ext_entry++) {
851		const struct posix_acl_entry *acl_e = &acl->a_entries[n];
852		ext_entry->e_tag  = cpu_to_le16(acl_e->e_tag);
853		ext_entry->e_perm = cpu_to_le16(acl_e->e_perm);
854		switch(acl_e->e_tag) {
855		case ACL_USER:
856			ext_entry->e_id =
857				cpu_to_le32(from_kuid(user_ns, acl_e->e_uid));
858			break;
859		case ACL_GROUP:
860			ext_entry->e_id =
861				cpu_to_le32(from_kgid(user_ns, acl_e->e_gid));
862			break;
863		default:
864			ext_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
865			break;
866		}
867	}
868	return real_size;
869}
870EXPORT_SYMBOL (posix_acl_to_xattr);
871
872/**
873 * vfs_posix_acl_to_xattr - convert from kernel to userspace representation
874 * @idmap: idmap of the mount
875 * @inode: inode the posix acls are set on
876 * @acl: the posix acls as represented by the vfs
877 * @buffer: the buffer into which to convert @acl
878 * @size: size of @buffer
879 *
880 * This converts @acl from the VFS representation in the filesystem idmapping
881 * to the uapi form reportable to userspace. And mount and caller idmappings
882 * are handled appropriately.
883 *
884 * Return: On success, the size of the stored uapi posix acls, on error a
885 * negative errno.
886 */
887static ssize_t vfs_posix_acl_to_xattr(struct mnt_idmap *idmap,
888				      struct inode *inode,
889				      const struct posix_acl *acl, void *buffer,
890				      size_t size)
891
892{
893	struct posix_acl_xattr_header *ext_acl = buffer;
894	struct posix_acl_xattr_entry *ext_entry;
895	struct user_namespace *fs_userns, *caller_userns;
896	ssize_t real_size, n;
897	vfsuid_t vfsuid;
898	vfsgid_t vfsgid;
899
900	real_size = posix_acl_xattr_size(acl->a_count);
901	if (!buffer)
902		return real_size;
903	if (real_size > size)
904		return -ERANGE;
905
906	ext_entry = (void *)(ext_acl + 1);
907	ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION);
908
909	fs_userns = i_user_ns(inode);
910	caller_userns = current_user_ns();
911	for (n=0; n < acl->a_count; n++, ext_entry++) {
912		const struct posix_acl_entry *acl_e = &acl->a_entries[n];
913		ext_entry->e_tag  = cpu_to_le16(acl_e->e_tag);
914		ext_entry->e_perm = cpu_to_le16(acl_e->e_perm);
915		switch(acl_e->e_tag) {
916		case ACL_USER:
917			vfsuid = make_vfsuid(idmap, fs_userns, acl_e->e_uid);
918			ext_entry->e_id = cpu_to_le32(from_kuid(
919				caller_userns, vfsuid_into_kuid(vfsuid)));
920			break;
921		case ACL_GROUP:
922			vfsgid = make_vfsgid(idmap, fs_userns, acl_e->e_gid);
923			ext_entry->e_id = cpu_to_le32(from_kgid(
924				caller_userns, vfsgid_into_kgid(vfsgid)));
925			break;
926		default:
927			ext_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
928			break;
929		}
930	}
931	return real_size;
932}
933
934int
935set_posix_acl(struct mnt_idmap *idmap, struct dentry *dentry,
936	      int type, struct posix_acl *acl)
937{
938	struct inode *inode = d_inode(dentry);
939
940	if (!IS_POSIXACL(inode))
941		return -EOPNOTSUPP;
942	if (!inode->i_op->set_acl)
943		return -EOPNOTSUPP;
944
945	if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
946		return acl ? -EACCES : 0;
947	if (!inode_owner_or_capable(idmap, inode))
948		return -EPERM;
949
950	if (acl) {
951		int ret = posix_acl_valid(inode->i_sb->s_user_ns, acl);
952		if (ret)
953			return ret;
954	}
955	return inode->i_op->set_acl(idmap, dentry, acl, type);
956}
957EXPORT_SYMBOL(set_posix_acl);
958
959int posix_acl_listxattr(struct inode *inode, char **buffer,
960			ssize_t *remaining_size)
961{
962	int err;
963
964	if (!IS_POSIXACL(inode))
965		return 0;
966
967	if (inode->i_acl) {
968		err = xattr_list_one(buffer, remaining_size,
969				     XATTR_NAME_POSIX_ACL_ACCESS);
970		if (err)
971			return err;
972	}
973
974	if (inode->i_default_acl) {
975		err = xattr_list_one(buffer, remaining_size,
976				     XATTR_NAME_POSIX_ACL_DEFAULT);
977		if (err)
978			return err;
979	}
980
981	return 0;
982}
983
/*
 * ->list() callback of the legacy ACL xattr handlers: report whether
 * the dentry's backing inode uses POSIX ACLs.
 */
static bool
posix_acl_xattr_list(struct dentry *dentry)
{
	struct inode *inode = d_backing_inode(dentry);

	return IS_POSIXACL(inode);
}
989
990/*
991 * nop_posix_acl_access - legacy xattr handler for access POSIX ACLs
992 *
993 * This is the legacy POSIX ACL access xattr handler. It is used by some
994 * filesystems to implement their ->listxattr() inode operation. New code
995 * should never use them.
996 */
997const struct xattr_handler nop_posix_acl_access = {
998	.name = XATTR_NAME_POSIX_ACL_ACCESS,
999	.list = posix_acl_xattr_list,
1000};
1001EXPORT_SYMBOL_GPL(nop_posix_acl_access);
1002
1003/*
1004 * nop_posix_acl_default - legacy xattr handler for default POSIX ACLs
1005 *
1006 * This is the legacy POSIX ACL default xattr handler. It is used by some
1007 * filesystems to implement their ->listxattr() inode operation. New code
1008 * should never use them.
1009 */
1010const struct xattr_handler nop_posix_acl_default = {
1011	.name = XATTR_NAME_POSIX_ACL_DEFAULT,
1012	.list = posix_acl_xattr_list,
1013};
1014EXPORT_SYMBOL_GPL(nop_posix_acl_default);
1015
1016int simple_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
1017		   struct posix_acl *acl, int type)
1018{
1019	int error;
1020	struct inode *inode = d_inode(dentry);
1021
1022	if (type == ACL_TYPE_ACCESS) {
1023		error = posix_acl_update_mode(idmap, inode,
1024				&inode->i_mode, &acl);
1025		if (error)
1026			return error;
1027	}
1028
1029	inode_set_ctime_current(inode);
1030	if (IS_I_VERSION(inode))
1031		inode_inc_iversion(inode);
1032	set_cached_acl(inode, type, acl);
1033	return 0;
1034}
1035
1036int simple_acl_create(struct inode *dir, struct inode *inode)
1037{
1038	struct posix_acl *default_acl, *acl;
1039	int error;
1040
1041	error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
1042	if (error)
1043		return error;
1044
1045	set_cached_acl(inode, ACL_TYPE_DEFAULT, default_acl);
1046	set_cached_acl(inode, ACL_TYPE_ACCESS, acl);
1047
1048	if (default_acl)
1049		posix_acl_release(default_acl);
1050	if (acl)
1051		posix_acl_release(acl);
1052	return 0;
1053}
1054
1055static int vfs_set_acl_idmapped_mnt(struct mnt_idmap *idmap,
1056				    struct user_namespace *fs_userns,
1057				    struct posix_acl *acl)
1058{
1059	for (int n = 0; n < acl->a_count; n++) {
1060		struct posix_acl_entry *acl_e = &acl->a_entries[n];
1061
1062		switch (acl_e->e_tag) {
1063		case ACL_USER:
1064			acl_e->e_uid = from_vfsuid(idmap, fs_userns,
1065						   VFSUIDT_INIT(acl_e->e_uid));
1066			break;
1067		case ACL_GROUP:
1068			acl_e->e_gid = from_vfsgid(idmap, fs_userns,
1069						   VFSGIDT_INIT(acl_e->e_gid));
1070			break;
1071		}
1072	}
1073
1074	return 0;
1075}
1076
1077/**
1078 * vfs_set_acl - set posix acls
1079 * @idmap: idmap of the mount
1080 * @dentry: the dentry based on which to set the posix acls
1081 * @acl_name: the name of the posix acl
1082 * @kacl: the posix acls in the appropriate VFS format
1083 *
1084 * This function sets @kacl. The caller must all posix_acl_release() on @kacl
1085 * afterwards.
1086 *
1087 * Return: On success 0, on error negative errno.
1088 */
1089int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
1090		const char *acl_name, struct posix_acl *kacl)
1091{
1092	int acl_type;
1093	int error;
1094	struct inode *inode = d_inode(dentry);
1095	struct inode *delegated_inode = NULL;
1096
1097	acl_type = posix_acl_type(acl_name);
1098	if (acl_type < 0)
1099		return -EINVAL;
1100
1101	if (kacl) {
1102		/*
1103		 * If we're on an idmapped mount translate from mount specific
1104		 * vfs{g,u}id_t into global filesystem k{g,u}id_t.
1105		 * Afterwards we can cache the POSIX ACLs filesystem wide and -
1106		 * if this is a filesystem with a backing store - ultimately
1107		 * translate them to backing store values.
1108		 */
1109		error = vfs_set_acl_idmapped_mnt(idmap, i_user_ns(inode), kacl);
1110		if (error)
1111			return error;
1112	}
1113
1114retry_deleg:
1115	inode_lock(inode);
1116
1117	/*
1118	 * We only care about restrictions the inode struct itself places upon
1119	 * us otherwise POSIX ACLs aren't subject to any VFS restrictions.
1120	 */
1121	error = may_write_xattr(idmap, inode);
1122	if (error)
1123		goto out_inode_unlock;
1124
1125	error = security_inode_set_acl(idmap, dentry, acl_name, kacl);
1126	if (error)
1127		goto out_inode_unlock;
1128
1129	error = try_break_deleg(inode, &delegated_inode);
1130	if (error)
1131		goto out_inode_unlock;
1132
1133	if (likely(!is_bad_inode(inode)))
1134		error = set_posix_acl(idmap, dentry, acl_type, kacl);
1135	else
1136		error = -EIO;
1137	if (!error) {
1138		fsnotify_xattr(dentry);
1139		security_inode_post_set_acl(dentry, acl_name, kacl);
1140	}
1141
1142out_inode_unlock:
1143	inode_unlock(inode);
1144
1145	if (delegated_inode) {
1146		error = break_deleg_wait(&delegated_inode);
1147		if (!error)
1148			goto retry_deleg;
1149	}
1150
1151	return error;
1152}
1153EXPORT_SYMBOL_GPL(vfs_set_acl);
1154
1155/**
1156 * vfs_get_acl - get posix acls
1157 * @idmap: idmap of the mount
1158 * @dentry: the dentry based on which to retrieve the posix acls
1159 * @acl_name: the name of the posix acl
1160 *
1161 * This function retrieves @kacl from the filesystem. The caller must all
1162 * posix_acl_release() on @kacl.
1163 *
1164 * Return: On success POSIX ACLs in VFS format, on error negative errno.
1165 */
1166struct posix_acl *vfs_get_acl(struct mnt_idmap *idmap,
1167			      struct dentry *dentry, const char *acl_name)
1168{
1169	struct inode *inode = d_inode(dentry);
1170	struct posix_acl *acl;
1171	int acl_type, error;
1172
1173	acl_type = posix_acl_type(acl_name);
1174	if (acl_type < 0)
1175		return ERR_PTR(-EINVAL);
1176
1177	/*
1178	 * The VFS has no restrictions on reading POSIX ACLs so calling
1179	 * something like xattr_permission() isn't needed. Only LSMs get a say.
1180	 */
1181	error = security_inode_get_acl(idmap, dentry, acl_name);
1182	if (error)
1183		return ERR_PTR(error);
1184
1185	if (!IS_POSIXACL(inode))
1186		return ERR_PTR(-EOPNOTSUPP);
1187	if (S_ISLNK(inode->i_mode))
1188		return ERR_PTR(-EOPNOTSUPP);
1189
1190	acl = __get_acl(idmap, dentry, inode, acl_type);
1191	if (IS_ERR(acl))
1192		return acl;
1193	if (!acl)
1194		return ERR_PTR(-ENODATA);
1195
1196	return acl;
1197}
1198EXPORT_SYMBOL_GPL(vfs_get_acl);
1199
1200/**
1201 * vfs_remove_acl - remove posix acls
1202 * @idmap: idmap of the mount
1203 * @dentry: the dentry based on which to retrieve the posix acls
1204 * @acl_name: the name of the posix acl
1205 *
1206 * This function removes posix acls.
1207 *
1208 * Return: On success 0, on error negative errno.
1209 */
1210int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry,
1211		   const char *acl_name)
1212{
1213	int acl_type;
1214	int error;
1215	struct inode *inode = d_inode(dentry);
1216	struct inode *delegated_inode = NULL;
1217
1218	acl_type = posix_acl_type(acl_name);
1219	if (acl_type < 0)
1220		return -EINVAL;
1221
1222retry_deleg:
1223	inode_lock(inode);
1224
1225	/*
1226	 * We only care about restrictions the inode struct itself places upon
1227	 * us otherwise POSIX ACLs aren't subject to any VFS restrictions.
1228	 */
1229	error = may_write_xattr(idmap, inode);
1230	if (error)
1231		goto out_inode_unlock;
1232
1233	error = security_inode_remove_acl(idmap, dentry, acl_name);
1234	if (error)
1235		goto out_inode_unlock;
1236
1237	error = try_break_deleg(inode, &delegated_inode);
1238	if (error)
1239		goto out_inode_unlock;
1240
1241	if (likely(!is_bad_inode(inode)))
1242		error = set_posix_acl(idmap, dentry, acl_type, NULL);
1243	else
1244		error = -EIO;
1245	if (!error) {
1246		fsnotify_xattr(dentry);
1247		security_inode_post_remove_acl(idmap, dentry, acl_name);
1248	}
1249
1250out_inode_unlock:
1251	inode_unlock(inode);
1252
1253	if (delegated_inode) {
1254		error = break_deleg_wait(&delegated_inode);
1255		if (!error)
1256			goto retry_deleg;
1257	}
1258
1259	return error;
1260}
1261EXPORT_SYMBOL_GPL(vfs_remove_acl);
1262
1263int do_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
1264	       const char *acl_name, const void *kvalue, size_t size)
1265{
1266	int error;
1267	struct posix_acl *acl = NULL;
1268
1269	if (size) {
1270		/*
1271		 * Note that posix_acl_from_xattr() uses GFP_NOFS when it
1272		 * probably doesn't need to here.
1273		 */
1274		acl = posix_acl_from_xattr(current_user_ns(), kvalue, size);
1275		if (IS_ERR(acl))
1276			return PTR_ERR(acl);
1277	}
1278
1279	error = vfs_set_acl(idmap, dentry, acl_name, acl);
1280	posix_acl_release(acl);
1281	return error;
1282}
1283
1284ssize_t do_get_acl(struct mnt_idmap *idmap, struct dentry *dentry,
1285		   const char *acl_name, void *kvalue, size_t size)
1286{
1287	ssize_t error;
1288	struct posix_acl *acl;
1289
1290	acl = vfs_get_acl(idmap, dentry, acl_name);
1291	if (IS_ERR(acl))
1292		return PTR_ERR(acl);
1293
1294	error = vfs_posix_acl_to_xattr(idmap, d_inode(dentry),
1295				       acl, kvalue, size);
1296	posix_acl_release(acl);
1297	return error;
1298}
1299