1// SPDX-License-Identifier: GPL-2.0-only
2/*
3  File: fs/xattr.c
4
5  Extended attribute handling.
6
7  Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
8  Copyright (C) 2001 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com>
9  Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
10 */
11#include <linux/fs.h>
12#include <linux/filelock.h>
13#include <linux/slab.h>
14#include <linux/file.h>
15#include <linux/xattr.h>
16#include <linux/mount.h>
17#include <linux/namei.h>
18#include <linux/security.h>
19#include <linux/syscalls.h>
20#include <linux/export.h>
21#include <linux/fsnotify.h>
22#include <linux/audit.h>
23#include <linux/vmalloc.h>
24#include <linux/posix_acl_xattr.h>
25
26#include <linux/uaccess.h>
27
28#include "internal.h"
29
/*
 * Match @a_prefix against the beginning of @a.
 *
 * Returns a pointer to the first character of @a that follows the prefix
 * when @a starts with @a_prefix, or NULL when it does not.  An empty
 * @a_prefix matches every string and yields @a itself.
 */
static const char *
strcmp_prefix(const char *a, const char *a_prefix)
{
	for (; *a_prefix != '\0'; a++, a_prefix++) {
		if (*a != *a_prefix)
			return NULL;
	}
	return a;
}
39
/*
 * In order to implement different sets of xattr operations for each xattr
 * prefix, a filesystem should create a null-terminated array of struct
 * xattr_handler (one for each prefix) and hang a pointer to it off of the
 * s_xattr field of the superblock.
 *
 * for_each_xattr_handler() walks such an array: each entry is assigned to
 * @handler while @handlers is advanced past it, and the walk ends at the
 * terminating NULL entry.  A NULL @handlers pointer yields no iterations.
 *
 * The NULL check is written as "if (!...) {} else for" so that the macro
 * already owns its else branch.  An "else" that a caller writes after the
 * loop therefore pairs with the caller's own "if" rather than being
 * captured by the macro's hidden "if".
 */
#define for_each_xattr_handler(handlers, handler)		\
	if (!(handlers)) {} else				\
		for ((handler) = *(handlers)++;			\
			(handler) != NULL;			\
			(handler) = *(handlers)++)
51
52/*
53 * Find the xattr_handler with the matching prefix.
54 */
55static const struct xattr_handler *
56xattr_resolve_name(struct inode *inode, const char **name)
57{
58	const struct xattr_handler * const *handlers = inode->i_sb->s_xattr;
59	const struct xattr_handler *handler;
60
61	if (!(inode->i_opflags & IOP_XATTR)) {
62		if (unlikely(is_bad_inode(inode)))
63			return ERR_PTR(-EIO);
64		return ERR_PTR(-EOPNOTSUPP);
65	}
66	for_each_xattr_handler(handlers, handler) {
67		const char *n;
68
69		n = strcmp_prefix(*name, xattr_prefix(handler));
70		if (n) {
71			if (!handler->prefix ^ !*n) {
72				if (*n)
73					continue;
74				return ERR_PTR(-EINVAL);
75			}
76			*name = n;
77			return handler;
78		}
79	}
80	return ERR_PTR(-EOPNOTSUPP);
81}
82
83/**
84 * may_write_xattr - check whether inode allows writing xattr
85 * @idmap: idmap of the mount the inode was found from
86 * @inode: the inode on which to set an xattr
87 *
88 * Check whether the inode allows writing xattrs. Specifically, we can never
89 * set or remove an extended attribute on a read-only filesystem  or on an
90 * immutable / append-only inode.
91 *
92 * We also need to ensure that the inode has a mapping in the mount to
93 * not risk writing back invalid i_{g,u}id values.
94 *
95 * Return: On success zero is returned. On error a negative errno is returned.
96 */
97int may_write_xattr(struct mnt_idmap *idmap, struct inode *inode)
98{
99	if (IS_IMMUTABLE(inode))
100		return -EPERM;
101	if (IS_APPEND(inode))
102		return -EPERM;
103	if (HAS_UNMAPPED_ID(idmap, inode))
104		return -EPERM;
105	return 0;
106}
107
108/*
109 * Check permissions for extended attribute access.  This is a bit complicated
110 * because different namespaces have very different rules.
111 */
112static int
113xattr_permission(struct mnt_idmap *idmap, struct inode *inode,
114		 const char *name, int mask)
115{
116	if (mask & MAY_WRITE) {
117		int ret;
118
119		ret = may_write_xattr(idmap, inode);
120		if (ret)
121			return ret;
122	}
123
124	/*
125	 * No restriction for security.* and system.* from the VFS.  Decision
126	 * on these is left to the underlying filesystem / security module.
127	 */
128	if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) ||
129	    !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
130		return 0;
131
132	/*
133	 * The trusted.* namespace can only be accessed by privileged users.
134	 */
135	if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
136		if (!capable(CAP_SYS_ADMIN))
137			return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
138		return 0;
139	}
140
141	/*
142	 * In the user.* namespace, only regular files and directories can have
143	 * extended attributes. For sticky directories, only the owner and
144	 * privileged users can write attributes.
145	 */
146	if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
147		if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
148			return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
149		if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
150		    (mask & MAY_WRITE) &&
151		    !inode_owner_or_capable(idmap, inode))
152			return -EPERM;
153	}
154
155	return inode_permission(idmap, inode, mask);
156}
157
158/*
159 * Look for any handler that deals with the specified namespace.
160 */
161int
162xattr_supports_user_prefix(struct inode *inode)
163{
164	const struct xattr_handler * const *handlers = inode->i_sb->s_xattr;
165	const struct xattr_handler *handler;
166
167	if (!(inode->i_opflags & IOP_XATTR)) {
168		if (unlikely(is_bad_inode(inode)))
169			return -EIO;
170		return -EOPNOTSUPP;
171	}
172
173	for_each_xattr_handler(handlers, handler) {
174		if (!strncmp(xattr_prefix(handler), XATTR_USER_PREFIX,
175			     XATTR_USER_PREFIX_LEN))
176			return 0;
177	}
178
179	return -EOPNOTSUPP;
180}
181EXPORT_SYMBOL(xattr_supports_user_prefix);
182
183int
184__vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
185	       struct inode *inode, const char *name, const void *value,
186	       size_t size, int flags)
187{
188	const struct xattr_handler *handler;
189
190	if (is_posix_acl_xattr(name))
191		return -EOPNOTSUPP;
192
193	handler = xattr_resolve_name(inode, &name);
194	if (IS_ERR(handler))
195		return PTR_ERR(handler);
196	if (!handler->set)
197		return -EOPNOTSUPP;
198	if (size == 0)
199		value = "";  /* empty EA, do not remove */
200	return handler->set(handler, idmap, dentry, inode, name, value,
201			    size, flags);
202}
203EXPORT_SYMBOL(__vfs_setxattr);
204
205/**
206 *  __vfs_setxattr_noperm - perform setxattr operation without performing
207 *  permission checks.
208 *
209 *  @idmap: idmap of the mount the inode was found from
210 *  @dentry: object to perform setxattr on
211 *  @name: xattr name to set
212 *  @value: value to set @name to
213 *  @size: size of @value
214 *  @flags: flags to pass into filesystem operations
215 *
216 *  returns the result of the internal setxattr or setsecurity operations.
217 *
218 *  This function requires the caller to lock the inode's i_mutex before it
219 *  is executed. It also assumes that the caller will make the appropriate
220 *  permission checks.
221 */
222int __vfs_setxattr_noperm(struct mnt_idmap *idmap,
223			  struct dentry *dentry, const char *name,
224			  const void *value, size_t size, int flags)
225{
226	struct inode *inode = dentry->d_inode;
227	int error = -EAGAIN;
228	int issec = !strncmp(name, XATTR_SECURITY_PREFIX,
229				   XATTR_SECURITY_PREFIX_LEN);
230
231	if (issec)
232		inode->i_flags &= ~S_NOSEC;
233	if (inode->i_opflags & IOP_XATTR) {
234		error = __vfs_setxattr(idmap, dentry, inode, name, value,
235				       size, flags);
236		if (!error) {
237			fsnotify_xattr(dentry);
238			security_inode_post_setxattr(dentry, name, value,
239						     size, flags);
240		}
241	} else {
242		if (unlikely(is_bad_inode(inode)))
243			return -EIO;
244	}
245	if (error == -EAGAIN) {
246		error = -EOPNOTSUPP;
247
248		if (issec) {
249			const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
250
251			error = security_inode_setsecurity(inode, suffix, value,
252							   size, flags);
253			if (!error)
254				fsnotify_xattr(dentry);
255		}
256	}
257
258	return error;
259}
260
261/**
262 * __vfs_setxattr_locked - set an extended attribute while holding the inode
263 * lock
264 *
265 *  @idmap: idmap of the mount of the target inode
266 *  @dentry: object to perform setxattr on
267 *  @name: xattr name to set
268 *  @value: value to set @name to
269 *  @size: size of @value
270 *  @flags: flags to pass into filesystem operations
271 *  @delegated_inode: on return, will contain an inode pointer that
272 *  a delegation was broken on, NULL if none.
273 */
274int
275__vfs_setxattr_locked(struct mnt_idmap *idmap, struct dentry *dentry,
276		      const char *name, const void *value, size_t size,
277		      int flags, struct inode **delegated_inode)
278{
279	struct inode *inode = dentry->d_inode;
280	int error;
281
282	error = xattr_permission(idmap, inode, name, MAY_WRITE);
283	if (error)
284		return error;
285
286	error = security_inode_setxattr(idmap, dentry, name, value, size,
287					flags);
288	if (error)
289		goto out;
290
291	error = try_break_deleg(inode, delegated_inode);
292	if (error)
293		goto out;
294
295	error = __vfs_setxattr_noperm(idmap, dentry, name, value,
296				      size, flags);
297
298out:
299	return error;
300}
301EXPORT_SYMBOL_GPL(__vfs_setxattr_locked);
302
303int
304vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
305	     const char *name, const void *value, size_t size, int flags)
306{
307	struct inode *inode = dentry->d_inode;
308	struct inode *delegated_inode = NULL;
309	const void  *orig_value = value;
310	int error;
311
312	if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
313		error = cap_convert_nscap(idmap, dentry, &value, size);
314		if (error < 0)
315			return error;
316		size = error;
317	}
318
319retry_deleg:
320	inode_lock(inode);
321	error = __vfs_setxattr_locked(idmap, dentry, name, value, size,
322				      flags, &delegated_inode);
323	inode_unlock(inode);
324
325	if (delegated_inode) {
326		error = break_deleg_wait(&delegated_inode);
327		if (!error)
328			goto retry_deleg;
329	}
330	if (value != orig_value)
331		kfree(value);
332
333	return error;
334}
335EXPORT_SYMBOL_GPL(vfs_setxattr);
336
337static ssize_t
338xattr_getsecurity(struct mnt_idmap *idmap, struct inode *inode,
339		  const char *name, void *value, size_t size)
340{
341	void *buffer = NULL;
342	ssize_t len;
343
344	if (!value || !size) {
345		len = security_inode_getsecurity(idmap, inode, name,
346						 &buffer, false);
347		goto out_noalloc;
348	}
349
350	len = security_inode_getsecurity(idmap, inode, name, &buffer,
351					 true);
352	if (len < 0)
353		return len;
354	if (size < len) {
355		len = -ERANGE;
356		goto out;
357	}
358	memcpy(value, buffer, len);
359out:
360	kfree(buffer);
361out_noalloc:
362	return len;
363}
364
365/*
366 * vfs_getxattr_alloc - allocate memory, if necessary, before calling getxattr
367 *
368 * Allocate memory, if not already allocated, or re-allocate correct size,
369 * before retrieving the extended attribute.  The xattr value buffer should
370 * always be freed by the caller, even on error.
371 *
372 * Returns the result of alloc, if failed, or the getxattr operation.
373 */
374int
375vfs_getxattr_alloc(struct mnt_idmap *idmap, struct dentry *dentry,
376		   const char *name, char **xattr_value, size_t xattr_size,
377		   gfp_t flags)
378{
379	const struct xattr_handler *handler;
380	struct inode *inode = dentry->d_inode;
381	char *value = *xattr_value;
382	int error;
383
384	error = xattr_permission(idmap, inode, name, MAY_READ);
385	if (error)
386		return error;
387
388	handler = xattr_resolve_name(inode, &name);
389	if (IS_ERR(handler))
390		return PTR_ERR(handler);
391	if (!handler->get)
392		return -EOPNOTSUPP;
393	error = handler->get(handler, dentry, inode, name, NULL, 0);
394	if (error < 0)
395		return error;
396
397	if (!value || (error > xattr_size)) {
398		value = krealloc(*xattr_value, error + 1, flags);
399		if (!value)
400			return -ENOMEM;
401		memset(value, 0, error + 1);
402	}
403
404	error = handler->get(handler, dentry, inode, name, value, error);
405	*xattr_value = value;
406	return error;
407}
408
409ssize_t
410__vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name,
411	       void *value, size_t size)
412{
413	const struct xattr_handler *handler;
414
415	if (is_posix_acl_xattr(name))
416		return -EOPNOTSUPP;
417
418	handler = xattr_resolve_name(inode, &name);
419	if (IS_ERR(handler))
420		return PTR_ERR(handler);
421	if (!handler->get)
422		return -EOPNOTSUPP;
423	return handler->get(handler, dentry, inode, name, value, size);
424}
425EXPORT_SYMBOL(__vfs_getxattr);
426
427ssize_t
428vfs_getxattr(struct mnt_idmap *idmap, struct dentry *dentry,
429	     const char *name, void *value, size_t size)
430{
431	struct inode *inode = dentry->d_inode;
432	int error;
433
434	error = xattr_permission(idmap, inode, name, MAY_READ);
435	if (error)
436		return error;
437
438	error = security_inode_getxattr(dentry, name);
439	if (error)
440		return error;
441
442	if (!strncmp(name, XATTR_SECURITY_PREFIX,
443				XATTR_SECURITY_PREFIX_LEN)) {
444		const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
445		int ret = xattr_getsecurity(idmap, inode, suffix, value,
446					    size);
447		/*
448		 * Only overwrite the return value if a security module
449		 * is actually active.
450		 */
451		if (ret == -EOPNOTSUPP)
452			goto nolsm;
453		return ret;
454	}
455nolsm:
456	return __vfs_getxattr(dentry, inode, name, value, size);
457}
458EXPORT_SYMBOL_GPL(vfs_getxattr);
459
460/**
461 * vfs_listxattr - retrieve \0 separated list of xattr names
462 * @dentry: the dentry from whose inode the xattr names are retrieved
463 * @list: buffer to store xattr names into
464 * @size: size of the buffer
465 *
466 * This function returns the names of all xattrs associated with the
467 * inode of @dentry.
468 *
469 * Note, for legacy reasons the vfs_listxattr() function lists POSIX
470 * ACLs as well. Since POSIX ACLs are decoupled from IOP_XATTR the
471 * vfs_listxattr() function doesn't check for this flag since a
472 * filesystem could implement POSIX ACLs without implementing any other
473 * xattrs.
474 *
475 * However, since all codepaths that remove IOP_XATTR also assign of
476 * inode operations that either don't implement or implement a stub
477 * ->listxattr() operation.
478 *
479 * Return: On success, the size of the buffer that was used. On error a
480 *         negative error code.
481 */
482ssize_t
483vfs_listxattr(struct dentry *dentry, char *list, size_t size)
484{
485	struct inode *inode = d_inode(dentry);
486	ssize_t error;
487
488	error = security_inode_listxattr(dentry);
489	if (error)
490		return error;
491
492	if (inode->i_op->listxattr) {
493		error = inode->i_op->listxattr(dentry, list, size);
494	} else {
495		error = security_inode_listsecurity(inode, list, size);
496		if (size && error > size)
497			error = -ERANGE;
498	}
499	return error;
500}
501EXPORT_SYMBOL_GPL(vfs_listxattr);
502
503int
504__vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry,
505		  const char *name)
506{
507	struct inode *inode = d_inode(dentry);
508	const struct xattr_handler *handler;
509
510	if (is_posix_acl_xattr(name))
511		return -EOPNOTSUPP;
512
513	handler = xattr_resolve_name(inode, &name);
514	if (IS_ERR(handler))
515		return PTR_ERR(handler);
516	if (!handler->set)
517		return -EOPNOTSUPP;
518	return handler->set(handler, idmap, dentry, inode, name, NULL, 0,
519			    XATTR_REPLACE);
520}
521EXPORT_SYMBOL(__vfs_removexattr);
522
523/**
524 * __vfs_removexattr_locked - set an extended attribute while holding the inode
525 * lock
526 *
527 *  @idmap: idmap of the mount of the target inode
528 *  @dentry: object to perform setxattr on
529 *  @name: name of xattr to remove
530 *  @delegated_inode: on return, will contain an inode pointer that
531 *  a delegation was broken on, NULL if none.
532 */
533int
534__vfs_removexattr_locked(struct mnt_idmap *idmap,
535			 struct dentry *dentry, const char *name,
536			 struct inode **delegated_inode)
537{
538	struct inode *inode = dentry->d_inode;
539	int error;
540
541	error = xattr_permission(idmap, inode, name, MAY_WRITE);
542	if (error)
543		return error;
544
545	error = security_inode_removexattr(idmap, dentry, name);
546	if (error)
547		goto out;
548
549	error = try_break_deleg(inode, delegated_inode);
550	if (error)
551		goto out;
552
553	error = __vfs_removexattr(idmap, dentry, name);
554	if (error)
555		return error;
556
557	fsnotify_xattr(dentry);
558	security_inode_post_removexattr(dentry, name);
559
560out:
561	return error;
562}
563EXPORT_SYMBOL_GPL(__vfs_removexattr_locked);
564
565int
566vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry,
567		const char *name)
568{
569	struct inode *inode = dentry->d_inode;
570	struct inode *delegated_inode = NULL;
571	int error;
572
573retry_deleg:
574	inode_lock(inode);
575	error = __vfs_removexattr_locked(idmap, dentry,
576					 name, &delegated_inode);
577	inode_unlock(inode);
578
579	if (delegated_inode) {
580		error = break_deleg_wait(&delegated_inode);
581		if (!error)
582			goto retry_deleg;
583	}
584
585	return error;
586}
587EXPORT_SYMBOL_GPL(vfs_removexattr);
588
589/*
590 * Extended attribute SET operations
591 */
592
593int setxattr_copy(const char __user *name, struct xattr_ctx *ctx)
594{
595	int error;
596
597	if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE))
598		return -EINVAL;
599
600	error = strncpy_from_user(ctx->kname->name, name,
601				sizeof(ctx->kname->name));
602	if (error == 0 || error == sizeof(ctx->kname->name))
603		return  -ERANGE;
604	if (error < 0)
605		return error;
606
607	error = 0;
608	if (ctx->size) {
609		if (ctx->size > XATTR_SIZE_MAX)
610			return -E2BIG;
611
612		ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size);
613		if (IS_ERR(ctx->kvalue)) {
614			error = PTR_ERR(ctx->kvalue);
615			ctx->kvalue = NULL;
616		}
617	}
618
619	return error;
620}
621
622int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
623		struct xattr_ctx *ctx)
624{
625	if (is_posix_acl_xattr(ctx->kname->name))
626		return do_set_acl(idmap, dentry, ctx->kname->name,
627				  ctx->kvalue, ctx->size);
628
629	return vfs_setxattr(idmap, dentry, ctx->kname->name,
630			ctx->kvalue, ctx->size, ctx->flags);
631}
632
633static long
634setxattr(struct mnt_idmap *idmap, struct dentry *d,
635	const char __user *name, const void __user *value, size_t size,
636	int flags)
637{
638	struct xattr_name kname;
639	struct xattr_ctx ctx = {
640		.cvalue   = value,
641		.kvalue   = NULL,
642		.size     = size,
643		.kname    = &kname,
644		.flags    = flags,
645	};
646	int error;
647
648	error = setxattr_copy(name, &ctx);
649	if (error)
650		return error;
651
652	error = do_setxattr(idmap, d, &ctx);
653
654	kvfree(ctx.kvalue);
655	return error;
656}
657
658static int path_setxattr(const char __user *pathname,
659			 const char __user *name, const void __user *value,
660			 size_t size, int flags, unsigned int lookup_flags)
661{
662	struct path path;
663	int error;
664
665retry:
666	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
667	if (error)
668		return error;
669	error = mnt_want_write(path.mnt);
670	if (!error) {
671		error = setxattr(mnt_idmap(path.mnt), path.dentry, name,
672				 value, size, flags);
673		mnt_drop_write(path.mnt);
674	}
675	path_put(&path);
676	if (retry_estale(error, lookup_flags)) {
677		lookup_flags |= LOOKUP_REVAL;
678		goto retry;
679	}
680	return error;
681}
682
683SYSCALL_DEFINE5(setxattr, const char __user *, pathname,
684		const char __user *, name, const void __user *, value,
685		size_t, size, int, flags)
686{
687	return path_setxattr(pathname, name, value, size, flags, LOOKUP_FOLLOW);
688}
689
690SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname,
691		const char __user *, name, const void __user *, value,
692		size_t, size, int, flags)
693{
694	return path_setxattr(pathname, name, value, size, flags, 0);
695}
696
697SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
698		const void __user *,value, size_t, size, int, flags)
699{
700	struct fd f = fdget(fd);
701	int error = -EBADF;
702
703	if (!f.file)
704		return error;
705	audit_file(f.file);
706	error = mnt_want_write_file(f.file);
707	if (!error) {
708		error = setxattr(file_mnt_idmap(f.file),
709				 f.file->f_path.dentry, name,
710				 value, size, flags);
711		mnt_drop_write_file(f.file);
712	}
713	fdput(f);
714	return error;
715}
716
717/*
718 * Extended attribute GET operations
719 */
720ssize_t
721do_getxattr(struct mnt_idmap *idmap, struct dentry *d,
722	struct xattr_ctx *ctx)
723{
724	ssize_t error;
725	char *kname = ctx->kname->name;
726
727	if (ctx->size) {
728		if (ctx->size > XATTR_SIZE_MAX)
729			ctx->size = XATTR_SIZE_MAX;
730		ctx->kvalue = kvzalloc(ctx->size, GFP_KERNEL);
731		if (!ctx->kvalue)
732			return -ENOMEM;
733	}
734
735	if (is_posix_acl_xattr(ctx->kname->name))
736		error = do_get_acl(idmap, d, kname, ctx->kvalue, ctx->size);
737	else
738		error = vfs_getxattr(idmap, d, kname, ctx->kvalue, ctx->size);
739	if (error > 0) {
740		if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error))
741			error = -EFAULT;
742	} else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) {
743		/* The file system tried to returned a value bigger
744		   than XATTR_SIZE_MAX bytes. Not possible. */
745		error = -E2BIG;
746	}
747
748	return error;
749}
750
751static ssize_t
752getxattr(struct mnt_idmap *idmap, struct dentry *d,
753	 const char __user *name, void __user *value, size_t size)
754{
755	ssize_t error;
756	struct xattr_name kname;
757	struct xattr_ctx ctx = {
758		.value    = value,
759		.kvalue   = NULL,
760		.size     = size,
761		.kname    = &kname,
762		.flags    = 0,
763	};
764
765	error = strncpy_from_user(kname.name, name, sizeof(kname.name));
766	if (error == 0 || error == sizeof(kname.name))
767		error = -ERANGE;
768	if (error < 0)
769		return error;
770
771	error =  do_getxattr(idmap, d, &ctx);
772
773	kvfree(ctx.kvalue);
774	return error;
775}
776
777static ssize_t path_getxattr(const char __user *pathname,
778			     const char __user *name, void __user *value,
779			     size_t size, unsigned int lookup_flags)
780{
781	struct path path;
782	ssize_t error;
783retry:
784	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
785	if (error)
786		return error;
787	error = getxattr(mnt_idmap(path.mnt), path.dentry, name, value, size);
788	path_put(&path);
789	if (retry_estale(error, lookup_flags)) {
790		lookup_flags |= LOOKUP_REVAL;
791		goto retry;
792	}
793	return error;
794}
795
796SYSCALL_DEFINE4(getxattr, const char __user *, pathname,
797		const char __user *, name, void __user *, value, size_t, size)
798{
799	return path_getxattr(pathname, name, value, size, LOOKUP_FOLLOW);
800}
801
802SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname,
803		const char __user *, name, void __user *, value, size_t, size)
804{
805	return path_getxattr(pathname, name, value, size, 0);
806}
807
808SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name,
809		void __user *, value, size_t, size)
810{
811	struct fd f = fdget(fd);
812	ssize_t error = -EBADF;
813
814	if (!f.file)
815		return error;
816	audit_file(f.file);
817	error = getxattr(file_mnt_idmap(f.file), f.file->f_path.dentry,
818			 name, value, size);
819	fdput(f);
820	return error;
821}
822
823/*
824 * Extended attribute LIST operations
825 */
826static ssize_t
827listxattr(struct dentry *d, char __user *list, size_t size)
828{
829	ssize_t error;
830	char *klist = NULL;
831
832	if (size) {
833		if (size > XATTR_LIST_MAX)
834			size = XATTR_LIST_MAX;
835		klist = kvmalloc(size, GFP_KERNEL);
836		if (!klist)
837			return -ENOMEM;
838	}
839
840	error = vfs_listxattr(d, klist, size);
841	if (error > 0) {
842		if (size && copy_to_user(list, klist, error))
843			error = -EFAULT;
844	} else if (error == -ERANGE && size >= XATTR_LIST_MAX) {
845		/* The file system tried to returned a list bigger
846		   than XATTR_LIST_MAX bytes. Not possible. */
847		error = -E2BIG;
848	}
849
850	kvfree(klist);
851
852	return error;
853}
854
855static ssize_t path_listxattr(const char __user *pathname, char __user *list,
856			      size_t size, unsigned int lookup_flags)
857{
858	struct path path;
859	ssize_t error;
860retry:
861	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
862	if (error)
863		return error;
864	error = listxattr(path.dentry, list, size);
865	path_put(&path);
866	if (retry_estale(error, lookup_flags)) {
867		lookup_flags |= LOOKUP_REVAL;
868		goto retry;
869	}
870	return error;
871}
872
873SYSCALL_DEFINE3(listxattr, const char __user *, pathname, char __user *, list,
874		size_t, size)
875{
876	return path_listxattr(pathname, list, size, LOOKUP_FOLLOW);
877}
878
879SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list,
880		size_t, size)
881{
882	return path_listxattr(pathname, list, size, 0);
883}
884
885SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
886{
887	struct fd f = fdget(fd);
888	ssize_t error = -EBADF;
889
890	if (!f.file)
891		return error;
892	audit_file(f.file);
893	error = listxattr(f.file->f_path.dentry, list, size);
894	fdput(f);
895	return error;
896}
897
898/*
899 * Extended attribute REMOVE operations
900 */
901static long
902removexattr(struct mnt_idmap *idmap, struct dentry *d,
903	    const char __user *name)
904{
905	int error;
906	char kname[XATTR_NAME_MAX + 1];
907
908	error = strncpy_from_user(kname, name, sizeof(kname));
909	if (error == 0 || error == sizeof(kname))
910		error = -ERANGE;
911	if (error < 0)
912		return error;
913
914	if (is_posix_acl_xattr(kname))
915		return vfs_remove_acl(idmap, d, kname);
916
917	return vfs_removexattr(idmap, d, kname);
918}
919
920static int path_removexattr(const char __user *pathname,
921			    const char __user *name, unsigned int lookup_flags)
922{
923	struct path path;
924	int error;
925retry:
926	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
927	if (error)
928		return error;
929	error = mnt_want_write(path.mnt);
930	if (!error) {
931		error = removexattr(mnt_idmap(path.mnt), path.dentry, name);
932		mnt_drop_write(path.mnt);
933	}
934	path_put(&path);
935	if (retry_estale(error, lookup_flags)) {
936		lookup_flags |= LOOKUP_REVAL;
937		goto retry;
938	}
939	return error;
940}
941
942SYSCALL_DEFINE2(removexattr, const char __user *, pathname,
943		const char __user *, name)
944{
945	return path_removexattr(pathname, name, LOOKUP_FOLLOW);
946}
947
948SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,
949		const char __user *, name)
950{
951	return path_removexattr(pathname, name, 0);
952}
953
954SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
955{
956	struct fd f = fdget(fd);
957	int error = -EBADF;
958
959	if (!f.file)
960		return error;
961	audit_file(f.file);
962	error = mnt_want_write_file(f.file);
963	if (!error) {
964		error = removexattr(file_mnt_idmap(f.file),
965				    f.file->f_path.dentry, name);
966		mnt_drop_write_file(f.file);
967	}
968	fdput(f);
969	return error;
970}
971
/*
 * Account for, and optionally emit, one NUL-terminated xattr name.
 *
 * When *@buffer is non-NULL the name (including its terminator) is copied
 * there and *@buffer is advanced past it; -ERANGE is returned, with nothing
 * modified, if *@remaining_size is too small.  When *@buffer is NULL nothing
 * is copied.  In both successful cases *@remaining_size is reduced by the
 * length of the name plus terminator.
 *
 * Returns 0 on success or -ERANGE.
 */
int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name)
{
	const size_t entry_len = strlen(name) + 1;	/* name plus its NUL */
	char *dst = *buffer;

	if (dst != NULL) {
		if (*remaining_size < entry_len)
			return -ERANGE;
		memcpy(dst, name, entry_len);
		*buffer = dst + entry_len;
	}

	*remaining_size -= entry_len;
	return 0;
}
986
987/**
988 * generic_listxattr - run through a dentry's xattr list() operations
989 * @dentry: dentry to list the xattrs
990 * @buffer: result buffer
991 * @buffer_size: size of @buffer
992 *
993 * Combine the results of the list() operation from every xattr_handler in the
994 * xattr_handler stack.
995 *
996 * Note that this will not include the entries for POSIX ACLs.
997 */
998ssize_t
999generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
1000{
1001	const struct xattr_handler *handler, * const *handlers = dentry->d_sb->s_xattr;
1002	ssize_t remaining_size = buffer_size;
1003	int err = 0;
1004
1005	for_each_xattr_handler(handlers, handler) {
1006		if (!handler->name || (handler->list && !handler->list(dentry)))
1007			continue;
1008		err = xattr_list_one(&buffer, &remaining_size, handler->name);
1009		if (err)
1010			return err;
1011	}
1012
1013	return err ? err : buffer_size - remaining_size;
1014}
1015EXPORT_SYMBOL(generic_listxattr);
1016
/**
 * xattr_full_name  -  Compute full attribute name from suffix
 *
 * @handler:	handler of the xattr_handler operation
 * @name:	name passed to the xattr_handler operation
 *
 * The get and set xattr handler operations are called with the remainder of
 * the attribute name after skipping the handler's prefix: for example, "foo"
 * is passed to the get operation of a handler with prefix "user." to get
 * attribute "user.foo".  The prefix still sits in memory directly in front
 * of @name, so the full name is recovered by stepping back over it.
 *
 * Note: the list xattr handler operation when called from the vfs is passed a
 * NULL name; some file systems use this operation internally, with varying
 * semantics.
 *
 * Return: @name moved back by the length of xattr_prefix(@handler).
 */
const char *xattr_full_name(const struct xattr_handler *handler,
			    const char *name)
{
	return name - strlen(xattr_prefix(handler));
}
EXPORT_SYMBOL(xattr_full_name);
1040
/**
 * simple_xattr_space - estimate the memory used by a simple xattr
 * @name: the full name of the xattr
 * @size: the size of its value
 *
 * This takes no account of how much larger the two slab objects actually are:
 * that would depend on the slab implementation, when what is required is a
 * deterministic number, which grows with name length and size and quantity.
 *
 * Return: The approximate number of bytes of memory used by such an xattr.
 */
size_t simple_xattr_space(const char *name, size_t size)
{
	/*
	 * A fixed "40" stands in for sizeof(struct simple_xattr), so that
	 * the result is the same on 32-bit and 64-bit, and stays the same
	 * even if simple_xattr grows.
	 */
	const size_t fixed_overhead = 40;

	return fixed_overhead + size + strlen(name);
}
1060
1061/**
1062 * simple_xattr_free - free an xattr object
1063 * @xattr: the xattr object
1064 *
1065 * Free the xattr object. Can handle @xattr being NULL.
1066 */
1067void simple_xattr_free(struct simple_xattr *xattr)
1068{
1069	if (xattr)
1070		kfree(xattr->name);
1071	kvfree(xattr);
1072}
1073
1074/**
1075 * simple_xattr_alloc - allocate new xattr object
1076 * @value: value of the xattr object
1077 * @size: size of @value
1078 *
1079 * Allocate a new xattr object and initialize respective members. The caller is
1080 * responsible for handling the name of the xattr.
1081 *
1082 * Return: On success a new xattr object is returned. On failure NULL is
1083 * returned.
1084 */
1085struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
1086{
1087	struct simple_xattr *new_xattr;
1088	size_t len;
1089
1090	/* wrap around? */
1091	len = sizeof(*new_xattr) + size;
1092	if (len < sizeof(*new_xattr))
1093		return NULL;
1094
1095	new_xattr = kvmalloc(len, GFP_KERNEL_ACCOUNT);
1096	if (!new_xattr)
1097		return NULL;
1098
1099	new_xattr->size = size;
1100	memcpy(new_xattr->value, value, size);
1101	return new_xattr;
1102}
1103
1104/**
1105 * rbtree_simple_xattr_cmp - compare xattr name with current rbtree xattr entry
1106 * @key: xattr name
1107 * @node: current node
1108 *
1109 * Compare the xattr name with the xattr name attached to @node in the rbtree.
1110 *
1111 * Return: Negative value if continuing left, positive if continuing right, 0
1112 * if the xattr attached to @node matches @key.
1113 */
1114static int rbtree_simple_xattr_cmp(const void *key, const struct rb_node *node)
1115{
1116	const char *xattr_name = key;
1117	const struct simple_xattr *xattr;
1118
1119	xattr = rb_entry(node, struct simple_xattr, rb_node);
1120	return strcmp(xattr->name, xattr_name);
1121}
1122
1123/**
1124 * rbtree_simple_xattr_node_cmp - compare two xattr rbtree nodes
1125 * @new_node: new node
1126 * @node: current node
1127 *
1128 * Compare the xattr attached to @new_node with the xattr attached to @node.
1129 *
1130 * Return: Negative value if continuing left, positive if continuing right, 0
1131 * if the xattr attached to @new_node matches the xattr attached to @node.
1132 */
1133static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node,
1134					const struct rb_node *node)
1135{
1136	struct simple_xattr *xattr;
1137	xattr = rb_entry(new_node, struct simple_xattr, rb_node);
1138	return rbtree_simple_xattr_cmp(xattr->name, node);
1139}
1140
1141/**
1142 * simple_xattr_get - get an xattr object
1143 * @xattrs: the header of the xattr object
1144 * @name: the name of the xattr to retrieve
1145 * @buffer: the buffer to store the value into
1146 * @size: the size of @buffer
1147 *
1148 * Try to find and retrieve the xattr object associated with @name.
1149 * If @buffer is provided store the value of @xattr in @buffer
1150 * otherwise just return the length. The size of @buffer is limited
1151 * to XATTR_SIZE_MAX which currently is 65536.
1152 *
1153 * Return: On success the length of the xattr value is returned. On error a
1154 * negative error code is returned.
1155 */
1156int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
1157		     void *buffer, size_t size)
1158{
1159	struct simple_xattr *xattr = NULL;
1160	struct rb_node *rbp;
1161	int ret = -ENODATA;
1162
1163	read_lock(&xattrs->lock);
1164	rbp = rb_find(name, &xattrs->rb_root, rbtree_simple_xattr_cmp);
1165	if (rbp) {
1166		xattr = rb_entry(rbp, struct simple_xattr, rb_node);
1167		ret = xattr->size;
1168		if (buffer) {
1169			if (size < xattr->size)
1170				ret = -ERANGE;
1171			else
1172				memcpy(buffer, xattr->value, xattr->size);
1173		}
1174	}
1175	read_unlock(&xattrs->lock);
1176	return ret;
1177}
1178
1179/**
1180 * simple_xattr_set - set an xattr object
1181 * @xattrs: the header of the xattr object
1182 * @name: the name of the xattr to retrieve
1183 * @value: the value to store along the xattr
1184 * @size: the size of @value
1185 * @flags: the flags determining how to set the xattr
1186 *
1187 * Set a new xattr object.
1188 * If @value is passed a new xattr object will be allocated. If XATTR_REPLACE
1189 * is specified in @flags a matching xattr object for @name must already exist.
1190 * If it does it will be replaced with the new xattr object. If it doesn't we
1191 * fail. If XATTR_CREATE is specified and a matching xattr does already exist
1192 * we fail. If it doesn't we create a new xattr. If @flags is zero we simply
1193 * insert the new xattr replacing any existing one.
1194 *
1195 * If @value is empty and a matching xattr object is found we delete it if
1196 * XATTR_REPLACE is specified in @flags or @flags is zero.
1197 *
1198 * If @value is empty and no matching xattr object for @name is found we do
1199 * nothing if XATTR_CREATE is specified in @flags or @flags is zero. For
1200 * XATTR_REPLACE we fail as mentioned above.
1201 *
1202 * Return: On success, the removed or replaced xattr is returned, to be freed
1203 * by the caller; or NULL if none. On failure a negative error code is returned.
1204 */
1205struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
1206				      const char *name, const void *value,
1207				      size_t size, int flags)
1208{
1209	struct simple_xattr *old_xattr = NULL, *new_xattr = NULL;
1210	struct rb_node *parent = NULL, **rbp;
1211	int err = 0, ret;
1212
1213	/* value == NULL means remove */
1214	if (value) {
1215		new_xattr = simple_xattr_alloc(value, size);
1216		if (!new_xattr)
1217			return ERR_PTR(-ENOMEM);
1218
1219		new_xattr->name = kstrdup(name, GFP_KERNEL_ACCOUNT);
1220		if (!new_xattr->name) {
1221			simple_xattr_free(new_xattr);
1222			return ERR_PTR(-ENOMEM);
1223		}
1224	}
1225
1226	write_lock(&xattrs->lock);
1227	rbp = &xattrs->rb_root.rb_node;
1228	while (*rbp) {
1229		parent = *rbp;
1230		ret = rbtree_simple_xattr_cmp(name, *rbp);
1231		if (ret < 0)
1232			rbp = &(*rbp)->rb_left;
1233		else if (ret > 0)
1234			rbp = &(*rbp)->rb_right;
1235		else
1236			old_xattr = rb_entry(*rbp, struct simple_xattr, rb_node);
1237		if (old_xattr)
1238			break;
1239	}
1240
1241	if (old_xattr) {
1242		/* Fail if XATTR_CREATE is requested and the xattr exists. */
1243		if (flags & XATTR_CREATE) {
1244			err = -EEXIST;
1245			goto out_unlock;
1246		}
1247
1248		if (new_xattr)
1249			rb_replace_node(&old_xattr->rb_node,
1250					&new_xattr->rb_node, &xattrs->rb_root);
1251		else
1252			rb_erase(&old_xattr->rb_node, &xattrs->rb_root);
1253	} else {
1254		/* Fail if XATTR_REPLACE is requested but no xattr is found. */
1255		if (flags & XATTR_REPLACE) {
1256			err = -ENODATA;
1257			goto out_unlock;
1258		}
1259
1260		/*
1261		 * If XATTR_CREATE or no flags are specified together with a
1262		 * new value simply insert it.
1263		 */
1264		if (new_xattr) {
1265			rb_link_node(&new_xattr->rb_node, parent, rbp);
1266			rb_insert_color(&new_xattr->rb_node, &xattrs->rb_root);
1267		}
1268
1269		/*
1270		 * If XATTR_CREATE or no flags are specified and neither an
1271		 * old or new xattr exist then we don't need to do anything.
1272		 */
1273	}
1274
1275out_unlock:
1276	write_unlock(&xattrs->lock);
1277	if (!err)
1278		return old_xattr;
1279	simple_xattr_free(new_xattr);
1280	return ERR_PTR(err);
1281}
1282
1283static bool xattr_is_trusted(const char *name)
1284{
1285	return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
1286}
1287
1288/**
1289 * simple_xattr_list - list all xattr objects
1290 * @inode: inode from which to get the xattrs
1291 * @xattrs: the header of the xattr object
1292 * @buffer: the buffer to store all xattrs into
1293 * @size: the size of @buffer
1294 *
1295 * List all xattrs associated with @inode. If @buffer is NULL we returned
1296 * the required size of the buffer. If @buffer is provided we store the
1297 * xattrs value into it provided it is big enough.
1298 *
1299 * Note, the number of xattr names that can be listed with listxattr(2) is
1300 * limited to XATTR_LIST_MAX aka 65536 bytes. If a larger buffer is passed
1301 * then vfs_listxattr() caps it to XATTR_LIST_MAX and if more xattr names
1302 * are found it will return -E2BIG.
1303 *
1304 * Return: On success the required size or the size of the copied xattrs is
1305 * returned. On error a negative error code is returned.
1306 */
1307ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
1308			  char *buffer, size_t size)
1309{
1310	bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
1311	struct simple_xattr *xattr;
1312	struct rb_node *rbp;
1313	ssize_t remaining_size = size;
1314	int err = 0;
1315
1316	err = posix_acl_listxattr(inode, &buffer, &remaining_size);
1317	if (err)
1318		return err;
1319
1320	read_lock(&xattrs->lock);
1321	for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) {
1322		xattr = rb_entry(rbp, struct simple_xattr, rb_node);
1323
1324		/* skip "trusted." attributes for unprivileged callers */
1325		if (!trusted && xattr_is_trusted(xattr->name))
1326			continue;
1327
1328		err = xattr_list_one(&buffer, &remaining_size, xattr->name);
1329		if (err)
1330			break;
1331	}
1332	read_unlock(&xattrs->lock);
1333
1334	return err ? err : size - remaining_size;
1335}
1336
/**
 * rbtree_simple_xattr_less - ordering predicate for two xattr rbtree nodes
 * @new_node: new node
 * @node: current node
 *
 * Compare the xattr attached to @new_node with the xattr attached to @node
 * using rbtree_simple_xattr_node_cmp(). Note that this function technically
 * tolerates duplicate entries.
 *
 * Return: True if that comparison yields a negative value, false otherwise.
 */
static bool rbtree_simple_xattr_less(struct rb_node *new_node,
				     const struct rb_node *node)
{
	const int order = rbtree_simple_xattr_node_cmp(new_node, node);

	return order < 0;
}
1352
1353/**
1354 * simple_xattr_add - add xattr objects
1355 * @xattrs: the header of the xattr object
1356 * @new_xattr: the xattr object to add
1357 *
1358 * Add an xattr object to @xattrs. This assumes no replacement or removal
1359 * of matching xattrs is wanted. Should only be called during inode
1360 * initialization when a few distinct initial xattrs are supposed to be set.
1361 */
1362void simple_xattr_add(struct simple_xattrs *xattrs,
1363		      struct simple_xattr *new_xattr)
1364{
1365	write_lock(&xattrs->lock);
1366	rb_add(&new_xattr->rb_node, &xattrs->rb_root, rbtree_simple_xattr_less);
1367	write_unlock(&xattrs->lock);
1368}
1369
1370/**
1371 * simple_xattrs_init - initialize new xattr header
1372 * @xattrs: header to initialize
1373 *
1374 * Initialize relevant fields of a an xattr header.
1375 */
1376void simple_xattrs_init(struct simple_xattrs *xattrs)
1377{
1378	xattrs->rb_root = RB_ROOT;
1379	rwlock_init(&xattrs->lock);
1380}
1381
1382/**
1383 * simple_xattrs_free - free xattrs
1384 * @xattrs: xattr header whose xattrs to destroy
1385 * @freed_space: approximate number of bytes of memory freed from @xattrs
1386 *
1387 * Destroy all xattrs in @xattr. When this is called no one can hold a
1388 * reference to any of the xattrs anymore.
1389 */
1390void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space)
1391{
1392	struct rb_node *rbp;
1393
1394	if (freed_space)
1395		*freed_space = 0;
1396	rbp = rb_first(&xattrs->rb_root);
1397	while (rbp) {
1398		struct simple_xattr *xattr;
1399		struct rb_node *rbp_next;
1400
1401		rbp_next = rb_next(rbp);
1402		xattr = rb_entry(rbp, struct simple_xattr, rb_node);
1403		rb_erase(&xattr->rb_node, &xattrs->rb_root);
1404		if (freed_space)
1405			*freed_space += simple_xattr_space(xattr->name,
1406							   xattr->size);
1407		simple_xattr_free(xattr);
1408		rbp = rbp_next;
1409	}
1410}
1411