1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
23 *
24 * Extended attributes (xattr) on Solaris are implemented as files
25 * which exist in a hidden xattr directory.  These extended attributes
26 * can be accessed using the attropen() system call which opens
27 * the extended attribute.  It can then be manipulated just like
28 * a standard file descriptor.  This has a couple advantages such
29 * as practically no size limit on the file, and the extended
30 * attributes permissions may differ from those of the parent file.
31 * This interface is really quite clever, but it's also completely
32 * different than what is supported on Linux.  It also comes with a
33 * steep performance penalty when accessing small xattrs because they
34 * are not stored with the parent file.
35 *
36 * Under Linux extended attributes are manipulated by the system
37 * calls getxattr(2), setxattr(2), and listxattr(2).  They consider
38 * extended attributes to be name/value pairs where the name is a
39 * NULL terminated string.  The name must also include one of the
40 * following namespace prefixes:
41 *
42 *   user     - No restrictions and is available to user applications.
43 *   trusted  - Restricted to kernel and root (CAP_SYS_ADMIN) use.
44 *   system   - Used for access control lists (system.nfs4_acl, etc).
 *   security - Used by SELinux to store a file's security context.
46 *
 * The value under Linux is limited to 65536 bytes of binary data.
48 * In practice, individual xattrs tend to be much smaller than this
49 * and are typically less than 100 bytes.  A good example of this
50 * are the security.selinux xattrs which are less than 100 bytes and
51 * exist for every file when xattr labeling is enabled.
52 *
53 * The Linux xattr implementation has been written to take advantage of
54 * this typical usage.  When the dataset property 'xattr=sa' is set,
55 * then xattrs will be preferentially stored as System Attributes (SA).
56 * This allows tiny xattrs (~100 bytes) to be stored with the dnode and
57 * up to 64k of xattrs to be stored in the spill block.  If additional
58 * xattr space is required, which is unlikely under Linux, they will
59 * be stored using the traditional directory approach.
60 *
61 * This optimization results in roughly a 3x performance improvement
62 * when accessing xattrs because it avoids the need to perform a seek
63 * for every xattr value.  When multiple xattrs are stored per-file
64 * the performance improvements are even greater because all of the
65 * xattrs stored in the spill block will be cached.
66 *
67 * However, by default SA based xattrs are disabled in the Linux port
68 * to maximize compatibility with other implementations.  If you do
69 * enable SA based xattrs then they will not be visible on platforms
70 * which do not support this feature.
71 *
72 * NOTE: One additional consequence of the xattr directory implementation
73 * is that when an extended attribute is manipulated an inode is created.
74 * This inode will exist in the Linux inode cache but there will be no
75 * associated entry in the dentry cache which references it.  This is
76 * safe but it may result in some confusion.  Enabling SA based xattrs
77 * largely avoids the issue except in the overflow case.
78 */
79
80#include <sys/zfs_znode.h>
81#include <sys/zfs_vfsops.h>
82#include <sys/zfs_vnops.h>
83#include <sys/zap.h>
84#include <sys/vfs.h>
85#include <sys/zpl.h>
86
/*
 * Cursor state shared by the xattr listing helpers while names are
 * accumulated for a single dentry.  The field order matters: the
 * structure is initialized positionally by zpl_xattr_list().
 */
typedef struct xattr_filldir {
	/* capacity of buf in bytes */
	size_t size;
	/* bytes consumed so far; still advanced when buf is NULL */
	size_t offset;
	/* destination for NUL-terminated names, or NULL to only size */
	char *buf;
	/* dentry whose xattrs are being listed */
	struct dentry *dentry;
} xattr_filldir_t;
93
94static const struct xattr_handler *zpl_xattr_handler(const char *);
95
96static int
97zpl_xattr_permission(xattr_filldir_t *xf, const char *name, int name_len)
98{
99	static const struct xattr_handler *handler;
100	struct dentry *d = xf->dentry;
101
102	handler = zpl_xattr_handler(name);
103	if (!handler)
104		return (0);
105
106	if (handler->list) {
107#if defined(HAVE_XATTR_LIST_SIMPLE)
108		if (!handler->list(d))
109			return (0);
110#elif defined(HAVE_XATTR_LIST_DENTRY)
111		if (!handler->list(d, NULL, 0, name, name_len, 0))
112			return (0);
113#elif defined(HAVE_XATTR_LIST_HANDLER)
114		if (!handler->list(handler, d, NULL, 0, name, name_len))
115			return (0);
116#endif
117	}
118
119	return (1);
120}
121
122/*
123 * Determine is a given xattr name should be visible and if so copy it
124 * in to the provided buffer (xf->buf).
125 */
126static int
127zpl_xattr_filldir(xattr_filldir_t *xf, const char *name, int name_len)
128{
129	/* Check permissions using the per-namespace list xattr handler. */
130	if (!zpl_xattr_permission(xf, name, name_len))
131		return (0);
132
133	/* When xf->buf is NULL only calculate the required size. */
134	if (xf->buf) {
135		if (xf->offset + name_len + 1 > xf->size)
136			return (-ERANGE);
137
138		memcpy(xf->buf + xf->offset, name, name_len);
139		xf->buf[xf->offset + name_len] = '\0';
140	}
141
142	xf->offset += (name_len + 1);
143
144	return (0);
145}
146
147/*
148 * Read as many directory entry names as will fit in to the provided buffer,
149 * or when no buffer is provided calculate the required buffer size.
150 */
151static int
152zpl_xattr_readdir(struct inode *dxip, xattr_filldir_t *xf)
153{
154	zap_cursor_t zc;
155	zap_attribute_t	zap;
156	int error;
157
158	zap_cursor_init(&zc, ITOZSB(dxip)->z_os, ITOZ(dxip)->z_id);
159
160	while ((error = -zap_cursor_retrieve(&zc, &zap)) == 0) {
161
162		if (zap.za_integer_length != 8 || zap.za_num_integers != 1) {
163			error = -ENXIO;
164			break;
165		}
166
167		error = zpl_xattr_filldir(xf, zap.za_name, strlen(zap.za_name));
168		if (error)
169			break;
170
171		zap_cursor_advance(&zc);
172	}
173
174	zap_cursor_fini(&zc);
175
176	if (error == -ENOENT)
177		error = 0;
178
179	return (error);
180}
181
182static ssize_t
183zpl_xattr_list_dir(xattr_filldir_t *xf, cred_t *cr)
184{
185	struct inode *ip = xf->dentry->d_inode;
186	struct inode *dxip = NULL;
187	znode_t *dxzp;
188	int error;
189
190	/* Lookup the xattr directory */
191	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, LOOKUP_XATTR,
192	    cr, NULL, NULL);
193	if (error) {
194		if (error == -ENOENT)
195			error = 0;
196
197		return (error);
198	}
199
200	dxip = ZTOI(dxzp);
201	error = zpl_xattr_readdir(dxip, xf);
202	iput(dxip);
203
204	return (error);
205}
206
207static ssize_t
208zpl_xattr_list_sa(xattr_filldir_t *xf)
209{
210	znode_t *zp = ITOZ(xf->dentry->d_inode);
211	nvpair_t *nvp = NULL;
212	int error = 0;
213
214	mutex_enter(&zp->z_lock);
215	if (zp->z_xattr_cached == NULL)
216		error = -zfs_sa_get_xattr(zp);
217	mutex_exit(&zp->z_lock);
218
219	if (error)
220		return (error);
221
222	ASSERT(zp->z_xattr_cached);
223
224	while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
225		ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
226
227		error = zpl_xattr_filldir(xf, nvpair_name(nvp),
228		    strlen(nvpair_name(nvp)));
229		if (error)
230			return (error);
231	}
232
233	return (0);
234}
235
/*
 * List the extended attribute names visible on dentry.
 *
 * Names are gathered first from the SA-cached xattrs (only when both
 * zfsvfs->z_use_sa and zp->z_is_sa are set) and then from the xattr
 * directory, all while holding zp->z_xattr_lock as reader.
 *
 * buffer/buffer_size describe the destination; when buffer is NULL the
 * helpers only total up the space the names would need.  On success the
 * accumulated byte count (xf.offset) is returned, otherwise the negative
 * errno of the helper that failed.
 */
ssize_t
zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
	znode_t *zp = ITOZ(dentry->d_inode);
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	/* Positional init: size, offset, buf, dentry. */
	xattr_filldir_t xf = { buffer_size, 0, buffer, dentry };
	cred_t *cr = CRED();
	fstrans_cookie_t cookie;
	int error = 0;

	crhold(cr);
	cookie = spl_fstrans_mark();
	/*
	 * NOTE(review): if ZPL_ENTER/ZPL_VERIFY_ZP can return early, the
	 * cred hold and fstrans mark taken above would not be undone --
	 * confirm against the macro definitions.
	 */
	ZPL_ENTER(zfsvfs);
	ZPL_VERIFY_ZP(zp);
	rw_enter(&zp->z_xattr_lock, RW_READER);

	if (zfsvfs->z_use_sa && zp->z_is_sa) {
		error = zpl_xattr_list_sa(&xf);
		if (error)
			goto out;
	}

	error = zpl_xattr_list_dir(&xf, cr);
	if (error)
		goto out;

	/* Success: report the number of bytes used (or required). */
	error = xf.offset;
out:

	rw_exit(&zp->z_xattr_lock);
	ZPL_EXIT(zfsvfs);
	spl_fstrans_unmark(cookie);
	crfree(cr);

	return (error);
}
272
273static int
274zpl_xattr_get_dir(struct inode *ip, const char *name, void *value,
275    size_t size, cred_t *cr)
276{
277	fstrans_cookie_t cookie;
278	struct inode *xip = NULL;
279	znode_t *dxzp = NULL;
280	znode_t *xzp = NULL;
281	int error;
282
283	/* Lookup the xattr directory */
284	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, LOOKUP_XATTR,
285	    cr, NULL, NULL);
286	if (error)
287		goto out;
288
289	/* Lookup a specific xattr name in the directory */
290	error = -zfs_lookup(dxzp, (char *)name, &xzp, 0, cr, NULL, NULL);
291	if (error)
292		goto out;
293
294	xip = ZTOI(xzp);
295	if (!size) {
296		error = i_size_read(xip);
297		goto out;
298	}
299
300	if (size < i_size_read(xip)) {
301		error = -ERANGE;
302		goto out;
303	}
304
305	struct iovec iov;
306	iov.iov_base = (void *)value;
307	iov.iov_len = size;
308
309	zfs_uio_t uio;
310	zfs_uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, size, 0);
311
312	cookie = spl_fstrans_mark();
313	error = -zfs_read(ITOZ(xip), &uio, 0, cr);
314	spl_fstrans_unmark(cookie);
315
316	if (error == 0)
317		error = size - zfs_uio_resid(&uio);
318out:
319	if (xzp)
320		zrele(xzp);
321
322	if (dxzp)
323		zrele(dxzp);
324
325	return (error);
326}
327
328static int
329zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size)
330{
331	znode_t *zp = ITOZ(ip);
332	uchar_t *nv_value;
333	uint_t nv_size;
334	int error = 0;
335
336	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
337
338	mutex_enter(&zp->z_lock);
339	if (zp->z_xattr_cached == NULL)
340		error = -zfs_sa_get_xattr(zp);
341	mutex_exit(&zp->z_lock);
342
343	if (error)
344		return (error);
345
346	ASSERT(zp->z_xattr_cached);
347	error = -nvlist_lookup_byte_array(zp->z_xattr_cached, name,
348	    &nv_value, &nv_size);
349	if (error)
350		return (error);
351
352	if (size == 0 || value == NULL)
353		return (nv_size);
354
355	if (size < nv_size)
356		return (-ERANGE);
357
358	memcpy(value, nv_value, nv_size);
359
360	return (nv_size);
361}
362
363static int
364__zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size,
365    cred_t *cr)
366{
367	znode_t *zp = ITOZ(ip);
368	zfsvfs_t *zfsvfs = ZTOZSB(zp);
369	int error;
370
371	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
372
373	if (zfsvfs->z_use_sa && zp->z_is_sa) {
374		error = zpl_xattr_get_sa(ip, name, value, size);
375		if (error != -ENOENT)
376			goto out;
377	}
378
379	error = zpl_xattr_get_dir(ip, name, value, size, cr);
380out:
381	if (error == -ENOENT)
382		error = -ENODATA;
383
384	return (error);
385}
386
387#define	XATTR_NOENT	0x0
388#define	XATTR_IN_SA	0x1
389#define	XATTR_IN_DIR	0x2
390/* check where the xattr resides */
391static int
392__zpl_xattr_where(struct inode *ip, const char *name, int *where, cred_t *cr)
393{
394	znode_t *zp = ITOZ(ip);
395	zfsvfs_t *zfsvfs = ZTOZSB(zp);
396	int error;
397
398	ASSERT(where);
399	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
400
401	*where = XATTR_NOENT;
402	if (zfsvfs->z_use_sa && zp->z_is_sa) {
403		error = zpl_xattr_get_sa(ip, name, NULL, 0);
404		if (error >= 0)
405			*where |= XATTR_IN_SA;
406		else if (error != -ENOENT)
407			return (error);
408	}
409
410	error = zpl_xattr_get_dir(ip, name, NULL, 0, cr);
411	if (error >= 0)
412		*where |= XATTR_IN_DIR;
413	else if (error != -ENOENT)
414		return (error);
415
416	if (*where == (XATTR_IN_SA|XATTR_IN_DIR))
417		cmn_err(CE_WARN, "ZFS: inode %p has xattr \"%s\""
418		    " in both SA and dir", ip, name);
419	if (*where == XATTR_NOENT)
420		error = -ENODATA;
421	else
422		error = 0;
423	return (error);
424}
425
/*
 * Locked entry point for reading an xattr: takes a hold on the current
 * credentials, marks the fstrans context, enters the filesystem, and
 * calls __zpl_xattr_get() with zp->z_xattr_lock held as reader.
 *
 * Returns whatever __zpl_xattr_get() returns (a length or a negative
 * errno).
 */
static int
zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size)
{
	znode_t *zp = ITOZ(ip);
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	cred_t *cr = CRED();
	fstrans_cookie_t cookie;
	int error;

	crhold(cr);
	cookie = spl_fstrans_mark();
	/*
	 * NOTE(review): if ZPL_ENTER/ZPL_VERIFY_ZP can return early, the
	 * cred hold and fstrans mark taken above would not be undone --
	 * confirm against the macro definitions.
	 */
	ZPL_ENTER(zfsvfs);
	ZPL_VERIFY_ZP(zp);
	rw_enter(&zp->z_xattr_lock, RW_READER);
	error = __zpl_xattr_get(ip, name, value, size, cr);
	rw_exit(&zp->z_xattr_lock);
	ZPL_EXIT(zfsvfs);
	spl_fstrans_unmark(cookie);
	crfree(cr);

	return (error);
}
448
/*
 * Set or remove an xattr stored as a file in the xattr directory of ip.
 *
 * value == NULL requests removal; a name that does not exist is then not
 * an error.  Otherwise the xattr file is created if missing, passed to
 * zfs_freesp() (presumably truncating any previous contents), and
 * rewritten with the size bytes at value.  On success the parent inode's
 * ctime is updated and the inode is marked dirty.
 *
 * The flags argument is accepted but not referenced in this function.
 *
 * Returns 0 on success or a negative errno; -ENOENT is reported as
 * -ENODATA.
 */
static int
zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
    size_t size, int flags, cred_t *cr)
{
	znode_t *dxzp = NULL;
	znode_t *xzp = NULL;
	vattr_t *vap = NULL;
	int lookup_flags, error;
	const int xattr_mode = S_IFREG | 0644;
	loff_t pos = 0;

	/*
	 * Lookup the xattr directory.  When we're adding an entry pass
	 * CREATE_XATTR_DIR to ensure the xattr directory is created.
	 * When removing an entry this flag is not passed to avoid
	 * unnecessarily creating a new xattr directory.
	 */
	lookup_flags = LOOKUP_XATTR;
	if (value != NULL)
		lookup_flags |= CREATE_XATTR_DIR;

	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, lookup_flags,
	    cr, NULL, NULL);
	if (error)
		goto out;

	/* Lookup a specific xattr name in the directory */
	error = -zfs_lookup(dxzp, (char *)name, &xzp, 0, cr, NULL, NULL);
	if (error && (error != -ENOENT))
		goto out;

	/* A missing entry is fine from here on; xzp stays NULL for it. */
	error = 0;

	/* Remove a specific name xattr when value is set to NULL. */
	if (value == NULL) {
		if (xzp)
			error = -zfs_remove(dxzp, (char *)name, cr, 0);

		goto out;
	}

	/* Lookup failed create a new xattr. */
	if (xzp == NULL) {
		vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
		vap->va_mode = xattr_mode;
		vap->va_mask = ATTR_MODE;
		vap->va_uid = crgetfsuid(cr);
		vap->va_gid = crgetfsgid(cr);

		error = -zfs_create(dxzp, (char *)name, vap, 0, 0644, &xzp,
		    cr, 0, NULL);
		if (error)
			goto out;
	}

	ASSERT(xzp != NULL);

	/* Discard the old value before writing the new one from offset 0. */
	error = -zfs_freesp(xzp, 0, 0, xattr_mode, TRUE);
	if (error)
		goto out;

	error = -zfs_write_simple(xzp, value, size, pos, NULL);
out:
	/* Any successful change (including removal) bumps the parent ctime. */
	if (error == 0) {
		ip->i_ctime = current_time(ip);
		zfs_mark_inode_dirty(ip);
	}

	if (vap)
		kmem_free(vap, sizeof (vattr_t));

	if (xzp)
		zrele(xzp);

	if (dxzp)
		zrele(dxzp);

	if (error == -ENOENT)
		error = -ENODATA;

	ASSERT3S(error, <=, 0);

	return (error);
}
533
534static int
535zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value,
536    size_t size, int flags, cred_t *cr)
537{
538	znode_t *zp = ITOZ(ip);
539	nvlist_t *nvl;
540	size_t sa_size;
541	int error = 0;
542
543	mutex_enter(&zp->z_lock);
544	if (zp->z_xattr_cached == NULL)
545		error = -zfs_sa_get_xattr(zp);
546	mutex_exit(&zp->z_lock);
547
548	if (error)
549		return (error);
550
551	ASSERT(zp->z_xattr_cached);
552	nvl = zp->z_xattr_cached;
553
554	if (value == NULL) {
555		error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY);
556		if (error == -ENOENT)
557			error = zpl_xattr_set_dir(ip, name, NULL, 0, flags, cr);
558	} else {
559		/* Limited to 32k to keep nvpair memory allocations small */
560		if (size > DXATTR_MAX_ENTRY_SIZE)
561			return (-EFBIG);
562
563		/* Prevent the DXATTR SA from consuming the entire SA region */
564		error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
565		if (error)
566			return (error);
567
568		if (sa_size > DXATTR_MAX_SA_SIZE)
569			return (-EFBIG);
570
571		error = -nvlist_add_byte_array(nvl, name,
572		    (uchar_t *)value, size);
573	}
574
575	/*
576	 * Update the SA for additions, modifications, and removals. On
577	 * error drop the inconsistent cached version of the nvlist, it
578	 * will be reconstructed from the ARC when next accessed.
579	 */
580	if (error == 0)
581		error = -zfs_sa_set_xattr(zp);
582
583	if (error) {
584		nvlist_free(nvl);
585		zp->z_xattr_cached = NULL;
586	}
587
588	ASSERT3S(error, <=, 0);
589
590	return (error);
591}
592
/*
 * Create, replace or remove (value == NULL) the xattr called name on ip.
 *
 * The operation runs with z_xattr_lock held as writer.  The SA store is
 * tried first when it is usable and preferred; if that is skipped or
 * fails, the xattr directory is used.  After a successful write to one
 * store, a copy previously found in the other store is removed.
 *
 * Returns 0 on success or a negative errno.
 */
static int
zpl_xattr_set(struct inode *ip, const char *name, const void *value,
    size_t size, int flags)
{
	znode_t *zp = ITOZ(ip);
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	cred_t *cr = CRED();
	fstrans_cookie_t cookie;
	int where;
	int error;

	crhold(cr);
	cookie = spl_fstrans_mark();
	/*
	 * NOTE(review): if ZPL_ENTER/ZPL_VERIFY_ZP can return early, the
	 * cred hold and fstrans mark taken above would not be undone --
	 * confirm against the macro definitions.
	 */
	ZPL_ENTER(zfsvfs);
	ZPL_VERIFY_ZP(zp);
	rw_enter(&ITOZ(ip)->z_xattr_lock, RW_WRITER);

	/*
	 * Before setting the xattr check to see if it already exists.
	 * This is done to ensure the following optional flags are honored.
	 *
	 *   XATTR_CREATE: fail if xattr already exists
	 *   XATTR_REPLACE: fail if xattr does not exist
	 *
	 * We also want to know if it resides in sa or dir, so we can make
	 * sure we don't end up with duplicate in both places.
	 */
	error = __zpl_xattr_where(ip, name, &where, cr);
	if (error < 0) {
		/* Anything other than "does not exist" is a hard failure. */
		if (error != -ENODATA)
			goto out;
		/* XATTR_REPLACE on a missing xattr fails with -ENODATA. */
		if (flags & XATTR_REPLACE)
			goto out;

		/* The xattr to be removed already doesn't exist */
		error = 0;
		if (value == NULL)
			goto out;
	} else {
		/* XATTR_CREATE on an existing xattr fails with -EEXIST. */
		error = -EEXIST;
		if (flags & XATTR_CREATE)
			goto out;
	}

	/* Preferentially store the xattr as a SA for better performance */
	if (zfsvfs->z_use_sa && zp->z_is_sa &&
	    (zfsvfs->z_xattr_sa || (value == NULL && where & XATTR_IN_SA))) {
		error = zpl_xattr_set_sa(ip, name, value, size, flags, cr);
		if (error == 0) {
			/*
			 * Successfully put into SA, we need to clear the one
			 * in dir.
			 */
			if (where & XATTR_IN_DIR)
				zpl_xattr_set_dir(ip, name, NULL, 0, 0, cr);
			goto out;
		}
	}

	/* SA path skipped or failed: fall back to the xattr directory. */
	error = zpl_xattr_set_dir(ip, name, value, size, flags, cr);
	/*
	 * Successfully put into dir, we need to clear the one in SA.
	 */
	if (error == 0 && (where & XATTR_IN_SA))
		zpl_xattr_set_sa(ip, name, NULL, 0, 0, cr);
out:
	rw_exit(&ITOZ(ip)->z_xattr_lock);
	ZPL_EXIT(zfsvfs);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}
667
668/*
669 * Extended user attributes
670 *
671 * "Extended user attributes may be assigned to files and directories for
672 * storing arbitrary additional information such as the mime type,
673 * character set or encoding of a file.  The access permissions for user
674 * attributes are defined by the file permission bits: read permission
675 * is required to retrieve the attribute value, and writer permission is
676 * required to change it.
677 *
678 * The file permission bits of regular files and directories are
679 * interpreted differently from the file permission bits of special
680 * files and symbolic links.  For regular files and directories the file
681 * permission bits define access to the file's contents, while for
682 * device special files they define access to the device described by
683 * the special file.  The file permissions of symbolic links are not
684 * used in access checks.  These differences would allow users to
685 * consume filesystem resources in a way not controllable by disk quotas
686 * for group or world writable special files and directories.
687 *
688 * For this reason, extended user attributes are allowed only for
689 * regular files and directories, and access to extended user attributes
690 * is restricted to the owner and to users with appropriate capabilities
691 * for directories with the sticky bit set (see the chmod(1) manual page
692 * for an explanation of the sticky bit)." - xattr(7)
693 *
694 * ZFS allows extended user attributes to be disabled administratively
695 * by setting the 'xattr=off' property on the dataset.
696 */
697static int
698__zpl_xattr_user_list(struct inode *ip, char *list, size_t list_size,
699    const char *name, size_t name_len)
700{
701	return (ITOZSB(ip)->z_flags & ZSB_XATTR);
702}
703ZPL_XATTR_LIST_WRAPPER(zpl_xattr_user_list);
704
705static int
706__zpl_xattr_user_get(struct inode *ip, const char *name,
707    void *value, size_t size)
708{
709	char *xattr_name;
710	int error;
711	/* xattr_resolve_name will do this for us if this is defined */
712#ifndef HAVE_XATTR_HANDLER_NAME
713	if (strcmp(name, "") == 0)
714		return (-EINVAL);
715#endif
716	if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
717		return (-EOPNOTSUPP);
718
719	xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
720	error = zpl_xattr_get(ip, xattr_name, value, size);
721	kmem_strfree(xattr_name);
722
723	return (error);
724}
725ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get);
726
727static int
728__zpl_xattr_user_set(struct inode *ip, const char *name,
729    const void *value, size_t size, int flags)
730{
731	char *xattr_name;
732	int error;
733	/* xattr_resolve_name will do this for us if this is defined */
734#ifndef HAVE_XATTR_HANDLER_NAME
735	if (strcmp(name, "") == 0)
736		return (-EINVAL);
737#endif
738	if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
739		return (-EOPNOTSUPP);
740
741	xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
742	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
743	kmem_strfree(xattr_name);
744
745	return (error);
746}
747ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set);
748
/*
 * Handler for names beginning with XATTR_USER_PREFIX.  The callbacks are
 * presumably the kernel-facing wrappers produced by the
 * ZPL_XATTR_{LIST,GET,SET}_WRAPPER invocations on the __zpl_xattr_user_*
 * functions.
 */
xattr_handler_t zpl_xattr_user_handler =
{
	.prefix	= XATTR_USER_PREFIX,
	.list	= zpl_xattr_user_list,
	.get	= zpl_xattr_user_get,
	.set	= zpl_xattr_user_set,
};
756
757/*
758 * Trusted extended attributes
759 *
760 * "Trusted extended attributes are visible and accessible only to
761 * processes that have the CAP_SYS_ADMIN capability.  Attributes in this
762 * class are used to implement mechanisms in user space (i.e., outside
763 * the kernel) which keep information in extended attributes to which
764 * ordinary processes should not have access." - xattr(7)
765 */
766static int
767__zpl_xattr_trusted_list(struct inode *ip, char *list, size_t list_size,
768    const char *name, size_t name_len)
769{
770	return (capable(CAP_SYS_ADMIN));
771}
772ZPL_XATTR_LIST_WRAPPER(zpl_xattr_trusted_list);
773
774static int
775__zpl_xattr_trusted_get(struct inode *ip, const char *name,
776    void *value, size_t size)
777{
778	char *xattr_name;
779	int error;
780
781	if (!capable(CAP_SYS_ADMIN))
782		return (-EACCES);
783	/* xattr_resolve_name will do this for us if this is defined */
784#ifndef HAVE_XATTR_HANDLER_NAME
785	if (strcmp(name, "") == 0)
786		return (-EINVAL);
787#endif
788	xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
789	error = zpl_xattr_get(ip, xattr_name, value, size);
790	kmem_strfree(xattr_name);
791
792	return (error);
793}
794ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);
795
796static int
797__zpl_xattr_trusted_set(struct inode *ip, const char *name,
798    const void *value, size_t size, int flags)
799{
800	char *xattr_name;
801	int error;
802
803	if (!capable(CAP_SYS_ADMIN))
804		return (-EACCES);
805	/* xattr_resolve_name will do this for us if this is defined */
806#ifndef HAVE_XATTR_HANDLER_NAME
807	if (strcmp(name, "") == 0)
808		return (-EINVAL);
809#endif
810	xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
811	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
812	kmem_strfree(xattr_name);
813
814	return (error);
815}
816ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set);
817
/*
 * Handler for names beginning with XATTR_TRUSTED_PREFIX.  The callbacks
 * are presumably the kernel-facing wrappers produced by the
 * ZPL_XATTR_{LIST,GET,SET}_WRAPPER invocations on the
 * __zpl_xattr_trusted_* functions.
 */
xattr_handler_t zpl_xattr_trusted_handler =
{
	.prefix	= XATTR_TRUSTED_PREFIX,
	.list	= zpl_xattr_trusted_list,
	.get	= zpl_xattr_trusted_get,
	.set	= zpl_xattr_trusted_set,
};
825
826/*
827 * Extended security attributes
828 *
829 * "The security attribute namespace is used by kernel security modules,
830 * such as Security Enhanced Linux, and also to implement file
831 * capabilities (see capabilities(7)).  Read and write access
832 * permissions to security attributes depend on the policy implemented
833 * for each security attribute by the security module.  When no security
834 * module is loaded, all processes have read access to extended security
835 * attributes, and write access is limited to processes that have the
836 * CAP_SYS_ADMIN capability." - xattr(7)
837 */
/*
 * List callback for the security namespace: always returns 1, so these
 * names are never filtered out here.  None of the arguments are used.
 */
static int
__zpl_xattr_security_list(struct inode *ip, char *list, size_t list_size,
    const char *name, size_t name_len)
{
	return (1);
}
ZPL_XATTR_LIST_WRAPPER(zpl_xattr_security_list);
845
846static int
847__zpl_xattr_security_get(struct inode *ip, const char *name,
848    void *value, size_t size)
849{
850	char *xattr_name;
851	int error;
852	/* xattr_resolve_name will do this for us if this is defined */
853#ifndef HAVE_XATTR_HANDLER_NAME
854	if (strcmp(name, "") == 0)
855		return (-EINVAL);
856#endif
857	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
858	error = zpl_xattr_get(ip, xattr_name, value, size);
859	kmem_strfree(xattr_name);
860
861	return (error);
862}
863ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
864
865static int
866__zpl_xattr_security_set(struct inode *ip, const char *name,
867    const void *value, size_t size, int flags)
868{
869	char *xattr_name;
870	int error;
871	/* xattr_resolve_name will do this for us if this is defined */
872#ifndef HAVE_XATTR_HANDLER_NAME
873	if (strcmp(name, "") == 0)
874		return (-EINVAL);
875#endif
876	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
877	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
878	kmem_strfree(xattr_name);
879
880	return (error);
881}
882ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set);
883
884static int
885zpl_xattr_security_init_impl(struct inode *ip, const struct xattr *xattrs,
886    void *fs_info)
887{
888	const struct xattr *xattr;
889	int error = 0;
890
891	for (xattr = xattrs; xattr->name != NULL; xattr++) {
892		error = __zpl_xattr_security_set(ip,
893		    xattr->name, xattr->value, xattr->value_len, 0);
894
895		if (error < 0)
896			break;
897	}
898
899	return (error);
900}
901
902int
903zpl_xattr_security_init(struct inode *ip, struct inode *dip,
904    const struct qstr *qstr)
905{
906	return security_inode_init_security(ip, dip, qstr,
907	    &zpl_xattr_security_init_impl, NULL);
908}
909
/*
 * Security xattr namespace handlers.
 *
 * Handles names beginning with XATTR_SECURITY_PREFIX.  The callbacks are
 * presumably the kernel-facing wrappers produced by the
 * ZPL_XATTR_{LIST,GET,SET}_WRAPPER invocations on the
 * __zpl_xattr_security_* functions.
 */
xattr_handler_t zpl_xattr_security_handler = {
	.prefix	= XATTR_SECURITY_PREFIX,
	.list	= zpl_xattr_security_list,
	.get	= zpl_xattr_security_get,
	.set	= zpl_xattr_security_set,
};
919
920/*
921 * Extended system attributes
922 *
923 * "Extended system attributes are used by the kernel to store system
924 * objects such as Access Control Lists.  Read and write access permissions
925 * to system attributes depend on the policy implemented for each system
926 * attribute implemented by filesystems in the kernel." - xattr(7)
927 */
928#ifdef CONFIG_FS_POSIX_ACL
/*
 * Store (or, when acl ends up NULL, remove) the POSIX ACL of the given
 * type on ip as an xattr and keep the cached ACL in step.
 *
 * Symbolic links are rejected with -EOPNOTSUPP and unknown types with
 * -EINVAL.  A default ACL on a non-directory yields -EACCES, or 0 when
 * acl is NULL.  For access ACLs the inode mode is brought in line with
 * the result of posix_acl_equiv_mode().
 *
 * Returns 0 on success or a negative errno.
 */
static int
zpl_set_acl_impl(struct inode *ip, struct posix_acl *acl, int type)
{
	char *name, *value = NULL;
	int error = 0;
	size_t size = 0;

	if (S_ISLNK(ip->i_mode))
		return (-EOPNOTSUPP);

	switch (type) {
	case ACL_TYPE_ACCESS:
		name = XATTR_NAME_POSIX_ACL_ACCESS;
		if (acl) {
			umode_t mode = ip->i_mode;
			error = posix_acl_equiv_mode(acl, &mode);
			if (error < 0) {
				return (error);
			} else {
				/*
				 * The mode bits will have been set by
				 * ->zfs_setattr()->zfs_acl_chmod_setattr()
				 * using the ZFS ACL conversion.  If they
				 * differ from the Posix ACL conversion dirty
				 * the inode to write the Posix mode bits.
				 */
				if (ip->i_mode != mode) {
					ip->i_mode = mode;
					ip->i_ctime = current_time(ip);
					zfs_mark_inode_dirty(ip);
				}

				/*
				 * A zero result presumably means the mode
				 * bits alone express the ACL; with acl set
				 * to NULL the xattr is removed below.
				 */
				if (error == 0)
					acl = NULL;
			}
		}
		break;

	case ACL_TYPE_DEFAULT:
		name = XATTR_NAME_POSIX_ACL_DEFAULT;
		if (!S_ISDIR(ip->i_mode))
			return (acl ? -EACCES : 0);
		break;

	default:
		return (-EINVAL);
	}

	/* Serialize the ACL into a temporary xattr value buffer. */
	if (acl) {
		size = posix_acl_xattr_size(acl->a_count);
		value = kmem_alloc(size, KM_SLEEP);

		error = zpl_acl_to_xattr(acl, value, size);
		if (error < 0) {
			kmem_free(value, size);
			return (error);
		}
	}

	/* With value == NULL this requests removal of the xattr. */
	error = zpl_xattr_set(ip, name, value, size, 0);
	if (value)
		kmem_free(value, size);

	if (!error) {
		if (acl)
			zpl_set_cached_acl(ip, type, acl);
		else
			zpl_forget_cached_acl(ip, type);
	}

	return (error);
}
1001
#ifdef HAVE_SET_ACL
/*
 * Public ACL setter, built only when HAVE_SET_ACL is defined.  The
 * signature gains a leading user namespace argument when
 * HAVE_SET_ACL_USERNS is defined; that argument is not used here and the
 * work is delegated unchanged to zpl_set_acl_impl().
 */
int
#ifdef HAVE_SET_ACL_USERNS
zpl_set_acl(struct user_namespace *userns, struct inode *ip,
    struct posix_acl *acl, int type)
#else
zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type)
#endif /* HAVE_SET_ACL_USERNS */
{
	return (zpl_set_acl_impl(ip, acl, type));
}
#endif /* HAVE_SET_ACL */
1014
1015struct posix_acl *
1016zpl_get_acl(struct inode *ip, int type)
1017{
1018	struct posix_acl *acl;
1019	void *value = NULL;
1020	char *name;
1021	int size;
1022
1023	/*
1024	 * As of Linux 3.14, the kernel get_acl will check this for us.
1025	 * Also as of Linux 4.7, comparing against ACL_NOT_CACHED is wrong
1026	 * as the kernel get_acl will set it to temporary sentinel value.
1027	 */
1028#ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE
1029	acl = get_cached_acl(ip, type);
1030	if (acl != ACL_NOT_CACHED)
1031		return (acl);
1032#endif
1033
1034	switch (type) {
1035	case ACL_TYPE_ACCESS:
1036		name = XATTR_NAME_POSIX_ACL_ACCESS;
1037		break;
1038	case ACL_TYPE_DEFAULT:
1039		name = XATTR_NAME_POSIX_ACL_DEFAULT;
1040		break;
1041	default:
1042		return (ERR_PTR(-EINVAL));
1043	}
1044
1045	size = zpl_xattr_get(ip, name, NULL, 0);
1046	if (size > 0) {
1047		value = kmem_alloc(size, KM_SLEEP);
1048		size = zpl_xattr_get(ip, name, value, size);
1049	}
1050
1051	if (size > 0) {
1052		acl = zpl_acl_from_xattr(value, size);
1053	} else if (size == -ENODATA || size == -ENOSYS) {
1054		acl = NULL;
1055	} else {
1056		acl = ERR_PTR(-EIO);
1057	}
1058
1059	if (size > 0)
1060		kmem_free(value, size);
1061
1062	/* As of Linux 4.7, the kernel get_acl will set this for us */
1063#ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE
1064	if (!IS_ERR(acl))
1065		zpl_set_cached_acl(ip, type, acl);
1066#endif
1067
1068	return (acl);
1069}
1070
/*
 * Initialize the ACL state of inode ip from the default ACL of dir.
 *
 * Nothing is done unless the dataset's z_acl_type is ZFS_ACLTYPE_POSIX.
 * For anything but a symlink, the default ACL of dir is fetched; when
 * there is none the current umask is applied to ip's mode instead.  When
 * one exists it is stored as ip's default ACL if ip is a directory, and
 * __posix_acl_create() is then used to derive ip's mode and, if it
 * returns a positive value, an access ACL that is stored as well.
 *
 * Returns 0 on success, or a negative errno; NOTE(review): a positive
 * result from __posix_acl_create() is replaced by the result of the
 * access ACL store, so callers presumably only ever see <= 0 -- confirm.
 */
int
zpl_init_acl(struct inode *ip, struct inode *dir)
{
	struct posix_acl *acl = NULL;
	int error = 0;

	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
		return (0);

	if (!S_ISLNK(ip->i_mode)) {
		acl = zpl_get_acl(dir, ACL_TYPE_DEFAULT);
		if (IS_ERR(acl))
			return (PTR_ERR(acl));
		/* No default ACL to inherit: fall back to the umask. */
		if (!acl) {
			ip->i_mode &= ~current_umask();
			ip->i_ctime = current_time(ip);
			zfs_mark_inode_dirty(ip);
			return (0);
		}
	}

	if (acl) {
		umode_t mode;

		/* New directories inherit the default ACL as their own. */
		if (S_ISDIR(ip->i_mode)) {
			error = zpl_set_acl_impl(ip, acl, ACL_TYPE_DEFAULT);
			if (error)
				goto out;
		}

		mode = ip->i_mode;
		/* May replace acl and adjusts mode through the pointers. */
		error = __posix_acl_create(&acl, GFP_KERNEL, &mode);
		if (error >= 0) {
			ip->i_mode = mode;
			zfs_mark_inode_dirty(ip);
			if (error > 0) {
				error = zpl_set_acl_impl(ip, acl,
				    ACL_TYPE_ACCESS);
			}
		}
	}
out:
	/* acl is NULL here for symlinks. */
	zpl_posix_acl_release(acl);

	return (error);
}
1117
1118int
1119zpl_chmod_acl(struct inode *ip)
1120{
1121	struct posix_acl *acl;
1122	int error;
1123
1124	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1125		return (0);
1126
1127	if (S_ISLNK(ip->i_mode))
1128		return (-EOPNOTSUPP);
1129
1130	acl = zpl_get_acl(ip, ACL_TYPE_ACCESS);
1131	if (IS_ERR(acl) || !acl)
1132		return (PTR_ERR(acl));
1133
1134	error = __posix_acl_chmod(&acl, GFP_KERNEL, ip->i_mode);
1135	if (!error)
1136		error = zpl_set_acl_impl(ip, acl, ACL_TYPE_ACCESS);
1137
1138	zpl_posix_acl_release(acl);
1139
1140	return (error);
1141}
1142
1143static int
1144__zpl_xattr_acl_list_access(struct inode *ip, char *list, size_t list_size,
1145    const char *name, size_t name_len)
1146{
1147	char *xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
1148	size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_ACCESS);
1149
1150	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1151		return (0);
1152
1153	if (list && xattr_size <= list_size)
1154		memcpy(list, xattr_name, xattr_size);
1155
1156	return (xattr_size);
1157}
1158ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_access);
1159
1160static int
1161__zpl_xattr_acl_list_default(struct inode *ip, char *list, size_t list_size,
1162    const char *name, size_t name_len)
1163{
1164	char *xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
1165	size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_DEFAULT);
1166
1167	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1168		return (0);
1169
1170	if (list && xattr_size <= list_size)
1171		memcpy(list, xattr_name, xattr_size);
1172
1173	return (xattr_size);
1174}
1175ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_default);
1176
1177static int
1178__zpl_xattr_acl_get_access(struct inode *ip, const char *name,
1179    void *buffer, size_t size)
1180{
1181	struct posix_acl *acl;
1182	int type = ACL_TYPE_ACCESS;
1183	int error;
1184	/* xattr_resolve_name will do this for us if this is defined */
1185#ifndef HAVE_XATTR_HANDLER_NAME
1186	if (strcmp(name, "") != 0)
1187		return (-EINVAL);
1188#endif
1189	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1190		return (-EOPNOTSUPP);
1191
1192	acl = zpl_get_acl(ip, type);
1193	if (IS_ERR(acl))
1194		return (PTR_ERR(acl));
1195	if (acl == NULL)
1196		return (-ENODATA);
1197
1198	error = zpl_acl_to_xattr(acl, buffer, size);
1199	zpl_posix_acl_release(acl);
1200
1201	return (error);
1202}
1203ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_access);
1204
1205static int
1206__zpl_xattr_acl_get_default(struct inode *ip, const char *name,
1207    void *buffer, size_t size)
1208{
1209	struct posix_acl *acl;
1210	int type = ACL_TYPE_DEFAULT;
1211	int error;
1212	/* xattr_resolve_name will do this for us if this is defined */
1213#ifndef HAVE_XATTR_HANDLER_NAME
1214	if (strcmp(name, "") != 0)
1215		return (-EINVAL);
1216#endif
1217	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1218		return (-EOPNOTSUPP);
1219
1220	acl = zpl_get_acl(ip, type);
1221	if (IS_ERR(acl))
1222		return (PTR_ERR(acl));
1223	if (acl == NULL)
1224		return (-ENODATA);
1225
1226	error = zpl_acl_to_xattr(acl, buffer, size);
1227	zpl_posix_acl_release(acl);
1228
1229	return (error);
1230}
1231ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_default);
1232
1233static int
1234__zpl_xattr_acl_set_access(struct inode *ip, const char *name,
1235    const void *value, size_t size, int flags)
1236{
1237	struct posix_acl *acl;
1238	int type = ACL_TYPE_ACCESS;
1239	int error = 0;
1240	/* xattr_resolve_name will do this for us if this is defined */
1241#ifndef HAVE_XATTR_HANDLER_NAME
1242	if (strcmp(name, "") != 0)
1243		return (-EINVAL);
1244#endif
1245	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1246		return (-EOPNOTSUPP);
1247
1248	if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
1249		return (-EPERM);
1250
1251	if (value) {
1252		acl = zpl_acl_from_xattr(value, size);
1253		if (IS_ERR(acl))
1254			return (PTR_ERR(acl));
1255		else if (acl) {
1256			error = zpl_posix_acl_valid(ip, acl);
1257			if (error) {
1258				zpl_posix_acl_release(acl);
1259				return (error);
1260			}
1261		}
1262	} else {
1263		acl = NULL;
1264	}
1265	error = zpl_set_acl_impl(ip, acl, type);
1266	zpl_posix_acl_release(acl);
1267
1268	return (error);
1269}
1270ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_access);
1271
1272static int
1273__zpl_xattr_acl_set_default(struct inode *ip, const char *name,
1274    const void *value, size_t size, int flags)
1275{
1276	struct posix_acl *acl;
1277	int type = ACL_TYPE_DEFAULT;
1278	int error = 0;
1279	/* xattr_resolve_name will do this for us if this is defined */
1280#ifndef HAVE_XATTR_HANDLER_NAME
1281	if (strcmp(name, "") != 0)
1282		return (-EINVAL);
1283#endif
1284	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1285		return (-EOPNOTSUPP);
1286
1287	if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
1288		return (-EPERM);
1289
1290	if (value) {
1291		acl = zpl_acl_from_xattr(value, size);
1292		if (IS_ERR(acl))
1293			return (PTR_ERR(acl));
1294		else if (acl) {
1295			error = zpl_posix_acl_valid(ip, acl);
1296			if (error) {
1297				zpl_posix_acl_release(acl);
1298				return (error);
1299			}
1300		}
1301	} else {
1302		acl = NULL;
1303	}
1304
1305	error = zpl_set_acl_impl(ip, acl, type);
1306	zpl_posix_acl_release(acl);
1307
1308	return (error);
1309}
1310ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_default);
1311
1312/*
1313 * ACL access xattr namespace handlers.
1314 *
1315 * Use .name instead of .prefix when available. xattr_resolve_name will match
1316 * whole name and reject anything that has .name only as prefix.
1317 */
1318xattr_handler_t zpl_xattr_acl_access_handler =
1319{
1320#ifdef HAVE_XATTR_HANDLER_NAME
1321	.name	= XATTR_NAME_POSIX_ACL_ACCESS,
1322#else
1323	.prefix	= XATTR_NAME_POSIX_ACL_ACCESS,
1324#endif
1325	.list	= zpl_xattr_acl_list_access,
1326	.get	= zpl_xattr_acl_get_access,
1327	.set	= zpl_xattr_acl_set_access,
1328#if defined(HAVE_XATTR_LIST_SIMPLE) || \
1329    defined(HAVE_XATTR_LIST_DENTRY) || \
1330    defined(HAVE_XATTR_LIST_HANDLER)
1331	.flags	= ACL_TYPE_ACCESS,
1332#endif
1333};
1334
1335/*
1336 * ACL default xattr namespace handlers.
1337 *
1338 * Use .name instead of .prefix when available. xattr_resolve_name will match
1339 * whole name and reject anything that has .name only as prefix.
1340 */
1341xattr_handler_t zpl_xattr_acl_default_handler =
1342{
1343#ifdef HAVE_XATTR_HANDLER_NAME
1344	.name	= XATTR_NAME_POSIX_ACL_DEFAULT,
1345#else
1346	.prefix	= XATTR_NAME_POSIX_ACL_DEFAULT,
1347#endif
1348	.list	= zpl_xattr_acl_list_default,
1349	.get	= zpl_xattr_acl_get_default,
1350	.set	= zpl_xattr_acl_set_default,
1351#if defined(HAVE_XATTR_LIST_SIMPLE) || \
1352    defined(HAVE_XATTR_LIST_DENTRY) || \
1353    defined(HAVE_XATTR_LIST_HANDLER)
1354	.flags	= ACL_TYPE_DEFAULT,
1355#endif
1356};
1357
1358#endif /* CONFIG_FS_POSIX_ACL */
1359
/*
 * NULL-terminated table of every xattr namespace handler ZPL provides.
 * The two POSIX ACL handlers are only present when the kernel is built with
 * CONFIG_FS_POSIX_ACL.
 *
 * NOTE(review): presumably this is the table handed to the VFS through the
 * superblock -- confirm against the code that references it.
 */
xattr_handler_t *zpl_xattr_handlers[] = {
	&zpl_xattr_security_handler,
	&zpl_xattr_trusted_handler,
	&zpl_xattr_user_handler,
#ifdef CONFIG_FS_POSIX_ACL
	&zpl_xattr_acl_access_handler,
	&zpl_xattr_acl_default_handler,
#endif /* CONFIG_FS_POSIX_ACL */
	NULL
};
1370
1371static const struct xattr_handler *
1372zpl_xattr_handler(const char *name)
1373{
1374	if (strncmp(name, XATTR_USER_PREFIX,
1375	    XATTR_USER_PREFIX_LEN) == 0)
1376		return (&zpl_xattr_user_handler);
1377
1378	if (strncmp(name, XATTR_TRUSTED_PREFIX,
1379	    XATTR_TRUSTED_PREFIX_LEN) == 0)
1380		return (&zpl_xattr_trusted_handler);
1381
1382	if (strncmp(name, XATTR_SECURITY_PREFIX,
1383	    XATTR_SECURITY_PREFIX_LEN) == 0)
1384		return (&zpl_xattr_security_handler);
1385
1386#ifdef CONFIG_FS_POSIX_ACL
1387	if (strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
1388	    sizeof (XATTR_NAME_POSIX_ACL_ACCESS)) == 0)
1389		return (&zpl_xattr_acl_access_handler);
1390
1391	if (strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
1392	    sizeof (XATTR_NAME_POSIX_ACL_DEFAULT)) == 0)
1393		return (&zpl_xattr_acl_default_handler);
1394#endif /* CONFIG_FS_POSIX_ACL */
1395
1396	return (NULL);
1397}
1398
1399#if !defined(HAVE_POSIX_ACL_RELEASE) || defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY)
/*
 * One pending deferred free: a released posix_acl together with the lbolt
 * value at which it was queued.  'next' links the pending list and is reused
 * by zpl_posix_acl_free() to chain entries onto its private free list.
 */
struct acl_rel_struct {
	struct acl_rel_struct *next;	/* next entry, NULL until linked */
	struct posix_acl *acl;		/* ACL to kfree() once grace expired */
	clock_t time;			/* ddi_get_lbolt() when queued */
};
1405
/*
 * Timing of the deferred free, expressed in lbolt ticks (multiples of HZ).
 * An entry becomes eligible for freeing once ACL_REL_GRACE has elapsed since
 * it was queued.  The free task is dispatched for ACL_REL_SCHED after the
 * queue time of the entry it waits for, i.e. the grace period plus
 * ACL_REL_WINDOW.
 * NOTE(review): the extra window presumably lets entries queued shortly
 * afterwards be reaped in the same pass -- confirm.
 */
#define	ACL_REL_GRACE	(60*HZ)
#define	ACL_REL_WINDOW	(1*HZ)
#define	ACL_REL_SCHED	(ACL_REL_GRACE+ACL_REL_WINDOW)
1409
1410/*
1411 * Lockless multi-producer single-consumer fifo list.
1412 * Nodes are added to tail and removed from head. Tail pointer is our
1413 * synchronization point. It always points to the next pointer of the last
1414 * node, or head if list is empty.
1415 */
1416static struct acl_rel_struct *acl_rel_head = NULL;
1417static struct acl_rel_struct **acl_rel_tail = &acl_rel_head;
1418
/*
 * Taskq callback that reaps the pending list; it is the list's consumer.
 *
 * Starting at the head, every entry whose grace period has expired is
 * unlinked and moved to a local free list.  The walk stops at the first
 * entry that is still within its grace period, in which case this function
 * re-dispatches itself for that entry's queue time plus ACL_REL_SCHED, or
 * when the list has been emptied.  The collected entries are freed last.
 *
 * 'arg' is unused (the task is always dispatched with NULL).
 */
static void
zpl_posix_acl_free(void *arg)
{
	struct acl_rel_struct *freelist = NULL;
	struct acl_rel_struct *a;
	clock_t new_time;
	boolean_t refire = B_FALSE;

	/* The task is only dispatched while the list is non-empty. */
	ASSERT3P(acl_rel_head, !=, NULL);
	while (acl_rel_head) {
		a = acl_rel_head;
		if (ddi_get_lbolt() - a->time >= ACL_REL_GRACE) {
			/*
			 * If a is the last node we need to reset tail, but we
			 * need to use cmpxchg to make sure it is still the
			 * last node.
			 */
			if (acl_rel_tail == &a->next) {
				acl_rel_head = NULL;
				if (cmpxchg(&acl_rel_tail, &a->next,
				    &acl_rel_head) == &a->next) {
					/* List is now empty; a is ours. */
					ASSERT3P(a->next, ==, NULL);
					a->next = freelist;
					freelist = a;
					break;
				}
			}
			/*
			 * a is not last node, make sure next pointer is set
			 * by the adder and advance the head.
			 */
			while (READ_ONCE(a->next) == NULL)
				cpu_relax();
			acl_rel_head = a->next;
			a->next = freelist;
			freelist = a;
		} else {
			/*
			 * a is still in grace period. We are responsible to
			 * reschedule the free task, since adder will only do
			 * so if list is empty.
			 */
			new_time = a->time + ACL_REL_SCHED;
			refire = B_TRUE;
			break;
		}
	}

	/* new_time is only read when refire was set together with it. */
	if (refire)
		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
		    NULL, TQ_SLEEP, new_time);

	/* Release every expired ACL together with its list node. */
	while (freelist) {
		a = freelist;
		freelist = a->next;
		kfree(a->acl);
		kmem_free(a, sizeof (struct acl_rel_struct));
	}
}
1478
/*
 * Queue 'acl' for deferred freeing; this is the producer side of the list.
 *
 * A node recording the ACL and the current lbolt is allocated (KM_SLEEP) and
 * appended to the pending list.  If the list was empty beforehand, the free
 * task is dispatched to run ACL_REL_SCHED ticks from now; otherwise the
 * already pending task is relied upon to reschedule itself.
 *
 * NOTE(review): deciding when an ACL may be released (reference counting) is
 * presumably done by the caller -- confirm against zpl_posix_acl_release().
 */
void
zpl_posix_acl_release_impl(struct posix_acl *acl)
{
	struct acl_rel_struct *a, **prev;

	a = kmem_alloc(sizeof (struct acl_rel_struct), KM_SLEEP);
	a->next = NULL;
	a->acl = acl;
	a->time = ddi_get_lbolt();
	/* atomically points tail to us and get the previous tail */
	prev = xchg(&acl_rel_tail, &a->next);
	ASSERT3P(*prev, ==, NULL);
	/* publish the node; the consumer spins until this store is visible */
	*prev = a;
	/* if it was empty before, schedule the free task */
	if (prev == &acl_rel_head)
		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
		    NULL, TQ_SLEEP, ddi_get_lbolt() + ACL_REL_SCHED);
}
1497#endif
1498