1/*
2 *   Copyright (C) International Business Machines  Corp., 2000-2004
3 *   Copyright (C) Christoph Hellwig, 2002
4 *
5 *   This program is free software;  you can redistribute it and/or modify
6 *   it under the terms of the GNU General Public License as published by
7 *   the Free Software Foundation; either version 2 of the License, or
8 *   (at your option) any later version.
9 *
10 *   This program is distributed in the hope that it will be useful,
11 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
13 *   the GNU General Public License for more details.
14 *
15 *   You should have received a copy of the GNU General Public License
16 *   along with this program;  if not, write to the Free Software
17 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/capability.h>
21#include <linux/fs.h>
22#include <linux/xattr.h>
23#include <linux/posix_acl_xattr.h>
24#include <linux/quotaops.h>
25#include <linux/security.h>
26#include "jfs_incore.h"
27#include "jfs_superblock.h"
28#include "jfs_dmap.h"
29#include "jfs_debug.h"
30#include "jfs_dinode.h"
31#include "jfs_extent.h"
32#include "jfs_metapage.h"
33#include "jfs_xattr.h"
34#include "jfs_acl.h"
35
36/*
37 *	jfs_xattr.c: extended attribute service
38 *
39 * Overall design --
40 *
41 * Format:
42 *
 *   Extended attribute lists (jfs_ea_list) consist of an overall size (32 bit
 *   value) and a variable (0 or more) number of extended attribute
 *   entries.  Each extended attribute entry (jfs_ea) is a <name,value> pair
 *   where <name> is constructed from a null-terminated ascii string
 *   (1 ... 255 bytes in the name) and <value> is arbitrary 8 bit data
 *   (1 ... 65535 bytes).  The in-memory format is
49 *
50 *   0       1        2        4                4 + namelen + 1
51 *   +-------+--------+--------+----------------+-------------------+
52 *   | Flags | Name   | Value  | Name String \0 | Data . . . .      |
53 *   |       | Length | Length |                |                   |
54 *   +-------+--------+--------+----------------+-------------------+
55 *
56 *   A jfs_ea_list then is structured as
57 *
58 *   0            4                   4 + EA_SIZE(ea1)
59 *   +------------+-------------------+--------------------+-----
60 *   | Overall EA | First FEA Element | Second FEA Element | .....
61 *   | List Size  |                   |                    |
62 *   +------------+-------------------+--------------------+-----
63 *
64 *   On-disk:
65 *
66 *     FEALISTs are stored on disk using blocks allocated by dbAlloc() and
67 *     written directly. An EA list may be in-lined in the inode if there is
68 *     sufficient room available.
69 */
70
/*
 * Working descriptor for an inode's EA list while it is being examined or
 * modified.  It is populated by ea_get() and handed back through ea_put()
 * or ea_release().
 */
struct ea_buffer {
	int flag;		/* Indicates what storage xattr points to */
	int max_size;		/* largest xattr that fits in current buffer */
	dxd_t new_ea;		/* dxd to replace ea when modifying xattr */
	struct metapage *mp;	/* metapage containing ea list */
	struct jfs_ea_list *xattr;	/* buffer containing ea list */
};

/*
 * ea_buffer.flag values
 *
 * EA_INLINE - xattr points at the inode's inline EA area
 * EA_EXTENT - xattr points at the data of the metapage held in mp
 * EA_NEW    - new_ea has been filled in by ea_get() for newly chosen storage
 * EA_MALLOC - xattr is a kmalloc'ed buffer
 */
#define EA_INLINE	0x0001
#define EA_EXTENT	0x0002
#define EA_NEW		0x0004
#define EA_MALLOC	0x0008
86
87
88/*
89 * These three routines are used to recognize on-disk extended attributes
90 * that are in a recognized namespace.  If the attribute is not recognized,
91 * "os2." is prepended to the name
92 */
93static inline int is_os2_xattr(struct jfs_ea *ea)
94{
95	/*
96	 * Check for "system."
97	 */
98	if ((ea->namelen >= XATTR_SYSTEM_PREFIX_LEN) &&
99	    !strncmp(ea->name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
100		return false;
101	/*
102	 * Check for "user."
103	 */
104	if ((ea->namelen >= XATTR_USER_PREFIX_LEN) &&
105	    !strncmp(ea->name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
106		return false;
107	/*
108	 * Check for "security."
109	 */
110	if ((ea->namelen >= XATTR_SECURITY_PREFIX_LEN) &&
111	    !strncmp(ea->name, XATTR_SECURITY_PREFIX,
112		     XATTR_SECURITY_PREFIX_LEN))
113		return false;
114	/*
115	 * Check for "trusted."
116	 */
117	if ((ea->namelen >= XATTR_TRUSTED_PREFIX_LEN) &&
118	    !strncmp(ea->name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
119		return false;
120	/*
121	 * Add any other valid namespace prefixes here
122	 */
123
124	/*
125	 * We assume it's OS/2's flat namespace
126	 */
127	return true;
128}
129
130static inline int name_size(struct jfs_ea *ea)
131{
132	if (is_os2_xattr(ea))
133		return ea->namelen + XATTR_OS2_PREFIX_LEN;
134	else
135		return ea->namelen;
136}
137
138static inline int copy_name(char *buffer, struct jfs_ea *ea)
139{
140	int len = ea->namelen;
141
142	if (is_os2_xattr(ea)) {
143		memcpy(buffer, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN);
144		buffer += XATTR_OS2_PREFIX_LEN;
145		len += XATTR_OS2_PREFIX_LEN;
146	}
147	memcpy(buffer, ea->name, ea->namelen);
148	buffer[ea->namelen] = 0;
149
150	return len;
151}
152
/*
 * Forward references
 *
 * ea_release() is defined after ea_get(), which calls it when the EA list
 * it has just loaded fails the size check.
 */
static void ea_release(struct inode *inode, struct ea_buffer *ea_buf);
155
156/*
157 * NAME: ea_write_inline
158 *
159 * FUNCTION: Attempt to write an EA inline if area is available
160 *
161 * PRE CONDITIONS:
162 *	Already verified that the specified EA is small enough to fit inline
163 *
164 * PARAMETERS:
165 *	ip	- Inode pointer
166 *	ealist	- EA list pointer
167 *	size	- size of ealist in bytes
168 *	ea	- dxd_t structure to be filled in with necessary EA information
169 *		  if we successfully copy the EA inline
170 *
171 * NOTES:
172 *	Checks if the inode's inline area is available.  If so, copies EA inline
173 *	and sets <ea> fields appropriately.  Otherwise, returns failure, EA will
174 *	have to be put into an extent.
175 *
176 * RETURNS: 0 for successful copy to inline area; -1 if area not available
177 */
178static int ea_write_inline(struct inode *ip, struct jfs_ea_list *ealist,
179			   int size, dxd_t * ea)
180{
181	struct jfs_inode_info *ji = JFS_IP(ip);
182
183	/*
184	 * Make sure we have an EA -- the NULL EA list is valid, but you
185	 * can't copy it!
186	 */
187	if (ealist && size > sizeof (struct jfs_ea_list)) {
188		assert(size <= sizeof (ji->i_inline_ea));
189
190		/*
191		 * See if the space is available or if it is already being
192		 * used for an inline EA.
193		 */
194		if (!(ji->mode2 & INLINEEA) && !(ji->ea.flag & DXD_INLINE))
195			return -EPERM;
196
197		DXDsize(ea, size);
198		DXDlength(ea, 0);
199		DXDaddress(ea, 0);
200		memcpy(ji->i_inline_ea, ealist, size);
201		ea->flag = DXD_INLINE;
202		ji->mode2 &= ~INLINEEA;
203	} else {
204		ea->flag = 0;
205		DXDsize(ea, 0);
206		DXDlength(ea, 0);
207		DXDaddress(ea, 0);
208
209		/* Free up INLINE area */
210		if (ji->ea.flag & DXD_INLINE)
211			ji->mode2 |= INLINEEA;
212	}
213
214	return 0;
215}
216
217/*
218 * NAME: ea_write
219 *
220 * FUNCTION: Write an EA for an inode
221 *
222 * PRE CONDITIONS: EA has been verified
223 *
224 * PARAMETERS:
225 *	ip	- Inode pointer
226 *	ealist	- EA list pointer
227 *	size	- size of ealist in bytes
228 *	ea	- dxd_t structure to be filled in appropriately with where the
229 *		  EA was copied
230 *
231 * NOTES: Will write EA inline if able to, otherwise allocates blocks for an
232 *	extent and synchronously writes it to those blocks.
233 *
234 * RETURNS: 0 for success; Anything else indicates failure
235 */
236static int ea_write(struct inode *ip, struct jfs_ea_list *ealist, int size,
237		       dxd_t * ea)
238{
239	struct super_block *sb = ip->i_sb;
240	struct jfs_inode_info *ji = JFS_IP(ip);
241	struct jfs_sb_info *sbi = JFS_SBI(sb);
242	int nblocks;
243	s64 blkno;
244	int rc = 0, i;
245	char *cp;
246	s32 nbytes, nb;
247	s32 bytes_to_write;
248	struct metapage *mp;
249
250	/*
251	 * Quick check to see if this is an in-linable EA.  Short EAs
252	 * and empty EAs are all in-linable, provided the space exists.
253	 */
254	if (!ealist || size <= sizeof (ji->i_inline_ea)) {
255		if (!ea_write_inline(ip, ealist, size, ea))
256			return 0;
257	}
258
259	/* figure out how many blocks we need */
260	nblocks = (size + (sb->s_blocksize - 1)) >> sb->s_blocksize_bits;
261
262	/* Allocate new blocks to quota. */
263	if (DQUOT_ALLOC_BLOCK(ip, nblocks)) {
264		return -EDQUOT;
265	}
266
267	rc = dbAlloc(ip, INOHINT(ip), nblocks, &blkno);
268	if (rc) {
269		/*Rollback quota allocation. */
270		DQUOT_FREE_BLOCK(ip, nblocks);
271		return rc;
272	}
273
274	/*
275	 * Now have nblocks worth of storage to stuff into the FEALIST.
276	 * loop over the FEALIST copying data into the buffer one page at
277	 * a time.
278	 */
279	cp = (char *) ealist;
280	nbytes = size;
281	for (i = 0; i < nblocks; i += sbi->nbperpage) {
282		/*
283		 * Determine how many bytes for this request, and round up to
284		 * the nearest aggregate block size
285		 */
286		nb = min(PSIZE, nbytes);
287		bytes_to_write =
288		    ((((nb + sb->s_blocksize - 1)) >> sb->s_blocksize_bits))
289		    << sb->s_blocksize_bits;
290
291		if (!(mp = get_metapage(ip, blkno + i, bytes_to_write, 1))) {
292			rc = -EIO;
293			goto failed;
294		}
295
296		memcpy(mp->data, cp, nb);
297
298		/*
299		 * We really need a way to propagate errors for
300		 * forced writes like this one.  --hch
301		 *
302		 * (__write_metapage => release_metapage => flush_metapage)
303		 */
304#ifdef _JFS_FIXME
305		if ((rc = flush_metapage(mp))) {
306			/*
307			 * the write failed -- this means that the buffer
308			 * is still assigned and the blocks are not being
309			 * used.  this seems like the best error recovery
310			 * we can get ...
311			 */
312			goto failed;
313		}
314#else
315		flush_metapage(mp);
316#endif
317
318		cp += PSIZE;
319		nbytes -= nb;
320	}
321
322	ea->flag = DXD_EXTENT;
323	DXDsize(ea, le32_to_cpu(ealist->size));
324	DXDlength(ea, nblocks);
325	DXDaddress(ea, blkno);
326
327	/* Free up INLINE area */
328	if (ji->ea.flag & DXD_INLINE)
329		ji->mode2 |= INLINEEA;
330
331	return 0;
332
333      failed:
334	/* Rollback quota allocation. */
335	DQUOT_FREE_BLOCK(ip, nblocks);
336
337	dbFree(ip, blkno, nblocks);
338	return rc;
339}
340
341/*
342 * NAME: ea_read_inline
343 *
344 * FUNCTION: Read an inlined EA into user's buffer
345 *
346 * PARAMETERS:
347 *	ip	- Inode pointer
348 *	ealist	- Pointer to buffer to fill in with EA
349 *
350 * RETURNS: 0
351 */
352static int ea_read_inline(struct inode *ip, struct jfs_ea_list *ealist)
353{
354	struct jfs_inode_info *ji = JFS_IP(ip);
355	int ea_size = sizeDXD(&ji->ea);
356
357	if (ea_size == 0) {
358		ealist->size = 0;
359		return 0;
360	}
361
362	/* Sanity Check */
363	if ((sizeDXD(&ji->ea) > sizeof (ji->i_inline_ea)))
364		return -EIO;
365	if (le32_to_cpu(((struct jfs_ea_list *) &ji->i_inline_ea)->size)
366	    != ea_size)
367		return -EIO;
368
369	memcpy(ealist, ji->i_inline_ea, ea_size);
370	return 0;
371}
372
373/*
374 * NAME: ea_read
375 *
376 * FUNCTION: copy EA data into user's buffer
377 *
378 * PARAMETERS:
379 *	ip	- Inode pointer
380 *	ealist	- Pointer to buffer to fill in with EA
381 *
382 * NOTES:  If EA is inline calls ea_read_inline() to copy EA.
383 *
384 * RETURNS: 0 for success; other indicates failure
385 */
386static int ea_read(struct inode *ip, struct jfs_ea_list *ealist)
387{
388	struct super_block *sb = ip->i_sb;
389	struct jfs_inode_info *ji = JFS_IP(ip);
390	struct jfs_sb_info *sbi = JFS_SBI(sb);
391	int nblocks;
392	s64 blkno;
393	char *cp = (char *) ealist;
394	int i;
395	int nbytes, nb;
396	s32 bytes_to_read;
397	struct metapage *mp;
398
399	/* quick check for in-line EA */
400	if (ji->ea.flag & DXD_INLINE)
401		return ea_read_inline(ip, ealist);
402
403	nbytes = sizeDXD(&ji->ea);
404	if (!nbytes) {
405		jfs_error(sb, "ea_read: nbytes is 0");
406		return -EIO;
407	}
408
409	/*
410	 * Figure out how many blocks were allocated when this EA list was
411	 * originally written to disk.
412	 */
413	nblocks = lengthDXD(&ji->ea) << sbi->l2nbperpage;
414	blkno = addressDXD(&ji->ea) << sbi->l2nbperpage;
415
416	/*
417	 * I have found the disk blocks which were originally used to store
418	 * the FEALIST.  now i loop over each contiguous block copying the
419	 * data into the buffer.
420	 */
421	for (i = 0; i < nblocks; i += sbi->nbperpage) {
422		/*
423		 * Determine how many bytes for this request, and round up to
424		 * the nearest aggregate block size
425		 */
426		nb = min(PSIZE, nbytes);
427		bytes_to_read =
428		    ((((nb + sb->s_blocksize - 1)) >> sb->s_blocksize_bits))
429		    << sb->s_blocksize_bits;
430
431		if (!(mp = read_metapage(ip, blkno + i, bytes_to_read, 1)))
432			return -EIO;
433
434		memcpy(cp, mp->data, nb);
435		release_metapage(mp);
436
437		cp += PSIZE;
438		nbytes -= nb;
439	}
440
441	return 0;
442}
443
444/*
445 * NAME: ea_get
446 *
447 * FUNCTION: Returns buffer containing existing extended attributes.
448 *	     The size of the buffer will be the larger of the existing
449 *	     attributes size, or min_size.
450 *
451 *	     The buffer, which may be inlined in the inode or in the
452 *	     page cache must be release by calling ea_release or ea_put
453 *
454 * PARAMETERS:
455 *	inode	- Inode pointer
456 *	ea_buf	- Structure to be populated with ealist and its metadata
457 *	min_size- minimum size of buffer to be returned
458 *
459 * RETURNS: 0 for success; Other indicates failure
460 */
461static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
462{
463	struct jfs_inode_info *ji = JFS_IP(inode);
464	struct super_block *sb = inode->i_sb;
465	int size;
466	int ea_size = sizeDXD(&ji->ea);
467	int blocks_needed, current_blocks;
468	s64 blkno;
469	int rc;
470	int quota_allocation = 0;
471
472	/* When fsck.jfs clears a bad ea, it doesn't clear the size */
473	if (ji->ea.flag == 0)
474		ea_size = 0;
475
476	if (ea_size == 0) {
477		if (min_size == 0) {
478			ea_buf->flag = 0;
479			ea_buf->max_size = 0;
480			ea_buf->xattr = NULL;
481			return 0;
482		}
483		if ((min_size <= sizeof (ji->i_inline_ea)) &&
484		    (ji->mode2 & INLINEEA)) {
485			ea_buf->flag = EA_INLINE | EA_NEW;
486			ea_buf->max_size = sizeof (ji->i_inline_ea);
487			ea_buf->xattr = (struct jfs_ea_list *) ji->i_inline_ea;
488			DXDlength(&ea_buf->new_ea, 0);
489			DXDaddress(&ea_buf->new_ea, 0);
490			ea_buf->new_ea.flag = DXD_INLINE;
491			DXDsize(&ea_buf->new_ea, min_size);
492			return 0;
493		}
494		current_blocks = 0;
495	} else if (ji->ea.flag & DXD_INLINE) {
496		if (min_size <= sizeof (ji->i_inline_ea)) {
497			ea_buf->flag = EA_INLINE;
498			ea_buf->max_size = sizeof (ji->i_inline_ea);
499			ea_buf->xattr = (struct jfs_ea_list *) ji->i_inline_ea;
500			goto size_check;
501		}
502		current_blocks = 0;
503	} else {
504		if (!(ji->ea.flag & DXD_EXTENT)) {
505			jfs_error(sb, "ea_get: invalid ea.flag)");
506			return -EIO;
507		}
508		current_blocks = (ea_size + sb->s_blocksize - 1) >>
509		    sb->s_blocksize_bits;
510	}
511	size = max(min_size, ea_size);
512
513	if (size > PSIZE) {
514		/*
515		 * To keep the rest of the code simple.  Allocate a
516		 * contiguous buffer to work with
517		 */
518		ea_buf->xattr = kmalloc(size, GFP_KERNEL);
519		if (ea_buf->xattr == NULL)
520			return -ENOMEM;
521
522		ea_buf->flag = EA_MALLOC;
523		ea_buf->max_size = (size + sb->s_blocksize - 1) &
524		    ~(sb->s_blocksize - 1);
525
526		if (ea_size == 0)
527			return 0;
528
529		if ((rc = ea_read(inode, ea_buf->xattr))) {
530			kfree(ea_buf->xattr);
531			ea_buf->xattr = NULL;
532			return rc;
533		}
534		goto size_check;
535	}
536	blocks_needed = (min_size + sb->s_blocksize - 1) >>
537	    sb->s_blocksize_bits;
538
539	if (blocks_needed > current_blocks) {
540		/* Allocate new blocks to quota. */
541		if (DQUOT_ALLOC_BLOCK(inode, blocks_needed))
542			return -EDQUOT;
543
544		quota_allocation = blocks_needed;
545
546		rc = dbAlloc(inode, INOHINT(inode), (s64) blocks_needed,
547			     &blkno);
548		if (rc)
549			goto clean_up;
550
551		DXDlength(&ea_buf->new_ea, blocks_needed);
552		DXDaddress(&ea_buf->new_ea, blkno);
553		ea_buf->new_ea.flag = DXD_EXTENT;
554		DXDsize(&ea_buf->new_ea, min_size);
555
556		ea_buf->flag = EA_EXTENT | EA_NEW;
557
558		ea_buf->mp = get_metapage(inode, blkno,
559					  blocks_needed << sb->s_blocksize_bits,
560					  1);
561		if (ea_buf->mp == NULL) {
562			dbFree(inode, blkno, (s64) blocks_needed);
563			rc = -EIO;
564			goto clean_up;
565		}
566		ea_buf->xattr = ea_buf->mp->data;
567		ea_buf->max_size = (min_size + sb->s_blocksize - 1) &
568		    ~(sb->s_blocksize - 1);
569		if (ea_size == 0)
570			return 0;
571		if ((rc = ea_read(inode, ea_buf->xattr))) {
572			discard_metapage(ea_buf->mp);
573			dbFree(inode, blkno, (s64) blocks_needed);
574			goto clean_up;
575		}
576		goto size_check;
577	}
578	ea_buf->flag = EA_EXTENT;
579	ea_buf->mp = read_metapage(inode, addressDXD(&ji->ea),
580				   lengthDXD(&ji->ea) << sb->s_blocksize_bits,
581				   1);
582	if (ea_buf->mp == NULL) {
583		rc = -EIO;
584		goto clean_up;
585	}
586	ea_buf->xattr = ea_buf->mp->data;
587	ea_buf->max_size = (ea_size + sb->s_blocksize - 1) &
588	    ~(sb->s_blocksize - 1);
589
590      size_check:
591	if (EALIST_SIZE(ea_buf->xattr) != ea_size) {
592		printk(KERN_ERR "ea_get: invalid extended attribute\n");
593		dump_mem("xattr", ea_buf->xattr, ea_size);
594		ea_release(inode, ea_buf);
595		rc = -EIO;
596		goto clean_up;
597	}
598
599	return ea_size;
600
601      clean_up:
602	/* Rollback quota allocation */
603	if (quota_allocation)
604		DQUOT_FREE_BLOCK(inode, quota_allocation);
605
606	return (rc);
607}
608
609static void ea_release(struct inode *inode, struct ea_buffer *ea_buf)
610{
611	if (ea_buf->flag & EA_MALLOC)
612		kfree(ea_buf->xattr);
613	else if (ea_buf->flag & EA_EXTENT) {
614		assert(ea_buf->mp);
615		release_metapage(ea_buf->mp);
616
617		if (ea_buf->flag & EA_NEW)
618			dbFree(inode, addressDXD(&ea_buf->new_ea),
619			       lengthDXD(&ea_buf->new_ea));
620	}
621}
622
623static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf,
624		  int new_size)
625{
626	struct jfs_inode_info *ji = JFS_IP(inode);
627	unsigned long old_blocks, new_blocks;
628	int rc = 0;
629
630	if (new_size == 0) {
631		ea_release(inode, ea_buf);
632		ea_buf = NULL;
633	} else if (ea_buf->flag & EA_INLINE) {
634		assert(new_size <= sizeof (ji->i_inline_ea));
635		ji->mode2 &= ~INLINEEA;
636		ea_buf->new_ea.flag = DXD_INLINE;
637		DXDsize(&ea_buf->new_ea, new_size);
638		DXDaddress(&ea_buf->new_ea, 0);
639		DXDlength(&ea_buf->new_ea, 0);
640	} else if (ea_buf->flag & EA_MALLOC) {
641		rc = ea_write(inode, ea_buf->xattr, new_size, &ea_buf->new_ea);
642		kfree(ea_buf->xattr);
643	} else if (ea_buf->flag & EA_NEW) {
644		/* We have already allocated a new dxd */
645		flush_metapage(ea_buf->mp);
646	} else {
647		/* ->xattr must point to original ea's metapage */
648		rc = ea_write(inode, ea_buf->xattr, new_size, &ea_buf->new_ea);
649		discard_metapage(ea_buf->mp);
650	}
651	if (rc)
652		return rc;
653
654	old_blocks = new_blocks = 0;
655
656	if (ji->ea.flag & DXD_EXTENT) {
657		invalidate_dxd_metapages(inode, ji->ea);
658		old_blocks = lengthDXD(&ji->ea);
659	}
660
661	if (ea_buf) {
662		txEA(tid, inode, &ji->ea, &ea_buf->new_ea);
663		if (ea_buf->new_ea.flag & DXD_EXTENT) {
664			new_blocks = lengthDXD(&ea_buf->new_ea);
665			if (ji->ea.flag & DXD_INLINE)
666				ji->mode2 |= INLINEEA;
667		}
668		ji->ea = ea_buf->new_ea;
669	} else {
670		txEA(tid, inode, &ji->ea, NULL);
671		if (ji->ea.flag & DXD_INLINE)
672			ji->mode2 |= INLINEEA;
673		ji->ea.flag = 0;
674		ji->ea.size = 0;
675	}
676
677	/* If old blocks exist, they must be removed from quota allocation. */
678	if (old_blocks)
679		DQUOT_FREE_BLOCK(inode, old_blocks);
680
681	inode->i_ctime = CURRENT_TIME;
682
683	return 0;
684}
685
686/*
687 * can_set_system_xattr
688 *
689 * This code is specific to the system.* namespace.  It contains policy
690 * which doesn't belong in the main xattr codepath.
691 */
692static int can_set_system_xattr(struct inode *inode, const char *name,
693				const void *value, size_t value_len)
694{
695#ifdef CONFIG_JFS_POSIX_ACL
696	struct posix_acl *acl;
697	int rc;
698
699	if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
700		return -EPERM;
701
702	/*
703	 * POSIX_ACL_XATTR_ACCESS is tied to i_mode
704	 */
705	if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) {
706		acl = posix_acl_from_xattr(value, value_len);
707		if (IS_ERR(acl)) {
708			rc = PTR_ERR(acl);
709			printk(KERN_ERR "posix_acl_from_xattr returned %d\n",
710			       rc);
711			return rc;
712		}
713		if (acl) {
714			mode_t mode = inode->i_mode;
715			rc = posix_acl_equiv_mode(acl, &mode);
716			posix_acl_release(acl);
717			if (rc < 0) {
718				printk(KERN_ERR
719				       "posix_acl_equiv_mode returned %d\n",
720				       rc);
721				return rc;
722			}
723			inode->i_mode = mode;
724			mark_inode_dirty(inode);
725		}
726		/*
727		 * We're changing the ACL.  Get rid of the cached one
728		 */
729		acl =JFS_IP(inode)->i_acl;
730		if (acl != JFS_ACL_NOT_CACHED)
731			posix_acl_release(acl);
732		JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED;
733
734		return 0;
735	} else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
736		acl = posix_acl_from_xattr(value, value_len);
737		if (IS_ERR(acl)) {
738			rc = PTR_ERR(acl);
739			printk(KERN_ERR "posix_acl_from_xattr returned %d\n",
740			       rc);
741			return rc;
742		}
743		posix_acl_release(acl);
744
745		/*
746		 * We're changing the default ACL.  Get rid of the cached one
747		 */
748		acl =JFS_IP(inode)->i_default_acl;
749		if (acl && (acl != JFS_ACL_NOT_CACHED))
750			posix_acl_release(acl);
751		JFS_IP(inode)->i_default_acl = JFS_ACL_NOT_CACHED;
752
753		return 0;
754	}
755#endif			/* CONFIG_JFS_POSIX_ACL */
756	return -EOPNOTSUPP;
757}
758
759/*
760 * Most of the permission checking is done by xattr_permission in the vfs.
761 * The local file system is responsible for handling the system.* namespace.
762 * We also need to verify that this is a namespace that we recognize.
763 */
764static int can_set_xattr(struct inode *inode, const char *name,
765			 const void *value, size_t value_len)
766{
767	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
768		return can_set_system_xattr(inode, name, value, value_len);
769
770	/*
771	 * Don't allow setting an attribute in an unknown namespace.
772	 */
773	if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) &&
774	    strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
775	    strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
776	    strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN))
777		return -EOPNOTSUPP;
778
779	return 0;
780}
781
782int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name,
783		   const void *value, size_t value_len, int flags)
784{
785	struct jfs_ea_list *ealist;
786	struct jfs_ea *ea, *old_ea = NULL, *next_ea = NULL;
787	struct ea_buffer ea_buf;
788	int old_ea_size = 0;
789	int xattr_size;
790	int new_size;
791	int namelen = strlen(name);
792	char *os2name = NULL;
793	int found = 0;
794	int rc;
795	int length;
796
797	if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
798		os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1,
799				  GFP_KERNEL);
800		if (!os2name)
801			return -ENOMEM;
802		strcpy(os2name, name + XATTR_OS2_PREFIX_LEN);
803		name = os2name;
804		namelen -= XATTR_OS2_PREFIX_LEN;
805	}
806
807	down_write(&JFS_IP(inode)->xattr_sem);
808
809	xattr_size = ea_get(inode, &ea_buf, 0);
810	if (xattr_size < 0) {
811		rc = xattr_size;
812		goto out;
813	}
814
815      again:
816	ealist = (struct jfs_ea_list *) ea_buf.xattr;
817	new_size = sizeof (struct jfs_ea_list);
818
819	if (xattr_size) {
820		for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist);
821		     ea = NEXT_EA(ea)) {
822			if ((namelen == ea->namelen) &&
823			    (memcmp(name, ea->name, namelen) == 0)) {
824				found = 1;
825				if (flags & XATTR_CREATE) {
826					rc = -EEXIST;
827					goto release;
828				}
829				old_ea = ea;
830				old_ea_size = EA_SIZE(ea);
831				next_ea = NEXT_EA(ea);
832			} else
833				new_size += EA_SIZE(ea);
834		}
835	}
836
837	if (!found) {
838		if (flags & XATTR_REPLACE) {
839			rc = -ENODATA;
840			goto release;
841		}
842		if (value == NULL) {
843			rc = 0;
844			goto release;
845		}
846	}
847	if (value)
848		new_size += sizeof (struct jfs_ea) + namelen + 1 + value_len;
849
850	if (new_size > ea_buf.max_size) {
851		/*
852		 * We need to allocate more space for merged ea list.
853		 * We should only have loop to again: once.
854		 */
855		ea_release(inode, &ea_buf);
856		xattr_size = ea_get(inode, &ea_buf, new_size);
857		if (xattr_size < 0) {
858			rc = xattr_size;
859			goto out;
860		}
861		goto again;
862	}
863
864	/* Remove old ea of the same name */
865	if (found) {
866		/* number of bytes following target EA */
867		length = (char *) END_EALIST(ealist) - (char *) next_ea;
868		if (length > 0)
869			memmove(old_ea, next_ea, length);
870		xattr_size -= old_ea_size;
871	}
872
873	/* Add new entry to the end */
874	if (value) {
875		if (xattr_size == 0)
876			/* Completely new ea list */
877			xattr_size = sizeof (struct jfs_ea_list);
878
879		ea = (struct jfs_ea *) ((char *) ealist + xattr_size);
880		ea->flag = 0;
881		ea->namelen = namelen;
882		ea->valuelen = (cpu_to_le16(value_len));
883		memcpy(ea->name, name, namelen);
884		ea->name[namelen] = 0;
885		if (value_len)
886			memcpy(&ea->name[namelen + 1], value, value_len);
887		xattr_size += EA_SIZE(ea);
888	}
889
890	/* DEBUG - If we did this right, these number match */
891	if (xattr_size != new_size) {
892		printk(KERN_ERR
893		       "jfs_xsetattr: xattr_size = %d, new_size = %d\n",
894		       xattr_size, new_size);
895
896		rc = -EINVAL;
897		goto release;
898	}
899
900	/*
901	 * If we're left with an empty list, there's no ea
902	 */
903	if (new_size == sizeof (struct jfs_ea_list))
904		new_size = 0;
905
906	ealist->size = cpu_to_le32(new_size);
907
908	rc = ea_put(tid, inode, &ea_buf, new_size);
909
910	goto out;
911      release:
912	ea_release(inode, &ea_buf);
913      out:
914	up_write(&JFS_IP(inode)->xattr_sem);
915
916	kfree(os2name);
917
918	return rc;
919}
920
921int jfs_setxattr(struct dentry *dentry, const char *name, const void *value,
922		 size_t value_len, int flags)
923{
924	struct inode *inode = dentry->d_inode;
925	struct jfs_inode_info *ji = JFS_IP(inode);
926	int rc;
927	tid_t tid;
928
929	if ((rc = can_set_xattr(inode, name, value, value_len)))
930		return rc;
931
932	if (value == NULL) {	/* empty EA, do not remove */
933		value = "";
934		value_len = 0;
935	}
936
937	tid = txBegin(inode->i_sb, 0);
938	mutex_lock(&ji->commit_mutex);
939	rc = __jfs_setxattr(tid, dentry->d_inode, name, value, value_len,
940			    flags);
941	if (!rc)
942		rc = txCommit(tid, 1, &inode, 0);
943	txEnd(tid);
944	mutex_unlock(&ji->commit_mutex);
945
946	return rc;
947}
948
949ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
950		       size_t buf_size)
951{
952	struct jfs_ea_list *ealist;
953	struct jfs_ea *ea;
954	struct ea_buffer ea_buf;
955	int xattr_size;
956	ssize_t size;
957	int namelen = strlen(name);
958	char *os2name = NULL;
959	char *value;
960
961	if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
962		os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1,
963				  GFP_KERNEL);
964		if (!os2name)
965			return -ENOMEM;
966		strcpy(os2name, name + XATTR_OS2_PREFIX_LEN);
967		name = os2name;
968		namelen -= XATTR_OS2_PREFIX_LEN;
969	}
970
971	down_read(&JFS_IP(inode)->xattr_sem);
972
973	xattr_size = ea_get(inode, &ea_buf, 0);
974
975	if (xattr_size < 0) {
976		size = xattr_size;
977		goto out;
978	}
979
980	if (xattr_size == 0)
981		goto not_found;
982
983	ealist = (struct jfs_ea_list *) ea_buf.xattr;
984
985	/* Find the named attribute */
986	for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea))
987		if ((namelen == ea->namelen) &&
988		    memcmp(name, ea->name, namelen) == 0) {
989			/* Found it */
990			size = le16_to_cpu(ea->valuelen);
991			if (!data)
992				goto release;
993			else if (size > buf_size) {
994				size = -ERANGE;
995				goto release;
996			}
997			value = ((char *) &ea->name) + ea->namelen + 1;
998			memcpy(data, value, size);
999			goto release;
1000		}
1001      not_found:
1002	size = -ENODATA;
1003      release:
1004	ea_release(inode, &ea_buf);
1005      out:
1006	up_read(&JFS_IP(inode)->xattr_sem);
1007
1008	kfree(os2name);
1009
1010	return size;
1011}
1012
1013ssize_t jfs_getxattr(struct dentry *dentry, const char *name, void *data,
1014		     size_t buf_size)
1015{
1016	int err;
1017
1018	err = __jfs_getxattr(dentry->d_inode, name, data, buf_size);
1019
1020	return err;
1021}
1022
1023/*
1024 * No special permissions are needed to list attributes except for trusted.*
1025 */
1026static inline int can_list(struct jfs_ea *ea)
1027{
1028	return (strncmp(ea->name, XATTR_TRUSTED_PREFIX,
1029			    XATTR_TRUSTED_PREFIX_LEN) ||
1030		capable(CAP_SYS_ADMIN));
1031}
1032
1033ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size)
1034{
1035	struct inode *inode = dentry->d_inode;
1036	char *buffer;
1037	ssize_t size = 0;
1038	int xattr_size;
1039	struct jfs_ea_list *ealist;
1040	struct jfs_ea *ea;
1041	struct ea_buffer ea_buf;
1042
1043	down_read(&JFS_IP(inode)->xattr_sem);
1044
1045	xattr_size = ea_get(inode, &ea_buf, 0);
1046	if (xattr_size < 0) {
1047		size = xattr_size;
1048		goto out;
1049	}
1050
1051	if (xattr_size == 0)
1052		goto release;
1053
1054	ealist = (struct jfs_ea_list *) ea_buf.xattr;
1055
1056	/* compute required size of list */
1057	for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) {
1058		if (can_list(ea))
1059			size += name_size(ea) + 1;
1060	}
1061
1062	if (!data)
1063		goto release;
1064
1065	if (size > buf_size) {
1066		size = -ERANGE;
1067		goto release;
1068	}
1069
1070	/* Copy attribute names to buffer */
1071	buffer = data;
1072	for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) {
1073		if (can_list(ea)) {
1074			int namelen = copy_name(buffer, ea);
1075			buffer += namelen + 1;
1076		}
1077	}
1078
1079      release:
1080	ea_release(inode, &ea_buf);
1081      out:
1082	up_read(&JFS_IP(inode)->xattr_sem);
1083	return size;
1084}
1085
1086int jfs_removexattr(struct dentry *dentry, const char *name)
1087{
1088	struct inode *inode = dentry->d_inode;
1089	struct jfs_inode_info *ji = JFS_IP(inode);
1090	int rc;
1091	tid_t tid;
1092
1093	if ((rc = can_set_xattr(inode, name, NULL, 0)))
1094		return rc;
1095
1096	tid = txBegin(inode->i_sb, 0);
1097	mutex_lock(&ji->commit_mutex);
1098	rc = __jfs_setxattr(tid, dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
1099	if (!rc)
1100		rc = txCommit(tid, 1, &inode, 0);
1101	txEnd(tid);
1102	mutex_unlock(&ji->commit_mutex);
1103
1104	return rc;
1105}
1106
#ifdef CONFIG_JFS_SECURITY
/*
 * NAME: jfs_init_security
 *
 * FUNCTION: Store the security attribute supplied by
 *	     security_inode_init_security() on an inode
 *
 * PARAMETERS:
 *	tid	- Transaction id, passed on to __jfs_setxattr()
 *	inode	- Inode that receives the attribute
 *	dir	- Directory inode handed to security_inode_init_security()
 *
 * NOTES: The attribute name is the security prefix followed by the suffix
 *	returned by security_inode_init_security().  The suffix and value
 *	returned by that call are freed here.
 *
 * RETURNS: 0 for success, also when security_inode_init_security()
 *	reports -EOPNOTSUPP; -ENOMEM or the error from
 *	security_inode_init_security() / __jfs_setxattr() otherwise
 */
int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir)
{
	char *suffix;
	void *value;
	size_t len;
	char *name;
	int rc;

	rc = security_inode_init_security(inode, dir, &suffix, &value, &len);
	if (rc)
		return (rc == -EOPNOTSUPP) ? 0 : rc;

	name = kmalloc(XATTR_SECURITY_PREFIX_LEN + 1 + strlen(suffix),
		       GFP_NOFS);
	if (name) {
		strcpy(name, XATTR_SECURITY_PREFIX);
		strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix);

		rc = __jfs_setxattr(tid, inode, name, value, len, 0);

		kfree(name);
	} else {
		rc = -ENOMEM;
	}

	kfree(suffix);
	kfree(value);

	return rc;
}
#endif
1141