1/*
2 * linux/fs/ext2/xattr.c
3 *
4 * Copyright (C) 2001-2003 Andreas Gruenbacher <agruen@suse.de>
5 *
6 * Fix by Harrison Xing <harrison@mountainviewdata.com>.
7 * Extended attributes for symlinks and special files added per
8 *  suggestion of Luka Renko <luka.renko@hermes.si>.
9 * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
10 *  Red Hat Inc.
11 *
12 */
13
14/*
15 * Extended attributes are stored on disk blocks allocated outside of
16 * any inode. The i_file_acl field is then made to point to this allocated
17 * block. If all extended attributes of an inode are identical, these
18 * inodes may share the same extended attribute block. Such situations
19 * are automatically detected by keeping a cache of recent attribute block
20 * numbers and hashes over the block's contents in memory.
21 *
22 *
23 * Extended attribute block layout:
24 *
25 *   +------------------+
26 *   | header           |
27 *   | entry 1          | |
28 *   | entry 2          | | growing downwards
29 *   | entry 3          | v
30 *   | four null bytes  |
31 *   | . . .            |
32 *   | value 1          | ^
33 *   | value 3          | | growing upwards
34 *   | value 2          | |
35 *   +------------------+
36 *
37 * The block header is followed by multiple entry descriptors. These entry
 * descriptors are variable in size, and aligned to EXT2_XATTR_PAD
39 * byte boundaries. The entry descriptors are sorted by attribute name,
40 * so that two extended attribute blocks can be compared efficiently.
41 *
42 * Attribute values are aligned to the end of the block, stored in
43 * no specific order. They are also padded to EXT2_XATTR_PAD byte
44 * boundaries. No additional gaps are left between them.
45 *
46 * Locking strategy
47 * ----------------
48 * EXT2_I(inode)->i_file_acl is protected by EXT2_I(inode)->xattr_sem.
49 * EA blocks are only changed if they are exclusive to an inode, so
50 * holding xattr_sem also means that nothing but the EA block's reference
51 * count will change. Multiple writers to an EA block are synchronized
52 * by the bh lock. No more than a single bh lock is held at any time
53 * to avoid deadlocks.
54 */
55
56#include <linux/buffer_head.h>
57#include <linux/module.h>
58#include <linux/init.h>
59#include <linux/slab.h>
60#include <linux/mbcache.h>
61#include <linux/quotaops.h>
62#include <linux/rwsem.h>
63#include "ext2.h"
64#include "xattr.h"
65#include "acl.h"
66
/* View the start of a buffer head's data as an xattr block header. */
#define HDR(bh) ((struct ext2_xattr_header *)((bh)->b_data))
/* Treat an arbitrary pointer as a pointer to an entry descriptor. */
#define ENTRY(ptr) ((struct ext2_xattr_entry *)(ptr))
/* The first entry descriptor starts directly behind the block header. */
#define FIRST_ENTRY(bh) ENTRY(HDR(bh) + 1)
/* The entry list ends at the descriptor whose leading 32 bits are zero. */
#define IS_LAST_ENTRY(entry) (!*(__u32 *)(entry))
71
#ifdef EXT2_XATTR_DEBUG
/* Print a KERN_DEBUG message prefixed with the superblock id and inode number. */
# define ea_idebug(inode, fmt...) do { \
		printk(KERN_DEBUG "inode %s:%ld: ", \
			inode->i_sb->s_id, inode->i_ino); \
		printk(fmt); \
		printk("\n"); \
	} while (0)
/* Print a KERN_DEBUG message prefixed with the device name and block number. */
# define ea_bdebug(bh, fmt...) do { \
		char b[BDEVNAME_SIZE]; \
		printk(KERN_DEBUG "block %s:%lu: ", \
			bdevname(bh->b_bdev, b), \
			(unsigned long) bh->b_blocknr); \
		printk(fmt); \
		printk("\n"); \
	} while (0)
#else
/* Debugging disabled: both macros expand to nothing. */
# define ea_idebug(fmt...)
# define ea_bdebug(fmt...)
#endif
91
/* Forward declarations of helpers that are defined further down. */
static int ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
			   struct ext2_xattr_header *header);
static int ext2_xattr_cache_insert(struct buffer_head *bh);
static struct buffer_head *ext2_xattr_cache_find(struct inode *inode,
					struct ext2_xattr_header *header);
static void ext2_xattr_rehash(struct ext2_xattr_header *header,
			      struct ext2_xattr_entry *entry);

/* Cache of attribute blocks; set up in init_ext2_xattr(). */
static struct mb_cache *ext2_xattr_cache;
102
103static struct xattr_handler *ext2_xattr_handler_map[] = {
104	[EXT2_XATTR_INDEX_USER]		     = &ext2_xattr_user_handler,
105#ifdef CONFIG_EXT2_FS_POSIX_ACL
106	[EXT2_XATTR_INDEX_POSIX_ACL_ACCESS]  = &ext2_xattr_acl_access_handler,
107	[EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext2_xattr_acl_default_handler,
108#endif
109	[EXT2_XATTR_INDEX_TRUSTED]	     = &ext2_xattr_trusted_handler,
110#ifdef CONFIG_EXT2_FS_SECURITY
111	[EXT2_XATTR_INDEX_SECURITY]	     = &ext2_xattr_security_handler,
112#endif
113};
114
115struct xattr_handler *ext2_xattr_handlers[] = {
116	&ext2_xattr_user_handler,
117	&ext2_xattr_trusted_handler,
118#ifdef CONFIG_EXT2_FS_POSIX_ACL
119	&ext2_xattr_acl_access_handler,
120	&ext2_xattr_acl_default_handler,
121#endif
122#ifdef CONFIG_EXT2_FS_SECURITY
123	&ext2_xattr_security_handler,
124#endif
125	NULL
126};
127
128static inline struct xattr_handler *
129ext2_xattr_handler(int name_index)
130{
131	struct xattr_handler *handler = NULL;
132
133	if (name_index > 0 && name_index < ARRAY_SIZE(ext2_xattr_handler_map))
134		handler = ext2_xattr_handler_map[name_index];
135	return handler;
136}
137
138/*
139 * ext2_xattr_get()
140 *
141 * Copy an extended attribute into the buffer
142 * provided, or compute the buffer size required.
143 * Buffer is NULL to compute the size of the buffer required.
144 *
145 * Returns a negative error number on failure, or the number of bytes
146 * used / required on success.
147 */
148int
149ext2_xattr_get(struct inode *inode, int name_index, const char *name,
150	       void *buffer, size_t buffer_size)
151{
152	struct buffer_head *bh = NULL;
153	struct ext2_xattr_entry *entry;
154	size_t name_len, size;
155	char *end;
156	int error;
157
158	ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
159		  name_index, name, buffer, (long)buffer_size);
160
161	if (name == NULL)
162		return -EINVAL;
163	down_read(&EXT2_I(inode)->xattr_sem);
164	error = -ENODATA;
165	if (!EXT2_I(inode)->i_file_acl)
166		goto cleanup;
167	ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
168	bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
169	error = -EIO;
170	if (!bh)
171		goto cleanup;
172	ea_bdebug(bh, "b_count=%d, refcount=%d",
173		atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
174	end = bh->b_data + bh->b_size;
175	if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
176	    HDR(bh)->h_blocks != cpu_to_le32(1)) {
177bad_block:	ext2_error(inode->i_sb, "ext2_xattr_get",
178			"inode %ld: bad block %d", inode->i_ino,
179			EXT2_I(inode)->i_file_acl);
180		error = -EIO;
181		goto cleanup;
182	}
183	/* find named attribute */
184	name_len = strlen(name);
185
186	error = -ERANGE;
187	if (name_len > 255)
188		goto cleanup;
189	entry = FIRST_ENTRY(bh);
190	while (!IS_LAST_ENTRY(entry)) {
191		struct ext2_xattr_entry *next =
192			EXT2_XATTR_NEXT(entry);
193		if ((char *)next >= end)
194			goto bad_block;
195		if (name_index == entry->e_name_index &&
196		    name_len == entry->e_name_len &&
197		    memcmp(name, entry->e_name, name_len) == 0)
198			goto found;
199		entry = next;
200	}
201	/* Check the remaining name entries */
202	while (!IS_LAST_ENTRY(entry)) {
203		struct ext2_xattr_entry *next =
204			EXT2_XATTR_NEXT(entry);
205		if ((char *)next >= end)
206			goto bad_block;
207		entry = next;
208	}
209	if (ext2_xattr_cache_insert(bh))
210		ea_idebug(inode, "cache insert failed");
211	error = -ENODATA;
212	goto cleanup;
213found:
214	/* check the buffer size */
215	if (entry->e_value_block != 0)
216		goto bad_block;
217	size = le32_to_cpu(entry->e_value_size);
218	if (size > inode->i_sb->s_blocksize ||
219	    le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
220		goto bad_block;
221
222	if (ext2_xattr_cache_insert(bh))
223		ea_idebug(inode, "cache insert failed");
224	if (buffer) {
225		error = -ERANGE;
226		if (size > buffer_size)
227			goto cleanup;
228		/* return value of attribute */
229		memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
230			size);
231	}
232	error = size;
233
234cleanup:
235	brelse(bh);
236	up_read(&EXT2_I(inode)->xattr_sem);
237
238	return error;
239}
240
241/*
242 * ext2_xattr_list()
243 *
244 * Copy a list of attribute names into the buffer
245 * provided, or compute the buffer size required.
246 * Buffer is NULL to compute the size of the buffer required.
247 *
248 * Returns a negative error number on failure, or the number of bytes
249 * used / required on success.
250 */
251static int
252ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
253{
254	struct buffer_head *bh = NULL;
255	struct ext2_xattr_entry *entry;
256	char *end;
257	size_t rest = buffer_size;
258	int error;
259
260	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
261		  buffer, (long)buffer_size);
262
263	down_read(&EXT2_I(inode)->xattr_sem);
264	error = 0;
265	if (!EXT2_I(inode)->i_file_acl)
266		goto cleanup;
267	ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
268	bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
269	error = -EIO;
270	if (!bh)
271		goto cleanup;
272	ea_bdebug(bh, "b_count=%d, refcount=%d",
273		atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
274	end = bh->b_data + bh->b_size;
275	if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
276	    HDR(bh)->h_blocks != cpu_to_le32(1)) {
277bad_block:	ext2_error(inode->i_sb, "ext2_xattr_list",
278			"inode %ld: bad block %d", inode->i_ino,
279			EXT2_I(inode)->i_file_acl);
280		error = -EIO;
281		goto cleanup;
282	}
283
284	/* check the on-disk data structure */
285	entry = FIRST_ENTRY(bh);
286	while (!IS_LAST_ENTRY(entry)) {
287		struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(entry);
288
289		if ((char *)next >= end)
290			goto bad_block;
291		entry = next;
292	}
293	if (ext2_xattr_cache_insert(bh))
294		ea_idebug(inode, "cache insert failed");
295
296	/* list the attribute names */
297	for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
298	     entry = EXT2_XATTR_NEXT(entry)) {
299		struct xattr_handler *handler =
300			ext2_xattr_handler(entry->e_name_index);
301
302		if (handler) {
303			size_t size = handler->list(inode, buffer, rest,
304						    entry->e_name,
305						    entry->e_name_len);
306			if (buffer) {
307				if (size > rest) {
308					error = -ERANGE;
309					goto cleanup;
310				}
311				buffer += size;
312			}
313			rest -= size;
314		}
315	}
316	error = buffer_size - rest;  /* total size */
317
318cleanup:
319	brelse(bh);
320	up_read(&EXT2_I(inode)->xattr_sem);
321
322	return error;
323}
324
325/*
326 * Inode operation listxattr()
327 *
328 * dentry->d_inode->i_mutex: don't care
329 */
330ssize_t
331ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
332{
333	return ext2_xattr_list(dentry->d_inode, buffer, size);
334}
335
336/*
337 * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is
338 * not set, set it.
339 */
340static void ext2_xattr_update_super_block(struct super_block *sb)
341{
342	if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR))
343		return;
344
345	EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR);
346	sb->s_dirt = 1;
347	mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
348}
349
350/*
351 * ext2_xattr_set()
352 *
353 * Create, replace or remove an extended attribute for this inode. Buffer
354 * is NULL to remove an existing extended attribute, and non-NULL to
355 * either replace an existing extended attribute, or create a new extended
356 * attribute. The flags XATTR_REPLACE and XATTR_CREATE
357 * specify that an extended attribute must exist and must not exist
358 * previous to the call, respectively.
359 *
360 * Returns 0, or a negative error number on failure.
361 */
362int
363ext2_xattr_set(struct inode *inode, int name_index, const char *name,
364	       const void *value, size_t value_len, int flags)
365{
366	struct super_block *sb = inode->i_sb;
367	struct buffer_head *bh = NULL;
368	struct ext2_xattr_header *header = NULL;
369	struct ext2_xattr_entry *here, *last;
370	size_t name_len, free, min_offs = sb->s_blocksize;
371	int not_found = 1, error;
372	char *end;
373
374	/*
375	 * header -- Points either into bh, or to a temporarily
376	 *           allocated buffer.
377	 * here -- The named entry found, or the place for inserting, within
378	 *         the block pointed to by header.
379	 * last -- Points right after the last named entry within the block
380	 *         pointed to by header.
381	 * min_offs -- The offset of the first value (values are aligned
382	 *             towards the end of the block).
383	 * end -- Points right after the block pointed to by header.
384	 */
385
386	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
387		  name_index, name, value, (long)value_len);
388
389	if (value == NULL)
390		value_len = 0;
391	if (name == NULL)
392		return -EINVAL;
393	name_len = strlen(name);
394	if (name_len > 255 || value_len > sb->s_blocksize)
395		return -ERANGE;
396	down_write(&EXT2_I(inode)->xattr_sem);
397	if (EXT2_I(inode)->i_file_acl) {
398		/* The inode already has an extended attribute block. */
399		bh = sb_bread(sb, EXT2_I(inode)->i_file_acl);
400		error = -EIO;
401		if (!bh)
402			goto cleanup;
403		ea_bdebug(bh, "b_count=%d, refcount=%d",
404			atomic_read(&(bh->b_count)),
405			le32_to_cpu(HDR(bh)->h_refcount));
406		header = HDR(bh);
407		end = bh->b_data + bh->b_size;
408		if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
409		    header->h_blocks != cpu_to_le32(1)) {
410bad_block:		ext2_error(sb, "ext2_xattr_set",
411				"inode %ld: bad block %d", inode->i_ino,
412				   EXT2_I(inode)->i_file_acl);
413			error = -EIO;
414			goto cleanup;
415		}
416		/* Find the named attribute. */
417		here = FIRST_ENTRY(bh);
418		while (!IS_LAST_ENTRY(here)) {
419			struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here);
420			if ((char *)next >= end)
421				goto bad_block;
422			if (!here->e_value_block && here->e_value_size) {
423				size_t offs = le16_to_cpu(here->e_value_offs);
424				if (offs < min_offs)
425					min_offs = offs;
426			}
427			not_found = name_index - here->e_name_index;
428			if (!not_found)
429				not_found = name_len - here->e_name_len;
430			if (!not_found)
431				not_found = memcmp(name, here->e_name,name_len);
432			if (not_found <= 0)
433				break;
434			here = next;
435		}
436		last = here;
437		/* We still need to compute min_offs and last. */
438		while (!IS_LAST_ENTRY(last)) {
439			struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last);
440			if ((char *)next >= end)
441				goto bad_block;
442			if (!last->e_value_block && last->e_value_size) {
443				size_t offs = le16_to_cpu(last->e_value_offs);
444				if (offs < min_offs)
445					min_offs = offs;
446			}
447			last = next;
448		}
449
450		/* Check whether we have enough space left. */
451		free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
452	} else {
453		/* We will use a new extended attribute block. */
454		free = sb->s_blocksize -
455			sizeof(struct ext2_xattr_header) - sizeof(__u32);
456		here = last = NULL;  /* avoid gcc uninitialized warning. */
457	}
458
459	if (not_found) {
460		/* Request to remove a nonexistent attribute? */
461		error = -ENODATA;
462		if (flags & XATTR_REPLACE)
463			goto cleanup;
464		error = 0;
465		if (value == NULL)
466			goto cleanup;
467	} else {
468		/* Request to create an existing attribute? */
469		error = -EEXIST;
470		if (flags & XATTR_CREATE)
471			goto cleanup;
472		if (!here->e_value_block && here->e_value_size) {
473			size_t size = le32_to_cpu(here->e_value_size);
474
475			if (le16_to_cpu(here->e_value_offs) + size >
476			    sb->s_blocksize || size > sb->s_blocksize)
477				goto bad_block;
478			free += EXT2_XATTR_SIZE(size);
479		}
480		free += EXT2_XATTR_LEN(name_len);
481	}
482	error = -ENOSPC;
483	if (free < EXT2_XATTR_LEN(name_len) + EXT2_XATTR_SIZE(value_len))
484		goto cleanup;
485
486	/* Here we know that we can set the new attribute. */
487
488	if (header) {
489		struct mb_cache_entry *ce;
490
491		/* assert(header == HDR(bh)); */
492		ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev,
493					bh->b_blocknr);
494		lock_buffer(bh);
495		if (header->h_refcount == cpu_to_le32(1)) {
496			ea_bdebug(bh, "modifying in-place");
497			if (ce)
498				mb_cache_entry_free(ce);
499			/* keep the buffer locked while modifying it. */
500		} else {
501			int offset;
502
503			if (ce)
504				mb_cache_entry_release(ce);
505			unlock_buffer(bh);
506			ea_bdebug(bh, "cloning");
507			header = kmalloc(bh->b_size, GFP_KERNEL);
508			error = -ENOMEM;
509			if (header == NULL)
510				goto cleanup;
511			memcpy(header, HDR(bh), bh->b_size);
512			header->h_refcount = cpu_to_le32(1);
513
514			offset = (char *)here - bh->b_data;
515			here = ENTRY((char *)header + offset);
516			offset = (char *)last - bh->b_data;
517			last = ENTRY((char *)header + offset);
518		}
519	} else {
520		/* Allocate a buffer where we construct the new block. */
521		header = kzalloc(sb->s_blocksize, GFP_KERNEL);
522		error = -ENOMEM;
523		if (header == NULL)
524			goto cleanup;
525		end = (char *)header + sb->s_blocksize;
526		header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC);
527		header->h_blocks = header->h_refcount = cpu_to_le32(1);
528		last = here = ENTRY(header+1);
529	}
530
531	/* Iff we are modifying the block in-place, bh is locked here. */
532
533	if (not_found) {
534		/* Insert the new name. */
535		size_t size = EXT2_XATTR_LEN(name_len);
536		size_t rest = (char *)last - (char *)here;
537		memmove((char *)here + size, here, rest);
538		memset(here, 0, size);
539		here->e_name_index = name_index;
540		here->e_name_len = name_len;
541		memcpy(here->e_name, name, name_len);
542	} else {
543		if (!here->e_value_block && here->e_value_size) {
544			char *first_val = (char *)header + min_offs;
545			size_t offs = le16_to_cpu(here->e_value_offs);
546			char *val = (char *)header + offs;
547			size_t size = EXT2_XATTR_SIZE(
548				le32_to_cpu(here->e_value_size));
549
550			if (size == EXT2_XATTR_SIZE(value_len)) {
551				/* The old and the new value have the same
552				   size. Just replace. */
553				here->e_value_size = cpu_to_le32(value_len);
554				memset(val + size - EXT2_XATTR_PAD, 0,
555				       EXT2_XATTR_PAD); /* Clear pad bytes. */
556				memcpy(val, value, value_len);
557				goto skip_replace;
558			}
559
560			/* Remove the old value. */
561			memmove(first_val + size, first_val, val - first_val);
562			memset(first_val, 0, size);
563			here->e_value_offs = 0;
564			min_offs += size;
565
566			/* Adjust all value offsets. */
567			last = ENTRY(header+1);
568			while (!IS_LAST_ENTRY(last)) {
569				size_t o = le16_to_cpu(last->e_value_offs);
570				if (!last->e_value_block && o < offs)
571					last->e_value_offs =
572						cpu_to_le16(o + size);
573				last = EXT2_XATTR_NEXT(last);
574			}
575		}
576		if (value == NULL) {
577			/* Remove the old name. */
578			size_t size = EXT2_XATTR_LEN(name_len);
579			last = ENTRY((char *)last - size);
580			memmove(here, (char*)here + size,
581				(char*)last - (char*)here);
582			memset(last, 0, size);
583		}
584	}
585
586	if (value != NULL) {
587		/* Insert the new value. */
588		here->e_value_size = cpu_to_le32(value_len);
589		if (value_len) {
590			size_t size = EXT2_XATTR_SIZE(value_len);
591			char *val = (char *)header + min_offs - size;
592			here->e_value_offs =
593				cpu_to_le16((char *)val - (char *)header);
594			memset(val + size - EXT2_XATTR_PAD, 0,
595			       EXT2_XATTR_PAD); /* Clear the pad bytes. */
596			memcpy(val, value, value_len);
597		}
598	}
599
600skip_replace:
601	if (IS_LAST_ENTRY(ENTRY(header+1))) {
602		/* This block is now empty. */
603		if (bh && header == HDR(bh))
604			unlock_buffer(bh);  /* we were modifying in-place. */
605		error = ext2_xattr_set2(inode, bh, NULL);
606	} else {
607		ext2_xattr_rehash(header, here);
608		if (bh && header == HDR(bh))
609			unlock_buffer(bh);  /* we were modifying in-place. */
610		error = ext2_xattr_set2(inode, bh, header);
611	}
612
613cleanup:
614	brelse(bh);
615	if (!(bh && header == HDR(bh)))
616		kfree(header);
617	up_write(&EXT2_I(inode)->xattr_sem);
618
619	return error;
620}
621
622/*
623 * Second half of ext2_xattr_set(): Update the file system.
624 */
625static int
626ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
627		struct ext2_xattr_header *header)
628{
629	struct super_block *sb = inode->i_sb;
630	struct buffer_head *new_bh = NULL;
631	int error;
632
633	if (header) {
634		new_bh = ext2_xattr_cache_find(inode, header);
635		if (new_bh) {
636			/* We found an identical block in the cache. */
637			if (new_bh == old_bh) {
638				ea_bdebug(new_bh, "keeping this block");
639			} else {
640				/* The old block is released after updating
641				   the inode.  */
642				ea_bdebug(new_bh, "reusing block");
643
644				error = -EDQUOT;
645				if (DQUOT_ALLOC_BLOCK(inode, 1)) {
646					unlock_buffer(new_bh);
647					goto cleanup;
648				}
649				HDR(new_bh)->h_refcount = cpu_to_le32(1 +
650					le32_to_cpu(HDR(new_bh)->h_refcount));
651				ea_bdebug(new_bh, "refcount now=%d",
652					le32_to_cpu(HDR(new_bh)->h_refcount));
653			}
654			unlock_buffer(new_bh);
655		} else if (old_bh && header == HDR(old_bh)) {
656			/* Keep this block. No need to lock the block as we
657			   don't need to change the reference count. */
658			new_bh = old_bh;
659			get_bh(new_bh);
660			ext2_xattr_cache_insert(new_bh);
661		} else {
662			/* We need to allocate a new block */
663			int goal = le32_to_cpu(EXT2_SB(sb)->s_es->
664						           s_first_data_block) +
665				   EXT2_I(inode)->i_block_group *
666				   EXT2_BLOCKS_PER_GROUP(sb);
667			int block = ext2_new_block(inode, goal,
668						   NULL, NULL, &error);
669			if (error)
670				goto cleanup;
671			ea_idebug(inode, "creating block %d", block);
672
673			new_bh = sb_getblk(sb, block);
674			if (!new_bh) {
675				ext2_free_blocks(inode, block, 1);
676				error = -EIO;
677				goto cleanup;
678			}
679			lock_buffer(new_bh);
680			memcpy(new_bh->b_data, header, new_bh->b_size);
681			set_buffer_uptodate(new_bh);
682			unlock_buffer(new_bh);
683			ext2_xattr_cache_insert(new_bh);
684
685			ext2_xattr_update_super_block(sb);
686		}
687		mark_buffer_dirty(new_bh);
688		if (IS_SYNC(inode)) {
689			sync_dirty_buffer(new_bh);
690			error = -EIO;
691			if (buffer_req(new_bh) && !buffer_uptodate(new_bh))
692				goto cleanup;
693		}
694	}
695
696	/* Update the inode. */
697	EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
698	inode->i_ctime = CURRENT_TIME_SEC;
699	if (IS_SYNC(inode)) {
700		error = ext2_sync_inode (inode);
701		/* In case sync failed due to ENOSPC the inode was actually
702		 * written (only some dirty data were not) so we just proceed
703		 * as if nothing happened and cleanup the unused block */
704		if (error && error != -ENOSPC) {
705			if (new_bh && new_bh != old_bh)
706				DQUOT_FREE_BLOCK(inode, 1);
707			goto cleanup;
708		}
709	} else
710		mark_inode_dirty(inode);
711
712	error = 0;
713	if (old_bh && old_bh != new_bh) {
714		struct mb_cache_entry *ce;
715
716		/*
717		 * If there was an old block and we are no longer using it,
718		 * release the old block.
719		 */
720		ce = mb_cache_entry_get(ext2_xattr_cache, old_bh->b_bdev,
721					old_bh->b_blocknr);
722		lock_buffer(old_bh);
723		if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
724			/* Free the old block. */
725			if (ce)
726				mb_cache_entry_free(ce);
727			ea_bdebug(old_bh, "freeing");
728			ext2_free_blocks(inode, old_bh->b_blocknr, 1);
729			/* We let our caller release old_bh, so we
730			 * need to duplicate the buffer before. */
731			get_bh(old_bh);
732			bforget(old_bh);
733		} else {
734			/* Decrement the refcount only. */
735			HDR(old_bh)->h_refcount = cpu_to_le32(
736				le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
737			if (ce)
738				mb_cache_entry_release(ce);
739			DQUOT_FREE_BLOCK(inode, 1);
740			mark_buffer_dirty(old_bh);
741			ea_bdebug(old_bh, "refcount now=%d",
742				le32_to_cpu(HDR(old_bh)->h_refcount));
743		}
744		unlock_buffer(old_bh);
745	}
746
747cleanup:
748	brelse(new_bh);
749
750	return error;
751}
752
753/*
754 * ext2_xattr_delete_inode()
755 *
756 * Free extended attribute resources associated with this inode. This
757 * is called immediately before an inode is freed.
758 */
759void
760ext2_xattr_delete_inode(struct inode *inode)
761{
762	struct buffer_head *bh = NULL;
763	struct mb_cache_entry *ce;
764
765	down_write(&EXT2_I(inode)->xattr_sem);
766	if (!EXT2_I(inode)->i_file_acl)
767		goto cleanup;
768	bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
769	if (!bh) {
770		ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
771			"inode %ld: block %d read error", inode->i_ino,
772			EXT2_I(inode)->i_file_acl);
773		goto cleanup;
774	}
775	ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count)));
776	if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
777	    HDR(bh)->h_blocks != cpu_to_le32(1)) {
778		ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
779			"inode %ld: bad block %d", inode->i_ino,
780			EXT2_I(inode)->i_file_acl);
781		goto cleanup;
782	}
783	ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr);
784	lock_buffer(bh);
785	if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
786		if (ce)
787			mb_cache_entry_free(ce);
788		ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
789		get_bh(bh);
790		bforget(bh);
791		unlock_buffer(bh);
792	} else {
793		HDR(bh)->h_refcount = cpu_to_le32(
794			le32_to_cpu(HDR(bh)->h_refcount) - 1);
795		if (ce)
796			mb_cache_entry_release(ce);
797		ea_bdebug(bh, "refcount now=%d",
798			le32_to_cpu(HDR(bh)->h_refcount));
799		unlock_buffer(bh);
800		mark_buffer_dirty(bh);
801		if (IS_SYNC(inode))
802			sync_dirty_buffer(bh);
803		DQUOT_FREE_BLOCK(inode, 1);
804	}
805	EXT2_I(inode)->i_file_acl = 0;
806
807cleanup:
808	brelse(bh);
809	up_write(&EXT2_I(inode)->xattr_sem);
810}
811
812/*
813 * ext2_xattr_put_super()
814 *
815 * This is called when a file system is unmounted.
816 */
817void
818ext2_xattr_put_super(struct super_block *sb)
819{
820	mb_cache_shrink(sb->s_bdev);
821}
822
823
824/*
825 * ext2_xattr_cache_insert()
826 *
827 * Create a new entry in the extended attribute cache, and insert
828 * it unless such an entry is already in the cache.
829 *
830 * Returns 0, or a negative error number on failure.
831 */
832static int
833ext2_xattr_cache_insert(struct buffer_head *bh)
834{
835	__u32 hash = le32_to_cpu(HDR(bh)->h_hash);
836	struct mb_cache_entry *ce;
837	int error;
838
839	ce = mb_cache_entry_alloc(ext2_xattr_cache);
840	if (!ce)
841		return -ENOMEM;
842	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
843	if (error) {
844		mb_cache_entry_free(ce);
845		if (error == -EBUSY) {
846			ea_bdebug(bh, "already in cache (%d cache entries)",
847				atomic_read(&ext2_xattr_cache->c_entry_count));
848			error = 0;
849		}
850	} else {
851		ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
852			  atomic_read(&ext2_xattr_cache->c_entry_count));
853		mb_cache_entry_release(ce);
854	}
855	return error;
856}
857
858/*
859 * ext2_xattr_cmp()
860 *
861 * Compare two extended attribute blocks for equality.
862 *
863 * Returns 0 if the blocks are equal, 1 if they differ, and
864 * a negative error number on errors.
865 */
866static int
867ext2_xattr_cmp(struct ext2_xattr_header *header1,
868	       struct ext2_xattr_header *header2)
869{
870	struct ext2_xattr_entry *entry1, *entry2;
871
872	entry1 = ENTRY(header1+1);
873	entry2 = ENTRY(header2+1);
874	while (!IS_LAST_ENTRY(entry1)) {
875		if (IS_LAST_ENTRY(entry2))
876			return 1;
877		if (entry1->e_hash != entry2->e_hash ||
878		    entry1->e_name_index != entry2->e_name_index ||
879		    entry1->e_name_len != entry2->e_name_len ||
880		    entry1->e_value_size != entry2->e_value_size ||
881		    memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
882			return 1;
883		if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
884			return -EIO;
885		if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
886			   (char *)header2 + le16_to_cpu(entry2->e_value_offs),
887			   le32_to_cpu(entry1->e_value_size)))
888			return 1;
889
890		entry1 = EXT2_XATTR_NEXT(entry1);
891		entry2 = EXT2_XATTR_NEXT(entry2);
892	}
893	if (!IS_LAST_ENTRY(entry2))
894		return 1;
895	return 0;
896}
897
898/*
899 * ext2_xattr_cache_find()
900 *
901 * Find an identical extended attribute block.
902 *
903 * Returns a locked buffer head to the block found, or NULL if such
904 * a block was not found or an error occurred.
905 */
906static struct buffer_head *
907ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
908{
909	__u32 hash = le32_to_cpu(header->h_hash);
910	struct mb_cache_entry *ce;
911
912	if (!header->h_hash)
913		return NULL;  /* never share */
914	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
915again:
916	ce = mb_cache_entry_find_first(ext2_xattr_cache, 0,
917				       inode->i_sb->s_bdev, hash);
918	while (ce) {
919		struct buffer_head *bh;
920
921		if (IS_ERR(ce)) {
922			if (PTR_ERR(ce) == -EAGAIN)
923				goto again;
924			break;
925		}
926
927		bh = sb_bread(inode->i_sb, ce->e_block);
928		if (!bh) {
929			ext2_error(inode->i_sb, "ext2_xattr_cache_find",
930				"inode %ld: block %ld read error",
931				inode->i_ino, (unsigned long) ce->e_block);
932		} else {
933			lock_buffer(bh);
934			if (le32_to_cpu(HDR(bh)->h_refcount) >
935				   EXT2_XATTR_REFCOUNT_MAX) {
936				ea_idebug(inode, "block %ld refcount %d>%d",
937					  (unsigned long) ce->e_block,
938					  le32_to_cpu(HDR(bh)->h_refcount),
939					  EXT2_XATTR_REFCOUNT_MAX);
940			} else if (!ext2_xattr_cmp(header, HDR(bh))) {
941				ea_bdebug(bh, "b_count=%d",
942					  atomic_read(&(bh->b_count)));
943				mb_cache_entry_release(ce);
944				return bh;
945			}
946			unlock_buffer(bh);
947			brelse(bh);
948		}
949		ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
950	}
951	return NULL;
952}
953
954#define NAME_HASH_SHIFT 5
955#define VALUE_HASH_SHIFT 16
956
957/*
958 * ext2_xattr_hash_entry()
959 *
960 * Compute the hash of an extended attribute.
961 */
962static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header,
963					 struct ext2_xattr_entry *entry)
964{
965	__u32 hash = 0;
966	char *name = entry->e_name;
967	int n;
968
969	for (n=0; n < entry->e_name_len; n++) {
970		hash = (hash << NAME_HASH_SHIFT) ^
971		       (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
972		       *name++;
973	}
974
975	if (entry->e_value_block == 0 && entry->e_value_size != 0) {
976		__le32 *value = (__le32 *)((char *)header +
977			le16_to_cpu(entry->e_value_offs));
978		for (n = (le32_to_cpu(entry->e_value_size) +
979		     EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) {
980			hash = (hash << VALUE_HASH_SHIFT) ^
981			       (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
982			       le32_to_cpu(*value++);
983		}
984	}
985	entry->e_hash = cpu_to_le32(hash);
986}
987
988#undef NAME_HASH_SHIFT
989#undef VALUE_HASH_SHIFT
990
991#define BLOCK_HASH_SHIFT 16
992
993/*
994 * ext2_xattr_rehash()
995 *
996 * Re-compute the extended attribute hash value after an entry has changed.
997 */
998static void ext2_xattr_rehash(struct ext2_xattr_header *header,
999			      struct ext2_xattr_entry *entry)
1000{
1001	struct ext2_xattr_entry *here;
1002	__u32 hash = 0;
1003
1004	ext2_xattr_hash_entry(header, entry);
1005	here = ENTRY(header+1);
1006	while (!IS_LAST_ENTRY(here)) {
1007		if (!here->e_hash) {
1008			/* Block is not shared if an entry's hash value == 0 */
1009			hash = 0;
1010			break;
1011		}
1012		hash = (hash << BLOCK_HASH_SHIFT) ^
1013		       (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
1014		       le32_to_cpu(here->e_hash);
1015		here = EXT2_XATTR_NEXT(here);
1016	}
1017	header->h_hash = cpu_to_le32(hash);
1018}
1019
1020#undef BLOCK_HASH_SHIFT
1021
1022int __init
1023init_ext2_xattr(void)
1024{
1025	ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL,
1026		sizeof(struct mb_cache_entry) +
1027		sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
1028	if (!ext2_xattr_cache)
1029		return -ENOMEM;
1030	return 0;
1031}
1032
1033void
1034exit_ext2_xattr(void)
1035{
1036	mb_cache_destroy(ext2_xattr_cache);
1037}
1038