1/*
2 *  linux/fs/ext3/namei.c
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 *  from
10 *
11 *  linux/fs/minix/namei.c
12 *
13 *  Copyright (C) 1991, 1992  Linus Torvalds
14 *
15 *  Big-endian to little-endian byte-swapping/bitmaps by
16 *        David S. Miller (davem@caip.rutgers.edu), 1995
17 *  Directory entry file type support and forward compatibility hooks
18 *  	for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
19 */
20
21#include <linux/fs.h>
22#include <linux/jbd.h>
23#include <linux/sched.h>
24#include <linux/ext3_fs.h>
25#include <linux/ext3_jbd.h>
26#include <linux/fcntl.h>
27#include <linux/stat.h>
28#include <linux/string.h>
29#include <linux/locks.h>
30#include <linux/quotaops.h>
31
32
/*
 * Read-ahead tuning: how far ahead directory blocks are read while a
 * directory is being searched.
 */
#define NAMEI_RA_CHUNKS  2
#define NAMEI_RA_BLOCKS  4
/* Total number of read-ahead slots (chunks times blocks per chunk). */
#define NAMEI_RA_SIZE        (NAMEI_RA_BLOCKS * NAMEI_RA_CHUNKS)
/* Flat slot index of block `b' inside chunk `c'. */
#define NAMEI_RA_INDEX(c,b)  ((b) + ((c) * NAMEI_RA_BLOCKS))
40
41/*
42 * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure.
43 *
44 * `len <= EXT3_NAME_LEN' is guaranteed by caller.
45 * `de != NULL' is guaranteed by caller.
46 */
47static inline int ext3_match (int len, const char * const name,
48			      struct ext3_dir_entry_2 * de)
49{
50	if (len != de->name_len)
51		return 0;
52	if (!de->inode)
53		return 0;
54	return !memcmp(name, de->name, len);
55}
56
57/*
58 * Returns 0 if not found, -1 on failure, and 1 on success
59 */
60static int inline search_dirblock(struct buffer_head * bh,
61				  struct inode *dir,
62				  struct dentry *dentry,
63				  unsigned long offset,
64				  struct ext3_dir_entry_2 ** res_dir)
65{
66	struct ext3_dir_entry_2 * de;
67	char * dlimit;
68	int de_len;
69	const char *name = dentry->d_name.name;
70	int namelen = dentry->d_name.len;
71
72	de = (struct ext3_dir_entry_2 *) bh->b_data;
73	dlimit = bh->b_data + dir->i_sb->s_blocksize;
74	while ((char *) de < dlimit) {
75		/* this code is executed quadratically often */
76		/* do minimal checking `by hand' */
77
78		if ((char *) de + namelen <= dlimit &&
79		    ext3_match (namelen, name, de)) {
80			/* found a match - just to be sure, do a full check */
81			if (!ext3_check_dir_entry("ext3_find_entry",
82						  dir, de, bh, offset))
83				return -1;
84			*res_dir = de;
85			return 1;
86		}
87		/* prevent looping on a bad block */
88		de_len = le16_to_cpu(de->rec_len);
89		if (de_len <= 0)
90			return -1;
91		offset += de_len;
92		de = (struct ext3_dir_entry_2 *) ((char *) de + de_len);
93	}
94	return 0;
95}
96
97/*
98 *	ext3_find_entry()
99 *
100 * finds an entry in the specified directory with the wanted name. It
101 * returns the cache buffer in which the entry was found, and the entry
102 * itself (as a parameter - res_dir). It does NOT read the inode of the
103 * entry - you'll have to do that yourself if you want to.
104 *
105 * The returned buffer_head has ->b_count elevated.  The caller is expected
106 * to brelse() it when appropriate.
107 */
108static struct buffer_head * ext3_find_entry (struct dentry *dentry,
109					struct ext3_dir_entry_2 ** res_dir)
110{
111	struct super_block * sb;
112	struct buffer_head * bh_use[NAMEI_RA_SIZE];
113	struct buffer_head * bh, *ret = NULL;
114	unsigned long start, block, b;
115	int ra_max = 0;		/* Number of bh's in the readahead
116				   buffer, bh_use[] */
117	int ra_ptr = 0;		/* Current index into readahead
118				   buffer */
119	int num = 0;
120	int nblocks, i, err;
121	struct inode *dir = dentry->d_parent->d_inode;
122
123	*res_dir = NULL;
124	sb = dir->i_sb;
125
126	nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
127	start = dir->u.ext3_i.i_dir_start_lookup;
128	if (start >= nblocks)
129		start = 0;
130	block = start;
131restart:
132	do {
133		/*
134		 * We deal with the read-ahead logic here.
135		 */
136		if (ra_ptr >= ra_max) {
137			/* Refill the readahead buffer */
138			ra_ptr = 0;
139			b = block;
140			for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
141				/*
142				 * Terminate if we reach the end of the
143				 * directory and must wrap, or if our
144				 * search has finished at this block.
145				 */
146				if (b >= nblocks || (num && block == start)) {
147					bh_use[ra_max] = NULL;
148					break;
149				}
150				num++;
151				bh = ext3_getblk(NULL, dir, b++, 0, &err);
152				bh_use[ra_max] = bh;
153				if (bh)
154					ll_rw_block(READ, 1, &bh);
155			}
156		}
157		if ((bh = bh_use[ra_ptr++]) == NULL)
158			goto next;
159		wait_on_buffer(bh);
160		if (!buffer_uptodate(bh)) {
161			/* read error, skip block & hope for the best */
162			brelse(bh);
163			goto next;
164		}
165		i = search_dirblock(bh, dir, dentry,
166			    block << EXT3_BLOCK_SIZE_BITS(sb), res_dir);
167		if (i == 1) {
168			dir->u.ext3_i.i_dir_start_lookup = block;
169			ret = bh;
170			goto cleanup_and_exit;
171		} else {
172			brelse(bh);
173			if (i < 0)
174				goto cleanup_and_exit;
175		}
176	next:
177		if (++block >= nblocks)
178			block = 0;
179	} while (block != start);
180
181	/*
182	 * If the directory has grown while we were searching, then
183	 * search the last part of the directory before giving up.
184	 */
185	block = nblocks;
186	nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
187	if (block < nblocks) {
188		start = 0;
189		goto restart;
190	}
191
192cleanup_and_exit:
193	/* Clean up the read-ahead blocks */
194	for (; ra_ptr < ra_max; ra_ptr++)
195		brelse (bh_use[ra_ptr]);
196	return ret;
197}
198
199static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry)
200{
201	struct inode * inode;
202	struct ext3_dir_entry_2 * de;
203	struct buffer_head * bh;
204
205	if (dentry->d_name.len > EXT3_NAME_LEN)
206		return ERR_PTR(-ENAMETOOLONG);
207
208	bh = ext3_find_entry(dentry, &de);
209	inode = NULL;
210	if (bh) {
211		unsigned long ino = le32_to_cpu(de->inode);
212		brelse (bh);
213		inode = iget(dir->i_sb, ino);
214
215		if (!inode)
216			return ERR_PTR(-EACCES);
217	}
218	d_add(dentry, inode);
219	return NULL;
220}
221
222#define S_SHIFT 12
223static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = {
224	[S_IFREG >> S_SHIFT]	EXT3_FT_REG_FILE,
225	[S_IFDIR >> S_SHIFT]	EXT3_FT_DIR,
226	[S_IFCHR >> S_SHIFT]	EXT3_FT_CHRDEV,
227	[S_IFBLK >> S_SHIFT]	EXT3_FT_BLKDEV,
228	[S_IFIFO >> S_SHIFT]	EXT3_FT_FIFO,
229	[S_IFSOCK >> S_SHIFT]	EXT3_FT_SOCK,
230	[S_IFLNK >> S_SHIFT]	EXT3_FT_SYMLINK,
231};
232
233static inline void ext3_set_de_type(struct super_block *sb,
234				struct ext3_dir_entry_2 *de,
235				umode_t mode) {
236	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE))
237		de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
238}
239
240/*
241 *	ext3_add_entry()
242 *
243 * adds a file entry to the specified directory, using the same
244 * semantics as ext3_find_entry(). It returns NULL if it failed.
245 *
246 * NOTE!! The inode part of 'de' is left at 0 - which means you
247 * may not sleep between calling this and putting something into
248 * the entry, as someone else might have used it while you slept.
249 */
250
251/*
252 * AKPM: the journalling code here looks wrong on the error paths
253 */
254static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
255	struct inode *inode)
256{
257	struct inode *dir = dentry->d_parent->d_inode;
258	const char *name = dentry->d_name.name;
259	int namelen = dentry->d_name.len;
260	unsigned long offset;
261	unsigned short rec_len;
262	struct buffer_head * bh;
263	struct ext3_dir_entry_2 * de, * de1;
264	struct super_block * sb;
265	int	retval;
266
267	sb = dir->i_sb;
268
269	if (!namelen)
270		return -EINVAL;
271	bh = ext3_bread (handle, dir, 0, 0, &retval);
272	if (!bh)
273		return retval;
274	rec_len = EXT3_DIR_REC_LEN(namelen);
275	offset = 0;
276	de = (struct ext3_dir_entry_2 *) bh->b_data;
277	while (1) {
278		if ((char *)de >= sb->s_blocksize + bh->b_data) {
279			brelse (bh);
280			bh = NULL;
281			bh = ext3_bread (handle, dir,
282				offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval);
283			if (!bh)
284				return retval;
285			if (dir->i_size <= offset) {
286				if (dir->i_size == 0) {
287					brelse(bh);
288					return -ENOENT;
289				}
290
291				ext3_debug ("creating next block\n");
292
293				BUFFER_TRACE(bh, "get_write_access");
294				ext3_journal_get_write_access(handle, bh);
295				de = (struct ext3_dir_entry_2 *) bh->b_data;
296				de->inode = 0;
297				de->rec_len = le16_to_cpu(sb->s_blocksize);
298				dir->u.ext3_i.i_disksize =
299					dir->i_size = offset + sb->s_blocksize;
300				dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
301				ext3_mark_inode_dirty(handle, dir);
302			} else {
303
304				ext3_debug ("skipping to next block\n");
305
306				de = (struct ext3_dir_entry_2 *) bh->b_data;
307			}
308		}
309		if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh,
310					   offset)) {
311			brelse (bh);
312			return -ENOENT;
313		}
314		if (ext3_match (namelen, name, de)) {
315				brelse (bh);
316				return -EEXIST;
317		}
318		if ((le32_to_cpu(de->inode) == 0 &&
319				le16_to_cpu(de->rec_len) >= rec_len) ||
320		    (le16_to_cpu(de->rec_len) >=
321				EXT3_DIR_REC_LEN(de->name_len) + rec_len)) {
322			BUFFER_TRACE(bh, "get_write_access");
323			ext3_journal_get_write_access(handle, bh);
324			/* By now the buffer is marked for journaling */
325			offset += le16_to_cpu(de->rec_len);
326			if (le32_to_cpu(de->inode)) {
327				de1 = (struct ext3_dir_entry_2 *) ((char *) de +
328					EXT3_DIR_REC_LEN(de->name_len));
329				de1->rec_len =
330					cpu_to_le16(le16_to_cpu(de->rec_len) -
331					EXT3_DIR_REC_LEN(de->name_len));
332				de->rec_len = cpu_to_le16(
333						EXT3_DIR_REC_LEN(de->name_len));
334				de = de1;
335			}
336			de->file_type = EXT3_FT_UNKNOWN;
337			if (inode) {
338				de->inode = cpu_to_le32(inode->i_ino);
339				ext3_set_de_type(dir->i_sb, de, inode->i_mode);
340			} else
341				de->inode = 0;
342			de->name_len = namelen;
343			memcpy (de->name, name, namelen);
344			dir->i_mtime = dir->i_ctime = CURRENT_TIME;
345			dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
346			dir->i_version = ++event;
347			ext3_mark_inode_dirty(handle, dir);
348			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
349			ext3_journal_dirty_metadata(handle, bh);
350			brelse(bh);
351			return 0;
352		}
353		offset += le16_to_cpu(de->rec_len);
354		de = (struct ext3_dir_entry_2 *)
355			((char *) de + le16_to_cpu(de->rec_len));
356	}
357	brelse (bh);
358	return -ENOSPC;
359}
360
361/*
362 * ext3_delete_entry deletes a directory entry by merging it with the
363 * previous entry
364 */
365static int ext3_delete_entry (handle_t *handle,
366			      struct inode * dir,
367			      struct ext3_dir_entry_2 * de_del,
368			      struct buffer_head * bh)
369{
370	struct ext3_dir_entry_2 * de, * pde;
371	int i;
372
373	i = 0;
374	pde = NULL;
375	de = (struct ext3_dir_entry_2 *) bh->b_data;
376	while (i < bh->b_size) {
377		if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i))
378			return -EIO;
379		if (de == de_del)  {
380			BUFFER_TRACE(bh, "get_write_access");
381			ext3_journal_get_write_access(handle, bh);
382			if (pde)
383				pde->rec_len =
384					cpu_to_le16(le16_to_cpu(pde->rec_len) +
385						    le16_to_cpu(de->rec_len));
386			else
387				de->inode = 0;
388			dir->i_version = ++event;
389			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
390			ext3_journal_dirty_metadata(handle, bh);
391			return 0;
392		}
393		i += le16_to_cpu(de->rec_len);
394		pde = de;
395		de = (struct ext3_dir_entry_2 *)
396			((char *) de + le16_to_cpu(de->rec_len));
397	}
398	return -ENOENT;
399}
400
401/*
402 * ext3_mark_inode_dirty is somewhat expensive, so unlike ext2 we
403 * do not perform it in these functions.  We perform it at the call site,
404 * if it is needed.
405 */
406static inline void ext3_inc_count(handle_t *handle, struct inode *inode)
407{
408	inode->i_nlink++;
409}
410
411static inline void ext3_dec_count(handle_t *handle, struct inode *inode)
412{
413	inode->i_nlink--;
414}
415
416static int ext3_add_nondir(handle_t *handle,
417		struct dentry *dentry, struct inode *inode)
418{
419	int err = ext3_add_entry(handle, dentry, inode);
420	if (!err) {
421		d_instantiate(dentry, inode);
422		return 0;
423	}
424	ext3_dec_count(handle, inode);
425	iput(inode);
426	return err;
427}
428
429/*
430 * By the time this is called, we already have created
431 * the directory cache entry for the new file, but it
432 * is so far negative - it has no inode.
433 *
434 * If the create succeeds, we fill in the inode information
435 * with d_instantiate().
436 */
437static int ext3_create (struct inode * dir, struct dentry * dentry, int mode)
438{
439	handle_t *handle;
440	struct inode * inode;
441	int err;
442
443	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
444	if (IS_ERR(handle))
445		return PTR_ERR(handle);
446
447	if (IS_SYNC(dir))
448		handle->h_sync = 1;
449
450	inode = ext3_new_inode (handle, dir, mode);
451	err = PTR_ERR(inode);
452	if (!IS_ERR(inode)) {
453		inode->i_op = &ext3_file_inode_operations;
454		inode->i_fop = &ext3_file_operations;
455		inode->i_mapping->a_ops = &ext3_aops;
456		err = ext3_add_nondir(handle, dentry, inode);
457		ext3_mark_inode_dirty(handle, inode);
458	}
459	ext3_journal_stop(handle, dir);
460	return err;
461}
462
463static int ext3_mknod (struct inode * dir, struct dentry *dentry,
464			int mode, int rdev)
465{
466	handle_t *handle;
467	struct inode *inode;
468	int err;
469
470	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
471	if (IS_ERR(handle))
472		return PTR_ERR(handle);
473
474	if (IS_SYNC(dir))
475		handle->h_sync = 1;
476
477	inode = ext3_new_inode (handle, dir, mode);
478	err = PTR_ERR(inode);
479	if (!IS_ERR(inode)) {
480		init_special_inode(inode, mode, rdev);
481		err = ext3_add_nondir(handle, dentry, inode);
482		ext3_mark_inode_dirty(handle, inode);
483	}
484	ext3_journal_stop(handle, dir);
485	return err;
486}
487
488static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
489{
490	handle_t *handle;
491	struct inode * inode;
492	struct buffer_head * dir_block;
493	struct ext3_dir_entry_2 * de;
494	int err;
495
496	if (dir->i_nlink >= EXT3_LINK_MAX)
497		return -EMLINK;
498
499	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
500	if (IS_ERR(handle))
501		return PTR_ERR(handle);
502
503	if (IS_SYNC(dir))
504		handle->h_sync = 1;
505
506	inode = ext3_new_inode (handle, dir, S_IFDIR);
507	err = PTR_ERR(inode);
508	if (IS_ERR(inode))
509		goto out_stop;
510
511	inode->i_op = &ext3_dir_inode_operations;
512	inode->i_fop = &ext3_dir_operations;
513	inode->i_size = inode->u.ext3_i.i_disksize = inode->i_sb->s_blocksize;
514	inode->i_blocks = 0;
515	dir_block = ext3_bread (handle, inode, 0, 1, &err);
516	if (!dir_block) {
517		inode->i_nlink--; /* is this nlink == 0? */
518		ext3_mark_inode_dirty(handle, inode);
519		iput (inode);
520		goto out_stop;
521	}
522	BUFFER_TRACE(dir_block, "get_write_access");
523	ext3_journal_get_write_access(handle, dir_block);
524	de = (struct ext3_dir_entry_2 *) dir_block->b_data;
525	de->inode = cpu_to_le32(inode->i_ino);
526	de->name_len = 1;
527	de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de->name_len));
528	strcpy (de->name, ".");
529	ext3_set_de_type(dir->i_sb, de, S_IFDIR);
530	de = (struct ext3_dir_entry_2 *)
531			((char *) de + le16_to_cpu(de->rec_len));
532	de->inode = cpu_to_le32(dir->i_ino);
533	de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT3_DIR_REC_LEN(1));
534	de->name_len = 2;
535	strcpy (de->name, "..");
536	ext3_set_de_type(dir->i_sb, de, S_IFDIR);
537	inode->i_nlink = 2;
538	BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
539	ext3_journal_dirty_metadata(handle, dir_block);
540	brelse (dir_block);
541	inode->i_mode = S_IFDIR | mode;
542	if (dir->i_mode & S_ISGID)
543		inode->i_mode |= S_ISGID;
544	ext3_mark_inode_dirty(handle, inode);
545	err = ext3_add_entry (handle, dentry, inode);
546	if (err)
547		goto out_no_entry;
548	dir->i_nlink++;
549	dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
550	ext3_mark_inode_dirty(handle, dir);
551	d_instantiate(dentry, inode);
552out_stop:
553	ext3_journal_stop(handle, dir);
554	return err;
555
556out_no_entry:
557	inode->i_nlink = 0;
558	ext3_mark_inode_dirty(handle, inode);
559	iput (inode);
560	goto out_stop;
561}
562
563/*
564 * routine to check that the specified directory is empty (for rmdir)
565 */
566static int empty_dir (struct inode * inode)
567{
568	unsigned long offset;
569	struct buffer_head * bh;
570	struct ext3_dir_entry_2 * de, * de1;
571	struct super_block * sb;
572	int err;
573
574	sb = inode->i_sb;
575	if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) ||
576	    !(bh = ext3_bread (NULL, inode, 0, 0, &err))) {
577	    	ext3_warning (inode->i_sb, "empty_dir",
578			      "bad directory (dir #%lu) - no data block",
579			      inode->i_ino);
580		return 1;
581	}
582	de = (struct ext3_dir_entry_2 *) bh->b_data;
583	de1 = (struct ext3_dir_entry_2 *)
584			((char *) de + le16_to_cpu(de->rec_len));
585	if (le32_to_cpu(de->inode) != inode->i_ino ||
586			!le32_to_cpu(de1->inode) ||
587			strcmp (".", de->name) ||
588			strcmp ("..", de1->name)) {
589	    	ext3_warning (inode->i_sb, "empty_dir",
590			      "bad directory (dir #%lu) - no `.' or `..'",
591			      inode->i_ino);
592		brelse (bh);
593		return 1;
594	}
595	offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
596	de = (struct ext3_dir_entry_2 *)
597			((char *) de1 + le16_to_cpu(de1->rec_len));
598	while (offset < inode->i_size ) {
599		if (!bh ||
600			(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
601			brelse (bh);
602			bh = ext3_bread (NULL, inode,
603				offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err);
604			if (!bh) {
605				offset += sb->s_blocksize;
606				continue;
607			}
608			de = (struct ext3_dir_entry_2 *) bh->b_data;
609		}
610		if (!ext3_check_dir_entry ("empty_dir", inode, de, bh,
611					   offset)) {
612			brelse (bh);
613			return 1;
614		}
615		if (le32_to_cpu(de->inode)) {
616			brelse (bh);
617			return 0;
618		}
619		offset += le16_to_cpu(de->rec_len);
620		de = (struct ext3_dir_entry_2 *)
621				((char *) de + le16_to_cpu(de->rec_len));
622	}
623	brelse (bh);
624	return 1;
625}
626
627/* ext3_orphan_add() links an unlinked or truncated inode into a list of
628 * such inodes, starting at the superblock, in case we crash before the
629 * file is closed/deleted, or in case the inode truncate spans multiple
630 * transactions and the last transaction is not recovered after a crash.
631 *
632 * At filesystem recovery time, we walk this list deleting unlinked
633 * inodes and truncating linked inodes in ext3_orphan_cleanup().
634 */
635int ext3_orphan_add(handle_t *handle, struct inode *inode)
636{
637	struct super_block *sb = inode->i_sb;
638	struct ext3_iloc iloc;
639	int err = 0, rc;
640
641	lock_super(sb);
642	if (!list_empty(&inode->u.ext3_i.i_orphan))
643		goto out_unlock;
644
645	/* Orphan handling is only valid for files with data blocks
646	 * being truncated, or files being unlinked. */
647
648	J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
649		S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
650
651	BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
652	err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
653	if (err)
654		goto out_unlock;
655
656	err = ext3_reserve_inode_write(handle, inode, &iloc);
657	if (err)
658		goto out_unlock;
659
660	/* Insert this inode at the head of the on-disk orphan list... */
661	NEXT_ORPHAN(inode) = le32_to_cpu(EXT3_SB(sb)->s_es->s_last_orphan);
662	EXT3_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
663	err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
664	rc = ext3_mark_iloc_dirty(handle, inode, &iloc);
665	if (!err)
666		err = rc;
667
668	/* Only add to the head of the in-memory list if all the
669	 * previous operations succeeded.  If the orphan_add is going to
670	 * fail (possibly taking the journal offline), we can't risk
671	 * leaving the inode on the orphan list: stray orphan-list
672	 * entries can cause panics at unmount time.
673	 *
674	 * This is safe: on error we're going to ignore the orphan list
675	 * anyway on the next recovery. */
676	if (!err)
677		list_add(&inode->u.ext3_i.i_orphan, &EXT3_SB(sb)->s_orphan);
678
679	jbd_debug(4, "superblock will point to %ld\n", inode->i_ino);
680	jbd_debug(4, "orphan inode %ld will point to %d\n",
681			inode->i_ino, NEXT_ORPHAN(inode));
682out_unlock:
683	unlock_super(sb);
684	ext3_std_error(inode->i_sb, err);
685	return err;
686}
687
688/*
689 * ext3_orphan_del() removes an unlinked or truncated inode from the list
690 * of such inodes stored on disk, because it is finally being cleaned up.
691 */
692int ext3_orphan_del(handle_t *handle, struct inode *inode)
693{
694	struct list_head *prev;
695	struct ext3_sb_info *sbi;
696	ino_t ino_next;
697	struct ext3_iloc iloc;
698	int err = 0;
699
700	lock_super(inode->i_sb);
701	if (list_empty(&inode->u.ext3_i.i_orphan)) {
702		unlock_super(inode->i_sb);
703		return 0;
704	}
705
706	ino_next = NEXT_ORPHAN(inode);
707	prev = inode->u.ext3_i.i_orphan.prev;
708	sbi = EXT3_SB(inode->i_sb);
709
710	jbd_debug(4, "remove inode %ld from orphan list\n", inode->i_ino);
711
712	list_del(&inode->u.ext3_i.i_orphan);
713	INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan);
714
715	/* If we're on an error path, we may not have a valid
716	 * transaction handle with which to update the orphan list on
717	 * disk, but we still need to remove the inode from the linked
718	 * list in memory. */
719	if (!handle)
720		goto out;
721
722	err = ext3_reserve_inode_write(handle, inode, &iloc);
723	if (err)
724		goto out_err;
725
726	if (prev == &sbi->s_orphan) {
727		jbd_debug(4, "superblock will point to %ld\n", ino_next);
728		BUFFER_TRACE(sbi->s_sbh, "get_write_access");
729		err = ext3_journal_get_write_access(handle, sbi->s_sbh);
730		if (err)
731			goto out_brelse;
732		sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
733		err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
734	} else {
735		struct ext3_iloc iloc2;
736		struct inode *i_prev =
737			list_entry(prev, struct inode, u.ext3_i.i_orphan);
738
739		jbd_debug(4, "orphan inode %ld will point to %ld\n",
740			  i_prev->i_ino, ino_next);
741		err = ext3_reserve_inode_write(handle, i_prev, &iloc2);
742		if (err)
743			goto out_brelse;
744		NEXT_ORPHAN(i_prev) = ino_next;
745		err = ext3_mark_iloc_dirty(handle, i_prev, &iloc2);
746	}
747	if (err)
748		goto out_brelse;
749	NEXT_ORPHAN(inode) = 0;
750	err = ext3_mark_iloc_dirty(handle, inode, &iloc);
751	if (err)
752		goto out_brelse;
753
754out_err:
755	ext3_std_error(inode->i_sb, err);
756out:
757	unlock_super(inode->i_sb);
758	return err;
759
760out_brelse:
761	brelse(iloc.bh);
762	goto out_err;
763}
764
765static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
766{
767	int retval;
768	struct inode * inode;
769	struct buffer_head * bh;
770	struct ext3_dir_entry_2 * de;
771	handle_t *handle;
772
773	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
774	if (IS_ERR(handle))
775		return PTR_ERR(handle);
776
777	retval = -ENOENT;
778	bh = ext3_find_entry (dentry, &de);
779	if (!bh)
780		goto end_rmdir;
781
782	if (IS_SYNC(dir))
783		handle->h_sync = 1;
784
785	inode = dentry->d_inode;
786	DQUOT_INIT(inode);
787
788	retval = -EIO;
789	if (le32_to_cpu(de->inode) != inode->i_ino)
790		goto end_rmdir;
791
792	retval = -ENOTEMPTY;
793	if (!empty_dir (inode))
794		goto end_rmdir;
795
796	retval = ext3_delete_entry(handle, dir, de, bh);
797	if (retval)
798		goto end_rmdir;
799	if (inode->i_nlink != 2)
800		ext3_warning (inode->i_sb, "ext3_rmdir",
801			      "empty directory has nlink!=2 (%d)",
802			      inode->i_nlink);
803	inode->i_version = ++event;
804	inode->i_nlink = 0;
805	/* There's no need to set i_disksize: the fact that i_nlink is
806	 * zero will ensure that the right thing happens during any
807	 * recovery. */
808	inode->i_size = 0;
809	ext3_orphan_add(handle, inode);
810	dir->i_nlink--;
811	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
812	ext3_mark_inode_dirty(handle, inode);
813	dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
814	ext3_mark_inode_dirty(handle, dir);
815
816end_rmdir:
817	ext3_journal_stop(handle, dir);
818	brelse (bh);
819	return retval;
820}
821
822static int ext3_unlink(struct inode * dir, struct dentry *dentry)
823{
824	int retval;
825	struct inode * inode;
826	struct buffer_head * bh;
827	struct ext3_dir_entry_2 * de;
828	handle_t *handle;
829
830	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
831	if (IS_ERR(handle))
832		return PTR_ERR(handle);
833
834	if (IS_SYNC(dir))
835		handle->h_sync = 1;
836
837	retval = -ENOENT;
838	bh = ext3_find_entry (dentry, &de);
839	if (!bh)
840		goto end_unlink;
841
842	inode = dentry->d_inode;
843	DQUOT_INIT(inode);
844
845	retval = -EIO;
846	if (le32_to_cpu(de->inode) != inode->i_ino)
847		goto end_unlink;
848
849	if (!inode->i_nlink) {
850		ext3_warning (inode->i_sb, "ext3_unlink",
851			      "Deleting nonexistent file (%lu), %d",
852			      inode->i_ino, inode->i_nlink);
853		inode->i_nlink = 1;
854	}
855	retval = ext3_delete_entry(handle, dir, de, bh);
856	if (retval)
857		goto end_unlink;
858	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
859	dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
860	ext3_mark_inode_dirty(handle, dir);
861	inode->i_nlink--;
862	if (!inode->i_nlink)
863		ext3_orphan_add(handle, inode);
864	inode->i_ctime = dir->i_ctime;
865	ext3_mark_inode_dirty(handle, inode);
866	retval = 0;
867
868end_unlink:
869	ext3_journal_stop(handle, dir);
870	brelse (bh);
871	return retval;
872}
873
874static int ext3_symlink (struct inode * dir,
875		struct dentry *dentry, const char * symname)
876{
877	handle_t *handle;
878	struct inode * inode;
879	int l, err;
880
881	l = strlen(symname)+1;
882	if (l > dir->i_sb->s_blocksize)
883		return -ENAMETOOLONG;
884
885	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5);
886	if (IS_ERR(handle))
887		return PTR_ERR(handle);
888
889	if (IS_SYNC(dir))
890		handle->h_sync = 1;
891
892	inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO);
893	err = PTR_ERR(inode);
894	if (IS_ERR(inode))
895		goto out_stop;
896
897	if (l > sizeof (inode->u.ext3_i.i_data)) {
898		inode->i_op = &page_symlink_inode_operations;
899		inode->i_mapping->a_ops = &ext3_aops;
900		/*
901		 * block_symlink() calls back into ext3_prepare/commit_write.
902		 * We have a transaction open.  All is sweetness.  It also sets
903		 * i_size in generic_commit_write().
904		 */
905		err = block_symlink(inode, symname, l);
906		if (err)
907			goto out_no_entry;
908	} else {
909		inode->i_op = &ext3_fast_symlink_inode_operations;
910		memcpy((char*)&inode->u.ext3_i.i_data,symname,l);
911		inode->i_size = l-1;
912	}
913	inode->u.ext3_i.i_disksize = inode->i_size;
914	err = ext3_add_nondir(handle, dentry, inode);
915	ext3_mark_inode_dirty(handle, inode);
916out_stop:
917	ext3_journal_stop(handle, dir);
918	return err;
919
920out_no_entry:
921	ext3_dec_count(handle, inode);
922	ext3_mark_inode_dirty(handle, inode);
923	iput (inode);
924	goto out_stop;
925}
926
927static int ext3_link (struct dentry * old_dentry,
928		struct inode * dir, struct dentry *dentry)
929{
930	handle_t *handle;
931	struct inode *inode = old_dentry->d_inode;
932	int err;
933
934	if (S_ISDIR(inode->i_mode))
935		return -EPERM;
936
937	if (inode->i_nlink >= EXT3_LINK_MAX)
938		return -EMLINK;
939
940	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS);
941	if (IS_ERR(handle))
942		return PTR_ERR(handle);
943
944	if (IS_SYNC(dir))
945		handle->h_sync = 1;
946
947	inode->i_ctime = CURRENT_TIME;
948	ext3_inc_count(handle, inode);
949	atomic_inc(&inode->i_count);
950
951	err = ext3_add_nondir(handle, dentry, inode);
952	ext3_mark_inode_dirty(handle, inode);
953	ext3_journal_stop(handle, dir);
954	return err;
955}
956
/*
 * Yields the (little-endian, assignable) inode field of the second
 * entry in a directory block, i.e. the entry found rec_len bytes after
 * the first one.  `buffer' is evaluated twice.
 */
#define PARENT_INO(buffer) \
	((struct ext3_dir_entry_2 *) ((char *) (buffer) + \
	le16_to_cpu(((struct ext3_dir_entry_2 *) (buffer))->rec_len)))->inode
960
961/*
962 * Anybody can rename anything with this: the permission checks are left to the
963 * higher-level routines.
964 */
965static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
966			   struct inode * new_dir,struct dentry *new_dentry)
967{
968	handle_t *handle;
969	struct inode * old_inode, * new_inode;
970	struct buffer_head * old_bh, * new_bh, * dir_bh;
971	struct ext3_dir_entry_2 * old_de, * new_de;
972	int retval;
973
974	old_bh = new_bh = dir_bh = NULL;
975
976	handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2);
977	if (IS_ERR(handle))
978		return PTR_ERR(handle);
979
980	if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
981		handle->h_sync = 1;
982
983	old_bh = ext3_find_entry (old_dentry, &old_de);
984	/*
985	 *  Check for inode number is _not_ due to possible IO errors.
986	 *  We might rmdir the source, keep it as pwd of some process
987	 *  and merrily kill the link to whatever was created under the
988	 *  same name. Goodbye sticky bit ;-<
989	 */
990	old_inode = old_dentry->d_inode;
991	retval = -ENOENT;
992	if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino)
993		goto end_rename;
994
995	new_inode = new_dentry->d_inode;
996	new_bh = ext3_find_entry (new_dentry, &new_de);
997	if (new_bh) {
998		if (!new_inode) {
999			brelse (new_bh);
1000			new_bh = NULL;
1001		} else {
1002			DQUOT_INIT(new_inode);
1003		}
1004	}
1005	if (S_ISDIR(old_inode->i_mode)) {
1006		if (new_inode) {
1007			retval = -ENOTEMPTY;
1008			if (!empty_dir (new_inode))
1009				goto end_rename;
1010		}
1011		retval = -EIO;
1012		dir_bh = ext3_bread (handle, old_inode, 0, 0, &retval);
1013		if (!dir_bh)
1014			goto end_rename;
1015		if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
1016			goto end_rename;
1017		retval = -EMLINK;
1018		if (!new_inode && new_dir!=old_dir &&
1019				new_dir->i_nlink >= EXT3_LINK_MAX)
1020			goto end_rename;
1021	}
1022	if (!new_bh) {
1023		retval = ext3_add_entry (handle, new_dentry, old_inode);
1024		if (retval)
1025			goto end_rename;
1026	} else {
1027		BUFFER_TRACE(new_bh, "get write access");
1028		BUFFER_TRACE(new_bh, "get_write_access");
1029		ext3_journal_get_write_access(handle, new_bh);
1030		new_de->inode = le32_to_cpu(old_inode->i_ino);
1031		if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
1032					      EXT3_FEATURE_INCOMPAT_FILETYPE))
1033			new_de->file_type = old_de->file_type;
1034		new_dir->i_version = ++event;
1035		BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata");
1036		ext3_journal_dirty_metadata(handle, new_bh);
1037		brelse(new_bh);
1038		new_bh = NULL;
1039	}
1040
1041	/*
1042	 * Like most other Unix systems, set the ctime for inodes on a
1043	 * rename.
1044	 */
1045	old_inode->i_ctime = CURRENT_TIME;
1046	ext3_mark_inode_dirty(handle, old_inode);
1047
1048	/*
1049	 * ok, that's it
1050	 */
1051	ext3_delete_entry(handle, old_dir, old_de, old_bh);
1052
1053	if (new_inode) {
1054		new_inode->i_nlink--;
1055		new_inode->i_ctime = CURRENT_TIME;
1056	}
1057	old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
1058	old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
1059	if (dir_bh) {
1060		BUFFER_TRACE(dir_bh, "get_write_access");
1061		ext3_journal_get_write_access(handle, dir_bh);
1062		PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino);
1063		BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
1064		ext3_journal_dirty_metadata(handle, dir_bh);
1065		old_dir->i_nlink--;
1066		if (new_inode) {
1067			new_inode->i_nlink--;
1068		} else {
1069			new_dir->i_nlink++;
1070			new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
1071			ext3_mark_inode_dirty(handle, new_dir);
1072		}
1073	}
1074	ext3_mark_inode_dirty(handle, old_dir);
1075	if (new_inode) {
1076		ext3_mark_inode_dirty(handle, new_inode);
1077		if (!new_inode->i_nlink)
1078			ext3_orphan_add(handle, new_inode);
1079	}
1080	retval = 0;
1081
1082end_rename:
1083	brelse (dir_bh);
1084	brelse (old_bh);
1085	brelse (new_bh);
1086	ext3_journal_stop(handle, old_dir);
1087	return retval;
1088}
1089
1090/*
1091 * directories can handle most operations...
1092 */
1093struct inode_operations ext3_dir_inode_operations = {
1094	create:		ext3_create,		/* BKL held */
1095	lookup:		ext3_lookup,		/* BKL held */
1096	link:		ext3_link,		/* BKL held */
1097	unlink:		ext3_unlink,		/* BKL held */
1098	symlink:	ext3_symlink,		/* BKL held */
1099	mkdir:		ext3_mkdir,		/* BKL held */
1100	rmdir:		ext3_rmdir,		/* BKL held */
1101	mknod:		ext3_mknod,		/* BKL held */
1102	rename:		ext3_rename,		/* BKL held */
1103};
1104