1/*
2 *  linux/fs/namei.c
3 *
4 *  Copyright (C) 1991, 1992  Linus Torvalds
5 */
6
7/*
8 * Some corrections by tytso.
9 */
10
11/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
12 * lookup logic.
13 */
14/* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
15 */
16
17#include <linux/init.h>
18#include <linux/slab.h>
19#include <linux/fs.h>
20#include <linux/quotaops.h>
21#include <linux/pagemap.h>
22#include <linux/dnotify.h>
23#include <linux/smp_lock.h>
24#include <linux/personality.h>
25
26#include <asm/namei.h>
27#include <asm/uaccess.h>
28
/*
 * Translate the two low access-mode bits of an open_namei() flag word
 * into the mask handed to permission(): a four-entry table indexed by
 * (x & O_ACCMODE) holding octal 0, 4, 2 and 6.
 */
#define ACC_MODE(x) (*("\000\004\002\006" + ((x) & O_ACCMODE)))
30
31/* [Feb-1997 T. Schoebel-Theuer]
32 * Fundamental changes in the pathname lookup mechanisms (namei)
33 * were necessary because of omirr.  The reason is that omirr needs
34 * to know the _real_ pathname, not the user-supplied one, in case
35 * of symlinks (and also when transname replacements occur).
36 *
37 * The new code replaces the old recursive symlink resolution with
38 * an iterative one (in case of non-nested symlink chains).  It does
39 * this with calls to <fs>_follow_link().
40 * As a side effect, dir_namei(), _namei() and follow_link() are now
41 * replaced with a single function lookup_dentry() that can handle all
42 * the special cases of the former code.
43 *
44 * With the new dcache, the pathname is stored at each inode, at least as
45 * long as the refcount of the inode is positive.  As a side effect, the
46 * size of the dcache depends on the inode cache and thus is dynamic.
47 *
48 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
49 * resolution to correspond with current state of the code.
50 *
51 * Note that the symlink resolution is not *completely* iterative.
52 * There is still a significant amount of tail- and mid- recursion in
53 * the algorithm.  Also, note that <fs>_readlink() is not used in
54 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
55 * may return different results than <fs>_follow_link().  Many virtual
56 * filesystems (including /proc) exhibit this behavior.
57 */
58
59/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
60 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
61 * and the name already exists in form of a symlink, try to create the new
62 * name indicated by the symlink. The old code always complained that the
63 * name already exists, due to not following the symlink even if its target
64 * is nonexistent.  The new semantics affects also mknod() and link() when
65 * the name is a symlink pointing to a non-existent name.
66 *
67 * I don't know which semantics is the right one, since I have no access
68 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
69 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
70 * "old" one. Personally, I think the new semantics is much more logical.
71 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
72 * file does succeed in both HP-UX and SunOs, but not in Solaris
73 * and in the old Linux semantics.
74 */
75
76/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
77 * semantics.  See the comments in "open_namei" and "do_link" below.
78 *
79 * [10-Sep-98 Alan Modra] Another symlink change.
80 */
81
82/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
83 *	inside the path - always follow.
84 *	in the last component in creation/removal/renaming - never follow.
85 *	if LOOKUP_FOLLOW passed - follow.
86 *	if the pathname has trailing slashes - follow.
87 *	otherwise - don't follow.
88 * (applied in that order).
89 *
90 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
91 * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
92 * During the 2.4 we need to fix the userland stuff depending on it -
93 * hopefully we will be able to get rid of that wart in 2.5. So far only
94 * XEmacs seems to be relying on it...
95 */
96
97/* In order to reduce some races, while at the same time doing additional
98 * checking and hopefully speeding things up, we copy filenames to the
99 * kernel data space before using them..
100 *
101 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
102 * PATH_MAX includes the nul terminator --RR.
103 */
104static inline int do_getname(const char *filename, char *page)
105{
106	int retval;
107	unsigned long len = PATH_MAX;
108
109	if ((unsigned long) filename >= TASK_SIZE) {
110		if (!segment_eq(get_fs(), KERNEL_DS))
111			return -EFAULT;
112	} else if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
113		len = TASK_SIZE - (unsigned long) filename;
114
115	retval = strncpy_from_user((char *)page, filename, len);
116	if (retval > 0) {
117		if (retval < len)
118			return 0;
119		return -ENAMETOOLONG;
120	} else if (!retval)
121		retval = -ENOENT;
122	return retval;
123}
124
125char * getname(const char * filename)
126{
127	char *tmp, *result;
128
129	result = ERR_PTR(-ENOMEM);
130	tmp = __getname();
131	if (tmp)  {
132		int retval = do_getname(filename, tmp);
133
134		result = tmp;
135		if (retval < 0) {
136			putname(tmp);
137			result = ERR_PTR(retval);
138		}
139	}
140	return result;
141}
142
143/*
144 *	vfs_permission()
145 *
146 * is used to check for read/write/execute permissions on a file.
147 * We use "fsuid" for this, letting us set arbitrary permissions
148 * for filesystem access without changing the "normal" uids which
149 * are used for other things..
150 */
151int vfs_permission(struct inode * inode, int mask)
152{
153	umode_t			mode = inode->i_mode;
154
155	if (mask & MAY_WRITE) {
156		/*
157		 * Nobody gets write access to a read-only fs.
158		 */
159		if (IS_RDONLY(inode) &&
160		    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
161			return -EROFS;
162
163		/*
164		 * Nobody gets write access to an immutable file.
165		 */
166		if (IS_IMMUTABLE(inode))
167			return -EACCES;
168	}
169
170	if (current->fsuid == inode->i_uid)
171		mode >>= 6;
172	else if (in_group_p(inode->i_gid))
173		mode >>= 3;
174
175	/*
176	 * If the DACs are ok we don't need any capability check.
177	 */
178	if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask))
179		return 0;
180
181	/*
182	 * Read/write DACs are always overridable.
183	 * Executable DACs are overridable if at least one exec bit is set.
184	 */
185	if ((mask & (MAY_READ|MAY_WRITE)) || (inode->i_mode & S_IXUGO))
186		if (capable(CAP_DAC_OVERRIDE))
187			return 0;
188
189	/*
190	 * Searching includes executable on directories, else just read.
191	 */
192	if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
193		if (capable(CAP_DAC_READ_SEARCH))
194			return 0;
195
196	return -EACCES;
197}
198
199int permission(struct inode * inode,int mask)
200{
201	if (inode->i_op && inode->i_op->permission) {
202		int retval;
203		lock_kernel();
204		retval = inode->i_op->permission(inode, mask);
205		unlock_kernel();
206		return retval;
207	}
208	return vfs_permission(inode, mask);
209}
210
211/*
212 * get_write_access() gets write permission for a file.
213 * put_write_access() releases this write permission.
214 * This is used for regular files.
215 * We cannot support write (and maybe mmap read-write shared) accesses and
216 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
217 * can have the following values:
218 * 0: no writers, no VM_DENYWRITE mappings
219 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
220 * > 0: (i_writecount) users are writing to the file.
221 *
222 * Normally we operate on that counter with atomic_{inc,dec} and it's safe
223 * except for the cases where we don't hold i_writecount yet. Then we need to
224 * use {get,deny}_write_access() - these functions check the sign and refuse
225 * to do the change if sign is wrong. Exclusion between them is provided by
226 * spinlock (arbitration_lock) and I'll rip the second arsehole to the first
227 * who will try to move it in struct inode - just leave it here.
228 */
229static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED;
230int get_write_access(struct inode * inode)
231{
232	spin_lock(&arbitration_lock);
233	if (atomic_read(&inode->i_writecount) < 0) {
234		spin_unlock(&arbitration_lock);
235		return -ETXTBSY;
236	}
237	atomic_inc(&inode->i_writecount);
238	spin_unlock(&arbitration_lock);
239	return 0;
240}
241int deny_write_access(struct file * file)
242{
243	spin_lock(&arbitration_lock);
244	if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) {
245		spin_unlock(&arbitration_lock);
246		return -ETXTBSY;
247	}
248	atomic_dec(&file->f_dentry->d_inode->i_writecount);
249	spin_unlock(&arbitration_lock);
250	return 0;
251}
252
253void path_release(struct nameidata *nd)
254{
255	dput(nd->dentry);
256	mntput(nd->mnt);
257}
258
259/*
260 * Internal lookup() using the new generic dcache.
261 * SMP-safe
262 */
263static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
264{
265	struct dentry * dentry = d_lookup(parent, name);
266
267	if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
268		if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
269			dput(dentry);
270			dentry = NULL;
271		}
272	}
273	return dentry;
274}
275
276/*
277 * This is called when everything else fails, and we actually have
278 * to go to the low-level filesystem to find out what we should do..
279 *
280 * We get the directory semaphore, and after getting that we also
281 * make sure that nobody added the entry to the dcache in the meantime..
282 * SMP-safe
283 */
284static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
285{
286	struct dentry * result;
287	struct inode *dir = parent->d_inode;
288
289	down(&dir->i_sem);
290	result = d_lookup(parent, name);
291	if (!result) {
292		struct dentry * dentry = d_alloc(parent, name);
293		result = ERR_PTR(-ENOMEM);
294		if (dentry) {
295			lock_kernel();
296			result = dir->i_op->lookup(dir, dentry);
297			unlock_kernel();
298			if (result)
299				dput(dentry);
300			else
301				result = dentry;
302		}
303		up(&dir->i_sem);
304		return result;
305	}
306
307	/*
308	 * Uhhuh! Nasty case: the cache was re-populated while
309	 * we waited on the semaphore. Need to revalidate.
310	 */
311	up(&dir->i_sem);
312	if (result->d_op && result->d_op->d_revalidate) {
313		if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
314			dput(result);
315			result = ERR_PTR(-ENOENT);
316		}
317	}
318	return result;
319}
320
321/*
322 * This limits recursive symlink follows to 8, while
323 * limiting consecutive symlinks to 40.
324 *
325 * Without that kind of total limit, nasty chains of consecutive
326 * symlinks can cause almost arbitrarily long lookups.
327 */
328static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
329{
330	int err;
331	if (current->link_count >= 5)
332		goto loop;
333	if (current->total_link_count >= 40)
334		goto loop;
335	if (current->need_resched) {
336		current->state = TASK_RUNNING;
337		schedule();
338	}
339	current->link_count++;
340	current->total_link_count++;
341	UPDATE_ATIME(dentry->d_inode);
342	err = dentry->d_inode->i_op->follow_link(dentry, nd);
343	current->link_count--;
344	return err;
345loop:
346	path_release(nd);
347	return -ELOOP;
348}
349
350static inline int __follow_up(struct vfsmount **mnt, struct dentry **base)
351{
352	struct vfsmount *parent;
353	struct dentry *dentry;
354	spin_lock(&dcache_lock);
355	parent=(*mnt)->mnt_parent;
356	if (parent == *mnt) {
357		spin_unlock(&dcache_lock);
358		return 0;
359	}
360	mntget(parent);
361	dentry=dget((*mnt)->mnt_mountpoint);
362	spin_unlock(&dcache_lock);
363	dput(*base);
364	*base = dentry;
365	mntput(*mnt);
366	*mnt = parent;
367	return 1;
368}
369
/*
 * Out-of-line entry point for __follow_up().
 */
int follow_up(struct vfsmount **mnt, struct dentry **dentry)
{
	int moved = __follow_up(mnt, dentry);

	return moved;
}
374
375static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
376{
377	struct vfsmount *mounted;
378
379	spin_lock(&dcache_lock);
380	mounted = lookup_mnt(*mnt, *dentry);
381	if (mounted) {
382		*mnt = mntget(mounted);
383		spin_unlock(&dcache_lock);
384		dput(*dentry);
385		mntput(mounted->mnt_parent);
386		*dentry = dget(mounted->mnt_root);
387		return 1;
388	}
389	spin_unlock(&dcache_lock);
390	return 0;
391}
392
/*
 * Out-of-line entry point for __follow_down().
 */
int follow_down(struct vfsmount **mnt, struct dentry **dentry)
{
	int crossed = __follow_down(mnt, dentry);

	return crossed;
}
397
398static inline void follow_dotdot(struct nameidata *nd)
399{
400	while(1) {
401		struct vfsmount *parent;
402		struct dentry *dentry;
403		read_lock(&current->fs->lock);
404		if (nd->dentry == current->fs->root &&
405		    nd->mnt == current->fs->rootmnt)  {
406			read_unlock(&current->fs->lock);
407			break;
408		}
409		read_unlock(&current->fs->lock);
410		spin_lock(&dcache_lock);
411		if (nd->dentry != nd->mnt->mnt_root) {
412			dentry = dget(nd->dentry->d_parent);
413			spin_unlock(&dcache_lock);
414			dput(nd->dentry);
415			nd->dentry = dentry;
416			break;
417		}
418		parent=nd->mnt->mnt_parent;
419		if (parent == nd->mnt) {
420			spin_unlock(&dcache_lock);
421			break;
422		}
423		mntget(parent);
424		dentry=dget(nd->mnt->mnt_mountpoint);
425		spin_unlock(&dcache_lock);
426		dput(nd->dentry);
427		nd->dentry = dentry;
428		mntput(nd->mnt);
429		nd->mnt = parent;
430	}
431	while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry))
432		;
433}
434
435/*
436 * Name resolution.
437 *
438 * This is the basic name resolution function, turning a pathname
439 * into the final dentry.
440 *
441 * We expect 'base' to be positive and a directory.
442 */
443int link_path_walk(const char * name, struct nameidata *nd)
444{
445	struct dentry *dentry;
446	struct inode *inode;
447	int err;
448	unsigned int lookup_flags = nd->flags;
449
450	while (*name=='/')
451		name++;
452	if (!*name)
453		goto return_reval;
454
455	inode = nd->dentry->d_inode;
456	if (current->link_count)
457		lookup_flags = LOOKUP_FOLLOW;
458
459	/* At this point we know we have a real path component. */
460	for(;;) {
461		unsigned long hash;
462		struct qstr this;
463		unsigned int c;
464
465		err = permission(inode, MAY_EXEC);
466		dentry = ERR_PTR(err);
467 		if (err)
468			break;
469
470		this.name = name;
471		c = *(const unsigned char *)name;
472
473		hash = init_name_hash();
474		do {
475			name++;
476			hash = partial_name_hash(c, hash);
477			c = *(const unsigned char *)name;
478		} while (c && (c != '/'));
479		this.len = name - (const char *) this.name;
480		this.hash = end_name_hash(hash);
481
482		/* remove trailing slashes? */
483		if (!c)
484			goto last_component;
485		while (*++name == '/');
486		if (!*name)
487			goto last_with_slashes;
488
489		/*
490		 * "." and ".." are special - ".." especially so because it has
491		 * to be able to know about the current root directory and
492		 * parent relationships.
493		 */
494		if (this.name[0] == '.') switch (this.len) {
495			default:
496				break;
497			case 2:
498				if (this.name[1] != '.')
499					break;
500				follow_dotdot(nd);
501				inode = nd->dentry->d_inode;
502				/* fallthrough */
503			case 1:
504				continue;
505		}
506		/*
507		 * See if the low-level filesystem might want
508		 * to use its own hash..
509		 */
510		if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
511			err = nd->dentry->d_op->d_hash(nd->dentry, &this);
512			if (err < 0)
513				break;
514		}
515		/* This does the actual lookups.. */
516		dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
517		if (!dentry) {
518			dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
519			err = PTR_ERR(dentry);
520			if (IS_ERR(dentry))
521				break;
522		}
523		/* Check mountpoints.. */
524		while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
525			;
526
527		err = -ENOENT;
528		inode = dentry->d_inode;
529		if (!inode)
530			goto out_dput;
531		err = -ENOTDIR;
532		if (!inode->i_op)
533			goto out_dput;
534
535		if (inode->i_op->follow_link) {
536			err = do_follow_link(dentry, nd);
537			dput(dentry);
538			if (err)
539				goto return_err;
540			err = -ENOENT;
541			inode = nd->dentry->d_inode;
542			if (!inode)
543				break;
544			err = -ENOTDIR;
545			if (!inode->i_op)
546				break;
547		} else {
548			dput(nd->dentry);
549			nd->dentry = dentry;
550		}
551		err = -ENOTDIR;
552		if (!inode->i_op->lookup)
553			break;
554		continue;
555		/* here ends the main loop */
556
557last_with_slashes:
558		lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
559last_component:
560		if (lookup_flags & LOOKUP_PARENT)
561			goto lookup_parent;
562		if (this.name[0] == '.') switch (this.len) {
563			default:
564				break;
565			case 2:
566				if (this.name[1] != '.')
567					break;
568				follow_dotdot(nd);
569				inode = nd->dentry->d_inode;
570				/* fallthrough */
571			case 1:
572				goto return_reval;
573		}
574		if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
575			err = nd->dentry->d_op->d_hash(nd->dentry, &this);
576			if (err < 0)
577				break;
578		}
579		dentry = cached_lookup(nd->dentry, &this, 0);
580		if (!dentry) {
581			dentry = real_lookup(nd->dentry, &this, 0);
582			err = PTR_ERR(dentry);
583			if (IS_ERR(dentry))
584				break;
585		}
586		while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
587			;
588		inode = dentry->d_inode;
589		if ((lookup_flags & LOOKUP_FOLLOW)
590		    && inode && inode->i_op && inode->i_op->follow_link) {
591			err = do_follow_link(dentry, nd);
592			dput(dentry);
593			if (err)
594				goto return_err;
595			inode = nd->dentry->d_inode;
596		} else {
597			dput(nd->dentry);
598			nd->dentry = dentry;
599		}
600		err = -ENOENT;
601		if (!inode)
602			goto no_inode;
603		if (lookup_flags & LOOKUP_DIRECTORY) {
604			err = -ENOTDIR;
605			if (!inode->i_op || !inode->i_op->lookup)
606				break;
607		}
608		goto return_base;
609no_inode:
610		err = -ENOENT;
611		if (lookup_flags & (LOOKUP_POSITIVE|LOOKUP_DIRECTORY))
612			break;
613		goto return_base;
614lookup_parent:
615		nd->last = this;
616		nd->last_type = LAST_NORM;
617		if (this.name[0] != '.')
618			goto return_base;
619		if (this.len == 1)
620			nd->last_type = LAST_DOT;
621		else if (this.len == 2 && this.name[1] == '.')
622			nd->last_type = LAST_DOTDOT;
623return_reval:
624		/*
625		 * We bypassed the ordinary revalidation routines.
626		 * Check the cached dentry for staleness.
627		 */
628		dentry = nd->dentry;
629		if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
630			err = -ESTALE;
631			if (!dentry->d_op->d_revalidate(dentry, 0)) {
632				d_invalidate(dentry);
633				break;
634			}
635		}
636return_base:
637		return 0;
638out_dput:
639		dput(dentry);
640		break;
641	}
642	path_release(nd);
643return_err:
644	return err;
645}
646
647int path_walk(const char * name, struct nameidata *nd)
648{
649	current->total_link_count = 0;
650	return link_path_walk(name, nd);
651}
652
653/* SMP-safe */
654/* returns 1 if everything is done */
655static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
656{
657	if (path_walk(name, nd))
658		return 0;		/* something went wrong... */
659
660	if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) {
661		struct nameidata nd_root;
662		/*
663		 * NAME was not found in alternate root or it's a directory.  Try to find
664		 * it in the normal root:
665		 */
666		nd_root.last_type = LAST_ROOT;
667		nd_root.flags = nd->flags;
668		read_lock(&current->fs->lock);
669		nd_root.mnt = mntget(current->fs->rootmnt);
670		nd_root.dentry = dget(current->fs->root);
671		read_unlock(&current->fs->lock);
672		if (path_walk(name, &nd_root))
673			return 1;
674		if (nd_root.dentry->d_inode) {
675			path_release(nd);
676			nd->dentry = nd_root.dentry;
677			nd->mnt = nd_root.mnt;
678			nd->last = nd_root.last;
679			return 1;
680		}
681		path_release(&nd_root);
682	}
683	return 1;
684}
685
686void set_fs_altroot(void)
687{
688	char *emul = __emul_prefix();
689	struct nameidata nd;
690	struct vfsmount *mnt = NULL, *oldmnt;
691	struct dentry *dentry = NULL, *olddentry;
692	if (emul) {
693		read_lock(&current->fs->lock);
694		nd.mnt = mntget(current->fs->rootmnt);
695		nd.dentry = dget(current->fs->root);
696		read_unlock(&current->fs->lock);
697		nd.flags = LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_POSITIVE;
698		if (path_walk(emul,&nd) == 0) {
699			mnt = nd.mnt;
700			dentry = nd.dentry;
701		}
702	}
703	write_lock(&current->fs->lock);
704	oldmnt = current->fs->altrootmnt;
705	olddentry = current->fs->altroot;
706	current->fs->altrootmnt = mnt;
707	current->fs->altroot = dentry;
708	write_unlock(&current->fs->lock);
709	if (olddentry) {
710		dput(olddentry);
711		mntput(oldmnt);
712	}
713}
714
715/* SMP-safe */
716static inline int
717walk_init_root(const char *name, struct nameidata *nd)
718{
719	read_lock(&current->fs->lock);
720	if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
721		nd->mnt = mntget(current->fs->altrootmnt);
722		nd->dentry = dget(current->fs->altroot);
723		read_unlock(&current->fs->lock);
724		if (__emul_lookup_dentry(name,nd))
725			return 0;
726		read_lock(&current->fs->lock);
727	}
728	nd->mnt = mntget(current->fs->rootmnt);
729	nd->dentry = dget(current->fs->root);
730	read_unlock(&current->fs->lock);
731	return 1;
732}
733
734/* SMP-safe */
/*
 * path_init() followed by path_walk().  When path_init() returns 0
 * there is nothing left to walk and 0 is returned.
 */
int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
{
	if (!path_init(path, flags, nd))
		return 0;
	return path_walk(path, nd);
}
742
743
744/* SMP-safe */
745int path_init(const char *name, unsigned int flags, struct nameidata *nd)
746{
747	nd->last_type = LAST_ROOT; /* if there are only slashes... */
748	nd->flags = flags;
749	if (*name=='/')
750		return walk_init_root(name,nd);
751	read_lock(&current->fs->lock);
752	nd->mnt = mntget(current->fs->pwdmnt);
753	nd->dentry = dget(current->fs->pwd);
754	read_unlock(&current->fs->lock);
755	return 1;
756}
757
758/*
759 * Restricted form of lookup. Doesn't follow links, single-component only,
760 * needs parent already locked. Doesn't follow mounts.
761 * SMP-safe.
762 */
763struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
764{
765	struct dentry * dentry;
766	struct inode *inode;
767	int err;
768
769	inode = base->d_inode;
770	err = permission(inode, MAY_EXEC);
771	dentry = ERR_PTR(err);
772	if (err)
773		goto out;
774
775	/*
776	 * See if the low-level filesystem might want
777	 * to use its own hash..
778	 */
779	if (base->d_op && base->d_op->d_hash) {
780		err = base->d_op->d_hash(base, name);
781		dentry = ERR_PTR(err);
782		if (err < 0)
783			goto out;
784	}
785
786	dentry = cached_lookup(base, name, 0);
787	if (!dentry) {
788		struct dentry *new = d_alloc(base, name);
789		dentry = ERR_PTR(-ENOMEM);
790		if (!new)
791			goto out;
792		lock_kernel();
793		dentry = inode->i_op->lookup(inode, new);
794		unlock_kernel();
795		if (!dentry)
796			dentry = new;
797		else
798			dput(new);
799	}
800out:
801	return dentry;
802}
803
804/* SMP-safe */
805struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
806{
807	unsigned long hash;
808	struct qstr this;
809	unsigned int c;
810
811	this.name = name;
812	this.len = len;
813	if (!len)
814		goto access;
815
816	hash = init_name_hash();
817	while (len--) {
818		c = *(const unsigned char *)name++;
819		if (c == '/' || c == '\0')
820			goto access;
821		hash = partial_name_hash(c, hash);
822	}
823	this.hash = end_name_hash(hash);
824
825	return lookup_hash(&this, base);
826access:
827	return ERR_PTR(-EACCES);
828}
829
830/*
831 *	namei()
832 *
833 * is used by most simple commands to get the inode of a specified name.
834 * Open, link etc use their own routines, but this is enough for things
835 * like 'chmod' etc.
836 *
837 * namei exists in two versions: namei/lnamei. The only difference is
838 * that namei follows links, while lnamei does not.
839 * SMP-safe
840 */
int __user_walk(const char *name, unsigned flags, struct nameidata *nd)
{
	char *kname = getname(name);
	int err;

	/* getname() reports failure as an ERR_PTR */
	if (IS_ERR(kname))
		return PTR_ERR(kname);

	err = path_lookup(kname, flags, nd);
	putname(kname);
	return err;
}
855
856/*
857 * It's inline, so penalty for filesystems that don't use sticky bit is
858 * minimal.
859 */
860static inline int check_sticky(struct inode *dir, struct inode *inode)
861{
862	if (!(dir->i_mode & S_ISVTX))
863		return 0;
864	if (inode->i_uid == current->fsuid)
865		return 0;
866	if (dir->i_uid == current->fsuid)
867		return 0;
868	return !capable(CAP_FOWNER);
869}
870
871/*
872 *	Check whether we can remove a link victim from directory dir, check
873 *  whether the type of victim is right.
874 *  1. We can't do it if dir is read-only (done in permission())
875 *  2. We should have write and exec permissions on dir
876 *  3. We can't remove anything from append-only dir
877 *  4. We can't do anything with immutable dir (done in permission())
878 *  5. If the sticky bit on dir is set we should either
879 *	a. be owner of dir, or
880 *	b. be owner of victim, or
881 *	c. have CAP_FOWNER capability
882 *  6. If the victim is append-only or immutable we can't do antyhing with
883 *     links pointing to it.
884 *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
885 *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
886 *  9. We can't remove a root or mountpoint.
887 */
888static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
889{
890	int error;
891	if (!victim->d_inode || victim->d_parent->d_inode != dir)
892		return -ENOENT;
893	error = permission(dir,MAY_WRITE | MAY_EXEC);
894	if (error)
895		return error;
896	if (IS_APPEND(dir))
897		return -EPERM;
898	if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
899	    IS_IMMUTABLE(victim->d_inode))
900		return -EPERM;
901	if (isdir) {
902		if (!S_ISDIR(victim->d_inode->i_mode))
903			return -ENOTDIR;
904		if (IS_ROOT(victim))
905			return -EBUSY;
906	} else if (S_ISDIR(victim->d_inode->i_mode))
907		return -EISDIR;
908	if (IS_DEADDIR(dir))
909		return -ENOENT;
910	return 0;
911}
912
913/*	Check whether we can create an object with dentry child in directory
914 *  dir.
915 *  1. We can't do it if child already exists (open has special treatment for
916 *     this case, but since we are inlined it's OK)
917 *  2. We can't do it if dir is read-only (done in permission())
918 *  3. We should have write and exec permissions on dir
919 *  4. We can't do it if dir is immutable (done in permission())
920 */
921static inline int may_create(struct inode *dir, struct dentry *child) {
922	if (child->d_inode)
923		return -EEXIST;
924	if (IS_DEADDIR(dir))
925		return -ENOENT;
926	return permission(dir,MAY_WRITE | MAY_EXEC);
927}
928
929/*
930 * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
931 * reasons.
932 *
933 * O_DIRECTORY translates into forcing a directory lookup.
934 */
935static inline int lookup_flags(unsigned int f)
936{
937	unsigned long retval = LOOKUP_FOLLOW;
938
939	if (f & O_NOFOLLOW)
940		retval &= ~LOOKUP_FOLLOW;
941
942	if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
943		retval &= ~LOOKUP_FOLLOW;
944
945	if (f & O_DIRECTORY)
946		retval |= LOOKUP_DIRECTORY;
947
948	return retval;
949}
950
951int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
952{
953	int error;
954
955	mode &= S_IALLUGO;
956	mode |= S_IFREG;
957
958	down(&dir->i_zombie);
959	error = may_create(dir, dentry);
960	if (error)
961		goto exit_lock;
962
963	error = -EACCES;	/* shouldn't it be ENOSYS? */
964	if (!dir->i_op || !dir->i_op->create)
965		goto exit_lock;
966
967	DQUOT_INIT(dir);
968	lock_kernel();
969	error = dir->i_op->create(dir, dentry, mode);
970	unlock_kernel();
971exit_lock:
972	up(&dir->i_zombie);
973	if (!error)
974		inode_dir_notify(dir, DN_CREATE);
975	return error;
976}
977
978/*
979 *	open_namei()
980 *
981 * namei for open - this is in fact almost the whole open-routine.
982 *
983 * Note that the low bits of "flag" aren't the same as in the open
984 * system call - they are 00 - no permissions needed
985 *			  01 - read permission needed
986 *			  10 - write permission needed
987 *			  11 - read/write permissions needed
988 * which is a lot more logical, and also allows the "no perm" needed
989 * for symlinks (where the permissions are checked later).
990 * SMP-safe
991 */
992int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
993{
994	int acc_mode, error = 0;
995	struct inode *inode;
996	struct dentry *dentry;
997	struct dentry *dir;
998	int count = 0;
999
1000	acc_mode = ACC_MODE(flag);
1001
1002	/*
1003	 * The simplest case - just a plain lookup.
1004	 */
1005	if (!(flag & O_CREAT)) {
1006		error = path_lookup(pathname, lookup_flags(flag), nd);
1007		if (error)
1008			return error;
1009		dentry = nd->dentry;
1010		goto ok;
1011	}
1012
1013	/*
1014	 * Create - we need to know the parent.
1015	 */
1016	error = path_lookup(pathname, LOOKUP_PARENT, nd);
1017	if (error)
1018		return error;
1019
1020	/*
1021	 * We have the parent and last component. First of all, check
1022	 * that we are not asked to creat(2) an obvious directory - that
1023	 * will not do.
1024	 */
1025	error = -EISDIR;
1026	if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
1027		goto exit;
1028
1029	dir = nd->dentry;
1030	down(&dir->d_inode->i_sem);
1031	dentry = lookup_hash(&nd->last, nd->dentry);
1032
1033do_last:
1034	error = PTR_ERR(dentry);
1035	if (IS_ERR(dentry)) {
1036		up(&dir->d_inode->i_sem);
1037		goto exit;
1038	}
1039
1040	/* Negative dentry, just create the file */
1041	if (!dentry->d_inode) {
1042		error = vfs_create(dir->d_inode, dentry,
1043				   mode & ~current->fs->umask);
1044		up(&dir->d_inode->i_sem);
1045		dput(nd->dentry);
1046		nd->dentry = dentry;
1047		if (error)
1048			goto exit;
1049		/* Don't check for write permission, don't truncate */
1050		acc_mode = 0;
1051		flag &= ~O_TRUNC;
1052		goto ok;
1053	}
1054
1055	/*
1056	 * It already exists.
1057	 */
1058	up(&dir->d_inode->i_sem);
1059
1060	error = -EEXIST;
1061	if (flag & O_EXCL)
1062		goto exit_dput;
1063
1064	if (d_mountpoint(dentry)) {
1065		error = -ELOOP;
1066		if (flag & O_NOFOLLOW)
1067			goto exit_dput;
1068		while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry));
1069	}
1070	error = -ENOENT;
1071	if (!dentry->d_inode)
1072		goto exit_dput;
1073	if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
1074		goto do_link;
1075
1076	dput(nd->dentry);
1077	nd->dentry = dentry;
1078	error = -EISDIR;
1079	if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
1080		goto exit;
1081ok:
1082	error = -ENOENT;
1083	inode = dentry->d_inode;
1084	if (!inode)
1085		goto exit;
1086
1087	error = -ELOOP;
1088	if (S_ISLNK(inode->i_mode))
1089		goto exit;
1090
1091	error = -EISDIR;
1092	if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
1093		goto exit;
1094
1095	error = permission(inode,acc_mode);
1096	if (error)
1097		goto exit;
1098
1099	/*
1100	 * FIFO's, sockets and device files are special: they don't
1101	 * actually live on the filesystem itself, and as such you
1102	 * can write to them even if the filesystem is read-only.
1103	 */
1104	if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
1105	    	flag &= ~O_TRUNC;
1106	} else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
1107		error = -EACCES;
1108		if (nd->mnt->mnt_flags & MNT_NODEV)
1109			goto exit;
1110
1111		flag &= ~O_TRUNC;
1112	} else {
1113		error = -EROFS;
1114		if (IS_RDONLY(inode) && (flag & 2))
1115			goto exit;
1116	}
1117	/*
1118	 * An append-only file must be opened in append mode for writing.
1119	 */
1120	error = -EPERM;
1121	if (IS_APPEND(inode)) {
1122		if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
1123			goto exit;
1124		if (flag & O_TRUNC)
1125			goto exit;
1126	}
1127
1128	/*
1129	 * Ensure there are no outstanding leases on the file.
1130	 */
1131	error = get_lease(inode, flag);
1132	if (error)
1133		goto exit;
1134
1135	if (flag & O_TRUNC) {
1136		error = get_write_access(inode);
1137		if (error)
1138			goto exit;
1139
1140		/*
1141		 * Refuse to truncate files with mandatory locks held on them.
1142		 */
1143		error = locks_verify_locked(inode);
1144		if (!error) {
1145			DQUOT_INIT(inode);
1146
1147			error = do_truncate(dentry, 0);
1148		}
1149		put_write_access(inode);
1150		if (error)
1151			goto exit;
1152	} else
1153		if (flag & FMODE_WRITE)
1154			DQUOT_INIT(inode);
1155
1156	return 0;
1157
1158exit_dput:
1159	dput(dentry);
1160exit:
1161	path_release(nd);
1162	return error;
1163
1164do_link:
1165	error = -ELOOP;
1166	if (flag & O_NOFOLLOW)
1167		goto exit_dput;
1168	/*
1169	 * This is subtle. Instead of calling do_follow_link() we do the
1170	 * thing by hands. The reason is that this way we have zero link_count
1171	 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
1172	 * After that we have the parent and last component, i.e.
1173	 * we are in the same situation as after the first path_walk().
1174	 * Well, almost - if the last component is normal we get its copy
1175	 * stored in nd->last.name and we will have to putname() it when we
1176	 * are done. Procfs-like symlinks just set LAST_BIND.
1177	 */
1178	UPDATE_ATIME(dentry->d_inode);
1179	error = dentry->d_inode->i_op->follow_link(dentry, nd);
1180	dput(dentry);
1181	if (error)
1182		return error;
1183	if (nd->last_type == LAST_BIND) {
1184		dentry = nd->dentry;
1185		goto ok;
1186	}
1187	error = -EISDIR;
1188	if (nd->last_type != LAST_NORM)
1189		goto exit;
1190	if (nd->last.name[nd->last.len]) {
1191		putname(nd->last.name);
1192		goto exit;
1193	}
1194	error = -ELOOP;
1195	if (count++==32) {
1196		putname(nd->last.name);
1197		goto exit;
1198	}
1199	dir = nd->dentry;
1200	down(&dir->d_inode->i_sem);
1201	dentry = lookup_hash(&nd->last, nd->dentry);
1202	putname(nd->last.name);
1203	goto do_last;
1204}
1205
1206/* SMP-safe */
1207static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1208{
1209	struct dentry *dentry;
1210
1211	down(&nd->dentry->d_inode->i_sem);
1212	dentry = ERR_PTR(-EEXIST);
1213	if (nd->last_type != LAST_NORM)
1214		goto fail;
1215	dentry = lookup_hash(&nd->last, nd->dentry);
1216	if (IS_ERR(dentry))
1217		goto fail;
1218	if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
1219		goto enoent;
1220	return dentry;
1221enoent:
1222	dput(dentry);
1223	dentry = ERR_PTR(-ENOENT);
1224fail:
1225	return dentry;
1226}
1227
1228int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1229{
1230	int error = -EPERM;
1231
1232	down(&dir->i_zombie);
1233	if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
1234		goto exit_lock;
1235
1236	error = may_create(dir, dentry);
1237	if (error)
1238		goto exit_lock;
1239
1240	error = -EPERM;
1241	if (!dir->i_op || !dir->i_op->mknod)
1242		goto exit_lock;
1243
1244	DQUOT_INIT(dir);
1245	lock_kernel();
1246	error = dir->i_op->mknod(dir, dentry, mode, dev);
1247	unlock_kernel();
1248exit_lock:
1249	up(&dir->i_zombie);
1250	if (!error)
1251		inode_dir_notify(dir, DN_CREATE);
1252	return error;
1253}
1254
1255asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev)
1256{
1257	int error = 0;
1258	char * tmp;
1259	struct dentry * dentry;
1260	struct nameidata nd;
1261
1262	if (S_ISDIR(mode))
1263		return -EPERM;
1264	tmp = getname(filename);
1265	if (IS_ERR(tmp))
1266		return PTR_ERR(tmp);
1267
1268	error = path_lookup(tmp, LOOKUP_PARENT, &nd);
1269	if (error)
1270		goto out;
1271	dentry = lookup_create(&nd, 0);
1272	error = PTR_ERR(dentry);
1273
1274	mode &= ~current->fs->umask;
1275	if (!IS_ERR(dentry)) {
1276		switch (mode & S_IFMT) {
1277		case 0: case S_IFREG:
1278			error = vfs_create(nd.dentry->d_inode,dentry,mode);
1279			break;
1280		case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
1281			error = vfs_mknod(nd.dentry->d_inode,dentry,mode,dev);
1282			break;
1283		case S_IFDIR:
1284			error = -EPERM;
1285			break;
1286		default:
1287			error = -EINVAL;
1288		}
1289		dput(dentry);
1290	}
1291	up(&nd.dentry->d_inode->i_sem);
1292	path_release(&nd);
1293out:
1294	putname(tmp);
1295
1296	return error;
1297}
1298
1299int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1300{
1301	int error;
1302
1303	down(&dir->i_zombie);
1304	error = may_create(dir, dentry);
1305	if (error)
1306		goto exit_lock;
1307
1308	error = -EPERM;
1309	if (!dir->i_op || !dir->i_op->mkdir)
1310		goto exit_lock;
1311
1312	DQUOT_INIT(dir);
1313	mode &= (S_IRWXUGO|S_ISVTX);
1314	lock_kernel();
1315	error = dir->i_op->mkdir(dir, dentry, mode);
1316	unlock_kernel();
1317
1318exit_lock:
1319	up(&dir->i_zombie);
1320	if (!error)
1321		inode_dir_notify(dir, DN_CREATE);
1322	return error;
1323}
1324
1325asmlinkage long sys_mkdir(const char * pathname, int mode)
1326{
1327	int error = 0;
1328	char * tmp;
1329
1330	tmp = getname(pathname);
1331	error = PTR_ERR(tmp);
1332	if (!IS_ERR(tmp)) {
1333		struct dentry *dentry;
1334		struct nameidata nd;
1335
1336		error = path_lookup(tmp, LOOKUP_PARENT, &nd);
1337		if (error)
1338			goto out;
1339		dentry = lookup_create(&nd, 1);
1340		error = PTR_ERR(dentry);
1341		if (!IS_ERR(dentry)) {
1342			error = vfs_mkdir(nd.dentry->d_inode, dentry,
1343					  mode & ~current->fs->umask);
1344			dput(dentry);
1345		}
1346		up(&nd.dentry->d_inode->i_sem);
1347		path_release(&nd);
1348out:
1349		putname(tmp);
1350	}
1351
1352	return error;
1353}
1354
1355/*
1356 * We try to drop the dentry early: we should have
1357 * a usage count of 2 if we're the only user of this
1358 * dentry, and if that is true (possibly after pruning
1359 * the dcache), then we drop the dentry now.
1360 *
1361 * A low-level filesystem can, if it choses, legally
1362 * do a
1363 *
1364 *	if (!d_unhashed(dentry))
1365 *		return -EBUSY;
1366 *
1367 * if it cannot handle the case of removing a directory
1368 * that is still in use by something else..
1369 */
1370static void d_unhash(struct dentry *dentry)
1371{
1372	dget(dentry);
1373	spin_lock(&dcache_lock);
1374	switch (atomic_read(&dentry->d_count)) {
1375	default:
1376		spin_unlock(&dcache_lock);
1377		shrink_dcache_parent(dentry);
1378		spin_lock(&dcache_lock);
1379		if (atomic_read(&dentry->d_count) != 2)
1380			break;
1381	case 2:
1382		list_del_init(&dentry->d_hash);
1383	}
1384	spin_unlock(&dcache_lock);
1385}
1386
1387int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1388{
1389	int error;
1390
1391	error = may_delete(dir, dentry, 1);
1392	if (error)
1393		return error;
1394
1395	if (!dir->i_op || !dir->i_op->rmdir)
1396		return -EPERM;
1397
1398	DQUOT_INIT(dir);
1399
1400	double_down(&dir->i_zombie, &dentry->d_inode->i_zombie);
1401	d_unhash(dentry);
1402	if (d_mountpoint(dentry))
1403		error = -EBUSY;
1404	else {
1405		lock_kernel();
1406		error = dir->i_op->rmdir(dir, dentry);
1407		unlock_kernel();
1408		if (!error)
1409			dentry->d_inode->i_flags |= S_DEAD;
1410	}
1411	double_up(&dir->i_zombie, &dentry->d_inode->i_zombie);
1412	if (!error) {
1413		inode_dir_notify(dir, DN_DELETE);
1414		d_delete(dentry);
1415	}
1416	dput(dentry);
1417
1418	return error;
1419}
1420
1421asmlinkage long sys_rmdir(const char * pathname)
1422{
1423	int error = 0;
1424	char * name;
1425	struct dentry *dentry;
1426	struct nameidata nd;
1427
1428	name = getname(pathname);
1429	if(IS_ERR(name))
1430		return PTR_ERR(name);
1431
1432	error = path_lookup(name, LOOKUP_PARENT, &nd);
1433	if (error)
1434		goto exit;
1435
1436	switch(nd.last_type) {
1437		case LAST_DOTDOT:
1438			error = -ENOTEMPTY;
1439			goto exit1;
1440		case LAST_DOT:
1441			error = -EINVAL;
1442			goto exit1;
1443		case LAST_ROOT:
1444			error = -EBUSY;
1445			goto exit1;
1446	}
1447	down(&nd.dentry->d_inode->i_sem);
1448	dentry = lookup_hash(&nd.last, nd.dentry);
1449	error = PTR_ERR(dentry);
1450	if (!IS_ERR(dentry)) {
1451		error = vfs_rmdir(nd.dentry->d_inode, dentry);
1452		dput(dentry);
1453	}
1454	up(&nd.dentry->d_inode->i_sem);
1455exit1:
1456	path_release(&nd);
1457exit:
1458	putname(name);
1459	return error;
1460}
1461
1462int vfs_unlink(struct inode *dir, struct dentry *dentry)
1463{
1464	int error;
1465
1466	down(&dir->i_zombie);
1467	error = may_delete(dir, dentry, 0);
1468	if (!error) {
1469		error = -EPERM;
1470		if (dir->i_op && dir->i_op->unlink) {
1471			DQUOT_INIT(dir);
1472			if (d_mountpoint(dentry))
1473				error = -EBUSY;
1474			else {
1475				lock_kernel();
1476				error = dir->i_op->unlink(dir, dentry);
1477				unlock_kernel();
1478				if (!error)
1479					d_delete(dentry);
1480			}
1481		}
1482	}
1483	up(&dir->i_zombie);
1484	if (!error)
1485		inode_dir_notify(dir, DN_DELETE);
1486	return error;
1487}
1488
1489asmlinkage long sys_unlink(const char * pathname)
1490{
1491	int error = 0;
1492	char * name;
1493	struct dentry *dentry;
1494	struct nameidata nd;
1495
1496	name = getname(pathname);
1497	if(IS_ERR(name))
1498		return PTR_ERR(name);
1499
1500	error = path_lookup(name, LOOKUP_PARENT, &nd);
1501	if (error)
1502		goto exit;
1503	error = -EISDIR;
1504	if (nd.last_type != LAST_NORM)
1505		goto exit1;
1506	down(&nd.dentry->d_inode->i_sem);
1507	dentry = lookup_hash(&nd.last, nd.dentry);
1508	error = PTR_ERR(dentry);
1509	if (!IS_ERR(dentry)) {
1510		/* Why not before? Because we want correct error value */
1511		if (nd.last.name[nd.last.len])
1512			goto slashes;
1513		error = vfs_unlink(nd.dentry->d_inode, dentry);
1514	exit2:
1515		dput(dentry);
1516	}
1517	up(&nd.dentry->d_inode->i_sem);
1518exit1:
1519	path_release(&nd);
1520exit:
1521	putname(name);
1522
1523	return error;
1524
1525slashes:
1526	error = !dentry->d_inode ? -ENOENT :
1527		S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
1528	goto exit2;
1529}
1530
1531int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
1532{
1533	int error;
1534
1535	down(&dir->i_zombie);
1536	error = may_create(dir, dentry);
1537	if (error)
1538		goto exit_lock;
1539
1540	error = -EPERM;
1541	if (!dir->i_op || !dir->i_op->symlink)
1542		goto exit_lock;
1543
1544	DQUOT_INIT(dir);
1545	lock_kernel();
1546	error = dir->i_op->symlink(dir, dentry, oldname);
1547	unlock_kernel();
1548
1549exit_lock:
1550	up(&dir->i_zombie);
1551	if (!error)
1552		inode_dir_notify(dir, DN_CREATE);
1553	return error;
1554}
1555
1556asmlinkage long sys_symlink(const char * oldname, const char * newname)
1557{
1558	int error = 0;
1559	char * from;
1560	char * to;
1561
1562	from = getname(oldname);
1563	if(IS_ERR(from))
1564		return PTR_ERR(from);
1565	to = getname(newname);
1566	error = PTR_ERR(to);
1567	if (!IS_ERR(to)) {
1568		struct dentry *dentry;
1569		struct nameidata nd;
1570
1571		error = path_lookup(to, LOOKUP_PARENT, &nd);
1572		if (error)
1573			goto out;
1574		dentry = lookup_create(&nd, 0);
1575		error = PTR_ERR(dentry);
1576		if (!IS_ERR(dentry)) {
1577			error = vfs_symlink(nd.dentry->d_inode, dentry, from);
1578			dput(dentry);
1579		}
1580		up(&nd.dentry->d_inode->i_sem);
1581		path_release(&nd);
1582out:
1583		putname(to);
1584	}
1585	putname(from);
1586	return error;
1587}
1588
1589int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
1590{
1591	struct inode *inode;
1592	int error;
1593
1594	down(&dir->i_zombie);
1595	error = -ENOENT;
1596	inode = old_dentry->d_inode;
1597	if (!inode)
1598		goto exit_lock;
1599
1600	error = may_create(dir, new_dentry);
1601	if (error)
1602		goto exit_lock;
1603
1604	error = -EXDEV;
1605	if (dir->i_dev != inode->i_dev)
1606		goto exit_lock;
1607
1608	/*
1609	 * A link to an append-only or immutable file cannot be created.
1610	 */
1611	error = -EPERM;
1612	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1613		goto exit_lock;
1614	if (!dir->i_op || !dir->i_op->link)
1615		goto exit_lock;
1616
1617	DQUOT_INIT(dir);
1618	lock_kernel();
1619	error = dir->i_op->link(old_dentry, dir, new_dentry);
1620	unlock_kernel();
1621
1622exit_lock:
1623	up(&dir->i_zombie);
1624	if (!error)
1625		inode_dir_notify(dir, DN_CREATE);
1626	return error;
1627}
1628
1629/*
1630 * Hardlinks are often used in delicate situations.  We avoid
1631 * security-related surprises by not following symlinks on the
1632 * newname.  --KAB
1633 *
1634 * We don't follow them on the oldname either to be compatible
1635 * with linux 2.0, and to avoid hard-linking to directories
1636 * and other special files.  --ADM
1637 */
1638asmlinkage long sys_link(const char * oldname, const char * newname)
1639{
1640	int error;
1641	char * to;
1642
1643	to = getname(newname);
1644	error = PTR_ERR(to);
1645	if (!IS_ERR(to)) {
1646		struct dentry *new_dentry;
1647		struct nameidata nd, old_nd;
1648
1649		error = __user_walk(oldname, LOOKUP_POSITIVE, &old_nd);
1650		if (error)
1651			goto exit;
1652		error = path_lookup(to, LOOKUP_PARENT, &nd);
1653		if (error)
1654			goto out;
1655		error = -EXDEV;
1656		if (old_nd.mnt != nd.mnt)
1657			goto out_release;
1658		new_dentry = lookup_create(&nd, 0);
1659		error = PTR_ERR(new_dentry);
1660		if (!IS_ERR(new_dentry)) {
1661			error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
1662			dput(new_dentry);
1663		}
1664		up(&nd.dentry->d_inode->i_sem);
1665out_release:
1666		path_release(&nd);
1667out:
1668		path_release(&old_nd);
1669exit:
1670		putname(to);
1671	}
1672	return error;
1673}
1674
1675int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
1676	       struct inode *new_dir, struct dentry *new_dentry)
1677{
1678	int error;
1679	struct inode *target;
1680
1681	if (old_dentry->d_inode == new_dentry->d_inode)
1682		return 0;
1683
1684	error = may_delete(old_dir, old_dentry, 1);
1685	if (error)
1686		return error;
1687
1688	if (new_dir->i_dev != old_dir->i_dev)
1689		return -EXDEV;
1690
1691	if (!new_dentry->d_inode)
1692		error = may_create(new_dir, new_dentry);
1693	else
1694		error = may_delete(new_dir, new_dentry, 1);
1695	if (error)
1696		return error;
1697
1698	if (!old_dir->i_op || !old_dir->i_op->rename)
1699		return -EPERM;
1700
1701	/*
1702	 * If we are going to change the parent - check write permissions,
1703	 * we'll need to flip '..'.
1704	 */
1705	if (new_dir != old_dir) {
1706		error = permission(old_dentry->d_inode, MAY_WRITE);
1707	}
1708	if (error)
1709		return error;
1710
1711	DQUOT_INIT(old_dir);
1712	DQUOT_INIT(new_dir);
1713	down(&old_dir->i_sb->s_vfs_rename_sem);
1714	error = -EINVAL;
1715	if (is_subdir(new_dentry, old_dentry))
1716		goto out_unlock;
1717	/* Don't eat your daddy, dear... */
1718	/* This also avoids locking issues */
1719	if (old_dentry->d_parent == new_dentry)
1720		goto out_unlock;
1721	target = new_dentry->d_inode;
1722	if (target) { /* Hastur! Hastur! Hastur! */
1723		triple_down(&old_dir->i_zombie,
1724			    &new_dir->i_zombie,
1725			    &target->i_zombie);
1726		d_unhash(new_dentry);
1727	} else
1728		double_down(&old_dir->i_zombie,
1729			    &new_dir->i_zombie);
1730	if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1731		error = -EBUSY;
1732	else
1733		error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1734	if (target) {
1735		if (!error)
1736			target->i_flags |= S_DEAD;
1737		triple_up(&old_dir->i_zombie,
1738			  &new_dir->i_zombie,
1739			  &target->i_zombie);
1740		if (d_unhashed(new_dentry))
1741			d_rehash(new_dentry);
1742		dput(new_dentry);
1743	} else
1744		double_up(&old_dir->i_zombie,
1745			  &new_dir->i_zombie);
1746
1747	if (!error)
1748		d_move(old_dentry,new_dentry);
1749out_unlock:
1750	up(&old_dir->i_sb->s_vfs_rename_sem);
1751	return error;
1752}
1753
1754int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
1755	       struct inode *new_dir, struct dentry *new_dentry)
1756{
1757	int error;
1758
1759	if (old_dentry->d_inode == new_dentry->d_inode)
1760		return 0;
1761
1762	error = may_delete(old_dir, old_dentry, 0);
1763	if (error)
1764		return error;
1765
1766	if (new_dir->i_dev != old_dir->i_dev)
1767		return -EXDEV;
1768
1769	if (!new_dentry->d_inode)
1770		error = may_create(new_dir, new_dentry);
1771	else
1772		error = may_delete(new_dir, new_dentry, 0);
1773	if (error)
1774		return error;
1775
1776	if (!old_dir->i_op || !old_dir->i_op->rename)
1777		return -EPERM;
1778
1779	DQUOT_INIT(old_dir);
1780	DQUOT_INIT(new_dir);
1781	double_down(&old_dir->i_zombie, &new_dir->i_zombie);
1782	if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1783		error = -EBUSY;
1784	else
1785		error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1786	double_up(&old_dir->i_zombie, &new_dir->i_zombie);
1787	if (error)
1788		return error;
1789	/* The following d_move() should become unconditional */
1790	if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) {
1791		d_move(old_dentry, new_dentry);
1792	}
1793	return 0;
1794}
1795
1796int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1797	       struct inode *new_dir, struct dentry *new_dentry)
1798{
1799	int error;
1800	if (S_ISDIR(old_dentry->d_inode->i_mode))
1801		error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
1802	else
1803		error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
1804	if (!error) {
1805		if (old_dir == new_dir)
1806			inode_dir_notify(old_dir, DN_RENAME);
1807		else {
1808			inode_dir_notify(old_dir, DN_DELETE);
1809			inode_dir_notify(new_dir, DN_CREATE);
1810		}
1811	}
1812	return error;
1813}
1814
1815static inline int do_rename(const char * oldname, const char * newname)
1816{
1817	int error = 0;
1818	struct dentry * old_dir, * new_dir;
1819	struct dentry * old_dentry, *new_dentry;
1820	struct nameidata oldnd, newnd;
1821
1822	error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
1823	if (error)
1824		goto exit;
1825
1826	error = path_lookup(newname, LOOKUP_PARENT, &newnd);
1827	if (error)
1828		goto exit1;
1829
1830	error = -EXDEV;
1831	if (oldnd.mnt != newnd.mnt)
1832		goto exit2;
1833
1834	old_dir = oldnd.dentry;
1835	error = -EBUSY;
1836	if (oldnd.last_type != LAST_NORM)
1837		goto exit2;
1838
1839	new_dir = newnd.dentry;
1840	if (newnd.last_type != LAST_NORM)
1841		goto exit2;
1842
1843	double_lock(new_dir, old_dir);
1844
1845	old_dentry = lookup_hash(&oldnd.last, old_dir);
1846	error = PTR_ERR(old_dentry);
1847	if (IS_ERR(old_dentry))
1848		goto exit3;
1849	/* source must exist */
1850	error = -ENOENT;
1851	if (!old_dentry->d_inode)
1852		goto exit4;
1853	/* unless the source is a directory trailing slashes give -ENOTDIR */
1854	if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
1855		error = -ENOTDIR;
1856		if (oldnd.last.name[oldnd.last.len])
1857			goto exit4;
1858		if (newnd.last.name[newnd.last.len])
1859			goto exit4;
1860	}
1861	new_dentry = lookup_hash(&newnd.last, new_dir);
1862	error = PTR_ERR(new_dentry);
1863	if (IS_ERR(new_dentry))
1864		goto exit4;
1865
1866	lock_kernel();
1867	error = vfs_rename(old_dir->d_inode, old_dentry,
1868				   new_dir->d_inode, new_dentry);
1869	unlock_kernel();
1870
1871	dput(new_dentry);
1872exit4:
1873	dput(old_dentry);
1874exit3:
1875	double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
1876exit2:
1877	path_release(&newnd);
1878exit1:
1879	path_release(&oldnd);
1880exit:
1881	return error;
1882}
1883
1884asmlinkage long sys_rename(const char * oldname, const char * newname)
1885{
1886	int error;
1887	char * from;
1888	char * to;
1889
1890	from = getname(oldname);
1891	if(IS_ERR(from))
1892		return PTR_ERR(from);
1893	to = getname(newname);
1894	error = PTR_ERR(to);
1895	if (!IS_ERR(to)) {
1896		error = do_rename(from,to);
1897		putname(to);
1898	}
1899	putname(from);
1900	return error;
1901}
1902
1903int vfs_readlink(struct dentry *dentry, char *buffer, int buflen, const char *link)
1904{
1905	int len;
1906
1907	len = PTR_ERR(link);
1908	if (IS_ERR(link))
1909		goto out;
1910
1911	len = strlen(link);
1912	if (len > (unsigned) buflen)
1913		len = buflen;
1914	if (copy_to_user(buffer, link, len))
1915		len = -EFAULT;
1916out:
1917	return len;
1918}
1919
1920static inline int
1921__vfs_follow_link(struct nameidata *nd, const char *link)
1922{
1923	int res = 0;
1924	char *name;
1925	if (IS_ERR(link))
1926		goto fail;
1927
1928	if (*link == '/') {
1929		path_release(nd);
1930		if (!walk_init_root(link, nd))
1931			/* weird __emul_prefix() stuff did it */
1932			goto out;
1933	}
1934	res = link_path_walk(link, nd);
1935out:
1936	if (current->link_count || res || nd->last_type!=LAST_NORM)
1937		return res;
1938	/*
1939	 * If it is an iterative symlinks resolution in open_namei() we
1940	 * have to copy the last component. And all that crap because of
1941	 * bloody create() on broken symlinks. Furrfu...
1942	 */
1943	name = __getname();
1944	if (!name)
1945		return -ENOMEM;
1946	strcpy(name, nd->last.name);
1947	nd->last.name = name;
1948	return 0;
1949fail:
1950	path_release(nd);
1951	return PTR_ERR(link);
1952}
1953
/*
 * vfs_follow_link - out-of-line entry point for __vfs_follow_link().
 * Takes the same arguments and returns the same result.
 */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
	int res;

	res = __vfs_follow_link(nd, link);
	return res;
}
1958
1959/* get the link contents into pagecache */
1960static char *page_getlink(struct dentry * dentry, struct page **ppage)
1961{
1962	struct page * page;
1963	struct address_space *mapping = dentry->d_inode->i_mapping;
1964	page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
1965				NULL);
1966	if (IS_ERR(page))
1967		goto sync_fail;
1968	wait_on_page(page);
1969	if (!Page_Uptodate(page))
1970		goto async_fail;
1971	*ppage = page;
1972	return kmap(page);
1973
1974async_fail:
1975	page_cache_release(page);
1976	return ERR_PTR(-EIO);
1977
1978sync_fail:
1979	return (char*)page;
1980}
1981
1982int page_readlink(struct dentry *dentry, char *buffer, int buflen)
1983{
1984	struct page *page = NULL;
1985	char *s = page_getlink(dentry, &page);
1986	int res = vfs_readlink(dentry,buffer,buflen,s);
1987	if (page) {
1988		kunmap(page);
1989		page_cache_release(page);
1990	}
1991	return res;
1992}
1993
1994int page_follow_link(struct dentry *dentry, struct nameidata *nd)
1995{
1996	struct page *page = NULL;
1997	char *s = page_getlink(dentry, &page);
1998	int res = __vfs_follow_link(nd, s);
1999	if (page) {
2000		kunmap(page);
2001		page_cache_release(page);
2002	}
2003	return res;
2004}
2005
2006struct inode_operations page_symlink_inode_operations = {
2007	readlink:	page_readlink,
2008	follow_link:	page_follow_link,
2009};
2010