1/* 2 * linux/fs/namei.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7/* 8 * Some corrections by tytso. 9 */ 10 11/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname 12 * lookup logic. 13 */ 14/* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture. 15 */ 16 17#include <linux/init.h> 18#include <linux/slab.h> 19#include <linux/fs.h> 20#include <linux/quotaops.h> 21#include <linux/pagemap.h> 22#include <linux/dnotify.h> 23#include <linux/smp_lock.h> 24#include <linux/personality.h> 25 26#include <asm/namei.h> 27#include <asm/uaccess.h> 28 29#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) 30 31/* [Feb-1997 T. Schoebel-Theuer] 32 * Fundamental changes in the pathname lookup mechanisms (namei) 33 * were necessary because of omirr. The reason is that omirr needs 34 * to know the _real_ pathname, not the user-supplied one, in case 35 * of symlinks (and also when transname replacements occur). 36 * 37 * The new code replaces the old recursive symlink resolution with 38 * an iterative one (in case of non-nested symlink chains). It does 39 * this with calls to <fs>_follow_link(). 40 * As a side effect, dir_namei(), _namei() and follow_link() are now 41 * replaced with a single function lookup_dentry() that can handle all 42 * the special cases of the former code. 43 * 44 * With the new dcache, the pathname is stored at each inode, at least as 45 * long as the refcount of the inode is positive. As a side effect, the 46 * size of the dcache depends on the inode cache and thus is dynamic. 47 * 48 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink 49 * resolution to correspond with current state of the code. 50 * 51 * Note that the symlink resolution is not *completely* iterative. 52 * There is still a significant amount of tail- and mid- recursion in 53 * the algorithm. Also, note that <fs>_readlink() is not used in 54 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink() 55 * may return different results than <fs>_follow_link(). Many virtual 56 * filesystems (including /proc) exhibit this behavior. 57 */ 58 59/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation: 60 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL 61 * and the name already exists in form of a symlink, try to create the new 62 * name indicated by the symlink. The old code always complained that the 63 * name already exists, due to not following the symlink even if its target 64 * is nonexistent. The new semantics affects also mknod() and link() when 65 * the name is a symlink pointing to a non-existant name. 66 * 67 * I don't know which semantics is the right one, since I have no access 68 * to standards. But I found by trial that HP-UX 9.0 has the full "new" 69 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the 70 * "old" one. Personally, I think the new semantics is much more logical. 71 * Note that "ln old new" where "new" is a symlink pointing to a non-existing 72 * file does succeed in both HP-UX and SunOs, but not in Solaris 73 * and in the old Linux semantics. 74 */ 75 76/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink 77 * semantics. See the comments in "open_namei" and "do_link" below. 78 * 79 * [10-Sep-98 Alan Modra] Another symlink change. 80 */ 81 82/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks: 83 * inside the path - always follow. 84 * in the last component in creation/removal/renaming - never follow. 85 * if LOOKUP_FOLLOW passed - follow. 86 * if the pathname has trailing slashes - follow. 87 * otherwise - don't follow. 88 * (applied in that order). 89 * 90 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT 91 * restored for 2.4. This is the last surviving part of old 4.2BSD bug. 92 * During the 2.4 we need to fix the userland stuff depending on it - 93 * hopefully we will be able to get rid of that wart in 2.5. So far only 94 * XEmacs seems to be relying on it... 95 */ 96 97/* In order to reduce some races, while at the same time doing additional 98 * checking and hopefully speeding things up, we copy filenames to the 99 * kernel data space before using them.. 100 * 101 * POSIX.1 2.4: an empty pathname is invalid (ENOENT). 102 * PATH_MAX includes the nul terminator --RR. 103 */ 104static inline int do_getname(const char *filename, char *page) 105{ 106 int retval; 107 unsigned long len = PATH_MAX; 108 109 if ((unsigned long) filename >= TASK_SIZE) { 110 if (!segment_eq(get_fs(), KERNEL_DS)) 111 return -EFAULT; 112 } else if (TASK_SIZE - (unsigned long) filename < PATH_MAX) 113 len = TASK_SIZE - (unsigned long) filename; 114 115 retval = strncpy_from_user((char *)page, filename, len); 116 if (retval > 0) { 117 if (retval < len) 118 return 0; 119 return -ENAMETOOLONG; 120 } else if (!retval) 121 retval = -ENOENT; 122 return retval; 123} 124 125char * getname(const char * filename) 126{ 127 char *tmp, *result; 128 129 result = ERR_PTR(-ENOMEM); 130 tmp = __getname(); 131 if (tmp) { 132 int retval = do_getname(filename, tmp); 133 134 result = tmp; 135 if (retval < 0) { 136 putname(tmp); 137 result = ERR_PTR(retval); 138 } 139 } 140 return result; 141} 142 143/* 144 * vfs_permission() 145 * 146 * is used to check for read/write/execute permissions on a file. 147 * We use "fsuid" for this, letting us set arbitrary permissions 148 * for filesystem access without changing the "normal" uids which 149 * are used for other things.. 150 */ 151int vfs_permission(struct inode * inode, int mask) 152{ 153 umode_t mode = inode->i_mode; 154 155 if (mask & MAY_WRITE) { 156 /* 157 * Nobody gets write access to a read-only fs. 158 */ 159 if (IS_RDONLY(inode) && 160 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) 161 return -EROFS; 162 163 /* 164 * Nobody gets write access to an immutable file. 165 */ 166 if (IS_IMMUTABLE(inode)) 167 return -EACCES; 168 } 169 170 if (current->fsuid == inode->i_uid) 171 mode >>= 6; 172 else if (in_group_p(inode->i_gid)) 173 mode >>= 3; 174 175 /* 176 * If the DACs are ok we don't need any capability check. 177 */ 178 if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) 179 return 0; 180 181 /* 182 * Read/write DACs are always overridable. 183 * Executable DACs are overridable if at least one exec bit is set. 184 */ 185 if ((mask & (MAY_READ|MAY_WRITE)) || (inode->i_mode & S_IXUGO)) 186 if (capable(CAP_DAC_OVERRIDE)) 187 return 0; 188 189 /* 190 * Searching includes executable on directories, else just read. 191 */ 192 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 193 if (capable(CAP_DAC_READ_SEARCH)) 194 return 0; 195 196 return -EACCES; 197} 198 199int permission(struct inode * inode,int mask) 200{ 201 if (inode->i_op && inode->i_op->permission) { 202 int retval; 203 lock_kernel(); 204 retval = inode->i_op->permission(inode, mask); 205 unlock_kernel(); 206 return retval; 207 } 208 return vfs_permission(inode, mask); 209} 210 211/* 212 * get_write_access() gets write permission for a file. 213 * put_write_access() releases this write permission. 214 * This is used for regular files. 215 * We cannot support write (and maybe mmap read-write shared) accesses and 216 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode 217 * can have the following values: 218 * 0: no writers, no VM_DENYWRITE mappings 219 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist 220 * > 0: (i_writecount) users are writing to the file. 221 * 222 * Normally we operate on that counter with atomic_{inc,dec} and it's safe 223 * except for the cases where we don't hold i_writecount yet. Then we need to 224 * use {get,deny}_write_access() - these functions check the sign and refuse 225 * to do the change if sign is wrong. Exclusion between them is provided by 226 * spinlock (arbitration_lock) and I'll rip the second arsehole to the first 227 * who will try to move it in struct inode - just leave it here. 228 */ 229static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED; 230int get_write_access(struct inode * inode) 231{ 232 spin_lock(&arbitration_lock); 233 if (atomic_read(&inode->i_writecount) < 0) { 234 spin_unlock(&arbitration_lock); 235 return -ETXTBSY; 236 } 237 atomic_inc(&inode->i_writecount); 238 spin_unlock(&arbitration_lock); 239 return 0; 240} 241int deny_write_access(struct file * file) 242{ 243 spin_lock(&arbitration_lock); 244 if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) { 245 spin_unlock(&arbitration_lock); 246 return -ETXTBSY; 247 } 248 atomic_dec(&file->f_dentry->d_inode->i_writecount); 249 spin_unlock(&arbitration_lock); 250 return 0; 251} 252 253void path_release(struct nameidata *nd) 254{ 255 dput(nd->dentry); 256 mntput(nd->mnt); 257} 258 259/* 260 * Internal lookup() using the new generic dcache. 261 * SMP-safe 262 */ 263static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) 264{ 265 struct dentry * dentry = d_lookup(parent, name); 266 267 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { 268 if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { 269 dput(dentry); 270 dentry = NULL; 271 } 272 } 273 return dentry; 274} 275 276/* 277 * This is called when everything else fails, and we actually have 278 * to go to the low-level filesystem to find out what we should do.. 279 * 280 * We get the directory semaphore, and after getting that we also 281 * make sure that nobody added the entry to the dcache in the meantime.. 282 * SMP-safe 283 */ 284static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) 285{ 286 struct dentry * result; 287 struct inode *dir = parent->d_inode; 288 289 down(&dir->i_sem); 290 result = d_lookup(parent, name); 291 if (!result) { 292 struct dentry * dentry = d_alloc(parent, name); 293 result = ERR_PTR(-ENOMEM); 294 if (dentry) { 295 lock_kernel(); 296 result = dir->i_op->lookup(dir, dentry); 297 unlock_kernel(); 298 if (result) 299 dput(dentry); 300 else 301 result = dentry; 302 } 303 up(&dir->i_sem); 304 return result; 305 } 306 307 /* 308 * Uhhuh! Nasty case: the cache was re-populated while 309 * we waited on the semaphore. Need to revalidate. 310 */ 311 up(&dir->i_sem); 312 if (result->d_op && result->d_op->d_revalidate) { 313 if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) { 314 dput(result); 315 result = ERR_PTR(-ENOENT); 316 } 317 } 318 return result; 319} 320 321/* 322 * This limits recursive symlink follows to 8, while 323 * limiting consecutive symlinks to 40. 324 * 325 * Without that kind of total limit, nasty chains of consecutive 326 * symlinks can cause almost arbitrarily long lookups. 327 */ 328static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) 329{ 330 int err; 331 if (current->link_count >= 5) 332 goto loop; 333 if (current->total_link_count >= 40) 334 goto loop; 335 if (current->need_resched) { 336 current->state = TASK_RUNNING; 337 schedule(); 338 } 339 current->link_count++; 340 current->total_link_count++; 341 UPDATE_ATIME(dentry->d_inode); 342 err = dentry->d_inode->i_op->follow_link(dentry, nd); 343 current->link_count--; 344 return err; 345loop: 346 path_release(nd); 347 return -ELOOP; 348} 349 350static inline int __follow_up(struct vfsmount **mnt, struct dentry **base) 351{ 352 struct vfsmount *parent; 353 struct dentry *dentry; 354 spin_lock(&dcache_lock); 355 parent=(*mnt)->mnt_parent; 356 if (parent == *mnt) { 357 spin_unlock(&dcache_lock); 358 return 0; 359 } 360 mntget(parent); 361 dentry=dget((*mnt)->mnt_mountpoint); 362 spin_unlock(&dcache_lock); 363 dput(*base); 364 *base = dentry; 365 mntput(*mnt); 366 *mnt = parent; 367 return 1; 368} 369 370int follow_up(struct vfsmount **mnt, struct dentry **dentry) 371{ 372 return __follow_up(mnt, dentry); 373} 374 375static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry) 376{ 377 struct vfsmount *mounted; 378 379 spin_lock(&dcache_lock); 380 mounted = lookup_mnt(*mnt, *dentry); 381 if (mounted) { 382 *mnt = mntget(mounted); 383 spin_unlock(&dcache_lock); 384 dput(*dentry); 385 mntput(mounted->mnt_parent); 386 *dentry = dget(mounted->mnt_root); 387 return 1; 388 } 389 spin_unlock(&dcache_lock); 390 return 0; 391} 392 393int follow_down(struct vfsmount **mnt, struct dentry **dentry) 394{ 395 return __follow_down(mnt,dentry); 396} 397 398static inline void follow_dotdot(struct nameidata *nd) 399{ 400 while(1) { 401 struct vfsmount *parent; 402 struct dentry *dentry; 403 read_lock(¤t->fs->lock); 404 if (nd->dentry == current->fs->root && 405 nd->mnt == current->fs->rootmnt) { 406 read_unlock(¤t->fs->lock); 407 break; 408 } 409 read_unlock(¤t->fs->lock); 410 spin_lock(&dcache_lock); 411 if (nd->dentry != nd->mnt->mnt_root) { 412 dentry = dget(nd->dentry->d_parent); 413 spin_unlock(&dcache_lock); 414 dput(nd->dentry); 415 nd->dentry = dentry; 416 break; 417 } 418 parent=nd->mnt->mnt_parent; 419 if (parent == nd->mnt) { 420 spin_unlock(&dcache_lock); 421 break; 422 } 423 mntget(parent); 424 dentry=dget(nd->mnt->mnt_mountpoint); 425 spin_unlock(&dcache_lock); 426 dput(nd->dentry); 427 nd->dentry = dentry; 428 mntput(nd->mnt); 429 nd->mnt = parent; 430 } 431 while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry)) 432 ; 433} 434 435/* 436 * Name resolution. 437 * 438 * This is the basic name resolution function, turning a pathname 439 * into the final dentry. 440 * 441 * We expect 'base' to be positive and a directory. 442 */ 443int link_path_walk(const char * name, struct nameidata *nd) 444{ 445 struct dentry *dentry; 446 struct inode *inode; 447 int err; 448 unsigned int lookup_flags = nd->flags; 449 450 while (*name=='/') 451 name++; 452 if (!*name) 453 goto return_reval; 454 455 inode = nd->dentry->d_inode; 456 if (current->link_count) 457 lookup_flags = LOOKUP_FOLLOW; 458 459 /* At this point we know we have a real path component. */ 460 for(;;) { 461 unsigned long hash; 462 struct qstr this; 463 unsigned int c; 464 465 err = permission(inode, MAY_EXEC); 466 dentry = ERR_PTR(err); 467 if (err) 468 break; 469 470 this.name = name; 471 c = *(const unsigned char *)name; 472 473 hash = init_name_hash(); 474 do { 475 name++; 476 hash = partial_name_hash(c, hash); 477 c = *(const unsigned char *)name; 478 } while (c && (c != '/')); 479 this.len = name - (const char *) this.name; 480 this.hash = end_name_hash(hash); 481 482 /* remove trailing slashes? */ 483 if (!c) 484 goto last_component; 485 while (*++name == '/'); 486 if (!*name) 487 goto last_with_slashes; 488 489 /* 490 * "." and ".." are special - ".." especially so because it has 491 * to be able to know about the current root directory and 492 * parent relationships. 493 */ 494 if (this.name[0] == '.') switch (this.len) { 495 default: 496 break; 497 case 2: 498 if (this.name[1] != '.') 499 break; 500 follow_dotdot(nd); 501 inode = nd->dentry->d_inode; 502 /* fallthrough */ 503 case 1: 504 continue; 505 } 506 /* 507 * See if the low-level filesystem might want 508 * to use its own hash.. 509 */ 510 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { 511 err = nd->dentry->d_op->d_hash(nd->dentry, &this); 512 if (err < 0) 513 break; 514 } 515 /* This does the actual lookups.. */ 516 dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE); 517 if (!dentry) { 518 dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE); 519 err = PTR_ERR(dentry); 520 if (IS_ERR(dentry)) 521 break; 522 } 523 /* Check mountpoints.. */ 524 while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) 525 ; 526 527 err = -ENOENT; 528 inode = dentry->d_inode; 529 if (!inode) 530 goto out_dput; 531 err = -ENOTDIR; 532 if (!inode->i_op) 533 goto out_dput; 534 535 if (inode->i_op->follow_link) { 536 err = do_follow_link(dentry, nd); 537 dput(dentry); 538 if (err) 539 goto return_err; 540 err = -ENOENT; 541 inode = nd->dentry->d_inode; 542 if (!inode) 543 break; 544 err = -ENOTDIR; 545 if (!inode->i_op) 546 break; 547 } else { 548 dput(nd->dentry); 549 nd->dentry = dentry; 550 } 551 err = -ENOTDIR; 552 if (!inode->i_op->lookup) 553 break; 554 continue; 555 /* here ends the main loop */ 556 557last_with_slashes: 558 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 559last_component: 560 if (lookup_flags & LOOKUP_PARENT) 561 goto lookup_parent; 562 if (this.name[0] == '.') switch (this.len) { 563 default: 564 break; 565 case 2: 566 if (this.name[1] != '.') 567 break; 568 follow_dotdot(nd); 569 inode = nd->dentry->d_inode; 570 /* fallthrough */ 571 case 1: 572 goto return_reval; 573 } 574 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { 575 err = nd->dentry->d_op->d_hash(nd->dentry, &this); 576 if (err < 0) 577 break; 578 } 579 dentry = cached_lookup(nd->dentry, &this, 0); 580 if (!dentry) { 581 dentry = real_lookup(nd->dentry, &this, 0); 582 err = PTR_ERR(dentry); 583 if (IS_ERR(dentry)) 584 break; 585 } 586 while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) 587 ; 588 inode = dentry->d_inode; 589 if ((lookup_flags & LOOKUP_FOLLOW) 590 && inode && inode->i_op && inode->i_op->follow_link) { 591 err = do_follow_link(dentry, nd); 592 dput(dentry); 593 if (err) 594 goto return_err; 595 inode = nd->dentry->d_inode; 596 } else { 597 dput(nd->dentry); 598 nd->dentry = dentry; 599 } 600 err = -ENOENT; 601 if (!inode) 602 goto no_inode; 603 if (lookup_flags & LOOKUP_DIRECTORY) { 604 err = -ENOTDIR; 605 if (!inode->i_op || !inode->i_op->lookup) 606 break; 607 } 608 goto return_base; 609no_inode: 610 err = -ENOENT; 611 if (lookup_flags & (LOOKUP_POSITIVE|LOOKUP_DIRECTORY)) 612 break; 613 goto return_base; 614lookup_parent: 615 nd->last = this; 616 nd->last_type = LAST_NORM; 617 if (this.name[0] != '.') 618 goto return_base; 619 if (this.len == 1) 620 nd->last_type = LAST_DOT; 621 else if (this.len == 2 && this.name[1] == '.') 622 nd->last_type = LAST_DOTDOT; 623return_reval: 624 /* 625 * We bypassed the ordinary revalidation routines. 626 * Check the cached dentry for staleness. 627 */ 628 dentry = nd->dentry; 629 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { 630 err = -ESTALE; 631 if (!dentry->d_op->d_revalidate(dentry, 0)) { 632 d_invalidate(dentry); 633 break; 634 } 635 } 636return_base: 637 return 0; 638out_dput: 639 dput(dentry); 640 break; 641 } 642 path_release(nd); 643return_err: 644 return err; 645} 646 647int path_walk(const char * name, struct nameidata *nd) 648{ 649 current->total_link_count = 0; 650 return link_path_walk(name, nd); 651} 652 653/* SMP-safe */ 654/* returns 1 if everything is done */ 655static int __emul_lookup_dentry(const char *name, struct nameidata *nd) 656{ 657 if (path_walk(name, nd)) 658 return 0; /* something went wrong... */ 659 660 if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) { 661 struct nameidata nd_root; 662 /* 663 * NAME was not found in alternate root or it's a directory. Try to find 664 * it in the normal root: 665 */ 666 nd_root.last_type = LAST_ROOT; 667 nd_root.flags = nd->flags; 668 read_lock(¤t->fs->lock); 669 nd_root.mnt = mntget(current->fs->rootmnt); 670 nd_root.dentry = dget(current->fs->root); 671 read_unlock(¤t->fs->lock); 672 if (path_walk(name, &nd_root)) 673 return 1; 674 if (nd_root.dentry->d_inode) { 675 path_release(nd); 676 nd->dentry = nd_root.dentry; 677 nd->mnt = nd_root.mnt; 678 nd->last = nd_root.last; 679 return 1; 680 } 681 path_release(&nd_root); 682 } 683 return 1; 684} 685 686void set_fs_altroot(void) 687{ 688 char *emul = __emul_prefix(); 689 struct nameidata nd; 690 struct vfsmount *mnt = NULL, *oldmnt; 691 struct dentry *dentry = NULL, *olddentry; 692 if (emul) { 693 read_lock(¤t->fs->lock); 694 nd.mnt = mntget(current->fs->rootmnt); 695 nd.dentry = dget(current->fs->root); 696 read_unlock(¤t->fs->lock); 697 nd.flags = LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_POSITIVE; 698 if (path_walk(emul,&nd) == 0) { 699 mnt = nd.mnt; 700 dentry = nd.dentry; 701 } 702 } 703 write_lock(¤t->fs->lock); 704 oldmnt = current->fs->altrootmnt; 705 olddentry = current->fs->altroot; 706 current->fs->altrootmnt = mnt; 707 current->fs->altroot = dentry; 708 write_unlock(¤t->fs->lock); 709 if (olddentry) { 710 dput(olddentry); 711 mntput(oldmnt); 712 } 713} 714 715/* SMP-safe */ 716static inline int 717walk_init_root(const char *name, struct nameidata *nd) 718{ 719 read_lock(¤t->fs->lock); 720 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { 721 nd->mnt = mntget(current->fs->altrootmnt); 722 nd->dentry = dget(current->fs->altroot); 723 read_unlock(¤t->fs->lock); 724 if (__emul_lookup_dentry(name,nd)) 725 return 0; 726 read_lock(¤t->fs->lock); 727 } 728 nd->mnt = mntget(current->fs->rootmnt); 729 nd->dentry = dget(current->fs->root); 730 read_unlock(¤t->fs->lock); 731 return 1; 732} 733 734/* SMP-safe */ 735int path_lookup(const char *path, unsigned flags, struct nameidata *nd) 736{ 737 int error = 0; 738 if (path_init(path, flags, nd)) 739 error = path_walk(path, nd); 740 return error; 741} 742 743 744/* SMP-safe */ 745int path_init(const char *name, unsigned int flags, struct nameidata *nd) 746{ 747 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 748 nd->flags = flags; 749 if (*name=='/') 750 return walk_init_root(name,nd); 751 read_lock(¤t->fs->lock); 752 nd->mnt = mntget(current->fs->pwdmnt); 753 nd->dentry = dget(current->fs->pwd); 754 read_unlock(¤t->fs->lock); 755 return 1; 756} 757 758/* 759 * Restricted form of lookup. Doesn't follow links, single-component only, 760 * needs parent already locked. Doesn't follow mounts. 761 * SMP-safe. 762 */ 763struct dentry * lookup_hash(struct qstr *name, struct dentry * base) 764{ 765 struct dentry * dentry; 766 struct inode *inode; 767 int err; 768 769 inode = base->d_inode; 770 err = permission(inode, MAY_EXEC); 771 dentry = ERR_PTR(err); 772 if (err) 773 goto out; 774 775 /* 776 * See if the low-level filesystem might want 777 * to use its own hash.. 778 */ 779 if (base->d_op && base->d_op->d_hash) { 780 err = base->d_op->d_hash(base, name); 781 dentry = ERR_PTR(err); 782 if (err < 0) 783 goto out; 784 } 785 786 dentry = cached_lookup(base, name, 0); 787 if (!dentry) { 788 struct dentry *new = d_alloc(base, name); 789 dentry = ERR_PTR(-ENOMEM); 790 if (!new) 791 goto out; 792 lock_kernel(); 793 dentry = inode->i_op->lookup(inode, new); 794 unlock_kernel(); 795 if (!dentry) 796 dentry = new; 797 else 798 dput(new); 799 } 800out: 801 return dentry; 802} 803 804/* SMP-safe */ 805struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) 806{ 807 unsigned long hash; 808 struct qstr this; 809 unsigned int c; 810 811 this.name = name; 812 this.len = len; 813 if (!len) 814 goto access; 815 816 hash = init_name_hash(); 817 while (len--) { 818 c = *(const unsigned char *)name++; 819 if (c == '/' || c == '\0') 820 goto access; 821 hash = partial_name_hash(c, hash); 822 } 823 this.hash = end_name_hash(hash); 824 825 return lookup_hash(&this, base); 826access: 827 return ERR_PTR(-EACCES); 828} 829 830/* 831 * namei() 832 * 833 * is used by most simple commands to get the inode of a specified name. 834 * Open, link etc use their own routines, but this is enough for things 835 * like 'chmod' etc. 836 * 837 * namei exists in two versions: namei/lnamei. The only difference is 838 * that namei follows links, while lnamei does not. 839 * SMP-safe 840 */ 841int __user_walk(const char *name, unsigned flags, struct nameidata *nd) 842{ 843 char *tmp; 844 int err; 845 846 tmp = getname(name); 847 err = PTR_ERR(tmp); 848 if (!IS_ERR(tmp)) { 849 err = 0; 850 err = path_lookup(tmp, flags, nd); 851 putname(tmp); 852 } 853 return err; 854} 855 856/* 857 * It's inline, so penalty for filesystems that don't use sticky bit is 858 * minimal. 859 */ 860static inline int check_sticky(struct inode *dir, struct inode *inode) 861{ 862 if (!(dir->i_mode & S_ISVTX)) 863 return 0; 864 if (inode->i_uid == current->fsuid) 865 return 0; 866 if (dir->i_uid == current->fsuid) 867 return 0; 868 return !capable(CAP_FOWNER); 869} 870 871/* 872 * Check whether we can remove a link victim from directory dir, check 873 * whether the type of victim is right. 874 * 1. We can't do it if dir is read-only (done in permission()) 875 * 2. We should have write and exec permissions on dir 876 * 3. We can't remove anything from append-only dir 877 * 4. We can't do anything with immutable dir (done in permission()) 878 * 5. If the sticky bit on dir is set we should either 879 * a. be owner of dir, or 880 * b. be owner of victim, or 881 * c. have CAP_FOWNER capability 882 * 6. If the victim is append-only or immutable we can't do antyhing with 883 * links pointing to it. 884 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. 885 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. 886 * 9. We can't remove a root or mountpoint. 887 */ 888static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir) 889{ 890 int error; 891 if (!victim->d_inode || victim->d_parent->d_inode != dir) 892 return -ENOENT; 893 error = permission(dir,MAY_WRITE | MAY_EXEC); 894 if (error) 895 return error; 896 if (IS_APPEND(dir)) 897 return -EPERM; 898 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| 899 IS_IMMUTABLE(victim->d_inode)) 900 return -EPERM; 901 if (isdir) { 902 if (!S_ISDIR(victim->d_inode->i_mode)) 903 return -ENOTDIR; 904 if (IS_ROOT(victim)) 905 return -EBUSY; 906 } else if (S_ISDIR(victim->d_inode->i_mode)) 907 return -EISDIR; 908 if (IS_DEADDIR(dir)) 909 return -ENOENT; 910 return 0; 911} 912 913/* Check whether we can create an object with dentry child in directory 914 * dir. 915 * 1. We can't do it if child already exists (open has special treatment for 916 * this case, but since we are inlined it's OK) 917 * 2. We can't do it if dir is read-only (done in permission()) 918 * 3. We should have write and exec permissions on dir 919 * 4. We can't do it if dir is immutable (done in permission()) 920 */ 921static inline int may_create(struct inode *dir, struct dentry *child) { 922 if (child->d_inode) 923 return -EEXIST; 924 if (IS_DEADDIR(dir)) 925 return -ENOENT; 926 return permission(dir,MAY_WRITE | MAY_EXEC); 927} 928 929/* 930 * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security 931 * reasons. 932 * 933 * O_DIRECTORY translates into forcing a directory lookup. 934 */ 935static inline int lookup_flags(unsigned int f) 936{ 937 unsigned long retval = LOOKUP_FOLLOW; 938 939 if (f & O_NOFOLLOW) 940 retval &= ~LOOKUP_FOLLOW; 941 942 if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) 943 retval &= ~LOOKUP_FOLLOW; 944 945 if (f & O_DIRECTORY) 946 retval |= LOOKUP_DIRECTORY; 947 948 return retval; 949} 950 951int vfs_create(struct inode *dir, struct dentry *dentry, int mode) 952{ 953 int error; 954 955 mode &= S_IALLUGO; 956 mode |= S_IFREG; 957 958 down(&dir->i_zombie); 959 error = may_create(dir, dentry); 960 if (error) 961 goto exit_lock; 962 963 error = -EACCES; /* shouldn't it be ENOSYS? */ 964 if (!dir->i_op || !dir->i_op->create) 965 goto exit_lock; 966 967 DQUOT_INIT(dir); 968 lock_kernel(); 969 error = dir->i_op->create(dir, dentry, mode); 970 unlock_kernel(); 971exit_lock: 972 up(&dir->i_zombie); 973 if (!error) 974 inode_dir_notify(dir, DN_CREATE); 975 return error; 976} 977 978/* 979 * open_namei() 980 * 981 * namei for open - this is in fact almost the whole open-routine. 982 * 983 * Note that the low bits of "flag" aren't the same as in the open 984 * system call - they are 00 - no permissions needed 985 * 01 - read permission needed 986 * 10 - write permission needed 987 * 11 - read/write permissions needed 988 * which is a lot more logical, and also allows the "no perm" needed 989 * for symlinks (where the permissions are checked later). 990 * SMP-safe 991 */ 992int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) 993{ 994 int acc_mode, error = 0; 995 struct inode *inode; 996 struct dentry *dentry; 997 struct dentry *dir; 998 int count = 0; 999 1000 acc_mode = ACC_MODE(flag); 1001 1002 /* 1003 * The simplest case - just a plain lookup. 1004 */ 1005 if (!(flag & O_CREAT)) { 1006 error = path_lookup(pathname, lookup_flags(flag), nd); 1007 if (error) 1008 return error; 1009 dentry = nd->dentry; 1010 goto ok; 1011 } 1012 1013 /* 1014 * Create - we need to know the parent. 1015 */ 1016 error = path_lookup(pathname, LOOKUP_PARENT, nd); 1017 if (error) 1018 return error; 1019 1020 /* 1021 * We have the parent and last component. First of all, check 1022 * that we are not asked to creat(2) an obvious directory - that 1023 * will not do. 1024 */ 1025 error = -EISDIR; 1026 if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len]) 1027 goto exit; 1028 1029 dir = nd->dentry; 1030 down(&dir->d_inode->i_sem); 1031 dentry = lookup_hash(&nd->last, nd->dentry); 1032 1033do_last: 1034 error = PTR_ERR(dentry); 1035 if (IS_ERR(dentry)) { 1036 up(&dir->d_inode->i_sem); 1037 goto exit; 1038 } 1039 1040 /* Negative dentry, just create the file */ 1041 if (!dentry->d_inode) { 1042 error = vfs_create(dir->d_inode, dentry, 1043 mode & ~current->fs->umask); 1044 up(&dir->d_inode->i_sem); 1045 dput(nd->dentry); 1046 nd->dentry = dentry; 1047 if (error) 1048 goto exit; 1049 /* Don't check for write permission, don't truncate */ 1050 acc_mode = 0; 1051 flag &= ~O_TRUNC; 1052 goto ok; 1053 } 1054 1055 /* 1056 * It already exists. 1057 */ 1058 up(&dir->d_inode->i_sem); 1059 1060 error = -EEXIST; 1061 if (flag & O_EXCL) 1062 goto exit_dput; 1063 1064 if (d_mountpoint(dentry)) { 1065 error = -ELOOP; 1066 if (flag & O_NOFOLLOW) 1067 goto exit_dput; 1068 while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry)); 1069 } 1070 error = -ENOENT; 1071 if (!dentry->d_inode) 1072 goto exit_dput; 1073 if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link) 1074 goto do_link; 1075 1076 dput(nd->dentry); 1077 nd->dentry = dentry; 1078 error = -EISDIR; 1079 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) 1080 goto exit; 1081ok: 1082 error = -ENOENT; 1083 inode = dentry->d_inode; 1084 if (!inode) 1085 goto exit; 1086 1087 error = -ELOOP; 1088 if (S_ISLNK(inode->i_mode)) 1089 goto exit; 1090 1091 error = -EISDIR; 1092 if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) 1093 goto exit; 1094 1095 error = permission(inode,acc_mode); 1096 if (error) 1097 goto exit; 1098 1099 /* 1100 * FIFO's, sockets and device files are special: they don't 1101 * actually live on the filesystem itself, and as such you 1102 * can write to them even if the filesystem is read-only. 1103 */ 1104 if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 1105 flag &= ~O_TRUNC; 1106 } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { 1107 error = -EACCES; 1108 if (nd->mnt->mnt_flags & MNT_NODEV) 1109 goto exit; 1110 1111 flag &= ~O_TRUNC; 1112 } else { 1113 error = -EROFS; 1114 if (IS_RDONLY(inode) && (flag & 2)) 1115 goto exit; 1116 } 1117 /* 1118 * An append-only file must be opened in append mode for writing. 1119 */ 1120 error = -EPERM; 1121 if (IS_APPEND(inode)) { 1122 if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) 1123 goto exit; 1124 if (flag & O_TRUNC) 1125 goto exit; 1126 } 1127 1128 /* 1129 * Ensure there are no outstanding leases on the file. 1130 */ 1131 error = get_lease(inode, flag); 1132 if (error) 1133 goto exit; 1134 1135 if (flag & O_TRUNC) { 1136 error = get_write_access(inode); 1137 if (error) 1138 goto exit; 1139 1140 /* 1141 * Refuse to truncate files with mandatory locks held on them. 1142 */ 1143 error = locks_verify_locked(inode); 1144 if (!error) { 1145 DQUOT_INIT(inode); 1146 1147 error = do_truncate(dentry, 0); 1148 } 1149 put_write_access(inode); 1150 if (error) 1151 goto exit; 1152 } else 1153 if (flag & FMODE_WRITE) 1154 DQUOT_INIT(inode); 1155 1156 return 0; 1157 1158exit_dput: 1159 dput(dentry); 1160exit: 1161 path_release(nd); 1162 return error; 1163 1164do_link: 1165 error = -ELOOP; 1166 if (flag & O_NOFOLLOW) 1167 goto exit_dput; 1168 /* 1169 * This is subtle. Instead of calling do_follow_link() we do the 1170 * thing by hands. The reason is that this way we have zero link_count 1171 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT. 1172 * After that we have the parent and last component, i.e. 1173 * we are in the same situation as after the first path_walk(). 1174 * Well, almost - if the last component is normal we get its copy 1175 * stored in nd->last.name and we will have to putname() it when we 1176 * are done. Procfs-like symlinks just set LAST_BIND. 1177 */ 1178 UPDATE_ATIME(dentry->d_inode); 1179 error = dentry->d_inode->i_op->follow_link(dentry, nd); 1180 dput(dentry); 1181 if (error) 1182 return error; 1183 if (nd->last_type == LAST_BIND) { 1184 dentry = nd->dentry; 1185 goto ok; 1186 } 1187 error = -EISDIR; 1188 if (nd->last_type != LAST_NORM) 1189 goto exit; 1190 if (nd->last.name[nd->last.len]) { 1191 putname(nd->last.name); 1192 goto exit; 1193 } 1194 error = -ELOOP; 1195 if (count++==32) { 1196 putname(nd->last.name); 1197 goto exit; 1198 } 1199 dir = nd->dentry; 1200 down(&dir->d_inode->i_sem); 1201 dentry = lookup_hash(&nd->last, nd->dentry); 1202 putname(nd->last.name); 1203 goto do_last; 1204} 1205 1206/* SMP-safe */ 1207static struct dentry *lookup_create(struct nameidata *nd, int is_dir) 1208{ 1209 struct dentry *dentry; 1210 1211 down(&nd->dentry->d_inode->i_sem); 1212 dentry = ERR_PTR(-EEXIST); 1213 if (nd->last_type != LAST_NORM) 1214 goto fail; 1215 dentry = lookup_hash(&nd->last, nd->dentry); 1216 if (IS_ERR(dentry)) 1217 goto fail; 1218 if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) 1219 goto enoent; 1220 return dentry; 1221enoent: 1222 dput(dentry); 1223 dentry = ERR_PTR(-ENOENT); 1224fail: 1225 return dentry; 1226} 1227 1228int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 1229{ 1230 int error = -EPERM; 1231 1232 down(&dir->i_zombie); 1233 if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) 1234 goto exit_lock; 1235 1236 error = may_create(dir, dentry); 1237 if (error) 1238 goto exit_lock; 1239 1240 error = -EPERM; 1241 if (!dir->i_op || !dir->i_op->mknod) 1242 goto exit_lock; 1243 1244 DQUOT_INIT(dir); 1245 lock_kernel(); 1246 error = dir->i_op->mknod(dir, dentry, mode, dev); 1247 unlock_kernel(); 1248exit_lock: 1249 up(&dir->i_zombie); 1250 if (!error) 1251 inode_dir_notify(dir, DN_CREATE); 1252 return error; 1253} 1254 1255asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev) 1256{ 1257 int error = 0; 1258 char * tmp; 1259 struct dentry * dentry; 1260 struct nameidata nd; 1261 1262 if (S_ISDIR(mode)) 1263 return -EPERM; 1264 tmp = getname(filename); 1265 if (IS_ERR(tmp)) 1266 return PTR_ERR(tmp); 1267 1268 error = path_lookup(tmp, LOOKUP_PARENT, &nd); 1269 if (error) 1270 goto out; 1271 dentry = lookup_create(&nd, 0); 1272 error = PTR_ERR(dentry); 1273 1274 mode &= ~current->fs->umask; 1275 if (!IS_ERR(dentry)) { 1276 switch (mode & S_IFMT) { 1277 case 0: case S_IFREG: 1278 error = vfs_create(nd.dentry->d_inode,dentry,mode); 1279 break; 1280 case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: 1281 error = vfs_mknod(nd.dentry->d_inode,dentry,mode,dev); 1282 break; 1283 case S_IFDIR: 1284 error = -EPERM; 1285 break; 1286 default: 1287 error = -EINVAL; 1288 } 1289 dput(dentry); 1290 } 1291 up(&nd.dentry->d_inode->i_sem); 1292 path_release(&nd); 1293out: 1294 putname(tmp); 1295 1296 return error; 1297} 1298 1299int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 1300{ 1301 int error; 1302 1303 down(&dir->i_zombie); 1304 error = may_create(dir, dentry); 1305 if (error) 1306 goto exit_lock; 1307 1308 error = -EPERM; 1309 if (!dir->i_op || !dir->i_op->mkdir) 1310 goto exit_lock; 1311 1312 DQUOT_INIT(dir); 1313 mode &= (S_IRWXUGO|S_ISVTX); 1314 lock_kernel(); 1315 error = dir->i_op->mkdir(dir, dentry, mode); 1316 unlock_kernel(); 1317 1318exit_lock: 1319 up(&dir->i_zombie); 1320 if (!error) 1321 inode_dir_notify(dir, DN_CREATE); 1322 return error; 1323} 1324 1325asmlinkage long sys_mkdir(const char * pathname, int mode) 1326{ 1327 int error = 0; 1328 char * tmp; 1329 1330 tmp = getname(pathname); 1331 error = PTR_ERR(tmp); 1332 if (!IS_ERR(tmp)) { 1333 struct dentry *dentry; 1334 struct nameidata nd; 1335 1336 error = path_lookup(tmp, LOOKUP_PARENT, &nd); 1337 if (error) 1338 goto out; 1339 dentry = lookup_create(&nd, 1); 1340 error = PTR_ERR(dentry); 1341 if (!IS_ERR(dentry)) { 1342 error = vfs_mkdir(nd.dentry->d_inode, dentry, 1343 mode & ~current->fs->umask); 1344 dput(dentry); 1345 } 1346 up(&nd.dentry->d_inode->i_sem); 1347 path_release(&nd); 1348out: 1349 putname(tmp); 1350 } 1351 1352 return error; 1353} 1354 1355/* 1356 * We try to drop the dentry early: we should have 1357 * a usage count of 2 if we're the only user of this 1358 * dentry, and if that is true (possibly after pruning 1359 * the dcache), then we drop the dentry now. 1360 * 1361 * A low-level filesystem can, if it choses, legally 1362 * do a 1363 * 1364 * if (!d_unhashed(dentry)) 1365 * return -EBUSY; 1366 * 1367 * if it cannot handle the case of removing a directory 1368 * that is still in use by something else.. 1369 */ 1370static void d_unhash(struct dentry *dentry) 1371{ 1372 dget(dentry); 1373 spin_lock(&dcache_lock); 1374 switch (atomic_read(&dentry->d_count)) { 1375 default: 1376 spin_unlock(&dcache_lock); 1377 shrink_dcache_parent(dentry); 1378 spin_lock(&dcache_lock); 1379 if (atomic_read(&dentry->d_count) != 2) 1380 break; 1381 case 2: 1382 list_del_init(&dentry->d_hash); 1383 } 1384 spin_unlock(&dcache_lock); 1385} 1386 1387int vfs_rmdir(struct inode *dir, struct dentry *dentry) 1388{ 1389 int error; 1390 1391 error = may_delete(dir, dentry, 1); 1392 if (error) 1393 return error; 1394 1395 if (!dir->i_op || !dir->i_op->rmdir) 1396 return -EPERM; 1397 1398 DQUOT_INIT(dir); 1399 1400 double_down(&dir->i_zombie, &dentry->d_inode->i_zombie); 1401 d_unhash(dentry); 1402 if (d_mountpoint(dentry)) 1403 error = -EBUSY; 1404 else { 1405 lock_kernel(); 1406 error = dir->i_op->rmdir(dir, dentry); 1407 unlock_kernel(); 1408 if (!error) 1409 dentry->d_inode->i_flags |= S_DEAD; 1410 } 1411 double_up(&dir->i_zombie, &dentry->d_inode->i_zombie); 1412 if (!error) { 1413 inode_dir_notify(dir, DN_DELETE); 1414 d_delete(dentry); 1415 } 1416 dput(dentry); 1417 1418 return error; 1419} 1420 1421asmlinkage long sys_rmdir(const char * pathname) 1422{ 1423 int error = 0; 1424 char * name; 1425 struct dentry *dentry; 1426 struct nameidata nd; 1427 1428 name = getname(pathname); 1429 if(IS_ERR(name)) 1430 return PTR_ERR(name); 1431 1432 error = path_lookup(name, LOOKUP_PARENT, &nd); 1433 if (error) 1434 goto exit; 1435 1436 switch(nd.last_type) { 1437 case LAST_DOTDOT: 1438 error = -ENOTEMPTY; 1439 goto exit1; 1440 case LAST_DOT: 1441 error = -EINVAL; 1442 goto exit1; 1443 case LAST_ROOT: 1444 error = -EBUSY; 1445 goto exit1; 1446 } 1447 down(&nd.dentry->d_inode->i_sem); 1448 dentry = lookup_hash(&nd.last, nd.dentry); 1449 error = PTR_ERR(dentry); 1450 if (!IS_ERR(dentry)) { 1451 error = vfs_rmdir(nd.dentry->d_inode, dentry); 1452 dput(dentry); 1453 } 1454 up(&nd.dentry->d_inode->i_sem); 1455exit1: 1456 path_release(&nd); 1457exit: 1458 putname(name); 1459 return error; 1460} 1461 1462int vfs_unlink(struct inode *dir, struct dentry *dentry) 1463{ 1464 int error; 1465 1466 down(&dir->i_zombie); 1467 error = may_delete(dir, dentry, 0); 1468 if (!error) { 1469 error = -EPERM; 1470 if (dir->i_op && dir->i_op->unlink) { 1471 DQUOT_INIT(dir); 1472 if (d_mountpoint(dentry)) 1473 error = -EBUSY; 1474 else { 1475 lock_kernel(); 1476 error = dir->i_op->unlink(dir, dentry); 1477 unlock_kernel(); 1478 if (!error) 1479 d_delete(dentry); 1480 } 1481 } 1482 } 1483 up(&dir->i_zombie); 1484 if (!error) 1485 inode_dir_notify(dir, DN_DELETE); 1486 return error; 1487} 1488 1489asmlinkage long sys_unlink(const char * pathname) 1490{ 1491 int error = 0; 1492 char * name; 1493 struct dentry *dentry; 1494 struct nameidata nd; 1495 1496 name = getname(pathname); 1497 if(IS_ERR(name)) 1498 return PTR_ERR(name); 1499 1500 error = path_lookup(name, LOOKUP_PARENT, &nd); 1501 if (error) 1502 goto exit; 1503 error = -EISDIR; 1504 if (nd.last_type != LAST_NORM) 1505 goto exit1; 1506 down(&nd.dentry->d_inode->i_sem); 1507 dentry = lookup_hash(&nd.last, nd.dentry); 1508 error = PTR_ERR(dentry); 1509 if (!IS_ERR(dentry)) { 1510 /* Why not before? Because we want correct error value */ 1511 if (nd.last.name[nd.last.len]) 1512 goto slashes; 1513 error = vfs_unlink(nd.dentry->d_inode, dentry); 1514 exit2: 1515 dput(dentry); 1516 } 1517 up(&nd.dentry->d_inode->i_sem); 1518exit1: 1519 path_release(&nd); 1520exit: 1521 putname(name); 1522 1523 return error; 1524 1525slashes: 1526 error = !dentry->d_inode ? -ENOENT : 1527 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR; 1528 goto exit2; 1529} 1530 1531int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) 1532{ 1533 int error; 1534 1535 down(&dir->i_zombie); 1536 error = may_create(dir, dentry); 1537 if (error) 1538 goto exit_lock; 1539 1540 error = -EPERM; 1541 if (!dir->i_op || !dir->i_op->symlink) 1542 goto exit_lock; 1543 1544 DQUOT_INIT(dir); 1545 lock_kernel(); 1546 error = dir->i_op->symlink(dir, dentry, oldname); 1547 unlock_kernel(); 1548 1549exit_lock: 1550 up(&dir->i_zombie); 1551 if (!error) 1552 inode_dir_notify(dir, DN_CREATE); 1553 return error; 1554} 1555 1556asmlinkage long sys_symlink(const char * oldname, const char * newname) 1557{ 1558 int error = 0; 1559 char * from; 1560 char * to; 1561 1562 from = getname(oldname); 1563 if(IS_ERR(from)) 1564 return PTR_ERR(from); 1565 to = getname(newname); 1566 error = PTR_ERR(to); 1567 if (!IS_ERR(to)) { 1568 struct dentry *dentry; 1569 struct nameidata nd; 1570 1571 error = path_lookup(to, LOOKUP_PARENT, &nd); 1572 if (error) 1573 goto out; 1574 dentry = lookup_create(&nd, 0); 1575 error = PTR_ERR(dentry); 1576 if (!IS_ERR(dentry)) { 1577 error = vfs_symlink(nd.dentry->d_inode, dentry, from); 1578 dput(dentry); 1579 } 1580 up(&nd.dentry->d_inode->i_sem); 1581 path_release(&nd); 1582out: 1583 putname(to); 1584 } 1585 putname(from); 1586 return error; 1587} 1588 1589int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) 1590{ 1591 struct inode *inode; 1592 int error; 1593 1594 down(&dir->i_zombie); 1595 error = -ENOENT; 1596 inode = old_dentry->d_inode; 1597 if (!inode) 1598 goto exit_lock; 1599 1600 error = may_create(dir, new_dentry); 1601 if (error) 1602 goto exit_lock; 1603 1604 error = -EXDEV; 1605 if (dir->i_dev != inode->i_dev) 1606 goto exit_lock; 1607 1608 /* 1609 * A link to an append-only or immutable file cannot be created. 1610 */ 1611 error = -EPERM; 1612 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 1613 goto exit_lock; 1614 if (!dir->i_op || !dir->i_op->link) 1615 goto exit_lock; 1616 1617 DQUOT_INIT(dir); 1618 lock_kernel(); 1619 error = dir->i_op->link(old_dentry, dir, new_dentry); 1620 unlock_kernel(); 1621 1622exit_lock: 1623 up(&dir->i_zombie); 1624 if (!error) 1625 inode_dir_notify(dir, DN_CREATE); 1626 return error; 1627} 1628 1629/* 1630 * Hardlinks are often used in delicate situations. We avoid 1631 * security-related surprises by not following symlinks on the 1632 * newname. --KAB 1633 * 1634 * We don't follow them on the oldname either to be compatible 1635 * with linux 2.0, and to avoid hard-linking to directories 1636 * and other special files. --ADM 1637 */ 1638asmlinkage long sys_link(const char * oldname, const char * newname) 1639{ 1640 int error; 1641 char * to; 1642 1643 to = getname(newname); 1644 error = PTR_ERR(to); 1645 if (!IS_ERR(to)) { 1646 struct dentry *new_dentry; 1647 struct nameidata nd, old_nd; 1648 1649 error = __user_walk(oldname, LOOKUP_POSITIVE, &old_nd); 1650 if (error) 1651 goto exit; 1652 error = path_lookup(to, LOOKUP_PARENT, &nd); 1653 if (error) 1654 goto out; 1655 error = -EXDEV; 1656 if (old_nd.mnt != nd.mnt) 1657 goto out_release; 1658 new_dentry = lookup_create(&nd, 0); 1659 error = PTR_ERR(new_dentry); 1660 if (!IS_ERR(new_dentry)) { 1661 error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); 1662 dput(new_dentry); 1663 } 1664 up(&nd.dentry->d_inode->i_sem); 1665out_release: 1666 path_release(&nd); 1667out: 1668 path_release(&old_nd); 1669exit: 1670 putname(to); 1671 } 1672 return error; 1673} 1674 1675int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, 1676 struct inode *new_dir, struct dentry *new_dentry) 1677{ 1678 int error; 1679 struct inode *target; 1680 1681 if (old_dentry->d_inode == new_dentry->d_inode) 1682 return 0; 1683 1684 error = may_delete(old_dir, old_dentry, 1); 1685 if (error) 1686 return error; 1687 1688 if (new_dir->i_dev != old_dir->i_dev) 1689 return -EXDEV; 1690 1691 if (!new_dentry->d_inode) 1692 error = may_create(new_dir, new_dentry); 1693 else 1694 error = may_delete(new_dir, new_dentry, 1); 1695 if (error) 1696 return error; 1697 1698 if (!old_dir->i_op || !old_dir->i_op->rename) 1699 return -EPERM; 1700 1701 /* 1702 * If we are going to change the parent - check write permissions, 1703 * we'll need to flip '..'. 1704 */ 1705 if (new_dir != old_dir) { 1706 error = permission(old_dentry->d_inode, MAY_WRITE); 1707 } 1708 if (error) 1709 return error; 1710 1711 DQUOT_INIT(old_dir); 1712 DQUOT_INIT(new_dir); 1713 down(&old_dir->i_sb->s_vfs_rename_sem); 1714 error = -EINVAL; 1715 if (is_subdir(new_dentry, old_dentry)) 1716 goto out_unlock; 1717 /* Don't eat your daddy, dear... */ 1718 /* This also avoids locking issues */ 1719 if (old_dentry->d_parent == new_dentry) 1720 goto out_unlock; 1721 target = new_dentry->d_inode; 1722 if (target) { /* Hastur! Hastur! Hastur! */ 1723 triple_down(&old_dir->i_zombie, 1724 &new_dir->i_zombie, 1725 &target->i_zombie); 1726 d_unhash(new_dentry); 1727 } else 1728 double_down(&old_dir->i_zombie, 1729 &new_dir->i_zombie); 1730 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 1731 error = -EBUSY; 1732 else 1733 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 1734 if (target) { 1735 if (!error) 1736 target->i_flags |= S_DEAD; 1737 triple_up(&old_dir->i_zombie, 1738 &new_dir->i_zombie, 1739 &target->i_zombie); 1740 if (d_unhashed(new_dentry)) 1741 d_rehash(new_dentry); 1742 dput(new_dentry); 1743 } else 1744 double_up(&old_dir->i_zombie, 1745 &new_dir->i_zombie); 1746 1747 if (!error) 1748 d_move(old_dentry,new_dentry); 1749out_unlock: 1750 up(&old_dir->i_sb->s_vfs_rename_sem); 1751 return error; 1752} 1753 1754int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, 1755 struct inode *new_dir, struct dentry *new_dentry) 1756{ 1757 int error; 1758 1759 if (old_dentry->d_inode == new_dentry->d_inode) 1760 return 0; 1761 1762 error = may_delete(old_dir, old_dentry, 0); 1763 if (error) 1764 return error; 1765 1766 if (new_dir->i_dev != old_dir->i_dev) 1767 return -EXDEV; 1768 1769 if (!new_dentry->d_inode) 1770 error = may_create(new_dir, new_dentry); 1771 else 1772 error = may_delete(new_dir, new_dentry, 0); 1773 if (error) 1774 return error; 1775 1776 if (!old_dir->i_op || !old_dir->i_op->rename) 1777 return -EPERM; 1778 1779 DQUOT_INIT(old_dir); 1780 DQUOT_INIT(new_dir); 1781 double_down(&old_dir->i_zombie, &new_dir->i_zombie); 1782 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 1783 error = -EBUSY; 1784 else 1785 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 1786 double_up(&old_dir->i_zombie, &new_dir->i_zombie); 1787 if (error) 1788 return error; 1789 /* The following d_move() should become unconditional */ 1790 if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) { 1791 d_move(old_dentry, new_dentry); 1792 } 1793 return 0; 1794} 1795 1796int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 1797 struct inode *new_dir, struct dentry *new_dentry) 1798{ 1799 int error; 1800 if (S_ISDIR(old_dentry->d_inode->i_mode)) 1801 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); 1802 else 1803 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); 1804 if (!error) { 1805 if (old_dir == new_dir) 1806 inode_dir_notify(old_dir, DN_RENAME); 1807 else { 1808 inode_dir_notify(old_dir, DN_DELETE); 1809 inode_dir_notify(new_dir, DN_CREATE); 1810 } 1811 } 1812 return error; 1813} 1814 1815static inline int do_rename(const char * oldname, const char * newname) 1816{ 1817 int error = 0; 1818 struct dentry * old_dir, * new_dir; 1819 struct dentry * old_dentry, *new_dentry; 1820 struct nameidata oldnd, newnd; 1821 1822 error = path_lookup(oldname, LOOKUP_PARENT, &oldnd); 1823 if (error) 1824 goto exit; 1825 1826 error = path_lookup(newname, LOOKUP_PARENT, &newnd); 1827 if (error) 1828 goto exit1; 1829 1830 error = -EXDEV; 1831 if (oldnd.mnt != newnd.mnt) 1832 goto exit2; 1833 1834 old_dir = oldnd.dentry; 1835 error = -EBUSY; 1836 if (oldnd.last_type != LAST_NORM) 1837 goto exit2; 1838 1839 new_dir = newnd.dentry; 1840 if (newnd.last_type != LAST_NORM) 1841 goto exit2; 1842 1843 double_lock(new_dir, old_dir); 1844 1845 old_dentry = lookup_hash(&oldnd.last, old_dir); 1846 error = PTR_ERR(old_dentry); 1847 if (IS_ERR(old_dentry)) 1848 goto exit3; 1849 /* source must exist */ 1850 error = -ENOENT; 1851 if (!old_dentry->d_inode) 1852 goto exit4; 1853 /* unless the source is a directory trailing slashes give -ENOTDIR */ 1854 if (!S_ISDIR(old_dentry->d_inode->i_mode)) { 1855 error = -ENOTDIR; 1856 if (oldnd.last.name[oldnd.last.len]) 1857 goto exit4; 1858 if (newnd.last.name[newnd.last.len]) 1859 goto exit4; 1860 } 1861 new_dentry = lookup_hash(&newnd.last, new_dir); 1862 error = PTR_ERR(new_dentry); 1863 if (IS_ERR(new_dentry)) 1864 goto exit4; 1865 1866 lock_kernel(); 1867 error = vfs_rename(old_dir->d_inode, old_dentry, 1868 new_dir->d_inode, new_dentry); 1869 unlock_kernel(); 1870 1871 dput(new_dentry); 1872exit4: 1873 dput(old_dentry); 1874exit3: 1875 double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem); 1876exit2: 1877 path_release(&newnd); 1878exit1: 1879 path_release(&oldnd); 1880exit: 1881 return error; 1882} 1883 1884asmlinkage long sys_rename(const char * oldname, const char * newname) 1885{ 1886 int error; 1887 char * from; 1888 char * to; 1889 1890 from = getname(oldname); 1891 if(IS_ERR(from)) 1892 return PTR_ERR(from); 1893 to = getname(newname); 1894 error = PTR_ERR(to); 1895 if (!IS_ERR(to)) { 1896 error = do_rename(from,to); 1897 putname(to); 1898 } 1899 putname(from); 1900 return error; 1901} 1902 1903int vfs_readlink(struct dentry *dentry, char *buffer, int buflen, const char *link) 1904{ 1905 int len; 1906 1907 len = PTR_ERR(link); 1908 if (IS_ERR(link)) 1909 goto out; 1910 1911 len = strlen(link); 1912 if (len > (unsigned) buflen) 1913 len = buflen; 1914 if (copy_to_user(buffer, link, len)) 1915 len = -EFAULT; 1916out: 1917 return len; 1918} 1919 1920static inline int 1921__vfs_follow_link(struct nameidata *nd, const char *link) 1922{ 1923 int res = 0; 1924 char *name; 1925 if (IS_ERR(link)) 1926 goto fail; 1927 1928 if (*link == '/') { 1929 path_release(nd); 1930 if (!walk_init_root(link, nd)) 1931 /* weird __emul_prefix() stuff did it */ 1932 goto out; 1933 } 1934 res = link_path_walk(link, nd); 1935out: 1936 if (current->link_count || res || nd->last_type!=LAST_NORM) 1937 return res; 1938 /* 1939 * If it is an iterative symlinks resolution in open_namei() we 1940 * have to copy the last component. And all that crap because of 1941 * bloody create() on broken symlinks. Furrfu... 1942 */ 1943 name = __getname(); 1944 if (!name) 1945 return -ENOMEM; 1946 strcpy(name, nd->last.name); 1947 nd->last.name = name; 1948 return 0; 1949fail: 1950 path_release(nd); 1951 return PTR_ERR(link); 1952} 1953 1954int vfs_follow_link(struct nameidata *nd, const char *link) 1955{ 1956 return __vfs_follow_link(nd, link); 1957} 1958 1959/* get the link contents into pagecache */ 1960static char *page_getlink(struct dentry * dentry, struct page **ppage) 1961{ 1962 struct page * page; 1963 struct address_space *mapping = dentry->d_inode->i_mapping; 1964 page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage, 1965 NULL); 1966 if (IS_ERR(page)) 1967 goto sync_fail; 1968 wait_on_page(page); 1969 if (!Page_Uptodate(page)) 1970 goto async_fail; 1971 *ppage = page; 1972 return kmap(page); 1973 1974async_fail: 1975 page_cache_release(page); 1976 return ERR_PTR(-EIO); 1977 1978sync_fail: 1979 return (char*)page; 1980} 1981 1982int page_readlink(struct dentry *dentry, char *buffer, int buflen) 1983{ 1984 struct page *page = NULL; 1985 char *s = page_getlink(dentry, &page); 1986 int res = vfs_readlink(dentry,buffer,buflen,s); 1987 if (page) { 1988 kunmap(page); 1989 page_cache_release(page); 1990 } 1991 return res; 1992} 1993 1994int page_follow_link(struct dentry *dentry, struct nameidata *nd) 1995{ 1996 struct page *page = NULL; 1997 char *s = page_getlink(dentry, &page); 1998 int res = __vfs_follow_link(nd, s); 1999 if (page) { 2000 kunmap(page); 2001 page_cache_release(page); 2002 } 2003 return res; 2004} 2005 2006struct inode_operations page_symlink_inode_operations = { 2007 readlink: page_readlink, 2008 follow_link: page_follow_link, 2009}; 2010