1/* 2 * proc/fs/generic.c --- generic routines for the proc-fs 3 * 4 * This file contains generic proc-fs routines for handling 5 * directories and files. 6 * 7 * Copyright (C) 1991, 1992 Linus Torvalds. 8 * Copyright (C) 1997 Theodore Ts'o 9 */ 10 11#include <linux/errno.h> 12#include <linux/time.h> 13#include <linux/proc_fs.h> 14#include <linux/stat.h> 15#include <linux/module.h> 16#include <linux/mount.h> 17#include <linux/smp_lock.h> 18#include <linux/init.h> 19#include <linux/idr.h> 20#include <linux/namei.h> 21#include <linux/bitops.h> 22#include <linux/spinlock.h> 23#include <asm/uaccess.h> 24 25#include "internal.h" 26 27static ssize_t proc_file_read(struct file *file, char __user *buf, 28 size_t nbytes, loff_t *ppos); 29static ssize_t proc_file_write(struct file *file, const char __user *buffer, 30 size_t count, loff_t *ppos); 31static loff_t proc_file_lseek(struct file *, loff_t, int); 32 33DEFINE_SPINLOCK(proc_subdir_lock); 34 35static int proc_match(int len, const char *name, struct proc_dir_entry *de) 36{ 37 if (de->namelen != len) 38 return 0; 39 return !memcmp(name, de->name, len); 40} 41 42static const struct file_operations proc_file_operations = { 43 .llseek = proc_file_lseek, 44 .read = proc_file_read, 45 .write = proc_file_write, 46}; 47 48/* buffer size is one page but our output routines use some slack for overruns */ 49#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) 50 51static ssize_t 52proc_file_read(struct file *file, char __user *buf, size_t nbytes, 53 loff_t *ppos) 54{ 55 struct inode * inode = file->f_path.dentry->d_inode; 56 char *page; 57 ssize_t retval=0; 58 int eof=0; 59 ssize_t n, count; 60 char *start; 61 struct proc_dir_entry * dp; 62 unsigned long long pos; 63 64 /* 65 * Gaah, please just use "seq_file" instead. The legacy /proc 66 * interfaces cut loff_t down to off_t for reads, and ignore 67 * the offset entirely for writes.. 68 */ 69 pos = *ppos; 70 if (pos > MAX_NON_LFS) 71 return 0; 72 if (nbytes > MAX_NON_LFS - pos) 73 nbytes = MAX_NON_LFS - pos; 74 75 dp = PDE(inode); 76 if (!(page = (char*) __get_free_page(GFP_KERNEL))) 77 return -ENOMEM; 78 79 while ((nbytes > 0) && !eof) { 80 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); 81 82 start = NULL; 83 if (dp->get_info) { 84 /* Handle old net routines */ 85 n = dp->get_info(page, &start, *ppos, count); 86 if (n < count) 87 eof = 1; 88 } else if (dp->read_proc) { 89 /* 90 * How to be a proc read function 91 * ------------------------------ 92 * Prototype: 93 * int f(char *buffer, char **start, off_t offset, 94 * int count, int *peof, void *dat) 95 * 96 * Assume that the buffer is "count" bytes in size. 97 * 98 * If you know you have supplied all the data you 99 * have, set *peof. 100 * 101 * You have three ways to return data: 102 * 0) Leave *start = NULL. (This is the default.) 103 * Put the data of the requested offset at that 104 * offset within the buffer. Return the number (n) 105 * of bytes there are from the beginning of the 106 * buffer up to the last byte of data. If the 107 * number of supplied bytes (= n - offset) is 108 * greater than zero and you didn't signal eof 109 * and the reader is prepared to take more data 110 * you will be called again with the requested 111 * offset advanced by the number of bytes 112 * absorbed. This interface is useful for files 113 * no larger than the buffer. 114 * 1) Set *start = an unsigned long value less than 115 * the buffer address but greater than zero. 116 * Put the data of the requested offset at the 117 * beginning of the buffer. Return the number of 118 * bytes of data placed there. If this number is 119 * greater than zero and you didn't signal eof 120 * and the reader is prepared to take more data 121 * you will be called again with the requested 122 * offset advanced by *start. This interface is 123 * useful when you have a large file consisting 124 * of a series of blocks which you want to count 125 * and return as wholes. 126 * (Hack by Paul.Russell@rustcorp.com.au) 127 * 2) Set *start = an address within the buffer. 128 * Put the data of the requested offset at *start. 129 * Return the number of bytes of data placed there. 130 * If this number is greater than zero and you 131 * didn't signal eof and the reader is prepared to 132 * take more data you will be called again with the 133 * requested offset advanced by the number of bytes 134 * absorbed. 135 */ 136 n = dp->read_proc(page, &start, *ppos, 137 count, &eof, dp->data); 138 } else 139 break; 140 141 if (n == 0) /* end of file */ 142 break; 143 if (n < 0) { /* error */ 144 if (retval == 0) 145 retval = n; 146 break; 147 } 148 149 if (start == NULL) { 150 if (n > PAGE_SIZE) { 151 printk(KERN_ERR 152 "proc_file_read: Apparent buffer overflow!\n"); 153 n = PAGE_SIZE; 154 } 155 n -= *ppos; 156 if (n <= 0) 157 break; 158 if (n > count) 159 n = count; 160 start = page + *ppos; 161 } else if (start < page) { 162 if (n > PAGE_SIZE) { 163 printk(KERN_ERR 164 "proc_file_read: Apparent buffer overflow!\n"); 165 n = PAGE_SIZE; 166 } 167 if (n > count) { 168 /* 169 * Don't reduce n because doing so might 170 * cut off part of a data block. 171 */ 172 printk(KERN_WARNING 173 "proc_file_read: Read count exceeded\n"); 174 } 175 } else /* start >= page */ { 176 unsigned long startoff = (unsigned long)(start - page); 177 if (n > (PAGE_SIZE - startoff)) { 178 printk(KERN_ERR 179 "proc_file_read: Apparent buffer overflow!\n"); 180 n = PAGE_SIZE - startoff; 181 } 182 if (n > count) 183 n = count; 184 } 185 186 n -= copy_to_user(buf, start < page ? page : start, n); 187 if (n == 0) { 188 if (retval == 0) 189 retval = -EFAULT; 190 break; 191 } 192 193 *ppos += start < page ? (unsigned long)start : n; 194 nbytes -= n; 195 buf += n; 196 retval += n; 197 } 198 free_page((unsigned long) page); 199 return retval; 200} 201 202static ssize_t 203proc_file_write(struct file *file, const char __user *buffer, 204 size_t count, loff_t *ppos) 205{ 206 struct inode *inode = file->f_path.dentry->d_inode; 207 struct proc_dir_entry * dp; 208 209 dp = PDE(inode); 210 211 if (!dp->write_proc) 212 return -EIO; 213 214 return dp->write_proc(file, buffer, count, dp->data); 215} 216 217 218static loff_t 219proc_file_lseek(struct file *file, loff_t offset, int orig) 220{ 221 loff_t retval = -EINVAL; 222 switch (orig) { 223 case 1: 224 offset += file->f_pos; 225 /* fallthrough */ 226 case 0: 227 if (offset < 0 || offset > MAX_NON_LFS) 228 break; 229 file->f_pos = retval = offset; 230 } 231 return retval; 232} 233 234static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) 235{ 236 struct inode *inode = dentry->d_inode; 237 struct proc_dir_entry *de = PDE(inode); 238 int error; 239 240 error = inode_change_ok(inode, iattr); 241 if (error) 242 goto out; 243 244 error = inode_setattr(inode, iattr); 245 if (error) 246 goto out; 247 248 de->uid = inode->i_uid; 249 de->gid = inode->i_gid; 250 de->mode = inode->i_mode; 251out: 252 return error; 253} 254 255static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, 256 struct kstat *stat) 257{ 258 struct inode *inode = dentry->d_inode; 259 struct proc_dir_entry *de = PROC_I(inode)->pde; 260 if (de && de->nlink) 261 inode->i_nlink = de->nlink; 262 263 generic_fillattr(inode, stat); 264 return 0; 265} 266 267static const struct inode_operations proc_file_inode_operations = { 268 .setattr = proc_notify_change, 269}; 270 271/* 272 * This function parses a name such as "tty/driver/serial", and 273 * returns the struct proc_dir_entry for "/proc/tty/driver", and 274 * returns "serial" in residual. 275 */ 276static int xlate_proc_name(const char *name, 277 struct proc_dir_entry **ret, const char **residual) 278{ 279 const char *cp = name, *next; 280 struct proc_dir_entry *de; 281 int len; 282 int rtn = 0; 283 284 spin_lock(&proc_subdir_lock); 285 de = &proc_root; 286 while (1) { 287 next = strchr(cp, '/'); 288 if (!next) 289 break; 290 291 len = next - cp; 292 for (de = de->subdir; de ; de = de->next) { 293 if (proc_match(len, cp, de)) 294 break; 295 } 296 if (!de) { 297 rtn = -ENOENT; 298 goto out; 299 } 300 cp += len + 1; 301 } 302 *residual = cp; 303 *ret = de; 304out: 305 spin_unlock(&proc_subdir_lock); 306 return rtn; 307} 308 309static DEFINE_IDR(proc_inum_idr); 310static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ 311 312#define PROC_DYNAMIC_FIRST 0xF0000000UL 313 314/* 315 * Return an inode number between PROC_DYNAMIC_FIRST and 316 * 0xffffffff, or zero on failure. 317 */ 318static unsigned int get_inode_number(void) 319{ 320 int i, inum = 0; 321 int error; 322 323retry: 324 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) 325 return 0; 326 327 spin_lock(&proc_inum_lock); 328 error = idr_get_new(&proc_inum_idr, NULL, &i); 329 spin_unlock(&proc_inum_lock); 330 if (error == -EAGAIN) 331 goto retry; 332 else if (error) 333 return 0; 334 335 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; 336 337 /* inum will never be more than 0xf0ffffff, so no check 338 * for overflow. 339 */ 340 341 return inum; 342} 343 344static void release_inode_number(unsigned int inum) 345{ 346 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK; 347 348 spin_lock(&proc_inum_lock); 349 idr_remove(&proc_inum_idr, id); 350 spin_unlock(&proc_inum_lock); 351} 352 353static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) 354{ 355 nd_set_link(nd, PDE(dentry->d_inode)->data); 356 return NULL; 357} 358 359static const struct inode_operations proc_link_inode_operations = { 360 .readlink = generic_readlink, 361 .follow_link = proc_follow_link, 362}; 363 364/* 365 * As some entries in /proc are volatile, we want to 366 * get rid of unused dentries. This could be made 367 * smarter: we could keep a "volatile" flag in the 368 * inode to indicate which ones to keep. 369 */ 370static int proc_delete_dentry(struct dentry * dentry) 371{ 372 return 1; 373} 374 375static struct dentry_operations proc_dentry_operations = 376{ 377 .d_delete = proc_delete_dentry, 378}; 379 380/* 381 * Don't create negative dentries here, return -ENOENT by hand 382 * instead. 383 */ 384struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 385{ 386 struct inode *inode = NULL; 387 struct proc_dir_entry * de; 388 int error = -ENOENT; 389 390 lock_kernel(); 391 spin_lock(&proc_subdir_lock); 392 de = PDE(dir); 393 if (de) { 394 for (de = de->subdir; de ; de = de->next) { 395 if (de->namelen != dentry->d_name.len) 396 continue; 397 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 398 unsigned int ino = de->low_ino; 399 400 de_get(de); 401 spin_unlock(&proc_subdir_lock); 402 error = -EINVAL; 403 inode = proc_get_inode(dir->i_sb, ino, de); 404 spin_lock(&proc_subdir_lock); 405 break; 406 } 407 } 408 } 409 spin_unlock(&proc_subdir_lock); 410 unlock_kernel(); 411 412 if (inode) { 413 dentry->d_op = &proc_dentry_operations; 414 d_add(dentry, inode); 415 return NULL; 416 } 417 de_put(de); 418 return ERR_PTR(error); 419} 420 421/* 422 * This returns non-zero if at EOF, so that the /proc 423 * root directory can use this and check if it should 424 * continue with the <pid> entries.. 425 * 426 * Note that the VFS-layer doesn't care about the return 427 * value of the readdir() call, as long as it's non-negative 428 * for success.. 429 */ 430int proc_readdir(struct file * filp, 431 void * dirent, filldir_t filldir) 432{ 433 struct proc_dir_entry * de; 434 unsigned int ino; 435 int i; 436 struct inode *inode = filp->f_path.dentry->d_inode; 437 int ret = 0; 438 439 lock_kernel(); 440 441 ino = inode->i_ino; 442 de = PDE(inode); 443 if (!de) { 444 ret = -EINVAL; 445 goto out; 446 } 447 i = filp->f_pos; 448 switch (i) { 449 case 0: 450 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 451 goto out; 452 i++; 453 filp->f_pos++; 454 /* fall through */ 455 case 1: 456 if (filldir(dirent, "..", 2, i, 457 parent_ino(filp->f_path.dentry), 458 DT_DIR) < 0) 459 goto out; 460 i++; 461 filp->f_pos++; 462 /* fall through */ 463 default: 464 spin_lock(&proc_subdir_lock); 465 de = de->subdir; 466 i -= 2; 467 for (;;) { 468 if (!de) { 469 ret = 1; 470 spin_unlock(&proc_subdir_lock); 471 goto out; 472 } 473 if (!i) 474 break; 475 de = de->next; 476 i--; 477 } 478 479 do { 480 struct proc_dir_entry *next; 481 482 /* filldir passes info to user space */ 483 de_get(de); 484 spin_unlock(&proc_subdir_lock); 485 if (filldir(dirent, de->name, de->namelen, filp->f_pos, 486 de->low_ino, de->mode >> 12) < 0) { 487 de_put(de); 488 goto out; 489 } 490 spin_lock(&proc_subdir_lock); 491 filp->f_pos++; 492 next = de->next; 493 de_put(de); 494 de = next; 495 } while (de); 496 spin_unlock(&proc_subdir_lock); 497 } 498 ret = 1; 499out: unlock_kernel(); 500 return ret; 501} 502 503/* 504 * These are the generic /proc directory operations. They 505 * use the in-memory "struct proc_dir_entry" tree to parse 506 * the /proc directory. 507 */ 508static const struct file_operations proc_dir_operations = { 509 .read = generic_read_dir, 510 .readdir = proc_readdir, 511}; 512 513/* 514 * proc directories can do almost nothing.. 515 */ 516static const struct inode_operations proc_dir_inode_operations = { 517 .lookup = proc_lookup, 518 .getattr = proc_getattr, 519 .setattr = proc_notify_change, 520}; 521 522static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) 523{ 524 unsigned int i; 525 526 i = get_inode_number(); 527 if (i == 0) 528 return -EAGAIN; 529 dp->low_ino = i; 530 531 spin_lock(&proc_subdir_lock); 532 dp->next = dir->subdir; 533 dp->parent = dir; 534 dir->subdir = dp; 535 spin_unlock(&proc_subdir_lock); 536 537 if (S_ISDIR(dp->mode)) { 538 if (dp->proc_iops == NULL) { 539 dp->proc_fops = &proc_dir_operations; 540 dp->proc_iops = &proc_dir_inode_operations; 541 } 542 dir->nlink++; 543 } else if (S_ISLNK(dp->mode)) { 544 if (dp->proc_iops == NULL) 545 dp->proc_iops = &proc_link_inode_operations; 546 } else if (S_ISREG(dp->mode)) { 547 if (dp->proc_fops == NULL) 548 dp->proc_fops = &proc_file_operations; 549 if (dp->proc_iops == NULL) 550 dp->proc_iops = &proc_file_inode_operations; 551 } 552 return 0; 553} 554 555/* 556 * Kill an inode that got unregistered.. 557 */ 558static void proc_kill_inodes(struct proc_dir_entry *de) 559{ 560 struct list_head *p; 561 struct super_block *sb = proc_mnt->mnt_sb; 562 563 /* 564 * Actually it's a partial revoke(). 565 */ 566 file_list_lock(); 567 list_for_each(p, &sb->s_files) { 568 struct file * filp = list_entry(p, struct file, f_u.fu_list); 569 struct dentry * dentry = filp->f_path.dentry; 570 struct inode * inode; 571 const struct file_operations *fops; 572 573 if (dentry->d_op != &proc_dentry_operations) 574 continue; 575 inode = dentry->d_inode; 576 if (PDE(inode) != de) 577 continue; 578 fops = filp->f_op; 579 filp->f_op = NULL; 580 fops_put(fops); 581 } 582 file_list_unlock(); 583} 584 585static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, 586 const char *name, 587 mode_t mode, 588 nlink_t nlink) 589{ 590 struct proc_dir_entry *ent = NULL; 591 const char *fn = name; 592 int len; 593 594 /* make sure name is valid */ 595 if (!name || !strlen(name)) goto out; 596 597 if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) 598 goto out; 599 600 /* At this point there must not be any '/' characters beyond *fn */ 601 if (strchr(fn, '/')) 602 goto out; 603 604 len = strlen(fn); 605 606 ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); 607 if (!ent) goto out; 608 609 memset(ent, 0, sizeof(struct proc_dir_entry)); 610 memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); 611 ent->name = ((char *) ent) + sizeof(*ent); 612 ent->namelen = len; 613 ent->mode = mode; 614 ent->nlink = nlink; 615 out: 616 return ent; 617} 618 619struct proc_dir_entry *proc_symlink(const char *name, 620 struct proc_dir_entry *parent, const char *dest) 621{ 622 struct proc_dir_entry *ent; 623 624 ent = proc_create(&parent,name, 625 (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); 626 627 if (ent) { 628 ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); 629 if (ent->data) { 630 strcpy((char*)ent->data,dest); 631 if (proc_register(parent, ent) < 0) { 632 kfree(ent->data); 633 kfree(ent); 634 ent = NULL; 635 } 636 } else { 637 kfree(ent); 638 ent = NULL; 639 } 640 } 641 return ent; 642} 643 644struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, 645 struct proc_dir_entry *parent) 646{ 647 struct proc_dir_entry *ent; 648 649 ent = proc_create(&parent, name, S_IFDIR | mode, 2); 650 if (ent) { 651 ent->proc_fops = &proc_dir_operations; 652 ent->proc_iops = &proc_dir_inode_operations; 653 654 if (proc_register(parent, ent) < 0) { 655 kfree(ent); 656 ent = NULL; 657 } 658 } 659 return ent; 660} 661 662struct proc_dir_entry *proc_mkdir(const char *name, 663 struct proc_dir_entry *parent) 664{ 665 return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); 666} 667 668struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, 669 struct proc_dir_entry *parent) 670{ 671 struct proc_dir_entry *ent; 672 nlink_t nlink; 673 674 if (S_ISDIR(mode)) { 675 if ((mode & S_IALLUGO) == 0) 676 mode |= S_IRUGO | S_IXUGO; 677 nlink = 2; 678 } else { 679 if ((mode & S_IFMT) == 0) 680 mode |= S_IFREG; 681 if ((mode & S_IALLUGO) == 0) 682 mode |= S_IRUGO; 683 nlink = 1; 684 } 685 686 ent = proc_create(&parent,name,mode,nlink); 687 if (ent) { 688 if (S_ISDIR(mode)) { 689 ent->proc_fops = &proc_dir_operations; 690 ent->proc_iops = &proc_dir_inode_operations; 691 } 692 if (proc_register(parent, ent) < 0) { 693 kfree(ent); 694 ent = NULL; 695 } 696 } 697 return ent; 698} 699 700void free_proc_entry(struct proc_dir_entry *de) 701{ 702 unsigned int ino = de->low_ino; 703 704 if (ino < PROC_DYNAMIC_FIRST) 705 return; 706 707 release_inode_number(ino); 708 709 if (S_ISLNK(de->mode) && de->data) 710 kfree(de->data); 711 kfree(de); 712} 713 714/* 715 * Remove a /proc entry and free it if it's not currently in use. 716 * If it is in use, we set the 'deleted' flag. 717 */ 718void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 719{ 720 struct proc_dir_entry **p; 721 struct proc_dir_entry *de; 722 const char *fn = name; 723 int len; 724 725 if (!parent && xlate_proc_name(name, &parent, &fn) != 0) 726 goto out; 727 len = strlen(fn); 728 729 spin_lock(&proc_subdir_lock); 730 for (p = &parent->subdir; *p; p=&(*p)->next ) { 731 if (!proc_match(len, fn, *p)) 732 continue; 733 de = *p; 734 *p = de->next; 735 de->next = NULL; 736 if (S_ISDIR(de->mode)) 737 parent->nlink--; 738 proc_kill_inodes(de); 739 de->nlink = 0; 740 WARN_ON(de->subdir); 741 if (!atomic_read(&de->count)) 742 free_proc_entry(de); 743 else { 744 de->deleted = 1; 745 printk("remove_proc_entry: %s/%s busy, count=%d\n", 746 parent->name, de->name, atomic_read(&de->count)); 747 } 748 break; 749 } 750 spin_unlock(&proc_subdir_lock); 751out: 752 return; 753} 754