1/* 2 * linux/kernel/sys.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7#include <linux/module.h> 8#include <linux/mm.h> 9#include <linux/utsname.h> 10#include <linux/mman.h> 11#include <linux/notifier.h> 12#include <linux/reboot.h> 13#include <linux/prctl.h> 14#include <linux/highuid.h> 15#include <linux/fs.h> 16#include <linux/perf_event.h> 17#include <linux/resource.h> 18#include <linux/kernel.h> 19#include <linux/kexec.h> 20#include <linux/workqueue.h> 21#include <linux/capability.h> 22#include <linux/device.h> 23#include <linux/key.h> 24#include <linux/times.h> 25#include <linux/posix-timers.h> 26#include <linux/security.h> 27#include <linux/dcookies.h> 28#include <linux/suspend.h> 29#include <linux/tty.h> 30#include <linux/signal.h> 31#include <linux/cn_proc.h> 32#include <linux/getcpu.h> 33#include <linux/task_io_accounting_ops.h> 34#include <linux/seccomp.h> 35#include <linux/cpu.h> 36#include <linux/personality.h> 37#include <linux/ptrace.h> 38#include <linux/fs_struct.h> 39#include <linux/gfp.h> 40 41#include <linux/compat.h> 42#include <linux/syscalls.h> 43#include <linux/kprobes.h> 44#include <linux/user_namespace.h> 45 46#include <asm/uaccess.h> 47#include <asm/io.h> 48#include <asm/unistd.h> 49 50#ifndef SET_UNALIGN_CTL 51# define SET_UNALIGN_CTL(a,b) (-EINVAL) 52#endif 53#ifndef GET_UNALIGN_CTL 54# define GET_UNALIGN_CTL(a,b) (-EINVAL) 55#endif 56#ifndef SET_FPEMU_CTL 57# define SET_FPEMU_CTL(a,b) (-EINVAL) 58#endif 59#ifndef GET_FPEMU_CTL 60# define GET_FPEMU_CTL(a,b) (-EINVAL) 61#endif 62#ifndef SET_FPEXC_CTL 63# define SET_FPEXC_CTL(a,b) (-EINVAL) 64#endif 65#ifndef GET_FPEXC_CTL 66# define GET_FPEXC_CTL(a,b) (-EINVAL) 67#endif 68#ifndef GET_ENDIAN 69# define GET_ENDIAN(a,b) (-EINVAL) 70#endif 71#ifndef SET_ENDIAN 72# define SET_ENDIAN(a,b) (-EINVAL) 73#endif 74#ifndef GET_TSC_CTL 75# define GET_TSC_CTL(a) (-EINVAL) 76#endif 77#ifndef SET_TSC_CTL 78# define SET_TSC_CTL(a) (-EINVAL) 79#endif 80 81/* 82 * this is where the system-wide 
overflow UID and GID are defined, for
 * architectures that now have 32-bit UID/GID but didn't in the past
 */

int overflowuid = DEFAULT_OVERFLOWUID;
int overflowgid = DEFAULT_OVERFLOWGID;

#ifdef CONFIG_UID16
EXPORT_SYMBOL(overflowuid);
EXPORT_SYMBOL(overflowgid);
#endif

/*
 * the same as above, but for filesystems which can only store a 16-bit
 * UID and GID. as such, this is needed on all architectures
 */

int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
/*
 * NOTE(review): initialized from DEFAULT_FS_OVERFLOWUID rather than
 * DEFAULT_FS_OVERFLOWGID; both expand to the same value (65534), so
 * behavior is unchanged, but confirm whether the GID macro was intended.
 */
int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;

EXPORT_SYMBOL(fs_overflowuid);
EXPORT_SYMBOL(fs_overflowgid);

/*
 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
 */

int C_A_D = 1;
struct pid *cad_pid;
EXPORT_SYMBOL(cad_pid);

/*
 * If set, this is used for preparing the system to power off.
 */

void (*pm_power_off_prepare)(void);

/*
 * set the priority of a task
 * - the caller must hold the RCU read lock
 *
 * The incoming @error lets callers iterate over several tasks and keep
 * the "most interesting" status: a prior -ESRCH is overwritten with 0
 * once at least one task was actually reniced.
 */
static int set_one_prio(struct task_struct *p, int niceval, int error)
{
	const struct cred *cred = current_cred(), *pcred = __task_cred(p);
	int no_nice;

	/* may only touch tasks owned by us, unless we hold CAP_SYS_NICE */
	if (pcred->uid  != cred->euid &&
	    pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) {
		error = -EPERM;
		goto out;
	}
	/* lowering the nice value (raising priority) needs permission */
	if (niceval < task_nice(p) && !can_nice(p, niceval)) {
		error = -EACCES;
		goto out;
	}
	no_nice = security_task_setnice(p, niceval);
	if (no_nice) {
		error = no_nice;
		goto out;
	}
	if (error == -ESRCH)
		error = 0;
	set_user_nice(p, niceval);
out:
	return error;
}

/*
 * setpriority(2): set the nice value of a process, process group, or
 * all processes owned by a user, selected by @which/@who.
 */
SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	int error = -EINVAL;
	struct pid *pgrp;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		goto out;

	/* normalize: avoid signed division (rounding problems) */
	error = -ESRCH;
	if (niceval < -20)
		niceval = -20;
	if (niceval > 19)
		niceval = 19;

	rcu_read_lock();
	read_lock(&tasklist_lock);
	switch (which) {
	case PRIO_PROCESS:
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;
		if (p)
			error = set_one_prio(p, niceval, error);
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			error = set_one_prio(p, niceval, error);
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		break;
	case PRIO_USER:
		user = (struct user_struct *) cred->user;
		if (!who)
			who = cred->uid;
		else if ((who != cred->uid) &&
			 !(user = find_user(who)))
			goto out_unlock;	/* No processes for this user */

		do_each_thread(g, p) {
			if (__task_cred(p)->uid == who)
				error = set_one_prio(p, niceval, error);
		} while_each_thread(g, p);
		if (who != cred->uid)
			free_uid(user);		/* For find_user() */
		break;
	}
out_unlock:
	read_unlock(&tasklist_lock);
	rcu_read_unlock();
out:
	return error;
}

/*
 * Ugh. To avoid negative return values, "getpriority()" will
 * not return the normal nice-value, but a negated value that
 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 * to stay compatible.
215 */ 216SYSCALL_DEFINE2(getpriority, int, which, int, who) 217{ 218 struct task_struct *g, *p; 219 struct user_struct *user; 220 const struct cred *cred = current_cred(); 221 long niceval, retval = -ESRCH; 222 struct pid *pgrp; 223 224 if (which > PRIO_USER || which < PRIO_PROCESS) 225 return -EINVAL; 226 227 rcu_read_lock(); 228 read_lock(&tasklist_lock); 229 switch (which) { 230 case PRIO_PROCESS: 231 if (who) 232 p = find_task_by_vpid(who); 233 else 234 p = current; 235 if (p) { 236 niceval = 20 - task_nice(p); 237 if (niceval > retval) 238 retval = niceval; 239 } 240 break; 241 case PRIO_PGRP: 242 if (who) 243 pgrp = find_vpid(who); 244 else 245 pgrp = task_pgrp(current); 246 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { 247 niceval = 20 - task_nice(p); 248 if (niceval > retval) 249 retval = niceval; 250 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); 251 break; 252 case PRIO_USER: 253 user = (struct user_struct *) cred->user; 254 if (!who) 255 who = cred->uid; 256 else if ((who != cred->uid) && 257 !(user = find_user(who))) 258 goto out_unlock; /* No processes for this user */ 259 260 do_each_thread(g, p) { 261 if (__task_cred(p)->uid == who) { 262 niceval = 20 - task_nice(p); 263 if (niceval > retval) 264 retval = niceval; 265 } 266 } while_each_thread(g, p); 267 if (who != cred->uid) 268 free_uid(user); /* for find_user() */ 269 break; 270 } 271out_unlock: 272 read_unlock(&tasklist_lock); 273 rcu_read_unlock(); 274 275 return retval; 276} 277 278/** 279 * emergency_restart - reboot the system 280 * 281 * Without shutting down any hardware or taking any locks 282 * reboot the system. This is called when we know we are in 283 * trouble so this is our best effort to reboot. This is 284 * safe to call in interrupt context. 
285 */ 286void emergency_restart(void) 287{ 288 machine_emergency_restart(); 289} 290EXPORT_SYMBOL_GPL(emergency_restart); 291 292void kernel_restart_prepare(char *cmd) 293{ 294 blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); 295 system_state = SYSTEM_RESTART; 296 device_shutdown(); 297 sysdev_shutdown(); 298} 299 300/** 301 * kernel_restart - reboot the system 302 * @cmd: pointer to buffer containing command to execute for restart 303 * or %NULL 304 * 305 * Shutdown everything and perform a clean reboot. 306 * This is not safe to call in interrupt context. 307 */ 308void kernel_restart(char *cmd) 309{ 310#ifdef CONFIG_DUMP_PREV_OOPS_MSG 311 enable_oopsbuf(1); 312#endif 313 kernel_restart_prepare(cmd); 314 if (!cmd) 315 printk(KERN_EMERG "Restarting system.\n"); 316 else 317 printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); 318 machine_restart(cmd); 319} 320EXPORT_SYMBOL_GPL(kernel_restart); 321 322static void kernel_shutdown_prepare(enum system_states state) 323{ 324 blocking_notifier_call_chain(&reboot_notifier_list, 325 (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); 326 system_state = state; 327 device_shutdown(); 328} 329/** 330 * kernel_halt - halt the system 331 * 332 * Shutdown everything and perform a clean system halt. 333 */ 334void kernel_halt(void) 335{ 336 kernel_shutdown_prepare(SYSTEM_HALT); 337 sysdev_shutdown(); 338 printk(KERN_EMERG "System halted.\n"); 339 machine_halt(); 340} 341 342EXPORT_SYMBOL_GPL(kernel_halt); 343 344/** 345 * kernel_power_off - power_off the system 346 * 347 * Shutdown everything and perform a clean system power_off. 
 */
void kernel_power_off(void)
{
	kernel_shutdown_prepare(SYSTEM_POWER_OFF);
	if (pm_power_off_prepare)
		pm_power_off_prepare();
	/* take secondary CPUs down so the power-off runs on the boot CPU */
	disable_nonboot_cpus();
	sysdev_shutdown();
	printk(KERN_EMERG "Power down.\n");
	machine_power_off();
}
EXPORT_SYMBOL_GPL(kernel_power_off);

/* serializes concurrent reboot(2) callers */
static DEFINE_MUTEX(reboot_mutex);

/*
 * Reboot system call: for obvious reasons only root may call it,
 * and even root needs to set up some magic numbers in the registers
 * so that some mistake won't make this reboot the whole machine.
 * You can also set the meaning of the ctrl-alt-del-key here.
 *
 * reboot doesn't sync: do that yourself before calling this.
 */
SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
		void __user *, arg)
{
	char buffer[256];
	int ret = 0;

	/* We only trust the superuser with rebooting the system. */
	if (!capable(CAP_SYS_BOOT))
		return -EPERM;

	/* For safety, we require "magic" arguments. */
	if (magic1 != LINUX_REBOOT_MAGIC1 ||
	    (magic2 != LINUX_REBOOT_MAGIC2 &&
	     magic2 != LINUX_REBOOT_MAGIC2A &&
	     magic2 != LINUX_REBOOT_MAGIC2B &&
	     magic2 != LINUX_REBOOT_MAGIC2C))
		return -EINVAL;

	/* Instead of trying to make the power_off code look like
	 * halt when pm_power_off is not set do it the easy way.
	 */
	if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
		cmd = LINUX_REBOOT_CMD_HALT;

	mutex_lock(&reboot_mutex);
	switch (cmd) {
	case LINUX_REBOOT_CMD_RESTART:
		kernel_restart(NULL);
		break;

	case LINUX_REBOOT_CMD_CAD_ON:
		C_A_D = 1;
		break;

	case LINUX_REBOOT_CMD_CAD_OFF:
		C_A_D = 0;
		break;

	case LINUX_REBOOT_CMD_HALT:
		kernel_halt();
		do_exit(0);
		/* do_exit() should not return; the panic is a safety net */
		panic("cannot halt");

	case LINUX_REBOOT_CMD_POWER_OFF:
		kernel_power_off();
		do_exit(0);
		break;

	case LINUX_REBOOT_CMD_RESTART2:
		if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
			ret = -EFAULT;
			break;
		}
		buffer[sizeof(buffer) - 1] = '\0';

		kernel_restart(buffer);
		break;

#ifdef CONFIG_KEXEC
	case LINUX_REBOOT_CMD_KEXEC:
		ret = kernel_kexec();
		break;
#endif

#ifdef CONFIG_HIBERNATION
	case LINUX_REBOOT_CMD_SW_SUSPEND:
		ret = hibernate();
		break;
#endif

	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&reboot_mutex);
	return ret;
}

/* workqueue callback: perform the actual ctrl-alt-del restart */
static void deferred_cad(struct work_struct *dummy)
{
	kernel_restart(NULL);
}

/*
 * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
 * As it's called within an interrupt, it may NOT sync: the only choice
 * is whether to reboot at once, or just ignore the ctrl-alt-del.
 */
void ctrl_alt_del(void)
{
	static DECLARE_WORK(cad_work, deferred_cad);

	if (C_A_D)
		schedule_work(&cad_work);
	else
		kill_cad_pid(SIGINT, 1);
}

/*
 * Unprivileged users may change the real gid to the effective gid
 * or vice versa.  (BSD-style)
 *
 * If you set the real gid at all, or set the effective gid to a value not
 * equal to the real gid, then the saved gid is set to the new effective gid.
475 * 476 * This makes it possible for a setgid program to completely drop its 477 * privileges, which is often a useful assertion to make when you are doing 478 * a security audit over a program. 479 * 480 * The general idea is that a program which uses just setregid() will be 481 * 100% compatible with BSD. A program which uses just setgid() will be 482 * 100% compatible with POSIX with saved IDs. 483 * 484 * SMP: There are not races, the GIDs are checked only by filesystem 485 * operations (as far as semantic preservation is concerned). 486 */ 487SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) 488{ 489 const struct cred *old; 490 struct cred *new; 491 int retval; 492 493 new = prepare_creds(); 494 if (!new) 495 return -ENOMEM; 496 old = current_cred(); 497 498 retval = -EPERM; 499 if (rgid != (gid_t) -1) { 500 if (old->gid == rgid || 501 old->egid == rgid || 502 capable(CAP_SETGID)) 503 new->gid = rgid; 504 else 505 goto error; 506 } 507 if (egid != (gid_t) -1) { 508 if (old->gid == egid || 509 old->egid == egid || 510 old->sgid == egid || 511 capable(CAP_SETGID)) 512 new->egid = egid; 513 else 514 goto error; 515 } 516 517 if (rgid != (gid_t) -1 || 518 (egid != (gid_t) -1 && egid != old->gid)) 519 new->sgid = new->egid; 520 new->fsgid = new->egid; 521 522 return commit_creds(new); 523 524error: 525 abort_creds(new); 526 return retval; 527} 528 529/* 530 * setgid() is implemented like SysV w/ SAVED_IDS 531 * 532 * SMP: Same implicit races as above. 
533 */ 534SYSCALL_DEFINE1(setgid, gid_t, gid) 535{ 536 const struct cred *old; 537 struct cred *new; 538 int retval; 539 540 new = prepare_creds(); 541 if (!new) 542 return -ENOMEM; 543 old = current_cred(); 544 545 retval = -EPERM; 546 if (capable(CAP_SETGID)) 547 new->gid = new->egid = new->sgid = new->fsgid = gid; 548 else if (gid == old->gid || gid == old->sgid) 549 new->egid = new->fsgid = gid; 550 else 551 goto error; 552 553 return commit_creds(new); 554 555error: 556 abort_creds(new); 557 return retval; 558} 559 560/* 561 * change the user struct in a credentials set to match the new UID 562 */ 563static int set_user(struct cred *new) 564{ 565 struct user_struct *new_user; 566 567 new_user = alloc_uid(current_user_ns(), new->uid); 568 if (!new_user) 569 return -EAGAIN; 570 571 if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) && 572 new_user != INIT_USER) { 573 free_uid(new_user); 574 return -EAGAIN; 575 } 576 577 free_uid(new->user); 578 new->user = new_user; 579 return 0; 580} 581 582/* 583 * Unprivileged users may change the real uid to the effective uid 584 * or vice versa. (BSD-style) 585 * 586 * If you set the real uid at all, or set the effective uid to a value not 587 * equal to the real uid, then the saved uid is set to the new effective uid. 588 * 589 * This makes it possible for a setuid program to completely drop its 590 * privileges, which is often a useful assertion to make when you are doing 591 * a security audit over a program. 592 * 593 * The general idea is that a program which uses just setreuid() will be 594 * 100% compatible with BSD. A program which uses just setuid() will be 595 * 100% compatible with POSIX with saved IDs. 
596 */ 597SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) 598{ 599 const struct cred *old; 600 struct cred *new; 601 int retval; 602 603 new = prepare_creds(); 604 if (!new) 605 return -ENOMEM; 606 old = current_cred(); 607 608 retval = -EPERM; 609 if (ruid != (uid_t) -1) { 610 new->uid = ruid; 611 if (old->uid != ruid && 612 old->euid != ruid && 613 !capable(CAP_SETUID)) 614 goto error; 615 } 616 617 if (euid != (uid_t) -1) { 618 new->euid = euid; 619 if (old->uid != euid && 620 old->euid != euid && 621 old->suid != euid && 622 !capable(CAP_SETUID)) 623 goto error; 624 } 625 626 if (new->uid != old->uid) { 627 retval = set_user(new); 628 if (retval < 0) 629 goto error; 630 } 631 if (ruid != (uid_t) -1 || 632 (euid != (uid_t) -1 && euid != old->uid)) 633 new->suid = new->euid; 634 new->fsuid = new->euid; 635 636 retval = security_task_fix_setuid(new, old, LSM_SETID_RE); 637 if (retval < 0) 638 goto error; 639 640 return commit_creds(new); 641 642error: 643 abort_creds(new); 644 return retval; 645} 646 647/* 648 * setuid() is implemented like SysV with SAVED_IDS 649 * 650 * Note that SAVED_ID's is deficient in that a setuid root program 651 * like sendmail, for example, cannot set its uid to be a normal 652 * user and then switch back, because if you're root, setuid() sets 653 * the saved uid too. If you don't like this, blame the bright people 654 * in the POSIX committee and/or USG. Note that the BSD-style setreuid() 655 * will allow a root program to temporarily drop privileges and be able to 656 * regain them by swapping the real and effective uid. 
657 */ 658SYSCALL_DEFINE1(setuid, uid_t, uid) 659{ 660 const struct cred *old; 661 struct cred *new; 662 int retval; 663 664 new = prepare_creds(); 665 if (!new) 666 return -ENOMEM; 667 old = current_cred(); 668 669 retval = -EPERM; 670 if (capable(CAP_SETUID)) { 671 new->suid = new->uid = uid; 672 if (uid != old->uid) { 673 retval = set_user(new); 674 if (retval < 0) 675 goto error; 676 } 677 } else if (uid != old->uid && uid != new->suid) { 678 goto error; 679 } 680 681 new->fsuid = new->euid = uid; 682 683 retval = security_task_fix_setuid(new, old, LSM_SETID_ID); 684 if (retval < 0) 685 goto error; 686 687 return commit_creds(new); 688 689error: 690 abort_creds(new); 691 return retval; 692} 693 694 695/* 696 * This function implements a generic ability to update ruid, euid, 697 * and suid. This allows you to implement the 4.4 compatible seteuid(). 698 */ 699SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) 700{ 701 const struct cred *old; 702 struct cred *new; 703 int retval; 704 705 new = prepare_creds(); 706 if (!new) 707 return -ENOMEM; 708 709 old = current_cred(); 710 711 retval = -EPERM; 712 if (!capable(CAP_SETUID)) { 713 if (ruid != (uid_t) -1 && ruid != old->uid && 714 ruid != old->euid && ruid != old->suid) 715 goto error; 716 if (euid != (uid_t) -1 && euid != old->uid && 717 euid != old->euid && euid != old->suid) 718 goto error; 719 if (suid != (uid_t) -1 && suid != old->uid && 720 suid != old->euid && suid != old->suid) 721 goto error; 722 } 723 724 if (ruid != (uid_t) -1) { 725 new->uid = ruid; 726 if (ruid != old->uid) { 727 retval = set_user(new); 728 if (retval < 0) 729 goto error; 730 } 731 } 732 if (euid != (uid_t) -1) 733 new->euid = euid; 734 if (suid != (uid_t) -1) 735 new->suid = suid; 736 new->fsuid = new->euid; 737 738 retval = security_task_fix_setuid(new, old, LSM_SETID_RES); 739 if (retval < 0) 740 goto error; 741 742 return commit_creds(new); 743 744error: 745 abort_creds(new); 746 return retval; 747} 748 
749SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid) 750{ 751 const struct cred *cred = current_cred(); 752 int retval; 753 754 if (!(retval = put_user(cred->uid, ruid)) && 755 !(retval = put_user(cred->euid, euid))) 756 retval = put_user(cred->suid, suid); 757 758 return retval; 759} 760 761/* 762 * Same as above, but for rgid, egid, sgid. 763 */ 764SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) 765{ 766 const struct cred *old; 767 struct cred *new; 768 int retval; 769 770 new = prepare_creds(); 771 if (!new) 772 return -ENOMEM; 773 old = current_cred(); 774 775 retval = -EPERM; 776 if (!capable(CAP_SETGID)) { 777 if (rgid != (gid_t) -1 && rgid != old->gid && 778 rgid != old->egid && rgid != old->sgid) 779 goto error; 780 if (egid != (gid_t) -1 && egid != old->gid && 781 egid != old->egid && egid != old->sgid) 782 goto error; 783 if (sgid != (gid_t) -1 && sgid != old->gid && 784 sgid != old->egid && sgid != old->sgid) 785 goto error; 786 } 787 788 if (rgid != (gid_t) -1) 789 new->gid = rgid; 790 if (egid != (gid_t) -1) 791 new->egid = egid; 792 if (sgid != (gid_t) -1) 793 new->sgid = sgid; 794 new->fsgid = new->egid; 795 796 return commit_creds(new); 797 798error: 799 abort_creds(new); 800 return retval; 801} 802 803SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid) 804{ 805 const struct cred *cred = current_cred(); 806 int retval; 807 808 if (!(retval = put_user(cred->gid, rgid)) && 809 !(retval = put_user(cred->egid, egid))) 810 retval = put_user(cred->sgid, sgid); 811 812 return retval; 813} 814 815 816/* 817 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This 818 * is used for "access()" and for the NFS daemon (letting nfsd stay at 819 * whatever uid it wants to). It normally shadows "euid", except when 820 * explicitly set by setfsuid() or for access.. 
821 */ 822SYSCALL_DEFINE1(setfsuid, uid_t, uid) 823{ 824 const struct cred *old; 825 struct cred *new; 826 uid_t old_fsuid; 827 828 new = prepare_creds(); 829 if (!new) 830 return current_fsuid(); 831 old = current_cred(); 832 old_fsuid = old->fsuid; 833 834 if (uid == old->uid || uid == old->euid || 835 uid == old->suid || uid == old->fsuid || 836 capable(CAP_SETUID)) { 837 if (uid != old_fsuid) { 838 new->fsuid = uid; 839 if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) 840 goto change_okay; 841 } 842 } 843 844 abort_creds(new); 845 return old_fsuid; 846 847change_okay: 848 commit_creds(new); 849 return old_fsuid; 850} 851 852/* 853 * Samma p�� svenska.. 854 */ 855SYSCALL_DEFINE1(setfsgid, gid_t, gid) 856{ 857 const struct cred *old; 858 struct cred *new; 859 gid_t old_fsgid; 860 861 new = prepare_creds(); 862 if (!new) 863 return current_fsgid(); 864 old = current_cred(); 865 old_fsgid = old->fsgid; 866 867 if (gid == old->gid || gid == old->egid || 868 gid == old->sgid || gid == old->fsgid || 869 capable(CAP_SETGID)) { 870 if (gid != old_fsgid) { 871 new->fsgid = gid; 872 goto change_okay; 873 } 874 } 875 876 abort_creds(new); 877 return old_fsgid; 878 879change_okay: 880 commit_creds(new); 881 return old_fsgid; 882} 883 884void do_sys_times(struct tms *tms) 885{ 886 cputime_t tgutime, tgstime, cutime, cstime; 887 888 spin_lock_irq(¤t->sighand->siglock); 889 thread_group_times(current, &tgutime, &tgstime); 890 cutime = current->signal->cutime; 891 cstime = current->signal->cstime; 892 spin_unlock_irq(¤t->sighand->siglock); 893 tms->tms_utime = cputime_to_clock_t(tgutime); 894 tms->tms_stime = cputime_to_clock_t(tgstime); 895 tms->tms_cutime = cputime_to_clock_t(cutime); 896 tms->tms_cstime = cputime_to_clock_t(cstime); 897} 898 899SYSCALL_DEFINE1(times, struct tms __user *, tbuf) 900{ 901 if (tbuf) { 902 struct tms tmp; 903 904 do_sys_times(&tmp); 905 if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) 906 return -EFAULT; 907 } 908 
force_successful_syscall_return(); 909 return (long) jiffies_64_to_clock_t(get_jiffies_64()); 910} 911 912/* 913 * This needs some heavy checking ... 914 * I just haven't the stomach for it. I also don't fully 915 * understand sessions/pgrp etc. Let somebody who does explain it. 916 * 917 * OK, I think I have the protection semantics right.... this is really 918 * only important on a multi-user system anyway, to make sure one user 919 * can't send a signal to a process owned by another. -TYT, 12/12/91 920 * 921 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. 922 * LBT 04.03.94 923 */ 924SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) 925{ 926 struct task_struct *p; 927 struct task_struct *group_leader = current->group_leader; 928 struct pid *pgrp; 929 int err; 930 931 if (!pid) 932 pid = task_pid_vnr(group_leader); 933 if (!pgid) 934 pgid = pid; 935 if (pgid < 0) 936 return -EINVAL; 937 rcu_read_lock(); 938 939 /* From this point forward we keep holding onto the tasklist lock 940 * so that our parent does not change from under us. 
-DaveM 941 */ 942 write_lock_irq(&tasklist_lock); 943 944 err = -ESRCH; 945 p = find_task_by_vpid(pid); 946 if (!p) 947 goto out; 948 949 err = -EINVAL; 950 if (!thread_group_leader(p)) 951 goto out; 952 953 if (same_thread_group(p->real_parent, group_leader)) { 954 err = -EPERM; 955 if (task_session(p) != task_session(group_leader)) 956 goto out; 957 err = -EACCES; 958 if (p->did_exec) 959 goto out; 960 } else { 961 err = -ESRCH; 962 if (p != group_leader) 963 goto out; 964 } 965 966 err = -EPERM; 967 if (p->signal->leader) 968 goto out; 969 970 pgrp = task_pid(p); 971 if (pgid != pid) { 972 struct task_struct *g; 973 974 pgrp = find_vpid(pgid); 975 g = pid_task(pgrp, PIDTYPE_PGID); 976 if (!g || task_session(g) != task_session(group_leader)) 977 goto out; 978 } 979 980 err = security_task_setpgid(p, pgid); 981 if (err) 982 goto out; 983 984 if (task_pgrp(p) != pgrp) 985 change_pid(p, PIDTYPE_PGID, pgrp); 986 987 err = 0; 988out: 989 /* All paths lead to here, thus we are safe. -DaveM */ 990 write_unlock_irq(&tasklist_lock); 991 rcu_read_unlock(); 992 return err; 993} 994 995SYSCALL_DEFINE1(getpgid, pid_t, pid) 996{ 997 struct task_struct *p; 998 struct pid *grp; 999 int retval; 1000 1001 rcu_read_lock(); 1002 if (!pid) 1003 grp = task_pgrp(current); 1004 else { 1005 retval = -ESRCH; 1006 p = find_task_by_vpid(pid); 1007 if (!p) 1008 goto out; 1009 grp = task_pgrp(p); 1010 if (!grp) 1011 goto out; 1012 1013 retval = security_task_getpgid(p); 1014 if (retval) 1015 goto out; 1016 } 1017 retval = pid_vnr(grp); 1018out: 1019 rcu_read_unlock(); 1020 return retval; 1021} 1022 1023#ifdef __ARCH_WANT_SYS_GETPGRP 1024 1025SYSCALL_DEFINE0(getpgrp) 1026{ 1027 return sys_getpgid(0); 1028} 1029 1030#endif 1031 1032SYSCALL_DEFINE1(getsid, pid_t, pid) 1033{ 1034 struct task_struct *p; 1035 struct pid *sid; 1036 int retval; 1037 1038 rcu_read_lock(); 1039 if (!pid) 1040 sid = task_session(current); 1041 else { 1042 retval = -ESRCH; 1043 p = find_task_by_vpid(pid); 1044 if (!p) 
1045 goto out; 1046 sid = task_session(p); 1047 if (!sid) 1048 goto out; 1049 1050 retval = security_task_getsid(p); 1051 if (retval) 1052 goto out; 1053 } 1054 retval = pid_vnr(sid); 1055out: 1056 rcu_read_unlock(); 1057 return retval; 1058} 1059 1060SYSCALL_DEFINE0(setsid) 1061{ 1062 struct task_struct *group_leader = current->group_leader; 1063 struct pid *sid = task_pid(group_leader); 1064 pid_t session = pid_vnr(sid); 1065 int err = -EPERM; 1066 1067 write_lock_irq(&tasklist_lock); 1068 /* Fail if I am already a session leader */ 1069 if (group_leader->signal->leader) 1070 goto out; 1071 1072 /* Fail if a process group id already exists that equals the 1073 * proposed session id. 1074 */ 1075 if (pid_task(sid, PIDTYPE_PGID)) 1076 goto out; 1077 1078 group_leader->signal->leader = 1; 1079 __set_special_pids(sid); 1080 1081 proc_clear_tty(group_leader); 1082 1083 err = session; 1084out: 1085 write_unlock_irq(&tasklist_lock); 1086 if (err > 0) 1087 proc_sid_connector(group_leader); 1088 return err; 1089} 1090 1091DECLARE_RWSEM(uts_sem); 1092 1093#ifdef COMPAT_UTS_MACHINE 1094#define override_architecture(name) \ 1095 (personality(current->personality) == PER_LINUX32 && \ 1096 copy_to_user(name->machine, COMPAT_UTS_MACHINE, \ 1097 sizeof(COMPAT_UTS_MACHINE))) 1098#else 1099#define override_architecture(name) 0 1100#endif 1101 1102SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) 1103{ 1104 int errno = 0; 1105 1106 down_read(&uts_sem); 1107 if (copy_to_user(name, utsname(), sizeof *name)) 1108 errno = -EFAULT; 1109 up_read(&uts_sem); 1110 1111 if (!errno && override_architecture(name)) 1112 errno = -EFAULT; 1113 return errno; 1114} 1115 1116#ifdef __ARCH_WANT_SYS_OLD_UNAME 1117/* 1118 * Old cruft 1119 */ 1120SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) 1121{ 1122 int error = 0; 1123 1124 if (!name) 1125 return -EFAULT; 1126 1127 down_read(&uts_sem); 1128 if (copy_to_user(name, utsname(), sizeof(*name))) 1129 error = -EFAULT; 1130 
up_read(&uts_sem); 1131 1132 if (!error && override_architecture(name)) 1133 error = -EFAULT; 1134 return error; 1135} 1136 1137SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) 1138{ 1139 int error; 1140 1141 if (!name) 1142 return -EFAULT; 1143 if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) 1144 return -EFAULT; 1145 1146 down_read(&uts_sem); 1147 error = __copy_to_user(&name->sysname, &utsname()->sysname, 1148 __OLD_UTS_LEN); 1149 error |= __put_user(0, name->sysname + __OLD_UTS_LEN); 1150 error |= __copy_to_user(&name->nodename, &utsname()->nodename, 1151 __OLD_UTS_LEN); 1152 error |= __put_user(0, name->nodename + __OLD_UTS_LEN); 1153 error |= __copy_to_user(&name->release, &utsname()->release, 1154 __OLD_UTS_LEN); 1155 error |= __put_user(0, name->release + __OLD_UTS_LEN); 1156 error |= __copy_to_user(&name->version, &utsname()->version, 1157 __OLD_UTS_LEN); 1158 error |= __put_user(0, name->version + __OLD_UTS_LEN); 1159 error |= __copy_to_user(&name->machine, &utsname()->machine, 1160 __OLD_UTS_LEN); 1161 error |= __put_user(0, name->machine + __OLD_UTS_LEN); 1162 up_read(&uts_sem); 1163 1164 if (!error && override_architecture(name)) 1165 error = -EFAULT; 1166 return error ? 
-EFAULT : 0; 1167} 1168#endif 1169 1170SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) 1171{ 1172 int errno; 1173 char tmp[__NEW_UTS_LEN]; 1174 1175 if (!capable(CAP_SYS_ADMIN)) 1176 return -EPERM; 1177 if (len < 0 || len > __NEW_UTS_LEN) 1178 return -EINVAL; 1179 down_write(&uts_sem); 1180 errno = -EFAULT; 1181 if (!copy_from_user(tmp, name, len)) { 1182 struct new_utsname *u = utsname(); 1183 1184 memcpy(u->nodename, tmp, len); 1185 memset(u->nodename + len, 0, sizeof(u->nodename) - len); 1186 errno = 0; 1187 } 1188 up_write(&uts_sem); 1189 return errno; 1190} 1191 1192#ifdef __ARCH_WANT_SYS_GETHOSTNAME 1193 1194SYSCALL_DEFINE2(gethostname, char __user *, name, int, len) 1195{ 1196 int i, errno; 1197 struct new_utsname *u; 1198 1199 if (len < 0) 1200 return -EINVAL; 1201 down_read(&uts_sem); 1202 u = utsname(); 1203 i = 1 + strlen(u->nodename); 1204 if (i > len) 1205 i = len; 1206 errno = 0; 1207 if (copy_to_user(name, u->nodename, i)) 1208 errno = -EFAULT; 1209 up_read(&uts_sem); 1210 return errno; 1211} 1212 1213#endif 1214 1215/* 1216 * Only setdomainname; getdomainname can be implemented by calling 1217 * uname() 1218 */ 1219SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) 1220{ 1221 int errno; 1222 char tmp[__NEW_UTS_LEN]; 1223 1224 if (!capable(CAP_SYS_ADMIN)) 1225 return -EPERM; 1226 if (len < 0 || len > __NEW_UTS_LEN) 1227 return -EINVAL; 1228 1229 down_write(&uts_sem); 1230 errno = -EFAULT; 1231 if (!copy_from_user(tmp, name, len)) { 1232 struct new_utsname *u = utsname(); 1233 1234 memcpy(u->domainname, tmp, len); 1235 memset(u->domainname + len, 0, sizeof(u->domainname) - len); 1236 errno = 0; 1237 } 1238 up_write(&uts_sem); 1239 return errno; 1240} 1241 1242SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim) 1243{ 1244 struct rlimit value; 1245 int ret; 1246 1247 ret = do_prlimit(current, resource, NULL, &value); 1248 if (!ret) 1249 ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? 
-EFAULT : 0; 1250 1251 return ret; 1252} 1253 1254#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT 1255 1256/* 1257 * Back compatibility for getrlimit. Needed for some apps. 1258 */ 1259 1260SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, 1261 struct rlimit __user *, rlim) 1262{ 1263 struct rlimit x; 1264 if (resource >= RLIM_NLIMITS) 1265 return -EINVAL; 1266 1267 task_lock(current->group_leader); 1268 x = current->signal->rlim[resource]; 1269 task_unlock(current->group_leader); 1270 if (x.rlim_cur > 0x7FFFFFFF) 1271 x.rlim_cur = 0x7FFFFFFF; 1272 if (x.rlim_max > 0x7FFFFFFF) 1273 x.rlim_max = 0x7FFFFFFF; 1274 return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0; 1275} 1276 1277#endif 1278 1279static inline bool rlim64_is_infinity(__u64 rlim64) 1280{ 1281#if BITS_PER_LONG < 64 1282 return rlim64 >= ULONG_MAX; 1283#else 1284 return rlim64 == RLIM64_INFINITY; 1285#endif 1286} 1287 1288static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64) 1289{ 1290 if (rlim->rlim_cur == RLIM_INFINITY) 1291 rlim64->rlim_cur = RLIM64_INFINITY; 1292 else 1293 rlim64->rlim_cur = rlim->rlim_cur; 1294 if (rlim->rlim_max == RLIM_INFINITY) 1295 rlim64->rlim_max = RLIM64_INFINITY; 1296 else 1297 rlim64->rlim_max = rlim->rlim_max; 1298} 1299 1300static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim) 1301{ 1302 if (rlim64_is_infinity(rlim64->rlim_cur)) 1303 rlim->rlim_cur = RLIM_INFINITY; 1304 else 1305 rlim->rlim_cur = (unsigned long)rlim64->rlim_cur; 1306 if (rlim64_is_infinity(rlim64->rlim_max)) 1307 rlim->rlim_max = RLIM_INFINITY; 1308 else 1309 rlim->rlim_max = (unsigned long)rlim64->rlim_max; 1310} 1311 1312/* make sure you are allowed to change @tsk limits before calling this */ 1313int do_prlimit(struct task_struct *tsk, unsigned int resource, 1314 struct rlimit *new_rlim, struct rlimit *old_rlim) 1315{ 1316 struct rlimit *rlim; 1317 int retval = 0; 1318 1319 if (resource >= RLIM_NLIMITS) 1320 return -EINVAL; 1321 if (new_rlim) { 1322 if 
(new_rlim->rlim_cur > new_rlim->rlim_max) 1323 return -EINVAL; 1324 if (resource == RLIMIT_NOFILE && 1325 new_rlim->rlim_max > sysctl_nr_open) 1326 return -EPERM; 1327 } 1328 1329 /* protect tsk->signal and tsk->sighand from disappearing */ 1330 read_lock(&tasklist_lock); 1331 if (!tsk->sighand) { 1332 retval = -ESRCH; 1333 goto out; 1334 } 1335 1336 rlim = tsk->signal->rlim + resource; 1337 task_lock(tsk->group_leader); 1338 if (new_rlim) { 1339 if (new_rlim->rlim_max > rlim->rlim_max && 1340 !capable(CAP_SYS_RESOURCE)) 1341 retval = -EPERM; 1342 if (!retval) 1343 retval = security_task_setrlimit(tsk->group_leader, 1344 resource, new_rlim); 1345 if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) { 1346 /* 1347 * The caller is asking for an immediate RLIMIT_CPU 1348 * expiry. But we use the zero value to mean "it was 1349 * never set". So let's cheat and make it one second 1350 * instead 1351 */ 1352 new_rlim->rlim_cur = 1; 1353 } 1354 } 1355 if (!retval) { 1356 if (old_rlim) 1357 *old_rlim = *rlim; 1358 if (new_rlim) 1359 *rlim = *new_rlim; 1360 } 1361 task_unlock(tsk->group_leader); 1362 1363 /* 1364 * RLIMIT_CPU handling. Note that the kernel fails to return an error 1365 * code if it rejected the user's attempt to set RLIMIT_CPU. 
This is a 1366 * very long-standing error, and fixing it now risks breakage of 1367 * applications, so we live with it 1368 */ 1369 if (!retval && new_rlim && resource == RLIMIT_CPU && 1370 new_rlim->rlim_cur != RLIM_INFINITY) 1371 update_rlimit_cpu(tsk, new_rlim->rlim_cur); 1372out: 1373 read_unlock(&tasklist_lock); 1374 return retval; 1375} 1376 1377/* rcu lock must be held */ 1378static int check_prlimit_permission(struct task_struct *task) 1379{ 1380 const struct cred *cred = current_cred(), *tcred; 1381 1382 tcred = __task_cred(task); 1383 if (current != task && 1384 (cred->uid != tcred->euid || 1385 cred->uid != tcred->suid || 1386 cred->uid != tcred->uid || 1387 cred->gid != tcred->egid || 1388 cred->gid != tcred->sgid || 1389 cred->gid != tcred->gid) && 1390 !capable(CAP_SYS_RESOURCE)) { 1391 return -EPERM; 1392 } 1393 1394 return 0; 1395} 1396 1397SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, 1398 const struct rlimit64 __user *, new_rlim, 1399 struct rlimit64 __user *, old_rlim) 1400{ 1401 struct rlimit64 old64, new64; 1402 struct rlimit old, new; 1403 struct task_struct *tsk; 1404 int ret; 1405 1406 if (new_rlim) { 1407 if (copy_from_user(&new64, new_rlim, sizeof(new64))) 1408 return -EFAULT; 1409 rlim64_to_rlim(&new64, &new); 1410 } 1411 1412 rcu_read_lock(); 1413 tsk = pid ? find_task_by_vpid(pid) : current; 1414 if (!tsk) { 1415 rcu_read_unlock(); 1416 return -ESRCH; 1417 } 1418 ret = check_prlimit_permission(tsk); 1419 if (ret) { 1420 rcu_read_unlock(); 1421 return ret; 1422 } 1423 get_task_struct(tsk); 1424 rcu_read_unlock(); 1425 1426 ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL, 1427 old_rlim ? 
&old : NULL); 1428 1429 if (!ret && old_rlim) { 1430 rlim_to_rlim64(&old, &old64); 1431 if (copy_to_user(old_rlim, &old64, sizeof(old64))) 1432 ret = -EFAULT; 1433 } 1434 1435 put_task_struct(tsk); 1436 return ret; 1437} 1438 1439SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) 1440{ 1441 struct rlimit new_rlim; 1442 1443 if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) 1444 return -EFAULT; 1445 return do_prlimit(current, resource, &new_rlim, NULL); 1446} 1447 1448/* 1449 * It would make sense to put struct rusage in the task_struct, 1450 * except that would make the task_struct be *really big*. After 1451 * task_struct gets moved into malloc'ed memory, it would 1452 * make sense to do this. It will make moving the rest of the information 1453 * a lot simpler! (Which we're not doing right now because we're not 1454 * measuring them yet). 1455 * 1456 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have 1457 * races with threads incrementing their own counters. But since word 1458 * reads are atomic, we either get new values or old values and we don't 1459 * care which for the sums. We always take the siglock to protect reading 1460 * the c* fields from p->signal from races with exit.c updating those 1461 * fields when reaping, so a sample either gets all the additions of a 1462 * given child after it's reaped, or none so this sample is before reaping. 1463 * 1464 * Locking: 1465 * We need to take the siglock for CHILDEREN, SELF and BOTH 1466 * for the cases current multithreaded, non-current single threaded 1467 * non-current multithreaded. Thread traversal is now safe with 1468 * the siglock held. 1469 * Strictly speaking, we donot need to take the siglock if we are current and 1470 * single threaded, as no one else can take our signal_struct away, no one 1471 * else can reap the children to update signal->c* counters, and no one else 1472 * can race with the signal-> fields. 
If we do not take any lock, the 1473 * signal-> fields could be read out of order while another thread was just 1474 * exiting. So we should place a read memory barrier when we avoid the lock. 1475 * On the writer side, write memory barrier is implied in __exit_signal 1476 * as __exit_signal releases the siglock spinlock after updating the signal-> 1477 * fields. But we don't do this yet to keep things simple. 1478 * 1479 */ 1480 1481static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r) 1482{ 1483 r->ru_nvcsw += t->nvcsw; 1484 r->ru_nivcsw += t->nivcsw; 1485 r->ru_minflt += t->min_flt; 1486 r->ru_majflt += t->maj_flt; 1487 r->ru_inblock += task_io_get_inblock(t); 1488 r->ru_oublock += task_io_get_oublock(t); 1489} 1490 1491static void k_getrusage(struct task_struct *p, int who, struct rusage *r) 1492{ 1493 struct task_struct *t; 1494 unsigned long flags; 1495 cputime_t tgutime, tgstime, utime, stime; 1496 unsigned long maxrss = 0; 1497 1498 memset((char *) r, 0, sizeof *r); 1499 utime = stime = cputime_zero; 1500 1501 if (who == RUSAGE_THREAD) { 1502 task_times(current, &utime, &stime); 1503 accumulate_thread_rusage(p, r); 1504 maxrss = p->signal->maxrss; 1505 goto out; 1506 } 1507 1508 if (!lock_task_sighand(p, &flags)) 1509 return; 1510 1511 switch (who) { 1512 case RUSAGE_BOTH: 1513 case RUSAGE_CHILDREN: 1514 utime = p->signal->cutime; 1515 stime = p->signal->cstime; 1516 r->ru_nvcsw = p->signal->cnvcsw; 1517 r->ru_nivcsw = p->signal->cnivcsw; 1518 r->ru_minflt = p->signal->cmin_flt; 1519 r->ru_majflt = p->signal->cmaj_flt; 1520 r->ru_inblock = p->signal->cinblock; 1521 r->ru_oublock = p->signal->coublock; 1522 maxrss = p->signal->cmaxrss; 1523 1524 if (who == RUSAGE_CHILDREN) 1525 break; 1526 1527 case RUSAGE_SELF: 1528 thread_group_times(p, &tgutime, &tgstime); 1529 utime = cputime_add(utime, tgutime); 1530 stime = cputime_add(stime, tgstime); 1531 r->ru_nvcsw += p->signal->nvcsw; 1532 r->ru_nivcsw += p->signal->nivcsw; 1533 
r->ru_minflt += p->signal->min_flt; 1534 r->ru_majflt += p->signal->maj_flt; 1535 r->ru_inblock += p->signal->inblock; 1536 r->ru_oublock += p->signal->oublock; 1537 if (maxrss < p->signal->maxrss) 1538 maxrss = p->signal->maxrss; 1539 t = p; 1540 do { 1541 accumulate_thread_rusage(t, r); 1542 t = next_thread(t); 1543 } while (t != p); 1544 break; 1545 1546 default: 1547 BUG(); 1548 } 1549 unlock_task_sighand(p, &flags); 1550 1551out: 1552 cputime_to_timeval(utime, &r->ru_utime); 1553 cputime_to_timeval(stime, &r->ru_stime); 1554 1555 if (who != RUSAGE_CHILDREN) { 1556 struct mm_struct *mm = get_task_mm(p); 1557 if (mm) { 1558 setmax_mm_hiwater_rss(&maxrss, mm); 1559 mmput(mm); 1560 } 1561 } 1562 r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ 1563} 1564 1565int getrusage(struct task_struct *p, int who, struct rusage __user *ru) 1566{ 1567 struct rusage r; 1568 k_getrusage(p, who, &r); 1569 return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; 1570} 1571 1572SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) 1573{ 1574 if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && 1575 who != RUSAGE_THREAD) 1576 return -EINVAL; 1577 return getrusage(current, who, ru); 1578} 1579 1580SYSCALL_DEFINE1(umask, int, mask) 1581{ 1582 mask = xchg(¤t->fs->umask, mask & S_IRWXUGO); 1583 return mask; 1584} 1585 1586SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, 1587 unsigned long, arg4, unsigned long, arg5) 1588{ 1589 struct task_struct *me = current; 1590 unsigned char comm[sizeof(me->comm)]; 1591 long error; 1592 1593 error = security_task_prctl(option, arg2, arg3, arg4, arg5); 1594 if (error != -ENOSYS) 1595 return error; 1596 1597 error = 0; 1598 switch (option) { 1599 case PR_SET_PDEATHSIG: 1600 if (!valid_signal(arg2)) { 1601 error = -EINVAL; 1602 break; 1603 } 1604 me->pdeath_signal = arg2; 1605 error = 0; 1606 break; 1607 case PR_GET_PDEATHSIG: 1608 error = put_user(me->pdeath_signal, (int __user *)arg2); 1609 
break; 1610 case PR_GET_DUMPABLE: 1611 error = get_dumpable(me->mm); 1612 break; 1613 case PR_SET_DUMPABLE: 1614 if (arg2 < 0 || arg2 > 1) { 1615 error = -EINVAL; 1616 break; 1617 } 1618 set_dumpable(me->mm, arg2); 1619 error = 0; 1620 break; 1621 1622 case PR_SET_UNALIGN: 1623 error = SET_UNALIGN_CTL(me, arg2); 1624 break; 1625 case PR_GET_UNALIGN: 1626 error = GET_UNALIGN_CTL(me, arg2); 1627 break; 1628 case PR_SET_FPEMU: 1629 error = SET_FPEMU_CTL(me, arg2); 1630 break; 1631 case PR_GET_FPEMU: 1632 error = GET_FPEMU_CTL(me, arg2); 1633 break; 1634 case PR_SET_FPEXC: 1635 error = SET_FPEXC_CTL(me, arg2); 1636 break; 1637 case PR_GET_FPEXC: 1638 error = GET_FPEXC_CTL(me, arg2); 1639 break; 1640 case PR_GET_TIMING: 1641 error = PR_TIMING_STATISTICAL; 1642 break; 1643 case PR_SET_TIMING: 1644 if (arg2 != PR_TIMING_STATISTICAL) 1645 error = -EINVAL; 1646 else 1647 error = 0; 1648 break; 1649 1650 case PR_SET_NAME: 1651 comm[sizeof(me->comm)-1] = 0; 1652 if (strncpy_from_user(comm, (char __user *)arg2, 1653 sizeof(me->comm) - 1) < 0) 1654 return -EFAULT; 1655 set_task_comm(me, comm); 1656 return 0; 1657 case PR_GET_NAME: 1658 get_task_comm(comm, me); 1659 if (copy_to_user((char __user *)arg2, comm, 1660 sizeof(comm))) 1661 return -EFAULT; 1662 return 0; 1663 case PR_GET_ENDIAN: 1664 error = GET_ENDIAN(me, arg2); 1665 break; 1666 case PR_SET_ENDIAN: 1667 error = SET_ENDIAN(me, arg2); 1668 break; 1669 1670 case PR_GET_SECCOMP: 1671 error = prctl_get_seccomp(); 1672 break; 1673 case PR_SET_SECCOMP: 1674 error = prctl_set_seccomp(arg2); 1675 break; 1676 case PR_GET_TSC: 1677 error = GET_TSC_CTL(arg2); 1678 break; 1679 case PR_SET_TSC: 1680 error = SET_TSC_CTL(arg2); 1681 break; 1682 case PR_TASK_PERF_EVENTS_DISABLE: 1683 error = perf_event_task_disable(); 1684 break; 1685 case PR_TASK_PERF_EVENTS_ENABLE: 1686 error = perf_event_task_enable(); 1687 break; 1688 case PR_GET_TIMERSLACK: 1689 error = current->timer_slack_ns; 1690 break; 1691 case PR_SET_TIMERSLACK: 1692 if 
(arg2 <= 0) 1693 current->timer_slack_ns = 1694 current->default_timer_slack_ns; 1695 else 1696 current->timer_slack_ns = arg2; 1697 error = 0; 1698 break; 1699 case PR_MCE_KILL: 1700 if (arg4 | arg5) 1701 return -EINVAL; 1702 switch (arg2) { 1703 case PR_MCE_KILL_CLEAR: 1704 if (arg3 != 0) 1705 return -EINVAL; 1706 current->flags &= ~PF_MCE_PROCESS; 1707 break; 1708 case PR_MCE_KILL_SET: 1709 current->flags |= PF_MCE_PROCESS; 1710 if (arg3 == PR_MCE_KILL_EARLY) 1711 current->flags |= PF_MCE_EARLY; 1712 else if (arg3 == PR_MCE_KILL_LATE) 1713 current->flags &= ~PF_MCE_EARLY; 1714 else if (arg3 == PR_MCE_KILL_DEFAULT) 1715 current->flags &= 1716 ~(PF_MCE_EARLY|PF_MCE_PROCESS); 1717 else 1718 return -EINVAL; 1719 break; 1720 default: 1721 return -EINVAL; 1722 } 1723 error = 0; 1724 break; 1725 case PR_MCE_KILL_GET: 1726 if (arg2 | arg3 | arg4 | arg5) 1727 return -EINVAL; 1728 if (current->flags & PF_MCE_PROCESS) 1729 error = (current->flags & PF_MCE_EARLY) ? 1730 PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE; 1731 else 1732 error = PR_MCE_KILL_DEFAULT; 1733 break; 1734 default: 1735 error = -EINVAL; 1736 break; 1737 } 1738 return error; 1739} 1740 1741SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, 1742 struct getcpu_cache __user *, unused) 1743{ 1744 int err = 0; 1745 int cpu = raw_smp_processor_id(); 1746 if (cpup) 1747 err |= put_user(cpu, cpup); 1748 if (nodep) 1749 err |= put_user(cpu_to_node(cpu), nodep); 1750 return err ? -EFAULT : 0; 1751} 1752 1753char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; 1754 1755static void argv_cleanup(struct subprocess_info *info) 1756{ 1757 argv_free(info->argv); 1758} 1759 1760/** 1761 * orderly_poweroff - Trigger an orderly system poweroff 1762 * @force: force poweroff if command execution fails 1763 * 1764 * This may be called from any context to trigger a system shutdown. 1765 * If the orderly shutdown fails, it will force an immediate shutdown. 
1766 */ 1767int orderly_poweroff(bool force) 1768{ 1769 int argc; 1770 char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); 1771 static char *envp[] = { 1772 "HOME=/", 1773 "PATH=/sbin:/bin:/usr/sbin:/usr/bin", 1774 NULL 1775 }; 1776 int ret = -ENOMEM; 1777 struct subprocess_info *info; 1778 1779 if (argv == NULL) { 1780 printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", 1781 __func__, poweroff_cmd); 1782 goto out; 1783 } 1784 1785 info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC); 1786 if (info == NULL) { 1787 argv_free(argv); 1788 goto out; 1789 } 1790 1791 call_usermodehelper_setfns(info, NULL, argv_cleanup, NULL); 1792 1793 ret = call_usermodehelper_exec(info, UMH_NO_WAIT); 1794 1795 out: 1796 if (ret && force) { 1797 printk(KERN_WARNING "Failed to start orderly shutdown: " 1798 "forcing the issue\n"); 1799 1800 /* I guess this should try to kick off some daemon to 1801 sync and poweroff asap. Or not even bother syncing 1802 if we're doing an emergency shutdown? */ 1803 emergency_sync(); 1804 kernel_power_off(); 1805 } 1806 1807 return ret; 1808} 1809EXPORT_SYMBOL_GPL(orderly_poweroff); 1810