/*
 *  linux/kernel/sys.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/module.h>
#include <linux/mm.h>
#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/perf_event.h>
#include <linux/resource.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/workqueue.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/key.h>
#include <linux/times.h>
#include <linux/posix-timers.h>
#include <linux/security.h>
#include <linux/dcookies.h>
#include <linux/suspend.h>
#include <linux/tty.h>
#include <linux/signal.h>
#include <linux/cn_proc.h>
#include <linux/getcpu.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/seccomp.h>
#include <linux/cpu.h>
#include <linux/personality.h>
#include <linux/ptrace.h>
#include <linux/fs_struct.h>
#include <linux/gfp.h>

#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/kprobes.h>
#include <linux/user_namespace.h>

#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/unistd.h>

#ifndef SET_UNALIGN_CTL
# define SET_UNALIGN_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_UNALIGN_CTL
# define GET_UNALIGN_CTL(a,b)	(-EINVAL)
#endif
#ifndef SET_FPEMU_CTL
# define SET_FPEMU_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_FPEMU_CTL
# define GET_FPEMU_CTL(a,b)	(-EINVAL)
#endif
#ifndef SET_FPEXC_CTL
# define SET_FPEXC_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_FPEXC_CTL
# define GET_FPEXC_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_ENDIAN
# define GET_ENDIAN(a,b)	(-EINVAL)
#endif
#ifndef SET_ENDIAN
# define SET_ENDIAN(a,b)	(-EINVAL)
#endif
#ifndef GET_TSC_CTL
# define GET_TSC_CTL(a)		(-EINVAL)
#endif
#ifndef SET_TSC_CTL
# define SET_TSC_CTL(a)		(-EINVAL)
#endif

/*
 * this is where the system-wide overflow UID and GID are defined, for
 * architectures that now have 32-bit UID/GID but didn't in the past
 */

int overflowuid = DEFAULT_OVERFLOWUID;
int overflowgid = DEFAULT_OVERFLOWGID;

#ifdef CONFIG_UID16
EXPORT_SYMBOL(overflowuid);
EXPORT_SYMBOL(overflowgid);
#endif

/*
 * the same as above, but for filesystems which can only store a 16-bit
 * UID and GID. as such, this is needed on all architectures
 */

int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
int fs_overflowgid = DEFAULT_FS_OVERFLOWGID;

EXPORT_SYMBOL(fs_overflowuid);
EXPORT_SYMBOL(fs_overflowgid);

/*
 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
 */

int C_A_D = 1;
struct pid *cad_pid;
EXPORT_SYMBOL(cad_pid);

/*
 * If set, this is used for preparing the system to power off.
 */

void (*pm_power_off_prepare)(void);

/*
 * set the priority of a task
 * - the caller must hold the RCU read lock
 */
static int set_one_prio(struct task_struct *p, int niceval, int error)
{
	const struct cred *cred = current_cred(), *pcred = __task_cred(p);
	int no_nice;

	if (pcred->uid  != cred->euid &&
	    pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) {
		error = -EPERM;
		goto out;
	}
	if (niceval < task_nice(p) && !can_nice(p, niceval)) {
		error = -EACCES;
		goto out;
	}
	no_nice = security_task_setnice(p, niceval);
	if (no_nice) {
		error = no_nice;
		goto out;
	}
	if (error == -ESRCH)
		error = 0;
	set_user_nice(p, niceval);
out:
	return error;
}

SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	int error = -EINVAL;
	struct pid *pgrp;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		goto out;

	/* normalize: avoid signed division (rounding problems) */
	error = -ESRCH;
	if (niceval < -20)
		niceval = -20;
	if (niceval > 19)
		niceval = 19;

	rcu_read_lock();
	read_lock(&tasklist_lock);
	switch (which) {
		case PRIO_PROCESS:
			if (who)
				p = find_task_by_vpid(who);
			else
				p = current;
			if (p)
				error = set_one_prio(p, niceval, error);
			break;
		case PRIO_PGRP:
			if (who)
				pgrp = find_vpid(who);
			else
				pgrp = task_pgrp(current);
			do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
				error = set_one_prio(p, niceval, error);
			} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
			break;
		case PRIO_USER:
			user = (struct user_struct *) cred->user;
			if (!who)
				who = cred->uid;
			else if ((who != cred->uid) &&
				 !(user = find_user(who)))
				goto out_unlock;	/* No processes for this user */

			do_each_thread(g, p) {
				if (__task_cred(p)->uid == who)
					error = set_one_prio(p, niceval, error);
			} while_each_thread(g, p);
			if (who != cred->uid)
				free_uid(user);		/* For find_user() */
			break;
	}
out_unlock:
	read_unlock(&tasklist_lock);
	rcu_read_unlock();
out:
	return error;
}
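
/*
 * Illustrative userspace sketch (not part of this file): lowering the
 * scheduling priority of the calling process group via setpriority(2).
 * A who value of 0 selects the caller's own group, matching the
 * task_pgrp(current) branch above.
 *
 *	#include <sys/resource.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		if (setpriority(PRIO_PGRP, 0, 10) == -1) {
 *			perror("setpriority");
 *			return 1;
 *		}
 *		return 0;
 *	}
 */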

/*
 * Ugh. To avoid negative return values, "getpriority()" will
 * not return the normal nice-value, but a negated value that
 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 * to stay compatible.
 */
SYSCALL_DEFINE2(getpriority, int, which, int, who)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	long niceval, retval = -ESRCH;
	struct pid *pgrp;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		return -EINVAL;

	rcu_read_lock();
	read_lock(&tasklist_lock);
	switch (which) {
		case PRIO_PROCESS:
			if (who)
				p = find_task_by_vpid(who);
			else
				p = current;
			if (p) {
				niceval = 20 - task_nice(p);
				if (niceval > retval)
					retval = niceval;
			}
			break;
		case PRIO_PGRP:
			if (who)
				pgrp = find_vpid(who);
			else
				pgrp = task_pgrp(current);
			do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
				niceval = 20 - task_nice(p);
				if (niceval > retval)
					retval = niceval;
			} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
			break;
		case PRIO_USER:
			user = (struct user_struct *) cred->user;
			if (!who)
				who = cred->uid;
			else if ((who != cred->uid) &&
				 !(user = find_user(who)))
				goto out_unlock;	/* No processes for this user */

			do_each_thread(g, p) {
				if (__task_cred(p)->uid == who) {
					niceval = 20 - task_nice(p);
					if (niceval > retval)
						retval = niceval;
				}
			} while_each_thread(g, p);
			if (who != cred->uid)
				free_uid(user);		/* for find_user() */
			break;
	}
out_unlock:
	read_unlock(&tasklist_lock);
	rcu_read_unlock();

	return retval;
}
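
/*
 * Illustrative userspace sketch: because the raw syscall encodes the
 * result as 20 - nice (see the comment above), glibc undoes the offset
 * and a -1 return is then a legitimate nice value, so callers must
 * clear errno first to distinguish it from an error.
 *
 *	#include <sys/resource.h>
 *	#include <errno.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		int prio;
 *
 *		errno = 0;
 *		prio = getpriority(PRIO_PROCESS, 0);
 *		if (prio == -1 && errno != 0) {
 *			perror("getpriority");
 *			return 1;
 *		}
 *		printf("nice value: %d\n", prio);
 *		return 0;
 *	}
 */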

/**
 *	emergency_restart - reboot the system
 *
 *	Without shutting down any hardware or taking any locks
 *	reboot the system.  This is called when we know we are in
 *	trouble so this is our best effort to reboot.  This is
 *	safe to call in interrupt context.
 */
void emergency_restart(void)
{
	machine_emergency_restart();
}
EXPORT_SYMBOL_GPL(emergency_restart);

void kernel_restart_prepare(char *cmd)
{
	blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
	system_state = SYSTEM_RESTART;
	device_shutdown();
	sysdev_shutdown();
}

/**
 *	kernel_restart - reboot the system
 *	@cmd: pointer to buffer containing command to execute for restart
 *		or %NULL
 *
 *	Shutdown everything and perform a clean reboot.
 *	This is not safe to call in interrupt context.
 */
void kernel_restart(char *cmd)
{
#ifdef CONFIG_DUMP_PREV_OOPS_MSG
	enable_oopsbuf(1);
#endif
	kernel_restart_prepare(cmd);
	if (!cmd)
		printk(KERN_EMERG "Restarting system.\n");
	else
		printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
	machine_restart(cmd);
}
EXPORT_SYMBOL_GPL(kernel_restart);

static void kernel_shutdown_prepare(enum system_states state)
{
	blocking_notifier_call_chain(&reboot_notifier_list,
		(state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
	system_state = state;
	device_shutdown();
}
/**
 *	kernel_halt - halt the system
 *
 *	Shutdown everything and perform a clean system halt.
 */
void kernel_halt(void)
{
	kernel_shutdown_prepare(SYSTEM_HALT);
	sysdev_shutdown();
	printk(KERN_EMERG "System halted.\n");
	machine_halt();
}

EXPORT_SYMBOL_GPL(kernel_halt);

/**
 *	kernel_power_off - power_off the system
 *
 *	Shutdown everything and perform a clean system power_off.
 */
void kernel_power_off(void)
{
	kernel_shutdown_prepare(SYSTEM_POWER_OFF);
	if (pm_power_off_prepare)
		pm_power_off_prepare();
	disable_nonboot_cpus();
	sysdev_shutdown();
	printk(KERN_EMERG "Power down.\n");
	machine_power_off();
}
EXPORT_SYMBOL_GPL(kernel_power_off);

static DEFINE_MUTEX(reboot_mutex);

/*
 * Reboot system call: for obvious reasons only root may call it,
 * and even root needs to set up some magic numbers in the registers
 * so that some mistake won't make this reboot the whole machine.
 * You can also set the meaning of the ctrl-alt-del-key here.
 *
 * reboot doesn't sync: do that yourself before calling this.
 */
SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
		void __user *, arg)
{
	char buffer[256];
	int ret = 0;

	/* We only trust the superuser with rebooting the system. */
	if (!capable(CAP_SYS_BOOT))
		return -EPERM;

	/* For safety, we require "magic" arguments. */
	if (magic1 != LINUX_REBOOT_MAGIC1 ||
	    (magic2 != LINUX_REBOOT_MAGIC2 &&
	     magic2 != LINUX_REBOOT_MAGIC2A &&
	     magic2 != LINUX_REBOOT_MAGIC2B &&
	     magic2 != LINUX_REBOOT_MAGIC2C))
		return -EINVAL;

	/* Instead of trying to make the power_off code look like
	 * halt when pm_power_off is not set do it the easy way.
	 */
	if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
		cmd = LINUX_REBOOT_CMD_HALT;

	mutex_lock(&reboot_mutex);
	switch (cmd) {
	case LINUX_REBOOT_CMD_RESTART:
		kernel_restart(NULL);
		break;

	case LINUX_REBOOT_CMD_CAD_ON:
		C_A_D = 1;
		break;

	case LINUX_REBOOT_CMD_CAD_OFF:
		C_A_D = 0;
		break;

	case LINUX_REBOOT_CMD_HALT:
		kernel_halt();
		do_exit(0);
		panic("cannot halt");

	case LINUX_REBOOT_CMD_POWER_OFF:
		kernel_power_off();
		do_exit(0);
		break;

	case LINUX_REBOOT_CMD_RESTART2:
		if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
			ret = -EFAULT;
			break;
		}
		buffer[sizeof(buffer) - 1] = '\0';

		kernel_restart(buffer);
		break;

#ifdef CONFIG_KEXEC
	case LINUX_REBOOT_CMD_KEXEC:
		ret = kernel_kexec();
		break;
#endif

#ifdef CONFIG_HIBERNATION
	case LINUX_REBOOT_CMD_SW_SUSPEND:
		ret = hibernate();
		break;
#endif

	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&reboot_mutex);
	return ret;
}
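
/*
 * Illustrative userspace sketch: a minimal reboot(2) call.  glibc's
 * one-argument wrapper supplies the magic numbers checked above;
 * RB_AUTOBOOT is glibc's name for LINUX_REBOOT_CMD_RESTART.  The
 * caller needs CAP_SYS_BOOT, and since reboot(2) does not sync, the
 * usual idiom is to call sync(2) first.
 *
 *	#include <unistd.h>
 *	#include <sys/reboot.h>
 *
 *	int main(void)
 *	{
 *		sync();
 *		return reboot(RB_AUTOBOOT);
 *	}
 */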

static void deferred_cad(struct work_struct *dummy)
{
	kernel_restart(NULL);
}

/*
 * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
 * As it's called within an interrupt, it may NOT sync: the only choice
 * is whether to reboot at once, or just ignore the ctrl-alt-del.
 */
void ctrl_alt_del(void)
{
	static DECLARE_WORK(cad_work, deferred_cad);

	if (C_A_D)
		schedule_work(&cad_work);
	else
		kill_cad_pid(SIGINT, 1);
}

/*
 * Unprivileged users may change the real gid to the effective gid
 * or vice versa.  (BSD-style)
 *
 * If you set the real gid at all, or set the effective gid to a value not
 * equal to the real gid, then the saved gid is set to the new effective gid.
 *
 * This makes it possible for a setgid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setregid() will be
 * 100% compatible with BSD.  A program which uses just setgid() will be
 * 100% compatible with POSIX with saved IDs.
 *
 * SMP: There are no races; the GIDs are checked only by filesystem
 *      operations (as far as semantic preservation is concerned).
 */
SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (rgid != (gid_t) -1) {
		if (old->gid == rgid ||
		    old->egid == rgid ||
		    capable(CAP_SETGID))
			new->gid = rgid;
		else
			goto error;
	}
	if (egid != (gid_t) -1) {
		if (old->gid == egid ||
		    old->egid == egid ||
		    old->sgid == egid ||
		    capable(CAP_SETGID))
			new->egid = egid;
		else
			goto error;
	}

	if (rgid != (gid_t) -1 ||
	    (egid != (gid_t) -1 && egid != old->gid))
		new->sgid = new->egid;
	new->fsgid = new->egid;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

/*
 * setgid() is implemented like SysV w/ SAVED_IDS
 *
 * SMP: Same implicit races as above.
 */
SYSCALL_DEFINE1(setgid, gid_t, gid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (capable(CAP_SETGID))
		new->gid = new->egid = new->sgid = new->fsgid = gid;
	else if (gid == old->gid || gid == old->sgid)
		new->egid = new->fsgid = gid;
	else
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

/*
 * change the user struct in a credentials set to match the new UID
 */
static int set_user(struct cred *new)
{
	struct user_struct *new_user;

	new_user = alloc_uid(current_user_ns(), new->uid);
	if (!new_user)
		return -EAGAIN;

	if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
			new_user != INIT_USER) {
		free_uid(new_user);
		return -EAGAIN;
	}

	free_uid(new->user);
	new->user = new_user;
	return 0;
}

/*
 * Unprivileged users may change the real uid to the effective uid
 * or vice versa.  (BSD-style)
 *
 * If you set the real uid at all, or set the effective uid to a value not
 * equal to the real uid, then the saved uid is set to the new effective uid.
 *
 * This makes it possible for a setuid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setreuid() will be
 * 100% compatible with BSD.  A program which uses just setuid() will be
 * 100% compatible with POSIX with saved IDs.
 */
SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (ruid != (uid_t) -1) {
		new->uid = ruid;
		if (old->uid != ruid &&
		    old->euid != ruid &&
		    !capable(CAP_SETUID))
			goto error;
	}

	if (euid != (uid_t) -1) {
		new->euid = euid;
		if (old->uid != euid &&
		    old->euid != euid &&
		    old->suid != euid &&
		    !capable(CAP_SETUID))
			goto error;
	}

	if (new->uid != old->uid) {
		retval = set_user(new);
		if (retval < 0)
			goto error;
	}
	if (ruid != (uid_t) -1 ||
	    (euid != (uid_t) -1 && euid != old->uid))
		new->suid = new->euid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

/*
 * setuid() is implemented like SysV with SAVED_IDS
 *
 * Note that SAVED_ID's is deficient in that a setuid root program
 * like sendmail, for example, cannot set its uid to be a normal
 * user and then switch back, because if you're root, setuid() sets
 * the saved uid too.  If you don't like this, blame the bright people
 * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
 * will allow a root program to temporarily drop privileges and be able to
 * regain them by swapping the real and effective uid.
 */
SYSCALL_DEFINE1(setuid, uid_t, uid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (capable(CAP_SETUID)) {
		new->suid = new->uid = uid;
		if (uid != old->uid) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	} else if (uid != old->uid && uid != new->suid) {
		goto error;
	}

	new->fsuid = new->euid = uid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}


/*
 * This function implements a generic ability to update ruid, euid,
 * and suid.  This allows you to implement the 4.4 compatible seteuid().
 */
SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;

	old = current_cred();

	retval = -EPERM;
	if (!capable(CAP_SETUID)) {
		if (ruid != (uid_t) -1 && ruid != old->uid &&
		    ruid != old->euid  && ruid != old->suid)
			goto error;
		if (euid != (uid_t) -1 && euid != old->uid &&
		    euid != old->euid  && euid != old->suid)
			goto error;
		if (suid != (uid_t) -1 && suid != old->uid &&
		    suid != old->euid  && suid != old->suid)
			goto error;
	}

	if (ruid != (uid_t) -1) {
		new->uid = ruid;
		if (ruid != old->uid) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	}
	if (euid != (uid_t) -1)
		new->euid = euid;
	if (suid != (uid_t) -1)
		new->suid = suid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

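/*
 * Illustrative userspace sketch: permanently dropping privileges with
 * setresgid(2)/setresuid(2).  Groups must be dropped before the uid,
 * because after the uid change the process would no longer have
 * CAP_SETGID.  The final setuid(0) call is a common hardening check
 * that privileges really cannot be regained.
 *
 *	#define _GNU_SOURCE
 *	#include <unistd.h>
 *	#include <stdlib.h>
 *
 *	static void drop_privs(uid_t uid, gid_t gid)
 *	{
 *		if (setresgid(gid, gid, gid) != 0)
 *			exit(1);
 *		if (setresuid(uid, uid, uid) != 0)
 *			exit(1);
 *		if (setuid(0) != -1)
 *			exit(1);
 *	}
 */
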
SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid)
{
	const struct cred *cred = current_cred();
	int retval;

	if (!(retval   = put_user(cred->uid,  ruid)) &&
	    !(retval   = put_user(cred->euid, euid)))
		retval = put_user(cred->suid, suid);

	return retval;
}

/*
 * Same as above, but for rgid, egid, sgid.
 */
SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (!capable(CAP_SETGID)) {
		if (rgid != (gid_t) -1 && rgid != old->gid &&
		    rgid != old->egid  && rgid != old->sgid)
			goto error;
		if (egid != (gid_t) -1 && egid != old->gid &&
		    egid != old->egid  && egid != old->sgid)
			goto error;
		if (sgid != (gid_t) -1 && sgid != old->gid &&
		    sgid != old->egid  && sgid != old->sgid)
			goto error;
	}

	if (rgid != (gid_t) -1)
		new->gid = rgid;
	if (egid != (gid_t) -1)
		new->egid = egid;
	if (sgid != (gid_t) -1)
		new->sgid = sgid;
	new->fsgid = new->egid;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid)
{
	const struct cred *cred = current_cred();
	int retval;

	if (!(retval   = put_user(cred->gid,  rgid)) &&
	    !(retval   = put_user(cred->egid, egid)))
		retval = put_user(cred->sgid, sgid);

	return retval;
}


/*
 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
 * is used for "access()" and for the NFS daemon (letting nfsd stay at
 * whatever uid it wants to). It normally shadows "euid", except when
 * explicitly set by setfsuid() or for access..
 */
SYSCALL_DEFINE1(setfsuid, uid_t, uid)
{
	const struct cred *old;
	struct cred *new;
	uid_t old_fsuid;

	new = prepare_creds();
	if (!new)
		return current_fsuid();
	old = current_cred();
	old_fsuid = old->fsuid;

	if (uid == old->uid  || uid == old->euid  ||
	    uid == old->suid || uid == old->fsuid ||
	    capable(CAP_SETUID)) {
		if (uid != old_fsuid) {
			new->fsuid = uid;
			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
				goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsuid;

change_okay:
	commit_creds(new);
	return old_fsuid;
}

/*
 * Samma på svenska.. ("Same in Swedish" - setfsgid() mirrors
 * setfsuid() above, but for the fsgid.)
 */
SYSCALL_DEFINE1(setfsgid, gid_t, gid)
{
	const struct cred *old;
	struct cred *new;
	gid_t old_fsgid;

	new = prepare_creds();
	if (!new)
		return current_fsgid();
	old = current_cred();
	old_fsgid = old->fsgid;

	if (gid == old->gid  || gid == old->egid  ||
	    gid == old->sgid || gid == old->fsgid ||
	    capable(CAP_SETGID)) {
		if (gid != old_fsgid) {
			new->fsgid = gid;
			goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsgid;

change_okay:
	commit_creds(new);
	return old_fsgid;
}

void do_sys_times(struct tms *tms)
{
	cputime_t tgutime, tgstime, cutime, cstime;

	spin_lock_irq(&current->sighand->siglock);
	thread_group_times(current, &tgutime, &tgstime);
	cutime = current->signal->cutime;
	cstime = current->signal->cstime;
	spin_unlock_irq(&current->sighand->siglock);
	tms->tms_utime = cputime_to_clock_t(tgutime);
	tms->tms_stime = cputime_to_clock_t(tgstime);
	tms->tms_cutime = cputime_to_clock_t(cutime);
	tms->tms_cstime = cputime_to_clock_t(cstime);
}

SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
{
	if (tbuf) {
		struct tms tmp;

		do_sys_times(&tmp);
		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
			return -EFAULT;
	}
	force_successful_syscall_return();
	return (long) jiffies_64_to_clock_t(get_jiffies_64());
}
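
/*
 * Illustrative userspace sketch: converting the clock-tick counts that
 * do_sys_times() fills in above into seconds, using the tick rate
 * reported by sysconf(_SC_CLK_TCK).
 *
 *	#include <sys/times.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		struct tms t;
 *		long hz = sysconf(_SC_CLK_TCK);
 *
 *		if (times(&t) == (clock_t) -1)
 *			return 1;
 *		printf("user %.2fs system %.2fs\n",
 *		       (double) t.tms_utime / hz,
 *		       (double) t.tms_stime / hz);
 *		return 0;
 *	}
 */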

/*
 * This needs some heavy checking ...
 * I just haven't the stomach for it. I also don't fully
 * understand sessions/pgrp etc. Let somebody who does explain it.
 *
 * OK, I think I have the protection semantics right.... this is really
 * only important on a multi-user system anyway, to make sure one user
 * can't send a signal to a process owned by another.  -TYT, 12/12/91
 *
 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
 * LBT 04.03.94
 */
SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
{
	struct task_struct *p;
	struct task_struct *group_leader = current->group_leader;
	struct pid *pgrp;
	int err;

	if (!pid)
		pid = task_pid_vnr(group_leader);
	if (!pgid)
		pgid = pid;
	if (pgid < 0)
		return -EINVAL;
	rcu_read_lock();

	/* From this point forward we keep holding onto the tasklist lock
	 * so that our parent does not change from under us. -DaveM
	 */
	write_lock_irq(&tasklist_lock);

	err = -ESRCH;
	p = find_task_by_vpid(pid);
	if (!p)
		goto out;

	err = -EINVAL;
	if (!thread_group_leader(p))
		goto out;

	if (same_thread_group(p->real_parent, group_leader)) {
		err = -EPERM;
		if (task_session(p) != task_session(group_leader))
			goto out;
		err = -EACCES;
		if (p->did_exec)
			goto out;
	} else {
		err = -ESRCH;
		if (p != group_leader)
			goto out;
	}

	err = -EPERM;
	if (p->signal->leader)
		goto out;

	pgrp = task_pid(p);
	if (pgid != pid) {
		struct task_struct *g;

		pgrp = find_vpid(pgid);
		g = pid_task(pgrp, PIDTYPE_PGID);
		if (!g || task_session(g) != task_session(group_leader))
			goto out;
	}

	err = security_task_setpgid(p, pgid);
	if (err)
		goto out;

	if (task_pgrp(p) != pgrp)
		change_pid(p, PIDTYPE_PGID, pgrp);

	err = 0;
out:
	/* All paths lead to here, thus we are safe. -DaveM */
	write_unlock_irq(&tasklist_lock);
	rcu_read_unlock();
	return err;
}

SYSCALL_DEFINE1(getpgid, pid_t, pid)
{
	struct task_struct *p;
	struct pid *grp;
	int retval;

	rcu_read_lock();
	if (!pid)
		grp = task_pgrp(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		grp = task_pgrp(p);
		if (!grp)
			goto out;

		retval = security_task_getpgid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(grp);
out:
	rcu_read_unlock();
	return retval;
}

#ifdef __ARCH_WANT_SYS_GETPGRP

SYSCALL_DEFINE0(getpgrp)
{
	return sys_getpgid(0);
}

#endif

SYSCALL_DEFINE1(getsid, pid_t, pid)
{
	struct task_struct *p;
	struct pid *sid;
	int retval;

	rcu_read_lock();
	if (!pid)
		sid = task_session(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		sid = task_session(p);
		if (!sid)
			goto out;

		retval = security_task_getsid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(sid);
out:
	rcu_read_unlock();
	return retval;
}

SYSCALL_DEFINE0(setsid)
{
	struct task_struct *group_leader = current->group_leader;
	struct pid *sid = task_pid(group_leader);
	pid_t session = pid_vnr(sid);
	int err = -EPERM;

	write_lock_irq(&tasklist_lock);
	/* Fail if I am already a session leader */
	if (group_leader->signal->leader)
		goto out;

	/* Fail if a process group id already exists that equals the
	 * proposed session id.
	 */
	if (pid_task(sid, PIDTYPE_PGID))
		goto out;

	group_leader->signal->leader = 1;
	__set_special_pids(sid);

	proc_clear_tty(group_leader);

	err = session;
out:
	write_unlock_irq(&tasklist_lock);
	if (err > 0)
		proc_sid_connector(group_leader);
	return err;
}
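
/*
 * Illustrative userspace sketch: the classic first step of
 * daemonising.  fork(2) guarantees the child is not a process group
 * leader, so there cannot be a process group whose id equals the
 * child's pid and the pid_task(sid, PIDTYPE_PGID) check above cannot
 * make its setsid(2) fail.
 *
 *	#include <sys/types.h>
 *	#include <unistd.h>
 *	#include <stdlib.h>
 *
 *	static void become_session_leader(void)
 *	{
 *		pid_t pid = fork();
 *
 *		if (pid < 0)
 *			exit(1);
 *		if (pid > 0)
 *			exit(0);
 *		if (setsid() < 0)
 *			exit(1);
 *	}
 */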

DECLARE_RWSEM(uts_sem);

#ifdef COMPAT_UTS_MACHINE
#define override_architecture(name) \
	(personality(current->personality) == PER_LINUX32 && \
	 copy_to_user(name->machine, COMPAT_UTS_MACHINE, \
		      sizeof(COMPAT_UTS_MACHINE)))
#else
#define override_architecture(name)	0
#endif

SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
	int errno = 0;

	down_read(&uts_sem);
	if (copy_to_user(name, utsname(), sizeof *name))
		errno = -EFAULT;
	up_read(&uts_sem);

	if (!errno && override_architecture(name))
		errno = -EFAULT;
	return errno;
}

#ifdef __ARCH_WANT_SYS_OLD_UNAME
/*
 * Old cruft
 */
SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
{
	int error = 0;

	if (!name)
		return -EFAULT;

	down_read(&uts_sem);
	if (copy_to_user(name, utsname(), sizeof(*name)))
		error = -EFAULT;
	up_read(&uts_sem);

	if (!error && override_architecture(name))
		error = -EFAULT;
	return error;
}

SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
{
	int error;

	if (!name)
		return -EFAULT;
	if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
		return -EFAULT;

	down_read(&uts_sem);
	error = __copy_to_user(&name->sysname, &utsname()->sysname,
			       __OLD_UTS_LEN);
	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->release, &utsname()->release,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->release + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->version, &utsname()->version,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->version + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->machine, &utsname()->machine,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->machine + __OLD_UTS_LEN);
	up_read(&uts_sem);

	if (!error && override_architecture(name))
		error = -EFAULT;
	return error ? -EFAULT : 0;
}
#endif

SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;
	down_write(&uts_sem);
	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u = utsname();

		memcpy(u->nodename, tmp, len);
		memset(u->nodename + len, 0, sizeof(u->nodename) - len);
		errno = 0;
	}
	up_write(&uts_sem);
	return errno;
}
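
/*
 * Illustrative userspace sketch: setting the node name ("example-host"
 * is a placeholder).  The caller needs CAP_SYS_ADMIN, mirroring the
 * capable() check above.
 *
 *	#include <unistd.h>
 *	#include <string.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		const char *name = "example-host";
 *
 *		if (sethostname(name, strlen(name)) == -1) {
 *			perror("sethostname");
 *			return 1;
 *		}
 *		return 0;
 *	}
 */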

#ifdef __ARCH_WANT_SYS_GETHOSTNAME

SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
{
	int i, errno;
	struct new_utsname *u;

	if (len < 0)
		return -EINVAL;
	down_read(&uts_sem);
	u = utsname();
	i = 1 + strlen(u->nodename);
	if (i > len)
		i = len;
	errno = 0;
	if (copy_to_user(name, u->nodename, i))
		errno = -EFAULT;
	up_read(&uts_sem);
	return errno;
}

#endif

/*
 * Only setdomainname; getdomainname can be implemented by calling
 * uname()
 */
SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;

	down_write(&uts_sem);
	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u = utsname();

		memcpy(u->domainname, tmp, len);
		memset(u->domainname + len, 0, sizeof(u->domainname) - len);
		errno = 0;
	}
	up_write(&uts_sem);
	return errno;
}

SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
	struct rlimit value;
	int ret;

	ret = do_prlimit(current, resource, NULL, &value);
	if (!ret)
		ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;

	return ret;
}

#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT

/*
 *	Back compatibility for getrlimit. Needed for some apps.
 */

SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
		struct rlimit __user *, rlim)
{
	struct rlimit x;
	if (resource >= RLIM_NLIMITS)
		return -EINVAL;

	task_lock(current->group_leader);
	x = current->signal->rlim[resource];
	task_unlock(current->group_leader);
	if (x.rlim_cur > 0x7FFFFFFF)
		x.rlim_cur = 0x7FFFFFFF;
	if (x.rlim_max > 0x7FFFFFFF)
		x.rlim_max = 0x7FFFFFFF;
	return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
}

#endif

static inline bool rlim64_is_infinity(__u64 rlim64)
{
#if BITS_PER_LONG < 64
	return rlim64 >= ULONG_MAX;
#else
	return rlim64 == RLIM64_INFINITY;
#endif
}

static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
{
	if (rlim->rlim_cur == RLIM_INFINITY)
		rlim64->rlim_cur = RLIM64_INFINITY;
	else
		rlim64->rlim_cur = rlim->rlim_cur;
	if (rlim->rlim_max == RLIM_INFINITY)
		rlim64->rlim_max = RLIM64_INFINITY;
	else
		rlim64->rlim_max = rlim->rlim_max;
}

static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
{
	if (rlim64_is_infinity(rlim64->rlim_cur))
		rlim->rlim_cur = RLIM_INFINITY;
	else
		rlim->rlim_cur = (unsigned long)rlim64->rlim_cur;
	if (rlim64_is_infinity(rlim64->rlim_max))
		rlim->rlim_max = RLIM_INFINITY;
	else
		rlim->rlim_max = (unsigned long)rlim64->rlim_max;
}

/* make sure you are allowed to change @tsk limits before calling this */
int do_prlimit(struct task_struct *tsk, unsigned int resource,
		struct rlimit *new_rlim, struct rlimit *old_rlim)
{
	struct rlimit *rlim;
	int retval = 0;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;
	if (new_rlim) {
		if (new_rlim->rlim_cur > new_rlim->rlim_max)
			return -EINVAL;
		if (resource == RLIMIT_NOFILE &&
				new_rlim->rlim_max > sysctl_nr_open)
			return -EPERM;
	}

	/* protect tsk->signal and tsk->sighand from disappearing */
	read_lock(&tasklist_lock);
	if (!tsk->sighand) {
		retval = -ESRCH;
		goto out;
	}

	rlim = tsk->signal->rlim + resource;
	task_lock(tsk->group_leader);
	if (new_rlim) {
		if (new_rlim->rlim_max > rlim->rlim_max &&
				!capable(CAP_SYS_RESOURCE))
			retval = -EPERM;
		if (!retval)
			retval = security_task_setrlimit(tsk->group_leader,
					resource, new_rlim);
		if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
			/*
			 * The caller is asking for an immediate RLIMIT_CPU
			 * expiry.  But we use the zero value to mean "it was
			 * never set".  So let's cheat and make it one second
			 * instead
			 */
			new_rlim->rlim_cur = 1;
		}
	}
	if (!retval) {
		if (old_rlim)
			*old_rlim = *rlim;
		if (new_rlim)
			*rlim = *new_rlim;
	}
	task_unlock(tsk->group_leader);

	/*
	 * RLIMIT_CPU handling.   Note that the kernel fails to return an error
	 * code if it rejected the user's attempt to set RLIMIT_CPU.  This is a
	 * very long-standing error, and fixing it now risks breakage of
	 * applications, so we live with it
	 */
	if (!retval && new_rlim && resource == RLIMIT_CPU &&
			new_rlim->rlim_cur != RLIM_INFINITY)
		update_rlimit_cpu(tsk, new_rlim->rlim_cur);
out:
	read_unlock(&tasklist_lock);
	return retval;
}

/* rcu lock must be held */
static int check_prlimit_permission(struct task_struct *task)
{
	const struct cred *cred = current_cred(), *tcred;

	tcred = __task_cred(task);
	if (current != task &&
	    (cred->uid != tcred->euid ||
	     cred->uid != tcred->suid ||
	     cred->uid != tcred->uid  ||
	     cred->gid != tcred->egid ||
	     cred->gid != tcred->sgid ||
	     cred->gid != tcred->gid) &&
	     !capable(CAP_SYS_RESOURCE)) {
		return -EPERM;
	}

	return 0;
}

SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
		const struct rlimit64 __user *, new_rlim,
		struct rlimit64 __user *, old_rlim)
{
	struct rlimit64 old64, new64;
	struct rlimit old, new;
	struct task_struct *tsk;
	int ret;

	if (new_rlim) {
		if (copy_from_user(&new64, new_rlim, sizeof(new64)))
			return -EFAULT;
		rlim64_to_rlim(&new64, &new);
	}

	rcu_read_lock();
	tsk = pid ? find_task_by_vpid(pid) : current;
	if (!tsk) {
		rcu_read_unlock();
		return -ESRCH;
	}
	ret = check_prlimit_permission(tsk);
	if (ret) {
		rcu_read_unlock();
		return ret;
	}
	get_task_struct(tsk);
	rcu_read_unlock();

	ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
			old_rlim ? &old : NULL);

	if (!ret && old_rlim) {
		rlim_to_rlim64(&old, &old64);
		if (copy_to_user(old_rlim, &old64, sizeof(old64)))
			ret = -EFAULT;
	}

	put_task_struct(tsk);
	return ret;
}
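
/*
 * Illustrative userspace sketch: raising the caller's own
 * RLIMIT_NOFILE soft limit to its hard limit through glibc's prlimit()
 * wrapper, which ends up in the prlimit64 syscall above.  A pid of 0
 * means the calling process, matching the find_task_by_vpid(pid) :
 * current choice in the code.
 *
 *	#include <sys/resource.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		struct rlimit old, new;
 *
 *		if (prlimit(0, RLIMIT_NOFILE, NULL, &old) == -1)
 *			return 1;
 *		new.rlim_cur = old.rlim_max;
 *		new.rlim_max = old.rlim_max;
 *		if (prlimit(0, RLIMIT_NOFILE, &new, NULL) == -1) {
 *			perror("prlimit");
 *			return 1;
 *		}
 *		return 0;
 *	}
 */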

SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
	struct rlimit new_rlim;

	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
		return -EFAULT;
	return do_prlimit(current, resource, &new_rlim, NULL);
}

/*
 * It would make sense to put struct rusage in the task_struct,
 * except that would make the task_struct be *really big*.  After
 * task_struct gets moved into malloc'ed memory, it would
 * make sense to do this.  It will make moving the rest of the information
 * a lot simpler!  (Which we're not doing right now because we're not
 * measuring them yet).
 *
 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
 * races with threads incrementing their own counters.  But since word
 * reads are atomic, we either get new values or old values and we don't
 * care which for the sums.  We always take the siglock to protect reading
 * the c* fields from p->signal from races with exit.c updating those
 * fields when reaping, so a sample either gets all the additions of a
 * given child after it's reaped, or none so this sample is before reaping.
 *
 * Locking:
 * We need to take the siglock for CHILDREN, SELF and BOTH
 * for the cases current multithreaded, non-current single threaded and
 * non-current multithreaded.  Thread traversal is now safe with
 * the siglock held.
 * Strictly speaking, we do not need to take the siglock if we are current
 * and single threaded, as no one else can take our signal_struct away,
 * no one else can reap the children to update signal->c* counters, and
 * no one else can race with the signal-> fields. If we do not take any
 * lock, the signal-> fields could be read out of order while another
 * thread was just exiting. So we should place a read memory barrier when
 * we avoid the lock.  On the writer side, a write memory barrier is
 * implied in __exit_signal, as __exit_signal releases the siglock
 * spinlock after updating the signal-> fields. But we don't do this yet
 * to keep things simple.
 */

static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
{
	r->ru_nvcsw += t->nvcsw;
	r->ru_nivcsw += t->nivcsw;
	r->ru_minflt += t->min_flt;
	r->ru_majflt += t->maj_flt;
	r->ru_inblock += task_io_get_inblock(t);
	r->ru_oublock += task_io_get_oublock(t);
}

static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
{
	struct task_struct *t;
	unsigned long flags;
	cputime_t tgutime, tgstime, utime, stime;
	unsigned long maxrss = 0;

	memset((char *) r, 0, sizeof *r);
	utime = stime = cputime_zero;

	if (who == RUSAGE_THREAD) {
		task_times(current, &utime, &stime);
		accumulate_thread_rusage(p, r);
		maxrss = p->signal->maxrss;
		goto out;
	}

	if (!lock_task_sighand(p, &flags))
		return;

	switch (who) {
		case RUSAGE_BOTH:
		case RUSAGE_CHILDREN:
			utime = p->signal->cutime;
			stime = p->signal->cstime;
			r->ru_nvcsw = p->signal->cnvcsw;
			r->ru_nivcsw = p->signal->cnivcsw;
			r->ru_minflt = p->signal->cmin_flt;
			r->ru_majflt = p->signal->cmaj_flt;
			r->ru_inblock = p->signal->cinblock;
			r->ru_oublock = p->signal->coublock;
			maxrss = p->signal->cmaxrss;

			if (who == RUSAGE_CHILDREN)
				break;

		case RUSAGE_SELF:
			thread_group_times(p, &tgutime, &tgstime);
			utime = cputime_add(utime, tgutime);
			stime = cputime_add(stime, tgstime);
			r->ru_nvcsw += p->signal->nvcsw;
			r->ru_nivcsw += p->signal->nivcsw;
			r->ru_minflt += p->signal->min_flt;
			r->ru_majflt += p->signal->maj_flt;
			r->ru_inblock += p->signal->inblock;
			r->ru_oublock += p->signal->oublock;
			if (maxrss < p->signal->maxrss)
				maxrss = p->signal->maxrss;
			t = p;
			do {
				accumulate_thread_rusage(t, r);
				t = next_thread(t);
			} while (t != p);
			break;

		default:
			BUG();
	}
	unlock_task_sighand(p, &flags);

out:
	cputime_to_timeval(utime, &r->ru_utime);
	cputime_to_timeval(stime, &r->ru_stime);

	if (who != RUSAGE_CHILDREN) {
		struct mm_struct *mm = get_task_mm(p);
		if (mm) {
			setmax_mm_hiwater_rss(&maxrss, mm);
			mmput(mm);
		}
	}
	r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
}

int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
{
	struct rusage r;
	k_getrusage(p, who, &r);
	return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
}

SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
{
	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
	    who != RUSAGE_THREAD)
		return -EINVAL;
	return getrusage(current, who, ru);
}
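
/*
 * Illustrative userspace sketch: reading the peak RSS that
 * k_getrusage() reports, already converted to kilobytes by the
 * ru_maxrss page-to-KB conversion above.
 *
 *	#include <sys/resource.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		struct rusage ru;
 *
 *		if (getrusage(RUSAGE_SELF, &ru) == -1)
 *			return 1;
 *		printf("maxrss: %ld kB\n", ru.ru_maxrss);
 *		return 0;
 *	}
 */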

SYSCALL_DEFINE1(umask, int, mask)
{
	mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
	return mask;
}
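
/*
 * Illustrative userspace sketch: umask(2) always succeeds and returns
 * the previous mask, exactly like the xchg() above, so querying the
 * current value means setting it twice.
 *
 *	#include <sys/stat.h>
 *
 *	static mode_t query_umask(void)
 *	{
 *		mode_t old = umask(0);
 *
 *		umask(old);
 *		return old;
 *	}
 */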

SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
		unsigned long, arg4, unsigned long, arg5)
{
	struct task_struct *me = current;
	unsigned char comm[sizeof(me->comm)];
	long error;

	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
	if (error != -ENOSYS)
		return error;

	error = 0;
	switch (option) {
		case PR_SET_PDEATHSIG:
			if (!valid_signal(arg2)) {
				error = -EINVAL;
				break;
			}
			me->pdeath_signal = arg2;
			error = 0;
			break;
		case PR_GET_PDEATHSIG:
			error = put_user(me->pdeath_signal, (int __user *)arg2);
			break;
		case PR_GET_DUMPABLE:
			error = get_dumpable(me->mm);
			break;
		case PR_SET_DUMPABLE:
			if (arg2 < 0 || arg2 > 1) {
				error = -EINVAL;
				break;
			}
			set_dumpable(me->mm, arg2);
			error = 0;
			break;

		case PR_SET_UNALIGN:
			error = SET_UNALIGN_CTL(me, arg2);
			break;
		case PR_GET_UNALIGN:
			error = GET_UNALIGN_CTL(me, arg2);
			break;
		case PR_SET_FPEMU:
			error = SET_FPEMU_CTL(me, arg2);
			break;
		case PR_GET_FPEMU:
			error = GET_FPEMU_CTL(me, arg2);
			break;
		case PR_SET_FPEXC:
			error = SET_FPEXC_CTL(me, arg2);
			break;
		case PR_GET_FPEXC:
			error = GET_FPEXC_CTL(me, arg2);
			break;
		case PR_GET_TIMING:
			error = PR_TIMING_STATISTICAL;
			break;
		case PR_SET_TIMING:
			if (arg2 != PR_TIMING_STATISTICAL)
				error = -EINVAL;
			else
				error = 0;
			break;

		case PR_SET_NAME:
			comm[sizeof(me->comm)-1] = 0;
			if (strncpy_from_user(comm, (char __user *)arg2,
					      sizeof(me->comm) - 1) < 0)
				return -EFAULT;
			set_task_comm(me, comm);
			return 0;
		case PR_GET_NAME:
			get_task_comm(comm, me);
			if (copy_to_user((char __user *)arg2, comm,
					 sizeof(comm)))
				return -EFAULT;
			return 0;
		case PR_GET_ENDIAN:
			error = GET_ENDIAN(me, arg2);
			break;
		case PR_SET_ENDIAN:
			error = SET_ENDIAN(me, arg2);
			break;

		case PR_GET_SECCOMP:
			error = prctl_get_seccomp();
			break;
		case PR_SET_SECCOMP:
			error = prctl_set_seccomp(arg2);
			break;
		case PR_GET_TSC:
			error = GET_TSC_CTL(arg2);
			break;
		case PR_SET_TSC:
			error = SET_TSC_CTL(arg2);
			break;
		case PR_TASK_PERF_EVENTS_DISABLE:
			error = perf_event_task_disable();
			break;
		case PR_TASK_PERF_EVENTS_ENABLE:
			error = perf_event_task_enable();
			break;
		case PR_GET_TIMERSLACK:
			error = current->timer_slack_ns;
			break;
		case PR_SET_TIMERSLACK:
			if (arg2 <= 0)
				current->timer_slack_ns =
					current->default_timer_slack_ns;
			else
				current->timer_slack_ns = arg2;
			error = 0;
			break;
		case PR_MCE_KILL:
			if (arg4 | arg5)
				return -EINVAL;
			switch (arg2) {
			case PR_MCE_KILL_CLEAR:
				if (arg3 != 0)
					return -EINVAL;
				current->flags &= ~PF_MCE_PROCESS;
				break;
			case PR_MCE_KILL_SET:
				current->flags |= PF_MCE_PROCESS;
				if (arg3 == PR_MCE_KILL_EARLY)
					current->flags |= PF_MCE_EARLY;
				else if (arg3 == PR_MCE_KILL_LATE)
					current->flags &= ~PF_MCE_EARLY;
				else if (arg3 == PR_MCE_KILL_DEFAULT)
					current->flags &=
						~(PF_MCE_EARLY|PF_MCE_PROCESS);
				else
					return -EINVAL;
				break;
			default:
				return -EINVAL;
			}
			error = 0;
			break;
		case PR_MCE_KILL_GET:
			if (arg2 | arg3 | arg4 | arg5)
				return -EINVAL;
			if (current->flags & PF_MCE_PROCESS)
				error = (current->flags & PF_MCE_EARLY) ?
					PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
			else
				error = PR_MCE_KILL_DEFAULT;
			break;
		default:
			error = -EINVAL;
			break;
	}
	return error;
}
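
/*
 * Illustrative userspace sketch: naming the calling thread with
 * PR_SET_NAME and reading it back ("worker" is a placeholder).  Names
 * are silently truncated to fit the 16-byte task comm handled above,
 * and the PR_GET_NAME buffer must be at least that large.
 *
 *	#include <sys/prctl.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		char name[16];
 *
 *		if (prctl(PR_SET_NAME, "worker", 0, 0, 0) == -1)
 *			return 1;
 *		if (prctl(PR_GET_NAME, name, 0, 0, 0) == -1)
 *			return 1;
 *		printf("comm: %s\n", name);
 *		return 0;
 *	}
 */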

SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
		struct getcpu_cache __user *, unused)
{
	int err = 0;
	int cpu = raw_smp_processor_id();
	if (cpup)
		err |= put_user(cpu, cpup);
	if (nodep)
		err |= put_user(cpu_to_node(cpu), nodep);
	return err ? -EFAULT : 0;
}
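
/*
 * Illustrative userspace sketch: glibc's sched_getcpu() is the usual
 * front end for this syscall (on some architectures it goes through
 * the vDSO rather than the slow path above).
 *
 *	#define _GNU_SOURCE
 *	#include <sched.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		printf("running on cpu %d\n", sched_getcpu());
 *		return 0;
 *	}
 */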

char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";

static void argv_cleanup(struct subprocess_info *info)
{
	argv_free(info->argv);
}

/**
 * orderly_poweroff - Trigger an orderly system poweroff
 * @force: force poweroff if command execution fails
 *
 * This may be called from any context to trigger a system shutdown.
 * If the orderly shutdown fails, it will force an immediate shutdown.
 */
int orderly_poweroff(bool force)
{
	int argc;
	char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
	static char *envp[] = {
		"HOME=/",
		"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
		NULL
	};
	int ret = -ENOMEM;
	struct subprocess_info *info;

	if (argv == NULL) {
		printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
		       __func__, poweroff_cmd);
		goto out;
	}

	info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
	if (info == NULL) {
		argv_free(argv);
		goto out;
	}

	call_usermodehelper_setfns(info, NULL, argv_cleanup, NULL);

	ret = call_usermodehelper_exec(info, UMH_NO_WAIT);

  out:
	if (ret && force) {
		printk(KERN_WARNING "Failed to start orderly shutdown: "
		       "forcing the issue\n");

		/* I guess this should try to kick off some daemon to
		   sync and poweroff asap.  Or not even bother syncing
		   if we're doing an emergency shutdown? */
		emergency_sync();
		kernel_power_off();
	}

	return ret;
}
EXPORT_SYMBOL_GPL(orderly_poweroff);