/*
 *  linux/arch/x86-64/kernel/process.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any..
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL(idle_notifier_unregister);
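
/*
 * Rough usage sketch for the notifier interface above (hypothetical
 * caller, not part of this file).  A driver that wants to know when a
 * CPU enters or leaves idle would do something like:
 *
 *	static int my_idle_notify(struct notifier_block *nb,
 *				  unsigned long action, void *unused)
 *	{
 *		if (action == IDLE_START)
 *			;	// CPU is about to go idle
 *		else if (action == IDLE_END)
 *			;	// CPU has left idle
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_idle_nb = {
 *		.notifier_call = my_idle_notify,
 *	};
 *
 *	idle_notifier_register(&my_idle_nb);
 *
 * The callbacks are invoked from the idle and interrupt paths, so they
 * must not sleep.
 */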

void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

/*
 * We use this if we don't have any better
 * idle routine..
 */
static void default_idle(void)
{
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we
	 * test NEED_RESCHED:
	 */
	smp_mb();
	local_irq_disable();
	if (!need_resched()) {
		/* Enables interrupts one instruction before HLT.
		   x86 special cases this so there is no race. */
		safe_halt();
	} else
		local_irq_enable();
	current_thread_info()->status |= TS_POLLING;
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle (void)
{
	local_irq_enable();
	cpu_relax();
}

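/*
 * cpu_idle_wait() is used (e.g. when switching the idle routine) to make
 * sure no CPU is still running a stale pm_idle function.  A rough outline
 * of the handshake implemented below: pin ourselves to the current CPU,
 * mark every online CPU's cpu_idle_state, then sleep in one-second steps
 * until each of those CPUs has passed through its idle loop (which clears
 * the flag) or has gone offline.
 */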
void cpu_idle_wait(void)
{
	unsigned int cpu, this_cpu = get_cpu();
	cpumask_t map, tmp = current->cpus_allowed;

	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
	put_cpu();

	cpus_clear(map);
	for_each_online_cpu(cpu) {
		per_cpu(cpu_idle_state, cpu) = 1;
		cpu_set(cpu, map);
	}

	__get_cpu_var(cpu_idle_state) = 0;

	wmb();
	do {
		ssleep(1);
		for_each_online_cpu(cpu) {
			if (cpu_isset(cpu, map) &&
					!per_cpu(cpu_idle_state, cpu))
				cpu_clear(cpu, map);
		}
		cpus_and(map, map, cpu_online_map);
	} while (!cpus_empty(map));

	set_cpus_allowed(current, tmp);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	while (1)
		halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle (void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		while (!need_resched()) {
			void (*idle)(void);

			if (__get_cpu_var(cpu_idle_state))
				__get_cpu_var(cpu_idle_state) = 0;

			rmb();
			idle = pm_idle;
			if (!idle)
				idle = default_idle;
			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			idle();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
			__exit_idle();
		}

		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * New with Core Duo processors, MWAIT can take some hints based on CPU
 * capability.
 */
void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
{
	if (!need_resched()) {
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (!need_resched())
			__mwait(eax, ecx);
	}
}
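
/*
 * A note on the sequence above: MONITOR arms the monitoring hardware on
 * the cacheline containing the thread flags (where TIF_NEED_RESCHED
 * lives), and MWAIT then waits until that line is written or an
 * interrupt arrives.  The second need_resched() check after the barrier
 * closes the window where the flag is set between the first check and
 * MONITOR.  eax carries the target C-state hint and ecx the MWAIT
 * extensions; callers (e.g. ACPI C-state code) pass these through
 * unchanged.
 */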

/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
	if (!need_resched()) {
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (!need_resched())
			__sti_mwait(0, 0);
		else
			local_irq_enable();
	} else {
		local_irq_enable();
	}
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
	static int printed;
	if (cpu_has(c, X86_FEATURE_MWAIT)) {
		/*
		 * Skip if setup has overridden idle.
		 * One CPU supports mwait => all CPUs support mwait.
		 */
		if (!pm_idle) {
			if (!printed) {
				printk("using mwait in idle threads.\n");
				printed = 1;
			}
			pm_idle = mwait_idle;
		}
	}
}

static int __init idle_setup (char *str)
{
	if (!strcmp(str, "poll")) {
		printk("using polling idle threads.\n");
		pm_idle = poll_idle;
	} else if (!strcmp(str, "mwait"))
		force_mwait = 1;
	else
		return -1;

	boot_option_idle_override = 1;
	return 0;
}
early_param("idle", idle_setup);
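
/*
 * The "idle" parameter above comes from the kernel command line, e.g.
 * "idle=poll" to busy-poll instead of halting, or "idle=mwait" to force
 * MWAIT-based idle.  Both settings also flag boot_option_idle_override,
 * which is exported so other code can tell that the user has forced a
 * particular idle policy.
 */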

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned int fsindex,gsindex;
	unsigned int ds,cs,es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
	printk_address(regs->rip);
	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
		regs->eflags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->rax, regs->rbx, regs->rcx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->rdx, regs->rsi, regs->rdi);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->rbp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	asm("movq %%cr0, %0": "=r" (cr0));
	asm("movq %%cr2, %0": "=r" (cr2));
	asm("movq %%cr3, %0": "=r" (cr3));
	asm("movq %%cr4, %0": "=r" (cr4));

	printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs,fsindex,gs,gsindex,shadowgs);
	printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
}

void show_regs(struct pt_regs *regs)
{
	printk("CPU %d:", smp_processor_id());
	__show_regs(regs);
	show_trace(NULL, regs, (void *)(regs + 1));
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

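/*
 * The two helpers below manipulate the 32-bit TLS slots in the per-thread
 * GDT entries (tls_array).  In an x86 segment descriptor the base address
 * is split across three fields, so set_32bit_tls() builds the descriptor
 * from a user_desc template and read_32bit_tls() reassembles the base
 * from base0 (bits 0-15), base1 (bits 16-23) and base2 (bits 24-31).
 * do_arch_prctl() uses them for FS/GS bases that fit in 32 bits.
 */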
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct n_desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	desc->a = LDT_entry_a(&ud);
	desc->b = LDT_entry_b(&ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	struct desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	return desc->base0 |
		(((u32)desc->base1) << 16) |
		(((u32)desc->base2) << 24);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
		unsigned long unused,
		struct task_struct * p, struct pt_regs * regs)
{
	int err;
	struct pt_regs * childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

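	/*
	 * The child returns 0 from fork()/clone(), hence rax = 0 below.
	 * An rsp of ~0UL means no user stack was supplied (as in the
	 * kernel_thread() case); point the child's rsp at its register
	 * frame on the kernel stack instead.
	 */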
	childregs->rax = 0;
	childregs->rsp = rsp;
	if (rsp == ~0UL)
		childregs->rsp = (unsigned long)childregs;

	p->thread.rsp = (unsigned long) childregs;
	p->thread.rsp0 = (unsigned long) (childregs+1);
	p->thread.userrsp = me->thread.userrsp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
	asm("mov %%es,%0" : "=m" (p->thread.es));
	asm("mov %%ds,%0" : "=m" (p->thread.ds));

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = ia32_child_tls(p, childregs);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;

	prev = &prev_p->thread;
	next = &next_p->thread;

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}
}

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
__kprobes struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter>5)
		prefetch(&next->i387.fxsave);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	tss->rsp0 = next->rsp0;

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	asm volatile("mov %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_TLS(next, cpu);

	/*
	 * Switch FS and GS.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=r" (fsindex));
		/* segment register != 0 always requires a reload.
		   also reload when it has changed.
		   when prev process used 64bit base always reload
		   to avoid an information leak. */
		if (unlikely(fsindex | next->fsindex | prev->fs)) {
			loadsegment(fs, next->fsindex);
			/* check if the user used a selector != 0
			 * if yes clear 64bit base, since overloaded base
			 * is always mapped to the Null selector
			 */
			if (fsindex)
				prev->fs = 0;
		}
		/* when next process has a 64bit base use it */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		prev->fsindex = fsindex;
	}
	{
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=r" (gsindex));
		if (unlikely(gsindex | next->gsindex | prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->userrsp = read_pda(oldrsp);
	write_pda(oldrsp, next->userrsp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
	(unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
	    || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 */
	if (next_p->fpu_counter>5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs regs)
{
	long error;
	char * filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, &regs);
	if (error == 0) {
		task_lock(current);
		current->ptrace &= ~PT_DTRACE;
		task_unlock(current);
	}
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->rsp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
		    NULL, NULL);
}

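/*
 * get_wchan() reports the "wait channel" of a sleeping task: the address
 * it is blocked at, for /proc/<pid>/wchan and similar interfaces.  The
 * loop below walks the saved frame pointers on the task's kernel stack
 * (which assumes the kernel was built with frame pointers) and returns
 * the first return address that is not inside the scheduler itself,
 * giving up after a bounded number of frames or if a frame pointer
 * leaves the stack.
 */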
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp,rip;
	int count = 0;

	if (!p || p == current || p->state==TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.rsp);
	do {
		if (fp < (unsigned long)stack ||
		    fp > (unsigned long)stack+THREAD_SIZE)
			return 0;
		rip = *(u64 *)(fp+8);
		if (!in_sched_functions(rip))
			return rip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

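/*
 * do_arch_prctl() implements the arch_prctl(2) operations that get and
 * set the FS and GS base registers.  Bases that fit in 32 bits are put
 * into a GDT TLS slot (cheaper to switch); larger bases go through the
 * FS_BASE/KERNEL_GS_BASE MSRs.  A hypothetical user-space caller, purely
 * as an illustration:
 *
 *	unsigned long base;
 *	arch_prctl(ARCH_SET_FS, (unsigned long)tls_block);
 *	arch_prctl(ARCH_GET_FS, (unsigned long)&base);
 *
 * which is essentially what the 64-bit threading libraries do to set up
 * thread-local storage.
 */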
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				asm volatile("movl %0,%%fs" :: "r" (0));
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			asm("movl %%gs,%0" : "=r" (gsindex));
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

/*
 * Capture the user space registers if the task is not running (in user space)
 */
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
	struct pt_regs *pp, ptregs;

	pp = task_pt_regs(tsk);

	ptregs = *pp;
	ptregs.cs &= 0xffff;
	ptregs.ss &= 0xffff;

	elf_core_copy_regs(regs, &ptregs);

	return 1;
}

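/*
 * Randomize the stack pointer a little at process start-up to make
 * certain attacks harder: subtract a random amount of up to 8kB, then
 * round down to a 16-byte boundary as required by the x86-64 ABI.
 */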
unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}