/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <linux/stackprotector.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>

#include <trace/events/power.h>

asmlinkage extern void ret_from_fork(void);

DEFINE_PER_CPU(unsigned long, old_rsp);
static DEFINE_PER_CPU(unsigned char, is_idle);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);
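
/*
 * Illustrative sketch of how a client would use this chain (the callback
 * and variable names below are hypothetical):
 *
 *	static int my_idle_notify(struct notifier_block *nb,
 *				  unsigned long action, void *unused)
 *	{
 *		if (action == IDLE_START)
 *			printk(KERN_DEBUG "cpu entering idle\n");
 *		else if (action == IDLE_END)
 *			printk(KERN_DEBUG "cpu leaving idle\n");
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_idle_nb = {
 *		.notifier_call = my_idle_notify,
 *	};
 *
 *	idle_notifier_register(&my_idle_nb);
 *
 * enter_idle() and __exit_idle() below are what fire IDLE_START/IDLE_END.
 */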

void enter_idle(void)
{
	percpu_write(is_idle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
	BUG();
}
#endif

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (i.e. sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;

	/*
	 * If we're the non-boot CPU, nothing set the stack canary up
	 * for us.  CPU0 already has it initialized but no harm in
	 * doing it again.  This is a good place for updating it, as
	 * we won't ever return from this function (so the invalid
	 * canaries already on the stack won't ever trigger).
	 */
	boot_init_stack_canary();

	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick(1);
		while (!need_resched()) {

			rmb();

			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();

			trace_power_end(smp_processor_id());

			/*
			 * In many cases the interrupt that ended idle
			 * has already called exit_idle.  But some idle
			 * loops can be woken up without an interrupt.
			 */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	show_regs_common();
	printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk(KERN_DEFAULT "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
			regs->sp, regs->flags);
	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk(KERN_DEFAULT "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_DEFAULT "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

/*
 * Free any architecture-specific thread state when a task is torn down.
 * Here we only sanity-check that the dead task did not leak a private LDT.
 */
void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

/*
 * Install a flat 32-bit TLS segment based at 'addr' into slot 'tls'
 * of the task's TLS array.
 */
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

/*
 * Create the register and thread state for a newly forked child: the
 * child gets a copy of the parent's pt_regs (with rax forced to 0 so
 * fork() returns 0 in the child), its own kernel stack pointers, and
 * copies of the segment, TLS and I/O-bitmap state where needed.
 */
int copy_thread(unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
	struct task_struct *p, struct pt_regs *regs)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	if (user_mode(regs))
		childregs->sp = sp;
	else
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.io_bitmap_ptr = NULL;

	savesegment(gs, p->thread.gsindex);
	p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs;
	savesegment(fs, p->thread.fsindex);
	p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);

	err = -ENOMEM;
	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}

	return err;
}

static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
		    unsigned long new_sp,
		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
	loadsegment(fs, 0);
	loadsegment(es, _ds);
	loadsegment(ds, _ds);
	load_gs_index(0);
	regs->ip		= new_ip;
	regs->sp		= new_sp;
	percpu_write(old_rsp, new_sp);
	regs->cs		= _cs;
	regs->ss		= _ss;
	regs->flags		= X86_EFLAGS_IF;
	set_fs(USER_DS);
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER_CS, __USER_DS, 0);
}

#ifdef CONFIG_IA32_EMULATION
void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER32_CS, __USER32_DS, __USER32_DS);
}
#endif
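
/*
 * Illustrative sketch of the caller side: after a binfmt loader (such as
 * the ELF loader) has mapped the new image, it hands control to user
 * space by resetting the registers through start_thread(), roughly:
 *
 *	start_thread(regs, elf_entry, bprm->p);
 *
 * where elf_entry is the program entry point and bprm->p the initial
 * user stack pointer.
 */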

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here.  Set the probe on schedule() instead.
 * The function graph tracer is not supported here either.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;
	bool preload_fpu;

	/*
	 * If the task has used the FPU in the last 5 timeslices, just do a
	 * full restore of the math state immediately to avoid the trap; the
	 * chances of needing the FPU soon are obviously high now.
	 */
	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;

	/* we're going to use this soon, after a few expensive things */
	if (preload_fpu)
		prefetch(next->fpu.state);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	/*
	 * We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

	load_TLS(next, cpu);

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/* Make sure cpu is ready for new context */
	if (preload_fpu)
		clts();

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_end_context_switch(next_p);

	/*
	 * Switch FS and GS.
	 *
	 * Segment register != 0 always requires a reload.  Also
	 * reload when it has changed.  When the previous process used a
	 * 64-bit base, always reload to avoid an information leak.
	 */
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
		/*
		 * Check if the user used a selector != 0; if yes
		 * clear the 64-bit base, since an overloaded base is
		 * always mapped to the NULL selector.
		 */
		if (fsindex)
			prev->fs = 0;
	}
	/* when the next process has a 64-bit base, use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
			prev->gs = 0;
	}
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = percpu_read(old_rsp);
	percpu_write(old_rsp, next->usersp);
	percpu_write(current_task, next_p);

	percpu_write(kernel_stack,
		  (unsigned long)task_stack_page(next_p) +
		  THREAD_SIZE - KERNEL_STACK_OFFSET);

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/*
	 * Preload the FPU context, now that we've determined that the
	 * task is likely to be using it.
	 */
	if (preload_fpu)
		__math_state_restore();

	return prev_p;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

void set_personality_ia32(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 32bit mode */
	set_thread_flag(TIF_IA32);
	current->personality |= force_personality32;

	/* Prepare the first "return" to user space */
	current_thread_info()->status |= TS_COMPAT;
}

/*
 * get_wchan() returns the "wait channel" of a sleeping task: the first
 * return address outside the scheduler, found by walking the saved
 * frame pointers on the task's kernel stack.
 */
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp >= (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

/*
 * Implement the arch_prctl() operations: get or set the FS and GS base
 * addresses of a task.
 */
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				loadsegment(fs, FS_TLS_SEL);
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				loadsegment(fs, 0);
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			savesegment(gs, gsindex);
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}
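
/*
 * Illustrative user-space sketch of this interface (a hypothetical
 * program, not kernel code): a 64-bit process can point %fs at its own
 * per-thread block through the arch_prctl(2) syscall, e.g.
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	static unsigned long tls_block[64];
 *
 *	int main(void)
 *	{
 *		unsigned long base = 0;
 *
 *		syscall(SYS_arch_prctl, ARCH_SET_FS, (unsigned long)tls_block);
 *		syscall(SYS_arch_prctl, ARCH_GET_FS, (unsigned long)&base);
 *		return base == (unsigned long)tls_block ? 0 : 1;
 *	}
 *
 * For ARCH_GET_FS/ARCH_GET_GS the 'addr' argument is the user address
 * that the base is written back to with put_user() above.
 */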

unsigned long KSTK_ESP(struct task_struct *task)
{
	return (test_tsk_thread_flag(task, TIF_IA32)) ?
			(task_pt_regs(task)->sp) : ((task)->thread.usersp);
}