/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 *  $Id: entry.S,v 1.1.1.1 2008/10/15 03:26:21 james26_jang Exp $
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for PT_TRACESYS, signals, or fork/exec et al.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 *
 */

#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/current.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/calling.h>
#include <asm/offset.h>
#include <asm/msr.h>
#include <asm/unistd.h>

	.code64

#define PDAREF(field) %gs:field
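/*
 * PDAREF(field) is a %gs-relative reference: with the kernel GS base
 * pointing at this CPU's PDA, the field is read straight out of per-CPU
 * data without loading a pointer first, as in the syscall entry below:
 *
 *	movq	PDAREF(pda_kernelstack),%rsp	# switch to this CPU's kernel stack
 */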

/*
 * C code is not supposed to know about partial frames. Every time a C
 * function that looks at the pt_regs is called, these two macros are
 * executed around it. RESTORE_TOP_OF_STACK syncs the syscall state after
 * any possible ptregs manipulation.
 */

	/* %rsp: at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	PDAREF(pda_oldrsp),\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)	/* contains return address, already in RIP */
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,PDAREF(pda_oldrsp)
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
	.endm
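/*
 * For illustration only: what FIXUP_TOP_OF_STACK reconstructs, written
 * as C against the pt_regs fields the offsets above refer to (a sketch,
 * not kernel code; read_pda is assumed to be the PDA accessor):
 *
 *	regs->rsp    = read_pda(oldrsp);  // user rsp stashed at syscall entry
 *	regs->ss     = __USER_DS;
 *	regs->cs     = __USER_CS;
 *	regs->rcx    = -1;                // real return address lives in regs->rip
 *	regs->eflags = regs->r11;         // SYSCALL saved user eflags in r11
 */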


/*
 * A newly forked process directly context switches into this.
 */
ENTRY(ret_from_fork)
	movq %rax,%rdi		/* return value of __switch_to -> prev task */
	call schedule_tail
	GET_CURRENT(%rcx)
	testb $PT_TRACESYS,tsk_ptrace(%rcx)
	jnz 2f
1:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	jz   int_ret_from_sys_call
	testl $ASM_THREAD_IA32,tsk_thread+thread_flags(%rcx)
	jnz  int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
2:
	movq %rsp,%rdi
	call syscall_trace
	jmp 1b

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer. It gets the per-CPU area from the hidden GS MSR and
 * finds the current kernel stack.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15, rbp, rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 */
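/*
 * Illustrative user-space sequence (not part of the kernel) showing why
 * the table above uses r10 for arg3: SYSCALL itself clobbers rcx and
 * r11, so the C ABI's fourth-argument register cannot carry a syscall
 * argument.
 *
 *	movq	$fd,%rdi	# arg0
 *	movq	$buf,%rsi	# arg1
 *	movq	$count,%rdx	# arg2
 *	movq	$__NR_read,%rax
 *	syscall			# hw: rcx := return rip, r11 := rflags
 */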

ENTRY(system_call)
	swapgs
	movq	%rsp,PDAREF(pda_oldrsp)
	movq	PDAREF(pda_kernelstack),%rsp
	sti
	SAVE_ARGS 8,1
	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq  %rcx,RIP-ARGOFFSET(%rsp)
	GET_CURRENT(%rcx)
	testl $PT_TRACESYS,tsk_ptrace(%rcx)
	jne tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
	.globl ret_from_sys_call
ret_from_sys_call:
sysret_with_reschedule:
	GET_CURRENT(%rcx)
	cli
	cmpq $0,tsk_need_resched(%rcx)
	jne sysret_reschedule
	cmpl $0,tsk_sigpending(%rcx)
	jne sysret_signal
sysret_restore_args:
	movq	RIP-ARGOFFSET(%rsp),%rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	movq	PDAREF(pda_oldrsp),%rsp
	swapgs
	sysretq
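	/*
	 * Note: sysretq resumes user space at the address in %rcx with
	 * rflags taken from %r11, which is why RIP is reloaded into %rcx
	 * above; the extra arguments to RESTORE_ARGS presumably tell it
	 * not to clobber %rcx/%r11 (the macro lives in asm/calling.h).
	 */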

sysret_signal:
	sti
	xorl %esi,%esi		# oldset
	leaq -ARGOFFSET(%rsp),%rdi	# regs
	leaq do_signal(%rip),%rax
	call ptregscall_common
sysret_signal_test:
	GET_CURRENT(%rcx)
	cli
	cmpq $0,tsk_need_resched(%rcx)
	je   sysret_restore_args
	sti
	call schedule
	jmp sysret_signal_test

sysret_reschedule:
	sti
	call schedule
	jmp sysret_with_reschedule

tracesys:
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed them */
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja  tracesys_done
tracesys_call:		/* backtrace marker */
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
tracesys_done:		/* backtrace marker */
	SAVE_REST
	movq %rsp,%rdi
	call syscall_trace
	RESTORE_TOP_OF_STACK %rbx
	RESTORE_REST
	jmp ret_from_sys_call

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

/*
 * Syscall return path ending with IRET.
 * Used for 64-bit calls that must restore all registers (impossible
 * with sysret) and for 32-bit calls.
 */
ENTRY(int_ret_from_sys_call)
intret_test_kernel:
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
intret_with_reschedule:
	GET_CURRENT(%rcx)
	cli
	cmpq $0,tsk_need_resched(%rcx)
	jne intret_reschedule
	cmpl $0,tsk_sigpending(%rcx)
	jne intret_signal
	jmp retint_restore_args_swapgs

intret_reschedule:
	sti
	call schedule
	jmp intret_with_reschedule

intret_signal:
	sti
	SAVE_REST
	xorq %rsi,%rsi		# oldset -> arg2
	movq %rsp,%rdi		# &ptregs -> arg1
	call do_signal
	RESTORE_REST
intret_signal_test:
	GET_CURRENT(%rcx)
	cli
	cmpq $0,tsk_need_resched(%rcx)
	je   retint_restore_args_swapgs
	sti
	call schedule
	jmp  intret_signal_test

/*
 * Certain special system calls need to save a complete stack frame.
 */

	.macro PTREGSCALL label,func
	.globl \label
\label:
	leaq	\func(%rip),%rax
	jmp	ptregscall_common
	.endm

	PTREGSCALL stub_clone, sys_clone
	PTREGSCALL stub_fork, sys_fork
	PTREGSCALL stub_vfork, sys_vfork
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend
	PTREGSCALL stub_sigaltstack, sys_sigaltstack
	PTREGSCALL stub_iopl, sys_iopl
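/*
 * For reference, PTREGSCALL stub_fork, sys_fork expands to:
 *
 *	.globl stub_fork
 * stub_fork:
 *	leaq	sys_fork(%rip),%rax
 *	jmp	ptregscall_common
 */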

ENTRY(ptregscall_common)
	popq %r11
	SAVE_REST
	movq %r11, %r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	RESTORE_REST
	pushq %r11
	ret
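/*
 * Note on the %r11 shuffle above: ptregscall_common is reached from the
 * call through sys_call_table, so the return address into system_call
 * sits on top of the stack.  It is popped before SAVE_REST builds the
 * full frame, parked in callee-saved %r15 across the C call, and pushed
 * back so the final ret lands after the original call.
 */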

ENTRY(stub_execve)
	popq %r11
	SAVE_REST
	movq %r11, %r15
	FIXUP_TOP_OF_STACK %r11
	call sys_execve
	GET_CURRENT(%rcx)
	testl $ASM_THREAD_IA32,tsk_thread+thread_flags(%rcx)
	jnz exec_32bit
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	RESTORE_REST
	pushq %r11
	ret

exec_32bit:
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 * The addq below discards the return address pushed by the call through
 * sys_call_table; this path never returns to the sysret fast path.
 */
ENTRY(stub_rt_sigreturn)
	addq $8, %rsp
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers, except
 * for signals again.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
ENTRY(common_interrupt)
	testl $3,16(%rsp)	# from kernel?
	je   1f
	swapgs
1:	cld
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
	addl $1,PDAREF(pda_irqcount)	# XXX: should be merged with irq.c irqcount
	movq PDAREF(pda_irqstackptr),%rax
	cmoveq %rax,%rsp
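	/*
	 * IRQ stack switch: pda_irqcount presumably starts at -1, so the
	 * addl above yields 0 (ZF set) only for the outermost interrupt,
	 * and the cmoveq then moves us onto the per-CPU IRQ stack; nested
	 * interrupts leave ZF clear and stay on the current stack.
	 */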
	pushq %rdi			# save old stack
	call do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ENTRY(ret_from_intr)
	cli
	popq  %rdi
	subl $1,PDAREF(pda_irqcount)
	leaq ARGOFFSET(%rdi),%rsp
	testl $3,CS(%rdi)	# from kernel?
	je	retint_restore_args
	/* Interrupt came from user space */
retint_with_reschedule:
	GET_CURRENT(%rcx)
	cmpq $0,tsk_need_resched(%rcx)
	jne retint_reschedule
	cmpl $0,tsk_sigpending(%rcx)
	jne retint_signal
retint_restore_args_swapgs:
	swapgs
retint_restore_args:
	RESTORE_ARGS 0,8
iret_label:
	iretq
	.section __ex_table,"a"
	.align 8
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
bad_iret:
	movq $-9999,%rdi	/* better code? */
	jmp do_exit
	.previous
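/*
 * The __ex_table entry above pairs the potentially faulting instruction
 * (iret_label) with its fixup (bad_iret): if iretq itself faults, e.g.
 * on a bad user segment, the fault handler redirects execution to
 * bad_iret instead of letting the exception recurse.
 */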

retint_signal:
	sti
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorq %rsi,%rsi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_signal
	RESTORE_REST
retint_signal_test:
	cli
	GET_CURRENT(%rcx)
	cmpq $0,tsk_need_resched(%rcx)
	je   retint_restore_args_swapgs
	sti
	call schedule
	jmp retint_signal_test

retint_reschedule:
	sti
	call schedule
	cli
	jmp retint_with_reschedule

/*
 * Exception entry points.
 */
	.macro zeroentry sym
	pushq $0	/* push error code/oldrax */
	pushq %rax	/* push real oldrax to the rdi slot */
	leaq  \sym(%rip),%rax
	jmp error_entry
	.endm

	.macro errorentry sym
	pushq %rax
	leaq  \sym(%rip),%rax
	jmp error_entry
	.endm
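/*
 * The CPU pushes a hardware error code for some exceptions (e.g. page
 * fault, general protection) but not for others (e.g. divide error).
 * zeroentry pushes a dummy 0 so both kinds reach error_entry with the
 * same stack layout.  For instance, zeroentry do_divide_error expands to:
 *
 *	pushq $0
 *	pushq %rax
 *	leaq  do_divide_error(%rip),%rax
 *	jmp error_entry
 */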

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
	ALIGN
error_entry:
	/* rdi slot contains rax, oldrax contains error code */
	pushq %rsi
	movq  8(%rsp),%rsi	/* load rax */
	pushq %rdx
	pushq %rcx
	pushq %rsi	/* store rax */
	pushq %r8
	pushq %r9
	pushq %r10
	pushq %r11
	cld
	SAVE_REST
	testl $3,CS(%rsp)
	je error_kernelspace
	swapgs
	movl $1,%r15d
error_action:
	sti
	movq %rdi,RDI(%rsp)
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* r15d: swapgs flag */
error_exit:
	testl %r15d,%r15d
	jz   error_restore
error_test:
	cli
	GET_CURRENT(%rcx)
	cmpq $0,tsk_need_resched(%rcx)
	jne  error_reschedule
	cmpl $0,tsk_sigpending(%rcx)
	jne  error_signal
error_restore_swapgs:
	swapgs
error_restore:
	RESTORE_REST
	jmp retint_restore_args

error_reschedule:
	sti
	call schedule
	jmp  error_test

error_signal:
	sti
	xorq %rsi,%rsi
	movq %rsp,%rdi
	call do_signal
error_signal_test:
	GET_CURRENT(%rcx)
	cli
	cmpq $0,tsk_need_resched(%rcx)
	je   error_restore_swapgs
	sti
	call schedule
	jmp  error_signal_test

error_kernelspace:
	xorl %r15d,%r15d
	cmpq $iret_label,RIP(%rsp)
	jne  error_action
	movl $1,%r15d
	swapgs
	jmp error_action
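/*
 * error_kernelspace note: %r15d records whether we did a swapgs on entry
 * (and so must swapgs again on exit).  Faults from kernel mode normally
 * leave it 0, with one exception: a fault on the iretq at iret_label
 * arrives with CS saying "kernel", yet swapgs has already switched back
 * to the user GS base, so the flag is set and GS is flipped once more.
 */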

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
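/*
 * Illustrative use (a sketch; my_worker is hypothetical, not from this
 * file):
 *
 *	static int my_worker(void *arg)
 *	{
 *		// thread body runs in kernel mode, then exits
 *		return 0;
 *	}
 *
 *	long pid = kernel_thread(my_worker, NULL, CLONE_FS | CLONE_FILES);
 */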
ENTRY(kernel_thread)
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq  $CLONE_VM, %rdi

	movq $-1, %rsi

	movq %rsp, %rdx

	# clone now
	call do_fork
	# save retval on the stack so it's popped before `ret`
	movq %rax, RAX(%rsp)

	/*
	 * It isn't worth checking for a reschedule here, so internally to
	 * the x86_64 port you can rely on kernel_thread() not rescheduling
	 * the child before returning; this avoids the need for hacks, for
	 * example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret

child_rip:
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	xorq %rdi, %rdi
	call do_exit

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(execve)
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
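/*
 * On success sys_execve returns 0, but by then the fake frame holds the
 * new program's user-space register state, so we must leave through
 * int_ret_from_sys_call's full IRET path rather than a plain ret; only
 * the failure case (%rax != 0) returns to the caller normally.
 */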

ENTRY(page_fault)
#ifdef CONFIG_KDB
	pushq %rcx
	pushq %rdx
	pushq %rax
	movl  $473,%ecx
	rdmsr
	andl  $0xfffffffe,%eax		/* Disable last branch recording */
	wrmsr
	popq  %rax
	popq  %rdx
	popq  %rcx
#endif
	errorentry do_page_fault

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error

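/*
 * device_not_available (#NM): if CR0.EM (bit 2, the 0x4 tested below) is
 * set there is no usable FPU and the trap goes to math_emulate;
 * otherwise this is the lazy-FPU case and math_state_restore reloads
 * the task's FPU state.
 */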
ENTRY(device_not_available)
	pushq $-1
	SAVE_ALL
	xorl %r15d,%r15d
	testl $3,CS(%rsp)
	jz 1f
	swapgs
	movl $1,%r15d
1:
	movq  %cr0,%rax
	leaq  math_state_restore(%rip),%rcx
	leaq  math_emulate(%rip),%rbx
	testl $0x4,%eax
	cmoveq %rcx,%rbx
	call  *%rbx
	jmp  error_exit

ENTRY(debug)
	zeroentry do_debug

ENTRY(nmi)
	pushq $-1
	SAVE_ALL
	/* An NMI could happen inside the critical section of a swapgs,
	   so we need this expensive check via the MSR. Rely on arch_prctl
	   forbidding user space from setting a negative GS base; only the
	   kernel value is negative, so the sign of the MSR's high half
	   (%edx after rdmsr) tells us whose GS base is live. */
	movl  $MSR_GS_BASE,%ecx
	rdmsr
	xorl  %ebx,%ebx
	testl %edx,%edx
	js    1f
	swapgs
	movl  $1,%ebx
1:	movq %rsp,%rdi
	call do_nmi
	cli
	testl %ebx,%ebx
	jz error_restore
	swapgs
	jmp error_restore

ENTRY(int3)
	zeroentry do_int3

ENTRY(overflow)
	zeroentry do_overflow

ENTRY(bounds)
	zeroentry do_bounds

ENTRY(invalid_op)
	zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
	zeroentry do_reserved

ENTRY(double_fault)
	errorentry do_double_fault

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS

ENTRY(segment_not_present)
	errorentry do_segment_not_present

ENTRY(stack_segment)
	errorentry do_stack_segment

ENTRY(general_protection)
	errorentry do_general_protection

ENTRY(alignment_check)
	errorentry do_alignment_check

ENTRY(divide_error)
	zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug

ENTRY(machine_check)
	zeroentry do_machine_check

ENTRY(call_debug)
	zeroentry do_call_debug