/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture defined interrupt frame from SS to RIP
 * at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 * backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 * There are unfortunately lots of special cases where some registers are
 * not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 * Gives a full stack frame.
 * - ENTRY/END - Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 * frame that is otherwise undefined after a SYSCALL.
 * - TRACE_IRQS_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */

#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>

	.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif


.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
	jnc  1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

	/* %rsp:at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq   RSP-\offset(%rsp),\tmp
	movq   \tmp,%gs:pda_oldrsp
	movq   EFLAGS-\offset(%rsp),\tmp
	movq   \tmp,R11-\offset(%rsp)
	.endm
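
	/*
	 * For illustration (a sketch derived from the two macros above, not
	 * authoritative): SYSCALL itself saves the return RIP in %rcx and
	 * RFLAGS in %r11, so after SAVE_ARGS the pt_regs slots for SS, RSP,
	 * EFLAGS and CS contain garbage. FIXUP_TOP_OF_STACK reconstructs them:
	 *
	 *	RSP(%rsp)    <- %gs:pda_oldrsp	(user stack saved at entry)
	 *	SS(%rsp)     <- __USER_DS
	 *	CS(%rsp)     <- __USER_CS
	 *	EFLAGS(%rsp) <- R11(%rsp)	(where SYSCALL put RFLAGS)
	 *	RCX(%rsp)    <- -1		(RCX was clobbered by SYSCALL)
	 *
	 * RESTORE_TOP_OF_STACK performs the inverse copies so a later SYSRET
	 * still sees consistent state.
	 */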

	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl %eax, %eax
	pushq %rax /* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq %rax /* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq $(1<<9) /* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq $__KERNEL_CS /* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq \child_rip /* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax /* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm

	.macro UNFAKE_STACK_FRAME
	addq $8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm
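
	/*
	 * Sketch of the frame FAKE_STACK_FRAME builds (offsets relative to
	 * %rsp afterwards; derived from the pushes above, for illustration):
	 *
	 *	40: ss       = 0
	 *	32: rsp      = 0
	 *	24: eflags   = IF (1<<9)
	 *	16: cs       = __KERNEL_CS
	 *	 8: rip      = \child_rip
	 *	 0: orig_rax = 0
	 *
	 * UNFAKE_STACK_FRAME simply pops all six words again (8*6 bytes).
	 */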

	.macro	CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
	.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi:	prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	push kernel_eflags(%rip)
	CFI_ADJUST_CFA_OFFSET 8		# pushq of a memory operand is 8 bytes
	popf				# reset kernel eflags
	CFI_ADJUST_CFA_OFFSET -8
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je   int_ret_from_sys_call
	testl $_TIF_IA32,threadinfo_flags(%rcx)
	jnz  int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
END(ret_from_fork)

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */
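/*
 * Register setup on entry (the architectural SYSCALL convention, noted
 * here for reference):
 *	rax: system call number
 *	rdi, rsi, rdx, r10, r8, r9: arguments 1-6
 *	rcx: return address for SYSRET (it clobbers the C arg4 register,
 *	     which is why arg4 arrives in r10 and is moved back below)
 *	r11: saved RFLAGS for SYSRET
 * Interrupts are off on entry; only the kernel gs base is usable after
 * the swapgs.
 */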


ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	swapgs
	movq	%rsp,%gs:pda_oldrsp
	movq	%gs:pda_kernelstack,%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	sti
	SAVE_ARGS 8,1
	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq  %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)  # XXX:	 rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path).
 * Has an incomplete stack frame and undefined top of stack.
 */
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	cli
	TRACE_IRQS_OFF
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz  sysret_careful
	CFI_REMEMBER_STATE
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER	rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER	rflags,r11*/
	movq	%gs:pda_oldrsp,%rsp
	swapgs
	sysretq

	CFI_RESTORE_STATE
	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	TRACE_IRQS_ON
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq  %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
	sti
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz    1f

	/* Really a signal */
	/* edx:	work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
	xorl %esi,%esi # oldset -> arg2
	call ptregscall_common
1:	movl $_TIF_NEED_RESCHED,%edi
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	cli
	TRACE_IRQS_OFF
	jmp int_with_check

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

	/* Do syscall tracing */
tracesys:
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed them */
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	movq $-ENOSYS,%rcx
	cmova %rcx,%rax
	ja  1f
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
1:	movq %rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
	.globl int_ret_from_sys_call
int_ret_from_sys_call:
	cli
	TRACE_IRQS_OFF
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz   int_careful
	andl    $~TS_COMPAT,threadinfo_status(%rcx)
	jmp   retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc  int_very_careful
	TRACE_IRQS_ON
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	cli
	TRACE_IRQS_OFF
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	jmp int_restore_rest

int_signal:
	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	jmp int_with_check
	CFI_ENDPROC
END(system_call)

/*
 * Certain special system calls that need to save a complete full stack frame.
 */

	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq    -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
	jmp	ptregscall_common
END(\label)
	.endm
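
	/*
	 * For illustration (plain macro substitution, nothing added):
	 * "PTREGSCALL stub_fork, sys_fork, %rdi" below expands to
	 *
	 *	.globl stub_fork
	 * stub_fork:
	 *	leaq	sys_fork(%rip),%rax
	 *	leaq	-ARGOFFSET+8(%rsp),%rdi
	 *	jmp	ptregscall_common
	 *
	 * i.e. the handler goes in %rax and a pointer to the (not yet
	 * complete) pt_regs goes into the handler's pt_regs argument slot.
	 */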

	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
END(ptregscall_common)

ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET	-8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)

/*
 * initial frame state for interrupts and exceptions
 */
	.macro _frame ref
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX
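/*
 * Worked example of the CFA arithmetic (a restatement of the macro above,
 * for reference; the pt_regs offsets come from asm-offsets): on a bare
 * interrupt only the SS..RIP hardware frame is on the stack, so the CFA
 * (the end of that frame) sits at rsp + SS+8-RIP -- which is exactly what
 * "INTR_FRAME" == "_frame RIP" states. When an error code (or a vector)
 * has also been pushed, the stack extends one more word down, to the
 * ORIG_RAX slot, hence "XCPT_FRAME" == "_frame ORIG_RAX" with the CFA at
 * rsp + SS+8-ORIG_RAX.
 */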

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
	.macro interrupt func
	cld
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
	pushq %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET		rbp, 0
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
	testl $3,CS(%rdi)
	je 1f
	swapgs
	/* irqcount is used to check if a CPU is already on an interrupt
	   stack or not. While this is essentially redundant with preempt_count
	   it is a little cheaper to use a separate counter in the PDA
	   (short of moving irq_enter into assembly, which would be too
	    much work) */
1:	incl	%gs:pda_irqcount
	cmoveq %gs:pda_irqstackptr,%rsp
	push    %rbp			# backlink for old unwinder
	/*
	 * We entered an interrupt context - irqs are off:
	 */
	TRACE_IRQS_OFF
	call \func
	.endm
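
	/*
	 * A note on the stack switch above (this assumes pda_irqcount is
	 * initialized to -1, as done in the x86-64 PDA setup): the "incl"
	 * sets ZF only when the count goes -1 -> 0, i.e. on the outermost
	 * interrupt, so the "cmoveq" moves onto the per-CPU interrupt stack
	 * exactly once; nested interrupts keep running on the current stack.
	 * The matching "decl %gs:pda_irqcount" is in the exit path below.
	 */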

ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	cli
	TRACE_IRQS_OFF
	decl %gs:pda_irqcount
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  retint_careful
retint_swapgs:
	/*
	 * The iretq could re-enable interrupts:
	 */
	cli
	TRACE_IRQS_IRETQ
	swapgs
	jmp restore_args

retint_restore_args:
	cli
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

	.section __ex_table,"a"
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq $11,%rdi	/* SIGSEGV */
	TRACE_IRQS_ON
	sti
	jmp do_exit
	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt    $TIF_NEED_RESCHED,%edx
	jnc   retint_signal
	TRACE_IRQS_ON
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET	8
	call  schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	cli
	TRACE_IRQS_OFF
	jmp retint_check

retint_signal:
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz    retint_swapgs
	TRACE_IRQS_ON
	sti
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	movl $_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	 threadinfo. interrupts off. */
ENTRY(retint_kernel)
	cmpl $0,threadinfo_preempt_count(%rcx)
	jnz  retint_restore_args
	bt  $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc  retint_restore_args
	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc  retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)

/*
 * APIC interrupts.
 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq $~(\num)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
	.endm
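
	/*
	 * For illustration, "apicinterrupt THERMAL_APIC_VECTOR,
	 * smp_thermal_interrupt" below expands (by substitution only) to:
	 *
	 *	INTR_FRAME
	 *	pushq $~(THERMAL_APIC_VECTOR)
	 *	CFI_ADJUST_CFA_OFFSET 8
	 *	interrupt smp_thermal_interrupt
	 *	jmp ret_from_intr
	 *	CFI_ENDPROC
	 *
	 * The vector is pushed complemented so the value landing in the
	 * orig_rax slot is negative and cannot be mistaken for a valid
	 * syscall number or error code.
	 */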

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
ENTRY(irq_move_cleanup_interrupt)
	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
#endif

ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)

/*
 * Exception entry points.
 */
	.macro zeroentry sym
	INTR_FRAME
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm

	.macro errorentry sym
	XCPT_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm
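
	/*
	 * Sketch of the shared convention (derived from the two macros
	 * above): for exceptions without a hardware error code, zeroentry
	 * pushes a dummy 0 into the error-code slot first, so by the time
	 * control reaches error_entry both variants present an identical
	 * layout: the error code (or 0) in the ORIG_RAX slot, the saved
	 * %rax in the RDI slot, and the C handler's address in %rax.
	 */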

	/* error code is on the stack already */
	/* handle NMI-like exceptions that can happen everywhere */
	.macro paranoidentry sym, ist=0, irqtrace=1
	SAVE_ALL
	cld
	movl $1,%ebx
	movl  $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js    1f
	swapgs
	xorl  %ebx,%ebx
1:
	.if \ist
	movq	%gs:pda_data_offset, %rbp
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	cli
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
	.endm
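
	/*
	 * How the %ebx flag works (a restatement of the rdmsr test above,
	 * for reference): after rdmsr the high half of MSR_GS_BASE is in
	 * %edx; a negative value means a kernel gs base was already loaded,
	 * so no swapgs is done and %ebx stays 1 ("no swapgs needed on
	 * exit"). Otherwise we swapgs and clear %ebx, and the paranoid exit
	 * path below swaps back before returning. The IST adjustment
	 * temporarily moves the exception's IST entry down by
	 * EXCEPTION_STKSZ, so a recursive fault of the same kind gets a
	 * fresh stack instead of overwriting the current one.
	 */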

	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only
	 * NMI should be common and it does not enable IRQs and
	 * cannot get reschedule ticks.
	 *
	 * "trace" is 0 for the NMI handler only, because irq-tracing
	 * is fundamentally NMI-unsafe. (we cannot change the soft and
	 * hard flags at once, atomically)
	 */
	.macro paranoidexit trace=1
	/* ebx:	no swapgs flag */
paranoid_exit\trace:
	testl %ebx,%ebx				/* swapgs needed? */
	jnz paranoid_restore\trace
	testl $3,CS(%rsp)
	jnz   paranoid_userspace\trace
paranoid_swapgs\trace:
	.if \trace
	TRACE_IRQS_IRETQ 0
	.endif
	swapgs
paranoid_restore\trace:
	RESTORE_ALL 8
	iretq
paranoid_userspace\trace:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs\trace
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule\trace
	movl %ebx,%edx			/* arg3: thread flags */
	.if \trace
	TRACE_IRQS_ON
	.endif
	sti
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	cli
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
paranoid_schedule\trace:
	.if \trace
	TRACE_IRQS_ON
	.endif
	sti
	call schedule
	cli
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
	CFI_ENDPROC
	.endm
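
	/*
	 * Note on instantiation (for reference): the \trace parameter is
	 * appended to every label, so a bare "paranoidexit" (trace=1)
	 * defines paranoid_exit1, paranoid_swapgs1 and so on -- including
	 * the paranoid_exit1 label that the int3/double_fault/machine_check
	 * paths jump to below -- while the NMI path uses "paranoidexit 0"
	 * to get an irq-trace-free copy with paranoid_exit0 labels.
	 */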

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
KPROBE_ENTRY(error_entry)
	_frame RDI
	CFI_REL_OFFSET rax,0
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq  $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	CFI_REGISTER	rax,rsi
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je  error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq %rdi,RDI(%rsp)
	CFI_REL_OFFSET	rdi,RDI
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne  retint_kernel
	movl  threadinfo_flags(%rcx),%edx
	movl  $_TIF_WORK_MASK,%edi
	andl  %edi,%edx
	jnz  retint_careful
	/*
	 * The iret might restore flags:
	 */
	TRACE_IRQS_IRETQ
	swapgs
	RESTORE_ARGS 0,8,0
	jmp iret_label
	CFI_ENDPROC

error_kernelspace:
	incl %ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after
	   iret run with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report a truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq iret_label(%rip),%rbp
	cmpq %rbp,RIP(%rsp)
	je   error_swapgs
	movl %ebp,%ebp	/* zero extend */
	cmpq %rbp,RIP(%rsp)
	je   error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je   error_swapgs
	jmp  error_sti
KPROBE_END(error_entry)

	/* Reload gs selector with exception handling */
	/* edi:	new selector */
ENTRY(load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	cli
	swapgs
gs_change:
	movl %edi,%gs
2:	mfence
	swapgs
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(load_gs_index)

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp  2b
	.previous
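
/*
 * Usage note (a sketch of the fixup flow above, not new behavior): if the
 * "movl %edi,%gs" at gs_change faults on a bad selector, the __ex_table
 * entry redirects execution to bad_gs, which swaps back to the user gs
 * base, loads the null selector instead, and resumes at label 2 -- so
 * callers see a cleared %gs rather than an oops.
 */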

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq  kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi

	/*
	 * It isn't worth checking for a reschedule here, so internally to
	 * the x86-64 port you can rely on kernel_thread() not rescheduling
	 * the child before returning; this avoids the need for hacks, for
	 * example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_thread)
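
/*
 * Illustrative caller (a hypothetical C-side sketch, not taken from this
 * file): the flag values are up to the caller and are OR'ed with
 * kernel_thread_flags before do_fork runs.
 *
 *	static int worker(void *arg) { ... return 0; }
 *	pid = kernel_thread(worker, NULL, CLONE_FS | CLONE_FILES);
 */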

child_rip:
	pushq $0		# fake return address
	CFI_STARTPROC
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	xorl %edi, %edi
	call do_exit
	CFI_ENDPROC
ENDPROC(child_rip)

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	 extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(kernel_execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_execve)
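
/*
 * Return-path note (restating the testq above, for reference): sys_execve
 * returns 0 on success, in which case the fake frame now describes the
 * freshly set up user context and we leave through
 * int_ret_from_sys_call/IRET to start the new image; on error the frame
 * is unwound and the error code is returned to the in-kernel caller like
 * a normal function result.
 */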

KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
KPROBE_END(page_fault)

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
	zeroentry math_state_restore
END(device_not_available)

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	paranoidexit
KPROBE_END(debug)

	/* runs on exception stack */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
	paranoidexit 0
#else
	jmp paranoid_exit1
	CFI_ENDPROC
#endif
KPROBE_END(nmi)

KPROBE_ENTRY(int3)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp paranoid_exit1
	CFI_ENDPROC
KPROBE_END(int3)

ENTRY(overflow)
	zeroentry do_overflow
END(overflow)

ENTRY(bounds)
	zeroentry do_bounds
END(bounds)

ENTRY(invalid_op)
	zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

ENTRY(reserved)
	zeroentry do_reserved
END(reserved)

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit1
	CFI_ENDPROC
END(double_fault)

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
	errorentry do_segment_not_present
END(segment_not_present)

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit1
	CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
KPROBE_END(general_protection)

ENTRY(alignment_check)
	errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
	zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit1
	CFI_ENDPROC
END(machine_check)
#endif

/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
	CFI_STARTPROC
	push %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET rbp,0
	mov  %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	incl %gs:pda_irqcount
	cmove %gs:pda_irqstackptr,%rsp
	push  %rbp			# backlink for old unwinder
	call __do_softirq
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET   -8
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC
ENDPROC(call_softirq)
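
/*
 * Note (for reference): this uses the same pda_irqcount/cmove trick as the
 * "interrupt" macro above -- switch to the per-CPU interrupt stack only
 * when this is the outermost entry -- so __do_softirq runs on the large
 * irq stack without nesting problems.
 */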

KPROBE_ENTRY(ignore_sysret)
	CFI_STARTPROC
	mov $-ENOSYS,%eax
	sysret
	CFI_ENDPROC
ENDPROC(ignore_sysret)
