/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture defined interrupt frame from SS to RIP
 * at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 * backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 * There are unfortunately lots of special cases where some registers are
 * not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 * Gives a full stack frame.
 * - ENTRY/END - Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 * frame that is otherwise undefined after a SYSCALL.
 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */
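/*
 * Reading aid (editorial note, not part of the original source): the
 * SAVE_ALL, SAVE_ARGS and SAVE_REST families, ARGOFFSET/REST_SKIP, the
 * pt_regs slot offsets (RDI, RIP, CS, ...) and the CFI_*, movq_cfi,
 * pushq_cfi and popq_cfi helpers used throughout this file are provided
 * by the asm headers included just below, mainly <asm/calling.h> and
 * <asm/dwarf2.h>.
 */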

#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page_types.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>
#include <asm/percpu.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
#include <linux/elf-em.h>
#define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_64BIT 0x80000000
#define __AUDIT_ARCH_LE	   0x40000000

	.code64
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
	retq
END(mcount)

ENTRY(ftrace_caller)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	MCOUNT_SAVE_FRAME

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

GLOBAL(ftrace_call)
	call ftrace_stub

	MCOUNT_RESTORE_FRAME

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
GLOBAL(ftrace_graph_call)
	jmp ftrace_stub
#endif

GLOBAL(ftrace_stub)
	retq
END(ftrace_caller)

#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	cmpq $ftrace_stub, ftrace_trace_function
	jnz trace

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	cmpq $ftrace_stub, ftrace_graph_return
	jnz ftrace_graph_caller

	cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
	jnz ftrace_graph_caller
#endif

GLOBAL(ftrace_stub)
	retq

trace:
	MCOUNT_SAVE_FRAME

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

	call   *ftrace_trace_function

	MCOUNT_RESTORE_FRAME

	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
	cmpl $0, function_trace_stop
	jne ftrace_stub

	MCOUNT_SAVE_FRAME

	leaq 8(%rbp), %rdi
	movq 0x38(%rsp), %rsi
	movq (%rbp), %rdx
	subq $MCOUNT_INSN_SIZE, %rsi

	call	prepare_ftrace_return

	MCOUNT_RESTORE_FRAME

	retq
END(ftrace_graph_caller)

GLOBAL(return_to_handler)
	subq  $24, %rsp

	/* Save the return values */
	movq %rax, (%rsp)
	movq %rdx, 8(%rsp)
	movq %rbp, %rdi

	call ftrace_return_to_handler

	movq %rax, %rdi
	movq 8(%rsp), %rdx
	movq (%rsp), %rax
	addq $24, %rsp
	jmp *%rdi
#endif


#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
	swapgs
	sysretq
ENDPROC(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */


.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
	jnc  1f
	TRACE_IRQS_ON
1:
#endif
.endm
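/*
 * Editorial note: bit 9 of RFLAGS is the IF (interrupt enable) flag, so the
 * "bt $9" above tests whether the frame being returned to has interrupts
 * enabled; only in that case is the IRQ-on transition traced before iretq.
 */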

/*
 * C code is not supposed to know about undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

	/* %rsp: at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp offset=0
	movq PER_CPU_VAR(old_rsp),\tmp
	movq \tmp,RSP+\offset(%rsp)
	movq $__USER_DS,SS+\offset(%rsp)
	movq $__USER_CS,CS+\offset(%rsp)
	movq $-1,RCX+\offset(%rsp)
	movq R11+\offset(%rsp),\tmp  /* get eflags */
	movq \tmp,EFLAGS+\offset(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp offset=0
	movq RSP+\offset(%rsp),\tmp
	movq \tmp,PER_CPU_VAR(old_rsp)
	movq EFLAGS+\offset(%rsp),\tmp
	movq \tmp,R11+\offset(%rsp)
	.endm
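/*
 * Editorial note: on the SYSCALL fast path the hardware puts the user return
 * RIP in %rcx and the user RFLAGS in %r11, and the SS/RSP/CS/RCX/EFLAGS
 * stack slots are never written. FIXUP_TOP_OF_STACK therefore synthesizes
 * them (user RSP from the per-cpu old_rsp, flags copied from the saved R11
 * slot, RCX poisoned with -1), and RESTORE_TOP_OF_STACK propagates any
 * ptrace-style edits of RSP/EFLAGS back before the SYSRET return.
 */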

	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl %eax, %eax
	pushq $__KERNEL_DS /* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq %rax /* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq $X86_EFLAGS_IF /* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq $__KERNEL_CS /* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq \child_rip /* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax /* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm

	.macro UNFAKE_STACK_FRAME
	addq $8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm
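/*
 * Editorial note: FAKE_STACK_FRAME pushes six quadwords (ss, rsp, eflags,
 * cs, rip, orig_rax), which is why UNFAKE_STACK_FRAME pops exactly 8*6
 * bytes. The fake frame mimics what the CPU would push on an interrupt
 * from kernel mode, so the common return paths work for kernel threads too.
 */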

/*
 * initial frame state for interrupts (and exceptions without error code)
 */
	.macro EMPTY_FRAME start=1 offset=0
	.if \start
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,8+\offset
	.else
	CFI_DEF_CFA_OFFSET 8+\offset
	.endif
	.endm

/*
 * initial frame state for interrupts (and exceptions without error code)
 */
	.macro INTR_FRAME start=1 offset=0
	EMPTY_FRAME \start, SS+8+\offset-RIP
	/*CFI_REL_OFFSET ss, SS+\offset-RIP*/
	CFI_REL_OFFSET rsp, RSP+\offset-RIP
	/*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
	/*CFI_REL_OFFSET cs, CS+\offset-RIP*/
	CFI_REL_OFFSET rip, RIP+\offset-RIP
	.endm

/*
 * initial frame state for exceptions with error code (and interrupts
 * with vector already pushed)
 */
	.macro XCPT_FRAME start=1 offset=0
	INTR_FRAME \start, RIP+\offset-ORIG_RAX
	/*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
	.endm

/*
 * frame that enables calling into C.
 */
	.macro PARTIAL_FRAME start=1 offset=0
	XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
	CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
	CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
	CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
	CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
	CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
	CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
	CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
	CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
	CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
	.endm

/*
 * frame that enables passing a complete pt_regs to a C function.
 */
	.macro DEFAULT_FRAME start=1 offset=0
	PARTIAL_FRAME \start, R11+\offset-R15
	CFI_REL_OFFSET rbx, RBX+\offset
	CFI_REL_OFFSET rbp, RBP+\offset
	CFI_REL_OFFSET r12, R12+\offset
	CFI_REL_OFFSET r13, R13+\offset
	CFI_REL_OFFSET r14, R14+\offset
	CFI_REL_OFFSET r15, R15+\offset
	.endm
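/*
 * Editorial summary of the frame-annotation hierarchy above (not in the
 * original source): EMPTY_FRAME only opens a CFI procedure; INTR_FRAME adds
 * the hardware iret frame (rip/rsp); XCPT_FRAME additionally accounts for
 * the pushed error code / vector in orig_rax; PARTIAL_FRAME describes the
 * registers saved by SAVE_ARGS; DEFAULT_FRAME describes a full pt_regs as
 * produced by SAVE_ARGS plus SAVE_REST.
 */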

/* save partial stack frame */
ENTRY(save_args)
	XCPT_FRAME
	cld
	movq_cfi rdi, RDI+16-ARGOFFSET
	movq_cfi rsi, RSI+16-ARGOFFSET
	movq_cfi rdx, RDX+16-ARGOFFSET
	movq_cfi rcx, RCX+16-ARGOFFSET
	movq_cfi rax, RAX+16-ARGOFFSET
	movq_cfi  r8,  R8+16-ARGOFFSET
	movq_cfi  r9,  R9+16-ARGOFFSET
	movq_cfi r10, R10+16-ARGOFFSET
	movq_cfi r11, R11+16-ARGOFFSET

	leaq -ARGOFFSET+16(%rsp),%rdi	/* arg1 for handler */
	movq_cfi rbp, 8		/* push %rbp */
	leaq 8(%rsp), %rbp		/* mov %rsp, %rbp */
	testl $3, CS(%rdi)
	je 1f
	SWAPGS
	/*
	 * irq_count is used to check if a CPU is already on an interrupt stack
	 * or not. While this is essentially redundant with preempt_count it is
	 * a little cheaper to use a separate counter in the PDA (short of
	 * moving irq_enter into assembly, which would be too much work)
	 */
1:	incl PER_CPU_VAR(irq_count)
	jne 2f
	popq_cfi %rax			/* move return address... */
	mov PER_CPU_VAR(irq_stack_ptr),%rsp
	EMPTY_FRAME 0
	pushq_cfi %rbp			/* backlink for unwinder */
	pushq_cfi %rax			/* ... to the new stack */
	/*
	 * We entered an interrupt context - irqs are off:
	 */
2:	TRACE_IRQS_OFF
	ret
	CFI_ENDPROC
END(save_args)

ENTRY(save_rest)
	PARTIAL_FRAME 1 REST_SKIP+8
	movq 5*8+16(%rsp), %r11	/* save return address */
	movq_cfi rbx, RBX+16
	movq_cfi rbp, RBP+16
	movq_cfi r12, R12+16
	movq_cfi r13, R13+16
	movq_cfi r14, R14+16
	movq_cfi r15, R15+16
	movq %r11, 8(%rsp)	/* return address */
	FIXUP_TOP_OF_STACK %r11, 16
	ret
	CFI_ENDPROC
END(save_rest)

/* save complete stack frame */
	.pushsection .kprobes.text, "ax"
ENTRY(save_paranoid)
	XCPT_FRAME 1 RDI+8
	cld
	movq_cfi rdi, RDI+8
	movq_cfi rsi, RSI+8
	movq_cfi rdx, RDX+8
	movq_cfi rcx, RCX+8
	movq_cfi rax, RAX+8
	movq_cfi r8, R8+8
	movq_cfi r9, R9+8
	movq_cfi r10, R10+8
	movq_cfi r11, R11+8
	movq_cfi rbx, RBX+8
	movq_cfi rbp, RBP+8
	movq_cfi r12, R12+8
	movq_cfi r13, R13+8
	movq_cfi r14, R14+8
	movq_cfi r15, R15+8
	movl $1,%ebx
	movl $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js 1f	/* negative -> in kernel */
	SWAPGS
	xorl %ebx,%ebx
1:	ret
	CFI_ENDPROC
END(save_paranoid)
	.popsection

/*
 * A newly forked process directly context switches into this address.
 *
 * rdi: prev task we switched from
 */
ENTRY(ret_from_fork)
	DEFAULT_FRAME

	LOCK ; btr $TIF_FORK,TI_flags(%r8)

	push kernel_eflags(%rip)
	CFI_ADJUST_CFA_OFFSET 8
	popf					# reset kernel eflags
	CFI_ADJUST_CFA_OFFSET -8

	call schedule_tail			# rdi: 'prev' task parameter

	GET_THREAD_INFO(%rcx)

	RESTORE_REST

	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
	je   int_ret_from_sys_call

	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
	jnz  int_ret_from_sys_call

	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
	jmp ret_from_sys_call			# go to the SYSRET fastpath

	CFI_ENDPROC
END(ret_from_fork)

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */
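/*
 * Editorial note - syscall register convention assumed by the fast path
 * below (the standard x86-64 Linux ABI):
 *	rax  system call number
 *	rdi  arg0,  rsi  arg1,  rdx  arg2,  r10  arg3,  r8  arg4,  r9  arg5
 *	rcx  user return RIP as saved by SYSCALL (which is why arg3 is moved
 *	     from r10 into rcx before calling the C handler)
 *	r11  user RFLAGS as saved by SYSCALL
 * Interrupts are off on entry and only the partial frame is saved.
 */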


ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	SWAPGS_UNSAFE_STACK
	/*
	 * A hypervisor implementation might want to use a label
	 * after the swapgs, so that it can do the swapgs
	 * for the guest and jump here on syscall.
	 */
ENTRY(system_call_after_swapgs)

	movq	%rsp,PER_CPU_VAR(old_rsp)
	movq	PER_CPU_VAR(kernel_stack),%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_ARGS 8,1
	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq  %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
	jnz tracesys
system_call_fastpath:
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)  # XXX:	 rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	LOCKDEP_SYS_EXIT
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz  sysret_careful
	CFI_REMEMBER_STATE
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER	rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER	rflags,r11*/
	movq	PER_CPU_VAR(old_rsp), %rsp
	USERGS_SYSRET64

	CFI_RESTORE_STATE
	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq  %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
#ifdef CONFIG_AUDITSYSCALL
	bt $TIF_SYSCALL_AUDIT,%edx
	jc sysret_audit
#endif
	/*
	 * We have a signal, or exit tracing or single-step.
	 * These all wind up with the iret return path anyway,
	 * so just join that path right now.
	 */
	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
	jmp int_check_syscall_exit_work

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

#ifdef CONFIG_AUDITSYSCALL
	/*
	 * Fast path for syscall audit without full syscall trace.
	 * We just call audit_syscall_entry() directly, and then
	 * jump back to the normal fast path.
	 */
auditsys:
	movq %r10,%r9			/* 6th arg: 4th syscall arg */
	movq %rdx,%r8			/* 5th arg: 3rd syscall arg */
	movq %rsi,%rcx			/* 4th arg: 2nd syscall arg */
	movq %rdi,%rdx			/* 3rd arg: 1st syscall arg */
	movq %rax,%rsi			/* 2nd arg: syscall number */
	movl $AUDIT_ARCH_X86_64,%edi	/* 1st arg: audit arch */
	call audit_syscall_entry
	LOAD_ARGS 0		/* reload call-clobbered registers */
	jmp system_call_fastpath

	/*
	 * Return fast path for syscall audit.  Call audit_syscall_exit()
	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
	 * masked off.
	 */
sysret_audit:
	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
	cmpq $0,%rsi		/* is it < 0? */
	setl %al		/* 1 if so, 0 if not */
	movzbl %al,%edi		/* zero-extend that into %edi */
	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
	call audit_syscall_exit
	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
	jmp sysret_check
#endif	/* CONFIG_AUDITSYSCALL */

	/* Do syscall tracing */
tracesys:
#ifdef CONFIG_AUDITSYSCALL
	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jz auditsys
#endif
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	/*
	 * Reload arg registers from stack in case ptrace changed them.
	 * We don't reload %rax because syscall_trace_enter() returned
	 * the value it wants us to use in the table lookup.
	 */
	LOAD_ARGS ARGOFFSET, 1
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja   int_ret_from_sys_call	/* RAX(%rsp) set to -ENOSYS above */
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
GLOBAL(int_ret_from_sys_call)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
GLOBAL(int_with_check)
	LOCKDEP_SYS_EXIT_IRQ
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz   int_careful
	andl    $~TS_COMPAT,TI_status(%rcx)
	jmp   retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc  int_very_careful
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
int_check_syscall_exit_work:
	SAVE_REST
	/* Check for syscall exit trace */
	testl $_TIF_WORK_SYSCALL_EXIT,%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
	jmp int_restore_rest

int_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_WORK_MASK,%edi
int_restore_rest:
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check
	CFI_ENDPROC
END(system_call)

/*
 * Certain special system calls that need to save a complete full stack frame.
 */
	.macro PTREGSCALL label,func,arg
ENTRY(\label)
	PARTIAL_FRAME 1 8		/* offset 8: return address */
	subq $REST_SKIP, %rsp
	CFI_ADJUST_CFA_OFFSET REST_SKIP
	call save_rest
	DEFAULT_FRAME 0 8		/* offset 8: return address */
	leaq 8(%rsp), \arg	/* pt_regs pointer */
	call \func
	jmp ptregscall_common
	CFI_ENDPROC
END(\label)
	.endm
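/*
 * Editorial note: each PTREGSCALL invocation below expands into a small stub.
 * For example, "PTREGSCALL stub_clone, sys_clone, %r8" defines stub_clone(),
 * which completes the pt_regs frame via save_rest, loads the pt_regs pointer
 * into %r8 (the extra argument sys_clone() takes) and then joins
 * ptregscall_common for the return.
 */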

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	DEFAULT_FRAME 1 8	/* offset 8: return address */
	RESTORE_TOP_OF_STACK %r11, 8
	movq_cfi_restore R15+8, r15
	movq_cfi_restore R14+8, r14
	movq_cfi_restore R13+8, r13
	movq_cfi_restore R12+8, r12
	movq_cfi_restore RBP+8, rbp
	movq_cfi_restore RBX+8, rbx
	ret $REST_SKIP		/* pop extended registers */
	CFI_ENDPROC
END(ptregscall_common)

ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	movq %rsp, %rcx
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET	-8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)

/*
 * Build the entry stubs and pointer table with some assembler magic.
 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
 * single cache line on all modern x86 implementations.
 */
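/*
 * Editorial note: each stub pushes $(~vector+0x80); keeping the pushed value
 * in signed-byte range lets the assembler use the short two-byte push form,
 * which is what makes seven stubs plus the shared jump fit in one 32-byte
 * chunk. common_interrupt later subtracts 0x80 again, so do_IRQ receives the
 * plain ~vector value in orig_ax.
 */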
	.section .init.rodata,"a"
ENTRY(interrupt)
	.text
	.p2align 5
	.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
	INTR_FRAME
vector=FIRST_EXTERNAL_VECTOR
.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
	.balign 32
  .rept	7
    .if vector < NR_VECTORS
      .if vector <> FIRST_EXTERNAL_VECTOR
	CFI_ADJUST_CFA_OFFSET -8
      .endif
1:	pushq $(~vector+0x80)	/* Note: always in signed byte range */
	CFI_ADJUST_CFA_OFFSET 8
      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
	jmp 2f
      .endif
      .previous
	.quad 1b
      .text
vector=vector+1
    .endif
  .endr
2:	jmp common_interrupt
.endr
	CFI_ENDPROC
END(irq_entries_start)

.previous
END(interrupt)
.previous

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): ~(interrupt number) */
	.macro interrupt func
	subq $10*8, %rsp
	CFI_ADJUST_CFA_OFFSET 10*8
	call save_args
	PARTIAL_FRAME 0
	call \func
	.endm

/*
 * Interrupt entry/exit should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"
	/*
	 * The interrupt stubs push (~vector+0x80) onto the stack and
	 * then jump to common_interrupt.
	 */
	.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
	XCPT_FRAME
	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
	interrupt do_IRQ
	/* 0(%rsp): old_rsp-ARGOFFSET */
ret_from_intr:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	decl PER_CPU_VAR(irq_count)
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  retint_careful

retint_swapgs:		/* return to user-space */
	/*
	 * The iretq could re-enable interrupts:
	 */
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_IRETQ
	SWAPGS
	jmp restore_args

retint_restore_args:	/* return to kernel space */
	DISABLE_INTERRUPTS(CLBR_ANY)
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0

irq_return:
	INTERRUPT_RETURN

	.section __ex_table, "a"
	.quad irq_return, bad_iret
	.previous
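/*
 * Editorial note: the __ex_table entries above and below pair a potentially
 * faulting instruction with a fixup address. If iretq faults (for instance
 * because the user %cs or %ss it is restoring is bogus), the exception-fixup
 * machinery redirects execution to bad_iret in the .fixup section instead of
 * treating it as an unhandled kernel fault.
 */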

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iretq

	.section __ex_table,"a"
	.quad native_iret, bad_iret
	.previous
#endif

	.section .fixup,"ax"
bad_iret:
	/*
	 * The iret traps when the %cs or %ss being restored is bogus.
	 * We've lost the original trap vector and error code.
	 * #GPF is the most likely one to get for an invalid selector.
	 * So pretend we completed the iret and took the #GPF in user mode.
	 *
	 * We are now running with the kernel GS after exception recovery.
	 * But error_entry expects us to have user GS to match the user %cs,
	 * so swap back.
	 */
	pushq $0

	SWAPGS
	jmp general_protection

	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt    $TIF_NEED_RESCHED,%edx
	jnc   retint_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET	8
	call  schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp retint_check

retint_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz    retint_swapgs
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	jmp retint_with_reschedule

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	 threadinfo. interrupts off. */
ENTRY(retint_kernel)
	cmpl $0,TI_preempt_count(%rcx)
	jnz  retint_restore_args
	bt  $TIF_NEED_RESCHED,TI_flags(%rcx)
	jnc  retint_restore_args
	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc  retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)
/*
 * End of kprobes section
 */
	.popsection

/*
 * APIC interrupts.
 */
.macro apicinterrupt num sym do_sym
ENTRY(\sym)
	INTR_FRAME
	pushq $~(\num)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \do_sym
	jmp ret_from_intr
	CFI_ENDPROC
END(\sym)
.endm

#ifdef CONFIG_SMP
apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
	irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
apicinterrupt REBOOT_VECTOR \
	reboot_interrupt smp_reboot_interrupt
#endif

#ifdef CONFIG_X86_UV
apicinterrupt UV_BAU_MESSAGE \
	uv_bau_message_intr1 uv_bau_message_interrupt
#endif
apicinterrupt LOCAL_TIMER_VECTOR \
	apic_timer_interrupt smp_apic_timer_interrupt
apicinterrupt X86_PLATFORM_IPI_VECTOR \
	x86_platform_ipi smp_x86_platform_ipi

#ifdef CONFIG_SMP
apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
	invalidate_interrupt0 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
	invalidate_interrupt1 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
	invalidate_interrupt2 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
	invalidate_interrupt3 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
	invalidate_interrupt4 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
	invalidate_interrupt5 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
	invalidate_interrupt6 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
	invalidate_interrupt7 smp_invalidate_interrupt
#endif

apicinterrupt THRESHOLD_APIC_VECTOR \
	threshold_interrupt smp_threshold_interrupt
apicinterrupt THERMAL_APIC_VECTOR \
	thermal_interrupt smp_thermal_interrupt

#ifdef CONFIG_X86_MCE
apicinterrupt MCE_SELF_VECTOR \
	mce_self_interrupt smp_mce_self_interrupt
#endif

#ifdef CONFIG_SMP
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
	call_function_single_interrupt smp_call_function_single_interrupt
apicinterrupt CALL_FUNCTION_VECTOR \
	call_function_interrupt smp_call_function_interrupt
apicinterrupt RESCHEDULE_VECTOR \
	reschedule_interrupt smp_reschedule_interrupt
#endif

apicinterrupt ERROR_APIC_VECTOR \
	error_interrupt smp_error_interrupt
apicinterrupt SPURIOUS_APIC_VECTOR \
	spurious_interrupt smp_spurious_interrupt

#ifdef CONFIG_PERF_EVENTS
apicinterrupt LOCAL_PENDING_VECTOR \
	perf_pending_interrupt smp_perf_pending_interrupt
#endif

/*
 * Exception entry points.
 */
.macro zeroentry sym do_sym
ENTRY(\sym)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
	subq $15*8,%rsp
	CFI_ADJUST_CFA_OFFSET 15*8
	call error_entry
	DEFAULT_FRAME 0
	movq %rsp,%rdi		/* pt_regs pointer */
	xorl %esi,%esi		/* no error code */
	call \do_sym
	jmp error_exit		/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(\sym)
.endm

.macro paranoidzeroentry sym do_sym
ENTRY(\sym)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $-1		/* ORIG_RAX: no syscall to restart */
	CFI_ADJUST_CFA_OFFSET 8
	subq $15*8, %rsp
	call save_paranoid
	TRACE_IRQS_OFF
	movq %rsp,%rdi		/* pt_regs pointer */
	xorl %esi,%esi		/* no error code */
	call \do_sym
	jmp paranoid_exit	/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(\sym)
.endm

#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
.macro paranoidzeroentry_ist sym do_sym ist
ENTRY(\sym)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $-1		/* ORIG_RAX: no syscall to restart */
	CFI_ADJUST_CFA_OFFSET 8
	subq $15*8, %rsp
	call save_paranoid
	TRACE_IRQS_OFF
	movq %rsp,%rdi		/* pt_regs pointer */
	xorl %esi,%esi		/* no error code */
	subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
	call \do_sym
	addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
	jmp paranoid_exit	/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(\sym)
.endm
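/*
 * Editorial note: for IST-based exceptions the CPU always switches to the
 * fixed stack recorded in the TSS. The subq/addq on INIT_TSS_IST above moves
 * that TSS entry down by EXCEPTION_STKSZ for the duration of the C handler,
 * so a nested exception of the same kind (e.g. a #DB hit inside do_debug)
 * gets a fresh stack area instead of clobbering the one in use.
 */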

.macro errorentry sym do_sym
ENTRY(\sym)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	subq $15*8,%rsp
	CFI_ADJUST_CFA_OFFSET 15*8
	call error_entry
	DEFAULT_FRAME 0
	movq %rsp,%rdi			/* pt_regs pointer */
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
	call \do_sym
	jmp error_exit			/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(\sym)
.endm

	/* error code is on the stack already */
.macro paranoiderrorentry sym do_sym
ENTRY(\sym)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	subq $15*8,%rsp
	CFI_ADJUST_CFA_OFFSET 15*8
	call save_paranoid
	DEFAULT_FRAME 0
	TRACE_IRQS_OFF
	movq %rsp,%rdi			/* pt_regs pointer */
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
	call \do_sym
	jmp paranoid_exit		/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(\sym)
.endm

zeroentry divide_error do_divide_error
zeroentry overflow do_overflow
zeroentry bounds do_bounds
zeroentry invalid_op do_invalid_op
zeroentry device_not_available do_device_not_available
paranoiderrorentry double_fault do_double_fault
zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
errorentry invalid_TSS do_invalid_TSS
errorentry segment_not_present do_segment_not_present
zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
zeroentry coprocessor_error do_coprocessor_error
errorentry alignment_check do_alignment_check
zeroentry simd_coprocessor_error do_simd_coprocessor_error

	/* Reload gs selector with exception handling */
	/* edi:  new selector */
ENTRY(native_load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
	SWAPGS
gs_change:
	movl %edi,%gs
2:	mfence
	SWAPGS
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
END(native_load_gs_index)

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	SWAPGS			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp  2b
	.previous

ENTRY(kernel_thread_helper)
	pushq $0		# fake return address
	CFI_STARTPROC
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	call *%rsi
	# exit
	mov %eax, %edi
	call do_exit
	ud2			# padding for call trace
	CFI_ENDPROC
END(kernel_thread_helper)

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	 extern long execve(const char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(const char *name, char **argv, char **envp, struct pt_regs *regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 */
ENTRY(kernel_execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	movq %rsp,%rcx
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
END(kernel_execve)

/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
	CFI_STARTPROC
	push %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET rbp,0
	mov  %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	incl PER_CPU_VAR(irq_count)
	cmove PER_CPU_VAR(irq_stack_ptr),%rsp
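	/* Editorial note: the per-cpu irq_count starts at -1, so the incl
	 * above sets ZF only on the first (non-nested) entry; cmove then
	 * switches to the per-cpu interrupt stack, while nested calls keep
	 * running on the current stack. */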
	push  %rbp			# backlink for old unwinder
	call __do_softirq
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET   -8
	decl PER_CPU_VAR(irq_count)
	ret
	CFI_ENDPROC
END(call_softirq)

#ifdef CONFIG_XEN
zeroentry xen_hypervisor_callback xen_do_hypervisor_callback

/*
 * A note on the "critical region" in our callback handler.
 * We want to avoid stacking callback handlers due to events occurring
 * during handling of the last event. To do this, we keep events disabled
 * until we've done all processing. HOWEVER, we must enable events before
 * popping the stack frame (can't be done atomically) and so it would still
 * be possible to get enough handler activations to overflow the stack.
 * Although unlikely, bugs of that kind are hard to track down, so we'd
 * like to avoid the possibility.
 * So, on entry to the handler we detect whether we interrupted an
 * existing activation in its critical region -- if so, we pop the current
 * activation and restart the handler using the previous one.
 */
ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct pt_regs *)
	CFI_STARTPROC
/*
 * Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will
 * see the correct pointer to the pt_regs
 */
	movq %rdi, %rsp            # we don't return, adjust the stack frame
	CFI_ENDPROC
	DEFAULT_FRAME
11:	incl PER_CPU_VAR(irq_count)
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
	pushq %rbp			# backlink for old unwinder
	call xen_evtchn_do_upcall
	popq %rsp
	CFI_DEF_CFA_REGISTER rsp
	decl PER_CPU_VAR(irq_count)
	jmp  error_exit
	CFI_ENDPROC
END(do_hypervisor_callback)

/*
 * Hypervisor uses this for application faults while it executes.
 * We get here for two reasons:
 *  1. Fault while reloading DS, ES, FS or GS
 *  2. Fault while executing IRET
 * Category 1 we do not need to fix up as Xen has already reloaded all segment
 * registers that could be reloaded and zeroed the others.
 * Category 2 we fix up by killing the current process. We cannot use the
 * normal Linux return path in this case because if we use the IRET hypercall
 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
 * We distinguish between categories by comparing each saved segment register
 * with its current contents: any discrepancy means we are in category 1.
 */
ENTRY(xen_failsafe_callback)
	INTR_FRAME 1 (6*8)
	/*CFI_REL_OFFSET gs,GS*/
	/*CFI_REL_OFFSET fs,FS*/
	/*CFI_REL_OFFSET es,ES*/
	/*CFI_REL_OFFSET ds,DS*/
	CFI_REL_OFFSET r11,8
	CFI_REL_OFFSET rcx,0
	movw %ds,%cx
	cmpw %cx,0x10(%rsp)
	CFI_REMEMBER_STATE
	jne 1f
	movw %es,%cx
	cmpw %cx,0x18(%rsp)
	jne 1f
	movw %fs,%cx
	cmpw %cx,0x20(%rsp)
	jne 1f
	movw %gs,%cx
	cmpw %cx,0x28(%rsp)
	jne 1f
	/* All segments match their saved values => Category 2 (Bad IRET). */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq_cfi $0	/* RIP */
	pushq_cfi %r11
	pushq_cfi %rcx
	jmp general_protection
	CFI_RESTORE_STATE
1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq_cfi $0
	SAVE_ALL
	jmp error_exit
	CFI_ENDPROC
END(xen_failsafe_callback)

apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
	xen_hvm_callback_vector xen_evtchn_do_upcall

#endif /* CONFIG_XEN */

/*
 * Some functions should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"

paranoidzeroentry_ist debug do_debug DEBUG_STACK
paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
paranoiderrorentry stack_segment do_stack_segment
#ifdef CONFIG_XEN
zeroentry xen_debug do_debug
zeroentry xen_int3 do_int3
errorentry xen_stack_segment do_stack_segment
#endif
errorentry general_protection do_general_protection
errorentry page_fault do_page_fault
#ifdef CONFIG_X86_MCE
paranoidzeroentry machine_check *machine_check_vector(%rip)
#endif

	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only
	 * NMI should be common and it does not enable IRQs and
	 * cannot get reschedule ticks.
	 *
	 * "trace" is 0 for the NMI handler only, because irq-tracing
	 * is fundamentally NMI-unsafe. (we cannot change the soft and
	 * hard flags at once, atomically)
	 */
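	/*
	 * Editorial note: the %ebx "no swapgs" flag tested below is produced
	 * by save_paranoid / error_entry: 1 means the exception interrupted
	 * the kernel (GS base was already the kernel's, so no swapgs is
	 * needed on exit), 0 means it interrupted user mode and a swapgs was
	 * performed on entry.
	 */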

	/* ebx:	no swapgs flag */
ENTRY(paranoid_exit)
	INTR_FRAME
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	testl %ebx,%ebx				/* swapgs needed? */
	jnz paranoid_restore
	testl $3,CS(%rsp)
	jnz   paranoid_userspace
paranoid_swapgs:
	TRACE_IRQS_IRETQ 0
	SWAPGS_UNSAFE_STACK
	RESTORE_ALL 8
	jmp irq_return
paranoid_restore:
	TRACE_IRQS_IRETQ 0
	RESTORE_ALL 8
	jmp irq_return
paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi 			/* arg2: oldset */
	movq %rsp,%rdi 			/* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp paranoid_userspace
paranoid_schedule:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	jmp paranoid_userspace
	CFI_ENDPROC
END(paranoid_exit)

/*
 * Exception entry point. This expects an error code/orig_rax on the stack.
 * Returns the "no swapgs" flag in %ebx.
 */
ENTRY(error_entry)
	XCPT_FRAME
	CFI_ADJUST_CFA_OFFSET 15*8
	/* oldrax contains error code */
	cld
	movq_cfi rdi, RDI+8
	movq_cfi rsi, RSI+8
	movq_cfi rdx, RDX+8
	movq_cfi rcx, RCX+8
	movq_cfi rax, RAX+8
	movq_cfi  r8,  R8+8
	movq_cfi  r9,  R9+8
	movq_cfi r10, R10+8
	movq_cfi r11, R11+8
	movq_cfi rbx, RBX+8
	movq_cfi rbp, RBP+8
	movq_cfi r12, R12+8
	movq_cfi r13, R13+8
	movq_cfi r14, R14+8
	movq_cfi r15, R15+8
	xorl %ebx,%ebx
	testl $3,CS+8(%rsp)
	je error_kernelspace
error_swapgs:
	SWAPGS
error_sti:
	TRACE_IRQS_OFF
	ret
	CFI_ENDPROC

/*
 * There are two places in the kernel that can potentially fault with
 * usergs. Handle them here. The exception handlers after iret run with
 * kernel gs again, so don't set the user space flag. B stepping K8s
 * sometimes report a truncated RIP for IRET exceptions returning to
 * compat mode. Check for these here too.
 */
error_kernelspace:
	incl %ebx
	leaq irq_return(%rip),%rcx
	cmpq %rcx,RIP+8(%rsp)
	je error_swapgs
	movl %ecx,%eax	/* zero extend */
	cmpq %rax,RIP+8(%rsp)
	je bstep_iret
	cmpq $gs_change,RIP+8(%rsp)
	je error_swapgs
	jmp error_sti

bstep_iret:
	/* Fix truncated RIP */
	movq %rcx,RIP+8(%rsp)
	jmp error_swapgs
END(error_entry)


/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
ENTRY(error_exit)
	DEFAULT_FRAME
	movl %ebx,%eax
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne retint_kernel
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	andl %edi,%edx
	jnz retint_careful
	jmp retint_swapgs
	CFI_ENDPROC
END(error_exit)


	/* runs on exception stack */
ENTRY(nmi)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq_cfi $-1
	subq $15*8, %rsp
	CFI_ADJUST_CFA_OFFSET 15*8
	call save_paranoid
	DEFAULT_FRAME 0
	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
	movq %rsp,%rdi
	movq $-1,%rsi
	call do_nmi
#ifdef CONFIG_TRACE_IRQFLAGS
	/* paranoidexit; without TRACE_IRQS_OFF */
	/* ebx:	no swapgs flag */
	DISABLE_INTERRUPTS(CLBR_NONE)
	testl %ebx,%ebx				/* swapgs needed? */
	jnz nmi_restore
	testl $3,CS(%rsp)
	jnz nmi_userspace
nmi_swapgs:
	SWAPGS_UNSAFE_STACK
nmi_restore:
	RESTORE_ALL 8
	jmp irq_return
nmi_userspace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz nmi_swapgs
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz nmi_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi 			/* arg2: oldset */
	movq %rsp,%rdi 			/* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	jmp nmi_userspace
nmi_schedule:
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	jmp nmi_userspace
	CFI_ENDPROC
#else
	jmp paranoid_exit
	CFI_ENDPROC
#endif
END(nmi)

ENTRY(ignore_sysret)
	CFI_STARTPROC
	mov $-ENOSYS,%eax
	sysret
	CFI_ENDPROC
END(ignore_sysret)

/*
 * End of kprobes section
 */
	.popsection