/*
 *  linux/arch/i386/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 * This also contains the timer-interrupt handler, as well as all interrupts
 * and faults that can result in a task-switch.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after a timer-interrupt and after each system call.
 *
 * I changed all the .align's to 4 (16 byte alignment), as that's faster
 * on a 486.
 *
 * Stack layout in 'syscall_exit':
 * 	ptrace needs to have all regs on the stack.
 *	if the order here is changed, it needs to be
 *	updated in fork.c:copy_process, signal.c:do_signal,
 *	ptrace.c and ptrace.h
 *
 *	 0(%esp) - %ebx
 *	 4(%esp) - %ecx
 *	 8(%esp) - %edx
 *	 C(%esp) - %esi
 *	10(%esp) - %edi
 *	14(%esp) - %ebp
 *	18(%esp) - %eax
 *	1C(%esp) - %ds
 *	20(%esp) - %es
 *	24(%esp) - %fs
 *	28(%esp) - orig_eax
 *	2C(%esp) - %eip
 *	30(%esp) - %cs
 *	34(%esp) - %eflags
 *	38(%esp) - %oldesp
 *	3C(%esp) - %oldss
 *
 * "current" is in register %ebx during any slow entries.
 */
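/*
 * For reference, this layout mirrors struct pt_regs from
 * include/asm-i386/ptrace.h (a sketch reconstructed from the offsets
 * above; the header is the authoritative definition):
 *
 *	struct pt_regs {
 *		long ebx, ecx, edx, esi, edi, ebp, eax;
 *		int  xds, xes, xfs;
 *		long orig_eax, eip;
 *		int  xcs;
 *		long eflags, esp;
 *		int  xss;
 *	};
 */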

#include <linux/linkage.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/percpu.h>
#include <asm/dwarf2.h>
#include "irq_vectors.h"

/*
 * We use macros for low-level operations which need to be overridden
 * for paravirtualization.  The following will never clobber any registers:
 *   INTERRUPT_RETURN (aka. "iret")
 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
 *
 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
 * Allowing a register to be clobbered can shrink the paravirt replacement
 * enough to patch inline, increasing performance.
 */

#define nr_syscalls ((syscall_table_size)/4)

CF_MASK		= 0x00000001
TF_MASK		= 0x00000100
IF_MASK		= 0x00000200
DF_MASK		= 0x00000400
NT_MASK		= 0x00004000
VM_MASK		= 0x00020000

#ifdef CONFIG_PREEMPT
#define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
#define preempt_stop(clobbers)
#define resume_kernel		restore_nocheck
#endif
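/*
 * Without CONFIG_PREEMPT, preempt_stop expands to nothing and
 * resume_kernel collapses into restore_nocheck, so returns to
 * kernel mode skip the preemption check entirely.
 */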

.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
	testl $IF_MASK,PT_EFLAGS(%esp)     # interrupts off?
	jz 1f
	TRACE_IRQS_ON
1:
#endif
.endm

#ifdef CONFIG_VM86
#define resume_userspace_sig	check_userspace
#else
#define resume_userspace_sig	resume_userspace
#endif
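/*
 * With CONFIG_VM86 the signal path goes back through the full
 * check_userspace test rather than assuming a plain user-mode
 * return: save_v86_state may just have rewritten the register
 * frame, so the CS/EFLAGS we return with must be re-examined.
 */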

#define SAVE_ALL \
	cld; \
	pushl %fs; \
	CFI_ADJUST_CFA_OFFSET 4;\
	/*CFI_REL_OFFSET fs, 0;*/\
	pushl %es; \
	CFI_ADJUST_CFA_OFFSET 4;\
	/*CFI_REL_OFFSET es, 0;*/\
	pushl %ds; \
	CFI_ADJUST_CFA_OFFSET 4;\
	/*CFI_REL_OFFSET ds, 0;*/\
	pushl %eax; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET eax, 0;\
	pushl %ebp; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET ebp, 0;\
	pushl %edi; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET edi, 0;\
	pushl %esi; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET esi, 0;\
	pushl %edx; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET edx, 0;\
	pushl %ecx; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET ecx, 0;\
	pushl %ebx; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET ebx, 0;\
	movl $(__USER_DS), %edx; \
	movl %edx, %ds; \
	movl %edx, %es; \
	movl $(__KERNEL_PERCPU), %edx; \
	movl %edx, %fs
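/*
 * SAVE_ALL completes the pt_regs frame sketched at the top of this
 * file: the CPU and the entry stubs have already stored ss/esp,
 * eflags, cs, eip and orig_eax, and this macro adds the segment and
 * general registers, then reloads %ds/%es with the flat user segment
 * and %fs with the kernel's per-cpu segment.
 */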

#define RESTORE_INT_REGS \
	popl %ebx;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE ebx;\
	popl %ecx;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE ecx;\
	popl %edx;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE edx;\
	popl %esi;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE esi;\
	popl %edi;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE edi;\
	popl %ebp;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE ebp;\
	popl %eax;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE eax

#define RESTORE_REGS	\
	RESTORE_INT_REGS; \
1:	popl %ds;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	/*CFI_RESTORE ds;*/\
2:	popl %es;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	/*CFI_RESTORE es;*/\
3:	popl %fs;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	/*CFI_RESTORE fs;*/\
.pushsection .fixup,"ax";	\
4:	movl $0,(%esp);	\
	jmp 1b;		\
5:	movl $0,(%esp);	\
	jmp 2b;		\
6:	movl $0,(%esp);	\
	jmp 3b;		\
.section __ex_table,"a";\
	.align 4;	\
	.long 1b,4b;	\
	.long 2b,5b;	\
	.long 3b,6b;	\
.popsection
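/*
 * The fixup entries above guard the segment pops: if a saved user
 * selector has gone bad (e.g. its LDT entry vanished), the pop
 * faults in kernel mode, the exception table redirects to the
 * matching fixup, and the pop is retried after the saved selector
 * is replaced with 0 (the null selector always loads).
 */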

#define RING0_INT_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, 3*4;\
	/*CFI_OFFSET cs, -2*4;*/\
	CFI_OFFSET eip, -3*4

#define RING0_EC_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, 4*4;\
	/*CFI_OFFSET cs, -2*4;*/\
	CFI_OFFSET eip, -3*4

#define RING0_PTREGS_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\
	/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\
	CFI_OFFSET eip, PT_EIP-PT_OLDESP;\
	/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\
	/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\
	CFI_OFFSET eax, PT_EAX-PT_OLDESP;\
	CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\
	CFI_OFFSET edi, PT_EDI-PT_OLDESP;\
	CFI_OFFSET esi, PT_ESI-PT_OLDESP;\
	CFI_OFFSET edx, PT_EDX-PT_OLDESP;\
	CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\
	CFI_OFFSET ebx, PT_EBX-PT_OLDESP

ENTRY(ret_from_fork)
	CFI_STARTPROC
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	call schedule_tail
	GET_THREAD_INFO(%ebp)
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	pushl $0x0202			# Reset kernel eflags
	CFI_ADJUST_CFA_OFFSET 4
	popfl
	CFI_ADJUST_CFA_OFFSET -4
	jmp syscall_exit
	CFI_ENDPROC
END(ret_from_fork)

/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	ALIGN
	RING0_PTREGS_FRAME
ret_from_exception:
	preempt_stop(CLBR_ANY)
ret_from_intr:
	GET_THREAD_INFO(%ebp)
check_userspace:
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
	movb PT_CS(%esp), %al
	andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
	cmpl $USER_RPL, %eax
	jb resume_kernel		# not returning to v8086 or userspace

ENTRY(resume_userspace)
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
					# int/exception return?
	jne work_pending
	jmp restore_all
END(ret_from_exception)

#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
	DISABLE_INTERRUPTS(CLBR_ANY)
	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
	jnz restore_nocheck
need_resched:
	movl TI_flags(%ebp), %ecx	# need_resched set ?
	testb $_TIF_NEED_RESCHED, %cl
	jz restore_all
	testl $IF_MASK,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
	jz restore_all
	call preempt_schedule_irq
	jmp need_resched
END(resume_kernel)
#endif
	CFI_ENDPROC

/* SYSENTER_RETURN points to after the "sysenter" instruction in
   the vsyscall page.  See vsyscall-sysenter.S, which defines the symbol.  */

	# sysenter call handler stub
ENTRY(sysenter_entry)
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 0
	CFI_REGISTER esp, ebp
	movl TSS_sysenter_esp0(%esp),%esp
sysenter_past_esp:
	/*
	 * No need to follow this irqs on/off section: sysenter itself
	 * disabled irqs and we re-enable them straight after entry:
	 */
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushl $(__USER_DS)
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET ss, 0*/
	pushl %ebp
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esp, 0
	pushfl
	CFI_ADJUST_CFA_OFFSET 4
	pushl $(__USER_CS)
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET cs, 0*/
	/*
	 * Push current_thread_info()->sysenter_return to the stack.
	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
	 */
	pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET eip, 0
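	/*
	 * Worked through (assuming, per the comment above, that
	 * copy_thread leaves esp0 8 bytes below the top of the stack):
	 *   %esp = thread_info + THREAD_SIZE - 8 - 4*4
	 * so adding 8+4*4 and subtracting THREAD_SIZE lands on the
	 * thread_info base, and TI_sysenter_return selects the field.
	 */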

/*
 * Load the potential sixth argument from user stack.
 * Careful about security.
 */
	cmpl $__PAGE_OFFSET-3,%ebp
	jae syscall_fault
1:	movl (%ebp),%ebp
.section __ex_table,"a"
	.align 4
	.long 1b,syscall_fault
.previous
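	/*
	 * The bound above keeps the whole 4-byte load at (%ebp) below
	 * __PAGE_OFFSET (i.e. %ebp <= __PAGE_OFFSET-4), so the access
	 * stays in user space.  It can still fault on an unmapped
	 * page, which is why 1: is covered by the exception table
	 * entry pointing at syscall_fault.
	 */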

	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_THREAD_INFO(%ebp)

	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
	jnz syscall_trace_entry
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
	call *sys_call_table(,%eax,4)
	movl %eax,PT_EAX(%esp)
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testw $_TIF_ALLWORK_MASK, %cx
	jne syscall_exit_work
/* if something modifies registers it must also disable sysexit */
	movl PT_EIP(%esp), %edx
	movl PT_OLDESP(%esp), %ecx
	xorl %ebp,%ebp
	TRACE_IRQS_ON
1:	mov  PT_FS(%esp), %fs
	ENABLE_INTERRUPTS_SYSEXIT
	CFI_ENDPROC
.pushsection .fixup,"ax"
2:	movl $0,PT_FS(%esp)
	jmp 1b
.section __ex_table,"a"
	.align 4
	.long 1b,2b
.popsection
ENDPROC(sysenter_entry)

	# system call handler stub
ENTRY(system_call)
	RING0_INT_FRAME			# can't unwind into user space anyway
	pushl %eax			# save orig_eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
					# system call tracing in operation / emulation
	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
	jnz syscall_trace_entry
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
syscall_call:
	call *sys_call_table(,%eax,4)
	movl %eax,PT_EAX(%esp)		# store the return value
syscall_exit:
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	testl $TF_MASK,PT_EFLAGS(%esp)	# If tracing set singlestep flag on exit
	jz no_singlestep
	orl $_TIF_SINGLESTEP,TI_flags(%ebp)
no_singlestep:
	movl TI_flags(%ebp), %ecx
	testw $_TIF_ALLWORK_MASK, %cx	# current->work
	jne syscall_exit_work

restore_all:
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS, SS and CS
	# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
	# are returning to the kernel.
	# See comments in process.c:copy_thread() for details.
	movb PT_OLDSS(%esp), %ah
	movb PT_CS(%esp), %al
	andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
	CFI_REMEMBER_STATE
	je ldt_ss			# returning to user-space with LDT SS
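	/*
	 * Decoded, the test above computes
	 *   %eax = (EFLAGS & VM_MASK)
	 *	  | ((OLDSS & SEGMENT_TI_MASK) << 8)
	 *	  | (CS & SEGMENT_RPL_MASK)
	 * so ldt_ss is reached only when returning to ring 3, outside
	 * vm86 mode, with a stack segment taken from the LDT.
	 */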
restore_nocheck:
	TRACE_IRQS_IRET
restore_nocheck_notrace:
	RESTORE_REGS
	addl $4, %esp			# skip orig_eax/error_code
	CFI_ADJUST_CFA_OFFSET -4
1:	INTERRUPT_RETURN
.section .fixup,"ax"
iret_exc:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushl $0			# no error code
	pushl $do_iret_error
	jmp error_code
.previous
.section __ex_table,"a"
	.align 4
	.long 1b,iret_exc
.previous

	CFI_RESTORE_STATE
ldt_ss:
	larl PT_OLDSS(%esp), %eax
	jnz restore_nocheck
	testl $0x00400000, %eax		# returning to 32bit stack?
	jnz restore_nocheck		# all right, normal return

#ifdef CONFIG_PARAVIRT
	/*
	 * The kernel can't run on a non-flat stack if paravirt mode
	 * is active.  Rather than try to fix up the high bits of
	 * ESP, bypass this code entirely.  This may break DOSemu
	 * and/or Wine support in a paravirt VM, although the option
	 * is still available to implement the setting of the high
	 * 16-bits in the INTERRUPT_RETURN paravirt-op.
	 */
	cmpl $0, paravirt_ops+PARAVIRT_enabled
	jne restore_nocheck
#endif
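	/*
	 * In outline (the details live in patch_espfix_desc): an iret
	 * to a 16-bit stack segment restores only the low word of
	 * %esp, which would leak the kernel's high word.  So we switch
	 * to an %esp whose low word is the kernel stack's and whose
	 * high word is the user's own, and point the __ESPFIX_SS
	 * descriptor base at the difference, so that base + %esp
	 * still addresses the iret frame.
	 */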

	movl PT_OLDESP(%esp), %eax
	movl %esp, %edx
	call patch_espfix_desc
	pushl $__ESPFIX_SS
	CFI_ADJUST_CFA_OFFSET 4
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	DISABLE_INTERRUPTS(CLBR_EAX)
	TRACE_IRQS_OFF
	lss (%esp), %esp
	CFI_ADJUST_CFA_OFFSET -8
	jmp restore_nocheck
	CFI_ENDPROC
ENDPROC(system_call)

	# perform work that needs to be done immediately before resumption
	ALIGN
	RING0_PTREGS_FRAME		# can't unwind into user space anyway
work_pending:
	testb $_TIF_NEED_RESCHED, %cl
	jz work_notifysig
work_resched:
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
					# than syscall tracing?
	jz restore_all
	testb $_TIF_NEED_RESCHED, %cl
	jnz work_resched

work_notifysig:				# deal with pending signals and
					# notify-resume requests
#ifdef CONFIG_VM86
	testl $VM_MASK, PT_EFLAGS(%esp)
	movl %esp, %eax
	jne work_notifysig_v86		# returning to kernel-space or
					# vm86-space
	xorl %edx, %edx
	call do_notify_resume
	jmp resume_userspace_sig

	ALIGN
work_notifysig_v86:
	pushl %ecx			# save ti_flags for do_notify_resume
	CFI_ADJUST_CFA_OFFSET 4
	call save_v86_state		# %eax contains pt_regs pointer
	popl %ecx
	CFI_ADJUST_CFA_OFFSET -4
	movl %eax, %esp
#else
	movl %esp, %eax
#endif
	xorl %edx, %edx
	call do_notify_resume
	jmp resume_userspace_sig
END(work_pending)

	# perform syscall entry tracing
	ALIGN
syscall_trace_entry:
	movl $-ENOSYS,PT_EAX(%esp)
	movl %esp, %eax
	xorl %edx,%edx
	call do_syscall_trace
	cmpl $0, %eax
	jne resume_userspace		# ret != 0 -> running under PTRACE_SYSEMU,
					# so must skip actual syscall
	movl PT_ORIG_EAX(%esp), %eax
	cmpl $(nr_syscalls), %eax
	jnae syscall_call
	jmp syscall_exit
END(syscall_trace_entry)

	# perform syscall exit tracing
	ALIGN
syscall_exit_work:
	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
	jz work_pending
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)	# could let do_syscall_trace() call
					# schedule() instead
	movl %esp, %eax
	movl $1, %edx
	call do_syscall_trace
	jmp resume_userspace
END(syscall_exit_work)
	CFI_ENDPROC

	RING0_INT_FRAME			# can't unwind into user space anyway
syscall_fault:
	pushl %eax			# save orig_eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
	movl $-EFAULT,PT_EAX(%esp)
	jmp resume_userspace
END(syscall_fault)

syscall_badsys:
	movl $-ENOSYS,PT_EAX(%esp)
	jmp resume_userspace
END(syscall_badsys)
	CFI_ENDPROC

#define FIXUP_ESPFIX_STACK \
	/* since we are on the wrong stack, we can't do this in C :( */ \
	PER_CPU(gdt_page, %ebx); \
	GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
	addl %esp, %eax; \
	pushl $__KERNEL_DS; \
	CFI_ADJUST_CFA_OFFSET 4; \
	pushl %eax; \
	CFI_ADJUST_CFA_OFFSET 4; \
	lss (%esp), %esp; \
	CFI_ADJUST_CFA_OFFSET -8;
#define UNWIND_ESPFIX_STACK \
	movl %ss, %eax; \
	/* see if on espfix stack */ \
	cmpw $__ESPFIX_SS, %ax; \
	jne 27f; \
	movl $__KERNEL_DS, %eax; \
	movl %eax, %ds; \
	movl %eax, %es; \
	/* switch to normal stack */ \
	FIXUP_ESPFIX_STACK; \
27:;
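/*
 * UNWIND_ESPFIX_STACK is the reverse trip: if an exception arrives
 * while running on the __ESPFIX_SS stack, FIXUP_ESPFIX_STACK reads
 * the espfix descriptor base back out of the GDT, adds it to %esp to
 * recover the linear address, and lss-switches onto the flat
 * __KERNEL_DS stack before any C code runs.
 */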

/*
 * Build the entry stubs and pointer table with
 * some assembler magic.
 */
.data
ENTRY(interrupt)
.text

ENTRY(irq_entries_start)
	RING0_INT_FRAME
vector=0
.rept NR_IRQS
	ALIGN
 .if vector
	CFI_ADJUST_CFA_OFFSET -4
 .endif
1:	pushl $~(vector)
	CFI_ADJUST_CFA_OFFSET 4
	jmp common_interrupt
 .previous
	.long 1b
 .text
vector=vector+1
.endr
END(irq_entries_start)

.previous
END(interrupt)
.previous

/*
 * the CPU automatically disables interrupts when executing an IRQ vector,
 * so IRQ-flags tracing has to follow that:
 */
	ALIGN
common_interrupt:
	SAVE_ALL
	TRACE_IRQS_OFF
	movl %esp,%eax
	call do_IRQ
	jmp ret_from_intr
ENDPROC(common_interrupt)
	CFI_ENDPROC

#define BUILD_INTERRUPT(name, nr)	\
ENTRY(name)				\
	RING0_INT_FRAME;		\
	pushl $~(nr);			\
	CFI_ADJUST_CFA_OFFSET 4;	\
	SAVE_ALL;			\
	TRACE_IRQS_OFF			\
	movl %esp,%eax;			\
	call smp_##name;		\
	jmp ret_from_intr;		\
	CFI_ENDPROC;			\
ENDPROC(name)

/* The include is where all of the SMP etc. interrupts come from */
#include "entry_arch.h"

KPROBE_ENTRY(page_fault)
	RING0_EC_FRAME
	pushl $do_page_fault
	CFI_ADJUST_CFA_OFFSET 4
	ALIGN
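/*
 * The trap stubs in this file funnel into error_code.  Each stub
 * pushes its C handler's address into the slot where SAVE_ALL would
 * save %fs; error_code saves the remaining registers by hand, fishes
 * the handler address out of PT_FS into %edi, stores the real %fs
 * value there instead, and calls the handler with the pt_regs
 * pointer in %eax and the error code in %edx.
 */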
error_code:
	/* the function address is in %fs's slot on the stack */
	pushl %es
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET es, 0*/
	pushl %ds
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET ds, 0*/
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET eax, 0
	pushl %ebp
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebp, 0
	pushl %edi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edi, 0
	pushl %esi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esi, 0
	pushl %edx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edx, 0
	pushl %ecx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ecx, 0
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebx, 0
	cld
	pushl %fs
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET fs, 0*/
	movl $(__KERNEL_PERCPU), %ecx
	movl %ecx, %fs
	UNWIND_ESPFIX_STACK
	popl %ecx
	CFI_ADJUST_CFA_OFFSET -4
	/*CFI_REGISTER es, ecx*/
	movl PT_FS(%esp), %edi		# get the function address
	movl PT_ORIG_EAX(%esp), %edx	# get the error code
	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
	mov  %ecx, PT_FS(%esp)
	/*CFI_REL_OFFSET fs, ES*/
	movl $(__USER_DS), %ecx
	movl %ecx, %ds
	movl %ecx, %es
	movl %esp,%eax			# pt_regs pointer
	call *%edi
	jmp ret_from_exception
	CFI_ENDPROC
KPROBE_END(page_fault)

ENTRY(coprocessor_error)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_coprocessor_error
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_simd_coprocessor_error
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(simd_coprocessor_error)

ENTRY(device_not_available)
	RING0_INT_FRAME
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_CR0_INTO_EAX
	testl $0x4, %eax		# EM (math emulation bit)
	jne device_not_available_emulate
	preempt_stop(CLBR_ANY)
	call math_state_restore
	jmp ret_from_exception
device_not_available_emulate:
	pushl $0			# temporary storage for ORIG_EIP
	CFI_ADJUST_CFA_OFFSET 4
	call math_emulate
	addl $4, %esp
	CFI_ADJUST_CFA_OFFSET -4
	jmp ret_from_exception
	CFI_ENDPROC
END(device_not_available)

/*
 * Debug traps and NMI can happen at the one SYSENTER instruction
 * that sets up the real kernel stack. Check here, since we can't
 * allow the wrong stack to be used.
 *
 * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
 * already pushed 3 words if it hits on the sysenter instruction:
 * eflags, cs and eip.
 *
 * We just load the right stack, and push the three (known) values
 * by hand onto the new stack - while updating the return eip past
 * the instruction that would have done it for sysenter.
 */
#define FIX_STACK(offset, ok, label)		\
	cmpw $__KERNEL_CS,4(%esp);		\
	jne ok;					\
label:						\
	movl TSS_sysenter_esp0+offset(%esp),%esp;	\
	CFI_DEF_CFA esp, 0;			\
	CFI_UNDEFINED eip;			\
	pushfl;					\
	CFI_ADJUST_CFA_OFFSET 4;		\
	pushl $__KERNEL_CS;			\
	CFI_ADJUST_CFA_OFFSET 4;		\
	pushl $sysenter_past_esp;		\
	CFI_ADJUST_CFA_OFFSET 4;		\
	CFI_REL_OFFSET eip, 0

KPROBE_ENTRY(debug)
	RING0_INT_FRAME
	cmpl $sysenter_entry,(%esp)
	jne debug_stack_correct
	FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
debug_stack_correct:
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	xorl %edx,%edx			# error code 0
	movl %esp,%eax			# pt_regs pointer
	call do_debug
	jmp ret_from_exception
	CFI_ENDPROC
KPROBE_END(debug)

/*
 * NMI is doubly nasty. It can happen _while_ we're handling
 * a debug fault, and the debug fault hasn't yet been able to
 * clear up the stack. So we first check whether we got an
 * NMI on the sysenter entry path, but after that we need to
 * check whether we got an NMI on the debug path where the debug
 * fault happened on the sysenter path.
 */
KPROBE_ENTRY(nmi)
	RING0_INT_FRAME
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	movl %ss, %eax
	cmpw $__ESPFIX_SS, %ax
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	je nmi_espfix_stack
	cmpl $sysenter_entry,(%esp)
	je nmi_stack_fixup
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	movl %esp,%eax
	/* Do not access memory above the end of our stack page,
	 * it might not exist.
	 */
	andl $(THREAD_SIZE-1),%eax
	cmpl $(THREAD_SIZE-20),%eax
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	jae nmi_stack_correct
	cmpl $sysenter_entry,12(%esp)
	je nmi_debug_stack_check
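	/*
	 * Note on the bound check above: if %esp is within the last
	 * 20 bytes of the thread-stack region, reading 12(%esp) could
	 * run off the top of the stack page, so we skip the debug
	 * stack test and assume the stack is correct.
	 */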
nmi_stack_correct:
	/* We have a RING0_INT_FRAME here */
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_nmi
	jmp restore_nocheck_notrace
	CFI_ENDPROC

nmi_stack_fixup:
	RING0_INT_FRAME
	FIX_STACK(12,nmi_stack_correct, 1)
	jmp nmi_stack_correct

nmi_debug_stack_check:
	/* We have a RING0_INT_FRAME here */
	cmpw $__KERNEL_CS,16(%esp)
	jne nmi_stack_correct
	cmpl $debug,(%esp)
	jb nmi_stack_correct
	cmpl $debug_esp_fix_insn,(%esp)
	ja nmi_stack_correct
	FIX_STACK(24,nmi_stack_correct, 1)
	jmp nmi_stack_correct

nmi_espfix_stack:
	/* We have a RING0_INT_FRAME here.
	 *
	 * Create the pointer that lss will use to switch us back
	 * to the espfix stack afterwards.
	 */
	pushl %ss
	CFI_ADJUST_CFA_OFFSET 4
	pushl %esp
	CFI_ADJUST_CFA_OFFSET 4
	addw $4, (%esp)
	/* copy the iret frame of 12 bytes */
	.rept 3
	pushl 16(%esp)
	CFI_ADJUST_CFA_OFFSET 4
	.endr
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	FIXUP_ESPFIX_STACK		# %eax == %esp
	xorl %edx,%edx			# zero error code
	call do_nmi
	RESTORE_REGS
	lss 12+4(%esp), %esp		# back to espfix stack
	CFI_ADJUST_CFA_OFFSET -24
1:	INTERRUPT_RETURN
	CFI_ENDPROC
.section __ex_table,"a"
	.align 4
	.long 1b,iret_exc
.previous
KPROBE_END(nmi)

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
1:	iret
.section __ex_table,"a"
	.align 4
	.long 1b,iret_exc
.previous
END(native_iret)

ENTRY(native_irq_enable_sysexit)
	sti
	sysexit
END(native_irq_enable_sysexit)
#endif

KPROBE_ENTRY(int3)
	RING0_INT_FRAME
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_int3
	jmp ret_from_exception
	CFI_ENDPROC
KPROBE_END(int3)

ENTRY(overflow)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_overflow
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(overflow)

ENTRY(bounds)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_bounds
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(bounds)

ENTRY(invalid_op)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_invalid_op
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_coprocessor_segment_overrun
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(coprocessor_segment_overrun)

ENTRY(invalid_TSS)
	RING0_EC_FRAME
	pushl $do_invalid_TSS
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(invalid_TSS)

ENTRY(segment_not_present)
	RING0_EC_FRAME
	pushl $do_segment_not_present
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(segment_not_present)

ENTRY(stack_segment)
	RING0_EC_FRAME
	pushl $do_stack_segment
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
	RING0_EC_FRAME
	pushl $do_general_protection
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
KPROBE_END(general_protection)

ENTRY(alignment_check)
	RING0_EC_FRAME
	pushl $do_alignment_check
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(alignment_check)

ENTRY(divide_error)
	RING0_INT_FRAME
	pushl $0			# no error code
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_divide_error
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(divide_error)

#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl machine_check_vector
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(machine_check)
#endif

ENTRY(spurious_interrupt_bug)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_spurious_interrupt_bug
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(spurious_interrupt_bug)

ENTRY(kernel_thread_helper)
	pushl $0		# fake return address for unwinder
	CFI_STARTPROC
	movl %edx,%eax
	push %edx
	CFI_ADJUST_CFA_OFFSET 4
	call *%ebx
	push %eax
	CFI_ADJUST_CFA_OFFSET 4
	call do_exit
	CFI_ENDPROC
ENDPROC(kernel_thread_helper)

.section .rodata,"a"
#include "syscall_table.S"

syscall_table_size=(.-sys_call_table)