/*
 * Copyright (c) 2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <i386/asm.h>
#include <assym.s>
#include <debug.h>
#include <i386/eflags.h>
#include <i386/rtclock_asm.h>
#include <i386/trap.h>
#define _ARCH_I386_ASM_HELP_H_	/* Prevent inclusion of user header */
#include <mach/i386/syscall_sw.h>
#include <i386/postcode.h>
#include <i386/proc_reg.h>
#include <mach/exception_types.h>

#if DEBUG
#define	DEBUG_IDT64 		1
#endif

/*
 * This is the low-level trap and interrupt handling code associated with
 * the IDT. It also includes system call handlers for sysenter/syscall.
 * The IDT itself is defined in mp_desc.c.
 *
 * Code here is structured as follows:
 *
 * stubs 	Code called directly from an IDT vector.
 *		All entry points have the "idt64_" prefix and they are built
 *		using macros expanded by the inclusion of idt_table.h.
 *		This code performs vector-dependent identification and jumps
 * 		into the dispatch code.
 *
 * dispatch	The dispatch code is responsible for saving the thread state
 *		(which is either 64-bit or 32-bit) and then jumping to the
 *		class handler identified by the stub.
 *
 * returns	Code to restore state and return to the previous context.
 *
 * handlers	There are several classes of handlers:
 *   interrupt	- asynchronous events typically from external devices
 *   trap	- synchronous events due to thread execution
 *   syscall	- synchronous system call request
 *   fatal	- fatal traps
 */

/*
 * Handlers:
 */
#define	HNDL_ALLINTRS		EXT(hndl_allintrs)
#define	HNDL_ALLTRAPS		EXT(hndl_alltraps)
#define	HNDL_SYSENTER		EXT(hndl_sysenter)
#define	HNDL_SYSCALL		EXT(hndl_syscall)
#define	HNDL_UNIX_SCALL		EXT(hndl_unix_scall)
#define	HNDL_MACH_SCALL		EXT(hndl_mach_scall)
#define	HNDL_MDEP_SCALL		EXT(hndl_mdep_scall)
#define	HNDL_DOUBLE_FAULT	EXT(hndl_double_fault)
#define	HNDL_MACHINE_CHECK	EXT(hndl_machine_check)


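/*
 * PUSH_FUNCTION pushes the 64-bit address of a handler without clobbering
 * any register: a pushq of an absolute 64-bit address isn't directly
 * encodable, so the address is formed RIP-relative in a temporarily saved
 * %rax and stored into the slot reserved by the initial sub.
 * (Rationale inferred from the instruction sequence below.)
 */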
#if 1
#define PUSH_FUNCTION(func) 			 \
	sub	$8, %rsp			;\
	push	%rax				;\
	leaq	func(%rip), %rax		;\
	movq	%rax, 8(%rsp)			;\
	pop	%rax
#else
#define PUSH_FUNCTION(func) pushq func
#endif

/* The wrapper for all non-special traps/interrupts */
/* Everything up to PUSH_FUNCTION just outputs
 * the interrupt number to the postcode display
 */
#if DEBUG_IDT64
#define IDT_ENTRY_WRAPPER(n, f)			 \
	push	%rax				;\
	POSTCODE2(0x6400+n)			;\
	pop	%rax				;\
	PUSH_FUNCTION(f)  			;\
	pushq	$(n)				;\
	jmp L_dispatch
#else
#define IDT_ENTRY_WRAPPER(n, f)			 \
	PUSH_FUNCTION(f)  			;\
	pushq	$(n)				;\
	jmp L_dispatch
#endif

/* A trap that comes with an error code already on the stack */
#define TRAP_ERR(n, f)				 \
	Entry(f)				;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)

/* A normal trap */
#define TRAP(n, f)				 \
	Entry(f)				;\
	pushq	$0          			;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)

#define USER_TRAP TRAP

/* An interrupt */
#define INTERRUPT(n)			 	\
	Entry(_intr_ ## n)			;\
	pushq	$0          			;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLINTRS)

/* A trap with a special-case handler, hence we don't need to define anything */
#define TRAP_SPC(n, f)
#define TRAP_IST1(n, f)
#define TRAP_IST2(n, f)
#define USER_TRAP_SPC(n, f)

/* Generate all the stubs */
#include "idt_table.h"

/*
 * Common dispatch point.
 * Determine what mode has been interrupted and save state accordingly.
 * Here with:
 *	rsp	from user-space:   interrupt state in PCB, or
 *		from kernel-space: interrupt state in kernel or interrupt stack
 *	GSBASE	from user-space:   pthread area, or
 *		from kernel-space: cpu_data
 */
L_dispatch:
	cmpl	$(KERNEL64_CS), ISF64_CS(%rsp)
	je	L_dispatch_kernel

	swapgs

L_dispatch_user:
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
	je	L_dispatch_U32		/* 32-bit user task */

L_dispatch_U64:
	subq	$(ISS64_OFFSET), %rsp
	mov	%r15, R64_R15(%rsp)
	mov	%rsp, %r15
	mov	%gs:CPU_KERNEL_STACK, %rsp
	jmp	L_dispatch_64bit

L_dispatch_kernel:
	subq	$(ISS64_OFFSET), %rsp
	mov	%r15, R64_R15(%rsp)
	mov	%rsp, %r15

/*
 * Here for 64-bit user task or kernel
 */
L_dispatch_64bit:
	movl	$(SS_64), SS_FLAVOR(%r15)

	/*
	 * Save segment regs - for completeness since they're not used.
	 */
	movl	%fs, R64_FS(%r15)
	movl	%gs, R64_GS(%r15)

	/* Save general-purpose registers */
	mov	%rax, R64_RAX(%r15)
	mov	%rbx, R64_RBX(%r15)
	mov	%rcx, R64_RCX(%r15)
	mov	%rdx, R64_RDX(%r15)
	mov	%rbp, R64_RBP(%r15)
	mov	%rdi, R64_RDI(%r15)
	mov	%rsi, R64_RSI(%r15)
	mov	%r8,  R64_R8(%r15)
	mov	%r9,  R64_R9(%r15)
	mov	%r10, R64_R10(%r15)
	mov	%r11, R64_R11(%r15)
	mov	%r12, R64_R12(%r15)
	mov	%r13, R64_R13(%r15)
	mov	%r14, R64_R14(%r15)

	/* cr2 is significant only for page-faults */
	mov	%cr2, %rax
	mov	%rax, R64_CR2(%r15)

	mov	R64_TRAPNO(%r15), %ebx	/* %ebx := trapno for later */
	mov	R64_TRAPFN(%r15), %rdx	/* %rdx := trapfn for later */
	mov	R64_CS(%r15), %esi	/* %esi := cs for later */

	jmp	L_common_dispatch

L_64bit_entry_reject:
	/*
	 * Here for a 64-bit user attempting an invalid kernel entry.
	 */
	pushq	%rax
	leaq	HNDL_ALLTRAPS(%rip), %rax
	movq	%rax, ISF64_TRAPFN+8(%rsp)
	popq	%rax
	movq	$(T_INVALID_OPCODE), ISF64_TRAPNO(%rsp)
	jmp 	L_dispatch_U64

L_32bit_entry_check:
	/*
	 * Check we're not a confused 64-bit user.
	 */
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
	jne	L_64bit_entry_reject
	/* fall through to 32-bit handler: */

L_dispatch_U32: /* 32-bit user task */
	subq	$(ISS64_OFFSET), %rsp
	mov	%rsp, %r15
	mov	%gs:CPU_KERNEL_STACK, %rsp
	movl	$(SS_32), SS_FLAVOR(%r15)

	/*
	 * Save segment regs
	 */
	movl	%ds, R32_DS(%r15)
	movl	%es, R32_ES(%r15)
	movl	%fs, R32_FS(%r15)
	movl	%gs, R32_GS(%r15)

	/*
	 * Save general 32-bit registers
	 */
	mov	%eax, R32_EAX(%r15)
	mov	%ebx, R32_EBX(%r15)
	mov	%ecx, R32_ECX(%r15)
	mov	%edx, R32_EDX(%r15)
	mov	%ebp, R32_EBP(%r15)
	mov	%esi, R32_ESI(%r15)
	mov	%edi, R32_EDI(%r15)

	/* Unconditionally save cr2; only meaningful on page faults */
	mov	%cr2, %rax
	mov	%eax, R32_CR2(%r15)

	/*
	 * Copy registers already saved in the machine state
	 * (in the interrupt stack frame) into the compat save area.
	 */
	mov	R64_RIP(%r15), %eax
	mov	%eax, R32_EIP(%r15)
	mov	R64_RFLAGS(%r15), %eax
	mov	%eax, R32_EFLAGS(%r15)
	mov	R64_RSP(%r15), %eax
	mov	%eax, R32_UESP(%r15)
	mov	R64_SS(%r15), %eax
	mov	%eax, R32_SS(%r15)
L_dispatch_U32_after_fault:
	mov	R64_CS(%r15), %esi		/* %esi := %cs for later */
	mov	%esi, R32_CS(%r15)
	mov	R64_TRAPNO(%r15), %ebx		/* %ebx := trapno for later */
	mov	%ebx, R32_TRAPNO(%r15)
	mov	R64_ERR(%r15), %eax
	mov	%eax, R32_ERR(%r15)
	mov	R64_TRAPFN(%r15), %rdx		/* %rdx := trapfn for later */

L_common_dispatch:
	cld		/* Ensure the direction flag is clear in the kernel */
	cmpl    $0, EXT(pmap_smap_enabled)(%rip)
	je	1f
	clac		/* Clear EFLAGS.AC if SMAP is present/enabled */
1:
	/*
	 * On entering the kernel, we don't need to switch cr3
	 * because the kernel shares the user's address space.
	 * But we mark the kernel's cr3 as "active".
	 * If, however, the invalid cr3 flag is set, we have to flush tlbs
	 * since the kernel's mapping was changed while we were in userspace.
	 *
	 * But: if global no_shared_cr3 is TRUE we do switch to the kernel's cr3
	 * so that illicit accesses to userspace can be trapped.
	 */
	mov	%gs:CPU_KERNEL_CR3, %rcx
	mov	%rcx, %gs:CPU_ACTIVE_CR3
	test	$3, %esi			/* user/kernel? */
	jz	2f				/* skip cr3 reload from kernel */
	xor	%rbp, %rbp
	cmpl	$0, EXT(no_shared_cr3)(%rip)
	je	2f
	mov	%rcx, %cr3			/* load kernel cr3 */
	jmp	4f				/* and skip tlb flush test */
2:
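	/*
	 * The load below (CPU_ACTIVE_CR3+4, shifted) presumably picks up
	 * the 32-bit TLB-invalid flags adjacent to the active cr3: if any
	 * are set, clear CPU_TLB_INVALID and flush - bit 16 asks for a
	 * global flush (toggle CR4.PGE), otherwise a cr3 reload suffices
	 * for the non-global entries.
	 */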
	mov	%gs:CPU_ACTIVE_CR3+4, %rcx
	shr	$32, %rcx
	testl	%ecx, %ecx
	jz	4f
	movl	$0, %gs:CPU_TLB_INVALID
	testl	$(1<<16), %ecx			/* Global? */
	jz	3f
	mov	%cr4, %rcx	/* RMWW CR4, for lack of an alternative */
	and	$(~CR4_PGE), %rcx
	mov	%rcx, %cr4
	or	$(CR4_PGE), %rcx
	mov	%rcx, %cr4
	jmp	4f
3:
	mov	%cr3, %rcx
	mov	%rcx, %cr3
4:
	mov	%gs:CPU_ACTIVE_THREAD, %rcx	/* Get the active thread */
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling trap */
	cmpq	$0, TH_PCB_IDS(%rcx)	/* Is there a debug register state? */
	je	5f
	xor	%ecx, %ecx		/* If so, reset DR7 (the control) */
	mov	%rcx, %dr7
5:
	incl	%gs:hwIntCnt(,%ebx,4)		// Bump the trap/intr count
	/* Dispatch the designated handler */
	jmp	*%rdx

/*
 * Control is passed here to return to user.
 */
Entry(return_to_user)
	TIME_TRAP_UEXIT

Entry(ret_to_user)
// XXX Be nice to tidy up this debug register restore sequence...
	mov	%gs:CPU_ACTIVE_THREAD, %rdx
	movq	TH_PCB_IDS(%rdx),%rax	/* Obtain this thread's debug state */

	test	%rax, %rax		/* Is there a debug register context? */
	je	2f 			/* branch if not */
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP /* Are we a 32-bit task? */
	jne	1f
	movl	DS_DR0(%rax), %ecx	/* If so, load the 32 bit DRs */
	movq	%rcx, %dr0
	movl	DS_DR1(%rax), %ecx
	movq	%rcx, %dr1
	movl	DS_DR2(%rax), %ecx
	movq	%rcx, %dr2
	movl	DS_DR3(%rax), %ecx
	movq	%rcx, %dr3
	movl	DS_DR7(%rax), %ecx
	movq 	%rcx, %gs:CPU_DR7
	jmp 	2f
1:
	mov	DS64_DR0(%rax), %rcx	/* Load the full width DRs */
	mov	%rcx, %dr0
	mov	DS64_DR1(%rax), %rcx
	mov	%rcx, %dr1
	mov	DS64_DR2(%rax), %rcx
	mov	%rcx, %dr2
	mov	DS64_DR3(%rax), %rcx
	mov	%rcx, %dr3
	mov	DS64_DR7(%rax), %rcx
	mov 	%rcx, %gs:CPU_DR7
2:
	/*
	 * On exiting the kernel there's no need to switch cr3 since we're
	 * already running in the user's address space which includes the
	 * kernel. Nevertheless, we now mark the task's cr3 as active.
	 * But, if no_shared_cr3 is set, we do need to switch cr3 at this point.
	 */
	mov	%gs:CPU_TASK_CR3, %rcx
	mov	%rcx, %gs:CPU_ACTIVE_CR3
	movl	EXT(no_shared_cr3)(%rip), %eax
	test	%eax, %eax		/* -no_shared_cr3 */
	jz	3f
	mov	%rcx, %cr3
3:
	mov	%gs:CPU_DR7, %rax	/* Is there a debug control register? */
	cmp	$0, %rax
	je	4f
	mov	%rax, %dr7		/* Set DR7 */
	movq	$0, %gs:CPU_DR7
4:
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* 64-bit state? */
	je	L_64bit_return

L_32bit_return:
#if DEBUG_IDT64
	cmpl	$(SS_32), SS_FLAVOR(%r15)	/* 32-bit state? */
	je	1f
	cli
	POSTCODE2(0x6432)
	CCALL1(panic_idt64, %r15)
1:
#endif /* DEBUG_IDT64 */

	/*
	 * Restore registers into the machine state for iret.
	 * Here on fault stack and PCB address in R15.
	 */
	movl	R32_EIP(%r15), %eax
	movl	%eax, R64_RIP(%r15)
	movl	R32_EFLAGS(%r15), %eax
	movl	%eax, R64_RFLAGS(%r15)
	movl	R32_CS(%r15), %eax
	movl	%eax, R64_CS(%r15)
	movl	R32_UESP(%r15), %eax
	movl	%eax, R64_RSP(%r15)
	movl	R32_SS(%r15), %eax
	movl	%eax, R64_SS(%r15)

	/*
	 * Restore general 32-bit registers
	 */
	movl	R32_EAX(%r15), %eax
	movl	R32_EBX(%r15), %ebx
	movl	R32_ECX(%r15), %ecx
	movl	R32_EDX(%r15), %edx
	movl	R32_EBP(%r15), %ebp
	movl	R32_ESI(%r15), %esi
	movl	R32_EDI(%r15), %edi

	/*
	 * Restore segment registers. A segment exception taken here will
	 * push state on the IST1 stack and will not affect the "PCB stack".
	 */
	mov	%r15, %rsp		/* Set the PCB as the stack */
	swapgs
EXT(ret32_set_ds):
	movl	R32_DS(%rsp), %ds
EXT(ret32_set_es):
	movl	R32_ES(%rsp), %es
EXT(ret32_set_fs):
	movl	R32_FS(%rsp), %fs
EXT(ret32_set_gs):
	movl	R32_GS(%rsp), %gs

	/* pop compat frame + trapno, trapfn and error */
	add	$(ISS64_OFFSET)+8+8+8, %rsp
	cmpl	$(SYSENTER_CS),ISF64_CS-8-8-8(%rsp)
					/* test for fast entry/exit */
	je      L_fast_exit
EXT(ret32_iret):
	iretq				/* return from interrupt */

L_fast_exit:
	pop	%rdx			/* user return eip */
	pop	%rcx			/* pop and toss cs */
	andl	$(~EFL_IF), (%rsp)	/* clear interrupts enable, sti below */
	popf				/* flags - carry denotes failure */
	pop	%rcx			/* user return esp */
	sti				/* interrupts enabled after sysexit */
	sysexitl			/* 32-bit sysexit */

ret_to_kernel:
#if DEBUG_IDT64
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* 64-bit state? */
	je	1f
	cli
	POSTCODE2(0x6464)
	CCALL1(panic_idt64, %r15)
	hlt
1:
	cmpl	$(KERNEL64_CS), R64_CS(%r15)
	je	2f
	CCALL1(panic_idt64, %r15)
	hlt
2:
#endif

L_64bit_return:
	/*
	 * Restore general 64-bit registers.
	 * Here on fault stack and PCB address in R15.
	 */
	mov	R64_R14(%r15), %r14
	mov	R64_R13(%r15), %r13
	mov	R64_R12(%r15), %r12
	mov	R64_R11(%r15), %r11
	mov	R64_R10(%r15), %r10
	mov	R64_R9(%r15),  %r9
	mov	R64_R8(%r15),  %r8
	mov	R64_RSI(%r15), %rsi
	mov	R64_RDI(%r15), %rdi
	mov	R64_RBP(%r15), %rbp
	mov	R64_RDX(%r15), %rdx
	mov	R64_RCX(%r15), %rcx
	mov	R64_RBX(%r15), %rbx
	mov	R64_RAX(%r15), %rax

	/*
	 * We must swap GS base if we're returning to user-space,
	 * or we're returning from an NMI that occurred in a trampoline
	 * before the user GS had been swapped. In the latter case, the NMI
	 * handler will have flagged the high-order 32-bits of the CS.
	 */
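	/*
	 * (Note: the 64-bit compare below deliberately includes those
	 * flagged high-order bits, so such a frame fails to match
	 * KERNEL64_CS and the swapgs is performed.)
	 */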
	cmpq	$(KERNEL64_CS), R64_CS(%r15)
	jz	1f
	swapgs
1:
	mov	R64_R15(%r15), %rsp
	xchg	%r15, %rsp
	add	$(ISS64_OFFSET)+24, %rsp	/* pop saved state       */
						/* + trapno/trapfn/error */
	cmpl	$(SYSCALL_CS),ISF64_CS-24(%rsp)
						/* test for fast entry/exit */
	je      L_sysret
.globl _dump_iretq
EXT(ret64_iret):
        iretq				/* return from interrupt */

L_sysret:
	/*
	 * Here to load rcx/r11/rsp and perform the sysret back to user-space.
	 * 	rcx	user rip
	 *	r11	user rflags
	 *	rsp	user stack pointer
	 */
	mov	ISF64_RIP-24(%rsp), %rcx
	mov	ISF64_RFLAGS-24(%rsp), %r11
	mov	ISF64_RSP-24(%rsp), %rsp
        sysretq				/* return from system call */



/*
 * System call handlers.
 * These are entered via a syscall interrupt. The system call number in %rax
 * is saved to the error code slot in the stack frame. We then branch to the
 * common state saving code.
 */

#ifndef UNIX_INT
#error NO UNIX INT!!!
#endif
Entry(idt64_unix_scall)
	swapgs				/* switch to kernel gs (cpu_data) */
	pushq	%rax			/* save system call number */
	PUSH_FUNCTION(HNDL_UNIX_SCALL)
	pushq	$(UNIX_INT)
	jmp	L_32bit_entry_check


Entry(idt64_mach_scall)
	swapgs				/* switch to kernel gs (cpu_data) */
	pushq	%rax			/* save system call number */
	PUSH_FUNCTION(HNDL_MACH_SCALL)
	pushq	$(MACH_INT)
	jmp	L_32bit_entry_check


Entry(idt64_mdep_scall)
	swapgs				/* switch to kernel gs (cpu_data) */
	pushq	%rax			/* save system call number */
	PUSH_FUNCTION(HNDL_MDEP_SCALL)
	pushq	$(MACHDEP_INT)
	jmp	L_32bit_entry_check

/* Programmed into MSR_IA32_LSTAR by mp_desc.c */
Entry(hi64_syscall)
Entry(idt64_syscall)
L_syscall_continue:
	swapgs				/* Kapow! get per-cpu data area */
	mov	%rsp, %gs:CPU_UBER_TMP	/* save user stack */
	mov	%gs:CPU_UBER_ISF, %rsp	/* switch stack to pcb */

	/*
	 * Save values in the ISF frame in the PCB
	 * to cons up the saved machine state.
	 */
	movl	$(USER_DS), ISF64_SS(%rsp)
	movl	$(SYSCALL_CS), ISF64_CS(%rsp)	/* cs - a pseudo-segment */
	mov	%r11, ISF64_RFLAGS(%rsp)	/* rflags */
	mov	%rcx, ISF64_RIP(%rsp)		/* rip */
	mov	%gs:CPU_UBER_TMP, %rcx
	mov	%rcx, ISF64_RSP(%rsp)		/* user stack */
	mov	%rax, ISF64_ERR(%rsp)		/* err/rax - syscall code */
	movq	$(T_SYSCALL), ISF64_TRAPNO(%rsp)	/* trapno */
	leaq	HNDL_SYSCALL(%rip), %r11;
	movq	%r11, ISF64_TRAPFN(%rsp)
	mov	ISF64_RFLAGS(%rsp), %r11	/* Avoid leak, restore R11 */
	jmp	L_dispatch_U64			/* this can only be 64-bit */

/*
 * sysenter entry point
 * Requires user code to set up:
 *	edx: user instruction pointer (return address)
 *	ecx: user stack pointer
 *		on which is pushed stub ret addr and saved ebx
 * Return to user-space is made using sysexit.
 * Note: sysenter/sysexit cannot be used for calls returning a value in edx,
 *       or requiring ecx to be preserved.
 */
Entry(hi64_sysenter)
Entry(idt64_sysenter)
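	/*
	 * On sysenter the hardware loaded RSP from the SYSENTER ESP MSR;
	 * it presumably points at a word holding the PCB stack address,
	 * which the dereference below installs as the working stack.
	 */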
	movq	(%rsp), %rsp
	/*
	 * Push values on to the PCB stack
	 * to cons up the saved machine state.
	 */
	push	$(USER_DS)		/* ss */
	push	%rcx			/* uesp */
	pushf				/* flags */
	/*
	 * Clear, among others, the Nested Task (NT) flags bit;
	 * this is zeroed by INT, but not by SYSENTER.
	 */
	push	$0
	popf
	push	$(SYSENTER_CS)		/* cs */
L_sysenter_continue:
	swapgs				/* switch to kernel gs (cpu_data) */
	push	%rdx			/* eip */
	push	%rax			/* err/eax - syscall code */
	PUSH_FUNCTION(HNDL_SYSENTER)
	pushq	$(T_SYSENTER)
	orl	$(EFL_IF), ISF64_RFLAGS(%rsp)
	jmp	L_32bit_entry_check


Entry(idt64_page_fault)
	PUSH_FUNCTION(HNDL_ALLTRAPS)
	push	$(T_PAGE_FAULT)
	push	%rax			/* save %rax temporarily */
	testb	$3, 8+ISF64_CS(%rsp)	/* was trap from kernel? */
	jz	L_kernel_trap		/* - yes, handle with care */
	pop	%rax			/* restore %rax, swapgs, and continue */
	swapgs
	jmp	L_dispatch_user


/*
 * Debug trap.  Check for single-stepping across system call into
 * kernel.  If this is the case, taking the debug trap has turned
 * off single-stepping - save the flags register with the trace
 * bit set.
 */
Entry(idt64_debug)
	push	$0			/* error code */
	PUSH_FUNCTION(HNDL_ALLTRAPS)
	pushq	$(T_DEBUG)

	testb	$3, ISF64_CS(%rsp)
	jnz	L_dispatch

	/*
	 * trap came from kernel mode
	 */

	push	%rax			/* save %rax temporarily */
	lea	EXT(idt64_sysenter)(%rip), %rax
	cmp	%rax, ISF64_RIP+8(%rsp)
	pop	%rax
	jne	L_dispatch
	/*
	 * Interrupt stack frame has been pushed on the temporary stack.
	 * We have to switch to pcb stack and patch up the saved state.
	 */
	mov	%rcx, ISF64_ERR(%rsp)	/* save %rcx in error slot */
	mov	ISF64_SS+8(%rsp), %rcx	/* top of temp stack -> pcb stack */
	xchg	%rcx,%rsp		/* switch to pcb stack */
	push	$(USER_DS)		/* ss */
	push	ISF64_ERR(%rcx)		/* saved %rcx into rsp slot */
	push	ISF64_RFLAGS(%rcx)	/* rflags */
	push	$(SYSENTER_TF_CS)	/* cs - not SYSENTER_CS for iret path */
	mov	ISF64_ERR(%rcx),%rcx	/* restore %rcx */
	jmp	L_sysenter_continue	/* continue sysenter entry */


Entry(idt64_double_fault)
	PUSH_FUNCTION(HNDL_DOUBLE_FAULT)
	pushq	$(T_DOUBLE_FAULT)

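	/*
	 * If the faulting RIP is the syscall entry point itself, the
	 * double fault presumably hit before a usable stack was
	 * established; recover the interrupted RSP from the frame and
	 * retry the normal syscall path. Otherwise dispatch as a
	 * kernel fault.
	 */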
	push	%rax
	leaq	EXT(idt64_syscall)(%rip), %rax
	cmp	%rax, ISF64_RIP+8(%rsp)
	pop	%rax
	jne	L_dispatch_kernel

	mov	ISF64_RSP(%rsp), %rsp
	jmp	L_syscall_continue


/*
 * For GP/NP/SS faults, we use the IST1 stack.
 * For faults from user-space, we have to copy the machine state to the
 * PCB stack and then dispatch as normal.
 * For faults in kernel-space, we need to scrub for kernel exit faults and
 * treat these as user-space faults. But for all other kernel-space faults
 * we continue to run on the IST1 stack and we dispatch to handle the fault
 * as fatal.
 */
Entry(idt64_gen_prot)
	PUSH_FUNCTION(HNDL_ALLTRAPS)
	pushq	$(T_GENERAL_PROTECTION)
	jmp	trap_check_kernel_exit	/* check for kernel exit sequence */

Entry(idt64_stack_fault)
	PUSH_FUNCTION(HNDL_ALLTRAPS)
	pushq	$(T_STACK_FAULT)
	jmp	trap_check_kernel_exit	/* check for kernel exit sequence */

Entry(idt64_segnp)
	PUSH_FUNCTION(HNDL_ALLTRAPS)
	pushq	$(T_SEGMENT_NOT_PRESENT)
					/* indicate fault type */
trap_check_kernel_exit:
	testb   $3,ISF64_CS(%rsp)
	jz	L_kernel_gpf

	/* Here for fault from user-space. Copy interrupt state to PCB. */
	swapgs
	push	%rax
	mov	%rcx, %gs:CPU_UBER_TMP		/* save user RCX  */
	mov	%gs:CPU_UBER_ISF, %rcx		/* PCB stack addr */
	mov	ISF64_SS+8(%rsp), %rax
	mov	%rax, ISF64_SS(%rcx)
	mov	ISF64_RSP+8(%rsp), %rax
	mov	%rax, ISF64_RSP(%rcx)
	mov	ISF64_RFLAGS+8(%rsp), %rax
	mov	%rax, ISF64_RFLAGS(%rcx)
	mov	ISF64_CS+8(%rsp), %rax
	mov	%rax, ISF64_CS(%rcx)
	mov	ISF64_RIP+8(%rsp), %rax
	mov	%rax, ISF64_RIP(%rcx)
	mov	ISF64_ERR+8(%rsp), %rax
	mov	%rax, ISF64_ERR(%rcx)
	mov	ISF64_TRAPFN+8(%rsp), %rax
	mov	%rax, ISF64_TRAPFN(%rcx)
	mov	ISF64_TRAPNO+8(%rsp), %rax
	mov	%rax, ISF64_TRAPNO(%rcx)
	pop	%rax
	mov	%gs:CPU_UBER_TMP, %rsp		/* user RCX into RSP */
	xchg	%rcx, %rsp			/* to PCB stack with user RCX */
	jmp	L_dispatch_user

L_kernel_gpf:
	/* Here for GPF from kernel_space. Check for recoverable cases. */
	push	%rax
	leaq	EXT(ret32_iret)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_fault_iret
	leaq	EXT(ret64_iret)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_fault_iret
	leaq	EXT(ret32_set_ds)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_32bit_fault_set_seg
	leaq	EXT(ret32_set_es)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_32bit_fault_set_seg
	leaq	EXT(ret32_set_fs)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_32bit_fault_set_seg
	leaq	EXT(ret32_set_gs)(%rip), %rax
	cmp	%rax, 8+ISF64_RIP(%rsp)
	je	L_32bit_fault_set_seg

	/* Fall through */

L_kernel_trap:
	/*
	 * Here after taking an unexpected trap from kernel mode - perhaps
	 * while running in the trampolines hereabouts.
	 * Note: %rax has been pushed on stack.
	 * Make sure we're not on the PCB stack, if so move to the kernel stack.
	 * This is likely a fatal condition.
	 * But first, ensure we have the kernel gs base active...
	 */
	push	%rcx
	push	%rdx
	mov	$(MSR_IA32_GS_BASE), %ecx
	rdmsr					/* read kernel gsbase */
	test	$0x80000000, %edx		/* test MSB of address */
	jne	1f
	swapgs					/* so swap */
1:
	pop	%rdx
	pop	%rcx

	movq	%gs:CPU_UBER_ISF, %rax		/* PCB stack addr */
	subq	%rsp, %rax
	cmpq	$(PAGE_SIZE), %rax		/* current stack in PCB? */
	jb	2f				/*  - yes, deal with it */
	pop	%rax				/*  - no, restore %rax */
	jmp	L_dispatch_kernel
2:
	/*
	 *  Here if %rsp is in the PCB
	 *  Copy the interrupt stack frame from PCB stack to kernel stack
	 */
	movq	%gs:CPU_KERNEL_STACK, %rax
	xchgq	%rax, %rsp
	pushq	8+ISF64_SS(%rax)
	pushq	8+ISF64_RSP(%rax)
	pushq	8+ISF64_RFLAGS(%rax)
	pushq	8+ISF64_CS(%rax)
	pushq	8+ISF64_RIP(%rax)
	pushq	8+ISF64_ERR(%rax)
	pushq	8+ISF64_TRAPFN(%rax)
	pushq	8+ISF64_TRAPNO(%rax)
	movq	(%rax), %rax
	jmp	L_dispatch_kernel


/*
 * GP/NP fault on IRET: CS or SS is in error.
 * User GSBASE is active.
 * On IST1 stack containing:
 *  (rax saved above, which is immediately popped)
 *  0  ISF64_TRAPNO:	trap code (NP or GP)
 *  8  ISF64_TRAPFN:	trap function
 *  16 ISF64_ERR:	segment number in error (error code)
 *  24 ISF64_RIP:	kernel RIP
 *  32 ISF64_CS:	kernel CS
 *  40 ISF64_RFLAGS:	kernel RFLAGS
 *  48 ISF64_RSP:	kernel RSP
 *  56 ISF64_SS:	kernel SS
 * On the PCB stack, pointed to by the kernel's RSP is:
 *   0			user RIP
 *   8			user CS
 *  16			user RFLAGS
 *  24			user RSP
 *  32 			user SS
 *
 * We need to move the kernel's TRAPNO, TRAPFN and ERR to the PCB and handle
 * as a user fault with:
 *  0  ISF64_TRAPNO:	trap code (NP or GP)
 *  8  ISF64_TRAPFN:	trap function
 *  16 ISF64_ERR:	segment number in error (error code)
 *  24			user RIP
 *  32			user CS
 *  40			user RFLAGS
 *  48			user RSP
 *  56 			user SS
 */
L_fault_iret:
	pop	%rax			/* recover saved %rax */
	mov	%rax, ISF64_RIP(%rsp)	/* save rax (we don't need saved rip) */
	mov	ISF64_RSP(%rsp), %rax
	xchg	%rax, %rsp		/* switch to PCB stack */
	push	ISF64_ERR(%rax)
	push	ISF64_TRAPFN(%rax)
	push	ISF64_TRAPNO(%rax)
	mov	ISF64_RIP(%rax), %rax	/* restore rax */
					/* now treat as fault from user */
	jmp	L_dispatch

/*
 * Fault restoring a segment register.  All of the saved state is still
 * on the stack untouched since we haven't yet moved the stack pointer.
 * On IST1 stack containing:
 *  (rax saved above, which is immediately popped)
 *  0  ISF64_TRAPNO:	trap code (NP or GP)
 *  8  ISF64_TRAPFN:	trap function
 *  16 ISF64_ERR:	segment number in error (error code)
 *  24 ISF64_RIP:	kernel RIP
 *  32 ISF64_CS:	kernel CS
 *  40 ISF64_RFLAGS:	kernel RFLAGS
 *  48 ISF64_RSP:	kernel RSP
 *  56 ISF64_SS:	kernel SS
 * On the PCB stack, pointed to by the kernel's RSP is:
 *  0  			user trap code
 *  8  			user trap function
 *  16			user err
 *  24			user RIP
 *  32			user CS
 *  40			user RFLAGS
 *  48			user RSP
 *  56 			user SS
 */
L_32bit_fault_set_seg:
	swapgs
	pop	%rax			/* toss saved %rax from stack */
	mov	ISF64_TRAPNO(%rsp), %rax
	mov	ISF64_TRAPFN(%rsp), %rcx
	mov	ISF64_ERR(%rsp), %rdx
	mov	ISF64_RSP(%rsp), %rsp	/* reset stack to saved state */
	mov	%rax,R64_TRAPNO(%rsp)
	mov	%rcx,R64_TRAPFN(%rsp)
	mov	%rdx,R64_ERR(%rsp)
					/* now treat as fault from user */
					/* except that all the state is */
					/* already saved - we just have to */
					/* move the trapno and error into */
					/* the compatibility frame */
	jmp	L_dispatch_U32_after_fault

/*
 * Fatal exception handlers:
 */
Entry(idt64_db_task_dbl_fault)
	PUSH_FUNCTION(HNDL_DOUBLE_FAULT)
	pushq	$(T_DOUBLE_FAULT)
	jmp	L_dispatch

Entry(idt64_db_task_stk_fault)
	PUSH_FUNCTION(HNDL_DOUBLE_FAULT)
	pushq	$(T_STACK_FAULT)
	jmp	L_dispatch

Entry(idt64_mc)
	push	$(0)			/* Error */
	PUSH_FUNCTION(HNDL_MACHINE_CHECK)
	pushq	$(T_MACHINE_CHECK)
	jmp	L_dispatch

/*
 * NMI
 * This may or may not be fatal, but extreme care is required
 * because it may arrive while control is already in another trampoline.
 *
 * We get here on IST2 stack which is used for NMIs only.
 * We must be aware of the interrupted state:
 *  - from user-space, we
 *    - copy state to the PCB and continue;
 *  - from kernel-space, we
 *    - copy state to the kernel stack and continue, but
 *    - check what GSBASE was active, set the kernel base and
 *    - ensure that the active state is restored when the NMI is dismissed.
 */
Entry(idt64_nmi)
	push	%rax				/* save RAX to ISF64_ERR */
	push	%rcx				/* save RCX to ISF64_TRAPFN */
	push	%rdx				/* save RDX to ISF64_TRAPNO */
	testb	$3, ISF64_CS(%rsp)		/* NMI from user-space? */
	je	1f

	/* From user-space: copy interrupt state to user PCB */
	swapgs
	mov	%gs:CPU_UBER_ISF, %rcx		/* PCB stack addr */
	add	$(ISF64_SIZE), %rcx		/* adjust to base of ISF */
	swapgs					/* swap back for L_dispatch */
	jmp	4f				/* Copy state to PCB */

1:
	/*
	 * From kernel-space:
	 * Determine whether the kernel or user GS is set.
	 * Set the kernel and ensure that we'll swap back correctly at IRET.
	 */
	mov	$(MSR_IA32_GS_BASE), %ecx
	rdmsr					/* read kernel gsbase */
	test	$0x80000000, %edx		/* test MSB of address */
	jne	2f
	swapgs					/* so swap */
	movl	$1, ISF64_CS+4(%rsp)		/* and set flag in CS slot */
2:
	/*
	 * Determine whether we're on the kernel or interrupt stack
	 * when the NMI hit.
	 */
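	/*
	 * The xor/mask sequences below yield zero only when the
	 * interrupted RSP lies within the same stack-mask-sized block
	 * as the candidate stack, i.e. the NMI interrupted code running
	 * on that stack.
	 */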
	mov	ISF64_RSP(%rsp), %rcx
	mov	%gs:CPU_KERNEL_STACK, %rax
	xor	%rcx, %rax
	and	EXT(kernel_stack_mask)(%rip), %rax
	test	%rax, %rax		/* are we on the kernel stack? */
	je	3f			/* yes */

	mov	%gs:CPU_INT_STACK_TOP, %rax
	dec	%rax			/* intr stack top is byte above max */
	xor	%rcx, %rax
	and	EXT(kernel_stack_mask)(%rip), %rax
	test	%rax, %rax		/* are we on the interrupt stack? */
	je	3f			/* yes */

	mov    %gs:CPU_KERNEL_STACK, %rcx
3:
	/* 16-byte-align kernel/interrupt stack for state push */
	and	$0xFFFFFFFFFFFFFFF0, %rcx

4:
	/*
	 * Copy state from NMI stack (RSP) to the save area (RCX) which is
	 * the PCB for user or kernel/interrupt stack from kernel.
	 * ISF64_ERR(RSP)    saved RAX
	 * ISF64_TRAPFN(RSP) saved RCX
	 * ISF64_TRAPNO(RSP) saved RDX
	 */
	xchg	%rsp, %rcx			/* set for pushes */
	push	ISF64_SS(%rcx)
	push	ISF64_RSP(%rcx)
	push	ISF64_RFLAGS(%rcx)
	push	ISF64_CS(%rcx)
	push	ISF64_RIP(%rcx)
	push	$(0)				/* error code 0 */
	lea	HNDL_ALLINTRS(%rip), %rax
	push	%rax				/* trapfn allintrs */
	push	$(T_NMI)			/* trapno T_NMI */
	mov	ISF64_ERR(%rcx), %rax
	mov	ISF64_TRAPNO(%rcx), %rdx
	mov	ISF64_TRAPFN(%rcx), %rcx
	jmp	L_dispatch


/* All 'exceptions' enter hndl_alltraps, with:
 *	r15	x86_saved_state_t address
 *	rsp	kernel stack if user-space, otherwise interrupt or kernel stack
 *	esi	cs at trap
 *
 * The rest of the state is set up as:
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */
Entry(hndl_alltraps)
	mov	%esi, %eax
	testb	$3, %al
	jz	trap_from_kernel

	TIME_TRAP_UENTRY

	/* Check for active vtimers in the current task */
	mov	%gs:CPU_ACTIVE_THREAD, %rcx
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling trap/exception */
	mov	TH_TASK(%rcx), %rbx
	TASK_VTIMER_CHECK(%rbx, %rcx)

	CCALL1(user_trap, %r15)			/* call user trap routine */
	/* user_trap() unmasks interrupts */
	cli					/* hold off intrs - critical section */
	xorl	%ecx, %ecx			/* don't check if we're in the PFZ */

#define CLI cli
#define STI sti

Entry(return_from_trap)
	movq	%gs:CPU_ACTIVE_THREAD,%r15	/* Get current thread */
	movl	$-1, TH_IOTIER_OVERRIDE(%r15)	/* Reset IO tier override to -1 before returning to userspace */
	cmpl	$0, TH_RWLOCK_COUNT(%r15)	/* Check if current thread has pending RW locks held */
	jz	1f
	xorq	%rbp, %rbp		/* clear framepointer */
	mov	%r15, %rdi		/* Set RDI to current thread */
	CCALL(lck_rw_clear_promotions_x86)	/* Clear promotions if needed */
1:
	movq	TH_PCB_ISS(%r15), %r15 		/* PCB stack */
	movl	%gs:CPU_PENDING_AST,%eax
	testl	%eax,%eax
	je	EXT(return_to_user)		/* branch if no AST */

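/*
 * %ecx selects whether to check for a user RIP inside the commpage's
 * preemption-free zone (PFZ): if the RIP is in the PFZ, the pending AST
 * is recorded in EBX/RBX for the PFZ code, presumably so it can be acted
 * on once the zone is exited.
 */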
L_return_from_trap_with_ast:
	testl	%ecx, %ecx		/* see if we need to check for an EIP in the PFZ */
	je	2f			/* no, go handle the AST */
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* are we a 64-bit task? */
	je	1f
					/* no... 32-bit user mode */
	movl	R32_EIP(%r15), %edi
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(commpage_is_in_pfz32)
	testl	%eax, %eax
	je	2f			/* not in the PFZ... go service AST */
	movl	%eax, R32_EBX(%r15)	/* let the PFZ know we've pended an AST */
	jmp	EXT(return_to_user)
1:
	movq	R64_RIP(%r15), %rdi
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(commpage_is_in_pfz64)
	testl	%eax, %eax
	je	2f			/* not in the PFZ... go service AST */
	movl	%eax, R64_RBX(%r15)	/* let the PFZ know we've pended an AST */
	jmp	EXT(return_to_user)
2:
	STI				/* interrupts always enabled on return to user mode */

	xor	%edi, %edi		/* zero %rdi */
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(i386_astintr)		/* take the AST */

	CLI
	mov	%rsp, %r15		/* AST changes stack, saved state */
	xorl	%ecx, %ecx		/* don't check if we're in the PFZ */
	jmp	EXT(return_from_trap)	/* and check again (rare) */

/*
 * Trap from kernel mode.  No need to switch stacks.
 * Interrupts must be off here - we will restore them to their state at the
 * time of the trap as soon as it is safe to do so without recursing into preemption.
 *
 */
trap_from_kernel:
	movq	%r15, %rdi		/* saved state addr */
	pushq   R64_RIP(%r15)           /* Simulate a CALL from fault point */
	pushq   %rbp                    /* Extend framepointer chain */
	movq    %rsp, %rbp
	CCALLWITHSP(kernel_trap)	/* to kernel trap routine */
	popq    %rbp
	addq    $8, %rsp
	mov	%rsp, %r15		/* DTrace slides stack/saved-state */
	cli

	movl	%gs:CPU_PENDING_AST,%eax	/* get pending asts */
	testl	$(AST_URGENT),%eax		/* any urgent preemption? */
	je	ret_to_kernel			/* no, nothing to do */
	cmpl	$(T_PREEMPT),R64_TRAPNO(%r15)
	je	ret_to_kernel			/* T_PREEMPT handled in kernel_trap() */
	testl	$(EFL_IF),R64_RFLAGS(%r15)	/* interrupts disabled? */
	je	ret_to_kernel
	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL	/* preemption disabled? */
	jne	ret_to_kernel
	movq	%gs:CPU_KERNEL_STACK,%rax
	movq	%rsp,%rcx
	xorq	%rax,%rcx
	andq	EXT(kernel_stack_mask)(%rip),%rcx
	testq	%rcx,%rcx		/* are we on the kernel stack? */
	jne	ret_to_kernel		/* no, skip it */

	CCALL1(i386_astintr, $1)	/* take the AST */

	mov	%rsp, %r15		/* AST changes stack, saved state */
	jmp	ret_to_kernel


/*
 * All interrupts on all tasks enter here with:
 *	r15	 x86_saved_state_t
 *	rsp	 kernel or interrupt stack
 *	esi	 cs at trap
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */
Entry(hndl_allintrs)
	/*
	 * test whether already on interrupt stack
	 */
	movq	%gs:CPU_INT_STACK_TOP,%rcx
	cmpq	%rsp,%rcx
	jb	1f
	leaq	-INTSTACK_SIZE(%rcx),%rdx
	cmpq	%rsp,%rdx
	jb	int_from_intstack
1:
	xchgq	%rcx,%rsp		/* switch to interrupt stack */

	mov	%cr0,%rax		/* get cr0 */
	orl	$(CR0_TS),%eax		/* or in TS bit */
	mov	%rax,%cr0		/* set cr0 */
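	/*
	 * With CR0.TS set, any floating-point/SSE use while handling the
	 * interrupt will trap rather than silently touch the interrupted
	 * thread's live FP state; TS is re-derived on the way out below
	 * from the thread's fp_valid state.
	 */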

	pushq	%rcx			/* save pointer to old stack */
	pushq	%gs:CPU_INT_STATE	/* save previous intr state */
	movq	%r15,%gs:CPU_INT_STATE	/* set intr state */

	TIME_INT_ENTRY			/* do timing */

	/* Check for active vtimers in the current task */
	mov	%gs:CPU_ACTIVE_THREAD, %rcx
	mov	TH_TASK(%rcx), %rbx
	TASK_VTIMER_CHECK(%rbx, %rcx)

	incl	%gs:CPU_PREEMPTION_LEVEL
	incl	%gs:CPU_INTERRUPT_LEVEL

	CCALL1(interrupt, %r15)		/* call generic interrupt routine */

	cli				/* just in case we returned with intrs enabled */

	.globl	EXT(return_to_iret)
LEXT(return_to_iret)			/* (label for kdb_kintr and hardclock) */

	decl	%gs:CPU_INTERRUPT_LEVEL
	decl	%gs:CPU_PREEMPTION_LEVEL

	TIME_INT_EXIT			/* do timing */

	popq	%gs:CPU_INT_STATE 	/* reset/clear intr state pointer */
	popq	%rsp			/* switch back to old stack */

	movq	%gs:CPU_ACTIVE_THREAD,%rax
	movq	TH_PCB_FPS(%rax),%rax	/* get pcb's ifps */
	cmpq	$0,%rax			/* Is there a context */
	je	1f			/* Branch if not */
	movl	FP_VALID(%rax),%eax	/* Load fp_valid */
	cmpl	$0,%eax			/* Check if valid */
	jne	1f			/* Branch if valid */
	clts				/* Clear TS */
	jmp	2f
1:
	mov	%cr0,%rax		/* get cr0 */
	orl	$(CR0_TS),%eax		/* or in TS bit */
	mov	%rax,%cr0		/* set cr0 */
2:
	/* Load interrupted code segment into %eax */
	movl	R32_CS(%r15),%eax	/* assume 32-bit state */
	cmpl	$(SS_64),SS_FLAVOR(%r15)/* 64-bit? */
#if DEBUG_IDT64
	jne	4f
	movl	R64_CS(%r15),%eax	/* 64-bit user mode */
	jmp	3f
4:
	cmpl    $(SS_32),SS_FLAVOR(%r15)
	je	3f
	POSTCODE2(0x6431)
	CCALL1(panic_idt64, %r15)
	hlt
#else
	jne	3f
	movl	R64_CS(%r15),%eax	/* 64-bit user mode */
#endif
3:
	testb	$3,%al			/* user mode, */
	jnz	ast_from_interrupt_user	/* go handle potential ASTs */
	/*
	 * we only want to handle preemption requests if
	 * the interrupt fell in the kernel context
	 * and preemption isn't disabled
	 */
	movl	%gs:CPU_PENDING_AST,%eax
	testl	$(AST_URGENT),%eax		/* any urgent requests? */
	je	ret_to_kernel			/* no, nothing to do */

	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL	/* preemption disabled? */
	jne	ret_to_kernel			/* yes, skip it */

	/*
	 * Take an AST from kernel space.  We don't need (and don't want)
	 * to do as much as the case where the interrupt came from user
	 * space.
	 */
	CCALL1(i386_astintr, $1)

	mov	%rsp, %r15		/* AST changes stack, saved state */
	jmp	ret_to_kernel


/*
 * nested int - simple path, can't preempt etc on way out
 */
int_from_intstack:
	incl	%gs:CPU_PREEMPTION_LEVEL
	incl	%gs:CPU_INTERRUPT_LEVEL
	incl	%gs:CPU_NESTED_ISTACK

	push	%gs:CPU_INT_STATE
	mov	%r15, %gs:CPU_INT_STATE

	CCALL1(interrupt, %r15)

	pop	%gs:CPU_INT_STATE

	decl	%gs:CPU_INTERRUPT_LEVEL
	decl	%gs:CPU_PREEMPTION_LEVEL
	decl	%gs:CPU_NESTED_ISTACK

	jmp	ret_to_kernel

/*
 *	Take an AST from an interrupted user
 */
ast_from_interrupt_user:
	movl	%gs:CPU_PENDING_AST,%eax
	testl	%eax,%eax		/* pending ASTs? */
	je	EXT(ret_to_user)	/* no, nothing to do */

	TIME_TRAP_UENTRY

	movl	$1, %ecx		/* check if we're in the PFZ */
	jmp	L_return_from_trap_with_ast	/* return */


/* Syscall dispatch routines! */

/*
 *
 * 32bit Tasks
 * System call entries via INTR_GATE or sysenter:
 *
 *	r15	 x86_saved_state32_t
 *	rsp	 kernel stack
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */

Entry(hndl_sysenter)
	/*
	 * We can be here either for a mach syscall or a unix syscall,
	 * as indicated by the sign of the code:
	 */
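	/* (The 32-bit user-side stubs presumably pass mach trap numbers
	 * in negated form, hence negative => mach, positive => unix.) */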
	movl	R32_EAX(%r15),%eax
	testl	%eax,%eax
	js	EXT(hndl_mach_scall)		/* < 0 => mach */
						/* > 0 => unix */

Entry(hndl_unix_scall)

        TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread     */
	movq	TH_TASK(%rcx),%rbx		/* point to current task  */
	incl	TH_SYSCALLS_UNIX(%rcx)		/* increment call count   */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(unix_syscall, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mach_scall)
	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread     */
	movq	TH_TASK(%rcx),%rbx		/* point to current task  */
	incl	TH_SYSCALLS_MACH(%rcx)		/* increment call count   */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(mach_call_munger, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mdep_scall)
	TIME_TRAP_UENTRY

	/* Check for active vtimers in the current task */
	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread     */
	movq	TH_TASK(%rcx),%rbx		/* point to current task  */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(machdep_syscall, %r15)
	/*
	 * always returns through thread_exception_return
	 */

/*
 * 64bit Tasks
 * System call entries via syscall only:
 *
 *	r15	 x86_saved_state64_t
 *	rsp	 kernel stack
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */

Entry(hndl_syscall)
	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread     */
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling syscall */
	movq	TH_TASK(%rcx),%rbx		/* point to current task  */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	/*
	 * We can be here either for a mach, unix machdep or diag syscall,
	 * as indicated by the syscall class:
	 */
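	/*
	 * (The class lives in the high-order bits of the syscall number;
	 * e.g. a 64-bit unix syscall is issued as
	 * (SYSCALL_CLASS_UNIX << SYSCALL_CLASS_SHIFT) | number.)
	 */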
	movl	R64_RAX(%r15), %eax		/* syscall number/class */
	movl	%eax, %edx
	andl	$(SYSCALL_CLASS_MASK), %edx	/* syscall class */
	cmpl	$(SYSCALL_CLASS_MACH<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_mach_scall64)
	cmpl	$(SYSCALL_CLASS_UNIX<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_unix_scall64)
	cmpl	$(SYSCALL_CLASS_MDEP<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_mdep_scall64)
	cmpl	$(SYSCALL_CLASS_DIAG<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_diag_scall64)

	/* Syscall class unknown */
	sti
	CCALL3(i386_exception, $(EXC_SYSCALL), %rax, $1)
	/* no return */


Entry(hndl_unix_scall64)
	incl	TH_SYSCALLS_UNIX(%rcx)		/* increment call count   */
	sti

	CCALL1(unix_syscall64, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mach_scall64)
	incl	TH_SYSCALLS_MACH(%rcx)		/* increment call count   */
	sti

	CCALL1(mach_call_munger64, %r15)
	/*
	 * always returns through thread_exception_return
	 */



Entry(hndl_mdep_scall64)
	sti

	CCALL1(machdep_syscall64, %r15)
	/*
	 * always returns through thread_exception_return
	 */

Entry(hndl_diag_scall64)
	CCALL1(diagCall64, %r15)	// Call diagnostics
	cli				// Disable interruptions just in case
	test	%eax, %eax		// What kind of return is this?
	je	1f			// - branch if bad (zero)
	jmp	EXT(return_to_user)	// Normal return, do not check asts...
1:
	sti
	CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
	/* no return */

Entry(hndl_machine_check)
	CCALL1(panic_machine_check64, %r15)
	hlt

Entry(hndl_double_fault)
	CCALL1(panic_double_fault64, %r15)
	hlt
