/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 *
 * $Id: entry.S,v 1.1.1.1 2008/10/15 03:26:21 james26_jang Exp $
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame, this is
 * only done for PT_TRACESYS, signals or fork/exec et.al.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 *
 */

#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/current.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/calling.h>
#include <asm/offset.h>
#include <asm/msr.h>
#include <asm/unistd.h>

	.code64

/* Per-CPU data is reached through the hidden GS base set up by swapgs. */
#define PDAREF(field) %gs:field

/*
 * C code is not supposed to know about partial frames. Every time a C function
 * that looks at the pt_regs is called these two macros are executed around it.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

	/* %rsp: at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	PDAREF(pda_oldrsp),\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)		/* contains return address, already in RIP */
	movq	R11(%rsp),\tmp		/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,PDAREF(pda_oldrsp)
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
	.endm


/*
 * A newly forked process directly context switches into this.
 */
ENTRY(ret_from_fork)
	movq	%rax,%rdi		/* return value of __switch_to -> prev task */
	call	schedule_tail
	GET_CURRENT(%rcx)
	testb	$PT_TRACESYS,tsk_ptrace(%rcx)
	jnz	2f
1:
	RESTORE_REST
	testl	$3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	jz	int_ret_from_sys_call
	testl	$ASM_THREAD_IA32,tsk_thread+thread_flags(%rcx)
	jnz	int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp	ret_from_sys_call
2:
	movq	%rsp,%rdi
	call	syscall_trace
	jmp	1b

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer. Gets the per CPU area from the hidden GS MSR and finds the
 * current kernel stack.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3 	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 */

ENTRY(system_call)
	swapgs
	movq	%rsp,PDAREF(pda_oldrsp)
	movq	PDAREF(pda_kernelstack),%rsp
	sti
	SAVE_ARGS 8,1
	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq	%rcx,RIP-ARGOFFSET(%rsp)
	GET_CURRENT(%rcx)
	testl	$PT_TRACESYS,tsk_ptrace(%rcx)
	jne	tracesys
	cmpq	$__NR_syscall_max,%rax
	ja	badsys
	movq	%r10,%rcx		/* C arg3 lives in rcx, syscall arg3 in r10 */
	call	*sys_call_table(,%rax,8)	# XXX:	 rip relative
	movq	%rax,RAX-ARGOFFSET(%rsp)
	.globl ret_from_sys_call
ret_from_sys_call:
sysret_with_reschedule:
	GET_CURRENT(%rcx)
	cli
	cmpq	$0,tsk_need_resched(%rcx)
	jne	sysret_reschedule
	cmpl	$0,tsk_sigpending(%rcx)
	jne	sysret_signal
sysret_restore_args:
	movq	RIP-ARGOFFSET(%rsp),%rcx	/* sysret returns to rcx */
	RESTORE_ARGS 0,-ARG_SKIP,1
	movq	PDAREF(pda_oldrsp),%rsp
	swapgs
	sysretq

sysret_signal:
	sti
	xorl	%esi,%esi		# oldset
	leaq	-ARGOFFSET(%rsp),%rdi	# regs
	leaq	do_signal(%rip),%rax
	call	ptregscall_common
sysret_signal_test:
	GET_CURRENT(%rcx)
	cli
	cmpq	$0,tsk_need_resched(%rcx)
	je	sysret_restore_args
	sti
	call	schedule
	jmp	sysret_signal_test

sysret_reschedule:
	sti
	call	schedule
	jmp	sysret_with_reschedule

tracesys:
	SAVE_REST
	movq	$-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq	%rsp,%rdi
	call	syscall_trace
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
	RESTORE_REST
	cmpq	$__NR_syscall_max,%rax
	ja	tracesys_done
tracesys_call:		/* backtrace marker */
	movq	%r10,%rcx	/* fixup for C */
	call	*sys_call_table(,%rax,8)
	movq	%rax,RAX-ARGOFFSET(%rsp)
tracesys_done:		/* backtrace marker */
	SAVE_REST
	movq	%rsp,%rdi
	call	syscall_trace
	RESTORE_TOP_OF_STACK %rbx
	RESTORE_REST
	jmp	ret_from_sys_call

badsys:
	movq	$-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp	ret_from_sys_call

/*
 * Syscall return path ending with IRET.
 * This can be either 64bit calls that require restoring of all registers
 * (impossible with sysret) or 32bit calls.
 */
ENTRY(int_ret_from_sys_call)
intret_test_kernel:
	testl	$3,CS-ARGOFFSET(%rsp)
	je	retint_restore_args
intret_with_reschedule:
	GET_CURRENT(%rcx)
	cli
	cmpq	$0,tsk_need_resched(%rcx)
	jne	intret_reschedule
	cmpl	$0,tsk_sigpending(%rcx)
	jne	intret_signal
	jmp	retint_restore_args_swapgs

intret_reschedule:
	sti
	call	schedule
	jmp	intret_with_reschedule

intret_signal:
	sti
	SAVE_REST
	xorq	%rsi,%rsi		# oldset -> arg2
	movq	%rsp,%rdi		# &ptregs -> arg1
	call	do_signal
	RESTORE_REST
intret_signal_test:
	GET_CURRENT(%rcx)
	cli
	cmpq	$0,tsk_need_resched(%rcx)
	je	retint_restore_args_swapgs
	sti
	call	schedule
	jmp	intret_signal_test

/*
 * Certain special system calls that need to save a complete stack frame.
 */

	.macro PTREGSCALL label,func
	.globl \label
\label:
	leaq	\func(%rip),%rax
	jmp	ptregscall_common
	.endm

	PTREGSCALL stub_clone, sys_clone
	PTREGSCALL stub_fork, sys_fork
	PTREGSCALL stub_vfork, sys_vfork
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend
	PTREGSCALL stub_sigaltstack, sys_sigaltstack
	PTREGSCALL stub_iopl, sys_iopl

ENTRY(ptregscall_common)
	popq	%r11		/* return address, restored before ret */
	SAVE_REST
	movq	%r11, %r15
	FIXUP_TOP_OF_STACK %r11
	call	*%rax
	RESTORE_TOP_OF_STACK %r11
	movq	%r15, %r11
	RESTORE_REST
	pushq	%r11
	ret

ENTRY(stub_execve)
	popq	%r11
	SAVE_REST
	movq	%r11, %r15
	FIXUP_TOP_OF_STACK %r11
	call	sys_execve
	GET_CURRENT(%rcx)
	testl	$ASM_THREAD_IA32,tsk_thread+thread_flags(%rcx)
	jnz	exec_32bit
	RESTORE_TOP_OF_STACK %r11
	movq	%r15, %r11
	RESTORE_REST
	push	%r11
	ret

exec_32bit:
	movq	%rax,RAX(%rsp)
	RESTORE_REST
	jmp	int_ret_from_sys_call

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	addq	$8, %rsp
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	call	sys_rt_sigreturn
	movq	%rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp	int_ret_from_sys_call

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers, except
 * for signals again.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
ENTRY(common_interrupt)
	testl	$3,16(%rsp)	# from kernel?
	je	1f
	swapgs
1:	cld
	SAVE_ARGS
	leaq	-ARGOFFSET(%rsp),%rdi	# arg1 for handler
	addl	$1,PDAREF(pda_irqcount)	# XXX: should be merged with irq.c irqcount
	movq	PDAREF(pda_irqstackptr),%rax
	cmoveq	%rax,%rsp		/* switch to irq stack only when not nested */
	pushq	%rdi			# save old stack
	call	do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ENTRY(ret_from_intr)
	cli
	popq	%rdi
	subl	$1,PDAREF(pda_irqcount)
	leaq	ARGOFFSET(%rdi),%rsp
	testl	$3,CS(%rdi)	# from kernel?
	je	retint_restore_args
	/* Interrupt came from user space */
retint_with_reschedule:
	GET_CURRENT(%rcx)
	cmpq	$0,tsk_need_resched(%rcx)
	jne	retint_reschedule
	cmpl	$0,tsk_sigpending(%rcx)
	jne	retint_signal
retint_restore_args_swapgs:
	swapgs
retint_restore_args:
	RESTORE_ARGS 0,8
iret_label:
	iretq
	.section __ex_table,"a"
	.align 8
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
bad_iret:
	movq	$-9999,%rdi	/* better code? */
	jmp	do_exit
	.previous

retint_signal:
	sti
	SAVE_REST
	movq	$-1,ORIG_RAX(%rsp)
	xorq	%rsi,%rsi		# oldset
	movq	%rsp,%rdi		# &pt_regs
	call	do_signal
	RESTORE_REST
retint_signal_test:
	cli
	GET_CURRENT(%rcx)
	cmpq	$0,tsk_need_resched(%rcx)
	je	retint_restore_args_swapgs
	sti
	call	schedule
	jmp	retint_signal_test

retint_reschedule:
	sti
	call	schedule
	cli
	jmp	retint_with_reschedule

/*
 * Exception entry points.
 */
	.macro zeroentry sym
	pushq	$0	/* push error code/oldrax */
	pushq	%rax	/* push real oldrax to the rdi slot */
	leaq	\sym(%rip),%rax
	jmp	error_entry
	.endm

	.macro errorentry sym
	pushq	%rax
	leaq	\sym(%rip),%rax
	jmp	error_entry
	.endm

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
	ALIGN
error_entry:
	/* rdi slot contains rax, oldrax contains error code */
	pushq	%rsi
	movq	8(%rsp),%rsi	/* load rax */
	pushq	%rdx
	pushq	%rcx
	pushq	%rsi	/* store rax */
	pushq	%r8
	pushq	%r9
	pushq	%r10
	pushq	%r11
	cld
	SAVE_REST
	testl	$3,CS(%rsp)
	je	error_kernelspace
	swapgs
	movl	$1,%r15d	/* r15d = 1: swapgs needed on exit */
error_action:
	sti
	movq	%rdi,RDI(%rsp)
	movq	%rsp,%rdi
	movq	ORIG_RAX(%rsp),%rsi	/* get error code */
	movq	$-1,ORIG_RAX(%rsp)
	call	*%rax
	/* r15d: swapgs flag */
error_exit:
	testl	%r15d,%r15d
	jz	error_restore
error_test:
	cli
	GET_CURRENT(%rcx)
	cmpq	$0,tsk_need_resched(%rcx)
	jne	error_reschedule
	cmpl	$0,tsk_sigpending(%rcx)
	jne	error_signal
error_restore_swapgs:
	swapgs
error_restore:
	RESTORE_REST
	jmp	retint_restore_args

error_reschedule:
	sti
	call	schedule
	jmp	error_test

error_signal:
	sti
	xorq	%rsi,%rsi
	movq	%rsp,%rdi
	call	do_signal
error_signal_test:
	GET_CURRENT(%rcx)
	cli
	cmpq	$0,tsk_need_resched(%rcx)
	je	error_restore_swapgs
	sti
	call	schedule
	jmp	error_signal_test

error_kernelspace:
	xorl	%r15d,%r15d
	/* Exception on the iretq itself means the user frame was bad and we
	   faulted with the user GS already restored: swap back and note it. */
	cmpq	$iret_label,RIP(%rsp)
	jne	error_action
	movl	$1,%r15d
	swapgs
	jmp	error_action

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
ENTRY(kernel_thread)
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq	%rdx,%rdi
	orq	$CLONE_VM, %rdi

	movq	$-1, %rsi

	movq	%rsp, %rdx

	# clone now
	call	do_fork
	# save retval on the stack so it's popped before `ret`
	movq	%rax, RAX(%rsp)

	/*
	 * It isn't worth to check for reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not to reschedule the child before returning, this avoids the need
	 * of hacks for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret

child_rip:
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq	%rdi, %rax
	movq	%rsi, %rdi
	call	*%rax
	# exit
	xorq	%rdi, %rdi
	call	do_exit

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	 extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fallback into:
 *	extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(execve)
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call	sys_execve
	movq	%rax, RAX(%rsp)
	RESTORE_REST
	testq	%rax,%rax
	je	int_ret_from_sys_call	/* success: return through the IRET path */
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret

ENTRY(page_fault)
#ifdef CONFIG_KDB
	pushq	%rcx
	pushq	%rdx
	pushq	%rax
	movl	$473,%ecx
	rdmsr
	andl	$0xfffffffe,%eax	/* Disable last branch recording */
	wrmsr
	popq	%rax
	popq	%rdx
	popq	%rcx
#endif
	errorentry do_page_fault

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
	pushq	$-1	# orig_rax
	SAVE_ALL
	xorl	%r15d,%r15d	/* r15d: swapgs-on-exit flag for error_exit */
	testl	$3,CS(%rsp)
	jz	1f
	swapgs
	movl	$1,%r15d
1:
	movq	%cr0,%rax
	leaq	math_state_restore(%rip),%rcx
	leaq	math_emulate(%rip),%rbx
	testl	$0x4,%eax	/* CR0 bit 2 (EM) clear -> real FPU present */
	cmoveq	%rcx,%rbx
	call	*%rbx
	jmp	error_exit

ENTRY(debug)
	zeroentry do_debug

ENTRY(nmi)
	pushq	$-1
	SAVE_ALL
	/* NMI could happen inside the critical section of a swapgs,
	   so it is needed to use this expensive way to check.
	   Rely on arch_prctl forbidding user space from setting a negative
	   GS. Only the kernel value is negative. */
	movl	$MSR_GS_BASE,%ecx
	rdmsr
	xorl	%ebx,%ebx
	testl	%edx,%edx
	js	1f		/* GS base already negative -> kernel value, no swap */
	swapgs
	movl	$1,%ebx
1:	movq	%rsp,%rdi
	call	do_nmi
	cli
	testl	%ebx,%ebx
	jz	error_restore
	swapgs
	jmp	error_restore

ENTRY(int3)
	zeroentry do_int3

ENTRY(overflow)
	zeroentry do_overflow

ENTRY(bounds)
	zeroentry do_bounds

ENTRY(invalid_op)
	zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
	zeroentry do_reserved

ENTRY(double_fault)
	errorentry do_double_fault

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS

ENTRY(segment_not_present)
	errorentry do_segment_not_present

ENTRY(stack_segment)
	errorentry do_stack_segment

ENTRY(general_protection)
	errorentry do_general_protection

ENTRY(alignment_check)
	errorentry do_alignment_check

ENTRY(divide_error)
	zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug

ENTRY(machine_check)
	zeroentry do_machine_check

ENTRY(call_debug)
	zeroentry do_call_debug