/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame, this is
 * only done for syscall tracing, signals or fork/exec et.al.
 *
 * A note on terminology:
 * - top of stack: Architecture defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers upto R11.
 * - full stack frame: Like partial stack frame, but all register saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 *   backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 *   There are unfortunately lots of special cases where some registers
 *   not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 *   Gives a full stack frame.
 * - ENTRY/END Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 *   frame that is otherwise undefined after a SYSCALL
 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */

#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>

	.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif


/*
 * If irq-flag tracing is compiled in, emit TRACE_IRQS_ON when the saved
 * EFLAGS image shows the upcoming iretq will re-enable interrupts.
 */
.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt	$9,EFLAGS-\offset(%rsp)	/* interrupts off? (IF is bit 9) */
	jnc	1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * C code is not supposed to know about undefined top of stack. Every time
 * a C function with an pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

	/* %rsp:at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)		/* RCX holds the user RIP after SYSCALL; invalidate the slot */
	movq	R11(%rsp),\tmp		/* get eflags (SYSCALL saved them in r11) */
	movq	\tmp,EFLAGS(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
	.endm

	/* Build a fake hardware interrupt frame so a kernel thread can be
	   started/exited through the normal return paths. */
	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl	%eax, %eax
	pushq	%rax			/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq	%rax			/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq	$(1<<9)			/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS		/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip		/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax			/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm
117 118 .macro UNFAKE_STACK_FRAME 119 addq $8*6, %rsp 120 CFI_ADJUST_CFA_OFFSET -(6*8) 121 .endm 122 123 .macro CFI_DEFAULT_STACK start=1 124 .if \start 125 CFI_STARTPROC simple 126 CFI_SIGNAL_FRAME 127 CFI_DEF_CFA rsp,SS+8 128 .else 129 CFI_DEF_CFA_OFFSET SS+8 130 .endif 131 CFI_REL_OFFSET r15,R15 132 CFI_REL_OFFSET r14,R14 133 CFI_REL_OFFSET r13,R13 134 CFI_REL_OFFSET r12,R12 135 CFI_REL_OFFSET rbp,RBP 136 CFI_REL_OFFSET rbx,RBX 137 CFI_REL_OFFSET r11,R11 138 CFI_REL_OFFSET r10,R10 139 CFI_REL_OFFSET r9,R9 140 CFI_REL_OFFSET r8,R8 141 CFI_REL_OFFSET rax,RAX 142 CFI_REL_OFFSET rcx,RCX 143 CFI_REL_OFFSET rdx,RDX 144 CFI_REL_OFFSET rsi,RSI 145 CFI_REL_OFFSET rdi,RDI 146 CFI_REL_OFFSET rip,RIP 147 /*CFI_REL_OFFSET cs,CS*/ 148 /*CFI_REL_OFFSET rflags,EFLAGS*/ 149 CFI_REL_OFFSET rsp,RSP 150 /*CFI_REL_OFFSET ss,SS*/ 151 .endm 152/* 153 * A newly forked process directly context switches into this. 154 */ 155/* rdi: prev */ 156ENTRY(ret_from_fork) 157 CFI_DEFAULT_STACK 158 push kernel_eflags(%rip) 159 CFI_ADJUST_CFA_OFFSET 4 160 popf # reset kernel eflags 161 CFI_ADJUST_CFA_OFFSET -4 162 call schedule_tail 163 GET_THREAD_INFO(%rcx) 164 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) 165 jnz rff_trace 166rff_action: 167 RESTORE_REST 168 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? 169 je int_ret_from_sys_call 170 testl $_TIF_IA32,threadinfo_flags(%rcx) 171 jnz int_ret_from_sys_call 172 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET 173 jmp ret_from_sys_call 174rff_trace: 175 movq %rsp,%rdi 176 call syscall_trace_leave 177 GET_THREAD_INFO(%rcx) 178 jmp rff_action 179 CFI_ENDPROC 180END(ret_from_fork) 181 182/* 183 * System call entry. Upto 6 arguments in registers are supported. 184 * 185 * SYSCALL does not save anything on the stack and does not change the 186 * stack pointer. 
187 */ 188 189 190ENTRY(system_call) 191 CFI_STARTPROC simple 192 CFI_SIGNAL_FRAME 193 CFI_DEF_CFA rsp,PDA_STACKOFFSET 194 CFI_REGISTER rip,rcx 195 /*CFI_REGISTER rflags,r11*/ 196 swapgs 197 movq %rsp,%gs:pda_oldrsp 198 movq %gs:pda_kernelstack,%rsp 199 /* 200 * No need to follow this irqs off/on section - it's straight 201 * and short: 202 */ 203 sti 204 SAVE_ARGS 8,1 205 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) 206 movq %rcx,RIP-ARGOFFSET(%rsp) 207 CFI_REL_OFFSET rip,RIP-ARGOFFSET 208 GET_THREAD_INFO(%rcx) 209 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) 210 jnz tracesys 211 cmpq $__NR_syscall_max,%rax 212 ja badsys 213 movq %r10,%rcx 214 call *sys_call_table(,%rax,8) # XXX: rip relative 215 movq %rax,RAX-ARGOFFSET(%rsp) 216/* 217 * Syscall return path ending with SYSRET (fast path) 218 * Has incomplete stack frame and undefined top of stack. 219 */ 220ret_from_sys_call: 221 movl $_TIF_ALLWORK_MASK,%edi 222 /* edi: flagmask */ 223sysret_check: 224 GET_THREAD_INFO(%rcx) 225 cli 226 TRACE_IRQS_OFF 227 movl threadinfo_flags(%rcx),%edx 228 andl %edi,%edx 229 jnz sysret_careful 230 CFI_REMEMBER_STATE 231 /* 232 * sysretq will re-enable interrupts: 233 */ 234 TRACE_IRQS_ON 235 movq RIP-ARGOFFSET(%rsp),%rcx 236 CFI_REGISTER rip,rcx 237 RESTORE_ARGS 0,-ARG_SKIP,1 238 /*CFI_REGISTER rflags,r11*/ 239 movq %gs:pda_oldrsp,%rsp 240 swapgs 241 sysretq 242 243 CFI_RESTORE_STATE 244 /* Handle reschedules */ 245 /* edx: work, edi: workmask */ 246sysret_careful: 247 bt $TIF_NEED_RESCHED,%edx 248 jnc sysret_signal 249 TRACE_IRQS_ON 250 sti 251 pushq %rdi 252 CFI_ADJUST_CFA_OFFSET 8 253 call schedule 254 popq %rdi 255 CFI_ADJUST_CFA_OFFSET -8 256 jmp sysret_check 257 258 /* Handle a signal */ 259sysret_signal: 260 TRACE_IRQS_ON 261 sti 262 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx 263 jz 1f 264 265 /* Really a signal */ 266 /* edx: work flags (arg3) */ 267 leaq do_notify_resume(%rip),%rax 268 leaq -ARGOFFSET(%rsp),%rdi # 
&pt_regs -> arg1 269 xorl %esi,%esi # oldset -> arg2 270 call ptregscall_common 2711: movl $_TIF_NEED_RESCHED,%edi 272 /* Use IRET because user could have changed frame. This 273 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ 274 cli 275 TRACE_IRQS_OFF 276 jmp int_with_check 277 278badsys: 279 movq $-ENOSYS,RAX-ARGOFFSET(%rsp) 280 jmp ret_from_sys_call 281 282 /* Do syscall tracing */ 283tracesys: 284 SAVE_REST 285 movq $-ENOSYS,RAX(%rsp) 286 FIXUP_TOP_OF_STACK %rdi 287 movq %rsp,%rdi 288 call syscall_trace_enter 289 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ 290 RESTORE_REST 291 cmpq $__NR_syscall_max,%rax 292 movq $-ENOSYS,%rcx 293 cmova %rcx,%rax 294 ja 1f 295 movq %r10,%rcx /* fixup for C */ 296 call *sys_call_table(,%rax,8) 2971: movq %rax,RAX-ARGOFFSET(%rsp) 298 /* Use IRET because user could have changed frame */ 299 300/* 301 * Syscall return path ending with IRET. 302 * Has correct top of stack, but partial stack frame. 303 */ 304 .globl int_ret_from_sys_call 305int_ret_from_sys_call: 306 cli 307 TRACE_IRQS_OFF 308 testl $3,CS-ARGOFFSET(%rsp) 309 je retint_restore_args 310 movl $_TIF_ALLWORK_MASK,%edi 311 /* edi: mask to check */ 312int_with_check: 313 GET_THREAD_INFO(%rcx) 314 movl threadinfo_flags(%rcx),%edx 315 andl %edi,%edx 316 jnz int_careful 317 andl $~TS_COMPAT,threadinfo_status(%rcx) 318 jmp retint_swapgs 319 320 /* Either reschedule or signal or syscall exit tracking needed. */ 321 /* First do a reschedule test. 
*/ 322 /* edx: work, edi: workmask */ 323int_careful: 324 bt $TIF_NEED_RESCHED,%edx 325 jnc int_very_careful 326 TRACE_IRQS_ON 327 sti 328 pushq %rdi 329 CFI_ADJUST_CFA_OFFSET 8 330 call schedule 331 popq %rdi 332 CFI_ADJUST_CFA_OFFSET -8 333 cli 334 TRACE_IRQS_OFF 335 jmp int_with_check 336 337 /* handle signals and tracing -- both require a full stack frame */ 338int_very_careful: 339 TRACE_IRQS_ON 340 sti 341 SAVE_REST 342 /* Check for syscall exit trace */ 343 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx 344 jz int_signal 345 pushq %rdi 346 CFI_ADJUST_CFA_OFFSET 8 347 leaq 8(%rsp),%rdi # &ptregs -> arg1 348 call syscall_trace_leave 349 popq %rdi 350 CFI_ADJUST_CFA_OFFSET -8 351 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi 352 jmp int_restore_rest 353 354int_signal: 355 testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx 356 jz 1f 357 movq %rsp,%rdi # &ptregs -> arg1 358 xorl %esi,%esi # oldset -> arg2 359 call do_notify_resume 3601: movl $_TIF_NEED_RESCHED,%edi 361int_restore_rest: 362 RESTORE_REST 363 cli 364 TRACE_IRQS_OFF 365 jmp int_with_check 366 CFI_ENDPROC 367END(system_call) 368 369/* 370 * Certain special system calls that need to save a complete full stack frame. 
371 */ 372 373 .macro PTREGSCALL label,func,arg 374 .globl \label 375\label: 376 leaq \func(%rip),%rax 377 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ 378 jmp ptregscall_common 379END(\label) 380 .endm 381 382 CFI_STARTPROC 383 384 PTREGSCALL stub_clone, sys_clone, %r8 385 PTREGSCALL stub_fork, sys_fork, %rdi 386 PTREGSCALL stub_vfork, sys_vfork, %rdi 387 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx 388 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx 389 PTREGSCALL stub_iopl, sys_iopl, %rsi 390 391ENTRY(ptregscall_common) 392 popq %r11 393 CFI_ADJUST_CFA_OFFSET -8 394 CFI_REGISTER rip, r11 395 SAVE_REST 396 movq %r11, %r15 397 CFI_REGISTER rip, r15 398 FIXUP_TOP_OF_STACK %r11 399 call *%rax 400 RESTORE_TOP_OF_STACK %r11 401 movq %r15, %r11 402 CFI_REGISTER rip, r11 403 RESTORE_REST 404 pushq %r11 405 CFI_ADJUST_CFA_OFFSET 8 406 CFI_REL_OFFSET rip, 0 407 ret 408 CFI_ENDPROC 409END(ptregscall_common) 410 411ENTRY(stub_execve) 412 CFI_STARTPROC 413 popq %r11 414 CFI_ADJUST_CFA_OFFSET -8 415 CFI_REGISTER rip, r11 416 SAVE_REST 417 FIXUP_TOP_OF_STACK %r11 418 call sys_execve 419 RESTORE_TOP_OF_STACK %r11 420 movq %rax,RAX(%rsp) 421 RESTORE_REST 422 jmp int_ret_from_sys_call 423 CFI_ENDPROC 424END(stub_execve) 425 426/* 427 * sigreturn is special because it needs to restore all registers on return. 428 * This cannot be done with SYSRET, so use the IRET return path instead. 
429 */ 430ENTRY(stub_rt_sigreturn) 431 CFI_STARTPROC 432 addq $8, %rsp 433 CFI_ADJUST_CFA_OFFSET -8 434 SAVE_REST 435 movq %rsp,%rdi 436 FIXUP_TOP_OF_STACK %r11 437 call sys_rt_sigreturn 438 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer 439 RESTORE_REST 440 jmp int_ret_from_sys_call 441 CFI_ENDPROC 442END(stub_rt_sigreturn) 443 444/* 445 * initial frame state for interrupts and exceptions 446 */ 447 .macro _frame ref 448 CFI_STARTPROC simple 449 CFI_SIGNAL_FRAME 450 CFI_DEF_CFA rsp,SS+8-\ref 451 /*CFI_REL_OFFSET ss,SS-\ref*/ 452 CFI_REL_OFFSET rsp,RSP-\ref 453 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ 454 /*CFI_REL_OFFSET cs,CS-\ref*/ 455 CFI_REL_OFFSET rip,RIP-\ref 456 .endm 457 458/* initial frame state for interrupts (and exceptions without error code) */ 459#define INTR_FRAME _frame RIP 460/* initial frame state for exceptions with error code (and interrupts with 461 vector already pushed) */ 462#define XCPT_FRAME _frame ORIG_RAX 463 464/* 465 * Interrupt entry/exit. 466 * 467 * Interrupt entry points save only callee clobbered registers in fast path. 468 * 469 * Entry runs with interrupts off. 470 */ 471 472/* 0(%rsp): interrupt number */ 473 .macro interrupt func 474 cld 475 SAVE_ARGS 476 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler 477 pushq %rbp 478 CFI_ADJUST_CFA_OFFSET 8 479 CFI_REL_OFFSET rbp, 0 480 movq %rsp,%rbp 481 CFI_DEF_CFA_REGISTER rbp 482 testl $3,CS(%rdi) 483 je 1f 484 swapgs 485 /* irqcount is used to check if a CPU is already on an interrupt 486 stack or not. 
While this is essentially redundant with preempt_count 487 it is a little cheaper to use a separate counter in the PDA 488 (short of moving irq_enter into assembly, which would be too 489 much work) */ 4901: incl %gs:pda_irqcount 491 cmoveq %gs:pda_irqstackptr,%rsp 492 push %rbp # backlink for old unwinder 493 /* 494 * We entered an interrupt context - irqs are off: 495 */ 496 TRACE_IRQS_OFF 497 call \func 498 .endm 499 500ENTRY(common_interrupt) 501 XCPT_FRAME 502 interrupt do_IRQ 503 /* 0(%rsp): oldrsp-ARGOFFSET */ 504ret_from_intr: 505 cli 506 TRACE_IRQS_OFF 507 decl %gs:pda_irqcount 508 leaveq 509 CFI_DEF_CFA_REGISTER rsp 510 CFI_ADJUST_CFA_OFFSET -8 511exit_intr: 512 GET_THREAD_INFO(%rcx) 513 testl $3,CS-ARGOFFSET(%rsp) 514 je retint_kernel 515 516 /* Interrupt came from user space */ 517 /* 518 * Has a correct top of stack, but a partial stack frame 519 * %rcx: thread info. Interrupts off. 520 */ 521retint_with_reschedule: 522 movl $_TIF_WORK_MASK,%edi 523retint_check: 524 movl threadinfo_flags(%rcx),%edx 525 andl %edi,%edx 526 CFI_REMEMBER_STATE 527 jnz retint_careful 528retint_swapgs: 529 /* 530 * The iretq could re-enable interrupts: 531 */ 532 cli 533 TRACE_IRQS_IRETQ 534 swapgs 535 jmp restore_args 536 537retint_restore_args: 538 cli 539 /* 540 * The iretq could re-enable interrupts: 541 */ 542 TRACE_IRQS_IRETQ 543restore_args: 544 RESTORE_ARGS 0,8,0 545iret_label: 546 iretq 547 548 .section __ex_table,"a" 549 .quad iret_label,bad_iret 550 .previous 551 .section .fixup,"ax" 552 /* force a signal here? 
this matches i386 behaviour */ 553 /* running with kernel gs */ 554bad_iret: 555 movq $11,%rdi /* SIGSEGV */ 556 TRACE_IRQS_ON 557 sti 558 jmp do_exit 559 .previous 560 561 /* edi: workmask, edx: work */ 562retint_careful: 563 CFI_RESTORE_STATE 564 bt $TIF_NEED_RESCHED,%edx 565 jnc retint_signal 566 TRACE_IRQS_ON 567 sti 568 pushq %rdi 569 CFI_ADJUST_CFA_OFFSET 8 570 call schedule 571 popq %rdi 572 CFI_ADJUST_CFA_OFFSET -8 573 GET_THREAD_INFO(%rcx) 574 cli 575 TRACE_IRQS_OFF 576 jmp retint_check 577 578retint_signal: 579 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx 580 jz retint_swapgs 581 TRACE_IRQS_ON 582 sti 583 SAVE_REST 584 movq $-1,ORIG_RAX(%rsp) 585 xorl %esi,%esi # oldset 586 movq %rsp,%rdi # &pt_regs 587 call do_notify_resume 588 RESTORE_REST 589 cli 590 TRACE_IRQS_OFF 591 movl $_TIF_NEED_RESCHED,%edi 592 GET_THREAD_INFO(%rcx) 593 jmp retint_check 594 595#ifdef CONFIG_PREEMPT 596 /* Returning to kernel space. Check if we need preemption */ 597 /* rcx: threadinfo. interrupts off. */ 598ENTRY(retint_kernel) 599 cmpl $0,threadinfo_preempt_count(%rcx) 600 jnz retint_restore_args 601 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx) 602 jnc retint_restore_args 603 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ 604 jnc retint_restore_args 605 call preempt_schedule_irq 606 jmp exit_intr 607#endif 608 609 CFI_ENDPROC 610END(common_interrupt) 611 612/* 613 * APIC interrupts. 
614 */ 615 .macro apicinterrupt num,func 616 INTR_FRAME 617 pushq $~(\num) 618 CFI_ADJUST_CFA_OFFSET 8 619 interrupt \func 620 jmp ret_from_intr 621 CFI_ENDPROC 622 .endm 623 624ENTRY(thermal_interrupt) 625 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt 626END(thermal_interrupt) 627 628ENTRY(threshold_interrupt) 629 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt 630END(threshold_interrupt) 631 632#ifdef CONFIG_SMP 633ENTRY(reschedule_interrupt) 634 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt 635END(reschedule_interrupt) 636 637 .macro INVALIDATE_ENTRY num 638ENTRY(invalidate_interrupt\num) 639 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt 640END(invalidate_interrupt\num) 641 .endm 642 643 INVALIDATE_ENTRY 0 644 INVALIDATE_ENTRY 1 645 INVALIDATE_ENTRY 2 646 INVALIDATE_ENTRY 3 647 INVALIDATE_ENTRY 4 648 INVALIDATE_ENTRY 5 649 INVALIDATE_ENTRY 6 650 INVALIDATE_ENTRY 7 651 652ENTRY(call_function_interrupt) 653 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt 654END(call_function_interrupt) 655ENTRY(irq_move_cleanup_interrupt) 656 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt 657END(irq_move_cleanup_interrupt) 658#endif 659 660ENTRY(apic_timer_interrupt) 661 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt 662END(apic_timer_interrupt) 663 664ENTRY(error_interrupt) 665 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt 666END(error_interrupt) 667 668ENTRY(spurious_interrupt) 669 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt 670END(spurious_interrupt) 671 672/* 673 * Exception entry points. 
674 */ 675 .macro zeroentry sym 676 INTR_FRAME 677 pushq $0 /* push error code/oldrax */ 678 CFI_ADJUST_CFA_OFFSET 8 679 pushq %rax /* push real oldrax to the rdi slot */ 680 CFI_ADJUST_CFA_OFFSET 8 681 CFI_REL_OFFSET rax,0 682 leaq \sym(%rip),%rax 683 jmp error_entry 684 CFI_ENDPROC 685 .endm 686 687 .macro errorentry sym 688 XCPT_FRAME 689 pushq %rax 690 CFI_ADJUST_CFA_OFFSET 8 691 CFI_REL_OFFSET rax,0 692 leaq \sym(%rip),%rax 693 jmp error_entry 694 CFI_ENDPROC 695 .endm 696 697 /* error code is on the stack already */ 698 /* handle NMI like exceptions that can happen everywhere */ 699 .macro paranoidentry sym, ist=0, irqtrace=1 700 SAVE_ALL 701 cld 702 movl $1,%ebx 703 movl $MSR_GS_BASE,%ecx 704 rdmsr 705 testl %edx,%edx 706 js 1f 707 swapgs 708 xorl %ebx,%ebx 7091: 710 .if \ist 711 movq %gs:pda_data_offset, %rbp 712 .endif 713 movq %rsp,%rdi 714 movq ORIG_RAX(%rsp),%rsi 715 movq $-1,ORIG_RAX(%rsp) 716 .if \ist 717 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 718 .endif 719 call \sym 720 .if \ist 721 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 722 .endif 723 cli 724 .if \irqtrace 725 TRACE_IRQS_OFF 726 .endif 727 .endm 728 729 /* 730 * "Paranoid" exit path from exception stack. 731 * Paranoid because this is used by NMIs and cannot take 732 * any kernel state for granted. 733 * We don't do kernel preemption checks here, because only 734 * NMI should be common and it does not enable IRQs and 735 * cannot get reschedule ticks. 736 * 737 * "trace" is 0 for the NMI handler only, because irq-tracing 738 * is fundamentally NMI-unsafe. (we cannot change the soft and 739 * hard flags at once, atomically) 740 */ 741 .macro paranoidexit trace=1 742 /* ebx: no swapgs flag */ 743paranoid_exit\trace: 744 testl %ebx,%ebx /* swapgs needed? 
*/ 745 jnz paranoid_restore\trace 746 testl $3,CS(%rsp) 747 jnz paranoid_userspace\trace 748paranoid_swapgs\trace: 749 .if \trace 750 TRACE_IRQS_IRETQ 0 751 .endif 752 swapgs 753paranoid_restore\trace: 754 RESTORE_ALL 8 755 iretq 756paranoid_userspace\trace: 757 GET_THREAD_INFO(%rcx) 758 movl threadinfo_flags(%rcx),%ebx 759 andl $_TIF_WORK_MASK,%ebx 760 jz paranoid_swapgs\trace 761 movq %rsp,%rdi /* &pt_regs */ 762 call sync_regs 763 movq %rax,%rsp /* switch stack for scheduling */ 764 testl $_TIF_NEED_RESCHED,%ebx 765 jnz paranoid_schedule\trace 766 movl %ebx,%edx /* arg3: thread flags */ 767 .if \trace 768 TRACE_IRQS_ON 769 .endif 770 sti 771 xorl %esi,%esi /* arg2: oldset */ 772 movq %rsp,%rdi /* arg1: &pt_regs */ 773 call do_notify_resume 774 cli 775 .if \trace 776 TRACE_IRQS_OFF 777 .endif 778 jmp paranoid_userspace\trace 779paranoid_schedule\trace: 780 .if \trace 781 TRACE_IRQS_ON 782 .endif 783 sti 784 call schedule 785 cli 786 .if \trace 787 TRACE_IRQS_OFF 788 .endif 789 jmp paranoid_userspace\trace 790 CFI_ENDPROC 791 .endm 792 793/* 794 * Exception entry point. This expects an error code/orig_rax on the stack 795 * and the exception handler in %rax. 
796 */ 797KPROBE_ENTRY(error_entry) 798 _frame RDI 799 CFI_REL_OFFSET rax,0 800 /* rdi slot contains rax, oldrax contains error code */ 801 cld 802 subq $14*8,%rsp 803 CFI_ADJUST_CFA_OFFSET (14*8) 804 movq %rsi,13*8(%rsp) 805 CFI_REL_OFFSET rsi,RSI 806 movq 14*8(%rsp),%rsi /* load rax from rdi slot */ 807 CFI_REGISTER rax,rsi 808 movq %rdx,12*8(%rsp) 809 CFI_REL_OFFSET rdx,RDX 810 movq %rcx,11*8(%rsp) 811 CFI_REL_OFFSET rcx,RCX 812 movq %rsi,10*8(%rsp) /* store rax */ 813 CFI_REL_OFFSET rax,RAX 814 movq %r8, 9*8(%rsp) 815 CFI_REL_OFFSET r8,R8 816 movq %r9, 8*8(%rsp) 817 CFI_REL_OFFSET r9,R9 818 movq %r10,7*8(%rsp) 819 CFI_REL_OFFSET r10,R10 820 movq %r11,6*8(%rsp) 821 CFI_REL_OFFSET r11,R11 822 movq %rbx,5*8(%rsp) 823 CFI_REL_OFFSET rbx,RBX 824 movq %rbp,4*8(%rsp) 825 CFI_REL_OFFSET rbp,RBP 826 movq %r12,3*8(%rsp) 827 CFI_REL_OFFSET r12,R12 828 movq %r13,2*8(%rsp) 829 CFI_REL_OFFSET r13,R13 830 movq %r14,1*8(%rsp) 831 CFI_REL_OFFSET r14,R14 832 movq %r15,(%rsp) 833 CFI_REL_OFFSET r15,R15 834 xorl %ebx,%ebx 835 testl $3,CS(%rsp) 836 je error_kernelspace 837error_swapgs: 838 swapgs 839error_sti: 840 movq %rdi,RDI(%rsp) 841 CFI_REL_OFFSET rdi,RDI 842 movq %rsp,%rdi 843 movq ORIG_RAX(%rsp),%rsi /* get error code */ 844 movq $-1,ORIG_RAX(%rsp) 845 call *%rax 846 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ 847error_exit: 848 movl %ebx,%eax 849 RESTORE_REST 850 cli 851 TRACE_IRQS_OFF 852 GET_THREAD_INFO(%rcx) 853 testl %eax,%eax 854 jne retint_kernel 855 movl threadinfo_flags(%rcx),%edx 856 movl $_TIF_WORK_MASK,%edi 857 andl %edi,%edx 858 jnz retint_careful 859 /* 860 * The iret might restore flags: 861 */ 862 TRACE_IRQS_IRETQ 863 swapgs 864 RESTORE_ARGS 0,8,0 865 jmp iret_label 866 CFI_ENDPROC 867 868error_kernelspace: 869 incl %ebx 870 /* There are two places in the kernel that can potentially fault with 871 usergs. Handle them here. The exception handlers after 872 iret run with kernel gs again, so don't set the user space flag. 
873 B stepping K8s sometimes report an truncated RIP for IRET 874 exceptions returning to compat mode. Check for these here too. */ 875 leaq iret_label(%rip),%rbp 876 cmpq %rbp,RIP(%rsp) 877 je error_swapgs 878 movl %ebp,%ebp /* zero extend */ 879 cmpq %rbp,RIP(%rsp) 880 je error_swapgs 881 cmpq $gs_change,RIP(%rsp) 882 je error_swapgs 883 jmp error_sti 884KPROBE_END(error_entry) 885 886 /* Reload gs selector with exception handling */ 887 /* edi: new selector */ 888ENTRY(load_gs_index) 889 CFI_STARTPROC 890 pushf 891 CFI_ADJUST_CFA_OFFSET 8 892 cli 893 swapgs 894gs_change: 895 movl %edi,%gs 8962: mfence 897 swapgs 898 popf 899 CFI_ADJUST_CFA_OFFSET -8 900 ret 901 CFI_ENDPROC 902ENDPROC(load_gs_index) 903 904 .section __ex_table,"a" 905 .align 8 906 .quad gs_change,bad_gs 907 .previous 908 .section .fixup,"ax" 909 /* running with kernelgs */ 910bad_gs: 911 swapgs /* switch back to user gs */ 912 xorl %eax,%eax 913 movl %eax,%gs 914 jmp 2b 915 .previous 916 917/* 918 * Create a kernel thread. 919 * 920 * C extern interface: 921 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) 922 * 923 * asm input arguments: 924 * rdi: fn, rsi: arg, rdx: flags 925 */ 926ENTRY(kernel_thread) 927 CFI_STARTPROC 928 FAKE_STACK_FRAME $child_rip 929 SAVE_ALL 930 931 # rdi: flags, rsi: usp, rdx: will be &pt_regs 932 movq %rdx,%rdi 933 orq kernel_thread_flags(%rip),%rdi 934 movq $-1, %rsi 935 movq %rsp, %rdx 936 937 xorl %r8d,%r8d 938 xorl %r9d,%r9d 939 940 # clone now 941 call do_fork 942 movq %rax,RAX(%rsp) 943 xorl %edi,%edi 944 945 /* 946 * It isn't worth to check for reschedule here, 947 * so internally to the x86_64 port you can rely on kernel_thread() 948 * not to reschedule the child before returning, this avoids the need 949 * of hacks for example to fork off the per-CPU idle tasks. 
950 * [Hopefully no generic code relies on the reschedule -AK] 951 */ 952 RESTORE_ALL 953 UNFAKE_STACK_FRAME 954 ret 955 CFI_ENDPROC 956ENDPROC(kernel_thread) 957 958child_rip: 959 pushq $0 # fake return address 960 CFI_STARTPROC 961 /* 962 * Here we are in the child and the registers are set as they were 963 * at kernel_thread() invocation in the parent. 964 */ 965 movq %rdi, %rax 966 movq %rsi, %rdi 967 call *%rax 968 # exit 969 xorl %edi, %edi 970 call do_exit 971 CFI_ENDPROC 972ENDPROC(child_rip) 973 974/* 975 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. 976 * 977 * C extern interface: 978 * extern long execve(char *name, char **argv, char **envp) 979 * 980 * asm input arguments: 981 * rdi: name, rsi: argv, rdx: envp 982 * 983 * We want to fallback into: 984 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs) 985 * 986 * do_sys_execve asm fallback arguments: 987 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack 988 */ 989ENTRY(kernel_execve) 990 CFI_STARTPROC 991 FAKE_STACK_FRAME $0 992 SAVE_ALL 993 call sys_execve 994 movq %rax, RAX(%rsp) 995 RESTORE_REST 996 testq %rax,%rax 997 je int_ret_from_sys_call 998 RESTORE_ARGS 999 UNFAKE_STACK_FRAME 1000 ret 1001 CFI_ENDPROC 1002ENDPROC(kernel_execve) 1003 1004KPROBE_ENTRY(page_fault) 1005 errorentry do_page_fault 1006KPROBE_END(page_fault) 1007 1008ENTRY(coprocessor_error) 1009 zeroentry do_coprocessor_error 1010END(coprocessor_error) 1011 1012ENTRY(simd_coprocessor_error) 1013 zeroentry do_simd_coprocessor_error 1014END(simd_coprocessor_error) 1015 1016ENTRY(device_not_available) 1017 zeroentry math_state_restore 1018END(device_not_available) 1019 1020 /* runs on exception stack */ 1021KPROBE_ENTRY(debug) 1022 INTR_FRAME 1023 pushq $0 1024 CFI_ADJUST_CFA_OFFSET 8 1025 paranoidentry do_debug, DEBUG_STACK 1026 paranoidexit 1027KPROBE_END(debug) 1028 1029 /* runs on exception stack */ 1030KPROBE_ENTRY(nmi) 1031 INTR_FRAME 1032 pushq $-1 
1033 CFI_ADJUST_CFA_OFFSET 8 1034 paranoidentry do_nmi, 0, 0 1035#ifdef CONFIG_TRACE_IRQFLAGS 1036 paranoidexit 0 1037#else 1038 jmp paranoid_exit1 1039 CFI_ENDPROC 1040#endif 1041KPROBE_END(nmi) 1042 1043KPROBE_ENTRY(int3) 1044 INTR_FRAME 1045 pushq $0 1046 CFI_ADJUST_CFA_OFFSET 8 1047 paranoidentry do_int3, DEBUG_STACK 1048 jmp paranoid_exit1 1049 CFI_ENDPROC 1050KPROBE_END(int3) 1051 1052ENTRY(overflow) 1053 zeroentry do_overflow 1054END(overflow) 1055 1056ENTRY(bounds) 1057 zeroentry do_bounds 1058END(bounds) 1059 1060ENTRY(invalid_op) 1061 zeroentry do_invalid_op 1062END(invalid_op) 1063 1064ENTRY(coprocessor_segment_overrun) 1065 zeroentry do_coprocessor_segment_overrun 1066END(coprocessor_segment_overrun) 1067 1068ENTRY(reserved) 1069 zeroentry do_reserved 1070END(reserved) 1071 1072 /* runs on exception stack */ 1073ENTRY(double_fault) 1074 XCPT_FRAME 1075 paranoidentry do_double_fault 1076 jmp paranoid_exit1 1077 CFI_ENDPROC 1078END(double_fault) 1079 1080ENTRY(invalid_TSS) 1081 errorentry do_invalid_TSS 1082END(invalid_TSS) 1083 1084ENTRY(segment_not_present) 1085 errorentry do_segment_not_present 1086END(segment_not_present) 1087 1088 /* runs on exception stack */ 1089ENTRY(stack_segment) 1090 XCPT_FRAME 1091 paranoidentry do_stack_segment 1092 jmp paranoid_exit1 1093 CFI_ENDPROC 1094END(stack_segment) 1095 1096KPROBE_ENTRY(general_protection) 1097 errorentry do_general_protection 1098KPROBE_END(general_protection) 1099 1100ENTRY(alignment_check) 1101 errorentry do_alignment_check 1102END(alignment_check) 1103 1104ENTRY(divide_error) 1105 zeroentry do_divide_error 1106END(divide_error) 1107 1108ENTRY(spurious_interrupt_bug) 1109 zeroentry do_spurious_interrupt_bug 1110END(spurious_interrupt_bug) 1111 1112#ifdef CONFIG_X86_MCE 1113 /* runs on exception stack */ 1114ENTRY(machine_check) 1115 INTR_FRAME 1116 pushq $0 1117 CFI_ADJUST_CFA_OFFSET 8 1118 paranoidentry do_machine_check 1119 jmp paranoid_exit1 1120 CFI_ENDPROC 1121END(machine_check) 1122#endif 

/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
	CFI_STARTPROC
	push %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET rbp,0
	mov  %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	incl %gs:pda_irqcount
	cmove %gs:pda_irqstackptr,%rsp	/* switch to irq stack only if not already on it */
	push  %rbp			# backlink for old unwinder
	call __do_softirq
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC
ENDPROC(call_softirq)

	/* Stub syscall entry: fail every syscall with -ENOSYS and
	   return straight to user mode. */
KPROBE_ENTRY(ignore_sysret)
	CFI_STARTPROC
	mov $-ENOSYS,%eax
	sysret
	CFI_ENDPROC
ENDPROC(ignore_sysret)