/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992  Linus Torvalds
 * Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 * Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 *   backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 *   There are unfortunately lots of special cases where some registers
 *   are not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 *   Gives a full stack frame.
 * - ENTRY/END - Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 *   frame that is otherwise undefined after a SYSCALL.
 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */

#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page_types.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>
#include <asm/percpu.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
#include <linux/elf-em.h>
#define __AUDIT_ARCH_64BIT	0x80000000
#define __AUDIT_ARCH_LE		0x40000000
#define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)

	.code64
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
	retq
END(mcount)

ENTRY(ftrace_caller)
	cmpl $0, function_trace_stop
	jne ftrace_stub

	MCOUNT_SAVE_FRAME

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

GLOBAL(ftrace_call)
	call ftrace_stub

	MCOUNT_RESTORE_FRAME

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
GLOBAL(ftrace_graph_call)
	jmp ftrace_stub
#endif

GLOBAL(ftrace_stub)
	retq
END(ftrace_caller)
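/*
 * In the dynamic case above, every mcount call site is patched to a NOP
 * at boot and mcount itself is a bare retq; enabling tracing live-patches
 * the call sites to call ftrace_caller, and the "call ftrace_stub" slot
 * at ftrace_call is rewritten to point at the active tracer. The frame
 * layout assumed here is that MCOUNT_SAVE_FRAME leaves the mcount return
 * address at 0x38(%rsp): that address, minus MCOUNT_INSN_SIZE, is the
 * call-site ip passed in %rdi, while 8(%rbp) supplies the traced
 * function's own return address for the parent-ip argument in %rsi.
 */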
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
	cmpl $0, function_trace_stop
	jne ftrace_stub

	cmpq $ftrace_stub, ftrace_trace_function
	jnz trace

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	cmpq $ftrace_stub, ftrace_graph_return
	jnz ftrace_graph_caller

	cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
	jnz ftrace_graph_caller
#endif

GLOBAL(ftrace_stub)
	retq

trace:
	MCOUNT_SAVE_FRAME

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

	call *ftrace_trace_function

	MCOUNT_RESTORE_FRAME

	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
	cmpl $0, function_trace_stop
	jne ftrace_stub

	MCOUNT_SAVE_FRAME

	leaq 8(%rbp), %rdi
	movq 0x38(%rsp), %rsi
	movq (%rbp), %rdx
	subq $MCOUNT_INSN_SIZE, %rsi

	call prepare_ftrace_return

	MCOUNT_RESTORE_FRAME

	retq
END(ftrace_graph_caller)

GLOBAL(return_to_handler)
	subq $24, %rsp

	/* Save the return values */
	movq %rax, (%rsp)
	movq %rdx, 8(%rsp)
	movq %rbp, %rdi

	call ftrace_return_to_handler

	movq %rax, %rdi
	movq 8(%rsp), %rdx
	movq (%rsp), %rax
	addq $24, %rsp
	jmp *%rdi
#endif


#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
	swapgs
	sysretq
ENDPROC(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */


.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
	jnc 1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * C code is not supposed to know about undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */
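/*
 * Background for the two macros below: the SYSCALL instruction saves
 * the user RIP in %rcx and RFLAGS in %r11, and saves neither SS nor
 * RSP. The fast path therefore keeps the user stack pointer only in
 * the per-cpu old_rsp slot, and the RIP/CS/SS/RSP/EFLAGS words of
 * pt_regs contain junk until FIXUP_TOP_OF_STACK fills them in. The
 * saved RCX slot is stored as -1 since the user %rcx was clobbered by
 * SYSCALL itself and cannot be recovered.
 */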
	/* %rsp:at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp offset=0
	movq PER_CPU_VAR(old_rsp),\tmp
	movq \tmp,RSP+\offset(%rsp)
	movq $__USER_DS,SS+\offset(%rsp)
	movq $__USER_CS,CS+\offset(%rsp)
	movq $-1,RCX+\offset(%rsp)
	movq R11+\offset(%rsp),\tmp	/* get eflags */
	movq \tmp,EFLAGS+\offset(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp offset=0
	movq RSP+\offset(%rsp),\tmp
	movq \tmp,PER_CPU_VAR(old_rsp)
	movq EFLAGS+\offset(%rsp),\tmp
	movq \tmp,R11+\offset(%rsp)
	.endm

	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl %eax, %eax
	pushq $__KERNEL_DS	/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq %rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq $X86_EFLAGS_IF	/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq $__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq \child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq %rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm

	.macro UNFAKE_STACK_FRAME
	addq $8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm

/*
 * initial frame state for interrupts (and exceptions without error code)
 */
	.macro EMPTY_FRAME start=1 offset=0
	.if \start
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,8+\offset
	.else
	CFI_DEF_CFA_OFFSET 8+\offset
	.endif
	.endm

/*
 * initial frame state for interrupts (and exceptions without error code)
 */
	.macro INTR_FRAME start=1 offset=0
	EMPTY_FRAME \start, SS+8+\offset-RIP
	/*CFI_REL_OFFSET ss, SS+\offset-RIP*/
	CFI_REL_OFFSET rsp, RSP+\offset-RIP
	/*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
	/*CFI_REL_OFFSET cs, CS+\offset-RIP*/
	CFI_REL_OFFSET rip, RIP+\offset-RIP
	.endm

/*
 * initial frame state for exceptions with error code (and interrupts
 * with vector already pushed)
 */
	.macro XCPT_FRAME start=1 offset=0
	INTR_FRAME \start, RIP+\offset-ORIG_RAX
	/*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
	.endm

/*
 * frame that enables calling into C.
 */
	.macro PARTIAL_FRAME start=1 offset=0
	XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
	CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
	CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
	CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
	CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
	CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
	CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
	CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
	CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
	CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
	.endm

/*
 * frame that enables passing a complete pt_regs to a C function.
 */
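/*
 * The CFI frame macros above are layered: EMPTY_FRAME opens a bare
 * unwind procedure, INTR_FRAME adds the hardware iret frame, XCPT_FRAME
 * adds the error code/orig_rax word, PARTIAL_FRAME annotates the
 * SAVE_ARGS registers, and DEFAULT_FRAME (below) the SAVE_REST ones, so
 * each entry point describes exactly as much state as it has saved.
 */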
	.macro DEFAULT_FRAME start=1 offset=0
	PARTIAL_FRAME \start, R11+\offset-R15
	CFI_REL_OFFSET rbx, RBX+\offset
	CFI_REL_OFFSET rbp, RBP+\offset
	CFI_REL_OFFSET r12, R12+\offset
	CFI_REL_OFFSET r13, R13+\offset
	CFI_REL_OFFSET r14, R14+\offset
	CFI_REL_OFFSET r15, R15+\offset
	.endm

/* save partial stack frame */
ENTRY(save_args)
	XCPT_FRAME
	cld
	movq_cfi rdi, RDI+16-ARGOFFSET
	movq_cfi rsi, RSI+16-ARGOFFSET
	movq_cfi rdx, RDX+16-ARGOFFSET
	movq_cfi rcx, RCX+16-ARGOFFSET
	movq_cfi rax, RAX+16-ARGOFFSET
	movq_cfi r8, R8+16-ARGOFFSET
	movq_cfi r9, R9+16-ARGOFFSET
	movq_cfi r10, R10+16-ARGOFFSET
	movq_cfi r11, R11+16-ARGOFFSET

	leaq -ARGOFFSET+16(%rsp),%rdi	/* arg1 for handler */
	movq_cfi rbp, 8			/* push %rbp */
	leaq 8(%rsp), %rbp		/* mov %rsp, %rbp */
	testl $3, CS(%rdi)
	je 1f
	SWAPGS
	/*
	 * irq_count is used to check if a CPU is already on an interrupt stack
	 * or not. While this is essentially redundant with preempt_count it is
	 * a little cheaper to use a separate counter in the PDA (short of
	 * moving irq_enter into assembly, which would be too much work)
	 */
1:	incl PER_CPU_VAR(irq_count)
	jne 2f
	popq_cfi %rax			/* move return address... */
	mov PER_CPU_VAR(irq_stack_ptr),%rsp
	EMPTY_FRAME 0
	pushq_cfi %rbp			/* backlink for unwinder */
	pushq_cfi %rax			/* ... to the new stack */
	/*
	 * We entered an interrupt context - irqs are off:
	 */
2:	TRACE_IRQS_OFF
	ret
	CFI_ENDPROC
END(save_args)

ENTRY(save_rest)
	PARTIAL_FRAME 1 REST_SKIP+8
	movq 5*8+16(%rsp), %r11		/* save return address */
	movq_cfi rbx, RBX+16
	movq_cfi rbp, RBP+16
	movq_cfi r12, R12+16
	movq_cfi r13, R13+16
	movq_cfi r14, R14+16
	movq_cfi r15, R15+16
	movq %r11, 8(%rsp)		/* return address */
	FIXUP_TOP_OF_STACK %r11, 16
	ret
	CFI_ENDPROC
END(save_rest)

/* save complete stack frame */
	.pushsection .kprobes.text, "ax"
ENTRY(save_paranoid)
	XCPT_FRAME 1 RDI+8
	cld
	movq_cfi rdi, RDI+8
	movq_cfi rsi, RSI+8
	movq_cfi rdx, RDX+8
	movq_cfi rcx, RCX+8
	movq_cfi rax, RAX+8
	movq_cfi r8, R8+8
	movq_cfi r9, R9+8
	movq_cfi r10, R10+8
	movq_cfi r11, R11+8
	movq_cfi rbx, RBX+8
	movq_cfi rbp, RBP+8
	movq_cfi r12, R12+8
	movq_cfi r13, R13+8
	movq_cfi r14, R14+8
	movq_cfi r15, R15+8
	movl $1,%ebx
	movl $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js 1f				/* negative -> in kernel */
	SWAPGS
	xorl %ebx,%ebx
1:	ret
	CFI_ENDPROC
END(save_paranoid)
	.popsection

/*
 * A newly forked process directly context switches into this address.
 *
 * rdi: prev task we switched from
 */
ENTRY(ret_from_fork)
	DEFAULT_FRAME

	LOCK ; btr $TIF_FORK,TI_flags(%r8)

	push kernel_eflags(%rip)
	CFI_ADJUST_CFA_OFFSET 8
	popf				# reset kernel eflags
	CFI_ADJUST_CFA_OFFSET -8

	call schedule_tail		# rdi: 'prev' task parameter

	GET_THREAD_INFO(%rcx)

	RESTORE_REST

	testl $3, CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je int_ret_from_sys_call

	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
	jnz int_ret_from_sys_call

	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
	jmp ret_from_sys_call		# go to the SYSRET fastpath

	CFI_ENDPROC
END(ret_from_fork)

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */
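/*
 * Register convention the fast path below relies on (the standard
 * x86-64 Linux syscall ABI, summarized here for reference):
 *	rax	system call number
 *	rdi/rsi/rdx/r10/r8/r9	syscall args 1-6
 *	rcx	return address saved by SYSCALL (which is why arg 4
 *		lives in r10 and is moved to rcx before calling C)
 *	r11	RFLAGS saved by SYSCALL
 * rbx, rbp and r12-r15 are callee-saved by the called C code and are
 * not touched on the fast path.
 */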
ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	SWAPGS_UNSAFE_STACK
	/*
	 * A hypervisor implementation might want to use a label
	 * after the swapgs, so that it can do the swapgs
	 * for the guest and jump here on syscall.
	 */
ENTRY(system_call_after_swapgs)

	movq %rsp,PER_CPU_VAR(old_rsp)
	movq PER_CPU_VAR(kernel_stack),%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_ARGS 8,1
	movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
	jnz tracesys
system_call_fastpath:
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	LOCKDEP_SYS_EXIT
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz sysret_careful
	CFI_REMEMBER_STATE
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER	rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER	rflags,r11*/
	movq PER_CPU_VAR(old_rsp), %rsp
	USERGS_SYSRET64

	CFI_RESTORE_STATE
	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
#ifdef CONFIG_AUDITSYSCALL
	bt $TIF_SYSCALL_AUDIT,%edx
	jc sysret_audit
#endif
	/*
	 * We have a signal, or exit tracing or single-step.
	 * These all wind up with the iret return path anyway,
	 * so just join that path right now.
	 */
	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
	jmp int_check_syscall_exit_work

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

#ifdef CONFIG_AUDITSYSCALL
	/*
	 * Fast path for syscall audit without full syscall trace.
	 * We just call audit_syscall_entry() directly, and then
	 * jump back to the normal fast path.
	 */
auditsys:
	movq %r10,%r9			/* 6th arg: 4th syscall arg */
	movq %rdx,%r8			/* 5th arg: 3rd syscall arg */
	movq %rsi,%rcx			/* 4th arg: 2nd syscall arg */
	movq %rdi,%rdx			/* 3rd arg: 1st syscall arg */
	movq %rax,%rsi			/* 2nd arg: syscall number */
	movl $AUDIT_ARCH_X86_64,%edi	/* 1st arg: audit arch */
	call audit_syscall_entry
	LOAD_ARGS 0			/* reload call-clobbered registers */
	jmp system_call_fastpath
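/*
 * For reference, the register shuffle above matches a C prototype of
 * roughly (as read off the calling sequence, not restated from a
 * header):
 *
 *	void audit_syscall_entry(int arch, int major,
 *				 unsigned long a1, unsigned long a2,
 *				 unsigned long a3, unsigned long a4);
 *
 * i.e. the audit arch in %rdi, the syscall number in %rsi and the first
 * four syscall arguments in %rdx/%rcx/%r8/%r9; LOAD_ARGS then reloads
 * the clobbered argument registers from the partial frame.
 */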
	/*
	 * Return fast path for syscall audit. Call audit_syscall_exit()
	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
	 * masked off.
	 */
sysret_audit:
	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
	cmpq $0,%rsi			/* is it < 0? */
	setl %al			/* 1 if so, 0 if not */
	movzbl %al,%edi			/* zero-extend that into %edi */
	inc %edi	/* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
	call audit_syscall_exit
	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
	jmp sysret_check
#endif /* CONFIG_AUDITSYSCALL */

	/* Do syscall tracing */
tracesys:
#ifdef CONFIG_AUDITSYSCALL
	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jz auditsys
#endif
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	/*
	 * Reload arg registers from stack in case ptrace changed them.
	 * We don't reload %rax because syscall_trace_enter() returned
	 * the value it wants us to use in the table lookup.
	 */
	LOAD_ARGS ARGOFFSET, 1
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja int_ret_from_sys_call	/* RAX(%rsp) set to -ENOSYS above */
	movq %r10,%rcx			/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
GLOBAL(int_ret_from_sys_call)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
GLOBAL(int_with_check)
	LOCKDEP_SYS_EXIT_IRQ
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz int_careful
	andl $~TS_COMPAT,TI_status(%rcx)
	jmp retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc int_very_careful
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
int_check_syscall_exit_work:
	SAVE_REST
	/* Check for syscall exit trace */
	testl $_TIF_WORK_SYSCALL_EXIT,%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi		# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
	jmp int_restore_rest

int_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz 1f
	movq %rsp,%rdi			# &ptregs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_WORK_MASK,%edi
int_restore_rest:
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check
	CFI_ENDPROC
END(system_call)
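/*
 * The int_with_check loop above is, in rough C pseudocode (a sketch of
 * the control flow, not generated code):
 *
 *	while (ti->flags & mask) {
 *		if (ti->flags & _TIF_NEED_RESCHED)
 *			schedule();
 *		else if (ti->flags & _TIF_WORK_SYSCALL_EXIT)
 *			syscall_trace_leave(regs);  // then shrink mask
 *		else if (ti->flags & _TIF_DO_NOTIFY_MASK)
 *			do_notify_resume(regs, NULL, flags);
 *	}
 *
 * with interrupts enabled only around the calls and the flags re-read
 * with interrupts off on every pass.
 */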
/*
 * Certain special system calls need to save a complete full stack frame.
 */
	.macro PTREGSCALL label,func,arg
ENTRY(\label)
	PARTIAL_FRAME 1 8		/* offset 8: return address */
	subq $REST_SKIP, %rsp
	CFI_ADJUST_CFA_OFFSET REST_SKIP
	call save_rest
	DEFAULT_FRAME 0 8		/* offset 8: return address */
	leaq 8(%rsp), \arg		/* pt_regs pointer */
	call \func
	jmp ptregscall_common
	CFI_ENDPROC
END(\label)
	.endm

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	DEFAULT_FRAME 1 8		/* offset 8: return address */
	RESTORE_TOP_OF_STACK %r11, 8
	movq_cfi_restore R15+8, r15
	movq_cfi_restore R14+8, r14
	movq_cfi_restore R13+8, r13
	movq_cfi_restore R12+8, r12
	movq_cfi_restore RBP+8, rbp
	movq_cfi_restore RBX+8, rbx
	ret $REST_SKIP			/* pop extended registers */
	CFI_ENDPROC
END(ptregscall_common)

ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	movq %rsp, %rcx
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET -8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)

/*
 * Build the entry stubs and pointer table with some assembler magic.
 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
 * single cache line on all modern x86 implementations.
 */
	.section .init.rodata,"a"
ENTRY(interrupt)
	.text
	.p2align 5
	.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
	INTR_FRAME
vector=FIRST_EXTERNAL_VECTOR
.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
	.balign 32
  .rept	7
    .if vector < NR_VECTORS
      .if vector <> FIRST_EXTERNAL_VECTOR
	CFI_ADJUST_CFA_OFFSET -8
      .endif
1:	pushq $(~vector+0x80)		/* Note: always in signed byte range */
	CFI_ADJUST_CFA_OFFSET 8
      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
	jmp 2f
      .endif
      .previous
	.quad 1b
      .text
vector=vector+1
    .endif
  .endr
2:	jmp common_interrupt
.endr
	CFI_ENDPROC
END(irq_entries_start)

.previous
END(interrupt)
.previous

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): ~(interrupt number) */
	.macro interrupt func
	subq $10*8, %rsp
	CFI_ADJUST_CFA_OFFSET 10*8
	call save_args
	PARTIAL_FRAME 0
	call \func
	.endm
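/*
 * On the (~vector + 0x80) encoding used by the stubs above: negating
 * the vector and biasing it by 0x80 keeps the pushed immediate in
 * signed-byte range for every vector, so each stub's push is only two
 * bytes and seven stubs plus the shared jmp fit the 32-byte chunk.
 * common_interrupt then applies addq $-0x80, leaving orig_ax holding
 * ~vector, a value in [-256, -1] that can never collide with a syscall
 * number; do_IRQ is expected to recover the vector by complementing it
 * again.
 */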
/*
 * Interrupt entry/exit should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"
	/*
	 * The interrupt stubs push (~vector+0x80) onto the stack and
	 * then jump to common_interrupt.
	 */
	.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
	XCPT_FRAME
	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
	interrupt do_IRQ
	/* 0(%rsp): old_rsp-ARGOFFSET */
ret_from_intr:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	decl PER_CPU_VAR(irq_count)
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz retint_careful

retint_swapgs:		/* return to user-space */
	/*
	 * The iretq could re-enable interrupts:
	 */
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_IRETQ
	SWAPGS
	jmp restore_args

retint_restore_args:	/* return to kernel space */
	DISABLE_INTERRUPTS(CLBR_ANY)
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0

irq_return:
	INTERRUPT_RETURN

	.section __ex_table, "a"
	.quad irq_return, bad_iret
	.previous

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iretq

	.section __ex_table,"a"
	.quad native_iret, bad_iret
	.previous
#endif

	.section .fixup,"ax"
bad_iret:
	/*
	 * The iret traps when the %cs or %ss being restored is bogus.
	 * We've lost the original trap vector and error code.
	 * #GPF is the most likely one to get for an invalid selector.
	 * So pretend we completed the iret and took the #GPF in user mode.
	 *
	 * We are now running with the kernel GS after exception recovery.
	 * But error_entry expects us to have user GS to match the user %cs,
	 * so swap back.
	 */
	pushq $0

	SWAPGS
	jmp general_protection

	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc retint_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET	8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp retint_check

retint_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz retint_swapgs
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi			# oldset
	movq %rsp,%rdi			# &pt_regs
	call do_notify_resume
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	jmp retint_with_reschedule

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	 threadinfo. interrupts off. */
ENTRY(retint_kernel)
	cmpl $0,TI_preempt_count(%rcx)
	jnz retint_restore_args
	bt $TIF_NEED_RESCHED,TI_flags(%rcx)
	jnc retint_restore_args
	bt $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)
/*
 * End of kprobes section
 */
	.popsection
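/*
 * The CONFIG_PREEMPT retint_kernel check above is, in rough C
 * pseudocode (a sketch of the control flow, not generated code; bit 9
 * of EFLAGS is IF):
 *
 *	if (ti->preempt_count == 0 &&
 *	    (ti->flags & _TIF_NEED_RESCHED) &&
 *	    (regs->flags & X86_EFLAGS_IF))
 *		preempt_schedule_irq();
 *
 * i.e. we preempt only when the interrupted kernel context was itself
 * preemptible and was running with interrupts enabled.
 */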
/*
 * APIC interrupts.
 */
.macro apicinterrupt num sym do_sym
ENTRY(\sym)
	INTR_FRAME
	pushq $~(\num)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \do_sym
	jmp ret_from_intr
	CFI_ENDPROC
END(\sym)
.endm

#ifdef CONFIG_SMP
apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
	irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
apicinterrupt REBOOT_VECTOR \
	reboot_interrupt smp_reboot_interrupt
#endif

#ifdef CONFIG_X86_UV
apicinterrupt UV_BAU_MESSAGE \
	uv_bau_message_intr1 uv_bau_message_interrupt
#endif
apicinterrupt LOCAL_TIMER_VECTOR \
	apic_timer_interrupt smp_apic_timer_interrupt
apicinterrupt X86_PLATFORM_IPI_VECTOR \
	x86_platform_ipi smp_x86_platform_ipi

#ifdef CONFIG_SMP
apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
	invalidate_interrupt0 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
	invalidate_interrupt1 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
	invalidate_interrupt2 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
	invalidate_interrupt3 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
	invalidate_interrupt4 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
	invalidate_interrupt5 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
	invalidate_interrupt6 smp_invalidate_interrupt
apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
	invalidate_interrupt7 smp_invalidate_interrupt
#endif

apicinterrupt THRESHOLD_APIC_VECTOR \
	threshold_interrupt smp_threshold_interrupt
apicinterrupt THERMAL_APIC_VECTOR \
	thermal_interrupt smp_thermal_interrupt

#ifdef CONFIG_X86_MCE
apicinterrupt MCE_SELF_VECTOR \
	mce_self_interrupt smp_mce_self_interrupt
#endif

#ifdef CONFIG_SMP
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
	call_function_single_interrupt smp_call_function_single_interrupt
apicinterrupt CALL_FUNCTION_VECTOR \
	call_function_interrupt smp_call_function_interrupt
apicinterrupt RESCHEDULE_VECTOR \
	reschedule_interrupt smp_reschedule_interrupt
#endif

apicinterrupt ERROR_APIC_VECTOR \
	error_interrupt smp_error_interrupt
apicinterrupt SPURIOUS_APIC_VECTOR \
	spurious_interrupt smp_spurious_interrupt

#ifdef CONFIG_PERF_EVENTS
apicinterrupt LOCAL_PENDING_VECTOR \
	perf_pending_interrupt smp_perf_pending_interrupt
#endif
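/*
 * Adding another vector follows the same pattern; as an illustration
 * only (FOO_VECTOR, foo_interrupt and smp_foo_interrupt are made-up
 * names, not real kernel symbols):
 *
 *	apicinterrupt FOO_VECTOR \
 *		foo_interrupt smp_foo_interrupt
 *
 * which emits an asm stub foo_interrupt that pushes ~FOO_VECTOR as
 * orig_ax and calls the C handler smp_foo_interrupt(struct pt_regs *).
 */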
/*
 * Exception entry points.
 */
.macro zeroentry sym do_sym
ENTRY(\sym)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq_cfi $-1			/* ORIG_RAX: no syscall to restart */
	subq $15*8,%rsp
	CFI_ADJUST_CFA_OFFSET 15*8
	call error_entry
	DEFAULT_FRAME 0
	movq %rsp,%rdi			/* pt_regs pointer */
	xorl %esi,%esi			/* no error code */
	call \do_sym
	jmp error_exit			/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(\sym)
.endm

.macro paranoidzeroentry sym do_sym
ENTRY(\sym)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $-1			/* ORIG_RAX: no syscall to restart */
	CFI_ADJUST_CFA_OFFSET 8
	subq $15*8, %rsp
	call save_paranoid
	TRACE_IRQS_OFF
	movq %rsp,%rdi			/* pt_regs pointer */
	xorl %esi,%esi			/* no error code */
	call \do_sym
	jmp paranoid_exit		/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(\sym)
.endm

#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
.macro paranoidzeroentry_ist sym do_sym ist
ENTRY(\sym)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $-1			/* ORIG_RAX: no syscall to restart */
	CFI_ADJUST_CFA_OFFSET 8
	subq $15*8, %rsp
	call save_paranoid
	TRACE_IRQS_OFF
	movq %rsp,%rdi			/* pt_regs pointer */
	xorl %esi,%esi			/* no error code */
	subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
	call \do_sym
	addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
	jmp paranoid_exit		/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(\sym)
.endm

.macro errorentry sym do_sym
ENTRY(\sym)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	subq $15*8,%rsp
	CFI_ADJUST_CFA_OFFSET 15*8
	call error_entry
	DEFAULT_FRAME 0
	movq %rsp,%rdi			/* pt_regs pointer */
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
	call \do_sym
	jmp error_exit			/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(\sym)
.endm

	/* error code is on the stack already */
.macro paranoiderrorentry sym do_sym
ENTRY(\sym)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	subq $15*8,%rsp
	CFI_ADJUST_CFA_OFFSET 15*8
	call save_paranoid
	DEFAULT_FRAME 0
	TRACE_IRQS_OFF
	movq %rsp,%rdi			/* pt_regs pointer */
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
	call \do_sym
	jmp paranoid_exit		/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(\sym)
.endm

zeroentry divide_error do_divide_error
zeroentry overflow do_overflow
zeroentry bounds do_bounds
zeroentry invalid_op do_invalid_op
zeroentry device_not_available do_device_not_available
paranoiderrorentry double_fault do_double_fault
zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
errorentry invalid_TSS do_invalid_TSS
errorentry segment_not_present do_segment_not_present
zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
zeroentry coprocessor_error do_coprocessor_error
errorentry alignment_check do_alignment_check
zeroentry simd_coprocessor_error do_simd_coprocessor_error

	/* Reload gs selector with exception handling */
	/* edi:  new selector */
ENTRY(native_load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
	SWAPGS
gs_change:
	movl %edi,%gs
2:	mfence
	SWAPGS
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
END(native_load_gs_index)
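/*
 * The __ex_table entry below pairs the potentially faulting instruction
 * at gs_change with the bad_gs fixup: if loading the new %gs selector
 * faults, the trap handler resumes at bad_gs, which switches back to
 * the kernel GS, loads a null selector instead, and rejoins the normal
 * return path at label 2 above.
 */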
	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	SWAPGS				/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp 2b
	.previous

ENTRY(kernel_thread_helper)
	pushq $0			# fake return address
	CFI_STARTPROC
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	call *%rsi
	# exit
	mov %eax, %edi
	call do_exit
	ud2				# padding for call trace
	CFI_ENDPROC
END(kernel_thread_helper)

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all
 * state properly.
 *
 * C extern interface:
 *	extern long execve(const char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(const char *name, char **argv, char **envp,
 *			       struct pt_regs *regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 */
ENTRY(kernel_execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	movq %rsp,%rcx
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
END(kernel_execve)

/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
	CFI_STARTPROC
	push %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET rbp,0
	mov %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	incl PER_CPU_VAR(irq_count)
	cmove PER_CPU_VAR(irq_stack_ptr),%rsp
	push %rbp			# backlink for old unwinder
	call __do_softirq
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
	decl PER_CPU_VAR(irq_count)
	ret
	CFI_ENDPROC
END(call_softirq)
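/*
 * On the cmove in call_softirq above: the per-cpu irq_count starts at
 * -1, so the incl sets ZF exactly when this is the outermost entry, and
 * only then does the cmove switch %rsp to the per-cpu interrupt stack;
 * nested invocations keep running on the stack they arrived on.
 * save_args and the Xen callback below rely on the same counter for
 * their stack switching.
 */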
#ifdef CONFIG_XEN
zeroentry xen_hypervisor_callback xen_do_hypervisor_callback

/*
 * A note on the "critical region" in our callback handler.
 * We want to avoid stacking callback handlers due to events occurring
 * during handling of the last event. To do this, we keep events disabled
 * until we've done all processing. HOWEVER, we must enable events before
 * popping the stack frame (can't be done atomically) and so it would still
 * be possible to get enough handler activations to overflow the stack.
 * Although unlikely, bugs of that kind are hard to track down, so we'd
 * like to avoid the possibility.
 * So, on entry to the handler we detect whether we interrupted an
 * existing activation in its critical region -- if so, we pop the current
 * activation and restart the handler using the previous one.
 */
ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct pt_regs *)
	CFI_STARTPROC
/*
 * Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will
 * see the correct pointer to the pt_regs
 */
	movq %rdi, %rsp			# we don't return, adjust the stack frame
	CFI_ENDPROC
	DEFAULT_FRAME
11:	incl PER_CPU_VAR(irq_count)
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
	pushq %rbp			# backlink for old unwinder
	call xen_evtchn_do_upcall
	popq %rsp
	CFI_DEF_CFA_REGISTER rsp
	decl PER_CPU_VAR(irq_count)
	jmp error_exit
	CFI_ENDPROC
END(xen_do_hypervisor_callback)

/*
 * Hypervisor uses this for application faults while it executes.
 * We get here for two reasons:
 *  1. Fault while reloading DS, ES, FS or GS
 *  2. Fault while executing IRET
 * Category 1 we do not need to fix up as Xen has already reloaded all segment
 * registers that could be reloaded and zeroed the others.
 * Category 2 we fix up by killing the current process. We cannot use the
 * normal Linux return path in this case because if we use the IRET hypercall
 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
 * We distinguish between categories by comparing each saved segment register
 * with its current contents: any discrepancy means we are in category 1.
 */
ENTRY(xen_failsafe_callback)
	INTR_FRAME 1 (6*8)
	/*CFI_REL_OFFSET gs,GS*/
	/*CFI_REL_OFFSET fs,FS*/
	/*CFI_REL_OFFSET es,ES*/
	/*CFI_REL_OFFSET ds,DS*/
	CFI_REL_OFFSET r11,8
	CFI_REL_OFFSET rcx,0
	movw %ds,%cx
	cmpw %cx,0x10(%rsp)
	CFI_REMEMBER_STATE
	jne 1f
	movw %es,%cx
	cmpw %cx,0x18(%rsp)
	jne 1f
	movw %fs,%cx
	cmpw %cx,0x20(%rsp)
	jne 1f
	movw %gs,%cx
	cmpw %cx,0x28(%rsp)
	jne 1f
	/* All segments match their saved values => Category 2 (Bad IRET). */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq_cfi $0	/* RIP */
	pushq_cfi %r11
	pushq_cfi %rcx
	jmp general_protection
	CFI_RESTORE_STATE
1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq_cfi $0
	SAVE_ALL
	jmp error_exit
	CFI_ENDPROC
END(xen_failsafe_callback)

apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
	xen_hvm_callback_vector xen_evtchn_do_upcall

#endif /* CONFIG_XEN */

/*
 * Some functions should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"

paranoidzeroentry_ist debug do_debug DEBUG_STACK
paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
paranoiderrorentry stack_segment do_stack_segment
#ifdef CONFIG_XEN
zeroentry xen_debug do_debug
zeroentry xen_int3 do_int3
errorentry xen_stack_segment do_stack_segment
#endif
errorentry general_protection do_general_protection
errorentry page_fault do_page_fault
#ifdef CONFIG_X86_MCE
paranoidzeroentry machine_check *machine_check_vector(%rip)
#endif
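/*
 * On the DEBUG_STACK entries above: debug and int3 run on an IST stack,
 * and paranoidzeroentry_ist additionally shifts the TSS IST slot down by
 * EXCEPTION_STKSZ around the handler call, so a debug exception taken
 * while the handler runs lands on a fresh stack region instead of
 * clobbering the live frame.
 */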
/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only
 * NMI should be common and it does not enable IRQs and
 * cannot get reschedule ticks.
 *
 * "trace" is 0 for the NMI handler only, because irq-tracing
 * is fundamentally NMI-unsafe. (we cannot change the soft and
 * hard flags at once, atomically)
 */

	/* ebx:	no swapgs flag */
ENTRY(paranoid_exit)
	INTR_FRAME
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	testl %ebx,%ebx			/* swapgs needed? */
	jnz paranoid_restore
	testl $3,CS(%rsp)
	jnz paranoid_userspace
paranoid_swapgs:
	TRACE_IRQS_IRETQ 0
	SWAPGS_UNSAFE_STACK
	RESTORE_ALL 8
	jmp irq_return
paranoid_restore:
	TRACE_IRQS_IRETQ 0
	RESTORE_ALL 8
	jmp irq_return
paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp paranoid_userspace
paranoid_schedule:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	jmp paranoid_userspace
	CFI_ENDPROC
END(paranoid_exit)

/*
 * Exception entry point. This expects an error code/orig_rax on the stack.
 * returns in "no swapgs flag" in %ebx.
 */
ENTRY(error_entry)
	XCPT_FRAME
	CFI_ADJUST_CFA_OFFSET 15*8
	/* oldrax contains error code */
	cld
	movq_cfi rdi, RDI+8
	movq_cfi rsi, RSI+8
	movq_cfi rdx, RDX+8
	movq_cfi rcx, RCX+8
	movq_cfi rax, RAX+8
	movq_cfi r8, R8+8
	movq_cfi r9, R9+8
	movq_cfi r10, R10+8
	movq_cfi r11, R11+8
	movq_cfi rbx, RBX+8
	movq_cfi rbp, RBP+8
	movq_cfi r12, R12+8
	movq_cfi r13, R13+8
	movq_cfi r14, R14+8
	movq_cfi r15, R15+8
	xorl %ebx,%ebx
	testl $3,CS+8(%rsp)
	je error_kernelspace
error_swapgs:
	SWAPGS
error_sti:
	TRACE_IRQS_OFF
	ret
	CFI_ENDPROC

/*
 * There are two places in the kernel that can potentially fault with
 * usergs. Handle them here. The exception handlers after iret run with
 * kernel gs again, so don't set the user space flag. B stepping K8s
 * sometimes report a truncated RIP for IRET exceptions returning to
 * compat mode. Check for these here too.
 */
error_kernelspace:
	incl %ebx
	leaq irq_return(%rip),%rcx
	cmpq %rcx,RIP+8(%rsp)
	je error_swapgs
	movl %ecx,%eax			/* zero extend */
	cmpq %rax,RIP+8(%rsp)
	je bstep_iret
	cmpq $gs_change,RIP+8(%rsp)
	je error_swapgs
	jmp error_sti

bstep_iret:
	/* Fix truncated RIP */
	movq %rcx,RIP+8(%rsp)
	jmp error_swapgs
END(error_entry)


/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
ENTRY(error_exit)
	DEFAULT_FRAME
	movl %ebx,%eax
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne retint_kernel
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	andl %edi,%edx
	jnz retint_careful
	jmp retint_swapgs
	CFI_ENDPROC
END(error_exit)


	/* runs on exception stack */
ENTRY(nmi)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq_cfi $-1
	subq $15*8, %rsp
	CFI_ADJUST_CFA_OFFSET 15*8
	call save_paranoid
	DEFAULT_FRAME 0
	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
	movq %rsp,%rdi
	movq $-1,%rsi
	call do_nmi
#ifdef CONFIG_TRACE_IRQFLAGS
	/* paranoidexit; without TRACE_IRQS_OFF */
	/* ebx:	no swapgs flag */
	DISABLE_INTERRUPTS(CLBR_NONE)
	testl %ebx,%ebx			/* swapgs needed? */
	jnz nmi_restore
	testl $3,CS(%rsp)
	jnz nmi_userspace
nmi_swapgs:
	SWAPGS_UNSAFE_STACK
nmi_restore:
	RESTORE_ALL 8
	jmp irq_return
nmi_userspace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz nmi_swapgs
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz nmi_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	jmp nmi_userspace
nmi_schedule:
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	jmp nmi_userspace
	CFI_ENDPROC
#else
	jmp paranoid_exit
	CFI_ENDPROC
#endif
END(nmi)

ENTRY(ignore_sysret)
	CFI_STARTPROC
	mov $-ENOSYS,%eax
	sysret
	CFI_ENDPROC
END(ignore_sysret)

/*
 * End of kprobes section
 */
	.popsection