/* locore.S revision 1.113 */
1/* $OpenBSD: locore.S,v 1.113 2019/01/24 00:00:50 deraadt Exp $ */ 2/* $NetBSD: locore.S,v 1.13 2004/03/25 18:33:17 drochner Exp $ */ 3 4/* 5 * Copyright-o-rama! 6 */ 7 8/* 9 * Copyright (c) 2001 Wasabi Systems, Inc. 10 * All rights reserved. 11 * 12 * Written by Frank van der Linden for Wasabi Systems, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. All advertising materials mentioning features or use of this software 23 * must display the following acknowledgement: 24 * This product includes software developed for the NetBSD Project by 25 * Wasabi Systems, Inc. 26 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 27 * or promote products derived from this software without specific prior 28 * written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 32 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 33 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL WASABI SYSTEMS, INC 34 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 35 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 36 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 37 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 38 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 39 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 40 * POSSIBILITY OF SUCH DAMAGE. 41 */ 42 43 44/*- 45 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. 46 * All rights reserved. 47 * 48 * This code is derived from software contributed to The NetBSD Foundation 49 * by Charles M. Hannum. 50 * 51 * Redistribution and use in source and binary forms, with or without 52 * modification, are permitted provided that the following conditions 53 * are met: 54 * 1. Redistributions of source code must retain the above copyright 55 * notice, this list of conditions and the following disclaimer. 56 * 2. Redistributions in binary form must reproduce the above copyright 57 * notice, this list of conditions and the following disclaimer in the 58 * documentation and/or other materials provided with the distribution. 59 * 60 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 61 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 62 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 63 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 64 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 65 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 66 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 67 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 68 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 69 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 70 * POSSIBILITY OF SUCH DAMAGE. 71 */ 72 73/*- 74 * Copyright (c) 1990 The Regents of the University of California. 75 * All rights reserved. 76 * 77 * This code is derived from software contributed to Berkeley by 78 * William Jolitz. 79 * 80 * Redistribution and use in source and binary forms, with or without 81 * modification, are permitted provided that the following conditions 82 * are met: 83 * 1. Redistributions of source code must retain the above copyright 84 * notice, this list of conditions and the following disclaimer. 85 * 2. Redistributions in binary form must reproduce the above copyright 86 * notice, this list of conditions and the following disclaimer in the 87 * documentation and/or other materials provided with the distribution. 88 * 3. Neither the name of the University nor the names of its contributors 89 * may be used to endorse or promote products derived from this software 90 * without specific prior written permission. 91 * 92 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 93 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 95 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)locore.s	7.3 (Berkeley) 5/13/91
 */

#include "assym.h"
#include "lapic.h"
#include "ksyms.h"
#include "xen.h"
#include "hyperv.h"

#include <sys/syscall.h>

#include <machine/param.h>
#include <machine/codepatch.h>
#include <machine/psl.h>
#include <machine/segments.h>
#include <machine/specialreg.h>
#include <machine/trap.h>			/* T_PROTFLT */
#include <machine/frameasm.h>

#if NLAPIC > 0
#include <machine/i82489reg.h>
#endif

/*
 * override user-land alignment before including asm.h
 */
#define	ALIGN_DATA	.align	8,0xcc

#include <machine/asm.h>

/*
 * Make "proc" the current process on "cpu": store the cpu_info self
 * pointer in "cpu", publish the proc in ci_curproc and point
 * p->p_cpu back at the cpu.  Clobbers the "cpu" register.
 */
#define	SET_CURPROC(proc,cpu)		\
	movq	CPUVAR(SELF),cpu	;	\
	movq	proc,CPUVAR(CURPROC)	;	\
	movq	cpu,P_CPU(proc)

#define	GET_CURPCB(reg)		movq	CPUVAR(CURPCB),reg
#define	SET_CURPCB(reg)		movq	reg,CPUVAR(CURPCB)


/*
 * Initialization
 */
	.data

#if NLAPIC > 0
	/*
	 * One page laid out to mirror the local APIC register window;
	 * the labels below land on the register offsets from i82489reg.h.
	 */
	.align	NBPG, 0xcc
	.globl	_C_LABEL(local_apic), _C_LABEL(lapic_id), _C_LABEL(lapic_tpr)
_C_LABEL(local_apic):
	.space	LAPIC_ID
_C_LABEL(lapic_id):
	.long	0x00000000
	.space	LAPIC_TPRI-(LAPIC_ID+4)
_C_LABEL(lapic_tpr):
	.space	LAPIC_PPRI-LAPIC_TPRI
_C_LABEL(lapic_ppr):
	.space	LAPIC_ISR-LAPIC_PPRI
_C_LABEL(lapic_isr):
	.space	NBPG-LAPIC_ISR
#endif

	.globl	_C_LABEL(cpu_id),_C_LABEL(cpu_vendor)
	.globl	_C_LABEL(cpuid_level),_C_LABEL(cpu_feature)
	.globl	_C_LABEL(cpu_ebxfeature)
	.globl	_C_LABEL(cpu_ecxfeature),_C_LABEL(ecpu_ecxfeature)
	.globl	_C_LABEL(cpu_perf_eax)
	.globl	_C_LABEL(cpu_perf_ebx)
	.globl	_C_LABEL(cpu_perf_edx)
	.globl	_C_LABEL(cpu_apmi_edx)
	.globl	_C_LABEL(ssym),_C_LABEL(esym),_C_LABEL(boothowto)
	.globl	_C_LABEL(bootdev)
	.globl	_C_LABEL(bootinfo), _C_LABEL(bootinfo_size), _C_LABEL(atdevbase)
	.globl	_C_LABEL(proc0paddr),_C_LABEL(PTDpaddr)
	.globl	_C_LABEL(biosbasemem)
	.globl	_C_LABEL(bootapiver)
	.globl	_C_LABEL(pg_nx)
	.globl	_C_LABEL(pg_g_kern)
	.globl	_C_LABEL(cpu_meltdown)
_C_LABEL(cpu_id):	.long	0	# saved from `cpuid' instruction
_C_LABEL(cpu_feature):	.long	0	# feature flags from 'cpuid'
					#   instruction
_C_LABEL(cpu_ebxfeature):.long	0	# ext. ebx feature flags from 'cpuid'
_C_LABEL(cpu_ecxfeature):.long	0	# ext. ecx feature flags from 'cpuid'
_C_LABEL(ecpu_ecxfeature):.long	0	# extended ecx feature flags
_C_LABEL(cpu_perf_eax):	.long	0	# arch. perf. mon. flags from 'cpuid'
_C_LABEL(cpu_perf_ebx):	.long	0	# arch. perf. mon. flags from 'cpuid'
_C_LABEL(cpu_perf_edx):	.long	0	# arch. perf. mon. flags from 'cpuid'
_C_LABEL(cpu_apmi_edx):	.long	0	# adv. power mgmt. info. from 'cpuid'
_C_LABEL(cpuid_level):	.long	-1	# max. level accepted by 'cpuid'
					#   instruction
_C_LABEL(cpu_vendor):	.space	16	# vendor string returned by `cpuid'
					#   instruction
_C_LABEL(ssym):		.quad	0	# ptr to start of syms
_C_LABEL(esym):		.quad	0	# ptr to end of syms
_C_LABEL(atdevbase):	.quad	0	# location of start of iomem in virtual
_C_LABEL(bootapiver):	.long	0	# /boot API version
_C_LABEL(bootdev):	.long	0	# device we booted from
_C_LABEL(proc0paddr):	.quad	0	# paddr of proc0's u-area/pcb
_C_LABEL(PTDpaddr):	.quad	0	# paddr of PTD, for libkvm
#ifndef REALBASEMEM
_C_LABEL(biosbasemem):	.long	0	# base memory reported by BIOS
#else
_C_LABEL(biosbasemem):	.long	REALBASEMEM
#endif
#ifndef REALEXTMEM
_C_LABEL(biosextmem):	.long	0	# extended memory reported by BIOS
#else
_C_LABEL(biosextmem):	.long	REALEXTMEM
#endif
_C_LABEL(pg_nx):	.quad	0	# NX PTE bit (if CPU supports)
_C_LABEL(pg_g_kern):	.quad	0	# 0x100 if global pages should be used
					# in kernel mappings, 0 otherwise (for
					# insecure CPUs)
_C_LABEL(cpu_meltdown):	.long	0	# 1 if this CPU has Meltdown

/*****************************************************************************/

/*
 * Signal trampoline; copied to a page mapped into userspace.
 * gdb's backtrace logic matches against the instructions in this.
 */
	.section .rodata
	.globl	_C_LABEL(sigcode)
_C_LABEL(sigcode):
	/*
	 * Invoke the signal handler (presumably its address was placed
	 * in %rax by the kernel when building the signal frame — the
	 * retpoline at 1: below jumps through %rax), then do a
	 * sigreturn with the sigcontext left on the stack.
	 */
	call	1f
	movq	%rsp,%rdi
	pushq	%rdi			/* fake return address */
	movq	$SYS_sigreturn,%rax
	syscall
	.globl	_C_LABEL(sigcoderet)
_C_LABEL(sigcoderet):
	/* sigreturn failed: there is nothing sane left to do but exit */
	movq	$SYS_exit,%rax
	syscall
	_ALIGN_TRAPS
1:	JMP_RETPOLINE(rax)
	.globl	_C_LABEL(esigcode)
_C_LABEL(esigcode):

	/* pattern used to fill the rest of the sigcode page: trap if hit */
	.globl	_C_LABEL(sigfill)
_C_LABEL(sigfill):
	int3
_C_LABEL(esigfill):
	.globl	_C_LABEL(sigfillsiz)
_C_LABEL(sigfillsiz):
	.long	_C_LABEL(esigfill) - _C_LABEL(sigfill)

	.text
/*
 * void lgdt(struct region_descriptor *rdp);
 * Change the global descriptor table.
 */
NENTRY(lgdt)
	RETGUARD_SETUP(lgdt, r11)
	/* Reload the descriptor table. */
	movq	%rdi,%rax
	lgdt	(%rax)
	/* Flush the prefetch q. */
	jmp	1f
	nop
1:	/* Reload "stale" selectors. */
	movl	$GSEL(GDATA_SEL, SEL_KPL),%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%ss
	/* Reload code selector by doing intersegment return. */
	popq	%rax
	pushq	$GSEL(GCODE_SEL, SEL_KPL)
	pushq	%rax
	RETGUARD_CHECK(lgdt, r11)
	lretq

/*
 * int setjmp(jmp_buf)
 * Save callee-saved registers and the return %rip into the buffer
 * at %rdi; returns 0 (the matching longjmp() returns 1 here).
 */
ENTRY(setjmp)
	/*
	 * Only save registers that must be preserved across function
	 * calls according to the ABI (%rbx, %rsp, %rbp, %r12-%r15)
	 * and %rip.
	 */
	movq	%rdi,%rax
	movq	%rbx,(%rax)
	movq	%rsp,8(%rax)
	movq	%rbp,16(%rax)
	movq	%r12,24(%rax)
	movq	%r13,32(%rax)
	movq	%r14,40(%rax)
	movq	%r15,48(%rax)
	movq	(%rsp),%rdx		/* caller's return address */
	movq	%rdx,56(%rax)
	xorl	%eax,%eax		/* return 0 on direct invocation */
	ret

/*
 * void longjmp(jmp_buf)
 * Restore the context saved by setjmp() from the buffer at %rdi and
 * resume there; the resumed setjmp() call returns 1.
 */
ENTRY(longjmp)
	movq	%rdi,%rax
	movq	(%rax),%rbx
	movq	8(%rax),%rsp
	movq	16(%rax),%rbp
	movq	24(%rax),%r12
	movq	32(%rax),%r13
	movq	40(%rax),%r14
	movq	48(%rax),%r15
	movq	56(%rax),%rdx		/* saved %rip... */
	movq	%rdx,(%rsp)		/* ...becomes our return address */
	xorl	%eax,%eax
	incl	%eax			/* return 1 at the setjmp() site */
	ret

/*****************************************************************************/

/*
 * int cpu_switchto(struct proc *old, struct proc *new)
 * Switch from "old" proc to "new".
 */
ENTRY(cpu_switchto)
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	movq	%rdi, %r13		/* old proc */
	movq	%rsi, %r12		/* new proc */

	/* Record new proc. */
	movb	$SONPROC,P_STAT(%r12)	# p->p_stat = SONPROC
	SET_CURPROC(%r12,%rcx)

	movl	CPUVAR(CPUID),%r9d

	/* for the FPU/"extended CPU state" handling below */
	movq	xsave_mask(%rip),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx

	/* If old proc exited, don't bother. */
	testq	%r13,%r13
	jz	switch_exited

	/*
	 * Save old context.
	 *
	 * Registers:
	 *   %rax, %rcx - scratch
	 *   %r13 - old proc, then old pcb
	 *   %r12 - new proc
	 *   %r9d - cpuid
	 */

	movq	P_ADDR(%r13),%r13

	/* clear the old pmap's bit for the cpu */
	movq	PCB_PMAP(%r13),%rcx
	lock
	btrq	%r9,PM_CPUS(%rcx)

	/* Save stack pointers. */
	movq	%rsp,PCB_RSP(%r13)
	movq	%rbp,PCB_RBP(%r13)

	/*
	 * If the old proc ran in userspace then save the
	 * floating-point/"extended state" registers
	 */
	testl	$CPUF_USERXSTATE,CPUVAR(FLAGS)
	jz	.Lxstate_reset

	movq	%r13, %rdi
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	/* codepatched to xsave64 etc. if the CPU supports it */
	CODEPATCH_START
	.byte 0x48; fxsave	(%rdi)		/* really fxsave64 */
	CODEPATCH_END(CPTAG_XSAVE)

switch_exited:
	/* now clear the xstate */
	movq	proc0paddr(%rip),%rdi
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	CODEPATCH_START
	.byte 0x48; fxrstor	(%rdi)		/* really fxrstor64 */
	CODEPATCH_END(CPTAG_XRSTOR)
	andl	$~CPUF_USERXSTATE,CPUVAR(FLAGS)

.Lxstate_reset:
	/*
	 * If the segment registers haven't been reset since the old proc
	 * ran in userspace then reset them now
	 */
	testl	$CPUF_USERSEGS,CPUVAR(FLAGS)
	jz	restore_saved
	andl	$~CPUF_USERSEGS,CPUVAR(FLAGS)

	/* set %ds, %es, %fs, and %gs to expected value to prevent info leak */
	movw	$(GSEL(GUDATA_SEL, SEL_UPL)),%ax
	movw	%ax,%ds
	movw	%ax,%es
	movw	%ax,%fs
	cli			/* block interrupts when on user GS.base */
	swapgs			/* switch from kernel to user GS.base */
	movw	%ax,%gs		/* set %gs to UDATA and GS.base to 0 */
	swapgs			/* back to kernel GS.base */

restore_saved:
	/*
	 * Restore saved context.
	 *
	 * Registers:
	 *   %rax, %rcx, %rdx - scratch
	 *   %r13 - new pcb
	 *   %r12 - new process
	 */

	/* No interrupts while loading new state. */
	cli
	movq	P_ADDR(%r12),%r13

	/* Restore stack pointers. */
	movq	PCB_RSP(%r13),%rsp
	movq	PCB_RBP(%r13),%rbp

	/* Stack pivot done, setup RETGUARD */
	RETGUARD_SETUP_OFF(cpu_switchto, r11, 6*8)

	/* don't switch cr3 to the same thing it already was */
	movq	%cr3,%rax
	cmpq	PCB_CR3(%r13),%rax
	movq	PCB_CR3(%r13),%rax	/* flags from cmpq unchanged */
	jz	.Lsame_cr3

	movq	%rax,%cr3		/* %rax used below too */

.Lsame_cr3:
	/*
	 * If we switched from a userland thread with a shallow call stack
	 * (e.g interrupt->ast->mi_ast->prempt->mi_switch->cpu_switchto)
	 * then the RSB may have attacker controlled entries when we switch
	 * to a deeper call stack in the new thread.  Refill the RSB with
	 * entries safe to speculate into/through.
	 */
	RET_STACK_REFILL_WITH_RCX

	/* Don't bother with the rest if switching to a system process. */
	testl	$P_SYSTEM,P_FLAG(%r12)
	jnz	switch_restored

	/* record the bits needed for future U-->K transition */
	movq	PCB_KSTACK(%r13),%rdx
	subq	$FRAMESIZE,%rdx
	movq	%rdx,CPUVAR(KERN_RSP)
	movq	PCB_PMAP(%r13),%rcx

	CODEPATCH_START
	/*
	 * Meltdown: iff we're doing separate U+K and U-K page tables,
	 * then record them in cpu_info for easy access in syscall and
	 * interrupt trampolines.
	 */
	movq	PM_PDIRPA_INTEL(%rcx),%rdx
	orq	cr3_reuse_pcid,%rax
	orq	cr3_pcid_proc_intel,%rdx
	movq	%rax,CPUVAR(KERN_CR3)
	movq	%rdx,CPUVAR(USER_CR3)
	CODEPATCH_END(CPTAG_MELTDOWN_NOP)

	/* set the new pmap's bit for the cpu */
	lock
	btsq	%r9,PM_CPUS(%rcx)
#ifdef DIAGNOSTIC
	jc	_C_LABEL(switch_pmcpu_set)
#endif

switch_restored:
	SET_CURPCB(%r13)

	/* Interrupts are okay again. */
	sti
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	RETGUARD_CHECK(cpu_switchto, r11)
	ret

/*
 * Idle hooks: if the corresponding function pointer is set, tail-call
 * it through the retpoline below; otherwise just return.
 */
ENTRY(cpu_idle_enter)
	movq	_C_LABEL(cpu_idle_enter_fcn),%rax
	cmpq	$0,%rax
	jne	retpoline_rax
	ret

ENTRY(cpu_idle_leave)
	movq	_C_LABEL(cpu_idle_leave_fcn),%rax
	cmpq	$0,%rax
	jne	retpoline_rax
	ret

/* placed here for correct static branch prediction in cpu_idle_* */
NENTRY(retpoline_rax)
	JMP_RETPOLINE(rax)

ENTRY(cpu_idle_cycle)
	movq	_C_LABEL(cpu_idle_cycle_fcn),%rax
	cmpq	$0,%rax
	jne	retpoline_rax
	sti			/* default: enable interrupts and halt */
	hlt
	ret

	.globl	_C_LABEL(panic)

#ifdef DIAGNOSTIC
NENTRY(switch_pmcpu_set)
	leaq	switch_active(%rip),%rdi
	call	_C_LABEL(panic)
	/* NOTREACHED */

	.section .rodata
switch_active:
	.asciz	"activate already active pmap"
	.text
#endif /* DIAGNOSTIC */
/*
 * savectx(struct pcb *pcb);
 * Update pcb, saving current processor state.
 */
ENTRY(savectx)
	RETGUARD_SETUP(savectx, r11)
	/* Save stack pointers. */
	movq	%rsp,PCB_RSP(%rdi)
	movq	%rbp,PCB_RBP(%rdi)
	RETGUARD_CHECK(savectx, r11)
	ret

/* 32-bit SYSCALL is not supported: bounce straight back to userspace */
IDTVEC(syscall32)
	sysret		/* go away please */

/*
 * syscall insn entry.
 * Enter here with interrupts blocked; %rcx contains the caller's
 * %rip and the original rflags has been copied to %r11.  %cs and
 * %ss have been updated to the kernel segments, but %rsp is still
 * the user-space value.
 * First order of business is to swap to the kernel GS.base so that
 * we can access our struct cpu_info.  After possibly mucking with
 * pagetables, we switch to our kernel stack.  Once that's in place
 * we can unblock interrupts and save the rest of the syscall frame.
 */
KUTEXT_PAGE_START
	.align	NBPG, 0xcc
XUsyscall_meltdown:
	/*
	 * This is the real Xsyscall_meltdown page, which is mapped into
	 * the U-K page tables at the same location as Xsyscall_meltdown
	 * below.  For this, the Meltdown case, we use the scratch space
	 * in cpu_info so we can switch to the kernel page tables
	 * (thank you, Intel), at which point we'll continue at the
	 * "movq CPUVAR(KERN_RSP),%rax" after Xsyscall below.
	 * In case the CPU speculates past the mov to cr3, we put a
	 * retpoline-style pause-jmp-to-pause loop.
	 */
	swapgs
	movq	%rax,CPUVAR(SCRATCH)
	movq	CPUVAR(KERN_CR3),%rax
	movq	%rax,%cr3
0:	pause
	lfence
	jmp	0b
KUTEXT_PAGE_END

KTEXT_PAGE_START
	.align	NBPG, 0xcc
IDTVEC_NOALIGN(syscall_meltdown)
	/* pad to match real Xsyscall_meltdown positioning above */
	movq	CPUVAR(KERN_CR3),%rax
	movq	%rax,%cr3
IDTVEC_NOALIGN(syscall)
	swapgs
	movq	%rax,CPUVAR(SCRATCH)
	movq	CPUVAR(KERN_RSP),%rax
	xchgq	%rax,%rsp	/* %rax now holds the user %rsp */
	movq	%rcx,TF_RCX(%rsp)
	movq	%rcx,TF_RIP(%rsp)
	RET_STACK_REFILL_WITH_RCX
	sti

	/*
	 * XXX don't need this whole frame, split of the
	 * syscall frame and trapframe is needed.
	 * First, leave some room for the trapno, error,
	 * ss:rsp, etc, so that all GP registers can be
	 * saved.  Then, fill in the rest.
	 */
	movq	$(GSEL(GUDATA_SEL, SEL_UPL)),TF_SS(%rsp)
	movq	%rax,TF_RSP(%rsp)
	movq	CPUVAR(SCRATCH),%rax
	INTR_SAVE_MOST_GPRS_NO_ADJ
	movq	%r11, TF_RFLAGS(%rsp)	/* old rflags from syscall insn */
	movq	$(GSEL(GUCODE_SEL, SEL_UPL)), TF_CS(%rsp)
	movq	%rax,TF_ERR(%rsp)	/* stash syscall # for SPL check */
	INTR_CLEAR_GPRS

	movq	CPUVAR(CURPROC),%r14
	movq	%rsp,P_MD_REGS(%r14)	# save pointer to frame
	andl	$~MDP_IRET,P_MD_FLAGS(%r14)
	movq	%rsp,%rdi
	call	_C_LABEL(syscall)

.Lsyscall_check_asts:
	/* Check for ASTs on exit to user mode. */
	cli
	CHECK_ASTPENDING(%r11)
	je	2f
	CLEAR_ASTPENDING(%r11)
	sti
	movq	%rsp,%rdi
	call	_C_LABEL(ast)
	jmp	.Lsyscall_check_asts

2:
#ifdef DIAGNOSTIC
	cmpl	$IPL_NONE,CPUVAR(ILEVEL)
	jne	.Lsyscall_spl_not_lowered
#endif /* DIAGNOSTIC */

	/* Could registers have been changed that require an iretq? */
	testl	$MDP_IRET, P_MD_FLAGS(%r14)
	jne	intr_user_exit_post_ast

	/* Restore FPU/"extended CPU state" if it's not already in the CPU */
	testl	$CPUF_USERXSTATE,CPUVAR(FLAGS)
	jz	.Lsyscall_restore_xstate

	/* Restore FS.base if it's not already in the CPU */
	testl	$CPUF_USERSEGS,CPUVAR(FLAGS)
	jz	.Lsyscall_restore_fsbase

.Lsyscall_restore_registers:
	RET_STACK_REFILL_WITH_RCX

	movq	TF_RDI(%rsp),%rdi
	movq	TF_RSI(%rsp),%rsi
	movq	TF_R8(%rsp),%r8
	movq	TF_R9(%rsp),%r9
	movq	TF_R10(%rsp),%r10
	movq	TF_R12(%rsp),%r12
	movq	TF_R13(%rsp),%r13
	movq	TF_R14(%rsp),%r14
	movq	TF_R15(%rsp),%r15
	movq	TF_RBP(%rsp),%rbp
	movq	TF_RBX(%rsp),%rbx

	/*
	 * We need to finish reading from the trapframe, then switch
	 * to the user page tables, swapgs, and return.  We need
	 * to get the final value for the register that was used
	 * for the mov to %cr3 from somewhere accessible on the
	 * user page tables, so save it in CPUVAR(SCRATCH) across
	 * the switch.
	 */
	movq	TF_RDX(%rsp),%rdx
	movq	TF_RAX(%rsp),%rax
	movq	TF_RIP(%rsp),%rcx	/* sysretq loads %rip from %rcx... */
	movq	TF_RFLAGS(%rsp),%r11	/* ...and rflags from %r11 */
	movq	TF_RSP(%rsp),%rsp
	CODEPATCH_START
	movq	%rax,CPUVAR(SCRATCH)
	movq	CPUVAR(USER_CR3),%rax
	PCID_SET_REUSE_NOP
	movq	%rax,%cr3
Xsyscall_trampback:
0:	pause
	lfence
	jmp	0b
	CODEPATCH_END(CPTAG_MELTDOWN_NOP)
	swapgs
	sysretq
KTEXT_PAGE_END

/*
 * U-K mapped continuation of the Meltdown return path: padded so the
 * code below sits at the same page offset as Xsyscall_trampback above.
 */
KUTEXT_PAGE_START
	.space	(Xsyscall_trampback - Xsyscall_meltdown) - \
		(. - XUsyscall_meltdown), 0xcc
	movq	%rax,%cr3
	movq	CPUVAR(SCRATCH),%rax
	swapgs
	sysretq
KUTEXT_PAGE_END

	.text
	_ALIGN_TRAPS
	/* in this case, need FS.base but not xstate, rarely happens */
.Lsyscall_restore_fsbase:	/* CPU doesn't have curproc's FS.base */
	orl	$CPUF_USERSEGS,CPUVAR(FLAGS)
	movq	CPUVAR(CURPCB),%rdi
	jmp	.Lsyscall_restore_fsbase_real

	_ALIGN_TRAPS
.Lsyscall_restore_xstate:	/* CPU doesn't have curproc's xstate */
	orl	$(CPUF_USERXSTATE|CPUF_USERSEGS),CPUVAR(FLAGS)
	movq	CPUVAR(CURPCB),%rdi
	movq	xsave_mask(%rip),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	/* untouched state so can't fault */
	CODEPATCH_START
	.byte 0x48; fxrstor	(%rdi)		/* really fxrstor64 */
	CODEPATCH_END(CPTAG_XRSTOR)
#if PCB_SAVEFPU != 0
	subq	$PCB_SAVEFPU,%rdi
#endif
.Lsyscall_restore_fsbase_real:
	movq	PCB_FSBASE(%rdi),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	movl	$MSR_FSBASE,%ecx
	wrmsr
	jmp	.Lsyscall_restore_registers

#ifdef DIAGNOSTIC
.Lsyscall_spl_not_lowered:
	leaq	spl_lowered(%rip), %rdi
	movl	TF_ERR(%rsp),%esi	/* syscall # stashed above */
	movl	TF_RDI(%rsp),%edx
	movl	%ebx,%ecx
	movl	CPUVAR(ILEVEL),%r8d
	xorq	%rax,%rax		/* no vector args for printf */
	call	_C_LABEL(printf)
#ifdef DDB
	int	$3
#endif /* DDB */
	movl	$IPL_NONE,CPUVAR(ILEVEL)
	jmp	.Lsyscall_check_asts

	.section .rodata
spl_lowered:
	.asciz	"WARNING: SPL NOT LOWERED ON SYSCALL %d %d EXIT %x %x\n"
	.text
#endif

/*
 * First-dispatch point for newly forked threads: run the MD/MP setup,
 * call the function in %r12 with argument %r13, then return to
 * userspace via the syscall exit path.
 */
NENTRY(proc_trampoline)
#ifdef MULTIPROCESSOR
	call	_C_LABEL(proc_trampoline_mp)
#endif
	movl	$IPL_NONE,CPUVAR(ILEVEL)
	movq	%r13,%rdi
	movq	%r12,%rax
	call	retpoline_rax
	movq	CPUVAR(CURPROC),%r14
	jmp	.Lsyscall_check_asts


/*
 * Returning to userspace via iretq.  We do things in this order:
 *  - check for ASTs
 *  - restore FPU/"extended CPU state" if it's not already in the CPU
 *  - DIAGNOSTIC: no more C calls after this, so check the SPL
 *  - restore FS.base if it's not already in the CPU
 *  - restore most registers
 *  - update the iret frame from the trapframe
 *  - finish reading from the trapframe
 *  - switch to the trampoline stack	\
 *  - jump to the .kutext segment	|-- Meltdown workaround
 *  - switch to the user page tables	/
 *  - swapgs
 *  - iretq
 */
KTEXT_PAGE_START
	_ALIGN_TRAPS
GENTRY(intr_user_exit)
#ifdef DIAGNOSTIC
	pushfq
	popq	%rdx
	testq	$PSL_I,%rdx
	jnz	.Lintr_user_exit_not_blocked
#endif /* DIAGNOSTIC */

	/* Check for ASTs */
	CHECK_ASTPENDING(%r11)
	je	intr_user_exit_post_ast
	CLEAR_ASTPENDING(%r11)
	sti
	movq	%rsp,%rdi
	call	_C_LABEL(ast)
	cli
	jmp	intr_user_exit

intr_user_exit_post_ast:
	/* Restore FPU/"extended CPU state" if it's not already in the CPU */
	testl	$CPUF_USERXSTATE,CPUVAR(FLAGS)
	jz	.Lintr_restore_xstate

#ifdef DIAGNOSTIC
	/* no more C calls after this, so check the SPL */
	cmpl	$0,CPUVAR(ILEVEL)
	jne	.Luser_spl_not_lowered
#endif /* DIAGNOSTIC */

	/* Restore FS.base if it's not already in the CPU */
	testl	$CPUF_USERSEGS,CPUVAR(FLAGS)
	jz	.Lintr_restore_fsbase

.Lintr_restore_registers:
	RET_STACK_REFILL_WITH_RCX

	movq	TF_RDI(%rsp),%rdi
	movq	TF_RSI(%rsp),%rsi
	movq	TF_R8(%rsp),%r8
	movq	TF_R9(%rsp),%r9
	movq	TF_R10(%rsp),%r10
	movq	TF_R12(%rsp),%r12
	movq	TF_R13(%rsp),%r13
	movq	TF_R14(%rsp),%r14
	movq	TF_R15(%rsp),%r15
	movq	TF_RBP(%rsp),%rbp
	movq	TF_RBX(%rsp),%rbx

	/*
	 * To get the final value for the register that was used
	 * for the mov to %cr3, we need access to somewhere accessible
	 * on the user page tables, so we save it in CPUVAR(SCRATCH)
	 * across the switch.
	 */
	/* update iret frame */
	movq	CPUVAR(INTR_RSP),%rdx
	movq	$(GSEL(GUCODE_SEL,SEL_UPL)),IRETQ_CS(%rdx)
	movq	TF_RIP(%rsp),%rax
	movq	%rax,IRETQ_RIP(%rdx)
	movq	TF_RFLAGS(%rsp),%rax
	movq	%rax,IRETQ_RFLAGS(%rdx)
	movq	TF_RSP(%rsp),%rax
	movq	%rax,IRETQ_RSP(%rdx)
	movq	$(GSEL(GUDATA_SEL,SEL_UPL)),IRETQ_SS(%rdx)
	/* finish with the trap frame */
	movq	TF_RAX(%rsp),%rax
	movq	TF_RCX(%rsp),%rcx
	movq	TF_R11(%rsp),%r11
	/* switch to the trampoline stack */
	xchgq	%rdx,%rsp
	movq	TF_RDX(%rdx),%rdx
	CODEPATCH_START
	movq	%rax,CPUVAR(SCRATCH)
	movq	CPUVAR(USER_CR3),%rax
	PCID_SET_REUSE_NOP
	movq	%rax,%cr3
Xiretq_trampback:
KTEXT_PAGE_END
/* the movq %cr3 switches to this "KUTEXT" page */
KUTEXT_PAGE_START
	.space	(Xiretq_trampback - Xsyscall_meltdown) - \
		(. - XUsyscall_meltdown), 0xcc
	movq	CPUVAR(SCRATCH),%rax
.Liretq_swapgs:
	swapgs
doreti_iret_meltdown:
	iretq
KUTEXT_PAGE_END
/*
 * Back to the "KTEXT" page to fill in the speculation trap and the
 * swapgs+iretq used for non-Meltdown kernels.  This switching back
 * and forth between segments is so that we can do the .space
 * calculation below to guarantee the iretq's above and below line
 * up, so the 'doreti_iret' label lines up with the iretq whether
 * the CPU is affected by Meltdown or not.
 */
KTEXT_PAGE_START
0:	pause
	lfence
	jmp	0b
	.space	(.Liretq_swapgs - XUsyscall_meltdown) - \
		(. - Xsyscall_meltdown), 0xcc
	CODEPATCH_END(CPTAG_MELTDOWN_NOP)
	swapgs

	.globl	_C_LABEL(doreti_iret)
_C_LABEL(doreti_iret):
	iretq
KTEXT_PAGE_END

	.text
	_ALIGN_TRAPS
.Lintr_restore_xstate:		/* CPU doesn't have curproc's xstate */
	orl	$CPUF_USERXSTATE,CPUVAR(FLAGS)
	movq	CPUVAR(CURPCB),%rdi
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	movq	xsave_mask(%rip),%rsi
	call	xrstor_user
	testl	%eax,%eax
	jnz	.Lintr_xrstor_faulted
.Lintr_restore_fsbase:		/* CPU doesn't have curproc's FS.base */
	orl	$CPUF_USERSEGS,CPUVAR(FLAGS)
	movq	CPUVAR(CURPCB),%rdx
	movq	PCB_FSBASE(%rdx),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	movl	$MSR_FSBASE,%ecx
	wrmsr
	jmp	.Lintr_restore_registers

.Lintr_xrstor_faulted:
	/*
	 * xrstor faulted; we need to reset the FPU state and call trap()
	 * to post a signal, which requires interrupts be enabled.
	 */
	sti
	movq	proc0paddr(%rip),%rdi
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	CODEPATCH_START
	.byte 0x48; fxrstor	(%rdi)		/* really fxrstor64 */
	CODEPATCH_END(CPTAG_XRSTOR)
	movq	$T_PROTFLT,TF_TRAPNO(%rsp)
	jmp	recall_trap

#ifdef DIAGNOSTIC
.Lintr_user_exit_not_blocked:
	movl	warn_once(%rip),%edi
	testl	%edi,%edi
	jnz	1f
	incl	%edi
	movl	%edi,warn_once(%rip)
	leaq	.Lnot_blocked(%rip),%rdi
	call	_C_LABEL(printf)
#ifdef DDB
	int	$3
#endif /* DDB */
1:	cli
	jmp	intr_user_exit

.Luser_spl_not_lowered:
	sti
	leaq	intr_spl_lowered(%rip),%rdi
	movl	CPUVAR(ILEVEL),%esi
	xorl	%edx,%edx		/* always SPL zero for userspace */
	xorl	%eax,%eax
	call	_C_LABEL(printf)
#ifdef DDB
	int	$3
#endif /* DDB */
	movl	$0,CPUVAR(ILEVEL)
	cli
	jmp	intr_user_exit

	.section .rodata
intr_spl_lowered:
	.asciz	"WARNING: SPL NOT LOWERED ON TRAP EXIT %x %x\n"
	.text
#endif /* DIAGNOSTIC */


/*
 * Return to supervisor mode from trap or interrupt
 */
NENTRY(intr_fast_exit)
#ifdef DIAGNOSTIC
	pushfq
	popq	%rdx
	testq	$PSL_I,%rdx
	jnz	.Lintr_exit_not_blocked
#endif /* DIAGNOSTIC */
	movq	TF_RDI(%rsp),%rdi
	movq	TF_RSI(%rsp),%rsi
	movq	TF_R8(%rsp),%r8
	movq	TF_R9(%rsp),%r9
	movq	TF_R10(%rsp),%r10
	movq	TF_R12(%rsp),%r12
	movq	TF_R13(%rsp),%r13
	movq	TF_R14(%rsp),%r14
	movq	TF_R15(%rsp),%r15
	movq	TF_RBP(%rsp),%rbp
	movq	TF_RBX(%rsp),%rbx
	movq	TF_RDX(%rsp),%rdx
	movq	TF_RCX(%rsp),%rcx
	movq	TF_R11(%rsp),%r11
	movq	TF_RAX(%rsp),%rax
	addq	$TF_RIP,%rsp		/* pop up to the hardware iret frame */
	iretq

#ifdef DIAGNOSTIC
.Lintr_exit_not_blocked:
	movl	warn_once(%rip),%edi
	testl	%edi,%edi
	jnz	1f
	incl	%edi
	movl	%edi,warn_once(%rip)
	leaq	.Lnot_blocked(%rip),%rdi
	call	_C_LABEL(printf)
#ifdef DDB
	int	$3
#endif /* DDB */
1:	cli
	jmp	intr_fast_exit

	.data
.global warn_once
warn_once:
	.long	0		# only warn once about interrupts-not-blocked
	.section .rodata
.Lnot_blocked:
	.asciz	"WARNING: INTERRUPTS NOT BLOCKED ON INTERRUPT RETURN: 0x%x 0x%x\n"
	.text
#endif

/*
 * FPU/"extended CPU state" handling
 *	int xrstor_user(sfp, mask)
 *		load given state, returns 0/1 if okay/it trapped
 *	void fpusave(sfp)
 *		save current state, but retain it in the FPU
 *	void fpusavereset(sfp)
 *		save current state and reset FPU to initial/kernel state
 *	int xsetbv_user(reg, mask)
 *		load specifed %xcr# register, returns 0/1 if okay/it trapped
 */

ENTRY(xrstor_user)
	RETGUARD_SETUP(xrstor_user, r11)
	movq	%rsi, %rdx		/* mask into %edx:%eax for xrstor */
	movl	%esi, %eax
	shrq	$32, %rdx
	.globl	xrstor_fault
xrstor_fault:
	CODEPATCH_START
	.byte 0x48; fxrstor	(%rdi)		/* really fxrstor64 */
	CODEPATCH_END(CPTAG_XRSTOR)
	xorl	%eax, %eax		/* no fault: return 0 */
	RETGUARD_CHECK(xrstor_user, r11)
	ret
NENTRY(xrstor_resume)
	/* resume point used by the trap handler if xrstor faults */
	movl	$1, %eax
	RETGUARD_CHECK(xrstor_user, r11)
	ret
END(xrstor_user)

ENTRY(fpusave)
	RETGUARD_SETUP(fpusave, r11)
	movq	xsave_mask(%rip),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	CODEPATCH_START
	.byte 0x48; fxsave	(%rdi)		/* really fxsave64 */
	CODEPATCH_END(CPTAG_XSAVE)
	RETGUARD_CHECK(fpusave, r11)
	ret
END(fpusave)

ENTRY(fpusavereset)
	RETGUARD_SETUP(fpusavereset, r11)
	movq	xsave_mask(%rip),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	CODEPATCH_START
	.byte 0x48; fxsave	(%rdi)		/* really fxsave64 */
	CODEPATCH_END(CPTAG_XSAVE)
	/* reset to proc0's (initial/kernel) extended state */
	movq	proc0paddr(%rip),%rdi
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	CODEPATCH_START
	.byte 0x48; fxrstor	(%rdi)		/* really fxrstor64 */
	CODEPATCH_END(CPTAG_XRSTOR)
	RETGUARD_CHECK(fpusavereset, r11)
	ret
END(fpusavereset)

ENTRY(xsetbv_user)
	RETGUARD_SETUP(xsetbv_user, r11)
	movl	%edi, %ecx		/* %xcr register number */
	movq	%rsi, %rdx		/* mask into %edx:%eax */
	movl	%esi, %eax
	shrq	$32, %rdx
	.globl	xsetbv_fault
xsetbv_fault:
	xsetbv
	xorl	%eax, %eax		/* no fault: return 0 */
	RETGUARD_CHECK(xsetbv_user, r11)
	ret
NENTRY(xsetbv_resume)
	/* resume point used by the trap handler if xsetbv faults */
	movl	$1, %eax
	RETGUARD_CHECK(xsetbv_user, r11)
	ret
END(xsetbv_user)

	/* instruction templates used by the codepatch machinery above */
	.section .rodata
	.globl	_C_LABEL(_xrstor)
_C_LABEL(_xrstor):
	.byte 0x48; xrstor	(%rdi)		/* really xrstor64 */

	.globl	_C_LABEL(_xsave)
_C_LABEL(_xsave):
	.byte 0x48; xsave	(%rdi)		/* really xsave64 */

	.globl	_C_LABEL(_xsaveopt)
_C_LABEL(_xsaveopt):
	.byte 0x48; xsaveopt	(%rdi)		/* really xsaveopt64 */

	.globl	_C_LABEL(_pcid_set_reuse)
_C_LABEL(_pcid_set_reuse):
	orl	$(CR3_REUSE_PCID >> 32),CPUVAR(USER_CR3 + 4)

/*
 * void pagezero(vaddr_t page)
 * Zero one page (PAGE_SIZE bytes at %rdi) with non-temporal stores,
 * 32 bytes per iteration; the sfence orders the movnti stores before
 * we return.
 */
ENTRY(pagezero)
	RETGUARD_SETUP(pagezero, r11)
	movq	$-PAGE_SIZE,%rdx	/* %rdx counts -PAGE_SIZE up to 0 */
	subq	%rdx,%rdi		/* point %rdi at the end of the page */
	xorq	%rax,%rax
1:
	movnti	%rax,(%rdi,%rdx)
	movnti	%rax,8(%rdi,%rdx)
	movnti	%rax,16(%rdi,%rdx)
	movnti	%rax,24(%rdi,%rdx)
	addq	$32,%rdx
	jne	1b
	sfence
	RETGUARD_CHECK(pagezero, r11)
	ret

/*
 * int rdmsr_safe(u_int msr, uint64_t *data)
 * Read an MSR, tolerating a #GP fault; returns 0 on success with the
 * value stored in *data, 1 (via rdmsr_resume) if the rdmsr faulted.
 */
ENTRY(rdmsr_safe)
	RETGUARD_SETUP(rdmsr_safe_return, r10)

	movl	%edi, %ecx	/* u_int msr */
	.globl	rdmsr_safe_fault
rdmsr_safe_fault:
	rdmsr
	salq	$32, %rdx	/* combine %edx:%eax into 64-bit %rax */
	movl	%eax, %eax
	orq	%rdx, %rax
	movq	%rax, (%rsi)	/* *data */
	xorq	%rax, %rax	/* return 0: success */

	RETGUARD_CHECK(rdmsr_safe_return, r10)
	ret

NENTRY(rdmsr_resume)
	/* resume point used by the trap handler if rdmsr faults */
	movl	$0x1, %eax
	RETGUARD_CHECK(rdmsr_safe_return, r10)
	ret

#if NXEN > 0
	/* Hypercall page needs to be page aligned */
	.text
	.align	NBPG, 0xcc
	.globl	_C_LABEL(xen_hypercall_page)
_C_LABEL(xen_hypercall_page):
	.skip	0x1000, 0xcc
#endif /* NXEN > 0 */

#if NHYPERV > 0
	/* Hypercall page needs to be page aligned */
	.text
	.align	NBPG, 0xcc
	.globl	_C_LABEL(hv_hypercall_page)
_C_LABEL(hv_hypercall_page):
	.skip	0x1000, 0xcc
#endif /* NHYPERV > 0 */