// Copyright 2016 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

// This file provides real-mode entry points for
// 1) secondary CPU initialization
// 2) suspend-to-RAM wakeup

#include <asm.h>
#include <arch/x86/bootstrap16.h>
#include <arch/x86/descriptor.h>
#include <arch/x86/registers.h>
#include <arch/defines.h>

// This code's only non-PIC instructions are movabs, which can be fixed up
// safely (see gen-kaslr-fixups.sh). This section name is specially known
// by kernel.ld and gen-kaslr-fixups.sh.
.section .text.bootstrap16,"ax",%progbits
.balign PAGE_SIZE

DATA(x86_bootstrap16_start)

.code16
// Real-mode (16-bit) entry point.  Takes the CPU from real mode through
// protected mode into long mode, then far-returns into the 64-bit entry
// whose far pointer lives at BCD_PHYS_LM_ENTRY_OFFSET in the data page.
// On exit %esi holds the physical address of the data page.
FUNCTION_LABEL(x86_bootstrap16_entry)
    // Enter no-fill cache mode (allegedly this is the initial state
    // according to Intel 3A, but on at least one Broadwell the APs can
    // come up with caching enabled): set CD (cache disable), clear NW
    // (not write-through).
    mov %cr0, %ebx
    or $X86_CR0_CD, %ebx
    and $~X86_CR0_NW, %ebx
    mov %ebx, %cr0
0:

    // We cheat a little and don't switch off of our real mode segments in
    // protected mode. In real mode and protected mode, all of our code
    // and data accesses are relative to %cs and %ss, using the real mode
    // segment calculations.

    // setup %ds/%ss to refer to the data region, which sits 0x1000 bytes
    // (0x100 real-mode paragraphs) after the code page
    mov %cs, %si
    add $0x100, %si
    mov %si, %ds
    mov %si, %ss

    // load the GDT pseudo-descriptor stored in the data page
    lgdtl BCD_PHYS_GDTR_OFFSET

    // enter protected mode (but without paging)
    mov %cr0, %ebx
    or $X86_CR0_PE, %ebx
    mov %ebx, %cr0

    // clear instruction prefetch queue
    jmp 0f
0:
    // enable PAE / PGE (PAE is required for long mode)
    mov %cr4, %ecx
    or $(X86_CR4_PAE|X86_CR4_PGE), %ecx
    mov %ecx, %cr4

    // load CR3 with the bootstrap (identity-mapping) PML4
    mov BCD_PHYS_BOOTSTRAP_PML4_OFFSET, %ecx
    mov %ecx, %cr3

    // enable IA-32e mode and indicate support for NX pages.
    // need the latter for once we switch to the real kernel
    // address space.
    mov $X86_MSR_IA32_EFER, %ecx
    rdmsr
    or $X86_EFER_LME, %eax
    or $X86_EFER_NXE, %eax
    wrmsr

    // enable paging; with LME already set this activates long mode
    mov %cr0, %ebx
    or $X86_CR0_PG, %ebx
    mov %ebx, %cr0

    // Translate data page segment into full physical address
    // (real-mode segment * 16); the 64-bit code below relies on %esi
    mov %ds, %esi
    shl $4, %esi

    // Jump to 64-bit CS: point %esp at the far pointer (offset + selector)
    // stored in the data page and pop it with a far return
    mov $BCD_PHYS_LM_ENTRY_OFFSET, %esp
    lretl

// Get the secondary cpu into 64-bit mode with interrupts disabled and no TSS
.code64
FUNCTION_LABEL(_x86_secondary_cpu_long_mode_entry)
    // When we get here, %rsi should contain the absolute address of our data
    // page.

    // Atomically claim a unique index from the shared CPU counter
    mov $1, %rdi
    LOCK xadd %edi, BCD_CPU_COUNTER_OFFSET(%esi)
    // %rdi is now the index this CPU should use to grab resources

    // Multiply the index by 2 (shift left by 1): each per-cpu record is a
    // pair of 64-bit values, living at offsets 8*(2n) and 8*(2n+1)
    // relative to BCD_PER_CPU_BASE_OFFSET
    shl $1, %rdi
    // Retrieve this CPUs initial kernel stack
    // Note: the stack is unusable until we switch cr3 below
    mov BCD_PER_CPU_BASE_OFFSET(%rsi, %rdi, 8), %rsp
    add $PAGE_SIZE, %rsp

    // Retrieve this CPUs initial thread (second value of the pair)
    // Note: this pointer is not usable until we switch cr3 below
    add $1, %rdi
    mov BCD_PER_CPU_BASE_OFFSET(%rsi, %rdi, 8), %rdx

    // Retrieve the new PML4 address before our data page becomes unreachable
    mov BCD_PHYS_KERNEL_PML4_OFFSET(%esi), %ecx
    // Similarly for the CPU waiting mask
    mov BCD_CPU_WAITING_OFFSET(%esi), %rdi

    // Switch out of the copied code page and into the kernel's
    // version of it (movabs is fixed up by the KASLR tooling noted above)
    movabs $.Lhighaddr, %rbx
    jmp *%rbx
.Lhighaddr:
    // Switch to the kernel's PML4
    mov %rcx, %cr3
    // As of this point, %esi is invalid

    // Reload the GDT with one based off of non-identity mapping
    lgdt _temp_gdtr(%rip)

    // Zero our data segments
    xor %eax, %eax
    mov %eax, %ds
    mov %eax, %es
    mov %eax, %fs
    mov %eax, %gs
    mov %eax, %ss

    // Load the IDT
    call load_startup_idt

    mov %rdx, %rsi
    // Do an indirect call to keep this position independent
    // x86_secondary_entry(CPU ready counter, thread)
    movabs $x86_secondary_entry, %rbx
    call *%rbx

// If x86_secondary_entry returns, hang.
0:
    hlt
    jmp 0b

// Get the cpu into 64-bit mode with interrupts disabled and no TSS. This must
// only be called on the bootstrap processor.
FUNCTION_LABEL(_x86_suspend_wakeup)
    // Retrieve the new PML4 address before our data page becomes unreachable
    mov BCD_PHYS_KERNEL_PML4_OFFSET(%esi), %ecx

    // Stash register pointer so that we can read it after we change
    // address spaces
    mov RED_REGISTERS_OFFSET(%esi), %rdi

    // Switch out of the copied code page and into the kernel's
    // version of it
    movabs $.Lwakeup_highaddr, %rbx
    jmp *%rbx
.Lwakeup_highaddr:
    // Switch to the kernel's PML4
    mov %rcx, %cr3
    // As of this point, %esi is invalid

    // Reload the GDT with one based off of non-identity mapping
    lgdt _temp_gdtr(%rip)

    // Zero our data segments
    xor %eax, %eax
    mov %eax, %ds
    mov %eax, %es
    mov %eax, %fs
    mov %eax, %gs
    mov %eax, %ss

    // Restore %gs.base to &bp_percpu. We need to do this before
    // returning to C code, since the C code might use safe-stack
    // and/or stack-protector.
    // TODO(teisenbe): There is a small performance gain that could be made here
    // by switching from wrmsr to wrgsbase, if wrgsbase is supported. Currently
    // this is omitted for simplicity.
    // wrmsr takes the 64-bit value split as %edx:%eax, MSR number in %ecx.
    lea bp_percpu(%rip), %rax
    mov %rax, %rdx
    shr $32, %rdx
    mov $X86_MSR_IA32_GS_BASE, %ecx
    wrmsr

    // Restore the stack pointer first, so we can use the stack right now.
    // (offset 120 in the saved-register record is the saved %rsp)
    mov 120(%rdi), %rsp

    // Load the IDT. Note this uses the stack and clobbers %rax, but not %rdi.
    call load_startup_idt

    // Restore the general-purpose registers from the saved-register record
    // pointed to by %rdi (one 64-bit slot each, in this fixed layout)
    mov 8(%rdi), %rsi
    mov 16(%rdi), %rbp
    mov 24(%rdi), %rbx
    mov 32(%rdi), %rdx
    mov 40(%rdi), %rcx
    mov 48(%rdi), %rax
    mov 56(%rdi), %r8
    mov 64(%rdi), %r9
    mov 72(%rdi), %r10
    mov 80(%rdi), %r11
    mov 88(%rdi), %r12
    mov 96(%rdi), %r13
    mov 104(%rdi), %r14
    mov 112(%rdi), %r15

    // Note: %rdi is not restored, but it is a caller-save register anyway.
    // If we want to restore %rdi, we could potentially use the stack here
    // to do something like "push 128(%rdi); mov (%rdi), %rdi; ret".

    // Restore RIP (saved at offset 128)
    jmp *128(%rdi)

DATA(x86_bootstrap16_end)
    nop