// Copyright 2016 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

// This file provides real-mode entry points for
// 1) secondary CPU initialization
// 2) suspend-to-RAM wakeup

#include <asm.h>
#include <arch/x86/bootstrap16.h>
#include <arch/x86/descriptor.h>
#include <arch/x86/registers.h>
#include <arch/defines.h>

// This code's only non-PIC instructions are movabs, which can be fixed up
// safely (see gen-kaslr-fixups.sh).  This section name is specially known
// by kernel.ld and gen-kaslr-fixups.sh.
.section .text.bootstrap16,"ax",%progbits
.balign PAGE_SIZE
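// At runtime this code is copied to another physical page (the "copied
// code page" referenced below) so that it can be entered in real mode,
// hence the page alignment.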

DATA(x86_bootstrap16_start)

.code16
FUNCTION_LABEL(x86_bootstrap16_entry)
    // Enter no-fill cache mode (allegedly this is the initial state
    // according to Intel 3A, but on at least one Broadwell the APs can
    // come up with caching enabled)
    mov %cr0, %ebx
    or $X86_CR0_CD, %ebx
    and $~X86_CR0_NW, %ebx
    mov %ebx, %cr0
0:

    // We cheat a little and don't switch off of our real mode segments in
    // protected mode.  In real mode and protected mode, all of our code
    // and data accesses are relative to %cs and %ss, using the real mode
    // segment calculations.

    // Set up %ds/%ss to refer to the data region.
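    // (A real-mode segment base is segment << 4, so adding 0x100 moves
    // the base up by 0x1000 bytes: the data page immediately follows
    // this page-aligned code page.)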
    mov %cs, %si
    add $0x100, %si
    mov %si, %ds
    mov %si, %ss

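    // Load the GDT descriptor that the kernel prepared in the data page
    // (the memory operand is %ds-relative).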
    lgdtl BCD_PHYS_GDTR_OFFSET

    // enter protected mode (but without paging)
    mov %cr0, %ebx
    or $X86_CR0_PE, %ebx
    mov %ebx, %cr0

    // clear instruction prefetch queue
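    // (a near jump suffices; we deliberately keep running on the
    // real-mode %cs value, as described above)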
    jmp 0f
0:
    // enable PAE / PGE
    mov %cr4, %ecx
    or $(X86_CR4_PAE|X86_CR4_PGE), %ecx
    mov %ecx, %cr4

    // load CR3 with the bootstrap PML4
    mov BCD_PHYS_BOOTSTRAP_PML4_OFFSET, %ecx
    mov %ecx, %cr3

    // enable IA-32e mode and indicate support for NX pages.
    // We need the latter once we switch to the real kernel
    // address space.
    mov $X86_MSR_IA32_EFER, %ecx
    rdmsr
    or $X86_EFER_LME, %eax
    or $X86_EFER_NXE, %eax
    wrmsr

    // enable paging
    mov %cr0, %ebx
    or $X86_CR0_PG, %ebx
    mov %ebx, %cr0

    // Translate data page segment into full address
    mov %ds, %esi
    shl $4, %esi

    // Jump to 64-bit CS
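    // %ss:%esp now points at the far pointer (entry offset plus 64-bit
    // code selector) that the kernel stored in the data page, so lretl
    // pops it and "returns" into one of the long-mode entry points below.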
    mov $BCD_PHYS_LM_ENTRY_OFFSET, %esp
    lretl

// Get the secondary cpu into 64-bit mode with interrupts disabled and no TSS
.code64
FUNCTION_LABEL(_x86_secondary_cpu_long_mode_entry)
    // When we get here, %rsi should contain the absolute address of our data
    // page.
    mov $1, %rdi
    LOCK xadd %edi, BCD_CPU_COUNTER_OFFSET(%esi)
    // %rdi is now the index this CPU should use to grab resources

    // Double the index (shift left by 1), since the per_cpu member contains
    // two 64-bit values per CPU, at offsets 8*(2n) and 8*(2n+1) relative to
    // BCD_PER_CPU_BASE_OFFSET
    shl $1, %rdi
    // Retrieve this CPU's initial kernel stack
    // Note: the stack is unusable until we switch cr3 below
    mov BCD_PER_CPU_BASE_OFFSET(%rsi, %rdi, 8), %rsp
    add $PAGE_SIZE, %rsp
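    // (the saved value is the base of the stack page; point %rsp at the
    // top, since stacks grow down)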

    // Retrieve this CPU's initial thread
    // Note: the thread is unusable until we switch cr3 below
    add $1, %rdi
    mov BCD_PER_CPU_BASE_OFFSET(%rsi, %rdi, 8), %rdx

    // Retrieve the new PML4 address before our data page becomes unreachable
    mov BCD_PHYS_KERNEL_PML4_OFFSET(%esi), %ecx
    // Similarly for the CPU waiting mask
    mov BCD_CPU_WAITING_OFFSET(%esi), %rdi

    // Switch out of the copied code page and into the kernel's
    // version of it
    movabs $.Lhighaddr, %rbx
    jmp  *%rbx
.Lhighaddr:
    // Switch to the kernel's PML4
    mov %rcx, %cr3
    // As of this point, %esi is invalid

    // Reload the GDT with one based on the kernel's non-identity mapping
    lgdt _temp_gdtr(%rip)

    // Zero our data segments
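    // (null selectors are fine here: 64-bit mode ignores the base and
    // limit of %ds/%es/%ss, and the %fs/%gs bases are set via MSRs)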
    xor %eax, %eax
    mov %eax, %ds
    mov %eax, %es
    mov %eax, %fs
    mov %eax, %gs
    mov %eax, %ss

    // Load the IDT
    call load_startup_idt

    mov %rdx, %rsi
    // Do an indirect call to keep this position independent
    // x86_secondary_entry(CPU ready counter, thread)
    movabs $x86_secondary_entry, %rbx
    call *%rbx

// If x86_secondary_entry returns, hang.
0:
    hlt
    jmp 0b

// Get the cpu into 64-bit mode with interrupts disabled and no TSS.  This must
// only be called on the bootstrap processor.
FUNCTION_LABEL(_x86_suspend_wakeup)
    // Retrieve the new PML4 address before our data page becomes unreachable
    mov BCD_PHYS_KERNEL_PML4_OFFSET(%esi), %ecx

    // Stash the register pointer so that we can read it after we change
    // address spaces
    mov RED_REGISTERS_OFFSET(%esi), %rdi

    // Switch out of the copied code page and into the kernel's
    // version of it
    movabs $.Lwakeup_highaddr, %rbx
    jmp  *%rbx
.Lwakeup_highaddr:
    // Switch to the kernel's PML4
    mov %rcx, %cr3
    // As of this point, %esi is invalid

    // Reload the GDT with one based on the kernel's non-identity mapping
    lgdt _temp_gdtr(%rip)

    // Zero our data segments
    xor %eax, %eax
    mov %eax, %ds
    mov %eax, %es
    mov %eax, %fs
    mov %eax, %gs
    mov %eax, %ss

    // Restore %gs.base to &bp_percpu.  We need to do this before
    // returning to C code, since the C code might use safe-stack
    // and/or stack-protector.
    // TODO(teisenbe): There is a small performance gain that could be made
    // here by switching from wrmsr to wrgsbase, if wrgsbase is supported.
    // Currently this is omitted for simplicity.
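    // wrmsr writes %edx:%eax (high:low halves) to the MSR selected by
    // %ecx, so split the 64-bit address across the two registers.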
    lea bp_percpu(%rip), %rax
    mov %rax, %rdx
    shr $32, %rdx
    mov $X86_MSR_IA32_GS_BASE, %ecx
    wrmsr

    // Restore the stack pointer first, so the code below can use the stack.
    mov 120(%rdi), %rsp

    // Load the IDT.  Note this uses the stack and clobbers %rax, but not %rdi.
    call load_startup_idt

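    // Restore the remaining general-purpose registers from the save area;
    // the offsets match the layout in which they were captured at suspend.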
    mov 8(%rdi), %rsi
    mov 16(%rdi), %rbp
    mov 24(%rdi), %rbx
    mov 32(%rdi), %rdx
    mov 40(%rdi), %rcx
    mov 48(%rdi), %rax
    mov 56(%rdi), %r8
    mov 64(%rdi), %r9
    mov 72(%rdi), %r10
    mov 80(%rdi), %r11
    mov 88(%rdi), %r12
    mov 96(%rdi), %r13
    mov 104(%rdi), %r14
    mov 112(%rdi), %r15

    // Note: %rdi is not restored, but it is a caller-save register anyway.
    // If we want to restore %rdi, we could potentially use the stack here
    // to do something like "push 128(%rdi); mov (%rdi), %rdi; ret".

    // Restore RIP
    jmp *128(%rdi)

DATA(x86_bootstrap16_end)
    nop