/*
 * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
 *
 * SPDX-License-Identifier: GPL-2.0-only
 */

#pragma once

#include <config.h>
#include <hardware.h>
#include <arch/model/statedata.h>
#include <arch/machine/cpu_registers.h>
#include <arch/model/smp.h>
#include <arch/machine.h>

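/* Build a cr3_t from a PML4 physical address and a PCID; the PCID is
 * forced to zero when CONFIG_SUPPORT_PCID is not enabled. */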
static inline cr3_t makeCR3(paddr_t addr, word_t pcid)
{
    return cr3_new(addr, config_set(CONFIG_SUPPORT_PCID) ? pcid : 0);
}

/* Address space control */
static inline cr3_t getCurrentCR3(void)
{
#ifdef CONFIG_KERNEL_SKIM_WINDOW
    /* If we are running in the kernel and able to call this function, then by
     * definition the kernel PML4 must be the current cr3 */
    return cr3_new(kpptr_to_paddr(x64KSKernelPML4), 0);
#else
    return MODE_NODE_STATE(x64KSCurrentCR3);
#endif
}

static inline cr3_t getCurrentUserCR3(void)
{
#ifdef CONFIG_KERNEL_SKIM_WINDOW
    /* Construct a cr3_t from the state word, dropping the preserve_translation
     * command bit (bit 63) if needed */
    word_t cr3_word = MODE_NODE_STATE(x64KSCurrentUserCR3);
    cr3_t cr3_ret;
    if (config_set(CONFIG_SUPPORT_PCID)) {
        cr3_word &= ~BIT(63);
    }
    cr3_ret.words[0] = cr3_word;
    return cr3_ret;
#else
    return getCurrentCR3();
#endif
}

static inline paddr_t getCurrentUserVSpaceRoot(void)
{
    return cr3_get_pml4_base_address(getCurrentUserCR3());
}

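/* Install a cr3 value. With PCIDs enabled, a nonzero preserve_translation
 * sets CR3 bit 63 so that translations cached under the new PCID survive
 * the write; otherwise the write flushes them. */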
static inline void setCurrentCR3(cr3_t cr3, word_t preserve_translation)
{
#ifdef CONFIG_KERNEL_SKIM_WINDOW
    /* we should only ever be enabling the kernel window, as the bulk of the
     * cr3 loading when using the SKIM window will happen on kernel entry/exit
     * in assembly stubs */
    assert(cr3_get_pml4_base_address(cr3) == kpptr_to_paddr(x64KSKernelPML4));
#else
    MODE_NODE_STATE(x64KSCurrentCR3) = cr3;
#endif
    word_t cr3_word = cr3.words[0];
    if (config_set(CONFIG_SUPPORT_PCID)) {
        if (preserve_translation) {
            cr3_word |= BIT(63);
        }
    } else {
        assert(cr3_get_pcid(cr3) == 0);
    }
    write_cr3(cr3_word);
}

/* there is no option to preserve translation when setting the user cr3,
   as it is assumed you want it preserved since you are doing a context switch.
   If translation needs to be flushed then setCurrentCR3 should be used instead */
static inline void setCurrentUserCR3(cr3_t cr3)
{
#ifdef CONFIG_KERNEL_SKIM_WINDOW
    /* To make the restore stubs more efficient we set the preserve_translation
     * command bit in the state word. If we look at the cr3 later on we need to
     * remember to remove that bit */
    word_t cr3_word = cr3.words[0];
    if (config_set(CONFIG_SUPPORT_PCID)) {
        cr3_word |= BIT(63);
    }
    MODE_NODE_STATE(x64KSCurrentUserCR3) = cr3_word;
#else
    setCurrentCR3(cr3, 1);
#endif
}

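/* Context-switch helpers: load a new vspace root without flushing the
 * translations cached under its PCID. */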
static inline void setCurrentVSpaceRoot(paddr_t addr, word_t pcid)
{
    setCurrentCR3(makeCR3(addr, pcid), 1);
}

static inline void setCurrentUserVSpaceRoot(paddr_t addr, word_t pcid)
{
#ifdef CONFIG_KERNEL_SKIM_WINDOW
    setCurrentUserCR3(makeCR3(addr, pcid));
#else
    setCurrentVSpaceRoot(addr, pcid);
#endif
}

/* GDT installation */
void x64_install_gdt(gdt_idt_ptr_t *gdt_idt_ptr);

/* IDT installation */
void x64_install_idt(gdt_idt_ptr_t *gdt_idt_ptr);

/* LDT installation */
void x64_install_ldt(uint32_t ldt_sel);

/* TSS installation */
void x64_install_tss(uint32_t tss_sel);

void handle_fastsyscall(void);

void init_syscall_msrs(void);

/* Get current stack pointer */
static inline void *get_current_esp(void)
{
    word_t stack;
    void *result;
    /* The address of the local variable is on the current stack, which gives
     * a close approximation of the stack pointer at this point. */
    asm volatile("movq %[stack_address], %[result]" : [result] "=r"(result) : [stack_address] "r"(&stack));
    return result;
}

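/* Descriptor operand for the invpcid instruction: per the Intel SDM, bits
 * 0..11 of the first word hold the PCID (remaining bits must be zero) and
 * the second word holds the linear address. */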
typedef struct invpcid_desc {
    uint64_t    asid;
    uint64_t    addr;
} invpcid_desc_t;

#define INVPCID_TYPE_ADDR           0
#define INVPCID_TYPE_SINGLE         1
#define INVPCID_TYPE_ALL_GLOBAL     2   /* also invalidate global */
#define INVPCID_TYPE_ALL            3

static inline void invalidateLocalPCID(word_t type, void *vaddr, asid_t asid)
{
    if (config_set(CONFIG_SUPPORT_PCID)) {
        invpcid_desc_t desc;
        desc.asid = asid & 0xfff;
        desc.addr = (uint64_t)vaddr;
        asm volatile("invpcid %1, %0" :: "r"(type), "m"(desc));
    } else {
        switch (type) {
        case INVPCID_TYPE_ADDR:
            asm volatile("invlpg (%[vptr])" :: [vptr] "r"(vaddr));
            break;
        case INVPCID_TYPE_SINGLE:
        case INVPCID_TYPE_ALL:
            /* reload CR3 to perform a full flush */
            setCurrentCR3(getCurrentCR3(), 0);
            break;
        case INVPCID_TYPE_ALL_GLOBAL: {
            /* clear and reset the global bit to flush global mappings */
            unsigned long cr4 = read_cr4();
            write_cr4(cr4 & ~BIT(7));
            write_cr4(cr4);
        }
        break;
        }
    }
}

static inline void invalidateLocalTranslationSingle(vptr_t vptr)
{
    /* As this may be used to invalidate global mappings by the kernel,
     * and as it is only used in boot code, we can just invalidate
     * absolutely everything from the TLB */
    invalidateLocalPCID(INVPCID_TYPE_ALL_GLOBAL, (void *)0, 0);
}

static inline void invalidateLocalTranslationSingleASID(vptr_t vptr, asid_t asid)
{
    invalidateLocalPCID(INVPCID_TYPE_ADDR, (void *)vptr, asid);
}

static inline void invalidateLocalTranslationAll(void)
{
    invalidateLocalPCID(INVPCID_TYPE_ALL_GLOBAL, (void *)0, 0);
}
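
/* Flush the paging-structure caches for the given vspace root and ASID. */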
static inline void invalidateLocalPageStructureCacheASID(paddr_t root, asid_t asid)
{
    if (config_set(CONFIG_SUPPORT_PCID)) {
        /* store our previous cr3 */
        cr3_t cr3 = getCurrentCR3();
        /* we load the new vspace root, invalidating translation for it
         * and then switch back to the old CR3. We do this in a single
         * asm block to ensure we only rely on the code being mapped in
         * the temporary address space and not the stack. We preserve the
         * translation of the old cr3 */
        asm volatile(
            "mov %[new_cr3], %%cr3\n"
            "mov %[old_cr3], %%cr3\n"
            ::
            [new_cr3] "r"(makeCR3(root, asid).words[0]),
            [old_cr3] "r"(cr3.words[0] | BIT(63))
        );
    } else {
        /* just invalidate the page structure cache as per normal, by
         * doing a dummy invalidation of a tlb entry */
        asm volatile("invlpg (%[vptr])" :: [vptr] "r"(0));
    }
}

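/* swapgs exchanges GS.Base with the IA32_KERNEL_GS_BASE MSR; it is used on
 * kernel entry and exit to switch between the user and kernel GS values. */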
static inline void swapgs(void)
{
    asm volatile("swapgs");
}

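/* Fault-tolerant MSR read: the address of local label 1 is published in
 * x86KSGPExceptReturnTo so that, if rdmsr raises #GP because the MSR does
 * not exist, the fault handler can clear that slot and resume at the label.
 * Reading back zero therefore indicates the rdmsr faulted. */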
static inline rdmsr_safe_result_t x86_rdmsr_safe(const uint32_t reg)
{
    uint32_t low;
    uint32_t high;
    word_t returnto;
    word_t temp;
    rdmsr_safe_result_t result;
    asm volatile(
        "movabs $1f, %[temp] \n"
        "movq %[temp], (%[returnto_addr]) \n\
         rdmsr \n\
         1: \n\
         movq (%[returnto_addr]), %[returnto] \n\
         movq $0, (%[returnto_addr])"
        : [returnto] "=&r"(returnto),
        [temp] "=&r"(temp),
        [high] "=&d"(high),
        [low] "=&a"(low)
        : [returnto_addr] "r"(&ARCH_NODE_STATE(x86KSGPExceptReturnTo)),
        [reg] "c"(reg)
        : "memory"
    );
    result.success = returnto != 0;
    result.value = ((uint64_t)high << 32) | (uint64_t)low;
    return result;
}

#ifdef CONFIG_FSGSBASE_INST

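/* These use the rdfsbase/wrfsbase instructions, which fault unless
 * CR4.FSGSBASE has been enabled. */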
static inline void x86_write_fs_base_impl(word_t base)
{
    asm volatile("wrfsbase %0"::"r"(base));
}

static inline word_t x86_read_fs_base_impl(void)
{
    word_t base = 0;
    asm volatile("rdfsbase %0":"=r"(base));
    return base;
}

static inline void x86_save_fsgs_base(tcb_t *thread, cpu_id_t cpu)
{
    /*
     * Store the FS and GS base registers.
     *
     * These should only be accessed inside the kernel, between the
     * entry and exit calls to swapgs if used.
     */
#ifdef CONFIG_VTX
    if (thread_state_ptr_get_tsType(&thread->tcbState) == ThreadState_RunningVM) {
        /*
         * Never save the FS/GS of a thread running in a VM as they will
         * be garbage values.
         */
        return;
    }
#endif
    word_t cur_fs_base = x86_read_fs_base(cpu);
    setRegister(thread, FS_BASE, cur_fs_base);
    word_t cur_gs_base = x86_read_gs_base(cpu);
    setRegister(thread, GS_BASE, cur_gs_base);
}

#endif

#if defined(ENABLE_SMP_SUPPORT)

/*
 * Under x86_64 with SMP support, the GS.Base register and the
 * IA32_KERNEL_GS_BASE MSR are swapped so the actual user-level copy of
 * GS is stored in IA32_KERNEL_GS_BASE between the call to swapgs in the
 * kernel entry and the call to swapgs in the user restore.
 */

static inline void x86_write_gs_base_impl(word_t base)
{
    x86_wrmsr(IA32_KERNEL_GS_BASE_MSR, base);
}

static inline word_t x86_read_gs_base_impl(void)
{
    return x86_rdmsr(IA32_KERNEL_GS_BASE_MSR);
}

#elif defined(CONFIG_FSGSBASE_INST)

static inline void x86_write_gs_base_impl(word_t base)
{
    asm volatile("wrgsbase %0"::"r"(base));
}

static inline word_t x86_read_gs_base_impl(void)
{
    word_t base = 0;
    asm volatile("rdgsbase %0":"=r"(base));
    return base;
}

#elif defined(CONFIG_FSGSBASE_MSR)

static inline void x86_write_gs_base_impl(word_t base)
{
    x86_wrmsr(IA32_GS_BASE_MSR, base);
}

static inline word_t x86_read_gs_base_impl(void)
{
    return x86_rdmsr(IA32_GS_BASE_MSR);
}

#endif

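/* Install the TLS base for the current core by writing the FS segment base. */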
static inline void x86_set_tls_segment_base(word_t tls_base)
{
    x86_write_fs_base(tls_base, SMP_TERNARY(getCurrentCPUIndex(), 0));
}