/*
 * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
 *
 * SPDX-License-Identifier: GPL-2.0-only
 */

#pragma once

#include <config.h>
#include <hardware.h>
#include <arch/model/statedata.h>
#include <arch/machine/cpu_registers.h>
#include <arch/model/smp.h>
#include <arch/machine.h>

static inline cr3_t makeCR3(paddr_t addr, word_t pcid)
{
    return cr3_new(addr, config_set(CONFIG_SUPPORT_PCID) ? pcid : 0);
}

/* Address space control */
static inline cr3_t getCurrentCR3(void)
{
#ifdef CONFIG_KERNEL_SKIM_WINDOW
    /* If we are executing in the kernel when calling this function, then by
     * definition the kernel PML4 must be the current cr3 */
    return cr3_new(kpptr_to_paddr(x64KSKernelPML4), 0);
#else
    return MODE_NODE_STATE(x64KSCurrentCR3);
#endif
}

static inline cr3_t getCurrentUserCR3(void)
{
#ifdef CONFIG_KERNEL_SKIM_WINDOW
    /* Construct a cr3_t from the state word, dropping the preserve_translation
     * command bit if needed */
    word_t cr3_word = MODE_NODE_STATE(x64KSCurrentUserCR3);
    cr3_t cr3_ret;
    if (config_set(CONFIG_SUPPORT_PCID)) {
        cr3_word &= ~BIT(63);
    }
    cr3_ret.words[0] = cr3_word;
    return cr3_ret;
#else
    return getCurrentCR3();
#endif
}

static inline paddr_t getCurrentUserVSpaceRoot(void)
{
    return cr3_get_pml4_base_address(getCurrentUserCR3());
}

static inline void setCurrentCR3(cr3_t cr3, word_t preserve_translation)
{
#ifdef CONFIG_KERNEL_SKIM_WINDOW
    /* we should only ever be enabling the kernel window, as the bulk of the
     * cr3 loading when using the SKIM window will happen on kernel entry/exit
     * in assembly stubs */
    assert(cr3_get_pml4_base_address(cr3) == kpptr_to_paddr(x64KSKernelPML4));
#else
    MODE_NODE_STATE(x64KSCurrentCR3) = cr3;
#endif
    word_t cr3_word = cr3.words[0];
    if (config_set(CONFIG_SUPPORT_PCID)) {
        if (preserve_translation) {
            cr3_word |= BIT(63);
        }
    } else {
        assert(cr3_get_pcid(cr3) == 0);
    }
    write_cr3(cr3_word);
}
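
/*
 * Usage sketch (illustrative only, not part of this header): the
 * preserve_translation argument maps to bit 63 of the CR3 load, which on
 * PCID hardware keeps TLB entries tagged with the new PCID. `next_root`
 * and `next_asid` are hypothetical values taken from the VSpace root
 * being installed.
 *
 *     cr3_t next = makeCR3(next_root, next_asid);
 *     setCurrentCR3(next, 1); // bit 63 set: keep TLB entries for next_asid
 *     setCurrentCR3(next, 0); // bit 63 clear: flush entries for next_asid
 *
 * Without CONFIG_SUPPORT_PCID bit 63 is reserved and never set; every CR3
 * load then flushes all non-global translations.
 */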

/* There is no preserve_translation option when setting the user cr3, as it
   is assumed you want translation preserved because you are doing a context
   switch. If translation needs to be flushed then setCurrentCR3 should be
   used instead */
static inline void setCurrentUserCR3(cr3_t cr3)
{
#ifdef CONFIG_KERNEL_SKIM_WINDOW
    /* To make the restore stubs more efficient we set the preserve_translation
     * command bit in the state word here. Anyone reading the cr3 out of the
     * state later must remember to remove that bit */
    word_t cr3_word = cr3.words[0];
    if (config_set(CONFIG_SUPPORT_PCID)) {
        cr3_word |= BIT(63);
    }
    MODE_NODE_STATE(x64KSCurrentUserCR3) = cr3_word;
#else
    setCurrentCR3(cr3, 1);
#endif
}

static inline void setCurrentVSpaceRoot(paddr_t addr, word_t pcid)
{
    setCurrentCR3(makeCR3(addr, pcid), 1);
}

static inline void setCurrentUserVSpaceRoot(paddr_t addr, word_t pcid)
{
#ifdef CONFIG_KERNEL_SKIM_WINDOW
    setCurrentUserCR3(makeCR3(addr, pcid));
#else
    setCurrentVSpaceRoot(addr, pcid);
#endif
}

/* GDT installation */
void x64_install_gdt(gdt_idt_ptr_t *gdt_idt_ptr);

/* IDT installation */
void x64_install_idt(gdt_idt_ptr_t *gdt_idt_ptr);

/* LDT installation */
void x64_install_ldt(uint32_t ldt_sel);

/* TSS installation */
void x64_install_tss(uint32_t tss_sel);

void handle_fastsyscall(void);

void init_syscall_msrs(void);

/* Get current stack pointer */
static inline void *get_current_esp(void)
{
    /* the address of a local variable is, by construction, on the current stack */
    word_t stack;
    void *result;
    asm volatile("movq %[stack_address], %[result]" : [result] "=r"(result) : [stack_address] "r"(&stack));
    return result;
}

typedef struct invpcid_desc {
    uint64_t asid;
    uint64_t addr;
} invpcid_desc_t;

#define INVPCID_TYPE_ADDR       0
#define INVPCID_TYPE_SINGLE     1
#define INVPCID_TYPE_ALL_GLOBAL 2 /* also invalidate global */
#define INVPCID_TYPE_ALL        3

static inline void invalidateLocalPCID(word_t type, void *vaddr, asid_t asid)
{
    if (config_set(CONFIG_SUPPORT_PCID)) {
        invpcid_desc_t desc;
        desc.asid = asid & 0xfff;
        desc.addr = (uint64_t)vaddr;
        asm volatile("invpcid %1, %0" :: "r"(type), "m"(desc));
    } else {
        switch (type) {
        case INVPCID_TYPE_ADDR:
            asm volatile("invlpg (%[vptr])" :: [vptr] "r"(vaddr));
            break;
        case INVPCID_TYPE_SINGLE:
        case INVPCID_TYPE_ALL:
            /* reload CR3 to perform a full flush */
            setCurrentCR3(getCurrentCR3(), 0);
            break;
        case INVPCID_TYPE_ALL_GLOBAL: {
            /* clear and restore CR4.PGE (bit 7) to flush global mappings */
            unsigned long cr4 = read_cr4();
            write_cr4(cr4 & ~BIT(7));
            write_cr4(cr4);
        }
        break;
        }
    }
}

static inline void invalidateLocalTranslationSingle(vptr_t vptr)
{
    /* As this may be used to invalidate global mappings by the kernel,
     * and as it's only used in boot code, we can just invalidate
     * absolutely everything from the TLB */
    invalidateLocalPCID(INVPCID_TYPE_ALL_GLOBAL, (void *)0, 0);
}

static inline void invalidateLocalTranslationSingleASID(vptr_t vptr, asid_t asid)
{
    invalidateLocalPCID(INVPCID_TYPE_ADDR, (void *)vptr, asid);
}

static inline void invalidateLocalTranslationAll(void)
{
    invalidateLocalPCID(INVPCID_TYPE_ALL_GLOBAL, (void *)0, 0);
}
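
/*
 * Usage sketch (illustrative only, not part of this header): invalidating
 * the translation for a single user mapping after unmapping a page.
 * `vaddr` and `app_asid` are hypothetical values taken from the mapping
 * being removed.
 *
 *     invalidateLocalTranslationSingleASID(vaddr, app_asid);
 *
 * On PCID hardware this issues `invpcid` with INVPCID_TYPE_ADDR, so only
 * the (address, PCID) pair is dropped; without PCID support it falls back
 * to `invlpg`, which drops that address in the current address space.
 */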

static inline void invalidateLocalPageStructureCacheASID(paddr_t root, asid_t asid)
{
    if (config_set(CONFIG_SUPPORT_PCID)) {
        /* store our previous cr3 */
        cr3_t cr3 = getCurrentCR3();
        /* we load the new vspace root, invalidating translation for it,
         * and then switch back to the old cr3. We do this in a single
         * asm block to ensure we only rely on the code being mapped in
         * the temporary address space and not the stack. We preserve the
         * translation of the old cr3 */
        asm volatile(
            "mov %[new_cr3], %%cr3\n"
            "mov %[old_cr3], %%cr3\n"
            ::
            [new_cr3] "r"(makeCR3(root, asid).words[0]),
            [old_cr3] "r"(cr3.words[0] | BIT(63))
        );
    } else {
        /* just invalidate the page structure cache as per normal, by
         * doing a dummy invalidation of a tlb entry */
        asm volatile("invlpg (%[vptr])" :: [vptr] "r"(0));
    }
}

static inline void swapgs(void)
{
    asm volatile("swapgs");
}

static inline rdmsr_safe_result_t x86_rdmsr_safe(const uint32_t reg)
{
    uint32_t low;
    uint32_t high;
    word_t returnto;
    word_t temp;
    rdmsr_safe_result_t result;
    /* Stash the address of local label 1 in x86KSGPExceptReturnTo so that,
     * if this rdmsr raises #GP, the fault handler clears the stash and
     * resumes at the label. Reading back zero therefore means the read
     * faulted. */
    asm volatile(
        "movabs $1f, %[temp]\n"
        "movq %[temp], (%[returnto_addr])\n"
        "rdmsr\n"
        "1:\n"
        "movq (%[returnto_addr]), %[returnto]\n"
        "movq $0, (%[returnto_addr])"
        : [returnto] "=&r"(returnto),
          [temp] "=&r"(temp),
          [high] "=&d"(high),
          [low] "=&a"(low)
        : [returnto_addr] "r"(&ARCH_NODE_STATE(x86KSGPExceptReturnTo)),
          [reg] "c"(reg)
        : "memory"
    );
    result.success = returnto != 0;
    result.value = ((uint64_t)high << 32) | (uint64_t)low;
    return result;
}

#ifdef CONFIG_FSGSBASE_INST

static inline void x86_write_fs_base_impl(word_t base)
{
    asm volatile("wrfsbase %0" :: "r"(base));
}

static inline word_t x86_read_fs_base_impl(void)
{
    word_t base = 0;
    asm volatile("rdfsbase %0" : "=r"(base));
    return base;
}

static inline void x86_save_fsgs_base(tcb_t *thread, cpu_id_t cpu)
{
    /*
     * Store the FS and GS base registers.
     *
     * These should only be accessed inside the kernel, between the
     * entry and exit calls to swapgs if used.
     */
#ifdef CONFIG_VTX
    if (thread_state_ptr_get_tsType(&thread->tcbState) == ThreadState_RunningVM) {
        /*
         * Never save the FS/GS bases of a thread running in a VM, as they
         * will be garbage values.
         */
        return;
    }
#endif
    word_t cur_fs_base = x86_read_fs_base(cpu);
    setRegister(thread, FS_BASE, cur_fs_base);
    word_t cur_gs_base = x86_read_gs_base(cpu);
    setRegister(thread, GS_BASE, cur_gs_base);
}

#endif

#if defined(ENABLE_SMP_SUPPORT)

/*
 * Under x86_64 with SMP support, the GS.Base register and the
 * IA32_KERNEL_GS_BASE MSR are swapped, so the actual user-level copy of
 * GS.Base is stored in IA32_KERNEL_GS_BASE between the call to swapgs on
 * kernel entry and the call to swapgs in the user restore.
 */

static inline void x86_write_gs_base_impl(word_t base)
{
    x86_wrmsr(IA32_KERNEL_GS_BASE_MSR, base);
}

static inline word_t x86_read_gs_base_impl(void)
{
    return x86_rdmsr(IA32_KERNEL_GS_BASE_MSR);
}

#elif defined(CONFIG_FSGSBASE_INST)

static inline void x86_write_gs_base_impl(word_t base)
{
    asm volatile("wrgsbase %0" :: "r"(base));
}

static inline word_t x86_read_gs_base_impl(void)
{
    word_t base = 0;
    asm volatile("rdgsbase %0" : "=r"(base));
    return base;
}

#elif defined(CONFIG_FSGSBASE_MSR)

static inline void x86_write_gs_base_impl(word_t base)
{
    x86_wrmsr(IA32_GS_BASE_MSR, base);
}

static inline word_t x86_read_gs_base_impl(void)
{
    return x86_rdmsr(IA32_GS_BASE_MSR);
}

#endif

static inline void x86_set_tls_segment_base(word_t tls_base)
{
    x86_write_fs_base(tls_base, SMP_TERNARY(getCurrentCPUIndex(), 0));
}
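
/*
 * Usage sketch (illustrative only, not part of this header): probing for
 * an MSR that may not exist on the current CPU with x86_rdmsr_safe (defined
 * above). `SOME_OPTIONAL_MSR` and `use_value` are hypothetical stand-ins
 * for an MSR constant and a consumer defined elsewhere in the kernel; a
 * missing MSR yields success == false instead of an unhandled #GP fault.
 *
 *     rdmsr_safe_result_t res = x86_rdmsr_safe(SOME_OPTIONAL_MSR);
 *     if (res.success) {
 *         use_value(res.value);
 *     }
 */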