/*-
 * Copyright (c) KATO Takenori, 1997, 1998.
 *
 * All rights reserved.  Unpublished rights reserved under the copyright
 * laws of Japan.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/amd64/amd64/initcpu.c 362383 2020-06-19 13:48:23Z kib $");

#include "opt_cpu.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/pcpu.h>
#include <sys/systm.h>
#include <sys/sysctl.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>

#include <vm/vm.h>
#include <vm/pmap.h>

static int	hw_instruction_sse;
SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
    &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU");
static int	lower_sharedpage_init;
int		hw_lower_amd64_sharedpage;
SYSCTL_INT(_hw, OID_AUTO, lower_amd64_sharedpage, CTLFLAG_RDTUN,
    &hw_lower_amd64_sharedpage, 0,
    "Lower sharedpage to work around Ryzen issue with executing code near the top of user memory");
/*
 * -1: automatic (default)
 *  0: keep CLFLUSH enabled
 *  1: force-disable CLFLUSH
 */
static int	hw_clflush_disable = -1;

static void
init_amd(void)
{
	uint64_t msr;

	/*
	 * Work around Erratum 721 for Family 10h and 12h processors.
	 * These processors may incorrectly update the stack pointer
	 * after a long series of push and/or near-call instructions,
	 * or a long series of pop and/or near-return instructions.
	 *
	 * http://support.amd.com/us/Processor_TechDocs/41322_10h_Rev_Gd.pdf
	 * http://support.amd.com/us/Processor_TechDocs/44739_12h_Rev_Gd.pdf
	 *
	 * Hypervisors do not provide access to the errata MSR,
	 * causing a #GP exception on any attempt to apply the workaround.
	 * The MSR write has to be done on the host and persists globally
	 * anyway, so do not try to do it when running under virtualization.
	 */
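	/*
	 * Note: MSR 0xc0011029 used below is believed to be AMD's DE_CFG
	 * (decode configuration) register; setting bit 0 is understood to
	 * enable the workaround published for erratum 721.
	 */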
	switch (CPUID_TO_FAMILY(cpu_id)) {
	case 0x10:
	case 0x12:
		if ((cpu_feature2 & CPUID2_HV) == 0)
			wrmsr(0xc0011029, rdmsr(0xc0011029) | 1);
		break;
	}

	/*
	 * BIOS may fail to set InitApicIdCpuIdLo to 1 as it should per BKDG.
	 * So do it here; otherwise some tools could be confused by the
	 * Initial Local APIC ID reported with CPUID Function 1 in EBX.
	 */
	if (CPUID_TO_FAMILY(cpu_id) == 0x10) {
		if ((cpu_feature2 & CPUID2_HV) == 0) {
			msr = rdmsr(MSR_NB_CFG1);
			msr |= (uint64_t)1 << 54;
			wrmsr(MSR_NB_CFG1, msr);
		}
	}

	/*
	 * BIOS may configure Family 10h processors to convert WC+ cache type
	 * to CD.  That can hurt performance of guest VMs using nested paging.
	 * The relevant MSR bit is not documented in the BKDG;
	 * the fix is borrowed from Linux.
	 */
	if (CPUID_TO_FAMILY(cpu_id) == 0x10) {
		if ((cpu_feature2 & CPUID2_HV) == 0) {
			msr = rdmsr(0xc001102a);
			msr &= ~((uint64_t)1 << 24);
			wrmsr(0xc001102a, msr);
		}
	}

	/*
	 * Work around Erratum 793: Specific Combination of Writes to Write
	 * Combined Memory Types and Locked Instructions May Cause Core Hang.
	 * See Revision Guide for AMD Family 16h Models 00h-0Fh Processors,
	 * revision 3.04 or later, publication 51810.
	 */
	if (CPUID_TO_FAMILY(cpu_id) == 0x16 && CPUID_TO_MODEL(cpu_id) <= 0xf) {
		if ((cpu_feature2 & CPUID2_HV) == 0) {
			msr = rdmsr(MSR_LS_CFG);
			msr |= (uint64_t)1 << 15;
			wrmsr(MSR_LS_CFG, msr);
		}
	}

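	/*
	 * The erratum numbers cited below presumably refer to AMD's
	 * Revision Guide for Family 17h Models 00h-0Fh Processors;
	 * MSRs 0xc0011028 and 0xc0011029 are model-specific configuration
	 * registers that are not publicly documented.
	 */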
	/* Ryzen errata. */
	if (CPUID_TO_FAMILY(cpu_id) == 0x17 && CPUID_TO_MODEL(cpu_id) == 0x1 &&
	    (cpu_feature2 & CPUID2_HV) == 0) {
		/* 1021 */
		msr = rdmsr(0xc0011029);
		msr |= 0x2000;
		wrmsr(0xc0011029, msr);

		/* 1033 */
		msr = rdmsr(MSR_LS_CFG);
		msr |= 0x10;
		wrmsr(MSR_LS_CFG, msr);

		/* 1049 */
		msr = rdmsr(0xc0011028);
		msr |= 0x10;
		wrmsr(0xc0011028, msr);

		/* 1095 */
		msr = rdmsr(MSR_LS_CFG);
		msr |= 0x200000000000000;
		wrmsr(MSR_LS_CFG, msr);
	}

	/*
	 * Work around a problem on Ryzen that is triggered by executing
	 * code near the top of user memory, in our case the signal
	 * trampoline code in the shared page on amd64.
	 *
	 * This function is executed once for the BSP before tunables take
	 * effect so the value determined here can be overridden by the
	 * tunable.  This function is then executed again for each AP and
	 * also on resume.  Set a flag the first time so that the value set
	 * by the tunable is not overwritten.
	 *
	 * The stepping and/or microcode versions should be checked after
	 * this issue is fixed by AMD so that we don't use this mode if it
	 * is not needed.
	 */
	if (lower_sharedpage_init == 0) {
		lower_sharedpage_init = 1;
		if (CPUID_TO_FAMILY(cpu_id) == 0x17) {
			hw_lower_amd64_sharedpage = 1;
		}
	}
}

/*
 * Initialize special VIA features
 */
static void
init_via(void)
{
	u_int regs[4], val;

	/*
	 * Check extended CPUID for PadLock features.
	 *
	 * http://www.via.com.tw/en/downloads/whitepapers/initiatives/padlock/programming_guide.pdf
	 */
	do_cpuid(0xc0000000, regs);
	if (regs[0] >= 0xc0000001) {
		do_cpuid(0xc0000001, regs);
		val = regs[3];
	} else
		return;

	/* Enable RNG if present. */
	if ((val & VIA_CPUID_HAS_RNG) != 0) {
		via_feature_rng = VIA_HAS_RNG;
		wrmsr(0x110B, rdmsr(0x110B) | VIA_CPUID_DO_RNG);
	}

	/* Enable PadLock if present. */
	if ((val & VIA_CPUID_HAS_ACE) != 0)
		via_feature_xcrypt |= VIA_HAS_AES;
	if ((val & VIA_CPUID_HAS_ACE2) != 0)
		via_feature_xcrypt |= VIA_HAS_AESCTR;
	if ((val & VIA_CPUID_HAS_PHE) != 0)
		via_feature_xcrypt |= VIA_HAS_SHA;
	if ((val & VIA_CPUID_HAS_PMM) != 0)
		via_feature_xcrypt |= VIA_HAS_MM;
	if (via_feature_xcrypt != 0)
		wrmsr(0x1107, rdmsr(0x1107) | (1 << 28));
}

/*
 * Initialize CPU control registers
 */
void
initializecpu(void)
{
	uint64_t msr;
	uint32_t cr4;

	cr4 = rcr4();
	if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
		cr4 |= CR4_FXSR | CR4_XMM;
		cpu_fxsr = hw_instruction_sse = 1;
	}
	if (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE)
		cr4 |= CR4_FSGSBASE;

	/*
	 * If SMEP is present, we only need to flush RSB (by default)
	 * on context switches, to prevent cross-process ret2spec
	 * attacks.  Do it automatically if ibrs_disable is set, to
	 * complete the mitigation.
	 *
	 * Postpone enabling SMEP on the boot CPU until the page
	 * tables are switched from the boot loader identity mapping
	 * to the kernel tables.  The boot loader enables the U bit in
	 * its tables.
	 */
	if (IS_BSP()) {
		if (cpu_stdext_feature & CPUID_STDEXT_SMEP &&
		    !TUNABLE_INT_FETCH(
		    "machdep.mitigations.cpu_flush_rsb_ctxsw",
		    &cpu_flush_rsb_ctxsw) &&
		    hw_ibrs_disable)
			cpu_flush_rsb_ctxsw = 1;
	} else {
		if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
			cr4 |= CR4_SMEP;
	}
	load_cr4(cr4);
	if ((amd_feature & AMDID_NX) != 0) {
		msr = rdmsr(MSR_EFER) | EFER_NXE;
		wrmsr(MSR_EFER, msr);
		pg_nx = PG_NX;
	}
	hw_ibrs_recalculate(false);
	hw_ssb_recalculate(false);
	x86_rngds_mitg_recalculate(false);
	switch (cpu_vendor_id) {
	case CPU_VENDOR_AMD:
		init_amd();
		break;
	case CPU_VENDOR_CENTAUR:
		init_via();
		break;
	}

	if ((amd_feature & AMDID_RDTSCP) != 0 ||
	    (cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0)
		wrmsr(MSR_TSC_AUX, PCPU_GET(cpuid));
}

void
initializecpucache(void)
{

	/*
	 * CPUID with %eax = 1, %ebx returns
	 * Bits 15-8: CLFLUSH line size
	 * 	(Value * 8 = cache line size in bytes)
	 */
	if ((cpu_feature & CPUID_CLFSH) != 0)
		cpu_clflush_line_size = ((cpu_procinfo >> 8) & 0xff) * 8;
	/*
	 * XXXKIB: (temporary) hack to work around traps generated
	 * when CLFLUSHing APIC register window under virtualization
	 * environments.  These environments tend to disable the
	 * CPUID_SS feature even though the native CPU supports it.
	 */
	TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable);
	if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) {
		cpu_feature &= ~CPUID_CLFSH;
		cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
	}

	/*
	 * The kernel's use of CLFLUSH{,OPT} can be disabled manually
	 * by setting the hw.clflush_disable tunable.
	 */
	if (hw_clflush_disable == 1) {
		cpu_feature &= ~CPUID_CLFSH;
		cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
	}
}