/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) KATO Takenori, 1997, 1998.
 *
 * All rights reserved.  Unpublished rights reserved under the copyright
 * laws of Japan.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_cpu.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/pcpu.h>
#include <sys/systm.h>
#include <sys/sysctl.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/psl.h>
#include <machine/specialreg.h>

#include <vm/vm.h>
#include <vm/pmap.h>

static int	hw_instruction_sse;
SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
    &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU");
static int	lower_sharedpage_init;
int		hw_lower_amd64_sharedpage;
SYSCTL_INT(_hw, OID_AUTO, lower_amd64_sharedpage, CTLFLAG_RDTUN,
    &hw_lower_amd64_sharedpage, 0,
    "Lower sharedpage to work around Ryzen issue with executing code near the top of user memory");
/*
 * -1: automatic (default)
 *  0: keep CLFLUSH enabled
 *  1: force CLFLUSH disabled
 */
static int	hw_clflush_disable = -1;
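
/*
 * The tunable can be set from loader.conf(5); e.g. "hw.clflush_disable=1"
 * makes the kernel treat CLFLUSH as unavailable even when the CPU
 * advertises it (see initializecpucache() below).
 */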

static void
init_amd(void)
{
	uint64_t msr;

	/*
	 * C1E renders the local APIC timer dead, so we disable it by
	 * reading the Interrupt Pending Message register and clearing
	 * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
	 *
	 * Reference:
	 *   "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors"
	 *   #32559 revision 3.00+
	 *
	 * Detect the presence of the C1E capability, mostly on recent
	 * dual-core (and later) K8-family CPUs.  The affected model range
	 * is taken from the Linux sources.
	 */
	if ((CPUID_TO_FAMILY(cpu_id) == 0xf ||
	    CPUID_TO_FAMILY(cpu_id) == 0x10) && (cpu_feature2 & CPUID2_HV) == 0)
		cpu_amdc1e_bug = 1;
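
	/*
	 * The flag is acted upon elsewhere (see cpu_machdep.c).  A minimal
	 * sketch of that workaround, assuming the MSR_AMDK8_IPM and
	 * AMDK8_CMPHALT definitions from <machine/specialreg.h>:
	 *
	 *	msr = rdmsr(MSR_AMDK8_IPM);
	 *	if ((msr & AMDK8_CMPHALT) != 0)
	 *		wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
	 */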

	/*
	 * Work around Erratum 721 for Family 10h and 12h processors.
	 * These processors may incorrectly update the stack pointer
	 * after a long series of push and/or near-call instructions,
	 * or a long series of pop and/or near-return instructions.
	 *
	 * http://support.amd.com/us/Processor_TechDocs/41322_10h_Rev_Gd.pdf
	 * http://support.amd.com/us/Processor_TechDocs/44739_12h_Rev_Gd.pdf
	 *
	 * Hypervisors do not provide access to the errata MSR, causing a
	 * #GP exception on an attempt to apply the erratum workaround.
	 * The MSR write would be done on the host and persist globally
	 * anyway, so do not try to do it when running under
	 * virtualization.
	 */
	switch (CPUID_TO_FAMILY(cpu_id)) {
	case 0x10:
	case 0x12:
		if ((cpu_feature2 & CPUID2_HV) == 0)
			wrmsr(MSR_DE_CFG, rdmsr(MSR_DE_CFG) |
			    DE_CFG_10H_12H_STACK_POINTER_JUMP_FIX_BIT);
		break;
	}

	/*
	 * The BIOS may fail to set InitApicIdCpuIdLo to 1 as it should
	 * per the BKDG, so do it here; otherwise some tools could be
	 * confused by the Initial Local APIC ID reported with CPUID
	 * function 1 in %ebx.
	 */
	if (CPUID_TO_FAMILY(cpu_id) == 0x10) {
		if ((cpu_feature2 & CPUID2_HV) == 0) {
			msr = rdmsr(MSR_NB_CFG1);
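			/* Set InitApicIdCpuIdLo (bit 54). */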
			msr |= (uint64_t)1 << 54;
			wrmsr(MSR_NB_CFG1, msr);
		}
	}

	/*
	 * The BIOS may configure Family 10h processors to convert WC+
	 * cache type to CD.  That can hurt performance of guest VMs
	 * using nested paging.  The relevant MSR bit is not documented
	 * in the BKDG; the fix is borrowed from Linux.
	 */
	if (CPUID_TO_FAMILY(cpu_id) == 0x10) {
		if ((cpu_feature2 & CPUID2_HV) == 0) {
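			/* MSR 0xc001102a is MSR_AMD64_BU_CFG2 in Linux. */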
			msr = rdmsr(0xc001102a);
			msr &= ~((uint64_t)1 << 24);
			wrmsr(0xc001102a, msr);
		}
	}

	/*
	 * Work around Erratum 793: Specific Combination of Writes to Write
	 * Combined Memory Types and Locked Instructions May Cause Core Hang.
	 * See Revision Guide for AMD Family 16h Models 00h-0Fh Processors,
	 * revision 3.04 or later, publication 51810.
	 */
	if (CPUID_TO_FAMILY(cpu_id) == 0x16 && CPUID_TO_MODEL(cpu_id) <= 0xf) {
		if ((cpu_feature2 & CPUID2_HV) == 0) {
			msr = rdmsr(MSR_LS_CFG);
			msr |= (uint64_t)1 << 15;
			wrmsr(MSR_LS_CFG, msr);
		}
	}

	/*
	 * Ryzen errata.  The numbers below refer to AMD's published
	 * errata for Family 17h Model 01h parts.
	 */
	if (CPUID_TO_FAMILY(cpu_id) == 0x17 && CPUID_TO_MODEL(cpu_id) == 0x1 &&
	    (cpu_feature2 & CPUID2_HV) == 0) {
		/* 1021 */
		msr = rdmsr(MSR_DE_CFG);
		msr |= DE_CFG_ZEN_LOAD_STALE_DATA_FIX_BIT;
		wrmsr(MSR_DE_CFG, msr);

		/* 1033 */
		msr = rdmsr(MSR_LS_CFG);
		msr |= 0x10;
		wrmsr(MSR_LS_CFG, msr);

		/* 1049 */
		msr = rdmsr(0xc0011028);
		msr |= 0x10;
		wrmsr(0xc0011028, msr);

		/* 1095 */
		msr = rdmsr(MSR_LS_CFG);
		msr |= 0x200000000000000;
		wrmsr(MSR_LS_CFG, msr);
	}

	/*
	 * Work around a problem on Ryzen that is triggered by executing
	 * code near the top of user memory, in our case the signal
	 * trampoline code in the shared page on amd64.
	 *
	 * This function is executed once for the BSP before tunables take
	 * effect, so the value determined here can be overridden by the
	 * tunable.  This function is then executed again for each AP and
	 * also on resume.  Set a flag the first time so that the value set
	 * by the tunable is not overwritten.
	 *
	 * The stepping and/or microcode versions should be checked after
	 * this issue is fixed by AMD so that we don't use this mode if not
	 * needed.
	 */
	if (lower_sharedpage_init == 0) {
		lower_sharedpage_init = 1;
		if (CPUID_TO_FAMILY(cpu_id) == 0x17 ||
		    CPUID_TO_FAMILY(cpu_id) == 0x18) {
			hw_lower_amd64_sharedpage = 1;
		}
	}
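
	/*
	 * Since the sysctl above is CTLFLAG_RDTUN, the automatic choice
	 * can be overridden from loader.conf(5); e.g.
	 * "hw.lower_amd64_sharedpage=0" keeps the shared page at the top
	 * of user memory.
	 */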

	/* Zenbleed.  See the comments in 'cpu_machdep.c'. */
	zenbleed_check_and_apply(false);
}

/*
 * Initialize special VIA features
 */
static void
init_via(void)
{
	u_int regs[4], val;

	/*
	 * Check extended CPUID for PadLock features.
	 *
	 * http://www.via.com.tw/en/downloads/whitepapers/initiatives/padlock/programming_guide.pdf
	 */
	do_cpuid(0xc0000000, regs);
	if (regs[0] >= 0xc0000001) {
		do_cpuid(0xc0000001, regs);
		val = regs[3];
	} else
		return;

	/* Enable RNG if present. */
	if ((val & VIA_CPUID_HAS_RNG) != 0) {
		via_feature_rng = VIA_HAS_RNG;
		wrmsr(0x110B, rdmsr(0x110B) | VIA_CPUID_DO_RNG);
	}

	/* Enable PadLock if present. */
	if ((val & VIA_CPUID_HAS_ACE) != 0)
		via_feature_xcrypt |= VIA_HAS_AES;
	if ((val & VIA_CPUID_HAS_ACE2) != 0)
		via_feature_xcrypt |= VIA_HAS_AESCTR;
	if ((val & VIA_CPUID_HAS_PHE) != 0)
		via_feature_xcrypt |= VIA_HAS_SHA;
	if ((val & VIA_CPUID_HAS_PMM) != 0)
		via_feature_xcrypt |= VIA_HAS_MM;
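	/*
	 * MSR 0x1107 is VIA's Feature Control Register; bit 28 appears to
	 * enable the PadLock units (see the programming guide above).
	 */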
	if (via_feature_xcrypt != 0)
		wrmsr(0x1107, rdmsr(0x1107) | (1 << 28));
}

/*
 * The value for the TSC_AUX MSR and rdtscp/rdpid on the invoking CPU.
 *
 * Caller should prevent CPU migration.
 */
u_int
cpu_auxmsr(void)
{
	KASSERT((read_rflags() & PSL_I) == 0, ("context switch possible"));
	return (PCPU_GET(cpuid));
}
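
/*
 * A reader can later recover the id of the CPU it executed on together
 * with the timestamp; a minimal sketch, assuming the rdtscp_aux() inline
 * from <machine/cpufunc.h>:
 *
 *	uint32_t aux;
 *	uint64_t tsc;
 *
 *	tsc = rdtscp_aux(&aux);
 *
 * after which aux holds the value stored into TSC_AUX by initializecpu().
 */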

void
cpu_init_small_core(void)
{
	u_int r[4];

	if (cpu_high < 0x1a)
		return;

	cpuid_count(0x1a, 0, r);
	if ((r[0] & CPUID_HYBRID_CORE_MASK) != CPUID_HYBRID_SMALL_CORE)
		return;

	PCPU_SET(small_core, 1);
	if (pmap_pcid_enabled && invpcid_works &&
	    pmap_pcid_invlpg_workaround_uena) {
		PCPU_SET(pcid_invlpg_workaround, 1);
		pmap_pcid_invlpg_workaround = 1;
	}
}

/*
 * Initialize CPU control registers
 */
void
initializecpu(void)
{
	uint64_t msr;
	uint32_t cr4;

	TSENTER();
	cr4 = rcr4();
	if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
		cr4 |= CR4_FXSR | CR4_XMM;
		hw_instruction_sse = 1;
	}
	if (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE)
		cr4 |= CR4_FSGSBASE;

	if (cpu_stdext_feature2 & CPUID_STDEXT2_PKU)
		cr4 |= CR4_PKE;

	/*
	 * If SMEP is present, we only need to flush RSB (by default)
	 * on context switches, to prevent cross-process ret2spec
	 * attacks.  Do it automatically if ibrs_disable is set, to
	 * complete the mitigation.
	 *
	 * Postpone enabling the SMEP on the boot CPU until the page
	 * tables are switched from the boot loader identity mapping
	 * to the kernel tables.  The boot loader enables the U bit in
	 * its tables.
	 */
	if (IS_BSP()) {
		if (cpu_stdext_feature & CPUID_STDEXT_SMEP &&
		    !TUNABLE_INT_FETCH(
		    "machdep.mitigations.cpu_flush_rsb_ctxsw",
		    &cpu_flush_rsb_ctxsw) &&
		    hw_ibrs_disable)
			cpu_flush_rsb_ctxsw = 1;
	} else {
		if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
			cr4 |= CR4_SMEP;
		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
			cr4 |= CR4_SMAP;
	}
	TSENTER2("load_cr4");
	load_cr4(cr4);
	TSEXIT2("load_cr4");
	/* Reload cpu ext features to reflect cr4 changes */
	if (IS_BSP() && cold)
		identify_cpu_ext_features();
	if (IS_BSP() && (amd_feature & AMDID_NX) != 0) {
		msr = rdmsr(MSR_EFER) | EFER_NXE;
		wrmsr(MSR_EFER, msr);
		pg_nx = PG_NX;
	}
	hw_ibrs_recalculate(false);
	hw_ssb_recalculate(false);
	amd64_syscall_ret_flush_l1d_recalc();
	x86_rngds_mitg_recalculate(false);
	switch (cpu_vendor_id) {
	case CPU_VENDOR_AMD:
	case CPU_VENDOR_HYGON:
		init_amd();
		break;
	case CPU_VENDOR_CENTAUR:
		init_via();
		break;
	}

	if ((amd_feature & AMDID_RDTSCP) != 0 ||
	    (cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0)
		wrmsr(MSR_TSC_AUX, cpu_auxmsr());

	if (!IS_BSP())
		cpu_init_small_core();
	TSEXIT();
}

void
initializecpucache(void)
{

	/*
	 * CPUID with %eax = 1, %ebx returns
	 * Bits 15-8: CLFLUSH line size
	 * 	(Value * 8 = cache line size in bytes)
	 */
	if ((cpu_feature & CPUID_CLFSH) != 0)
		cpu_clflush_line_size = ((cpu_procinfo >> 8) & 0xff) * 8;
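	/* E.g., a reported value of 8 gives the common 64-byte line size. */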
	/*
	 * XXXKIB: (temporary) hack to work around traps generated
	 * when CLFLUSHing the APIC register window in virtualized
	 * environments.  These environments tend to disable the
	 * CPUID_SS feature even though the native CPU supports it.
	 */
	TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable);
	if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) {
		cpu_feature &= ~CPUID_CLFSH;
		cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
	}

	/*
	 * The kernel's use of CLFLUSH{,OPT} can be disabled manually
	 * by setting the hw.clflush_disable tunable.
	 */
	if (hw_clflush_disable == 1) {
		cpu_feature &= ~CPUID_CLFSH;
		cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
	}
}