1/*-
2 * Copyright (c) KATO Takenori, 1997, 1998.
3 *
4 * All rights reserved.  Unpublished rights reserved under the copyright
5 * laws of Japan.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer as
13 *    the first lines of this file unmodified.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: stable/11/sys/amd64/amd64/initcpu.c 362383 2020-06-19 13:48:23Z kib $");
32
33#include "opt_cpu.h"
34
35#include <sys/param.h>
36#include <sys/kernel.h>
37#include <sys/pcpu.h>
38#include <sys/systm.h>
39#include <sys/sysctl.h>
40
41#include <machine/cputypes.h>
42#include <machine/md_var.h>
43#include <machine/specialreg.h>
44
45#include <vm/vm.h>
46#include <vm/pmap.h>
47
/* Set to 1 by initializecpu() when the CPU reports both XMM and FXSR. */
static int	hw_instruction_sse;
SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
    &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU");
/* Nonzero once init_amd() has made its one-time sharedpage decision. */
static int	lower_sharedpage_init;
/*
 * Set to 1 by init_amd() on Family 17h processors the first time it
 * runs; exported as a read-only tunable (CTLFLAG_RDTUN).
 */
int		hw_lower_amd64_sharedpage;
SYSCTL_INT(_hw, OID_AUTO, lower_amd64_sharedpage, CTLFLAG_RDTUN,
    &hw_lower_amd64_sharedpage, 0,
   "Lower sharedpage to work around Ryzen issue with executing code near the top of user memory");
/*
 * Value of the hw.clflush_disable tunable, fetched and acted upon in
 * initializecpucache():
 * -1: automatic (default); CLFLUSH use is turned off when vm_guest
 *     reports that we are running as a guest
 *  0: keep CLFLUSH enabled
 *  1: force CLFLUSH disabled
 */
static int	hw_clflush_disable = -1;
62
63static void
64init_amd(void)
65{
66	uint64_t msr;
67
68	/*
69	 * Work around Erratum 721 for Family 10h and 12h processors.
70	 * These processors may incorrectly update the stack pointer
71	 * after a long series of push and/or near-call instructions,
72	 * or a long series of pop and/or near-return instructions.
73	 *
74	 * http://support.amd.com/us/Processor_TechDocs/41322_10h_Rev_Gd.pdf
75	 * http://support.amd.com/us/Processor_TechDocs/44739_12h_Rev_Gd.pdf
76	 *
77	 * Hypervisors do not provide access to the errata MSR,
78	 * causing #GP exception on attempt to apply the errata.  The
79	 * MSR write shall be done on host and persist globally
80	 * anyway, so do not try to do it when under virtualization.
81	 */
82	switch (CPUID_TO_FAMILY(cpu_id)) {
83	case 0x10:
84	case 0x12:
85		if ((cpu_feature2 & CPUID2_HV) == 0)
86			wrmsr(0xc0011029, rdmsr(0xc0011029) | 1);
87		break;
88	}
89
90	/*
91	 * BIOS may fail to set InitApicIdCpuIdLo to 1 as it should per BKDG.
92	 * So, do it here or otherwise some tools could be confused by
93	 * Initial Local APIC ID reported with CPUID Function 1 in EBX.
94	 */
95	if (CPUID_TO_FAMILY(cpu_id) == 0x10) {
96		if ((cpu_feature2 & CPUID2_HV) == 0) {
97			msr = rdmsr(MSR_NB_CFG1);
98			msr |= (uint64_t)1 << 54;
99			wrmsr(MSR_NB_CFG1, msr);
100		}
101	}
102
103	/*
104	 * BIOS may configure Family 10h processors to convert WC+ cache type
105	 * to CD.  That can hurt performance of guest VMs using nested paging.
106	 * The relevant MSR bit is not documented in the BKDG,
107	 * the fix is borrowed from Linux.
108	 */
109	if (CPUID_TO_FAMILY(cpu_id) == 0x10) {
110		if ((cpu_feature2 & CPUID2_HV) == 0) {
111			msr = rdmsr(0xc001102a);
112			msr &= ~((uint64_t)1 << 24);
113			wrmsr(0xc001102a, msr);
114		}
115	}
116
117	/*
118	 * Work around Erratum 793: Specific Combination of Writes to Write
119	 * Combined Memory Types and Locked Instructions May Cause Core Hang.
120	 * See Revision Guide for AMD Family 16h Models 00h-0Fh Processors,
121	 * revision 3.04 or later, publication 51810.
122	 */
123	if (CPUID_TO_FAMILY(cpu_id) == 0x16 && CPUID_TO_MODEL(cpu_id) <= 0xf) {
124		if ((cpu_feature2 & CPUID2_HV) == 0) {
125			msr = rdmsr(MSR_LS_CFG);
126			msr |= (uint64_t)1 << 15;
127			wrmsr(MSR_LS_CFG, msr);
128		}
129	}
130
131	/* Ryzen erratas. */
132	if (CPUID_TO_FAMILY(cpu_id) == 0x17 && CPUID_TO_MODEL(cpu_id) == 0x1 &&
133	    (cpu_feature2 & CPUID2_HV) == 0) {
134		/* 1021 */
135		msr = rdmsr(0xc0011029);
136		msr |= 0x2000;
137		wrmsr(0xc0011029, msr);
138
139		/* 1033 */
140		msr = rdmsr(MSR_LS_CFG);
141		msr |= 0x10;
142		wrmsr(MSR_LS_CFG, msr);
143
144		/* 1049 */
145		msr = rdmsr(0xc0011028);
146		msr |= 0x10;
147		wrmsr(0xc0011028, msr);
148
149		/* 1095 */
150		msr = rdmsr(MSR_LS_CFG);
151		msr |= 0x200000000000000;
152		wrmsr(MSR_LS_CFG, msr);
153	}
154
155	/*
156	 * Work around a problem on Ryzen that is triggered by executing
157	 * code near the top of user memory, in our case the signal
158	 * trampoline code in the shared page on amd64.
159	 *
160	 * This function is executed once for the BSP before tunables take
161	 * effect so the value determined here can be overridden by the
162	 * tunable.  This function is then executed again for each AP and
163	 * also on resume.  Set a flag the first time so that value set by
164	 * the tunable is not overwritten.
165	 *
166	 * The stepping and/or microcode versions should be checked after
167	 * this issue is fixed by AMD so that we don't use this mode if not
168	 * needed.
169	 */
170	if (lower_sharedpage_init == 0) {
171		lower_sharedpage_init = 1;
172		if (CPUID_TO_FAMILY(cpu_id) == 0x17) {
173			hw_lower_amd64_sharedpage = 1;
174		}
175	}
176}
177
178/*
179 * Initialize special VIA features
180 */
181static void
182init_via(void)
183{
184	u_int regs[4], val;
185
186	/*
187	 * Check extended CPUID for PadLock features.
188	 *
189	 * http://www.via.com.tw/en/downloads/whitepapers/initiatives/padlock/programming_guide.pdf
190	 */
191	do_cpuid(0xc0000000, regs);
192	if (regs[0] >= 0xc0000001) {
193		do_cpuid(0xc0000001, regs);
194		val = regs[3];
195	} else
196		return;
197
198	/* Enable RNG if present. */
199	if ((val & VIA_CPUID_HAS_RNG) != 0) {
200		via_feature_rng = VIA_HAS_RNG;
201		wrmsr(0x110B, rdmsr(0x110B) | VIA_CPUID_DO_RNG);
202	}
203
204	/* Enable PadLock if present. */
205	if ((val & VIA_CPUID_HAS_ACE) != 0)
206		via_feature_xcrypt |= VIA_HAS_AES;
207	if ((val & VIA_CPUID_HAS_ACE2) != 0)
208		via_feature_xcrypt |= VIA_HAS_AESCTR;
209	if ((val & VIA_CPUID_HAS_PHE) != 0)
210		via_feature_xcrypt |= VIA_HAS_SHA;
211	if ((val & VIA_CPUID_HAS_PMM) != 0)
212		via_feature_xcrypt |= VIA_HAS_MM;
213	if (via_feature_xcrypt != 0)
214		wrmsr(0x1107, rdmsr(0x1107) | (1 << 28));
215}
216
217/*
218 * Initialize CPU control registers
219 */
220void
221initializecpu(void)
222{
223	uint64_t msr;
224	uint32_t cr4;
225
226	cr4 = rcr4();
227	if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
228		cr4 |= CR4_FXSR | CR4_XMM;
229		cpu_fxsr = hw_instruction_sse = 1;
230	}
231	if (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE)
232		cr4 |= CR4_FSGSBASE;
233
234	/*
235	 * If SMEP is present, we only need to flush RSB (by default)
236	 * on context switches, to prevent cross-process ret2spec
237	 * attacks.  Do it automatically if ibrs_disable is set, to
238	 * complete the mitigation.
239	 *
240	 * Postpone enabling the SMEP on the boot CPU until the page
241	 * tables are switched from the boot loader identity mapping
242	 * to the kernel tables.  The boot loader enables the U bit in
243	 * its tables.
244	 */
245	if (IS_BSP()) {
246		if (cpu_stdext_feature & CPUID_STDEXT_SMEP &&
247		    !TUNABLE_INT_FETCH(
248		    "machdep.mitigations.cpu_flush_rsb_ctxsw",
249		    &cpu_flush_rsb_ctxsw) &&
250		    hw_ibrs_disable)
251			cpu_flush_rsb_ctxsw = 1;
252	} else {
253		if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
254			cr4 |= CR4_SMEP;
255	}
256	load_cr4(cr4);
257	if ((amd_feature & AMDID_NX) != 0) {
258		msr = rdmsr(MSR_EFER) | EFER_NXE;
259		wrmsr(MSR_EFER, msr);
260		pg_nx = PG_NX;
261	}
262	hw_ibrs_recalculate(false);
263	hw_ssb_recalculate(false);
264	x86_rngds_mitg_recalculate(false);
265	switch (cpu_vendor_id) {
266	case CPU_VENDOR_AMD:
267		init_amd();
268		break;
269	case CPU_VENDOR_CENTAUR:
270		init_via();
271		break;
272	}
273
274	if ((amd_feature & AMDID_RDTSCP) != 0 ||
275	    (cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0)
276		wrmsr(MSR_TSC_AUX, PCPU_GET(cpuid));
277}
278
279void
280initializecpucache(void)
281{
282
283	/*
284	 * CPUID with %eax = 1, %ebx returns
285	 * Bits 15-8: CLFLUSH line size
286	 * 	(Value * 8 = cache line size in bytes)
287	 */
288	if ((cpu_feature & CPUID_CLFSH) != 0)
289		cpu_clflush_line_size = ((cpu_procinfo >> 8) & 0xff) * 8;
290	/*
291	 * XXXKIB: (temporary) hack to work around traps generated
292	 * when CLFLUSHing APIC register window under virtualization
293	 * environments.  These environments tend to disable the
294	 * CPUID_SS feature even though the native CPU supports it.
295	 */
296	TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable);
297	if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) {
298		cpu_feature &= ~CPUID_CLFSH;
299		cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
300	}
301
302	/*
303	 * The kernel's use of CLFLUSH{,OPT} can be disabled manually
304	 * by setting the hw.clflush_disable tunable.
305	 */
306	if (hw_clflush_disable == 1) {
307		cpu_feature &= ~CPUID_CLFSH;
308		cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
309	}
310}
311