/*	$OpenBSD: identcpu.c,v 1.145 2024/06/24 21:22:14 bluhm Exp $	*/
/*	$NetBSD: identcpu.c,v 1.1 2003/04/26 18:39:28 fvdl Exp $	*/

/*
 * Copyright (c) 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/proc.h>
#include <sys/sysctl.h>

#include "vmm.h"
#include "pvbus.h"

#include <machine/cpu.h>
#include <machine/cpufunc.h>

#if NPVBUS > 0
#include <dev/pv/pvvar.h>
#endif

void	replacesmap(void);
void	replacemeltdown(void);
uint64_t cpu_freq(struct cpu_info *);
void	tsc_identify(struct cpu_info *);
void	tsc_timecounter_init(struct cpu_info *, uint64_t);
#if NVMM > 0
void	cpu_check_vmm_cap(struct cpu_info *);
#endif /* NVMM > 0 */

/* sysctl wants this. */
char cpu_model[48];
int cpuspeed;

int amd64_has_xcrypt;
int amd64_pos_cbit;
int has_rdrand;
int has_rdseed;

int
cpu_amd64speed(int *freq)
{
	*freq = cpuspeed;
	return (0);
}

#ifndef SMALL_KERNEL
void	intelcore_update_sensor(void *);
void	cpu_hz_update_sensor(void *);

/*
 * Temperature read on the CPU is relative to the maximum
 * temperature supported by the CPU, Tj(Max).
 * Refer to:
 * 64-ia-32-architectures-software-developer-vol-3c-part-3-manual.pdf
 * Section 35 and
 * http://www.intel.com/content/dam/www/public/us/en/documents/
 * white-papers/cpu-monitoring-dts-peci-paper.pdf
 *
 * The temperature on Intel CPUs can be between 70 and 105 degC; since
 * Westmere we can read the TJmax from the die. For older CPUs we have
 * to guess or use undocumented MSRs. Then we subtract the temperature
 * portion of the thermal status from the max to get the current temperature.
 */
void
intelcore_update_sensor(void *args)
{
	struct cpu_info *ci = (struct cpu_info *) args;
	u_int64_t msr;
	int max = 100;

	/* Only some Core family chips have MSR_TEMPERATURE_TARGET. */
	if (ci->ci_model == 0x0e &&
	    (rdmsr(MSR_TEMPERATURE_TARGET_UNDOCUMENTED) &
	     MSR_TEMPERATURE_TARGET_LOW_BIT_UNDOCUMENTED))
		max = 85;

	/*
	 * Newer CPUs can tell you what their max temperature is.
	 * See: '64-ia-32-architectures-software-developer-
	 * vol-3c-part-3-manual.pdf'
	 */
	if (ci->ci_model > 0x17 && ci->ci_model != 0x1c &&
	    ci->ci_model != 0x26 && ci->ci_model != 0x27 &&
	    ci->ci_model != 0x35 && ci->ci_model != 0x36)
		max = MSR_TEMPERATURE_TARGET_TJMAX(
		    rdmsr(MSR_TEMPERATURE_TARGET));

	msr = rdmsr(MSR_THERM_STATUS);
	if (msr & MSR_THERM_STATUS_VALID_BIT) {
		ci->ci_sensor.value = max - MSR_THERM_STATUS_TEMP(msr);
		/* micro degrees */
		ci->ci_sensor.value *= 1000000;
		/* kelvin */
		ci->ci_sensor.value += 273150000;
		ci->ci_sensor.flags &= ~SENSOR_FINVALID;
	} else {
		ci->ci_sensor.value = 0;
		ci->ci_sensor.flags |= SENSOR_FINVALID;
	}
}

/*
 * Effective CPU frequency measurement
 *
 * Refer to:
 *   64-ia-32-architectures-software-developer-vol-3b-part-2-manual.pdf
 *   Section 14.2 and
 *   OSRR for AMD Family 17h processors Section 2.1.2
 * Round to 50MHz, which is the accuracy of this measurement.
 */
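/* 50 MHz in the micro-Hz units the sensor value below is carried in */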
#define FREQ_50MHZ	(50ULL * 1000000ULL * 1000000ULL)
void
cpu_hz_update_sensor(void *args)
{
	extern uint64_t	 tsc_frequency;
	struct cpu_info	*ci = args;
	uint64_t	 mperf, aperf, mdelta, adelta, val;
	unsigned long	 s;

	sched_peg_curproc(ci);

	s = intr_disable();
	mperf = rdmsr(MSR_MPERF);
	aperf = rdmsr(MSR_APERF);
	intr_restore(s);

	mdelta = mperf - ci->ci_hz_mperf;
	adelta = aperf - ci->ci_hz_aperf;
	ci->ci_hz_mperf = mperf;
	ci->ci_hz_aperf = aperf;

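	/*
	 * The effective frequency is the APERF/MPERF ratio scaled by
	 * the nominal TSC frequency; the 1000000 factor keeps the
	 * result in the micro-Hz units the sensor expects.
	 */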
	if (mdelta > 0) {
		val = (adelta * 1000000) / mdelta * tsc_frequency;
		val = ((val + FREQ_50MHZ / 2) / FREQ_50MHZ) * FREQ_50MHZ;
		ci->ci_hz_sensor.value = val;
	}

	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
}
#endif

void (*setperf_setup)(struct cpu_info *);

void via_nano_setup(struct cpu_info *ci);

void cpu_topology(struct cpu_info *ci);

void
via_nano_setup(struct cpu_info *ci)
{
	u_int32_t regs[4], val;
	u_int64_t msreg;
	int model = (ci->ci_signature >> 4) & 15;

	if (model >= 9) {
		CPUID(0xC0000000, regs[0], regs[1], regs[2], regs[3]);
		val = regs[0];
		if (val >= 0xC0000001) {
			CPUID(0xC0000001, regs[0], regs[1], regs[2], regs[3]);
			val = regs[3];
		} else
			val = 0;

		if (val & (C3_CPUID_HAS_RNG | C3_CPUID_HAS_ACE))
			printf("%s:", ci->ci_dev->dv_xname);

		/* Enable RNG if present and disabled */
		if (val & C3_CPUID_HAS_RNG) {
			extern int viac3_rnd_present;

			if (!(val & C3_CPUID_DO_RNG)) {
				msreg = rdmsr(0x110B);
				msreg |= 0x40;
				wrmsr(0x110B, msreg);
			}
			viac3_rnd_present = 1;
			printf(" RNG");
		}

		/* Enable AES engine if present and disabled */
		if (val & C3_CPUID_HAS_ACE) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_ACE)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			amd64_has_xcrypt |= C3_HAS_AES;
#endif /* CRYPTO */
			printf(" AES");
		}

		/* Enable ACE2 engine if present and disabled */
		if (val & C3_CPUID_HAS_ACE2) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_ACE2)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			amd64_has_xcrypt |= C3_HAS_AESCTR;
#endif /* CRYPTO */
			printf(" AES-CTR");
		}

		/* Enable SHA engine if present and disabled */
		if (val & C3_CPUID_HAS_PHE) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_PHE)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			amd64_has_xcrypt |= C3_HAS_SHA;
#endif /* CRYPTO */
			printf(" SHA1 SHA256");
		}

		/* Enable MM engine if present and disabled */
		if (val & C3_CPUID_HAS_PMM) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_PMM)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			amd64_has_xcrypt |= C3_HAS_MM;
#endif /* CRYPTO */
			printf(" RSA");
		}

		printf("\n");
	}
}

#ifndef SMALL_KERNEL
void via_update_sensor(void *args);
void
via_update_sensor(void *args)
{
	struct cpu_info *ci = (struct cpu_info *) args;
	u_int64_t msr;

	msr = rdmsr(MSR_CENT_TMTEMPERATURE);
	ci->ci_sensor.value = (msr & 0xffffff);
	/* micro degrees */
	ci->ci_sensor.value *= 1000000;
	/* kelvin */
	ci->ci_sensor.value += 273150000;
	ci->ci_sensor.flags &= ~SENSOR_FINVALID;
}
#endif

uint64_t
cpu_freq_ctr(struct cpu_info *ci, uint32_t cpu_perf_eax,
    uint32_t cpu_perf_edx)
{
	uint64_t count, last_count, msr;

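	/*
	 * Measuring with a fixed-function counter requires a constant
	 * TSC, architectural perfmon version > 1 and more than one
	 * fixed-function counter.
	 */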
	if ((ci->ci_flags & CPUF_CONST_TSC) == 0 ||
	    (cpu_perf_eax & CPUIDEAX_VERID) <= 1 ||
	    CPUIDEDX_NUM_FC(cpu_perf_edx) <= 1)
		return (0);

	msr = rdmsr(MSR_PERF_FIXED_CTR_CTRL);
	if (msr & MSR_PERF_FIXED_CTR_FC(1, MSR_PERF_FIXED_CTR_FC_MASK)) {
		/* some hypervisor is dicking us around */
		return (0);
	}

	msr |= MSR_PERF_FIXED_CTR_FC(1, MSR_PERF_FIXED_CTR_FC_1);
	wrmsr(MSR_PERF_FIXED_CTR_CTRL, msr);

	msr = rdmsr(MSR_PERF_GLOBAL_CTRL) | MSR_PERF_GLOBAL_CTR1_EN;
	wrmsr(MSR_PERF_GLOBAL_CTRL, msr);

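	/*
	 * Fixed counter 1 counts unhalted core cycles; sample it across
	 * a ~100ms delay and scale by 10 to get cycles per second.
	 */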
	last_count = rdmsr(MSR_PERF_FIXED_CTR1);
	delay(100000);
	count = rdmsr(MSR_PERF_FIXED_CTR1);

	msr = rdmsr(MSR_PERF_FIXED_CTR_CTRL);
	msr &= ~MSR_PERF_FIXED_CTR_FC(1, MSR_PERF_FIXED_CTR_FC_MASK);
	wrmsr(MSR_PERF_FIXED_CTR_CTRL, msr);

	msr = rdmsr(MSR_PERF_GLOBAL_CTRL);
	msr &= ~MSR_PERF_GLOBAL_CTR1_EN;
	wrmsr(MSR_PERF_GLOBAL_CTRL, msr);

	return ((count - last_count) * 10);
}

uint64_t
cpu_freq(struct cpu_info *ci)
{
	uint64_t last_count, count;

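	/* Fallback: count TSC ticks across a ~100ms delay, scaled to Hz. */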
	last_count = rdtsc();
	delay(100000);
	count = rdtsc();

	return ((count - last_count) * 10);
}

/* print flags from one cpuid for cpu0 */
static inline void
pcpu0id3(const char *id, char reg1, uint32_t val1, const char *bits1,
    char reg2, uint32_t val2, const char *bits2,
    char reg3, uint32_t val3, const char *bits3)
{
	if (val1 || val2 || val3) {
		printf("\ncpu0: cpuid %s", id);
		if (val1)
			printf(" e%cx=%b", reg1, val1, bits1);
		if (val2)
			printf(" e%cx=%b", reg2, val2, bits2);
		if (val3)
			printf(" e%cx=%b", reg3, val3, bits3);
	}
}

/* print flags from one, 32-bit MSR for cpu0 */
static inline void
pmsr032(uint32_t msr, uint32_t value, const char *bits)
{
	if (value)
		printf("\ncpu0: msr %x=%b", msr, value, bits);
}

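/*
 * Print the feature bits lost (-) and gained (+) relative to what the
 * previously identified CPU reported.
 */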
static void
pbitdiff(uint32_t value, uint32_t base_value, const char *bits)
{
	uint32_t minus;
	if (value == base_value)
		return;
	minus = base_value & ~value;
	value &= ~base_value;
	if (minus)
		printf("-%b", minus, bits);
	if (value)
		printf("+%b", value, bits);
}

static inline void
pcpuid(struct cpu_info *ci, const char *id, char reg, uint32_t val,
    uint32_t prev_val, const char *bits)
{
	if (CPU_IS_PRIMARY(ci))
		pcpu0id3(id, reg, val, bits, 0, 0, NULL, 0, 0, NULL);
	else if (val != prev_val) {
		printf("\n%s: cpuid %s e%cx=", ci->ci_dev->dv_xname, id, reg);
		pbitdiff(val, prev_val, bits);
	}
}

static inline void
pcpuid2(struct cpu_info *ci, const char *id,
    char reg1, uint32_t val1, uint32_t prev_val1, const char *bits1,
    char reg2, uint32_t val2, uint32_t prev_val2, const char *bits2)
{
	if (CPU_IS_PRIMARY(ci))
		pcpu0id3(id, reg1, val1, bits1, reg2, val2, bits2, 0, 0,
		    NULL);
	else if (val1 != prev_val1 || val2 != prev_val2) {
		printf("\n%s: cpuid %s", ci->ci_dev->dv_xname, id);
		if (val1 != prev_val1) {
			printf(" e%cx=", reg1);
			pbitdiff(val1, prev_val1, bits1);
		}
		if (val2 != prev_val2) {
			printf(" e%cx=", reg2);
			pbitdiff(val2, prev_val2, bits2);
		}
	}
}

static inline void
pcpuid3(struct cpu_info *ci, const char *id,
    char reg1, uint32_t val1, uint32_t prev_val1, const char *bits1,
    char reg2, uint32_t val2, uint32_t prev_val2, const char *bits2,
    char reg3, uint32_t val3, uint32_t prev_val3, const char *bits3)
{
	if (CPU_IS_PRIMARY(ci))
		pcpu0id3(id, reg1, val1, bits1, reg2, val2, bits2, reg3, val3,
		    bits3);
	else if (val1 != prev_val1 || val2 != prev_val2 || val3 != prev_val3) {
		printf("\n%s: cpuid %s", ci->ci_dev->dv_xname, id);
		if (val1 != prev_val1) {
			printf(" e%cx=", reg1);
			pbitdiff(val1, prev_val1, bits1);
		}
		if (val2 != prev_val2) {
			printf(" e%cx=", reg2);
			pbitdiff(val2, prev_val2, bits2);
		}
		if (val3 != prev_val3) {
			printf(" e%cx=", reg3);
			pbitdiff(val3, prev_val3, bits3);
		}
	}
}

static inline void
pmsr32(struct cpu_info *ci, uint32_t msr, uint32_t value, uint32_t prev_value,
    const char *bits)
{
	if (CPU_IS_PRIMARY(ci))
		pmsr032(msr, value, bits);
	else if (value != prev_value) {
		printf("\n%s: msr %x=", ci->ci_dev->dv_xname, msr);
		pbitdiff(value, prev_value, bits);
	}
}

#ifdef MULTIPROCESSOR
static uint32_t prevcpu_perf_eax;
static uint32_t prevcpu_perf_edx;
#endif

static inline void
print_perf_cpuid(struct cpu_info *ci, uint32_t cpu_perf_eax,
    uint32_t cpu_perf_edx)
{
	uint32_t version;

	if (CPU_IS_PRIMARY(ci)) {
		version = cpu_perf_eax & CPUIDEAX_VERID;
		if (version == 0)
			return;
	}
#ifdef MULTIPROCESSOR
	else {
		/* if no difference on the bits we care about, say nothing */
		if (((cpu_perf_eax ^ prevcpu_perf_eax) & 0x00ffffff) == 0 &&
		    ((cpu_perf_edx ^ prevcpu_perf_edx) & 0x00001fff) == 0)
			return;
		version = cpu_perf_eax & CPUIDEAX_VERID;
	}
	prevcpu_perf_eax = cpu_perf_eax;
	prevcpu_perf_edx = cpu_perf_edx;
#endif

	printf("\n%s: cpuid a vers=%d", ci->ci_dev->dv_xname, version);
	if (version) {
		printf(", gp=%d, gpwidth=%d", CPUIDEAX_NUM_GC(cpu_perf_eax),
		    CPUIDEAX_BIT_GC(cpu_perf_eax));
		if (version > 1) {
			printf(", ff=%d, ffwidth=%d",
			    CPUIDEDX_NUM_FC(cpu_perf_edx),
			    CPUIDEDX_BIT_FC(cpu_perf_edx));
		}
	}
}

void
identifycpu(struct cpu_info *ci)
{
	static uint32_t prevcpu_1_ecx, prevcpu_tpm_ecxflags, prevcpu_d_1_eax;
	static uint32_t prevcpu_apmi_edx, prevcpu_arch_capa;
	static struct cpu_info *prevci = &cpu_info_primary;
#define CPUID_MEMBER(member)	ci->member, prevci->member
	uint32_t cflushsz, curcpu_1_ecx, curcpu_apmi_edx = 0;
	uint32_t curcpu_perf_eax = 0, curcpu_perf_edx = 0;
	uint32_t curcpu_tpm_ecxflags = 0, curcpu_d_1_eax = 0;
	uint64_t freq = 0;
	u_int32_t dummy;
	char mycpu_model[48];
	char *brandstr_from, *brandstr_to;
	int skipspace;

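	/* Highest supported extended function and extended feature flags */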
	CPUID(0x80000000, ci->ci_pnfeatset, dummy, dummy, dummy);
	CPUID(0x80000001, ci->ci_efeature_eax, dummy, ci->ci_efeature_ecx,
	    ci->ci_feature_eflags);

	if (CPU_IS_PRIMARY(ci)) {
		ci->ci_signature = cpu_id;
		ci->ci_feature_flags = cpu_feature & ~CPUID_NXE;
		cflushsz = cpu_ebxfeature;
		curcpu_1_ecx = cpu_ecxfeature;
		ecpu_ecxfeature = ci->ci_efeature_ecx;
	} else {
		CPUID(1, ci->ci_signature, cflushsz, curcpu_1_ecx,
		    ci->ci_feature_flags);
		/* Let cpu_feature be the common bits */
		cpu_feature &= ci->ci_feature_flags |
		    (ci->ci_feature_eflags & CPUID_NXE);
		cpu_ecxfeature &= curcpu_1_ecx;
	}
	/* cflush cacheline size is equal to bits 15-8 of ebx * 8 */
	ci->ci_cflushsz = ((cflushsz >> 8) & 0xff) * 8;

	CPUID(0x80000002, ci->ci_brand[0],
	    ci->ci_brand[1], ci->ci_brand[2], ci->ci_brand[3]);
	CPUID(0x80000003, ci->ci_brand[4],
	    ci->ci_brand[5], ci->ci_brand[6], ci->ci_brand[7]);
	CPUID(0x80000004, ci->ci_brand[8],
	    ci->ci_brand[9], ci->ci_brand[10], ci->ci_brand[11]);
	strlcpy(mycpu_model, (char *)ci->ci_brand, sizeof(mycpu_model));

	/* Remove leading, trailing and duplicated spaces from mycpu_model */
	brandstr_from = brandstr_to = mycpu_model;
	skipspace = 1;
	while (*brandstr_from != '\0') {
		if (!skipspace || *brandstr_from != ' ') {
			skipspace = 0;
			*(brandstr_to++) = *brandstr_from;
		}
		if (*brandstr_from == ' ')
			skipspace = 1;
		brandstr_from++;
	}
	if (skipspace && brandstr_to > mycpu_model)
		brandstr_to--;
	*brandstr_to = '\0';

	if (mycpu_model[0] == 0)
		strlcpy(mycpu_model, "Opteron or Athlon 64",
		    sizeof(mycpu_model));

	/* If primary cpu, fill in the global cpu_model used by sysctl */
	if (CPU_IS_PRIMARY(ci))
		strlcpy(cpu_model, mycpu_model, sizeof(cpu_model));

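	/*
	 * Base family and model live in bits 11-8 and 7-4 of the
	 * signature; the extended fields only count when the base
	 * family is 0x6 or 0xf.
	 */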
	ci->ci_family = (ci->ci_signature >> 8) & 0x0f;
	ci->ci_model = (ci->ci_signature >> 4) & 0x0f;
	if (ci->ci_family == 0x6 || ci->ci_family == 0xf) {
		ci->ci_family += (ci->ci_signature >> 20) & 0xff;
		ci->ci_model += ((ci->ci_signature >> 16) & 0x0f) << 4;
	}

#if NPVBUS > 0
	/* Detect hypervisors early, attach the paravirtual bus later */
	if (CPU_IS_PRIMARY(ci) && cpu_ecxfeature & CPUIDECX_HV)
		pvbus_identify();
#endif

	if (ci->ci_pnfeatset >= 0x80000007)
		CPUID(0x80000007, dummy, dummy, dummy, curcpu_apmi_edx);

	if (ci->ci_feature_flags && ci->ci_feature_flags & CPUID_TSC) {
		/* Has TSC, check if it's constant */
		if (ci->ci_vendor == CPUV_INTEL) {
			if ((ci->ci_family == 0x0f && ci->ci_model >= 0x03) ||
			    (ci->ci_family == 0x06 && ci->ci_model >= 0x0e)) {
				atomic_setbits_int(&ci->ci_flags, CPUF_CONST_TSC);
			}
		} else if (ci->ci_vendor == CPUV_VIA) {
			/* VIA */
			if (ci->ci_model >= 0x0f) {
				atomic_setbits_int(&ci->ci_flags, CPUF_CONST_TSC);
			}
		} else if (ci->ci_vendor == CPUV_AMD) {
			if (curcpu_apmi_edx & CPUIDEDX_ITSC) {
				/* Invariant TSC indicates constant TSC on AMD */
				atomic_setbits_int(&ci->ci_flags, CPUF_CONST_TSC);
			}
		}

		/* Check if it's an invariant TSC */
		if (curcpu_apmi_edx & CPUIDEDX_ITSC)
			atomic_setbits_int(&ci->ci_flags, CPUF_INVAR_TSC);

		tsc_identify(ci);
	}

	if (ci->ci_cpuid_level >= 0xa) {
		CPUID(0xa, curcpu_perf_eax, dummy, dummy, curcpu_perf_edx);

		freq = cpu_freq_ctr(ci, curcpu_perf_eax, curcpu_perf_edx);
	}
	if (freq == 0)
		freq = cpu_freq(ci);

	if (ci->ci_cpuid_level >= 0x07) {
		/* "Structured Extended Feature Flags" */
		CPUID_LEAF(0x7, 0, dummy, ci->ci_feature_sefflags_ebx,
		    ci->ci_feature_sefflags_ecx, ci->ci_feature_sefflags_edx);
		/* SEFF0ECX_OSPKE is set late on AP */
		ci->ci_feature_sefflags_ecx &= ~SEFF0ECX_OSPKE;
	}

	printf("%s: %s", ci->ci_dev->dv_xname, mycpu_model);

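	/* Round to the nearest 10kHz before printing MHz with two decimals */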
	if (freq != 0)
		printf(", %llu.%02llu MHz", (freq + 4999) / 1000000,
		    ((freq + 4999) / 10000) % 100);

	if (CPU_IS_PRIMARY(ci)) {
		cpuspeed = (freq + 4999) / 1000000;
		cpu_cpuspeed = cpu_amd64speed;
	}

	printf(", %02x-%02x-%02x", ci->ci_family, ci->ci_model,
	    ci->ci_signature & 0x0f);

	if ((cpu_ecxfeature & CPUIDECX_HV) == 0) {
		uint64_t level = 0;
		uint32_t dummy;

		if (ci->ci_vendor == CPUV_AMD) {
			level = rdmsr(MSR_PATCH_LEVEL);
		} else if (ci->ci_vendor == CPUV_INTEL) {
			wrmsr(MSR_BIOS_SIGN, 0);
			CPUID(1, dummy, dummy, dummy, dummy);
			level = rdmsr(MSR_BIOS_SIGN) >> 32;
		}
		if (level != 0)
			printf(", patch %08llx", level);
	}

	if (ci->ci_cpuid_level >= 0x06)
		CPUID(0x06, ci->ci_feature_tpmflags, dummy,
		    curcpu_tpm_ecxflags, dummy);
	if (ci->ci_vendor == CPUV_AMD && ci->ci_family >= 0x12)
		ci->ci_feature_tpmflags |= TPM_ARAT;

	/* xsave subfeatures */
	if (ci->ci_cpuid_level >= 0xd)
		CPUID_LEAF(0xd, 1, curcpu_d_1_eax, dummy, dummy, dummy);

	pcpuid2(ci, "1", 'd', CPUID_MEMBER(ci_feature_flags), CPUID_EDX_BITS,
	    'c', curcpu_1_ecx, prevcpu_1_ecx, CPUID_ECX_BITS);
	pcpuid2(ci, "6", 'a', CPUID_MEMBER(ci_feature_tpmflags), TPM_EAX_BITS,
	    'c', curcpu_tpm_ecxflags, prevcpu_tpm_ecxflags, TPM_ECX_BITS);
	pcpuid3(ci, "7.0",
	    'b', CPUID_MEMBER(ci_feature_sefflags_ebx), SEFF0_EBX_BITS,
	    'c', CPUID_MEMBER(ci_feature_sefflags_ecx), SEFF0_ECX_BITS,
	    'd', CPUID_MEMBER(ci_feature_sefflags_edx), SEFF0_EDX_BITS);
	print_perf_cpuid(ci, curcpu_perf_eax, curcpu_perf_edx);
	pcpuid(ci, "d.1", 'a', curcpu_d_1_eax, prevcpu_d_1_eax, XSAVE_BITS);
	pcpuid2(ci, "80000001",
	    'd', CPUID_MEMBER(ci_feature_eflags), CPUIDE_EDX_BITS,
	    'c', CPUID_MEMBER(ci_efeature_ecx), CPUIDE_ECX_BITS);
	pcpuid(ci, "80000007", 'd', curcpu_apmi_edx, prevcpu_apmi_edx,
	    CPUID_APMI_EDX_BITS);
#ifdef MULTIPROCESSOR
	prevcpu_1_ecx = curcpu_1_ecx;
	prevcpu_tpm_ecxflags = curcpu_tpm_ecxflags;
	prevcpu_d_1_eax = curcpu_d_1_eax;
	prevcpu_apmi_edx = curcpu_apmi_edx;
#endif

	/* speculation control features */
	if (ci->ci_vendor == CPUV_AMD) {
		if (ci->ci_pnfeatset >= 0x80000008) {
			CPUID(0x80000008, dummy, ci->ci_feature_amdspec_ebx,
			    dummy, dummy);
			pcpuid(ci, "80000008", 'b',
			    CPUID_MEMBER(ci_feature_amdspec_ebx),
			    CPUID_AMDSPEC_EBX_BITS);
		}
	} else if (ci->ci_vendor == CPUV_INTEL) {
		if (ci->ci_feature_sefflags_edx & SEFF0EDX_ARCH_CAP) {
			uint32_t msr = rdmsr(MSR_ARCH_CAPABILITIES);

			pmsr32(ci, MSR_ARCH_CAPABILITIES, msr,
			    prevcpu_arch_capa, ARCH_CAP_MSR_BITS);
			prevcpu_arch_capa = msr;
			if (!CPU_IS_PRIMARY(ci) && cpu_meltdown &&
			    (msr & ARCH_CAP_RDCL_NO))
				printf("\n%s: -MELTDOWN", ci->ci_dev->dv_xname);
		}
		if (cpu_meltdown && CPU_IS_PRIMARY(ci))
			printf("\n%s: MELTDOWN", ci->ci_dev->dv_xname);
	}

	/* AMD secure memory encryption and encrypted virtualization features */
	if (ci->ci_vendor == CPUV_AMD &&
	    ci->ci_pnfeatset >= CPUID_AMD_SEV_CAP) {
		CPUID(CPUID_AMD_SEV_CAP, ci->ci_feature_amdsev_eax,
		    ci->ci_feature_amdsev_ebx, ci->ci_feature_amdsev_ecx,
		    ci->ci_feature_amdsev_edx);
		pcpuid3(ci, "8000001F",
		    'a', CPUID_MEMBER(ci_feature_amdsev_eax),
		    CPUID_AMDSEV_EAX_BITS,
		    'c', CPUID_MEMBER(ci_feature_amdsev_ecx),
		    CPUID_AMDSEV_ECX_BITS,
		    'd', CPUID_MEMBER(ci_feature_amdsev_edx),
		    CPUID_AMDSEV_EDX_BITS);
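		/* Bits 5-0 of ebx hold the SEV page table C-bit position */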
		amd64_pos_cbit = (ci->ci_feature_amdsev_ebx & 0x3f);
	}

	printf("\n");

	replacemeltdown();
	x86_print_cacheinfo(ci);

	if (CPU_IS_PRIMARY(ci)) {
#ifndef SMALL_KERNEL
		if (ci->ci_vendor == CPUV_AMD &&
		    ci->ci_pnfeatset >= 0x80000007) {
			if (curcpu_apmi_edx & 0x06) {
				if ((ci->ci_signature & 0xF00) == 0xF00)
					setperf_setup = k8_powernow_init;
			}
			if (ci->ci_family >= 0x10)
				setperf_setup = k1x_init;
		}

		if (cpu_ecxfeature & CPUIDECX_EST)
			setperf_setup = est_init;
#endif

		if (cpu_ecxfeature & CPUIDECX_RDRAND)
			has_rdrand = 1;

		if (ci->ci_feature_sefflags_ebx & SEFF0EBX_RDSEED)
			has_rdseed = 1;

		if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMAP)
			replacesmap();
	}

#ifndef SMALL_KERNEL
	if (CPU_IS_PRIMARY(ci) && (ci->ci_feature_tpmflags & TPM_SENSOR) &&
	    ci->ci_vendor == CPUV_INTEL) {
		ci->ci_sensor.type = SENSOR_TEMP;
		sensor_task_register(ci, intelcore_update_sensor, 5);
		sensor_attach(&ci->ci_sensordev, &ci->ci_sensor);
	}
#endif

	if (CPU_IS_PRIMARY(ci) && ci->ci_vendor == CPUV_VIA) {
		ci->cpu_setup = via_nano_setup;
#ifndef SMALL_KERNEL
		ci->ci_sensor.type = SENSOR_TEMP;
		sensor_task_register(ci, via_update_sensor, 5);
		sensor_attach(&ci->ci_sensordev, &ci->ci_sensor);
#endif
	}

	tsc_timecounter_init(ci, freq);

	cpu_topology(ci);
#if NVMM > 0
	cpu_check_vmm_cap(ci);
#endif /* NVMM > 0 */

	/* Check for effective frequency via MPERF, APERF */
	if ((curcpu_tpm_ecxflags & TPM_EFFFREQ) && ci->ci_smt_id == 0) {
#ifndef SMALL_KERNEL
		ci->ci_hz_sensor.type = SENSOR_FREQ;
		sensor_task_register(ci, cpu_hz_update_sensor, 1);
		sensor_attach(&ci->ci_sensordev, &ci->ci_hz_sensor);
#endif
	}
	prevci = ci;
}

#ifndef SMALL_KERNEL
/*
 * Base 2 logarithm of an int. Returns 0 for 0 (yes, I know).
 */
static int
log2(unsigned int i)
{
	int ret = 0;

	while (i >>= 1)
		ret++;

	return (ret);
}

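/*
 * Smallest number of bits needed to represent x distinct values, i.e.
 * ceil(log2(x)); e.g. mask_width(6) == 3.
 */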
static int
mask_width(u_int x)
{
	int bit;
	int mask;
	int powerof2;

	powerof2 = ((x - 1) & x) == 0;
	mask = (x << (1 - powerof2)) - 1;

	/* fls */
	if (mask == 0)
		return (0);
	for (bit = 1; mask != 1; bit++)
		mask = (unsigned int)mask >> 1;

	return (bit);
}
#endif

/*
 * Build up cpu topology for given cpu, must run on the core itself.
 */
void
cpu_topology(struct cpu_info *ci)
{
#ifndef SMALL_KERNEL
	u_int32_t eax, ebx, ecx, edx;
	u_int32_t apicid, max_apicid = 0, max_coreid = 0;
	u_int32_t smt_bits = 0, core_bits, pkg_bits = 0;
	u_int32_t smt_mask = 0, core_mask, pkg_mask = 0;

	/* We need at least apicid at CPUID 1 */
	if (ci->ci_cpuid_level < 1)
		goto no_topology;

	/* Initial apicid */
	CPUID(1, eax, ebx, ecx, edx);
	apicid = (ebx >> 24) & 0xff;

	if (ci->ci_vendor == CPUV_AMD) {
		uint32_t nthreads = 1; /* per core */
		uint32_t thread_id; /* within a package */

		/* We need at least apicid at CPUID 0x80000008 */
		if (ci->ci_pnfeatset < 0x80000008)
			goto no_topology;

		CPUID(0x80000008, eax, ebx, ecx, edx);
		core_bits = (ecx >> 12) & 0xf;

		if (ci->ci_pnfeatset >= 0x8000001e) {
			CPUID(0x8000001e, eax, ebx, ecx, edx);
			nthreads = ((ebx >> 8) & 0xf) + 1;
		}

		/* Shift the core_bits off to get at the pkg bits */
		ci->ci_pkg_id = apicid >> core_bits;

		/* Get rid of the package bits */
		core_mask = (1U << core_bits) - 1;
		thread_id = apicid & core_mask;

		/* Cut logical thread_id into core id, and smt id in a core */
		ci->ci_core_id = thread_id / nthreads;
		ci->ci_smt_id = thread_id % nthreads;
	} else if (ci->ci_vendor == CPUV_INTEL) {
		/* We only support leaf 1/4 detection */
		if (ci->ci_cpuid_level < 4)
			goto no_topology;
		/* Get max_apicid */
		CPUID(1, eax, ebx, ecx, edx);
		max_apicid = (ebx >> 16) & 0xff;
		/* Get max_coreid */
		CPUID_LEAF(4, 0, eax, ebx, ecx, edx);
		max_coreid = ((eax >> 26) & 0x3f) + 1;
		/* SMT */
		smt_bits = mask_width(max_apicid / max_coreid);
		smt_mask = (1U << smt_bits) - 1;
		/* Core */
		core_bits = log2(max_coreid);
		core_mask = (1U << (core_bits + smt_bits)) - 1;
		core_mask ^= smt_mask;
		/* Pkg */
		pkg_bits = core_bits + smt_bits;
		pkg_mask = ~0U << pkg_bits;
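		/*
		 * Example: max_apicid 8 with max_coreid 4 gives two
		 * threads per core: smt_bits = 1, core_bits = 2, so
		 * apicid 0x5 decodes below as smt 1, core 2, pkg 0.
		 */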

		ci->ci_smt_id = apicid & smt_mask;
		ci->ci_core_id = (apicid & core_mask) >> smt_bits;
		ci->ci_pkg_id = (apicid & pkg_mask) >> pkg_bits;
	} else
		goto no_topology;
#ifdef DEBUG
	printf("cpu%d: smt %u, core %u, pkg %u "
		"(apicid 0x%x, max_apicid 0x%x, max_coreid 0x%x, smt_bits 0x%x, smt_mask 0x%x, "
		"core_bits 0x%x, core_mask 0x%x, pkg_bits 0x%x, pkg_mask 0x%x)\n",
		ci->ci_cpuid, ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id,
		apicid, max_apicid, max_coreid, smt_bits, smt_mask, core_bits,
		core_mask, pkg_bits, pkg_mask);
#else
	printf("cpu%d: smt %u, core %u, package %u\n", ci->ci_cpuid,
		ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id);

#endif
	return;
	/* We can't map, so consider ci_core_id as ci_cpuid */
no_topology:
#endif
	ci->ci_smt_id  = 0;
	ci->ci_core_id = ci->ci_cpuid;
	ci->ci_pkg_id  = 0;
}

#if NVMM > 0
/*
 * cpu_check_vmm_cap
 *
 * Checks for VMM capabilities for 'ci'. Initializes certain per-cpu VMM
 * state in 'ci' if virtualization extensions are found.
 *
 * Parameters:
 *  ci: the cpu being checked
 */
void
cpu_check_vmm_cap(struct cpu_info *ci)
{
	uint64_t msr;
	uint32_t cap, dummy, edx;

	/*
	 * Check for workable VMX
	 */
	if (cpu_ecxfeature & CPUIDECX_VMX) {
		msr = rdmsr(MSR_IA32_FEATURE_CONTROL);

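		/*
		 * If the feature control MSR is unlocked we may enable
		 * VMX ourselves; once firmware has locked it, VMX is
		 * only usable if it was already enabled.
		 */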
		if (!(msr & IA32_FEATURE_CONTROL_LOCK))
			ci->ci_vmm_flags |= CI_VMM_VMX;
		else {
			if (msr & IA32_FEATURE_CONTROL_VMX_EN)
				ci->ci_vmm_flags |= CI_VMM_VMX;
			else
				ci->ci_vmm_flags |= CI_VMM_DIS;
		}
	}

	/*
	 * Check for EPT (Intel Nested Paging) and other secondary
	 * controls
	 */
	if (ci->ci_vmm_flags & CI_VMM_VMX) {
		/* Secondary controls available? */
		/* XXX should we check true procbased ctls here if avail? */
		msr = rdmsr(IA32_VMX_PROCBASED_CTLS);
		if (msr & (IA32_VMX_ACTIVATE_SECONDARY_CONTROLS) << 32) {
			msr = rdmsr(IA32_VMX_PROCBASED2_CTLS);
			/* EPT available? */
			if (msr & (IA32_VMX_ENABLE_EPT) << 32)
				ci->ci_vmm_flags |= CI_VMM_EPT;
			/* VM Functions available? */
			if (msr & (IA32_VMX_ENABLE_VM_FUNCTIONS) << 32) {
				ci->ci_vmm_cap.vcc_vmx.vmx_vm_func =
				    rdmsr(IA32_VMX_VMFUNC);
			}
		}
	}

	/*
	 * Check startup config (VMX)
	 */
	if (ci->ci_vmm_flags & CI_VMM_VMX) {
		/* CR0 fixed and flexible bits */
		msr = rdmsr(IA32_VMX_CR0_FIXED0);
		ci->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0 = msr;
		msr = rdmsr(IA32_VMX_CR0_FIXED1);
		ci->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1 = msr;

		/* CR4 fixed and flexible bits */
		msr = rdmsr(IA32_VMX_CR4_FIXED0);
		ci->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0 = msr;
		msr = rdmsr(IA32_VMX_CR4_FIXED1);
		ci->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1 = msr;

		/* VMXON region revision ID (bits 30:0 of IA32_VMX_BASIC) */
		msr = rdmsr(IA32_VMX_BASIC);
		ci->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision =
			(uint32_t)(msr & 0x7FFFFFFF);

		/* MSR save / load table size */
		msr = rdmsr(IA32_VMX_MISC);
		ci->ci_vmm_cap.vcc_vmx.vmx_msr_table_size =
			(uint32_t)(msr & IA32_VMX_MSR_LIST_SIZE_MASK) >> 25;

		/* CR3 target count size */
		ci->ci_vmm_cap.vcc_vmx.vmx_cr3_tgt_count =
			(uint32_t)(msr & IA32_VMX_CR3_TGT_SIZE_MASK) >> 16;
	}

	/*
	 * Check for workable SVM
	 */
	if (ecpu_ecxfeature & CPUIDECX_SVM) {
		msr = rdmsr(MSR_AMD_VM_CR);

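		/* SVM is usable unless firmware set the VM_CR SVMDIS bit */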
		if (!(msr & AMD_SVMDIS))
			ci->ci_vmm_flags |= CI_VMM_SVM;

		CPUID(CPUID_AMD_SVM_CAP, dummy,
		    ci->ci_vmm_cap.vcc_svm.svm_max_asid, dummy, edx);

		if (ci->ci_vmm_cap.vcc_svm.svm_max_asid > 0xFFF)
			ci->ci_vmm_cap.vcc_svm.svm_max_asid = 0xFFF;

		if (edx & AMD_SVM_FLUSH_BY_ASID_CAP)
			ci->ci_vmm_cap.vcc_svm.svm_flush_by_asid = 1;

		if (edx & AMD_SVM_VMCB_CLEAN_CAP)
			ci->ci_vmm_cap.vcc_svm.svm_vmcb_clean = 1;

		if (edx & AMD_SVM_DECODE_ASSIST_CAP)
			ci->ci_vmm_cap.vcc_svm.svm_decode_assist = 1;
	}

	/*
	 * Check for SVM Nested Paging
	 */
	if ((ci->ci_vmm_flags & CI_VMM_SVM) &&
	    ci->ci_pnfeatset >= CPUID_AMD_SVM_CAP) {
		CPUID(CPUID_AMD_SVM_CAP, dummy, dummy, dummy, cap);
		if (cap & AMD_SVM_NESTED_PAGING_CAP)
			ci->ci_vmm_flags |= CI_VMM_RVI;
	}

	/*
	 * Check "L1 flush on VM entry" (Intel L1TF vuln) semantics
	 * Full details can be found here:
	 * https://software.intel.com/security-software-guidance/insights/deep-dive-intel-analysis-l1-terminal-fault
	 */
	if (ci->ci_vendor == CPUV_INTEL) {
		if (ci->ci_feature_sefflags_edx & SEFF0EDX_L1DF)
			ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr = 1;
		else
			ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr = 0;

		/*
		 * Certain CPUs may have the vulnerability remedied in
		 * hardware (RDCL_NO), or we may be nested in a VMM that
		 * is doing flushes (SKIP_L1DFL_VMENTRY) using the MSR.
		 * In either case no mitigation at all is necessary.
		 */
		if (ci->ci_feature_sefflags_edx & SEFF0EDX_ARCH_CAP) {
			msr = rdmsr(MSR_ARCH_CAPABILITIES);
			if ((msr & ARCH_CAP_RDCL_NO) ||
			    ((msr & ARCH_CAP_SKIP_L1DFL_VMENTRY) &&
			    ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr))
				ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr =
				    VMX_SKIP_L1D_FLUSH;
		}
	}
}
#endif /* NVMM > 0 */
