1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24/*
25 * Copyright (c) 2010, Intel Corporation.
26 * All rights reserved.
27 */
28/*
29 * Portions Copyright 2009 Advanced Micro Devices, Inc.
30 */
31
32/*
33 * Various routines to handle identification
34 * and classification of x86 processors.
35 */
36
37#include <sys/types.h>
38#include <sys/archsystm.h>
39#include <sys/x86_archext.h>
40#include <sys/kmem.h>
41#include <sys/systm.h>
42#include <sys/cmn_err.h>
43#include <sys/sunddi.h>
44#include <sys/sunndi.h>
45#include <sys/cpuvar.h>
46#include <sys/processor.h>
47#include <sys/sysmacros.h>
48#include <sys/pg.h>
49#include <sys/fp.h>
50#include <sys/controlregs.h>
51#include <sys/bitmap.h>
52#include <sys/auxv_386.h>
53#include <sys/memnode.h>
54#include <sys/pci_cfgspace.h>
55
56#ifdef __xpv
57#include <sys/hypervisor.h>
58#else
59#include <sys/ontrap.h>
60#endif
61
62/*
63 * Pass 0 of cpuid feature analysis happens in locore. It contains special code
64 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with
65 * them accordingly. For most modern processors, feature detection occurs here
66 * in pass 1.
67 *
68 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup()
69 * for the boot CPU and does the basic analysis that the early kernel needs.
70 * x86_featureset is set based on the return value of cpuid_pass1() of the boot
71 * CPU.
72 *
73 * Pass 1 includes:
74 *
75 *	o Determining vendor/model/family/stepping and setting x86_type and
76 *	  x86_vendor accordingly.
77 *	o Processing the feature flags returned by the cpuid instruction while
78 *	  applying any workarounds or tricks for the specific processor.
79 *	o Mapping the feature flags into Solaris feature bits (X86_*).
80 *	o Processing extended feature flags if supported by the processor,
81 *	  again while applying specific processor knowledge.
82 *	o Determining the CMT characteristics of the system.
83 *
84 * Pass 1 is done on non-boot CPUs during their initialization and the results
85 * are used only as a meager attempt at ensuring that all processors within the
86 * system support the same features.
87 *
88 * Pass 2 of cpuid feature analysis happens just at the beginning
89 * of startup().  It just copies in and corrects the remainder
90 * of the cpuid data we depend on: standard cpuid functions that we didn't
91 * need for pass1 feature analysis, and extended cpuid functions beyond the
92 * simple feature processing done in pass1.
93 *
94 * Pass 3 of cpuid analysis is invoked after basic kernel services; in
95 * particular kernel memory allocation has been made available. It creates a
96 * readable brand string based on the data collected in the first two passes.
97 *
98 * Pass 4 of cpuid analysis is invoked after post_startup() when all
99 * the support infrastructure for various hardware features has been
100 * initialized. It determines which processor features will be reported
101 * to userland via the aux vector.
102 *
103 * All passes are executed on all CPUs, but only the boot CPU determines what
104 * features the kernel will use.
105 *
106 * Much of the worst junk in this file is for the support of processors
107 * that didn't really implement the cpuid instruction properly.
108 *
109 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon,
110 * the pass numbers.  Accordingly, changes to the pass code may require changes
111 * to the accessor code.
112 */
113
114uint_t x86_vendor = X86_VENDOR_IntelClone;
115uint_t x86_type = X86_TYPE_OTHER;
116uint_t x86_clflush_size = 0;
117
118uint_t pentiumpro_bug4046376;
119uint_t pentiumpro_bug4064495;
120
121uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
122
123static char *x86_feature_names[NUM_X86_FEATURES] = {
124	"lgpg",
125	"tsc",
126	"msr",
127	"mtrr",
128	"pge",
129	"de",
130	"cmov",
131	"mmx",
132	"mca",
133	"pae",
134	"cv8",
135	"pat",
136	"sep",
137	"sse",
138	"sse2",
139	"htt",
140	"asysc",
141	"nx",
142	"sse3",
143	"cx16",
144	"cmp",
145	"tscp",
146	"mwait",
147	"sse4a",
148	"cpuid",
149	"ssse3",
150	"sse4_1",
151	"sse4_2",
152	"1gpg",
153	"clfsh",
154	"64",
155	"aes",
156	"pclmulqdq",
157	"xsave",
158	"avx" };
159
160boolean_t
161is_x86_feature(void *featureset, uint_t feature)
162{
163	ASSERT(feature < NUM_X86_FEATURES);
164	return (BT_TEST((ulong_t *)featureset, feature));
165}
166
167void
168add_x86_feature(void *featureset, uint_t feature)
169{
170	ASSERT(feature < NUM_X86_FEATURES);
171	BT_SET((ulong_t *)featureset, feature);
172}
173
174void
175remove_x86_feature(void *featureset, uint_t feature)
176{
177	ASSERT(feature < NUM_X86_FEATURES);
178	BT_CLEAR((ulong_t *)featureset, feature);
179}
180
181boolean_t
182compare_x86_featureset(void *setA, void *setB)
183{
184	/*
185	 * We assume that the unused bits of the bitmap are always zero.
186	 */
187	if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
188		return (B_TRUE);
189	} else {
190		return (B_FALSE);
191	}
192}
193
194void
195print_x86_featureset(void *featureset)
196{
197	uint_t i;
198
199	for (i = 0; i < NUM_X86_FEATURES; i++) {
200		if (is_x86_feature(featureset, i)) {
201			cmn_err(CE_CONT, "?x86_feature: %s\n",
202			    x86_feature_names[i]);
203		}
204	}
205}
206
207uint_t enable486;
208
209static size_t xsave_state_size = 0;
210uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
211boolean_t xsave_force_disable = B_FALSE;
212
213/*
214 * This is set to platform type Solaris is running on.
215 */
216static int platform_type = -1;
217
218#if !defined(__xpv)
219/*
220 * Variable to patch if hypervisor platform detection needs to be
221 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
222 */
223int enable_platform_detection = 1;
224#endif
225
226/*
227 * monitor/mwait info.
228 *
229 * size_actual and buf_actual are the real address and size allocated to get
230 * proper mwait_buf alignement.  buf_actual and size_actual should be passed
231 * to kmem_free().  Currently kmem_alloc() and mwait happen to both use
232 * processor cache-line alignment, but this is not guarantied in the furture.
233 */
234struct mwait_info {
235	size_t		mon_min;	/* min size to avoid missed wakeups */
236	size_t		mon_max;	/* size to avoid false wakeups */
237	size_t		size_actual;	/* size actually allocated */
238	void		*buf_actual;	/* memory actually allocated */
239	uint32_t	support;	/* processor support of monitor/mwait */
240};
241
242/*
243 * xsave/xrestor info.
244 *
245 * This structure contains HW feature bits and size of the xsave save area.
246 * Note: the kernel will use the maximum size required for all hardware
247 * features. It is not optimize for potential memory savings if features at
248 * the end of the save area are not enabled.
249 */
250struct xsave_info {
251	uint32_t	xsav_hw_features_low;   /* Supported HW features */
252	uint32_t	xsav_hw_features_high;  /* Supported HW features */
253	size_t		xsav_max_size;  /* max size save area for HW features */
254	size_t		ymm_size;	/* AVX: size of ymm save area */
255	size_t		ymm_offset;	/* AVX: offset for ymm save area */
256};
257
258
259/*
260 * These constants determine how many of the elements of the
261 * cpuid we cache in the cpuid_info data structure; the
262 * remaining elements are accessible via the cpuid instruction.
263 */
264
265#define	NMAX_CPI_STD	6		/* eax = 0 .. 5 */
266#define	NMAX_CPI_EXTD	0x1c		/* eax = 0x80000000 .. 0x8000001b */
267
268/*
269 * Some terminology needs to be explained:
270 *  - Socket: Something that can be plugged into a motherboard.
271 *  - Package: Same as socket
272 *  - Chip: Same as socket. Note that AMD's documentation uses term "chip"
273 *    differently: there, chip is the same as processor node (below)
274 *  - Processor node: Some AMD processors have more than one
275 *    "subprocessor" embedded in a package. These subprocessors (nodes)
276 *    are fully-functional processors themselves with cores, caches,
277 *    memory controllers, PCI configuration spaces. They are connected
278 *    inside the package with Hypertransport links. On single-node
279 *    processors, processor node is equivalent to chip/socket/package.
280 */
281
282struct cpuid_info {
283	uint_t cpi_pass;		/* last pass completed */
284	/*
285	 * standard function information
286	 */
287	uint_t cpi_maxeax;		/* fn 0: %eax */
288	char cpi_vendorstr[13];		/* fn 0: %ebx:%ecx:%edx */
289	uint_t cpi_vendor;		/* enum of cpi_vendorstr */
290
291	uint_t cpi_family;		/* fn 1: extended family */
292	uint_t cpi_model;		/* fn 1: extended model */
293	uint_t cpi_step;		/* fn 1: stepping */
294	chipid_t cpi_chipid;		/* fn 1: %ebx:  Intel: chip # */
295					/*		AMD: package/socket # */
296	uint_t cpi_brandid;		/* fn 1: %ebx: brand ID */
297	int cpi_clogid;			/* fn 1: %ebx: thread # */
298	uint_t cpi_ncpu_per_chip;	/* fn 1: %ebx: logical cpu count */
299	uint8_t cpi_cacheinfo[16];	/* fn 2: intel-style cache desc */
300	uint_t cpi_ncache;		/* fn 2: number of elements */
301	uint_t cpi_ncpu_shr_last_cache;	/* fn 4: %eax: ncpus sharing cache */
302	id_t cpi_last_lvl_cacheid;	/* fn 4: %eax: derived cache id */
303	uint_t cpi_std_4_size;		/* fn 4: number of fn 4 elements */
304	struct cpuid_regs **cpi_std_4;	/* fn 4: %ecx == 0 .. fn4_size */
305	struct cpuid_regs cpi_std[NMAX_CPI_STD];	/* 0 .. 5 */
306	/*
307	 * extended function information
308	 */
309	uint_t cpi_xmaxeax;		/* fn 0x80000000: %eax */
310	char cpi_brandstr[49];		/* fn 0x8000000[234] */
311	uint8_t cpi_pabits;		/* fn 0x80000006: %eax */
312	uint8_t	cpi_vabits;		/* fn 0x80000006: %eax */
313	struct	cpuid_regs cpi_extd[NMAX_CPI_EXTD];	/* 0x800000XX */
314
315	id_t cpi_coreid;		/* same coreid => strands share core */
316	int cpi_pkgcoreid;		/* core number within single package */
317	uint_t cpi_ncore_per_chip;	/* AMD: fn 0x80000008: %ecx[7-0] */
318					/* Intel: fn 4: %eax[31-26] */
319	/*
320	 * supported feature information
321	 */
322	uint32_t cpi_support[5];
323#define	STD_EDX_FEATURES	0
324#define	AMD_EDX_FEATURES	1
325#define	TM_EDX_FEATURES		2
326#define	STD_ECX_FEATURES	3
327#define	AMD_ECX_FEATURES	4
328	/*
329	 * Synthesized information, where known.
330	 */
331	uint32_t cpi_chiprev;		/* See X86_CHIPREV_* in x86_archext.h */
332	const char *cpi_chiprevstr;	/* May be NULL if chiprev unknown */
333	uint32_t cpi_socket;		/* Chip package/socket type */
334
335	struct mwait_info cpi_mwait;	/* fn 5: monitor/mwait info */
336	uint32_t cpi_apicid;
337	uint_t cpi_procnodeid;		/* AMD: nodeID on HT, Intel: chipid */
338	uint_t cpi_procnodes_per_pkg;	/* AMD: # of nodes in the package */
339					/* Intel: 1 */
340
341	struct xsave_info cpi_xsave;	/* fn D: xsave/xrestor info */
342};
343
344
345static struct cpuid_info cpuid_info0;
346
347/*
348 * These bit fields are defined by the Intel Application Note AP-485
349 * "Intel Processor Identification and the CPUID Instruction"
350 */
351#define	CPI_FAMILY_XTD(cpi)	BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
352#define	CPI_MODEL_XTD(cpi)	BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
353#define	CPI_TYPE(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
354#define	CPI_FAMILY(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
355#define	CPI_STEP(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
356#define	CPI_MODEL(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
357
358#define	CPI_FEATURES_EDX(cpi)		((cpi)->cpi_std[1].cp_edx)
359#define	CPI_FEATURES_ECX(cpi)		((cpi)->cpi_std[1].cp_ecx)
360#define	CPI_FEATURES_XTD_EDX(cpi)	((cpi)->cpi_extd[1].cp_edx)
361#define	CPI_FEATURES_XTD_ECX(cpi)	((cpi)->cpi_extd[1].cp_ecx)
362
363#define	CPI_BRANDID(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
364#define	CPI_CHUNKS(cpi)		BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
365#define	CPI_CPU_COUNT(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
366#define	CPI_APIC_ID(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
367
368#define	CPI_MAXEAX_MAX		0x100		/* sanity control */
369#define	CPI_XMAXEAX_MAX		0x80000100
370#define	CPI_FN4_ECX_MAX		0x20		/* sanity: max fn 4 levels */
371#define	CPI_FNB_ECX_MAX		0x20		/* sanity: max fn B levels */
372
373/*
374 * Function 4 (Deterministic Cache Parameters) macros
375 * Defined by Intel Application Note AP-485
376 */
377#define	CPI_NUM_CORES(regs)		BITX((regs)->cp_eax, 31, 26)
378#define	CPI_NTHR_SHR_CACHE(regs)	BITX((regs)->cp_eax, 25, 14)
379#define	CPI_FULL_ASSOC_CACHE(regs)	BITX((regs)->cp_eax, 9, 9)
380#define	CPI_SELF_INIT_CACHE(regs)	BITX((regs)->cp_eax, 8, 8)
381#define	CPI_CACHE_LVL(regs)		BITX((regs)->cp_eax, 7, 5)
382#define	CPI_CACHE_TYPE(regs)		BITX((regs)->cp_eax, 4, 0)
383#define	CPI_CPU_LEVEL_TYPE(regs)	BITX((regs)->cp_ecx, 15, 8)
384
385#define	CPI_CACHE_WAYS(regs)		BITX((regs)->cp_ebx, 31, 22)
386#define	CPI_CACHE_PARTS(regs)		BITX((regs)->cp_ebx, 21, 12)
387#define	CPI_CACHE_COH_LN_SZ(regs)	BITX((regs)->cp_ebx, 11, 0)
388
389#define	CPI_CACHE_SETS(regs)		BITX((regs)->cp_ecx, 31, 0)
390
391#define	CPI_PREFCH_STRIDE(regs)		BITX((regs)->cp_edx, 9, 0)
392
393
394/*
395 * A couple of shorthand macros to identify "later" P6-family chips
396 * like the Pentium M and Core.  First, the "older" P6-based stuff
397 * (loosely defined as "pre-Pentium-4"):
398 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
399 */
400
401#define	IS_LEGACY_P6(cpi) (			\
402	cpi->cpi_family == 6 && 		\
403		(cpi->cpi_model == 1 ||		\
404		cpi->cpi_model == 3 ||		\
405		cpi->cpi_model == 5 ||		\
406		cpi->cpi_model == 6 ||		\
407		cpi->cpi_model == 7 ||		\
408		cpi->cpi_model == 8 ||		\
409		cpi->cpi_model == 0xA ||	\
410		cpi->cpi_model == 0xB)		\
411)
412
413/* A "new F6" is everything with family 6 that's not the above */
414#define	IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
415
416/* Extended family/model support */
417#define	IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
418	cpi->cpi_family >= 0xf)
419
420/*
421 * Info for monitor/mwait idle loop.
422 *
423 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
424 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
425 * 2006.
426 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
427 * Documentation Updates" #33633, Rev 2.05, December 2006.
428 */
429#define	MWAIT_SUPPORT		(0x00000001)	/* mwait supported */
430#define	MWAIT_EXTENSIONS	(0x00000002)	/* extenstion supported */
431#define	MWAIT_ECX_INT_ENABLE	(0x00000004)	/* ecx 1 extension supported */
432#define	MWAIT_SUPPORTED(cpi)	((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
433#define	MWAIT_INT_ENABLE(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x2)
434#define	MWAIT_EXTENSION(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x1)
435#define	MWAIT_SIZE_MIN(cpi)	BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
436#define	MWAIT_SIZE_MAX(cpi)	BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
437/*
438 * Number of sub-cstates for a given c-state.
439 */
440#define	MWAIT_NUM_SUBC_STATES(cpi, c_state)			\
441	BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
442
443/*
444 * XSAVE leaf 0xD enumeration
445 */
446#define	CPUID_LEAFD_2_YMM_OFFSET	576
447#define	CPUID_LEAFD_2_YMM_SIZE		256
448
449/*
450 * Functions we consune from cpuid_subr.c;  don't publish these in a header
451 * file to try and keep people using the expected cpuid_* interfaces.
452 */
453extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
454extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
455extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
456extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
457extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
458
459/*
460 * Apply up various platform-dependent restrictions where the
461 * underlying platform restrictions mean the CPU can be marked
462 * as less capable than its cpuid instruction would imply.
463 */
464#if defined(__xpv)
465static void
466platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
467{
468	switch (eax) {
469	case 1: {
470		uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
471		    0 : CPUID_INTC_EDX_MCA;
472		cp->cp_edx &=
473		    ~(mcamask |
474		    CPUID_INTC_EDX_PSE |
475		    CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
476		    CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
477		    CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
478		    CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
479		    CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
480		break;
481	}
482
483	case 0x80000001:
484		cp->cp_edx &=
485		    ~(CPUID_AMD_EDX_PSE |
486		    CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
487		    CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
488		    CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
489		    CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
490		    CPUID_AMD_EDX_TSCP);
491		cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
492		break;
493	default:
494		break;
495	}
496
497	switch (vendor) {
498	case X86_VENDOR_Intel:
499		switch (eax) {
500		case 4:
501			/*
502			 * Zero out the (ncores-per-chip - 1) field
503			 */
504			cp->cp_eax &= 0x03fffffff;
505			break;
506		default:
507			break;
508		}
509		break;
510	case X86_VENDOR_AMD:
511		switch (eax) {
512
513		case 0x80000001:
514			cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
515			break;
516
517		case 0x80000008:
518			/*
519			 * Zero out the (ncores-per-chip - 1) field
520			 */
521			cp->cp_ecx &= 0xffffff00;
522			break;
523		default:
524			break;
525		}
526		break;
527	default:
528		break;
529	}
530}
531#else
532#define	platform_cpuid_mangle(vendor, eax, cp)	/* nothing */
533#endif
534
535/*
536 *  Some undocumented ways of patching the results of the cpuid
537 *  instruction to permit running Solaris 10 on future cpus that
538 *  we don't currently support.  Could be set to non-zero values
539 *  via settings in eeprom.
540 */
541
542uint32_t cpuid_feature_ecx_include;
543uint32_t cpuid_feature_ecx_exclude;
544uint32_t cpuid_feature_edx_include;
545uint32_t cpuid_feature_edx_exclude;
546
547/*
548 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
549 */
550void
551cpuid_alloc_space(cpu_t *cpu)
552{
553	/*
554	 * By convention, cpu0 is the boot cpu, which is set up
555	 * before memory allocation is available.  All other cpus get
556	 * their cpuid_info struct allocated here.
557	 */
558	ASSERT(cpu->cpu_id != 0);
559	ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
560	cpu->cpu_m.mcpu_cpi =
561	    kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
562}
563
564void
565cpuid_free_space(cpu_t *cpu)
566{
567	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
568	int i;
569
570	ASSERT(cpi != NULL);
571	ASSERT(cpi != &cpuid_info0);
572
573	/*
574	 * Free up any function 4 related dynamic storage
575	 */
576	for (i = 1; i < cpi->cpi_std_4_size; i++)
577		kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs));
578	if (cpi->cpi_std_4_size > 0)
579		kmem_free(cpi->cpi_std_4,
580		    cpi->cpi_std_4_size * sizeof (struct cpuid_regs *));
581
582	kmem_free(cpi, sizeof (*cpi));
583	cpu->cpu_m.mcpu_cpi = NULL;
584}
585
586#if !defined(__xpv)
587
588static void
589determine_platform()
590{
591	struct cpuid_regs cp;
592	char *xen_str;
593	uint32_t xen_signature[4], base;
594
595	platform_type = HW_NATIVE;
596
597	if (!enable_platform_detection)
598		return;
599
600	/*
601	 * In a fully virtualized domain, Xen's pseudo-cpuid function
602	 * returns a string representing the Xen signature in %ebx, %ecx,
603	 * and %edx. %eax contains the maximum supported cpuid function.
604	 * We need at least a (base + 2) leaf value to do what we want
605	 * to do. Try different base values, since the hypervisor might
606	 * use a different one depending on whether hyper-v emulation
607	 * is switched on by default or not.
608	 */
609	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
610		cp.cp_eax = base;
611		(void) __cpuid_insn(&cp);
612		xen_signature[0] = cp.cp_ebx;
613		xen_signature[1] = cp.cp_ecx;
614		xen_signature[2] = cp.cp_edx;
615		xen_signature[3] = 0;
616		xen_str = (char *)xen_signature;
617		if (strcmp("XenVMMXenVMM", xen_str) == 0 &&
618		    cp.cp_eax >= (base + 2)) {
619			platform_type = HW_XEN_HVM;
620			return;
621		}
622	}
623
624	if (vmware_platform()) /* running under vmware hypervisor? */
625		platform_type = HW_VMWARE;
626}
627
628int
629get_hwenv(void)
630{
631	if (platform_type == -1)
632		determine_platform();
633
634	return (platform_type);
635}
636
637int
638is_controldom(void)
639{
640	return (0);
641}
642
643#else
644
645int
646get_hwenv(void)
647{
648	return (HW_XEN_PV);
649}
650
651int
652is_controldom(void)
653{
654	return (DOMAIN_IS_INITDOMAIN(xen_info));
655}
656
657#endif	/* __xpv */
658
659static void
660cpuid_intel_getids(cpu_t *cpu, void *feature)
661{
662	uint_t i;
663	uint_t chipid_shift = 0;
664	uint_t coreid_shift = 0;
665	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
666
667	for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
668		chipid_shift++;
669
670	cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
671	cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
672
673	if (is_x86_feature(feature, X86FSET_CMP)) {
674		/*
675		 * Multi-core (and possibly multi-threaded)
676		 * processors.
677		 */
678		uint_t ncpu_per_core;
679		if (cpi->cpi_ncore_per_chip == 1)
680			ncpu_per_core = cpi->cpi_ncpu_per_chip;
681		else if (cpi->cpi_ncore_per_chip > 1)
682			ncpu_per_core = cpi->cpi_ncpu_per_chip /
683			    cpi->cpi_ncore_per_chip;
684		/*
685		 * 8bit APIC IDs on dual core Pentiums
686		 * look like this:
687		 *
688		 * +-----------------------+------+------+
689		 * | Physical Package ID   |  MC  |  HT  |
690		 * +-----------------------+------+------+
691		 * <------- chipid -------->
692		 * <------- coreid --------------->
693		 *			   <--- clogid -->
694		 *			   <------>
695		 *			   pkgcoreid
696		 *
697		 * Where the number of bits necessary to
698		 * represent MC and HT fields together equals
699		 * to the minimum number of bits necessary to
700		 * store the value of cpi->cpi_ncpu_per_chip.
701		 * Of those bits, the MC part uses the number
702		 * of bits necessary to store the value of
703		 * cpi->cpi_ncore_per_chip.
704		 */
705		for (i = 1; i < ncpu_per_core; i <<= 1)
706			coreid_shift++;
707		cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
708		cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
709	} else if (is_x86_feature(feature, X86FSET_HTT)) {
710		/*
711		 * Single-core multi-threaded processors.
712		 */
713		cpi->cpi_coreid = cpi->cpi_chipid;
714		cpi->cpi_pkgcoreid = 0;
715	}
716	cpi->cpi_procnodeid = cpi->cpi_chipid;
717}
718
719static void
720cpuid_amd_getids(cpu_t *cpu)
721{
722	int i, first_half, coreidsz;
723	uint32_t nb_caps_reg;
724	uint_t node2_1;
725	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
726
727	/*
728	 * AMD CMP chips currently have a single thread per core.
729	 *
730	 * Since no two cpus share a core we must assign a distinct coreid
731	 * per cpu, and we do this by using the cpu_id.  This scheme does not,
732	 * however, guarantee that sibling cores of a chip will have sequential
733	 * coreids starting at a multiple of the number of cores per chip -
734	 * that is usually the case, but if the ACPI MADT table is presented
735	 * in a different order then we need to perform a few more gymnastics
736	 * for the pkgcoreid.
737	 *
738	 * All processors in the system have the same number of enabled
739	 * cores. Cores within a processor are always numbered sequentially
740	 * from 0 regardless of how many or which are disabled, and there
741	 * is no way for operating system to discover the real core id when some
742	 * are disabled.
743	 */
744
745	cpi->cpi_coreid = cpu->cpu_id;
746
747	if (cpi->cpi_xmaxeax >= 0x80000008) {
748
749		coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
750
751		/*
752		 * In AMD parlance chip is really a node while Solaris
753		 * sees chip as equivalent to socket/package.
754		 */
755		cpi->cpi_ncore_per_chip =
756		    BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
757		if (coreidsz == 0) {
758			/* Use legacy method */
759			for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
760				coreidsz++;
761			if (coreidsz == 0)
762				coreidsz = 1;
763		}
764	} else {
765		/* Assume single-core part */
766		cpi->cpi_ncore_per_chip = 1;
767		coreidsz = 1;
768	}
769
770	cpi->cpi_clogid = cpi->cpi_pkgcoreid =
771	    cpi->cpi_apicid & ((1<<coreidsz) - 1);
772	cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip;
773
774	/* Get nodeID */
775	if (cpi->cpi_family == 0xf) {
776		cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
777		cpi->cpi_chipid = cpi->cpi_procnodeid;
778	} else if (cpi->cpi_family == 0x10) {
779		/*
780		 * See if we are a multi-node processor.
781		 * All processors in the system have the same number of nodes
782		 */
783		nb_caps_reg =  pci_getl_func(0, 24, 3, 0xe8);
784		if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
785			/* Single-node */
786			cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
787			    coreidsz);
788			cpi->cpi_chipid = cpi->cpi_procnodeid;
789		} else {
790
791			/*
792			 * Multi-node revision D (2 nodes per package
793			 * are supported)
794			 */
795			cpi->cpi_procnodes_per_pkg = 2;
796
797			first_half = (cpi->cpi_pkgcoreid <=
798			    (cpi->cpi_ncore_per_chip/2 - 1));
799
800			if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
801				/* We are BSP */
802				cpi->cpi_procnodeid = (first_half ? 0 : 1);
803				cpi->cpi_chipid = cpi->cpi_procnodeid >> 1;
804			} else {
805
806				/* We are AP */
807				/* NodeId[2:1] bits to use for reading F3xe8 */
808				node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
809
810				nb_caps_reg =
811				    pci_getl_func(0, 24 + node2_1, 3, 0xe8);
812
813				/*
814				 * Check IntNodeNum bit (31:30, but bit 31 is
815				 * always 0 on dual-node processors)
816				 */
817				if (BITX(nb_caps_reg, 30, 30) == 0)
818					cpi->cpi_procnodeid = node2_1 +
819					    !first_half;
820				else
821					cpi->cpi_procnodeid = node2_1 +
822					    first_half;
823
824				cpi->cpi_chipid = cpi->cpi_procnodeid >> 1;
825			}
826		}
827	} else if (cpi->cpi_family >= 0x11) {
828		cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
829		cpi->cpi_chipid = cpi->cpi_procnodeid;
830	} else {
831		cpi->cpi_procnodeid = 0;
832		cpi->cpi_chipid = cpi->cpi_procnodeid;
833	}
834}
835
836/*
837 * Setup XFeature_Enabled_Mask register. Required by xsave feature.
838 */
839void
840setup_xfem(void)
841{
842	uint64_t flags = XFEATURE_LEGACY_FP;
843
844	ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
845
846	if (is_x86_feature(x86_featureset, X86FSET_SSE))
847		flags |= XFEATURE_SSE;
848
849	if (is_x86_feature(x86_featureset, X86FSET_AVX))
850		flags |= XFEATURE_AVX;
851
852	set_xcr(XFEATURE_ENABLED_MASK, flags);
853
854	xsave_bv_all = flags;
855}
856
857void
858cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
859{
860	uint32_t mask_ecx, mask_edx;
861	struct cpuid_info *cpi;
862	struct cpuid_regs *cp;
863	int xcpuid;
864#if !defined(__xpv)
865	extern int idle_cpu_prefer_mwait;
866#endif
867
868#if !defined(__xpv)
869	determine_platform();
870#endif
871	/*
872	 * Space statically allocated for BSP, ensure pointer is set
873	 */
874	if (cpu->cpu_id == 0) {
875		if (cpu->cpu_m.mcpu_cpi == NULL)
876			cpu->cpu_m.mcpu_cpi = &cpuid_info0;
877	}
878
879	add_x86_feature(featureset, X86FSET_CPUID);
880
881	cpi = cpu->cpu_m.mcpu_cpi;
882	ASSERT(cpi != NULL);
883	cp = &cpi->cpi_std[0];
884	cp->cp_eax = 0;
885	cpi->cpi_maxeax = __cpuid_insn(cp);
886	{
887		uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
888		*iptr++ = cp->cp_ebx;
889		*iptr++ = cp->cp_edx;
890		*iptr++ = cp->cp_ecx;
891		*(char *)&cpi->cpi_vendorstr[12] = '\0';
892	}
893
894	cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
895	x86_vendor = cpi->cpi_vendor; /* for compatibility */
896
897	/*
898	 * Limit the range in case of weird hardware
899	 */
900	if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
901		cpi->cpi_maxeax = CPI_MAXEAX_MAX;
902	if (cpi->cpi_maxeax < 1)
903		goto pass1_done;
904
905	cp = &cpi->cpi_std[1];
906	cp->cp_eax = 1;
907	(void) __cpuid_insn(cp);
908
909	/*
910	 * Extract identifying constants for easy access.
911	 */
912	cpi->cpi_model = CPI_MODEL(cpi);
913	cpi->cpi_family = CPI_FAMILY(cpi);
914
915	if (cpi->cpi_family == 0xf)
916		cpi->cpi_family += CPI_FAMILY_XTD(cpi);
917
918	/*
919	 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
920	 * Intel, and presumably everyone else, uses model == 0xf, as
921	 * one would expect (max value means possible overflow).  Sigh.
922	 */
923
924	switch (cpi->cpi_vendor) {
925	case X86_VENDOR_Intel:
926		if (IS_EXTENDED_MODEL_INTEL(cpi))
927			cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
928		break;
929	case X86_VENDOR_AMD:
930		if (CPI_FAMILY(cpi) == 0xf)
931			cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
932		break;
933	default:
934		if (cpi->cpi_model == 0xf)
935			cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
936		break;
937	}
938
939	cpi->cpi_step = CPI_STEP(cpi);
940	cpi->cpi_brandid = CPI_BRANDID(cpi);
941
942	/*
943	 * *default* assumptions:
944	 * - believe %edx feature word
945	 * - ignore %ecx feature word
946	 * - 32-bit virtual and physical addressing
947	 */
948	mask_edx = 0xffffffff;
949	mask_ecx = 0;
950
951	cpi->cpi_pabits = cpi->cpi_vabits = 32;
952
953	switch (cpi->cpi_vendor) {
954	case X86_VENDOR_Intel:
955		if (cpi->cpi_family == 5)
956			x86_type = X86_TYPE_P5;
957		else if (IS_LEGACY_P6(cpi)) {
958			x86_type = X86_TYPE_P6;
959			pentiumpro_bug4046376 = 1;
960			pentiumpro_bug4064495 = 1;
961			/*
962			 * Clear the SEP bit when it was set erroneously
963			 */
964			if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
965				cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
966		} else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
967			x86_type = X86_TYPE_P4;
968			/*
969			 * We don't currently depend on any of the %ecx
970			 * features until Prescott, so we'll only check
971			 * this from P4 onwards.  We might want to revisit
972			 * that idea later.
973			 */
974			mask_ecx = 0xffffffff;
975		} else if (cpi->cpi_family > 0xf)
976			mask_ecx = 0xffffffff;
977		/*
978		 * We don't support MONITOR/MWAIT if leaf 5 is not available
979		 * to obtain the monitor linesize.
980		 */
981		if (cpi->cpi_maxeax < 5)
982			mask_ecx &= ~CPUID_INTC_ECX_MON;
983		break;
984	case X86_VENDOR_IntelClone:
985	default:
986		break;
987	case X86_VENDOR_AMD:
988#if defined(OPTERON_ERRATUM_108)
989		if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
990			cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
991			cpi->cpi_model = 0xc;
992		} else
993#endif
994		if (cpi->cpi_family == 5) {
995			/*
996			 * AMD K5 and K6
997			 *
998			 * These CPUs have an incomplete implementation
999			 * of MCA/MCE which we mask away.
1000			 */
1001			mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
1002
1003			/*
1004			 * Model 0 uses the wrong (APIC) bit
1005			 * to indicate PGE.  Fix it here.
1006			 */
1007			if (cpi->cpi_model == 0) {
1008				if (cp->cp_edx & 0x200) {
1009					cp->cp_edx &= ~0x200;
1010					cp->cp_edx |= CPUID_INTC_EDX_PGE;
1011				}
1012			}
1013
1014			/*
1015			 * Early models had problems w/ MMX; disable.
1016			 */
1017			if (cpi->cpi_model < 6)
1018				mask_edx &= ~CPUID_INTC_EDX_MMX;
1019		}
1020
1021		/*
1022		 * For newer families, SSE3 and CX16, at least, are valid;
1023		 * enable all
1024		 */
1025		if (cpi->cpi_family >= 0xf)
1026			mask_ecx = 0xffffffff;
1027		/*
1028		 * We don't support MONITOR/MWAIT if leaf 5 is not available
1029		 * to obtain the monitor linesize.
1030		 */
1031		if (cpi->cpi_maxeax < 5)
1032			mask_ecx &= ~CPUID_INTC_ECX_MON;
1033
1034#if !defined(__xpv)
1035		/*
1036		 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD
1037		 * processors.  AMD does not intend MWAIT to be used in the cpu
1038		 * idle loop on current and future processors.  10h and future
1039		 * AMD processors use more power in MWAIT than HLT.
1040		 * Pre-family-10h Opterons do not have the MWAIT instruction.
1041		 */
1042		idle_cpu_prefer_mwait = 0;
1043#endif
1044
1045		break;
1046	case X86_VENDOR_TM:
1047		/*
1048		 * workaround the NT workaround in CMS 4.1
1049		 */
1050		if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
1051		    (cpi->cpi_step == 2 || cpi->cpi_step == 3))
1052			cp->cp_edx |= CPUID_INTC_EDX_CX8;
1053		break;
1054	case X86_VENDOR_Centaur:
1055		/*
1056		 * workaround the NT workarounds again
1057		 */
1058		if (cpi->cpi_family == 6)
1059			cp->cp_edx |= CPUID_INTC_EDX_CX8;
1060		break;
1061	case X86_VENDOR_Cyrix:
1062		/*
1063		 * We rely heavily on the probing in locore
1064		 * to actually figure out what parts, if any,
1065		 * of the Cyrix cpuid instruction to believe.
1066		 */
1067		switch (x86_type) {
1068		case X86_TYPE_CYRIX_486:
1069			mask_edx = 0;
1070			break;
1071		case X86_TYPE_CYRIX_6x86:
1072			mask_edx = 0;
1073			break;
1074		case X86_TYPE_CYRIX_6x86L:
1075			mask_edx =
1076			    CPUID_INTC_EDX_DE |
1077			    CPUID_INTC_EDX_CX8;
1078			break;
1079		case X86_TYPE_CYRIX_6x86MX:
1080			mask_edx =
1081			    CPUID_INTC_EDX_DE |
1082			    CPUID_INTC_EDX_MSR |
1083			    CPUID_INTC_EDX_CX8 |
1084			    CPUID_INTC_EDX_PGE |
1085			    CPUID_INTC_EDX_CMOV |
1086			    CPUID_INTC_EDX_MMX;
1087			break;
1088		case X86_TYPE_CYRIX_GXm:
1089			mask_edx =
1090			    CPUID_INTC_EDX_MSR |
1091			    CPUID_INTC_EDX_CX8 |
1092			    CPUID_INTC_EDX_CMOV |
1093			    CPUID_INTC_EDX_MMX;
1094			break;
1095		case X86_TYPE_CYRIX_MediaGX:
1096			break;
1097		case X86_TYPE_CYRIX_MII:
1098		case X86_TYPE_VIA_CYRIX_III:
1099			mask_edx =
1100			    CPUID_INTC_EDX_DE |
1101			    CPUID_INTC_EDX_TSC |
1102			    CPUID_INTC_EDX_MSR |
1103			    CPUID_INTC_EDX_CX8 |
1104			    CPUID_INTC_EDX_PGE |
1105			    CPUID_INTC_EDX_CMOV |
1106			    CPUID_INTC_EDX_MMX;
1107			break;
1108		default:
1109			break;
1110		}
1111		break;
1112	}
1113
1114#if defined(__xpv)
1115	/*
1116	 * Do not support MONITOR/MWAIT under a hypervisor
1117	 */
1118	mask_ecx &= ~CPUID_INTC_ECX_MON;
1119	/*
1120	 * Do not support XSAVE under a hypervisor for now
1121	 */
1122	xsave_force_disable = B_TRUE;
1123
1124#endif	/* __xpv */
1125
1126	if (xsave_force_disable) {
1127		mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
1128		mask_ecx &= ~CPUID_INTC_ECX_AVX;
1129	}
1130
1131	/*
1132	 * Now we've figured out the masks that determine
1133	 * which bits we choose to believe, apply the masks
1134	 * to the feature words, then map the kernel's view
1135	 * of these feature words into its feature word.
1136	 */
1137	cp->cp_edx &= mask_edx;
1138	cp->cp_ecx &= mask_ecx;
1139
1140	/*
1141	 * apply any platform restrictions (we don't call this
1142	 * immediately after __cpuid_insn here, because we need the
1143	 * workarounds applied above first)
1144	 */
1145	platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
1146
1147	/*
1148	 * fold in overrides from the "eeprom" mechanism
1149	 */
1150	cp->cp_edx |= cpuid_feature_edx_include;
1151	cp->cp_edx &= ~cpuid_feature_edx_exclude;
1152
1153	cp->cp_ecx |= cpuid_feature_ecx_include;
1154	cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
1155
1156	if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
1157		add_x86_feature(featureset, X86FSET_LARGEPAGE);
1158	}
1159	if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
1160		add_x86_feature(featureset, X86FSET_TSC);
1161	}
1162	if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
1163		add_x86_feature(featureset, X86FSET_MSR);
1164	}
1165	if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
1166		add_x86_feature(featureset, X86FSET_MTRR);
1167	}
1168	if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
1169		add_x86_feature(featureset, X86FSET_PGE);
1170	}
1171	if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
1172		add_x86_feature(featureset, X86FSET_CMOV);
1173	}
1174	if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
1175		add_x86_feature(featureset, X86FSET_MMX);
1176	}
1177	if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
1178	    (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
1179		add_x86_feature(featureset, X86FSET_MCA);
1180	}
1181	if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
1182		add_x86_feature(featureset, X86FSET_PAE);
1183	}
1184	if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
1185		add_x86_feature(featureset, X86FSET_CX8);
1186	}
1187	if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
1188		add_x86_feature(featureset, X86FSET_CX16);
1189	}
1190	if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
1191		add_x86_feature(featureset, X86FSET_PAT);
1192	}
1193	if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
1194		add_x86_feature(featureset, X86FSET_SEP);
1195	}
1196	if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
1197		/*
1198		 * In our implementation, fxsave/fxrstor
1199		 * are prerequisites before we'll even
1200		 * try and do SSE things.
1201		 */
1202		if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
1203			add_x86_feature(featureset, X86FSET_SSE);
1204		}
1205		if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
1206			add_x86_feature(featureset, X86FSET_SSE2);
1207		}
1208		if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
1209			add_x86_feature(featureset, X86FSET_SSE3);
1210		}
1211		if (cpi->cpi_vendor == X86_VENDOR_Intel) {
1212			if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
1213				add_x86_feature(featureset, X86FSET_SSSE3);
1214			}
1215			if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
1216				add_x86_feature(featureset, X86FSET_SSE4_1);
1217			}
1218			if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
1219				add_x86_feature(featureset, X86FSET_SSE4_2);
1220			}
1221			if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
1222				add_x86_feature(featureset, X86FSET_AES);
1223			}
1224			if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
1225				add_x86_feature(featureset, X86FSET_PCLMULQDQ);
1226			}
1227
1228			if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
1229				add_x86_feature(featureset, X86FSET_XSAVE);
1230				/* We only test AVX when there is XSAVE */
1231				if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
1232					add_x86_feature(featureset,
1233					    X86FSET_AVX);
1234				}
1235			}
1236		}
1237	}
1238	if (cp->cp_edx & CPUID_INTC_EDX_DE) {
1239		add_x86_feature(featureset, X86FSET_DE);
1240	}
1241#if !defined(__xpv)
1242	if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
1243
1244		/*
1245		 * We require the CLFLUSH instruction for erratum workaround
1246		 * to use MONITOR/MWAIT.
1247		 */
1248		if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1249			cpi->cpi_mwait.support |= MWAIT_SUPPORT;
1250			add_x86_feature(featureset, X86FSET_MWAIT);
1251		} else {
1252			extern int idle_cpu_assert_cflush_monitor;
1253
1254			/*
1255			 * All processors we are aware of which have
1256			 * MONITOR/MWAIT also have CLFLUSH.
1257			 */
1258			if (idle_cpu_assert_cflush_monitor) {
1259				ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
1260				    (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
1261			}
1262		}
1263	}
1264#endif	/* __xpv */
1265
1266	/*
1267	 * Only need it first time, rest of the cpus would follow suite.
1268	 * we only capture this for the bootcpu.
1269	 */
1270	if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1271		add_x86_feature(featureset, X86FSET_CLFSH);
1272		x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
1273	}
1274	if (is_x86_feature(featureset, X86FSET_PAE))
1275		cpi->cpi_pabits = 36;
1276
1277	/*
1278	 * Hyperthreading configuration is slightly tricky on Intel
1279	 * and pure clones, and even trickier on AMD.
1280	 *
1281	 * (AMD chose to set the HTT bit on their CMP processors,
1282	 * even though they're not actually hyperthreaded.  Thus it
1283	 * takes a bit more work to figure out what's really going
1284	 * on ... see the handling of the CMP_LGCY bit below)
1285	 */
1286	if (cp->cp_edx & CPUID_INTC_EDX_HTT) {
1287		cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
1288		if (cpi->cpi_ncpu_per_chip > 1)
1289			add_x86_feature(featureset, X86FSET_HTT);
1290	} else {
1291		cpi->cpi_ncpu_per_chip = 1;
1292	}
1293
1294	/*
1295	 * Work on the "extended" feature information, doing
1296	 * some basic initialization for cpuid_pass2()
1297	 */
1298	xcpuid = 0;
1299	switch (cpi->cpi_vendor) {
1300	case X86_VENDOR_Intel:
1301		if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf)
1302			xcpuid++;
1303		break;
1304	case X86_VENDOR_AMD:
1305		if (cpi->cpi_family > 5 ||
1306		    (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
1307			xcpuid++;
1308		break;
1309	case X86_VENDOR_Cyrix:
1310		/*
1311		 * Only these Cyrix CPUs are -known- to support
1312		 * extended cpuid operations.
1313		 */
1314		if (x86_type == X86_TYPE_VIA_CYRIX_III ||
1315		    x86_type == X86_TYPE_CYRIX_GXm)
1316			xcpuid++;
1317		break;
1318	case X86_VENDOR_Centaur:
1319	case X86_VENDOR_TM:
1320	default:
1321		xcpuid++;
1322		break;
1323	}
1324
1325	if (xcpuid) {
1326		cp = &cpi->cpi_extd[0];
1327		cp->cp_eax = 0x80000000;
1328		cpi->cpi_xmaxeax = __cpuid_insn(cp);
1329	}
1330
1331	if (cpi->cpi_xmaxeax & 0x80000000) {
1332
1333		if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
1334			cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
1335
1336		switch (cpi->cpi_vendor) {
1337		case X86_VENDOR_Intel:
1338		case X86_VENDOR_AMD:
1339			if (cpi->cpi_xmaxeax < 0x80000001)
1340				break;
1341			cp = &cpi->cpi_extd[1];
1342			cp->cp_eax = 0x80000001;
1343			(void) __cpuid_insn(cp);
1344
1345			if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1346			    cpi->cpi_family == 5 &&
1347			    cpi->cpi_model == 6 &&
1348			    cpi->cpi_step == 6) {
1349				/*
1350				 * K6 model 6 uses bit 10 to indicate SYSC
1351				 * Later models use bit 11. Fix it here.
1352				 */
1353				if (cp->cp_edx & 0x400) {
1354					cp->cp_edx &= ~0x400;
1355					cp->cp_edx |= CPUID_AMD_EDX_SYSC;
1356				}
1357			}
1358
1359			platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
1360
1361			/*
1362			 * Compute the additions to the kernel's feature word.
1363			 */
1364			if (cp->cp_edx & CPUID_AMD_EDX_NX) {
1365				add_x86_feature(featureset, X86FSET_NX);
1366			}
1367
1368			/*
1369			 * Regardless whether or not we boot 64-bit,
1370			 * we should have a way to identify whether
1371			 * the CPU is capable of running 64-bit.
1372			 */
1373			if (cp->cp_edx & CPUID_AMD_EDX_LM) {
1374				add_x86_feature(featureset, X86FSET_64);
1375			}
1376
1377#if defined(__amd64)
1378			/* 1 GB large page - enable only for 64 bit kernel */
1379			if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
1380				add_x86_feature(featureset, X86FSET_1GPG);
1381			}
1382#endif
1383
1384			if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
1385			    (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
1386			    (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
1387				add_x86_feature(featureset, X86FSET_SSE4A);
1388			}
1389
1390			/*
1391			 * If both the HTT and CMP_LGCY bits are set,
1392			 * then we're not actually HyperThreaded.  Read
1393			 * "AMD CPUID Specification" for more details.
1394			 */
1395			if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1396			    is_x86_feature(featureset, X86FSET_HTT) &&
1397			    (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) {
1398				remove_x86_feature(featureset, X86FSET_HTT);
1399				add_x86_feature(featureset, X86FSET_CMP);
1400			}
1401#if defined(__amd64)
1402			/*
1403			 * It's really tricky to support syscall/sysret in
1404			 * the i386 kernel; we rely on sysenter/sysexit
1405			 * instead.  In the amd64 kernel, things are -way-
1406			 * better.
1407			 */
1408			if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
1409				add_x86_feature(featureset, X86FSET_ASYSC);
1410			}
1411
1412			/*
1413			 * While we're thinking about system calls, note
1414			 * that AMD processors don't support sysenter
1415			 * in long mode at all, so don't try to program them.
1416			 */
1417			if (x86_vendor == X86_VENDOR_AMD) {
1418				remove_x86_feature(featureset, X86FSET_SEP);
1419			}
1420#endif
1421			if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
1422				add_x86_feature(featureset, X86FSET_TSCP);
1423			}
1424			break;
1425		default:
1426			break;
1427		}
1428
1429		/*
1430		 * Get CPUID data about processor cores and hyperthreads.
1431		 */
1432		switch (cpi->cpi_vendor) {
1433		case X86_VENDOR_Intel:
1434			if (cpi->cpi_maxeax >= 4) {
1435				cp = &cpi->cpi_std[4];
1436				cp->cp_eax = 4;
1437				cp->cp_ecx = 0;
1438				(void) __cpuid_insn(cp);
1439				platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
1440			}
1441			/*FALLTHROUGH*/
1442		case X86_VENDOR_AMD:
1443			if (cpi->cpi_xmaxeax < 0x80000008)
1444				break;
1445			cp = &cpi->cpi_extd[8];
1446			cp->cp_eax = 0x80000008;
1447			(void) __cpuid_insn(cp);
1448			platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp);
1449
1450			/*
1451			 * Virtual and physical address limits from
1452			 * cpuid override previously guessed values.
1453			 */
1454			cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
1455			cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
1456			break;
1457		default:
1458			break;
1459		}
1460
1461		/*
1462		 * Derive the number of cores per chip
1463		 */
1464		switch (cpi->cpi_vendor) {
1465		case X86_VENDOR_Intel:
1466			if (cpi->cpi_maxeax < 4) {
1467				cpi->cpi_ncore_per_chip = 1;
1468				break;
1469			} else {
1470				cpi->cpi_ncore_per_chip =
1471				    BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1;
1472			}
1473			break;
1474		case X86_VENDOR_AMD:
1475			if (cpi->cpi_xmaxeax < 0x80000008) {
1476				cpi->cpi_ncore_per_chip = 1;
1477				break;
1478			} else {
1479				/*
1480				 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is
1481				 * 1 less than the number of physical cores on
1482				 * the chip.  In family 0x10 this value can
1483				 * be affected by "downcoring" - it reflects
1484				 * 1 less than the number of cores actually
1485				 * enabled on this node.
1486				 */
1487				cpi->cpi_ncore_per_chip =
1488				    BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
1489			}
1490			break;
1491		default:
1492			cpi->cpi_ncore_per_chip = 1;
1493			break;
1494		}
1495
1496		/*
1497		 * Get CPUID data about TSC Invariance in Deep C-State.
1498		 */
1499		switch (cpi->cpi_vendor) {
1500		case X86_VENDOR_Intel:
1501			if (cpi->cpi_maxeax >= 7) {
1502				cp = &cpi->cpi_extd[7];
1503				cp->cp_eax = 0x80000007;
1504				cp->cp_ecx = 0;
1505				(void) __cpuid_insn(cp);
1506			}
1507			break;
1508		default:
1509			break;
1510		}
1511	} else {
1512		cpi->cpi_ncore_per_chip = 1;
1513	}
1514
1515	/*
1516	 * If more than one core, then this processor is CMP.
1517	 */
1518	if (cpi->cpi_ncore_per_chip > 1) {
1519		add_x86_feature(featureset, X86FSET_CMP);
1520	}
1521
1522	/*
1523	 * If the number of cores is the same as the number
1524	 * of CPUs, then we cannot have HyperThreading.
1525	 */
1526	if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) {
1527		remove_x86_feature(featureset, X86FSET_HTT);
1528	}
1529
1530	cpi->cpi_apicid = CPI_APIC_ID(cpi);
1531	cpi->cpi_procnodes_per_pkg = 1;
1532	if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE &&
1533	    is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) {
1534		/*
1535		 * Single-core single-threaded processors.
1536		 */
1537		cpi->cpi_chipid = -1;
1538		cpi->cpi_clogid = 0;
1539		cpi->cpi_coreid = cpu->cpu_id;
1540		cpi->cpi_pkgcoreid = 0;
1541		if (cpi->cpi_vendor == X86_VENDOR_AMD)
1542			cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
1543		else
1544			cpi->cpi_procnodeid = cpi->cpi_chipid;
1545	} else if (cpi->cpi_ncpu_per_chip > 1) {
1546		if (cpi->cpi_vendor == X86_VENDOR_Intel)
1547			cpuid_intel_getids(cpu, featureset);
1548		else if (cpi->cpi_vendor == X86_VENDOR_AMD)
1549			cpuid_amd_getids(cpu);
1550		else {
1551			/*
1552			 * All other processors are currently
1553			 * assumed to have single cores.
1554			 */
1555			cpi->cpi_coreid = cpi->cpi_chipid;
1556			cpi->cpi_pkgcoreid = 0;
1557			cpi->cpi_procnodeid = cpi->cpi_chipid;
1558		}
1559	}
1560
1561	/*
1562	 * Synthesize chip "revision" and socket type
1563	 */
1564	cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
1565	    cpi->cpi_model, cpi->cpi_step);
1566	cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
1567	    cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
1568	cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
1569	    cpi->cpi_model, cpi->cpi_step);
1570
1571pass1_done:
1572	cpi->cpi_pass = 1;
1573}
1574
1575/*
1576 * Make copies of the cpuid table entries we depend on, in
1577 * part for ease of parsing now, in part so that we have only
1578 * one place to correct any of it, in part for ease of
1579 * later export to userland, and in part so we can look at
1580 * this stuff in a crash dump.
1581 */
1582
1583/*ARGSUSED*/
1584void
1585cpuid_pass2(cpu_t *cpu)
1586{
1587	uint_t n, nmax;
1588	int i;
1589	struct cpuid_regs *cp;
1590	uint8_t *dp;
1591	uint32_t *iptr;
1592	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1593
1594	ASSERT(cpi->cpi_pass == 1);
1595
1596	if (cpi->cpi_maxeax < 1)
1597		goto pass2_done;
1598
1599	if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
1600		nmax = NMAX_CPI_STD;
1601	/*
1602	 * (We already handled n == 0 and n == 1 in pass 1)
1603	 */
1604	for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
1605		cp->cp_eax = n;
1606
1607		/*
1608		 * CPUID function 4 expects %ecx to be initialized
1609		 * with an index which indicates which cache to return
1610		 * information about. The OS is expected to call function 4
1611		 * with %ecx set to 0, 1, 2, ... until it returns with
1612		 * EAX[4:0] set to 0, which indicates there are no more
1613		 * caches.
1614		 *
1615		 * Here, populate cpi_std[4] with the information returned by
1616		 * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
1617		 * when dynamic memory allocation becomes available.
1618		 *
1619		 * Note: we need to explicitly initialize %ecx here, since
1620		 * function 4 may have been previously invoked.
1621		 */
1622		if (n == 4)
1623			cp->cp_ecx = 0;
1624
1625		(void) __cpuid_insn(cp);
1626		platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
1627		switch (n) {
1628		case 2:
1629			/*
1630			 * "the lower 8 bits of the %eax register
1631			 * contain a value that identifies the number
1632			 * of times the cpuid [instruction] has to be
1633			 * executed to obtain a complete image of the
1634			 * processor's caching systems."
1635			 *
1636			 * How *do* they make this stuff up?
1637			 */
1638			cpi->cpi_ncache = sizeof (*cp) *
1639			    BITX(cp->cp_eax, 7, 0);
1640			if (cpi->cpi_ncache == 0)
1641				break;
1642			cpi->cpi_ncache--;	/* skip count byte */
1643
1644			/*
1645			 * Well, for now, rather than attempt to implement
1646			 * this slightly dubious algorithm, we just look
1647			 * at the first 15 ..
1648			 */
1649			if (cpi->cpi_ncache > (sizeof (*cp) - 1))
1650				cpi->cpi_ncache = sizeof (*cp) - 1;
1651
1652			dp = cpi->cpi_cacheinfo;
1653			if (BITX(cp->cp_eax, 31, 31) == 0) {
1654				uint8_t *p = (void *)&cp->cp_eax;
1655				for (i = 1; i < 4; i++)
1656					if (p[i] != 0)
1657						*dp++ = p[i];
1658			}
1659			if (BITX(cp->cp_ebx, 31, 31) == 0) {
1660				uint8_t *p = (void *)&cp->cp_ebx;
1661				for (i = 0; i < 4; i++)
1662					if (p[i] != 0)
1663						*dp++ = p[i];
1664			}
1665			if (BITX(cp->cp_ecx, 31, 31) == 0) {
1666				uint8_t *p = (void *)&cp->cp_ecx;
1667				for (i = 0; i < 4; i++)
1668					if (p[i] != 0)
1669						*dp++ = p[i];
1670			}
1671			if (BITX(cp->cp_edx, 31, 31) == 0) {
1672				uint8_t *p = (void *)&cp->cp_edx;
1673				for (i = 0; i < 4; i++)
1674					if (p[i] != 0)
1675						*dp++ = p[i];
1676			}
1677			break;
1678
1679		case 3:	/* Processor serial number, if PSN supported */
1680			break;
1681
1682		case 4:	/* Deterministic cache parameters */
1683			break;
1684
1685		case 5:	/* Monitor/Mwait parameters */
1686		{
1687			size_t mwait_size;
1688
1689			/*
1690			 * check cpi_mwait.support which was set in cpuid_pass1
1691			 */
1692			if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
1693				break;
1694
1695			/*
1696			 * Protect ourself from insane mwait line size.
1697			 * Workaround for incomplete hardware emulator(s).
1698			 */
1699			mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
1700			if (mwait_size < sizeof (uint32_t) ||
1701			    !ISP2(mwait_size)) {
1702#if DEBUG
1703				cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
1704				    "size %ld", cpu->cpu_id, (long)mwait_size);
1705#endif
1706				break;
1707			}
1708
1709			cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
1710			cpi->cpi_mwait.mon_max = mwait_size;
1711			if (MWAIT_EXTENSION(cpi)) {
1712				cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
1713				if (MWAIT_INT_ENABLE(cpi))
1714					cpi->cpi_mwait.support |=
1715					    MWAIT_ECX_INT_ENABLE;
1716			}
1717			break;
1718		}
1719		default:
1720			break;
1721		}
1722	}
1723
1724	if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) {
1725		struct cpuid_regs regs;
1726
1727		cp = &regs;
1728		cp->cp_eax = 0xB;
1729		cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1730
1731		(void) __cpuid_insn(cp);
1732
1733		/*
1734		 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1735		 * indicates that the extended topology enumeration leaf is
1736		 * available.
1737		 */
1738		if (cp->cp_ebx) {
1739			uint32_t x2apic_id;
1740			uint_t coreid_shift = 0;
1741			uint_t ncpu_per_core = 1;
1742			uint_t chipid_shift = 0;
1743			uint_t ncpu_per_chip = 1;
1744			uint_t i;
1745			uint_t level;
1746
1747			for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1748				cp->cp_eax = 0xB;
1749				cp->cp_ecx = i;
1750
1751				(void) __cpuid_insn(cp);
1752				level = CPI_CPU_LEVEL_TYPE(cp);
1753
1754				if (level == 1) {
1755					x2apic_id = cp->cp_edx;
1756					coreid_shift = BITX(cp->cp_eax, 4, 0);
1757					ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1758				} else if (level == 2) {
1759					x2apic_id = cp->cp_edx;
1760					chipid_shift = BITX(cp->cp_eax, 4, 0);
1761					ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1762				}
1763			}
1764
1765			cpi->cpi_apicid = x2apic_id;
1766			cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1767			cpi->cpi_ncore_per_chip = ncpu_per_chip /
1768			    ncpu_per_core;
1769			cpi->cpi_chipid = x2apic_id >> chipid_shift;
1770			cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1771			cpi->cpi_coreid = x2apic_id >> coreid_shift;
1772			cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1773		}
1774
1775		/* Make cp NULL so that we don't stumble on others */
1776		cp = NULL;
1777	}
1778
1779	/*
1780	 * XSAVE enumeration
1781	 */
1782	if (cpi->cpi_maxeax >= 0xD && cpi->cpi_vendor == X86_VENDOR_Intel) {
1783		struct cpuid_regs regs;
1784		boolean_t cpuid_d_valid = B_TRUE;
1785
1786		cp = &regs;
1787		cp->cp_eax = 0xD;
1788		cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1789
1790		(void) __cpuid_insn(cp);
1791
1792		/*
1793		 * Sanity checks for debug
1794		 */
1795		if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
1796		    (cp->cp_eax & XFEATURE_SSE) == 0) {
1797			cpuid_d_valid = B_FALSE;
1798		}
1799
1800		cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
1801		cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
1802		cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
1803
1804		/*
1805		 * If the hw supports AVX, get the size and offset in the save
1806		 * area for the ymm state.
1807		 */
1808		if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
1809			cp->cp_eax = 0xD;
1810			cp->cp_ecx = 2;
1811			cp->cp_edx = cp->cp_ebx = 0;
1812
1813			(void) __cpuid_insn(cp);
1814
1815			if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
1816			    cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
1817				cpuid_d_valid = B_FALSE;
1818			}
1819
1820			cpi->cpi_xsave.ymm_size = cp->cp_eax;
1821			cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
1822		}
1823
1824		if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
1825			xsave_state_size = 0;
1826		} else if (cpuid_d_valid) {
1827			xsave_state_size = cpi->cpi_xsave.xsav_max_size;
1828		} else {
1829			/* Broken CPUID 0xD, probably in HVM */
1830			cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
1831			    "value: hw_low = %d, hw_high = %d, xsave_size = %d"
1832			    ", ymm_size = %d, ymm_offset = %d\n",
1833			    cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
1834			    cpi->cpi_xsave.xsav_hw_features_high,
1835			    (int)cpi->cpi_xsave.xsav_max_size,
1836			    (int)cpi->cpi_xsave.ymm_size,
1837			    (int)cpi->cpi_xsave.ymm_offset);
1838
1839			if (xsave_state_size != 0) {
1840				/*
1841				 * This must be a non-boot CPU. We cannot
1842				 * continue, because boot cpu has already
1843				 * enabled XSAVE.
1844				 */
1845				ASSERT(cpu->cpu_id != 0);
1846				cmn_err(CE_PANIC, "cpu%d: we have already "
1847				    "enabled XSAVE on boot cpu, cannot "
1848				    "continue.", cpu->cpu_id);
1849			} else {
1850				/*
1851				 * Must be from boot CPU, OK to disable XSAVE.
1852				 */
1853				ASSERT(cpu->cpu_id == 0);
1854				remove_x86_feature(x86_featureset,
1855				    X86FSET_XSAVE);
1856				remove_x86_feature(x86_featureset, X86FSET_AVX);
1857				CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_XSAVE;
1858				CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_AVX;
1859				xsave_force_disable = B_TRUE;
1860			}
1861		}
1862	}
1863
1864
1865	if ((cpi->cpi_xmaxeax & 0x80000000) == 0)
1866		goto pass2_done;
1867
1868	if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD)
1869		nmax = NMAX_CPI_EXTD;
1870	/*
1871	 * Copy the extended properties, fixing them as we go.
1872	 * (We already handled n == 0 and n == 1 in pass 1)
1873	 */
1874	iptr = (void *)cpi->cpi_brandstr;
1875	for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
1876		cp->cp_eax = 0x80000000 + n;
1877		(void) __cpuid_insn(cp);
1878		platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp);
1879		switch (n) {
1880		case 2:
1881		case 3:
1882		case 4:
1883			/*
1884			 * Extract the brand string
1885			 */
1886			*iptr++ = cp->cp_eax;
1887			*iptr++ = cp->cp_ebx;
1888			*iptr++ = cp->cp_ecx;
1889			*iptr++ = cp->cp_edx;
1890			break;
1891		case 5:
1892			switch (cpi->cpi_vendor) {
1893			case X86_VENDOR_AMD:
1894				/*
1895				 * The Athlon and Duron were the first
1896				 * parts to report the sizes of the
1897				 * TLB for large pages. Before then,
1898				 * we don't trust the data.
1899				 */
1900				if (cpi->cpi_family < 6 ||
1901				    (cpi->cpi_family == 6 &&
1902				    cpi->cpi_model < 1))
1903					cp->cp_eax = 0;
1904				break;
1905			default:
1906				break;
1907			}
1908			break;
1909		case 6:
1910			switch (cpi->cpi_vendor) {
1911			case X86_VENDOR_AMD:
1912				/*
1913				 * The Athlon and Duron were the first
1914				 * AMD parts with L2 TLB's.
1915				 * Before then, don't trust the data.
1916				 */
1917				if (cpi->cpi_family < 6 ||
1918				    cpi->cpi_family == 6 &&
1919				    cpi->cpi_model < 1)
1920					cp->cp_eax = cp->cp_ebx = 0;
1921				/*
1922				 * AMD Duron rev A0 reports L2
1923				 * cache size incorrectly as 1K
1924				 * when it is really 64K
1925				 */
1926				if (cpi->cpi_family == 6 &&
1927				    cpi->cpi_model == 3 &&
1928				    cpi->cpi_step == 0) {
1929					cp->cp_ecx &= 0xffff;
1930					cp->cp_ecx |= 0x400000;
1931				}
1932				break;
1933			case X86_VENDOR_Cyrix:	/* VIA C3 */
1934				/*
1935				 * VIA C3 processors are a bit messed
1936				 * up w.r.t. encoding cache sizes in %ecx
1937				 */
1938				if (cpi->cpi_family != 6)
1939					break;
1940				/*
1941				 * model 7 and 8 were incorrectly encoded
1942				 *
1943				 * xxx is model 8 really broken?
1944				 */
1945				if (cpi->cpi_model == 7 ||
1946				    cpi->cpi_model == 8)
1947					cp->cp_ecx =
1948					    BITX(cp->cp_ecx, 31, 24) << 16 |
1949					    BITX(cp->cp_ecx, 23, 16) << 12 |
1950					    BITX(cp->cp_ecx, 15, 8) << 8 |
1951					    BITX(cp->cp_ecx, 7, 0);
1952				/*
1953				 * model 9 stepping 1 has wrong associativity
1954				 */
1955				if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
1956					cp->cp_ecx |= 8 << 12;
1957				break;
1958			case X86_VENDOR_Intel:
1959				/*
1960				 * Extended L2 Cache features function.
1961				 * First appeared on Prescott.
1962				 */
1963			default:
1964				break;
1965			}
1966			break;
1967		default:
1968			break;
1969		}
1970	}
1971
1972pass2_done:
1973	cpi->cpi_pass = 2;
1974}
1975
1976static const char *
1977intel_cpubrand(const struct cpuid_info *cpi)
1978{
1979	int i;
1980
1981	if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
1982	    cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
1983		return ("i486");
1984
1985	switch (cpi->cpi_family) {
1986	case 5:
1987		return ("Intel Pentium(r)");
1988	case 6:
1989		switch (cpi->cpi_model) {
1990			uint_t celeron, xeon;
1991			const struct cpuid_regs *cp;
1992		case 0:
1993		case 1:
1994		case 2:
1995			return ("Intel Pentium(r) Pro");
1996		case 3:
1997		case 4:
1998			return ("Intel Pentium(r) II");
1999		case 6:
2000			return ("Intel Celeron(r)");
2001		case 5:
2002		case 7:
2003			celeron = xeon = 0;
2004			cp = &cpi->cpi_std[2];	/* cache info */
2005
2006			for (i = 1; i < 4; i++) {
2007				uint_t tmp;
2008
2009				tmp = (cp->cp_eax >> (8 * i)) & 0xff;
2010				if (tmp == 0x40)
2011					celeron++;
2012				if (tmp >= 0x44 && tmp <= 0x45)
2013					xeon++;
2014			}
2015
2016			for (i = 0; i < 2; i++) {
2017				uint_t tmp;
2018
2019				tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
2020				if (tmp == 0x40)
2021					celeron++;
2022				else if (tmp >= 0x44 && tmp <= 0x45)
2023					xeon++;
2024			}
2025
2026			for (i = 0; i < 4; i++) {
2027				uint_t tmp;
2028
2029				tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
2030				if (tmp == 0x40)
2031					celeron++;
2032				else if (tmp >= 0x44 && tmp <= 0x45)
2033					xeon++;
2034			}
2035
2036			for (i = 0; i < 4; i++) {
2037				uint_t tmp;
2038
2039				tmp = (cp->cp_edx >> (8 * i)) & 0xff;
2040				if (tmp == 0x40)
2041					celeron++;
2042				else if (tmp >= 0x44 && tmp <= 0x45)
2043					xeon++;
2044			}
2045
2046			if (celeron)
2047				return ("Intel Celeron(r)");
2048			if (xeon)
2049				return (cpi->cpi_model == 5 ?
2050				    "Intel Pentium(r) II Xeon(tm)" :
2051				    "Intel Pentium(r) III Xeon(tm)");
2052			return (cpi->cpi_model == 5 ?
2053			    "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
2054			    "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
2055		default:
2056			break;
2057		}
2058	default:
2059		break;
2060	}
2061
2062	/* BrandID is present if the field is nonzero */
2063	if (cpi->cpi_brandid != 0) {
2064		static const struct {
2065			uint_t bt_bid;
2066			const char *bt_str;
2067		} brand_tbl[] = {
2068			{ 0x1,	"Intel(r) Celeron(r)" },
2069			{ 0x2,	"Intel(r) Pentium(r) III" },
2070			{ 0x3,	"Intel(r) Pentium(r) III Xeon(tm)" },
2071			{ 0x4,	"Intel(r) Pentium(r) III" },
2072			{ 0x6,	"Mobile Intel(r) Pentium(r) III" },
2073			{ 0x7,	"Mobile Intel(r) Celeron(r)" },
2074			{ 0x8,	"Intel(r) Pentium(r) 4" },
2075			{ 0x9,	"Intel(r) Pentium(r) 4" },
2076			{ 0xa,	"Intel(r) Celeron(r)" },
2077			{ 0xb,	"Intel(r) Xeon(tm)" },
2078			{ 0xc,	"Intel(r) Xeon(tm) MP" },
2079			{ 0xe,	"Mobile Intel(r) Pentium(r) 4" },
2080			{ 0xf,	"Mobile Intel(r) Celeron(r)" },
2081			{ 0x11, "Mobile Genuine Intel(r)" },
2082			{ 0x12, "Intel(r) Celeron(r) M" },
2083			{ 0x13, "Mobile Intel(r) Celeron(r)" },
2084			{ 0x14, "Intel(r) Celeron(r)" },
2085			{ 0x15, "Mobile Genuine Intel(r)" },
2086			{ 0x16,	"Intel(r) Pentium(r) M" },
2087			{ 0x17, "Mobile Intel(r) Celeron(r)" }
2088		};
2089		uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
2090		uint_t sgn;
2091
2092		sgn = (cpi->cpi_family << 8) |
2093		    (cpi->cpi_model << 4) | cpi->cpi_step;
2094
2095		for (i = 0; i < btblmax; i++)
2096			if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
2097				break;
2098		if (i < btblmax) {
2099			if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
2100				return ("Intel(r) Celeron(r)");
2101			if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
2102				return ("Intel(r) Xeon(tm) MP");
2103			if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
2104				return ("Intel(r) Xeon(tm)");
2105			return (brand_tbl[i].bt_str);
2106		}
2107	}
2108
2109	return (NULL);
2110}
2111
2112static const char *
2113amd_cpubrand(const struct cpuid_info *cpi)
2114{
2115	if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2116	    cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2117		return ("i486 compatible");
2118
2119	switch (cpi->cpi_family) {
2120	case 5:
2121		switch (cpi->cpi_model) {
2122		case 0:
2123		case 1:
2124		case 2:
2125		case 3:
2126		case 4:
2127		case 5:
2128			return ("AMD-K5(r)");
2129		case 6:
2130		case 7:
2131			return ("AMD-K6(r)");
2132		case 8:
2133			return ("AMD-K6(r)-2");
2134		case 9:
2135			return ("AMD-K6(r)-III");
2136		default:
2137			return ("AMD (family 5)");
2138		}
2139	case 6:
2140		switch (cpi->cpi_model) {
2141		case 1:
2142			return ("AMD-K7(tm)");
2143		case 0:
2144		case 2:
2145		case 4:
2146			return ("AMD Athlon(tm)");
2147		case 3:
2148		case 7:
2149			return ("AMD Duron(tm)");
2150		case 6:
2151		case 8:
2152		case 10:
2153			/*
2154			 * Use the L2 cache size to distinguish
2155			 */
2156			return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
2157			    "AMD Athlon(tm)" : "AMD Duron(tm)");
2158		default:
2159			return ("AMD (family 6)");
2160		}
2161	default:
2162		break;
2163	}
2164
2165	if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
2166	    cpi->cpi_brandid != 0) {
2167		switch (BITX(cpi->cpi_brandid, 7, 5)) {
2168		case 3:
2169			return ("AMD Opteron(tm) UP 1xx");
2170		case 4:
2171			return ("AMD Opteron(tm) DP 2xx");
2172		case 5:
2173			return ("AMD Opteron(tm) MP 8xx");
2174		default:
2175			return ("AMD Opteron(tm)");
2176		}
2177	}
2178
2179	return (NULL);
2180}
2181
2182static const char *
2183cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
2184{
2185	if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2186	    cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
2187	    type == X86_TYPE_CYRIX_486)
2188		return ("i486 compatible");
2189
2190	switch (type) {
2191	case X86_TYPE_CYRIX_6x86:
2192		return ("Cyrix 6x86");
2193	case X86_TYPE_CYRIX_6x86L:
2194		return ("Cyrix 6x86L");
2195	case X86_TYPE_CYRIX_6x86MX:
2196		return ("Cyrix 6x86MX");
2197	case X86_TYPE_CYRIX_GXm:
2198		return ("Cyrix GXm");
2199	case X86_TYPE_CYRIX_MediaGX:
2200		return ("Cyrix MediaGX");
2201	case X86_TYPE_CYRIX_MII:
2202		return ("Cyrix M2");
2203	case X86_TYPE_VIA_CYRIX_III:
2204		return ("VIA Cyrix M3");
2205	default:
2206		/*
2207		 * Have another wild guess ..
2208		 */
2209		if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
2210			return ("Cyrix 5x86");
2211		else if (cpi->cpi_family == 5) {
2212			switch (cpi->cpi_model) {
2213			case 2:
2214				return ("Cyrix 6x86");	/* Cyrix M1 */
2215			case 4:
2216				return ("Cyrix MediaGX");
2217			default:
2218				break;
2219			}
2220		} else if (cpi->cpi_family == 6) {
2221			switch (cpi->cpi_model) {
2222			case 0:
2223				return ("Cyrix 6x86MX"); /* Cyrix M2? */
2224			case 5:
2225			case 6:
2226			case 7:
2227			case 8:
2228			case 9:
2229				return ("VIA C3");
2230			default:
2231				break;
2232			}
2233		}
2234		break;
2235	}
2236	return (NULL);
2237}
2238
2239/*
2240 * This only gets called in the case that the CPU extended
2241 * feature brand string (0x80000002, 0x80000003, 0x80000004)
2242 * aren't available, or contain null bytes for some reason.
2243 */
2244static void
2245fabricate_brandstr(struct cpuid_info *cpi)
2246{
2247	const char *brand = NULL;
2248
2249	switch (cpi->cpi_vendor) {
2250	case X86_VENDOR_Intel:
2251		brand = intel_cpubrand(cpi);
2252		break;
2253	case X86_VENDOR_AMD:
2254		brand = amd_cpubrand(cpi);
2255		break;
2256	case X86_VENDOR_Cyrix:
2257		brand = cyrix_cpubrand(cpi, x86_type);
2258		break;
2259	case X86_VENDOR_NexGen:
2260		if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2261			brand = "NexGen Nx586";
2262		break;
2263	case X86_VENDOR_Centaur:
2264		if (cpi->cpi_family == 5)
2265			switch (cpi->cpi_model) {
2266			case 4:
2267				brand = "Centaur C6";
2268				break;
2269			case 8:
2270				brand = "Centaur C2";
2271				break;
2272			case 9:
2273				brand = "Centaur C3";
2274				break;
2275			default:
2276				break;
2277			}
2278		break;
2279	case X86_VENDOR_Rise:
2280		if (cpi->cpi_family == 5 &&
2281		    (cpi->cpi_model == 0 || cpi->cpi_model == 2))
2282			brand = "Rise mP6";
2283		break;
2284	case X86_VENDOR_SiS:
2285		if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2286			brand = "SiS 55x";
2287		break;
2288	case X86_VENDOR_TM:
2289		if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
2290			brand = "Transmeta Crusoe TM3x00 or TM5x00";
2291		break;
2292	case X86_VENDOR_NSC:
2293	case X86_VENDOR_UMC:
2294	default:
2295		break;
2296	}
2297	if (brand) {
2298		(void) strcpy((char *)cpi->cpi_brandstr, brand);
2299		return;
2300	}
2301
2302	/*
2303	 * If all else fails ...
2304	 */
2305	(void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
2306	    "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
2307	    cpi->cpi_model, cpi->cpi_step);
2308}
2309
2310/*
2311 * This routine is called just after kernel memory allocation
2312 * becomes available on cpu0, and as part of mp_startup() on
2313 * the other cpus.
2314 *
2315 * Fixup the brand string, and collect any information from cpuid
2316 * that requires dynamicically allocated storage to represent.
2317 */
2318/*ARGSUSED*/
2319void
2320cpuid_pass3(cpu_t *cpu)
2321{
2322	int	i, max, shft, level, size;
2323	struct cpuid_regs regs;
2324	struct cpuid_regs *cp;
2325	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2326
2327	ASSERT(cpi->cpi_pass == 2);
2328
2329	/*
2330	 * Function 4: Deterministic cache parameters
2331	 *
2332	 * Take this opportunity to detect the number of threads
2333	 * sharing the last level cache, and construct a corresponding
2334	 * cache id. The respective cpuid_info members are initialized
2335	 * to the default case of "no last level cache sharing".
2336	 */
2337	cpi->cpi_ncpu_shr_last_cache = 1;
2338	cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
2339
2340	if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) {
2341
2342		/*
2343		 * Find the # of elements (size) returned by fn 4, and along
2344		 * the way detect last level cache sharing details.
2345		 */
2346		bzero(&regs, sizeof (regs));
2347		cp = &regs;
2348		for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
2349			cp->cp_eax = 4;
2350			cp->cp_ecx = i;
2351
2352			(void) __cpuid_insn(cp);
2353
2354			if (CPI_CACHE_TYPE(cp) == 0)
2355				break;
2356			level = CPI_CACHE_LVL(cp);
2357			if (level > max) {
2358				max = level;
2359				cpi->cpi_ncpu_shr_last_cache =
2360				    CPI_NTHR_SHR_CACHE(cp) + 1;
2361			}
2362		}
2363		cpi->cpi_std_4_size = size = i;
2364
2365		/*
2366		 * Allocate the cpi_std_4 array. The first element
2367		 * references the regs for fn 4, %ecx == 0, which
2368		 * cpuid_pass2() stashed in cpi->cpi_std[4].
2369		 */
2370		if (size > 0) {
2371			cpi->cpi_std_4 =
2372			    kmem_alloc(size * sizeof (cp), KM_SLEEP);
2373			cpi->cpi_std_4[0] = &cpi->cpi_std[4];
2374
2375			/*
2376			 * Allocate storage to hold the additional regs
2377			 * for function 4, %ecx == 1 .. cpi_std_4_size.
2378			 *
2379			 * The regs for fn 4, %ecx == 0 has already
2380			 * been allocated as indicated above.
2381			 */
2382			for (i = 1; i < size; i++) {
2383				cp = cpi->cpi_std_4[i] =
2384				    kmem_zalloc(sizeof (regs), KM_SLEEP);
2385				cp->cp_eax = 4;
2386				cp->cp_ecx = i;
2387
2388				(void) __cpuid_insn(cp);
2389			}
2390		}
2391		/*
2392		 * Determine the number of bits needed to represent
2393		 * the number of CPUs sharing the last level cache.
2394		 *
2395		 * Shift off that number of bits from the APIC id to
2396		 * derive the cache id.
2397		 */
2398		shft = 0;
2399		for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
2400			shft++;
2401		cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
2402	}
2403
2404	/*
2405	 * Now fixup the brand string
2406	 */
2407	if ((cpi->cpi_xmaxeax & 0x80000000) == 0) {
2408		fabricate_brandstr(cpi);
2409	} else {
2410
2411		/*
2412		 * If we successfully extracted a brand string from the cpuid
2413		 * instruction, clean it up by removing leading spaces and
2414		 * similar junk.
2415		 */
2416		if (cpi->cpi_brandstr[0]) {
2417			size_t maxlen = sizeof (cpi->cpi_brandstr);
2418			char *src, *dst;
2419
2420			dst = src = (char *)cpi->cpi_brandstr;
2421			src[maxlen - 1] = '\0';
2422			/*
2423			 * strip leading spaces
2424			 */
2425			while (*src == ' ')
2426				src++;
2427			/*
2428			 * Remove any 'Genuine' or "Authentic" prefixes
2429			 */
2430			if (strncmp(src, "Genuine ", 8) == 0)
2431				src += 8;
2432			if (strncmp(src, "Authentic ", 10) == 0)
2433				src += 10;
2434
2435			/*
2436			 * Now do an in-place copy.
2437			 * Map (R) to (r) and (TM) to (tm).
2438			 * The era of teletypes is long gone, and there's
2439			 * -really- no need to shout.
2440			 */
2441			while (*src != '\0') {
2442				if (src[0] == '(') {
2443					if (strncmp(src + 1, "R)", 2) == 0) {
2444						(void) strncpy(dst, "(r)", 3);
2445						src += 3;
2446						dst += 3;
2447						continue;
2448					}
2449					if (strncmp(src + 1, "TM)", 3) == 0) {
2450						(void) strncpy(dst, "(tm)", 4);
2451						src += 4;
2452						dst += 4;
2453						continue;
2454					}
2455				}
2456				*dst++ = *src++;
2457			}
2458			*dst = '\0';
2459
2460			/*
2461			 * Finally, remove any trailing spaces
2462			 */
2463			while (--dst > cpi->cpi_brandstr)
2464				if (*dst == ' ')
2465					*dst = '\0';
2466				else
2467					break;
2468		} else
2469			fabricate_brandstr(cpi);
2470	}
2471	cpi->cpi_pass = 3;
2472}
2473
2474/*
2475 * This routine is called out of bind_hwcap() much later in the life
2476 * of the kernel (post_startup()).  The job of this routine is to resolve
2477 * the hardware feature support and kernel support for those features into
2478 * what we're actually going to tell applications via the aux vector.
2479 */
2480uint_t
2481cpuid_pass4(cpu_t *cpu)
2482{
2483	struct cpuid_info *cpi;
2484	uint_t hwcap_flags = 0;
2485
2486	if (cpu == NULL)
2487		cpu = CPU;
2488	cpi = cpu->cpu_m.mcpu_cpi;
2489
2490	ASSERT(cpi->cpi_pass == 3);
2491
2492	if (cpi->cpi_maxeax >= 1) {
2493		uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
2494		uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
2495
2496		*edx = CPI_FEATURES_EDX(cpi);
2497		*ecx = CPI_FEATURES_ECX(cpi);
2498
2499		/*
2500		 * [these require explicit kernel support]
2501		 */
2502		if (!is_x86_feature(x86_featureset, X86FSET_SEP))
2503			*edx &= ~CPUID_INTC_EDX_SEP;
2504
2505		if (!is_x86_feature(x86_featureset, X86FSET_SSE))
2506			*edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
2507		if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
2508			*edx &= ~CPUID_INTC_EDX_SSE2;
2509
2510		if (!is_x86_feature(x86_featureset, X86FSET_HTT))
2511			*edx &= ~CPUID_INTC_EDX_HTT;
2512
2513		if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
2514			*ecx &= ~CPUID_INTC_ECX_SSE3;
2515
2516		if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2517			if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
2518				*ecx &= ~CPUID_INTC_ECX_SSSE3;
2519			if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
2520				*ecx &= ~CPUID_INTC_ECX_SSE4_1;
2521			if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
2522				*ecx &= ~CPUID_INTC_ECX_SSE4_2;
2523			if (!is_x86_feature(x86_featureset, X86FSET_AES))
2524				*ecx &= ~CPUID_INTC_ECX_AES;
2525			if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
2526				*ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
2527			if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
2528				*ecx &= ~(CPUID_INTC_ECX_XSAVE |
2529				    CPUID_INTC_ECX_OSXSAVE);
2530			if (!is_x86_feature(x86_featureset, X86FSET_AVX))
2531				*ecx &= ~CPUID_INTC_ECX_AVX;
2532		}
2533
2534		/*
2535		 * [no explicit support required beyond x87 fp context]
2536		 */
2537		if (!fpu_exists)
2538			*edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
2539
2540		/*
2541		 * Now map the supported feature vector to things that we
2542		 * think userland will care about.
2543		 */
2544		if (*edx & CPUID_INTC_EDX_SEP)
2545			hwcap_flags |= AV_386_SEP;
2546		if (*edx & CPUID_INTC_EDX_SSE)
2547			hwcap_flags |= AV_386_FXSR | AV_386_SSE;
2548		if (*edx & CPUID_INTC_EDX_SSE2)
2549			hwcap_flags |= AV_386_SSE2;
2550		if (*ecx & CPUID_INTC_ECX_SSE3)
2551			hwcap_flags |= AV_386_SSE3;
2552		if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2553			if (*ecx & CPUID_INTC_ECX_SSSE3)
2554				hwcap_flags |= AV_386_SSSE3;
2555			if (*ecx & CPUID_INTC_ECX_SSE4_1)
2556				hwcap_flags |= AV_386_SSE4_1;
2557			if (*ecx & CPUID_INTC_ECX_SSE4_2)
2558				hwcap_flags |= AV_386_SSE4_2;
2559			if (*ecx & CPUID_INTC_ECX_MOVBE)
2560				hwcap_flags |= AV_386_MOVBE;
2561			if (*ecx & CPUID_INTC_ECX_AES)
2562				hwcap_flags |= AV_386_AES;
2563			if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
2564				hwcap_flags |= AV_386_PCLMULQDQ;
2565			if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
2566			    (*ecx & CPUID_INTC_ECX_OSXSAVE))
2567				hwcap_flags |= AV_386_XSAVE;
2568		}
2569		if (*ecx & CPUID_INTC_ECX_POPCNT)
2570			hwcap_flags |= AV_386_POPCNT;
2571		if (*edx & CPUID_INTC_EDX_FPU)
2572			hwcap_flags |= AV_386_FPU;
2573		if (*edx & CPUID_INTC_EDX_MMX)
2574			hwcap_flags |= AV_386_MMX;
2575
2576		if (*edx & CPUID_INTC_EDX_TSC)
2577			hwcap_flags |= AV_386_TSC;
2578		if (*edx & CPUID_INTC_EDX_CX8)
2579			hwcap_flags |= AV_386_CX8;
2580		if (*edx & CPUID_INTC_EDX_CMOV)
2581			hwcap_flags |= AV_386_CMOV;
2582		if (*ecx & CPUID_INTC_ECX_CX16)
2583			hwcap_flags |= AV_386_CX16;
2584	}
2585
2586	if (cpi->cpi_xmaxeax < 0x80000001)
2587		goto pass4_done;
2588
2589	switch (cpi->cpi_vendor) {
2590		struct cpuid_regs cp;
2591		uint32_t *edx, *ecx;
2592
2593	case X86_VENDOR_Intel:
2594		/*
2595		 * Seems like Intel duplicated what we necessary
2596		 * here to make the initial crop of 64-bit OS's work.
2597		 * Hopefully, those are the only "extended" bits
2598		 * they'll add.
2599		 */
2600		/*FALLTHROUGH*/
2601
2602	case X86_VENDOR_AMD:
2603		edx = &cpi->cpi_support[AMD_EDX_FEATURES];
2604		ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
2605
2606		*edx = CPI_FEATURES_XTD_EDX(cpi);
2607		*ecx = CPI_FEATURES_XTD_ECX(cpi);
2608
2609		/*
2610		 * [these features require explicit kernel support]
2611		 */
2612		switch (cpi->cpi_vendor) {
2613		case X86_VENDOR_Intel:
2614			if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2615				*edx &= ~CPUID_AMD_EDX_TSCP;
2616			break;
2617
2618		case X86_VENDOR_AMD:
2619			if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2620				*edx &= ~CPUID_AMD_EDX_TSCP;
2621			if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
2622				*ecx &= ~CPUID_AMD_ECX_SSE4A;
2623			break;
2624
2625		default:
2626			break;
2627		}
2628
2629		/*
2630		 * [no explicit support required beyond
2631		 * x87 fp context and exception handlers]
2632		 */
2633		if (!fpu_exists)
2634			*edx &= ~(CPUID_AMD_EDX_MMXamd |
2635			    CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
2636
2637		if (!is_x86_feature(x86_featureset, X86FSET_NX))
2638			*edx &= ~CPUID_AMD_EDX_NX;
2639#if !defined(__amd64)
2640		*edx &= ~CPUID_AMD_EDX_LM;
2641#endif
2642		/*
2643		 * Now map the supported feature vector to
2644		 * things that we think userland will care about.
2645		 */
2646#if defined(__amd64)
2647		if (*edx & CPUID_AMD_EDX_SYSC)
2648			hwcap_flags |= AV_386_AMD_SYSC;
2649#endif
2650		if (*edx & CPUID_AMD_EDX_MMXamd)
2651			hwcap_flags |= AV_386_AMD_MMX;
2652		if (*edx & CPUID_AMD_EDX_3DNow)
2653			hwcap_flags |= AV_386_AMD_3DNow;
2654		if (*edx & CPUID_AMD_EDX_3DNowx)
2655			hwcap_flags |= AV_386_AMD_3DNowx;
2656
2657		switch (cpi->cpi_vendor) {
2658		case X86_VENDOR_AMD:
2659			if (*edx & CPUID_AMD_EDX_TSCP)
2660				hwcap_flags |= AV_386_TSCP;
2661			if (*ecx & CPUID_AMD_ECX_AHF64)
2662				hwcap_flags |= AV_386_AHF;
2663			if (*ecx & CPUID_AMD_ECX_SSE4A)
2664				hwcap_flags |= AV_386_AMD_SSE4A;
2665			if (*ecx & CPUID_AMD_ECX_LZCNT)
2666				hwcap_flags |= AV_386_AMD_LZCNT;
2667			break;
2668
2669		case X86_VENDOR_Intel:
2670			if (*edx & CPUID_AMD_EDX_TSCP)
2671				hwcap_flags |= AV_386_TSCP;
2672			/*
2673			 * Aarrgh.
2674			 * Intel uses a different bit in the same word.
2675			 */
2676			if (*ecx & CPUID_INTC_ECX_AHF64)
2677				hwcap_flags |= AV_386_AHF;
2678			break;
2679
2680		default:
2681			break;
2682		}
2683		break;
2684
2685	case X86_VENDOR_TM:
2686		cp.cp_eax = 0x80860001;
2687		(void) __cpuid_insn(&cp);
2688		cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
2689		break;
2690
2691	default:
2692		break;
2693	}
2694
2695pass4_done:
2696	cpi->cpi_pass = 4;
2697	return (hwcap_flags);
2698}
2699
2700
2701/*
2702 * Simulate the cpuid instruction using the data we previously
2703 * captured about this CPU.  We try our best to return the truth
2704 * about the hardware, independently of kernel support.
2705 */
2706uint32_t
2707cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
2708{
2709	struct cpuid_info *cpi;
2710	struct cpuid_regs *xcp;
2711
2712	if (cpu == NULL)
2713		cpu = CPU;
2714	cpi = cpu->cpu_m.mcpu_cpi;
2715
2716	ASSERT(cpuid_checkpass(cpu, 3));
2717
2718	/*
2719	 * CPUID data is cached in two separate places: cpi_std for standard
2720	 * CPUID functions, and cpi_extd for extended CPUID functions.
2721	 */
2722	if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD)
2723		xcp = &cpi->cpi_std[cp->cp_eax];
2724	else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax &&
2725	    cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD)
2726		xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000];
2727	else
2728		/*
2729		 * The caller is asking for data from an input parameter which
2730		 * the kernel has not cached.  In this case we go fetch from
2731		 * the hardware and return the data directly to the user.
2732		 */
2733		return (__cpuid_insn(cp));
2734
2735	cp->cp_eax = xcp->cp_eax;
2736	cp->cp_ebx = xcp->cp_ebx;
2737	cp->cp_ecx = xcp->cp_ecx;
2738	cp->cp_edx = xcp->cp_edx;
2739	return (cp->cp_eax);
2740}
2741
2742int
2743cpuid_checkpass(cpu_t *cpu, int pass)
2744{
2745	return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
2746	    cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
2747}
2748
2749int
2750cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
2751{
2752	ASSERT(cpuid_checkpass(cpu, 3));
2753
2754	return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
2755}
2756
2757int
2758cpuid_is_cmt(cpu_t *cpu)
2759{
2760	if (cpu == NULL)
2761		cpu = CPU;
2762
2763	ASSERT(cpuid_checkpass(cpu, 1));
2764
2765	return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
2766}
2767
2768/*
2769 * AMD and Intel both implement the 64-bit variant of the syscall
2770 * instruction (syscallq), so if there's -any- support for syscall,
2771 * cpuid currently says "yes, we support this".
2772 *
2773 * However, Intel decided to -not- implement the 32-bit variant of the
2774 * syscall instruction, so we provide a predicate to allow our caller
2775 * to test that subtlety here.
2776 *
2777 * XXPV	Currently, 32-bit syscall instructions don't work via the hypervisor,
2778 *	even in the case where the hardware would in fact support it.
2779 */
2780/*ARGSUSED*/
2781int
2782cpuid_syscall32_insn(cpu_t *cpu)
2783{
2784	ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
2785
2786#if !defined(__xpv)
2787	if (cpu == NULL)
2788		cpu = CPU;
2789
2790	/*CSTYLED*/
2791	{
2792		struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2793
2794		if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2795		    cpi->cpi_xmaxeax >= 0x80000001 &&
2796		    (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
2797			return (1);
2798	}
2799#endif
2800	return (0);
2801}
2802
2803int
2804cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
2805{
2806	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2807
2808	static const char fmt[] =
2809	    "x86 (%s %X family %d model %d step %d clock %d MHz)";
2810	static const char fmt_ht[] =
2811	    "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
2812
2813	ASSERT(cpuid_checkpass(cpu, 1));
2814
2815	if (cpuid_is_cmt(cpu))
2816		return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
2817		    cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2818		    cpi->cpi_family, cpi->cpi_model,
2819		    cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2820	return (snprintf(s, n, fmt,
2821	    cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2822	    cpi->cpi_family, cpi->cpi_model,
2823	    cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2824}
2825
2826const char *
2827cpuid_getvendorstr(cpu_t *cpu)
2828{
2829	ASSERT(cpuid_checkpass(cpu, 1));
2830	return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
2831}
2832
2833uint_t
2834cpuid_getvendor(cpu_t *cpu)
2835{
2836	ASSERT(cpuid_checkpass(cpu, 1));
2837	return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
2838}
2839
2840uint_t
2841cpuid_getfamily(cpu_t *cpu)
2842{
2843	ASSERT(cpuid_checkpass(cpu, 1));
2844	return (cpu->cpu_m.mcpu_cpi->cpi_family);
2845}
2846
2847uint_t
2848cpuid_getmodel(cpu_t *cpu)
2849{
2850	ASSERT(cpuid_checkpass(cpu, 1));
2851	return (cpu->cpu_m.mcpu_cpi->cpi_model);
2852}
2853
2854uint_t
2855cpuid_get_ncpu_per_chip(cpu_t *cpu)
2856{
2857	ASSERT(cpuid_checkpass(cpu, 1));
2858	return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
2859}
2860
2861uint_t
2862cpuid_get_ncore_per_chip(cpu_t *cpu)
2863{
2864	ASSERT(cpuid_checkpass(cpu, 1));
2865	return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
2866}
2867
2868uint_t
2869cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
2870{
2871	ASSERT(cpuid_checkpass(cpu, 2));
2872	return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
2873}
2874
2875id_t
2876cpuid_get_last_lvl_cacheid(cpu_t *cpu)
2877{
2878	ASSERT(cpuid_checkpass(cpu, 2));
2879	return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
2880}
2881
2882uint_t
2883cpuid_getstep(cpu_t *cpu)
2884{
2885	ASSERT(cpuid_checkpass(cpu, 1));
2886	return (cpu->cpu_m.mcpu_cpi->cpi_step);
2887}
2888
2889uint_t
2890cpuid_getsig(struct cpu *cpu)
2891{
2892	ASSERT(cpuid_checkpass(cpu, 1));
2893	return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
2894}
2895
2896uint32_t
2897cpuid_getchiprev(struct cpu *cpu)
2898{
2899	ASSERT(cpuid_checkpass(cpu, 1));
2900	return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
2901}
2902
2903const char *
2904cpuid_getchiprevstr(struct cpu *cpu)
2905{
2906	ASSERT(cpuid_checkpass(cpu, 1));
2907	return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
2908}
2909
2910uint32_t
2911cpuid_getsockettype(struct cpu *cpu)
2912{
2913	ASSERT(cpuid_checkpass(cpu, 1));
2914	return (cpu->cpu_m.mcpu_cpi->cpi_socket);
2915}
2916
2917const char *
2918cpuid_getsocketstr(cpu_t *cpu)
2919{
2920	static const char *socketstr = NULL;
2921	struct cpuid_info *cpi;
2922
2923	ASSERT(cpuid_checkpass(cpu, 1));
2924	cpi = cpu->cpu_m.mcpu_cpi;
2925
2926	/* Assume that socket types are the same across the system */
2927	if (socketstr == NULL)
2928		socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
2929		    cpi->cpi_model, cpi->cpi_step);
2930
2931
2932	return (socketstr);
2933}
2934
2935int
2936cpuid_get_chipid(cpu_t *cpu)
2937{
2938	ASSERT(cpuid_checkpass(cpu, 1));
2939
2940	if (cpuid_is_cmt(cpu))
2941		return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
2942	return (cpu->cpu_id);
2943}
2944
2945id_t
2946cpuid_get_coreid(cpu_t *cpu)
2947{
2948	ASSERT(cpuid_checkpass(cpu, 1));
2949	return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
2950}
2951
2952int
2953cpuid_get_pkgcoreid(cpu_t *cpu)
2954{
2955	ASSERT(cpuid_checkpass(cpu, 1));
2956	return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
2957}
2958
2959int
2960cpuid_get_clogid(cpu_t *cpu)
2961{
2962	ASSERT(cpuid_checkpass(cpu, 1));
2963	return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
2964}
2965
2966int
2967cpuid_get_cacheid(cpu_t *cpu)
2968{
2969	ASSERT(cpuid_checkpass(cpu, 1));
2970	return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
2971}
2972
2973uint_t
2974cpuid_get_procnodeid(cpu_t *cpu)
2975{
2976	ASSERT(cpuid_checkpass(cpu, 1));
2977	return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
2978}
2979
2980uint_t
2981cpuid_get_procnodes_per_pkg(cpu_t *cpu)
2982{
2983	ASSERT(cpuid_checkpass(cpu, 1));
2984	return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
2985}
2986
2987/*ARGSUSED*/
2988int
2989cpuid_have_cr8access(cpu_t *cpu)
2990{
2991#if defined(__amd64)
2992	return (1);
2993#else
2994	struct cpuid_info *cpi;
2995
2996	ASSERT(cpu != NULL);
2997	cpi = cpu->cpu_m.mcpu_cpi;
2998	if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
2999	    (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
3000		return (1);
3001	return (0);
3002#endif
3003}
3004
3005uint32_t
3006cpuid_get_apicid(cpu_t *cpu)
3007{
3008	ASSERT(cpuid_checkpass(cpu, 1));
3009	if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
3010		return (UINT32_MAX);
3011	} else {
3012		return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
3013	}
3014}
3015
3016void
3017cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
3018{
3019	struct cpuid_info *cpi;
3020
3021	if (cpu == NULL)
3022		cpu = CPU;
3023	cpi = cpu->cpu_m.mcpu_cpi;
3024
3025	ASSERT(cpuid_checkpass(cpu, 1));
3026
3027	if (pabits)
3028		*pabits = cpi->cpi_pabits;
3029	if (vabits)
3030		*vabits = cpi->cpi_vabits;
3031}
3032
3033/*
3034 * Returns the number of data TLB entries for a corresponding
3035 * pagesize.  If it can't be computed, or isn't known, the
3036 * routine returns zero.  If you ask about an architecturally
3037 * impossible pagesize, the routine will panic (so that the
3038 * hat implementor knows that things are inconsistent.)
3039 */
3040uint_t
3041cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
3042{
3043	struct cpuid_info *cpi;
3044	uint_t dtlb_nent = 0;
3045
3046	if (cpu == NULL)
3047		cpu = CPU;
3048	cpi = cpu->cpu_m.mcpu_cpi;
3049
3050	ASSERT(cpuid_checkpass(cpu, 1));
3051
3052	/*
3053	 * Check the L2 TLB info
3054	 */
3055	if (cpi->cpi_xmaxeax >= 0x80000006) {
3056		struct cpuid_regs *cp = &cpi->cpi_extd[6];
3057
3058		switch (pagesize) {
3059
3060		case 4 * 1024:
3061			/*
3062			 * All zero in the top 16 bits of the register
3063			 * indicates a unified TLB. Size is in low 16 bits.
3064			 */
3065			if ((cp->cp_ebx & 0xffff0000) == 0)
3066				dtlb_nent = cp->cp_ebx & 0x0000ffff;
3067			else
3068				dtlb_nent = BITX(cp->cp_ebx, 27, 16);
3069			break;
3070
3071		case 2 * 1024 * 1024:
3072			if ((cp->cp_eax & 0xffff0000) == 0)
3073				dtlb_nent = cp->cp_eax & 0x0000ffff;
3074			else
3075				dtlb_nent = BITX(cp->cp_eax, 27, 16);
3076			break;
3077
3078		default:
3079			panic("unknown L2 pagesize");
3080			/*NOTREACHED*/
3081		}
3082	}
3083
3084	if (dtlb_nent != 0)
3085		return (dtlb_nent);
3086
3087	/*
3088	 * No L2 TLB support for this size, try L1.
3089	 */
3090	if (cpi->cpi_xmaxeax >= 0x80000005) {
3091		struct cpuid_regs *cp = &cpi->cpi_extd[5];
3092
3093		switch (pagesize) {
3094		case 4 * 1024:
3095			dtlb_nent = BITX(cp->cp_ebx, 23, 16);
3096			break;
3097		case 2 * 1024 * 1024:
3098			dtlb_nent = BITX(cp->cp_eax, 23, 16);
3099			break;
3100		default:
3101			panic("unknown L1 d-TLB pagesize");
3102			/*NOTREACHED*/
3103		}
3104	}
3105
3106	return (dtlb_nent);
3107}
3108
3109/*
3110 * Return 0 if the erratum is not present or not applicable, positive
3111 * if it is, and negative if the status of the erratum is unknown.
3112 *
3113 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
3114 * Processors" #25759, Rev 3.57, August 2005
3115 */
3116int
3117cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
3118{
3119	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3120	uint_t eax;
3121
3122	/*
3123	 * Bail out if this CPU isn't an AMD CPU, or if it's
3124	 * a legacy (32-bit) AMD CPU.
3125	 */
3126	if (cpi->cpi_vendor != X86_VENDOR_AMD ||
3127	    cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
3128	    cpi->cpi_family == 6)
3129
3130		return (0);
3131
3132	eax = cpi->cpi_std[1].cp_eax;
3133
3134#define	SH_B0(eax)	(eax == 0xf40 || eax == 0xf50)
3135#define	SH_B3(eax) 	(eax == 0xf51)
3136#define	B(eax)		(SH_B0(eax) || SH_B3(eax))
3137
3138#define	SH_C0(eax)	(eax == 0xf48 || eax == 0xf58)
3139
3140#define	SH_CG(eax)	(eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
3141#define	DH_CG(eax)	(eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
3142#define	CH_CG(eax)	(eax == 0xf82 || eax == 0xfb2)
3143#define	CG(eax)		(SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
3144
3145#define	SH_D0(eax)	(eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
3146#define	DH_D0(eax)	(eax == 0x10fc0 || eax == 0x10ff0)
3147#define	CH_D0(eax)	(eax == 0x10f80 || eax == 0x10fb0)
3148#define	D0(eax)		(SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
3149
3150#define	SH_E0(eax)	(eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
3151#define	JH_E1(eax)	(eax == 0x20f10)	/* JH8_E0 had 0x20f30 */
3152#define	DH_E3(eax)	(eax == 0x20fc0 || eax == 0x20ff0)
3153#define	SH_E4(eax)	(eax == 0x20f51 || eax == 0x20f71)
3154#define	BH_E4(eax)	(eax == 0x20fb1)
3155#define	SH_E5(eax)	(eax == 0x20f42)
3156#define	DH_E6(eax)	(eax == 0x20ff2 || eax == 0x20fc2)
3157#define	JH_E6(eax)	(eax == 0x20f12 || eax == 0x20f32)
3158#define	EX(eax)		(SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
3159			    SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
3160			    DH_E6(eax) || JH_E6(eax))
3161
3162#define	DR_AX(eax)	(eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
3163#define	DR_B0(eax)	(eax == 0x100f20)
3164#define	DR_B1(eax)	(eax == 0x100f21)
3165#define	DR_BA(eax)	(eax == 0x100f2a)
3166#define	DR_B2(eax)	(eax == 0x100f22)
3167#define	DR_B3(eax)	(eax == 0x100f23)
3168#define	RB_C0(eax)	(eax == 0x100f40)
3169
3170	switch (erratum) {
3171	case 1:
3172		return (cpi->cpi_family < 0x10);
3173	case 51:	/* what does the asterisk mean? */
3174		return (B(eax) || SH_C0(eax) || CG(eax));
3175	case 52:
3176		return (B(eax));
3177	case 57:
3178		return (cpi->cpi_family <= 0x11);
3179	case 58:
3180		return (B(eax));
3181	case 60:
3182		return (cpi->cpi_family <= 0x11);
3183	case 61:
3184	case 62:
3185	case 63:
3186	case 64:
3187	case 65:
3188	case 66:
3189	case 68:
3190	case 69:
3191	case 70:
3192	case 71:
3193		return (B(eax));
3194	case 72:
3195		return (SH_B0(eax));
3196	case 74:
3197		return (B(eax));
3198	case 75:
3199		return (cpi->cpi_family < 0x10);
3200	case 76:
3201		return (B(eax));
3202	case 77:
3203		return (cpi->cpi_family <= 0x11);
3204	case 78:
3205		return (B(eax) || SH_C0(eax));
3206	case 79:
3207		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3208	case 80:
3209	case 81:
3210	case 82:
3211		return (B(eax));
3212	case 83:
3213		return (B(eax) || SH_C0(eax) || CG(eax));
3214	case 85:
3215		return (cpi->cpi_family < 0x10);
3216	case 86:
3217		return (SH_C0(eax) || CG(eax));
3218	case 88:
3219#if !defined(__amd64)
3220		return (0);
3221#else
3222		return (B(eax) || SH_C0(eax));
3223#endif
3224	case 89:
3225		return (cpi->cpi_family < 0x10);
3226	case 90:
3227		return (B(eax) || SH_C0(eax) || CG(eax));
3228	case 91:
3229	case 92:
3230		return (B(eax) || SH_C0(eax));
3231	case 93:
3232		return (SH_C0(eax));
3233	case 94:
3234		return (B(eax) || SH_C0(eax) || CG(eax));
3235	case 95:
3236#if !defined(__amd64)
3237		return (0);
3238#else
3239		return (B(eax) || SH_C0(eax));
3240#endif
3241	case 96:
3242		return (B(eax) || SH_C0(eax) || CG(eax));
3243	case 97:
3244	case 98:
3245		return (SH_C0(eax) || CG(eax));
3246	case 99:
3247		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3248	case 100:
3249		return (B(eax) || SH_C0(eax));
3250	case 101:
3251	case 103:
3252		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3253	case 104:
3254		return (SH_C0(eax) || CG(eax) || D0(eax));
3255	case 105:
3256	case 106:
3257	case 107:
3258		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3259	case 108:
3260		return (DH_CG(eax));
3261	case 109:
3262		return (SH_C0(eax) || CG(eax) || D0(eax));
3263	case 110:
3264		return (D0(eax) || EX(eax));
3265	case 111:
3266		return (CG(eax));
3267	case 112:
3268		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3269	case 113:
3270		return (eax == 0x20fc0);
3271	case 114:
3272		return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3273	case 115:
3274		return (SH_E0(eax) || JH_E1(eax));
3275	case 116:
3276		return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3277	case 117:
3278		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3279	case 118:
3280		return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
3281		    JH_E6(eax));
3282	case 121:
3283		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3284	case 122:
3285		return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
3286	case 123:
3287		return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
3288	case 131:
3289		return (cpi->cpi_family < 0x10);
3290	case 6336786:
3291		/*
3292		 * Test for AdvPowerMgmtInfo.TscPStateInvariant
3293		 * if this is a K8 family or newer processor
3294		 */
3295		if (CPI_FAMILY(cpi) == 0xf) {
3296			struct cpuid_regs regs;
3297			regs.cp_eax = 0x80000007;
3298			(void) __cpuid_insn(&regs);
3299			return (!(regs.cp_edx & 0x100));
3300		}
3301		return (0);
3302	case 6323525:
3303		return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
3304		    (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
3305
3306	case 6671130:
3307		/*
3308		 * check for processors (pre-Shanghai) that do not provide
3309		 * optimal management of 1gb ptes in its tlb.
3310		 */
3311		return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
3312
3313	case 298:
3314		return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
3315		    DR_B2(eax) || RB_C0(eax));
3316
3317	default:
3318		return (-1);
3319
3320	}
3321}
3322
3323/*
3324 * Determine if specified erratum is present via OSVW (OS Visible Workaround).
3325 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
3326 */
3327int
3328osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
3329{
3330	struct cpuid_info	*cpi;
3331	uint_t			osvwid;
3332	static int		osvwfeature = -1;
3333	uint64_t		osvwlength;
3334
3335
3336	cpi = cpu->cpu_m.mcpu_cpi;
3337
3338	/* confirm OSVW supported */
3339	if (osvwfeature == -1) {
3340		osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
3341	} else {
3342		/* assert that osvw feature setting is consistent on all cpus */
3343		ASSERT(osvwfeature ==
3344		    (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
3345	}
3346	if (!osvwfeature)
3347		return (-1);
3348
3349	osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
3350
3351	switch (erratum) {
3352	case 298:	/* osvwid is 0 */
3353		osvwid = 0;
3354		if (osvwlength <= (uint64_t)osvwid) {
3355			/* osvwid 0 is unknown */
3356			return (-1);
3357		}
3358
3359		/*
3360		 * Check the OSVW STATUS MSR to determine the state
3361		 * of the erratum where:
3362		 *   0 - fixed by HW
3363		 *   1 - BIOS has applied the workaround when BIOS
3364		 *   workaround is available. (Or for other errata,
3365		 *   OS workaround is required.)
3366		 * For a value of 1, caller will confirm that the
3367		 * erratum 298 workaround has indeed been applied by BIOS.
3368		 *
3369		 * A 1 may be set in cpus that have a HW fix
3370		 * in a mixed cpu system. Regarding erratum 298:
3371		 *   In a multiprocessor platform, the workaround above
3372		 *   should be applied to all processors regardless of
3373		 *   silicon revision when an affected processor is
3374		 *   present.
3375		 */
3376
3377		return (rdmsr(MSR_AMD_OSVW_STATUS +
3378		    (osvwid / OSVW_ID_CNT_PER_MSR)) &
3379		    (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
3380
3381	default:
3382		return (-1);
3383	}
3384}
3385
3386static const char assoc_str[] = "associativity";
3387static const char line_str[] = "line-size";
3388static const char size_str[] = "size";
3389
3390static void
3391add_cache_prop(dev_info_t *devi, const char *label, const char *type,
3392    uint32_t val)
3393{
3394	char buf[128];
3395
3396	/*
3397	 * ndi_prop_update_int() is used because it is desirable for
3398	 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
3399	 */
3400	if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
3401		(void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
3402}
3403
3404/*
3405 * Intel-style cache/tlb description
3406 *
3407 * Standard cpuid level 2 gives a randomly ordered
3408 * selection of tags that index into a table that describes
3409 * cache and tlb properties.
3410 */
3411
3412static const char l1_icache_str[] = "l1-icache";
3413static const char l1_dcache_str[] = "l1-dcache";
3414static const char l2_cache_str[] = "l2-cache";
3415static const char l3_cache_str[] = "l3-cache";
3416static const char itlb4k_str[] = "itlb-4K";
3417static const char dtlb4k_str[] = "dtlb-4K";
3418static const char itlb2M_str[] = "itlb-2M";
3419static const char itlb4M_str[] = "itlb-4M";
3420static const char dtlb4M_str[] = "dtlb-4M";
3421static const char dtlb24_str[] = "dtlb0-2M-4M";
3422static const char itlb424_str[] = "itlb-4K-2M-4M";
3423static const char itlb24_str[] = "itlb-2M-4M";
3424static const char dtlb44_str[] = "dtlb-4K-4M";
3425static const char sl1_dcache_str[] = "sectored-l1-dcache";
3426static const char sl2_cache_str[] = "sectored-l2-cache";
3427static const char itrace_str[] = "itrace-cache";
3428static const char sl3_cache_str[] = "sectored-l3-cache";
3429static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
3430
3431static const struct cachetab {
3432	uint8_t 	ct_code;
3433	uint8_t		ct_assoc;
3434	uint16_t 	ct_line_size;
3435	size_t		ct_size;
3436	const char	*ct_label;
3437} intel_ctab[] = {
3438	/*
3439	 * maintain descending order!
3440	 *
3441	 * Codes ignored - Reason
3442	 * ----------------------
3443	 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
3444	 * f0H/f1H - Currently we do not interpret prefetch size by design
3445	 */
3446	{ 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
3447	{ 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
3448	{ 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
3449	{ 0xde, 12, 64, 6*1024*1024, l3_cache_str},
3450	{ 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
3451	{ 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
3452	{ 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
3453	{ 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
3454	{ 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
3455	{ 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
3456	{ 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
3457	{ 0xd0, 4, 64, 512*1024, l3_cache_str},
3458	{ 0xca, 4, 0, 512, sh_l2_tlb4k_str},
3459	{ 0xc0, 4, 0, 8, dtlb44_str },
3460	{ 0xba, 4, 0, 64, dtlb4k_str },
3461	{ 0xb4, 4, 0, 256, dtlb4k_str },
3462	{ 0xb3, 4, 0, 128, dtlb4k_str },
3463	{ 0xb2, 4, 0, 64, itlb4k_str },
3464	{ 0xb0, 4, 0, 128, itlb4k_str },
3465	{ 0x87, 8, 64, 1024*1024, l2_cache_str},
3466	{ 0x86, 4, 64, 512*1024, l2_cache_str},
3467	{ 0x85, 8, 32, 2*1024*1024, l2_cache_str},
3468	{ 0x84, 8, 32, 1024*1024, l2_cache_str},
3469	{ 0x83, 8, 32, 512*1024, l2_cache_str},
3470	{ 0x82, 8, 32, 256*1024, l2_cache_str},
3471	{ 0x80, 8, 64, 512*1024, l2_cache_str},
3472	{ 0x7f, 2, 64, 512*1024, l2_cache_str},
3473	{ 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
3474	{ 0x7c, 8, 64, 1024*1024, sl2_cache_str},
3475	{ 0x7b, 8, 64, 512*1024, sl2_cache_str},
3476	{ 0x7a, 8, 64, 256*1024, sl2_cache_str},
3477	{ 0x79, 8, 64, 128*1024, sl2_cache_str},
3478	{ 0x78, 8, 64, 1024*1024, l2_cache_str},
3479	{ 0x73, 8, 0, 64*1024, itrace_str},
3480	{ 0x72, 8, 0, 32*1024, itrace_str},
3481	{ 0x71, 8, 0, 16*1024, itrace_str},
3482	{ 0x70, 8, 0, 12*1024, itrace_str},
3483	{ 0x68, 4, 64, 32*1024, sl1_dcache_str},
3484	{ 0x67, 4, 64, 16*1024, sl1_dcache_str},
3485	{ 0x66, 4, 64, 8*1024, sl1_dcache_str},
3486	{ 0x60, 8, 64, 16*1024, sl1_dcache_str},
3487	{ 0x5d, 0, 0, 256, dtlb44_str},
3488	{ 0x5c, 0, 0, 128, dtlb44_str},
3489	{ 0x5b, 0, 0, 64, dtlb44_str},
3490	{ 0x5a, 4, 0, 32, dtlb24_str},
3491	{ 0x59, 0, 0, 16, dtlb4k_str},
3492	{ 0x57, 4, 0, 16, dtlb4k_str},
3493	{ 0x56, 4, 0, 16, dtlb4M_str},
3494	{ 0x55, 0, 0, 7, itlb24_str},
3495	{ 0x52, 0, 0, 256, itlb424_str},
3496	{ 0x51, 0, 0, 128, itlb424_str},
3497	{ 0x50, 0, 0, 64, itlb424_str},
3498	{ 0x4f, 0, 0, 32, itlb4k_str},
3499	{ 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
3500	{ 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
3501	{ 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
3502	{ 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
3503	{ 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
3504	{ 0x49, 16, 64, 4*1024*1024, l3_cache_str},
3505	{ 0x48, 12, 64, 3*1024*1024, l2_cache_str},
3506	{ 0x47, 8, 64, 8*1024*1024, l3_cache_str},
3507	{ 0x46, 4, 64, 4*1024*1024, l3_cache_str},
3508	{ 0x45, 4, 32, 2*1024*1024, l2_cache_str},
3509	{ 0x44, 4, 32, 1024*1024, l2_cache_str},
3510	{ 0x43, 4, 32, 512*1024, l2_cache_str},
3511	{ 0x42, 4, 32, 256*1024, l2_cache_str},
3512	{ 0x41, 4, 32, 128*1024, l2_cache_str},
3513	{ 0x3e, 4, 64, 512*1024, sl2_cache_str},
3514	{ 0x3d, 6, 64, 384*1024, sl2_cache_str},
3515	{ 0x3c, 4, 64, 256*1024, sl2_cache_str},
3516	{ 0x3b, 2, 64, 128*1024, sl2_cache_str},
3517	{ 0x3a, 6, 64, 192*1024, sl2_cache_str},
3518	{ 0x39, 4, 64, 128*1024, sl2_cache_str},
3519	{ 0x30, 8, 64, 32*1024, l1_icache_str},
3520	{ 0x2c, 8, 64, 32*1024, l1_dcache_str},
3521	{ 0x29, 8, 64, 4096*1024, sl3_cache_str},
3522	{ 0x25, 8, 64, 2048*1024, sl3_cache_str},
3523	{ 0x23, 8, 64, 1024*1024, sl3_cache_str},
3524	{ 0x22, 4, 64, 512*1024, sl3_cache_str},
3525	{ 0x0e, 6, 64, 24*1024, l1_dcache_str},
3526	{ 0x0d, 4, 32, 16*1024, l1_dcache_str},
3527	{ 0x0c, 4, 32, 16*1024, l1_dcache_str},
3528	{ 0x0b, 4, 0, 4, itlb4M_str},
3529	{ 0x0a, 2, 32, 8*1024, l1_dcache_str},
3530	{ 0x08, 4, 32, 16*1024, l1_icache_str},
3531	{ 0x06, 4, 32, 8*1024, l1_icache_str},
3532	{ 0x05, 4, 0, 32, dtlb4M_str},
3533	{ 0x04, 4, 0, 8, dtlb4M_str},
3534	{ 0x03, 4, 0, 64, dtlb4k_str},
3535	{ 0x02, 4, 0, 2, itlb4M_str},
3536	{ 0x01, 4, 0, 32, itlb4k_str},
3537	{ 0 }
3538};
3539
3540static const struct cachetab cyrix_ctab[] = {
3541	{ 0x70, 4, 0, 32, "tlb-4K" },
3542	{ 0x80, 4, 16, 16*1024, "l1-cache" },
3543	{ 0 }
3544};
3545
3546/*
3547 * Search a cache table for a matching entry
3548 */
3549static const struct cachetab *
3550find_cacheent(const struct cachetab *ct, uint_t code)
3551{
3552	if (code != 0) {
3553		for (; ct->ct_code != 0; ct++)
3554			if (ct->ct_code <= code)
3555				break;
3556		if (ct->ct_code == code)
3557			return (ct);
3558	}
3559	return (NULL);
3560}
3561
3562/*
3563 * Populate cachetab entry with L2 or L3 cache-information using
3564 * cpuid function 4. This function is called from intel_walk_cacheinfo()
3565 * when descriptor 0x49 is encountered. It returns 0 if no such cache
3566 * information is found.
3567 */
3568static int
3569intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
3570{
3571	uint32_t level, i;
3572	int ret = 0;
3573
3574	for (i = 0; i < cpi->cpi_std_4_size; i++) {
3575		level = CPI_CACHE_LVL(cpi->cpi_std_4[i]);
3576
3577		if (level == 2 || level == 3) {
3578			ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1;
3579			ct->ct_line_size =
3580			    CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1;
3581			ct->ct_size = ct->ct_assoc *
3582			    (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) *
3583			    ct->ct_line_size *
3584			    (cpi->cpi_std_4[i]->cp_ecx + 1);
3585
3586			if (level == 2) {
3587				ct->ct_label = l2_cache_str;
3588			} else if (level == 3) {
3589				ct->ct_label = l3_cache_str;
3590			}
3591			ret = 1;
3592		}
3593	}
3594
3595	return (ret);
3596}
3597
3598/*
3599 * Walk the cacheinfo descriptor, applying 'func' to every valid element
3600 * The walk is terminated if the walker returns non-zero.
3601 */
3602static void
3603intel_walk_cacheinfo(struct cpuid_info *cpi,
3604    void *arg, int (*func)(void *, const struct cachetab *))
3605{
3606	const struct cachetab *ct;
3607	struct cachetab des_49_ct, des_b1_ct;
3608	uint8_t *dp;
3609	int i;
3610
3611	if ((dp = cpi->cpi_cacheinfo) == NULL)
3612		return;
3613	for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3614		/*
3615		 * For overloaded descriptor 0x49 we use cpuid function 4
3616		 * if supported by the current processor, to create
3617		 * cache information.
3618		 * For overloaded descriptor 0xb1 we use X86_PAE flag
3619		 * to disambiguate the cache information.
3620		 */
3621		if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
3622		    intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
3623				ct = &des_49_ct;
3624		} else if (*dp == 0xb1) {
3625			des_b1_ct.ct_code = 0xb1;
3626			des_b1_ct.ct_assoc = 4;
3627			des_b1_ct.ct_line_size = 0;
3628			if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
3629				des_b1_ct.ct_size = 8;
3630				des_b1_ct.ct_label = itlb2M_str;
3631			} else {
3632				des_b1_ct.ct_size = 4;
3633				des_b1_ct.ct_label = itlb4M_str;
3634			}
3635			ct = &des_b1_ct;
3636		} else {
3637			if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
3638				continue;
3639			}
3640		}
3641
3642		if (func(arg, ct) != 0) {
3643			break;
3644		}
3645	}
3646}
3647
3648/*
3649 * (Like the Intel one, except for Cyrix CPUs)
3650 */
3651static void
3652cyrix_walk_cacheinfo(struct cpuid_info *cpi,
3653    void *arg, int (*func)(void *, const struct cachetab *))
3654{
3655	const struct cachetab *ct;
3656	uint8_t *dp;
3657	int i;
3658
3659	if ((dp = cpi->cpi_cacheinfo) == NULL)
3660		return;
3661	for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3662		/*
3663		 * Search Cyrix-specific descriptor table first ..
3664		 */
3665		if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
3666			if (func(arg, ct) != 0)
3667				break;
3668			continue;
3669		}
3670		/*
3671		 * .. else fall back to the Intel one
3672		 */
3673		if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
3674			if (func(arg, ct) != 0)
3675				break;
3676			continue;
3677		}
3678	}
3679}
3680
3681/*
3682 * A cacheinfo walker that adds associativity, line-size, and size properties
3683 * to the devinfo node it is passed as an argument.
3684 */
3685static int
3686add_cacheent_props(void *arg, const struct cachetab *ct)
3687{
3688	dev_info_t *devi = arg;
3689
3690	add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
3691	if (ct->ct_line_size != 0)
3692		add_cache_prop(devi, ct->ct_label, line_str,
3693		    ct->ct_line_size);
3694	add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
3695	return (0);
3696}
3697
3698
3699static const char fully_assoc[] = "fully-associative?";
3700
3701/*
3702 * AMD style cache/tlb description
3703 *
3704 * Extended functions 5 and 6 directly describe properties of
3705 * tlbs and various cache levels.
3706 */
3707static void
3708add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3709{
3710	switch (assoc) {
3711	case 0:	/* reserved; ignore */
3712		break;
3713	default:
3714		add_cache_prop(devi, label, assoc_str, assoc);
3715		break;
3716	case 0xff:
3717		add_cache_prop(devi, label, fully_assoc, 1);
3718		break;
3719	}
3720}
3721
3722static void
3723add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3724{
3725	if (size == 0)
3726		return;
3727	add_cache_prop(devi, label, size_str, size);
3728	add_amd_assoc(devi, label, assoc);
3729}
3730
3731static void
3732add_amd_cache(dev_info_t *devi, const char *label,
3733    uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3734{
3735	if (size == 0 || line_size == 0)
3736		return;
3737	add_amd_assoc(devi, label, assoc);
3738	/*
3739	 * Most AMD parts have a sectored cache. Multiple cache lines are
3740	 * associated with each tag. A sector consists of all cache lines
3741	 * associated with a tag. For example, the AMD K6-III has a sector
3742	 * size of 2 cache lines per tag.
3743	 */
3744	if (lines_per_tag != 0)
3745		add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3746	add_cache_prop(devi, label, line_str, line_size);
3747	add_cache_prop(devi, label, size_str, size * 1024);
3748}
3749
3750static void
3751add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3752{
3753	switch (assoc) {
3754	case 0:	/* off */
3755		break;
3756	case 1:
3757	case 2:
3758	case 4:
3759		add_cache_prop(devi, label, assoc_str, assoc);
3760		break;
3761	case 6:
3762		add_cache_prop(devi, label, assoc_str, 8);
3763		break;
3764	case 8:
3765		add_cache_prop(devi, label, assoc_str, 16);
3766		break;
3767	case 0xf:
3768		add_cache_prop(devi, label, fully_assoc, 1);
3769		break;
3770	default: /* reserved; ignore */
3771		break;
3772	}
3773}
3774
3775static void
3776add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3777{
3778	if (size == 0 || assoc == 0)
3779		return;
3780	add_amd_l2_assoc(devi, label, assoc);
3781	add_cache_prop(devi, label, size_str, size);
3782}
3783
3784static void
3785add_amd_l2_cache(dev_info_t *devi, const char *label,
3786    uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3787{
3788	if (size == 0 || assoc == 0 || line_size == 0)
3789		return;
3790	add_amd_l2_assoc(devi, label, assoc);
3791	if (lines_per_tag != 0)
3792		add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3793	add_cache_prop(devi, label, line_str, line_size);
3794	add_cache_prop(devi, label, size_str, size * 1024);
3795}
3796
3797static void
3798amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
3799{
3800	struct cpuid_regs *cp;
3801
3802	if (cpi->cpi_xmaxeax < 0x80000005)
3803		return;
3804	cp = &cpi->cpi_extd[5];
3805
3806	/*
3807	 * 4M/2M L1 TLB configuration
3808	 *
3809	 * We report the size for 2M pages because AMD uses two
3810	 * TLB entries for one 4M page.
3811	 */
3812	add_amd_tlb(devi, "dtlb-2M",
3813	    BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
3814	add_amd_tlb(devi, "itlb-2M",
3815	    BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
3816
3817	/*
3818	 * 4K L1 TLB configuration
3819	 */
3820
3821	switch (cpi->cpi_vendor) {
3822		uint_t nentries;
3823	case X86_VENDOR_TM:
3824		if (cpi->cpi_family >= 5) {
3825			/*
3826			 * Crusoe processors have 256 TLB entries, but
3827			 * cpuid data format constrains them to only
3828			 * reporting 255 of them.
3829			 */
3830			if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
3831				nentries = 256;
3832			/*
3833			 * Crusoe processors also have a unified TLB
3834			 */
3835			add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
3836			    nentries);
3837			break;
3838		}
3839		/*FALLTHROUGH*/
3840	default:
3841		add_amd_tlb(devi, itlb4k_str,
3842		    BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
3843		add_amd_tlb(devi, dtlb4k_str,
3844		    BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
3845		break;
3846	}
3847
3848	/*
3849	 * data L1 cache configuration
3850	 */
3851
3852	add_amd_cache(devi, l1_dcache_str,
3853	    BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
3854	    BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
3855
3856	/*
3857	 * code L1 cache configuration
3858	 */
3859
3860	add_amd_cache(devi, l1_icache_str,
3861	    BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
3862	    BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
3863
3864	if (cpi->cpi_xmaxeax < 0x80000006)
3865		return;
3866	cp = &cpi->cpi_extd[6];
3867
3868	/* Check for a unified L2 TLB for large pages */
3869
3870	if (BITX(cp->cp_eax, 31, 16) == 0)
3871		add_amd_l2_tlb(devi, "l2-tlb-2M",
3872		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3873	else {
3874		add_amd_l2_tlb(devi, "l2-dtlb-2M",
3875		    BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
3876		add_amd_l2_tlb(devi, "l2-itlb-2M",
3877		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3878	}
3879
3880	/* Check for a unified L2 TLB for 4K pages */
3881
3882	if (BITX(cp->cp_ebx, 31, 16) == 0) {
3883		add_amd_l2_tlb(devi, "l2-tlb-4K",
3884		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3885	} else {
3886		add_amd_l2_tlb(devi, "l2-dtlb-4K",
3887		    BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
3888		add_amd_l2_tlb(devi, "l2-itlb-4K",
3889		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3890	}
3891
3892	add_amd_l2_cache(devi, l2_cache_str,
3893	    BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
3894	    BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
3895}
3896
3897/*
3898 * There are two basic ways that the x86 world describes it cache
3899 * and tlb architecture - Intel's way and AMD's way.
3900 *
3901 * Return which flavor of cache architecture we should use
3902 */
3903static int
3904x86_which_cacheinfo(struct cpuid_info *cpi)
3905{
3906	switch (cpi->cpi_vendor) {
3907	case X86_VENDOR_Intel:
3908		if (cpi->cpi_maxeax >= 2)
3909			return (X86_VENDOR_Intel);
3910		break;
3911	case X86_VENDOR_AMD:
3912		/*
3913		 * The K5 model 1 was the first part from AMD that reported
3914		 * cache sizes via extended cpuid functions.
3915		 */
3916		if (cpi->cpi_family > 5 ||
3917		    (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
3918			return (X86_VENDOR_AMD);
3919		break;
3920	case X86_VENDOR_TM:
3921		if (cpi->cpi_family >= 5)
3922			return (X86_VENDOR_AMD);
3923		/*FALLTHROUGH*/
3924	default:
3925		/*
3926		 * If they have extended CPU data for 0x80000005
3927		 * then we assume they have AMD-format cache
3928		 * information.
3929		 *
3930		 * If not, and the vendor happens to be Cyrix,
3931		 * then try our-Cyrix specific handler.
3932		 *
3933		 * If we're not Cyrix, then assume we're using Intel's
3934		 * table-driven format instead.
3935		 */
3936		if (cpi->cpi_xmaxeax >= 0x80000005)
3937			return (X86_VENDOR_AMD);
3938		else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
3939			return (X86_VENDOR_Cyrix);
3940		else if (cpi->cpi_maxeax >= 2)
3941			return (X86_VENDOR_Intel);
3942		break;
3943	}
3944	return (-1);
3945}
3946
3947void
3948cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
3949    struct cpuid_info *cpi)
3950{
3951	dev_info_t *cpu_devi;
3952	int create;
3953
3954	cpu_devi = (dev_info_t *)dip;
3955
3956	/* device_type */
3957	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
3958	    "device_type", "cpu");
3959
3960	/* reg */
3961	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3962	    "reg", cpu_id);
3963
3964	/* cpu-mhz, and clock-frequency */
3965	if (cpu_freq > 0) {
3966		long long mul;
3967
3968		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3969		    "cpu-mhz", cpu_freq);
3970		if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
3971			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3972			    "clock-frequency", (int)mul);
3973	}
3974
3975	if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
3976		return;
3977	}
3978
3979	/* vendor-id */
3980	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
3981	    "vendor-id", cpi->cpi_vendorstr);
3982
3983	if (cpi->cpi_maxeax == 0) {
3984		return;
3985	}
3986
3987	/*
3988	 * family, model, and step
3989	 */
3990	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3991	    "family", CPI_FAMILY(cpi));
3992	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3993	    "cpu-model", CPI_MODEL(cpi));
3994	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3995	    "stepping-id", CPI_STEP(cpi));
3996
3997	/* type */
3998	switch (cpi->cpi_vendor) {
3999	case X86_VENDOR_Intel:
4000		create = 1;
4001		break;
4002	default:
4003		create = 0;
4004		break;
4005	}
4006	if (create)
4007		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4008		    "type", CPI_TYPE(cpi));
4009
4010	/* ext-family */
4011	switch (cpi->cpi_vendor) {
4012	case X86_VENDOR_Intel:
4013	case X86_VENDOR_AMD:
4014		create = cpi->cpi_family >= 0xf;
4015		break;
4016	default:
4017		create = 0;
4018		break;
4019	}
4020	if (create)
4021		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4022		    "ext-family", CPI_FAMILY_XTD(cpi));
4023
4024	/* ext-model */
4025	switch (cpi->cpi_vendor) {
4026	case X86_VENDOR_Intel:
4027		create = IS_EXTENDED_MODEL_INTEL(cpi);
4028		break;
4029	case X86_VENDOR_AMD:
4030		create = CPI_FAMILY(cpi) == 0xf;
4031		break;
4032	default:
4033		create = 0;
4034		break;
4035	}
4036	if (create)
4037		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4038		    "ext-model", CPI_MODEL_XTD(cpi));
4039
4040	/* generation */
4041	switch (cpi->cpi_vendor) {
4042	case X86_VENDOR_AMD:
4043		/*
4044		 * AMD K5 model 1 was the first part to support this
4045		 */
4046		create = cpi->cpi_xmaxeax >= 0x80000001;
4047		break;
4048	default:
4049		create = 0;
4050		break;
4051	}
4052	if (create)
4053		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4054		    "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
4055
4056	/* brand-id */
4057	switch (cpi->cpi_vendor) {
4058	case X86_VENDOR_Intel:
4059		/*
4060		 * brand id first appeared on Pentium III Xeon model 8,
4061		 * and Celeron model 8 processors and Opteron
4062		 */
4063		create = cpi->cpi_family > 6 ||
4064		    (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
4065		break;
4066	case X86_VENDOR_AMD:
4067		create = cpi->cpi_family >= 0xf;
4068		break;
4069	default:
4070		create = 0;
4071		break;
4072	}
4073	if (create && cpi->cpi_brandid != 0) {
4074		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4075		    "brand-id", cpi->cpi_brandid);
4076	}
4077
4078	/* chunks, and apic-id */
4079	switch (cpi->cpi_vendor) {
4080		/*
4081		 * first available on Pentium IV and Opteron (K8)
4082		 */
4083	case X86_VENDOR_Intel:
4084		create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4085		break;
4086	case X86_VENDOR_AMD:
4087		create = cpi->cpi_family >= 0xf;
4088		break;
4089	default:
4090		create = 0;
4091		break;
4092	}
4093	if (create) {
4094		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4095		    "chunks", CPI_CHUNKS(cpi));
4096		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4097		    "apic-id", cpi->cpi_apicid);
4098		if (cpi->cpi_chipid >= 0) {
4099			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4100			    "chip#", cpi->cpi_chipid);
4101			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4102			    "clog#", cpi->cpi_clogid);
4103		}
4104	}
4105
4106	/* cpuid-features */
4107	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4108	    "cpuid-features", CPI_FEATURES_EDX(cpi));
4109
4110
4111	/* cpuid-features-ecx */
4112	switch (cpi->cpi_vendor) {
4113	case X86_VENDOR_Intel:
4114		create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4115		break;
4116	default:
4117		create = 0;
4118		break;
4119	}
4120	if (create)
4121		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4122		    "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
4123
4124	/* ext-cpuid-features */
4125	switch (cpi->cpi_vendor) {
4126	case X86_VENDOR_Intel:
4127	case X86_VENDOR_AMD:
4128	case X86_VENDOR_Cyrix:
4129	case X86_VENDOR_TM:
4130	case X86_VENDOR_Centaur:
4131		create = cpi->cpi_xmaxeax >= 0x80000001;
4132		break;
4133	default:
4134		create = 0;
4135		break;
4136	}
4137	if (create) {
4138		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4139		    "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
4140		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4141		    "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
4142	}
4143
4144	/*
4145	 * Brand String first appeared in Intel Pentium IV, AMD K5
4146	 * model 1, and Cyrix GXm.  On earlier models we try and
4147	 * simulate something similar .. so this string should always
4148	 * same -something- about the processor, however lame.
4149	 */
4150	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4151	    "brand-string", cpi->cpi_brandstr);
4152
4153	/*
4154	 * Finally, cache and tlb information
4155	 */
4156	switch (x86_which_cacheinfo(cpi)) {
4157	case X86_VENDOR_Intel:
4158		intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4159		break;
4160	case X86_VENDOR_Cyrix:
4161		cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4162		break;
4163	case X86_VENDOR_AMD:
4164		amd_cache_info(cpi, cpu_devi);
4165		break;
4166	default:
4167		break;
4168	}
4169}
4170
4171struct l2info {
4172	int *l2i_csz;
4173	int *l2i_lsz;
4174	int *l2i_assoc;
4175	int l2i_ret;
4176};
4177
4178/*
4179 * A cacheinfo walker that fetches the size, line-size and associativity
4180 * of the L2 cache
4181 */
4182static int
4183intel_l2cinfo(void *arg, const struct cachetab *ct)
4184{
4185	struct l2info *l2i = arg;
4186	int *ip;
4187
4188	if (ct->ct_label != l2_cache_str &&
4189	    ct->ct_label != sl2_cache_str)
4190		return (0);	/* not an L2 -- keep walking */
4191
4192	if ((ip = l2i->l2i_csz) != NULL)
4193		*ip = ct->ct_size;
4194	if ((ip = l2i->l2i_lsz) != NULL)
4195		*ip = ct->ct_line_size;
4196	if ((ip = l2i->l2i_assoc) != NULL)
4197		*ip = ct->ct_assoc;
4198	l2i->l2i_ret = ct->ct_size;
4199	return (1);		/* was an L2 -- terminate walk */
4200}
4201
4202/*
4203 * AMD L2/L3 Cache and TLB Associativity Field Definition:
4204 *
4205 *	Unlike the associativity for the L1 cache and tlb where the 8 bit
4206 *	value is the associativity, the associativity for the L2 cache and
4207 *	tlb is encoded in the following table. The 4 bit L2 value serves as
4208 *	an index into the amd_afd[] array to determine the associativity.
4209 *	-1 is undefined. 0 is fully associative.
4210 */
4211
4212static int amd_afd[] =
4213	{-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
4214
4215static void
4216amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
4217{
4218	struct cpuid_regs *cp;
4219	uint_t size, assoc;
4220	int i;
4221	int *ip;
4222
4223	if (cpi->cpi_xmaxeax < 0x80000006)
4224		return;
4225	cp = &cpi->cpi_extd[6];
4226
4227	if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
4228	    (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
4229		uint_t cachesz = size * 1024;
4230		assoc = amd_afd[i];
4231
4232		ASSERT(assoc != -1);
4233
4234		if ((ip = l2i->l2i_csz) != NULL)
4235			*ip = cachesz;
4236		if ((ip = l2i->l2i_lsz) != NULL)
4237			*ip = BITX(cp->cp_ecx, 7, 0);
4238		if ((ip = l2i->l2i_assoc) != NULL)
4239			*ip = assoc;
4240		l2i->l2i_ret = cachesz;
4241	}
4242}
4243
4244int
4245getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
4246{
4247	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4248	struct l2info __l2info, *l2i = &__l2info;
4249
4250	l2i->l2i_csz = csz;
4251	l2i->l2i_lsz = lsz;
4252	l2i->l2i_assoc = assoc;
4253	l2i->l2i_ret = -1;
4254
4255	switch (x86_which_cacheinfo(cpi)) {
4256	case X86_VENDOR_Intel:
4257		intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4258		break;
4259	case X86_VENDOR_Cyrix:
4260		cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4261		break;
4262	case X86_VENDOR_AMD:
4263		amd_l2cacheinfo(cpi, l2i);
4264		break;
4265	default:
4266		break;
4267	}
4268	return (l2i->l2i_ret);
4269}
4270
4271#if !defined(__xpv)
4272
4273uint32_t *
4274cpuid_mwait_alloc(cpu_t *cpu)
4275{
4276	uint32_t	*ret;
4277	size_t		mwait_size;
4278
4279	ASSERT(cpuid_checkpass(CPU, 2));
4280
4281	mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
4282	if (mwait_size == 0)
4283		return (NULL);
4284
4285	/*
4286	 * kmem_alloc() returns cache line size aligned data for mwait_size
4287	 * allocations.  mwait_size is currently cache line sized.  Neither
4288	 * of these implementation details are guarantied to be true in the
4289	 * future.
4290	 *
4291	 * First try allocating mwait_size as kmem_alloc() currently returns
4292	 * correctly aligned memory.  If kmem_alloc() does not return
4293	 * mwait_size aligned memory, then use mwait_size ROUNDUP.
4294	 *
4295	 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
4296	 * decide to free this memory.
4297	 */
4298	ret = kmem_zalloc(mwait_size, KM_SLEEP);
4299	if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
4300		cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4301		cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
4302		*ret = MWAIT_RUNNING;
4303		return (ret);
4304	} else {
4305		kmem_free(ret, mwait_size);
4306		ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
4307		cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4308		cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
4309		ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
4310		*ret = MWAIT_RUNNING;
4311		return (ret);
4312	}
4313}
4314
4315void
4316cpuid_mwait_free(cpu_t *cpu)
4317{
4318	if (cpu->cpu_m.mcpu_cpi == NULL) {
4319		return;
4320	}
4321
4322	if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
4323	    cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
4324		kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
4325		    cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
4326	}
4327
4328	cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
4329	cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
4330}
4331
4332void
4333patch_tsc_read(int flag)
4334{
4335	size_t cnt;
4336
4337	switch (flag) {
4338	case X86_NO_TSC:
4339		cnt = &_no_rdtsc_end - &_no_rdtsc_start;
4340		(void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
4341		break;
4342	case X86_HAVE_TSCP:
4343		cnt = &_tscp_end - &_tscp_start;
4344		(void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
4345		break;
4346	case X86_TSC_MFENCE:
4347		cnt = &_tsc_mfence_end - &_tsc_mfence_start;
4348		(void) memcpy((void *)tsc_read,
4349		    (void *)&_tsc_mfence_start, cnt);
4350		break;
4351	case X86_TSC_LFENCE:
4352		cnt = &_tsc_lfence_end - &_tsc_lfence_start;
4353		(void) memcpy((void *)tsc_read,
4354		    (void *)&_tsc_lfence_start, cnt);
4355		break;
4356	default:
4357		break;
4358	}
4359}
4360
4361int
4362cpuid_deep_cstates_supported(void)
4363{
4364	struct cpuid_info *cpi;
4365	struct cpuid_regs regs;
4366
4367	ASSERT(cpuid_checkpass(CPU, 1));
4368
4369	cpi = CPU->cpu_m.mcpu_cpi;
4370
4371	if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
4372		return (0);
4373
4374	switch (cpi->cpi_vendor) {
4375	case X86_VENDOR_Intel:
4376		if (cpi->cpi_xmaxeax < 0x80000007)
4377			return (0);
4378
4379		/*
4380		 * TSC run at a constant rate in all ACPI C-states?
4381		 */
4382		regs.cp_eax = 0x80000007;
4383		(void) __cpuid_insn(&regs);
4384		return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
4385
4386	default:
4387		return (0);
4388	}
4389}
4390
4391#endif	/* !__xpv */
4392
4393void
4394post_startup_cpu_fixups(void)
4395{
4396#ifndef __xpv
4397	/*
4398	 * Some AMD processors support C1E state. Entering this state will
4399	 * cause the local APIC timer to stop, which we can't deal with at
4400	 * this time.
4401	 */
4402	if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
4403		on_trap_data_t otd;
4404		uint64_t reg;
4405
4406		if (!on_trap(&otd, OT_DATA_ACCESS)) {
4407			reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
4408			/* Disable C1E state if it is enabled by BIOS */
4409			if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
4410			    AMD_ACTONCMPHALT_MASK) {
4411				reg &= ~(AMD_ACTONCMPHALT_MASK <<
4412				    AMD_ACTONCMPHALT_SHIFT);
4413				wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
4414			}
4415		}
4416		no_trap();
4417	}
4418#endif	/* !__xpv */
4419}
4420
4421/*
4422 * Setup necessary registers to enable XSAVE feature on this processor.
4423 * This function needs to be called early enough, so that no xsave/xrstor
4424 * ops will execute on the processor before the MSRs are properly set up.
4425 *
4426 * Current implementation has the following assumption:
4427 * - cpuid_pass1() is done, so that X86 features are known.
4428 * - fpu_probe() is done, so that fp_save_mech is chosen.
4429 */
4430void
4431xsave_setup_msr(cpu_t *cpu)
4432{
4433	ASSERT(fp_save_mech == FP_XSAVE);
4434	ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
4435
4436	/* Enable OSXSAVE in CR4. */
4437	setcr4(getcr4() | CR4_OSXSAVE);
4438	/*
4439	 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
4440	 * correct value.
4441	 */
4442	cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
4443	setup_xfem();
4444}
4445
4446/*
4447 * Starting with the Westmere processor the local
4448 * APIC timer will continue running in all C-states,
4449 * including the deepest C-states.
4450 */
4451int
4452cpuid_arat_supported(void)
4453{
4454	struct cpuid_info *cpi;
4455	struct cpuid_regs regs;
4456
4457	ASSERT(cpuid_checkpass(CPU, 1));
4458	ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4459
4460	cpi = CPU->cpu_m.mcpu_cpi;
4461
4462	switch (cpi->cpi_vendor) {
4463	case X86_VENDOR_Intel:
4464		/*
4465		 * Always-running Local APIC Timer is
4466		 * indicated by CPUID.6.EAX[2].
4467		 */
4468		if (cpi->cpi_maxeax >= 6) {
4469			regs.cp_eax = 6;
4470			(void) cpuid_insn(NULL, &regs);
4471			return (regs.cp_eax & CPUID_CSTATE_ARAT);
4472		} else {
4473			return (0);
4474		}
4475	default:
4476		return (0);
4477	}
4478}
4479
4480/*
4481 * Check support for Intel ENERGY_PERF_BIAS feature
4482 */
4483int
4484cpuid_iepb_supported(struct cpu *cp)
4485{
4486	struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
4487	struct cpuid_regs regs;
4488
4489	ASSERT(cpuid_checkpass(cp, 1));
4490
4491	if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
4492	    !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
4493		return (0);
4494	}
4495
4496	/*
4497	 * Intel ENERGY_PERF_BIAS MSR is indicated by
4498	 * capability bit CPUID.6.ECX.3
4499	 */
4500	if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
4501		return (0);
4502
4503	regs.cp_eax = 0x6;
4504	(void) cpuid_insn(NULL, &regs);
4505	return (regs.cp_ecx & CPUID_EPB_SUPPORT);
4506}
4507
4508/*
4509 * Check support for TSC deadline timer
4510 *
4511 * TSC deadline timer provides a superior software programming
4512 * model over local APIC timer that eliminates "time drifts".
4513 * Instead of specifying a relative time, software specifies an
4514 * absolute time as the target at which the processor should
4515 * generate a timer event.
4516 */
4517int
4518cpuid_deadline_tsc_supported(void)
4519{
4520	struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
4521	struct cpuid_regs regs;
4522
4523	ASSERT(cpuid_checkpass(CPU, 1));
4524	ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4525
4526	switch (cpi->cpi_vendor) {
4527	case X86_VENDOR_Intel:
4528		if (cpi->cpi_maxeax >= 1) {
4529			regs.cp_eax = 1;
4530			(void) cpuid_insn(NULL, &regs);
4531			return (regs.cp_ecx & CPUID_DEADLINE_TSC);
4532		} else {
4533			return (0);
4534		}
4535	default:
4536		return (0);
4537	}
4538}
4539
4540#if defined(__amd64) && !defined(__xpv)
4541/*
4542 * Patch in versions of bcopy for high performance Intel Nhm processors
4543 * and later...
4544 */
4545void
4546patch_memops(uint_t vendor)
4547{
4548	size_t cnt, i;
4549	caddr_t to, from;
4550
4551	if ((vendor == X86_VENDOR_Intel) &&
4552	    is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
4553		cnt = &bcopy_patch_end - &bcopy_patch_start;
4554		to = &bcopy_ck_size;
4555		from = &bcopy_patch_start;
4556		for (i = 0; i < cnt; i++) {
4557			*to++ = *from++;
4558		}
4559	}
4560}
4561#endif  /* __amd64 && !__xpv */
4562
4563/*
4564 * This function finds the number of bits to represent the number of cores per
4565 * chip and the number of strands per core for the Intel platforms.
4566 * It re-uses the x2APIC cpuid code of the cpuid_pass2().
4567 */
4568void
4569cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits)
4570{
4571	struct cpuid_regs regs;
4572	struct cpuid_regs *cp = &regs;
4573
4574	if (vendor != X86_VENDOR_Intel) {
4575		return;
4576	}
4577
4578	/* if the cpuid level is 0xB, extended topo is available. */
4579	cp->cp_eax = 0;
4580	if (__cpuid_insn(cp) >= 0xB) {
4581
4582		cp->cp_eax = 0xB;
4583		cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
4584		(void) __cpuid_insn(cp);
4585
4586		/*
4587		 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
4588		 * indicates that the extended topology enumeration leaf is
4589		 * available.
4590		 */
4591		if (cp->cp_ebx) {
4592			uint_t coreid_shift = 0;
4593			uint_t chipid_shift = 0;
4594			uint_t i;
4595			uint_t level;
4596
4597			for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
4598				cp->cp_eax = 0xB;
4599				cp->cp_ecx = i;
4600
4601				(void) __cpuid_insn(cp);
4602				level = CPI_CPU_LEVEL_TYPE(cp);
4603
4604				if (level == 1) {
4605					/*
4606					 * Thread level processor topology
4607					 * Number of bits shift right APIC ID
4608					 * to get the coreid.
4609					 */
4610					coreid_shift = BITX(cp->cp_eax, 4, 0);
4611				} else if (level == 2) {
4612					/*
4613					 * Core level processor topology
4614					 * Number of bits shift right APIC ID
4615					 * to get the chipid.
4616					 */
4617					chipid_shift = BITX(cp->cp_eax, 4, 0);
4618				}
4619			}
4620
4621			if (coreid_shift > 0 && chipid_shift > coreid_shift) {
4622				*strand_nbits = coreid_shift;
4623				*core_nbits = chipid_shift - coreid_shift;
4624			}
4625		}
4626	}
4627}
4628