mp_startup.c revision 3434:5142e1d7d0bc
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/types.h>
29#include <sys/thread.h>
30#include <sys/cpuvar.h>
31#include <sys/t_lock.h>
32#include <sys/param.h>
33#include <sys/proc.h>
34#include <sys/disp.h>
35#include <sys/mmu.h>
36#include <sys/class.h>
37#include <sys/cmn_err.h>
38#include <sys/debug.h>
39#include <sys/asm_linkage.h>
40#include <sys/x_call.h>
41#include <sys/systm.h>
42#include <sys/var.h>
43#include <sys/vtrace.h>
44#include <vm/hat.h>
45#include <sys/mmu.h>
46#include <vm/as.h>
47#include <vm/seg_kmem.h>
48#include <sys/segments.h>
49#include <sys/kmem.h>
50#include <sys/stack.h>
51#include <sys/smp_impldefs.h>
52#include <sys/x86_archext.h>
53#include <sys/machsystm.h>
54#include <sys/traptrace.h>
55#include <sys/clock.h>
56#include <sys/cpc_impl.h>
57#include <sys/pg.h>
58#include <sys/cmt.h>
59#include <sys/dtrace.h>
60#include <sys/archsystm.h>
61#include <sys/fp.h>
62#include <sys/reboot.h>
63#include <sys/kdi.h>
64#include <vm/hat_i86.h>
65#include <sys/memnode.h>
66#include <sys/pci_cfgspace.h>
67#include <sys/cpu_module.h>
68
/*
 * Boot CPU storage and the per-CPU lookup tables.  Only slot 0 of cpu[]
 * is populated statically; the remaining slots start out NULL.
 */
struct cpu	cpus[1];			/* CPU data */
struct cpu	*cpu[NCPU] = {&cpus[0]};	/* pointers to all CPUs */
cpu_core_t	cpu_core[NCPU];			/* cpu_core structures */

/*
 * Useful for disabling MP bring-up for an MP capable kernel
 * (a kernel that was built with MP defined).  When zero,
 * start_other_cpus() reports "Not in MP mode" and starts no other CPUs.
 */
int use_mp = 1;

/*
 * To be set by a PSM to indicate what CPUs are available on the system.
 * The initial value of 1 has only the bit for CPU 0 set.
 */
cpuset_t mp_cpus = 1;

/*
 * This variable is used by the hat layer to decide whether or not
 * critical sections are needed to prevent race conditions.  For sun4m,
 * this variable is set once enough MP initialization has been done in
 * order to allow cross calls.
 *
 * In this file it is set to 1 by start_other_cpus(), after xc_init() has
 * run and the real mode startup code has been copied into place.
 * NOTE(review): the "sun4m" reference above looks stale for an x86 file --
 * confirm and reword.
 */
int flushes_require_xcalls = 0;
/* CPUs that are fully online; start_other_cpus() waits on this set. */
cpuset_t	cpu_ready_set = 1;

/* Bounds of the real mode bootstrap code copied to the rm_platter page. */
extern	void	real_mode_start(void);
extern	void	real_mode_end(void);
static 	void	mp_startup(void);

/* Helpers that turn the sysenter and syscall extensions on or off. */
static void cpu_sep_enable(void);
static void cpu_sep_disable(void);
static void cpu_asysc_enable(void);
static void cpu_asysc_disable(void);

/* Non-zero when TSC synchronization is done as each CPU is started. */
extern int tsc_gethrtime_enable;
103
104/*
105 * Init CPU info - get CPU type info for processor_info system call.
106 */
107void
108init_cpu_info(struct cpu *cp)
109{
110	processor_info_t *pi = &cp->cpu_type_info;
111	char buf[CPU_IDSTRLEN];
112
113	/*
114	 * Get clock-frequency property for the CPU.
115	 */
116	pi->pi_clock = cpu_freq;
117
118	(void) strcpy(pi->pi_processor_type, "i386");
119	if (fpu_exists)
120		(void) strcpy(pi->pi_fputypes, "i387 compatible");
121
122	(void) cpuid_getidstr(cp, buf, sizeof (buf));
123
124	cp->cpu_idstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP);
125	(void) strcpy(cp->cpu_idstr, buf);
126
127	cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_idstr);
128
129	(void) cpuid_getbrandstr(cp, buf, sizeof (buf));
130	cp->cpu_brandstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP);
131	(void) strcpy(cp->cpu_brandstr, buf);
132
133	cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_brandstr);
134}
135
136/*
137 * Configure syscall support on this CPU.
138 */
139/*ARGSUSED*/
140static void
141init_cpu_syscall(struct cpu *cp)
142{
143	kpreempt_disable();
144
145#if defined(__amd64)
146	if (x86_feature & X86_ASYSC) {
147
148#if !defined(__lint)
149		/*
150		 * The syscall instruction imposes a certain ordering on
151		 * segment selectors, so we double-check that ordering
152		 * here.
153		 */
154		ASSERT(KDS_SEL == KCS_SEL + 8);
155		ASSERT(UDS_SEL == U32CS_SEL + 8);
156		ASSERT(UCS_SEL == U32CS_SEL + 16);
157#endif
158		/*
159		 * Turn syscall/sysret extensions on.
160		 */
161		cpu_asysc_enable();
162
163		/*
164		 * Program the magic registers ..
165		 */
166		wrmsr(MSR_AMD_STAR, ((uint64_t)(U32CS_SEL << 16 | KCS_SEL)) <<
167		    32);
168		wrmsr(MSR_AMD_LSTAR, (uint64_t)(uintptr_t)sys_syscall);
169		wrmsr(MSR_AMD_CSTAR, (uint64_t)(uintptr_t)sys_syscall32);
170
171		/*
172		 * This list of flags is masked off the incoming
173		 * %rfl when we enter the kernel.
174		 */
175		wrmsr(MSR_AMD_SFMASK, (uint64_t)(uintptr_t)(PS_IE | PS_T));
176	}
177#endif
178
179	/*
180	 * On 32-bit kernels, we use sysenter/sysexit because it's too
181	 * hard to use syscall/sysret, and it is more portable anyway.
182	 *
183	 * On 64-bit kernels on Nocona machines, the 32-bit syscall
184	 * variant isn't available to 32-bit applications, but sysenter is.
185	 */
186	if (x86_feature & X86_SEP) {
187
188#if !defined(__lint)
189		/*
190		 * The sysenter instruction imposes a certain ordering on
191		 * segment selectors, so we double-check that ordering
192		 * here. See "sysenter" in Intel document 245471-012, "IA-32
193		 * Intel Architecture Software Developer's Manual Volume 2:
194		 * Instruction Set Reference"
195		 */
196		ASSERT(KDS_SEL == KCS_SEL + 8);
197
198		ASSERT32(UCS_SEL == ((KCS_SEL + 16) | 3));
199		ASSERT32(UDS_SEL == UCS_SEL + 8);
200
201		ASSERT64(U32CS_SEL == ((KCS_SEL + 16) | 3));
202		ASSERT64(UDS_SEL == U32CS_SEL + 8);
203#endif
204
205		cpu_sep_enable();
206
207		/*
208		 * resume() sets this value to the base of the threads stack
209		 * via a context handler.
210		 */
211		wrmsr(MSR_INTC_SEP_ESP, 0ULL);
212		wrmsr(MSR_INTC_SEP_EIP, (uint64_t)(uintptr_t)sys_sysenter);
213	}
214
215	kpreempt_enable();
216}
217
218/*
219 * Multiprocessor initialization.
220 *
221 * Allocate and initialize the cpu structure, TRAPTRACE buffer, and the
222 * startup and idle threads for the specified CPU.
223 */
224static void
225mp_startup_init(int cpun)
226{
227#if defined(__amd64)
228extern void *long_mode_64(void);
229#endif	/* __amd64 */
230
231	struct cpu *cp;
232	struct tss *ntss;
233	kthread_id_t tp;
234	caddr_t	sp;
235	int size;
236	proc_t *procp;
237	extern void idle();
238
239	struct cpu_tables *tablesp;
240	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
241
242#ifdef TRAPTRACE
243	trap_trace_ctl_t *ttc = &trap_trace_ctl[cpun];
244#endif
245
246	ASSERT(cpun < NCPU && cpu[cpun] == NULL);
247
248	if ((cp = kmem_zalloc(sizeof (*cp), KM_NOSLEEP)) == NULL) {
249		panic("mp_startup_init: cpu%d: "
250		    "no memory for cpu structure", cpun);
251		/*NOTREACHED*/
252	}
253	procp = curthread->t_procp;
254
255	mutex_enter(&cpu_lock);
256	/*
257	 * Initialize the dispatcher first.
258	 */
259	disp_cpu_init(cp);
260	mutex_exit(&cpu_lock);
261
262	cpu_vm_data_init(cp);
263
264	/*
265	 * Allocate and initialize the startup thread for this CPU.
266	 * Interrupt and process switch stacks get allocated later
267	 * when the CPU starts running.
268	 */
269	tp = thread_create(NULL, 0, NULL, NULL, 0, procp,
270	    TS_STOPPED, maxclsyspri);
271
272	/*
273	 * Set state to TS_ONPROC since this thread will start running
274	 * as soon as the CPU comes online.
275	 *
276	 * All the other fields of the thread structure are setup by
277	 * thread_create().
278	 */
279	THREAD_ONPROC(tp, cp);
280	tp->t_preempt = 1;
281	tp->t_bound_cpu = cp;
282	tp->t_affinitycnt = 1;
283	tp->t_cpu = cp;
284	tp->t_disp_queue = cp->cpu_disp;
285
286	/*
287	 * Setup thread to start in mp_startup.
288	 */
289	sp = tp->t_stk;
290	tp->t_pc = (uintptr_t)mp_startup;
291	tp->t_sp = (uintptr_t)(sp - MINFRAME);
292
293	cp->cpu_id = cpun;
294	cp->cpu_self = cp;
295	cp->cpu_thread = tp;
296	cp->cpu_lwp = NULL;
297	cp->cpu_dispthread = tp;
298	cp->cpu_dispatch_pri = DISP_PRIO(tp);
299
300	/*
301	 * cpu_base_spl must be set explicitly here to prevent any blocking
302	 * operations in mp_startup from causing the spl of the cpu to drop
303	 * to 0 (allowing device interrupts before we're ready) in resume().
304	 * cpu_base_spl MUST remain at LOCK_LEVEL until the cpu is CPU_READY.
305	 * As an extra bit of security on DEBUG kernels, this is enforced with
306	 * an assertion in mp_startup() -- before cpu_base_spl is set to its
307	 * proper value.
308	 */
309	cp->cpu_base_spl = ipltospl(LOCK_LEVEL);
310
311	/*
312	 * Now, initialize per-CPU idle thread for this CPU.
313	 */
314	tp = thread_create(NULL, PAGESIZE, idle, NULL, 0, procp, TS_ONPROC, -1);
315
316	cp->cpu_idle_thread = tp;
317
318	tp->t_preempt = 1;
319	tp->t_bound_cpu = cp;
320	tp->t_affinitycnt = 1;
321	tp->t_cpu = cp;
322	tp->t_disp_queue = cp->cpu_disp;
323
324	/*
325	 * Bootstrap the CPU's PG data
326	 */
327	pg_cpu_bootstrap(cp);
328
329	/*
330	 * Perform CPC intialization on the new CPU.
331	 */
332	kcpc_hw_init(cp);
333
334	/*
335	 * Allocate virtual addresses for cpu_caddr1 and cpu_caddr2
336	 * for each CPU.
337	 */
338
339	setup_vaddr_for_ppcopy(cp);
340
341	/*
342	 * Allocate space for page directory, stack, tss, gdt and idt.
343	 * This assumes that kmem_alloc will return memory which is aligned
344	 * to the next higher power of 2 or a page(if size > MAXABIG)
345	 * If this assumption goes wrong at any time due to change in
346	 * kmem alloc, things may not work as the page directory has to be
347	 * page aligned
348	 */
349	if ((tablesp = kmem_zalloc(sizeof (*tablesp), KM_NOSLEEP)) == NULL)
350		panic("mp_startup_init: cpu%d cannot allocate tables", cpun);
351
352	if ((uintptr_t)tablesp & ~MMU_STD_PAGEMASK) {
353		kmem_free(tablesp, sizeof (struct cpu_tables));
354		size = sizeof (struct cpu_tables) + MMU_STD_PAGESIZE;
355		tablesp = kmem_zalloc(size, KM_NOSLEEP);
356		tablesp = (struct cpu_tables *)
357		    (((uintptr_t)tablesp + MMU_STD_PAGESIZE) &
358		    MMU_STD_PAGEMASK);
359	}
360
361	ntss = cp->cpu_tss = &tablesp->ct_tss;
362
363	if ((tablesp->ct_gdt = kmem_zalloc(PAGESIZE, KM_NOSLEEP)) == NULL)
364		panic("mp_startup_init: cpu%d cannot allocate GDT", cpun);
365	cp->cpu_gdt = tablesp->ct_gdt;
366	bcopy(CPU->cpu_gdt, cp->cpu_gdt, NGDT * (sizeof (user_desc_t)));
367
368#if defined(__amd64)
369
370	/*
371	 * #DF (double fault).
372	 */
373	ntss->tss_ist1 =
374	    (uint64_t)&tablesp->ct_stack[sizeof (tablesp->ct_stack)];
375
376#elif defined(__i386)
377
378	ntss->tss_esp0 = ntss->tss_esp1 = ntss->tss_esp2 = ntss->tss_esp =
379	    (uint32_t)&tablesp->ct_stack[sizeof (tablesp->ct_stack)];
380
381	ntss->tss_ss0 = ntss->tss_ss1 = ntss->tss_ss2 = ntss->tss_ss = KDS_SEL;
382
383	ntss->tss_eip = (uint32_t)mp_startup;
384
385	ntss->tss_cs = KCS_SEL;
386	ntss->tss_fs = KFS_SEL;
387	ntss->tss_gs = KGS_SEL;
388
389	/*
390	 * setup kernel %gs.
391	 */
392	set_usegd(&cp->cpu_gdt[GDT_GS], cp, sizeof (struct cpu) -1, SDT_MEMRWA,
393	    SEL_KPL, 0, 1);
394
395#endif	/* __i386 */
396
397	/*
398	 * Set I/O bit map offset equal to size of TSS segment limit
399	 * for no I/O permission map. This will cause all user I/O
400	 * instructions to generate #gp fault.
401	 */
402	ntss->tss_bitmapbase = sizeof (*ntss);
403
404	/*
405	 * setup kernel tss.
406	 */
407	set_syssegd((system_desc_t *)&cp->cpu_gdt[GDT_KTSS], cp->cpu_tss,
408	    sizeof (*cp->cpu_tss) -1, SDT_SYSTSS, SEL_KPL);
409
410	/*
411	 * If we have more than one node, each cpu gets a copy of IDT
412	 * local to its node. If this is a Pentium box, we use cpu 0's
413	 * IDT. cpu 0's IDT has been made read-only to workaround the
414	 * cmpxchgl register bug
415	 */
416	cp->cpu_idt = CPU->cpu_idt;
417	if (system_hardware.hd_nodes && x86_type != X86_TYPE_P5) {
418		cp->cpu_idt = kmem_alloc(sizeof (idt0), KM_SLEEP);
419		bcopy(idt0, cp->cpu_idt, sizeof (idt0));
420	}
421
422	/*
423	 * Get interrupt priority data from cpu 0
424	 */
425	cp->cpu_pri_data = CPU->cpu_pri_data;
426
427	hat_cpu_online(cp);
428
429	/* Should remove all entries for the current process/thread here */
430
431	/*
432	 * Fill up the real mode platter to make it easy for real mode code to
433	 * kick it off. This area should really be one passed by boot to kernel
434	 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
435	 * have identical physical and virtual address in paged mode.
436	 */
437	real_mode_platter->rm_idt_base = cp->cpu_idt;
438	real_mode_platter->rm_idt_lim = sizeof (idt0) - 1;
439	real_mode_platter->rm_gdt_base = cp->cpu_gdt;
440	real_mode_platter->rm_gdt_lim = sizeof (gdt0) -1;
441	real_mode_platter->rm_pdbr = getcr3();
442	real_mode_platter->rm_cpu = cpun;
443	real_mode_platter->rm_x86feature = x86_feature;
444	real_mode_platter->rm_cr4 = cr4_value;
445
446#if defined(__amd64)
447	if (getcr3() > 0xffffffffUL)
448		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
449			"located above 4G in physical memory (@ 0x%llx).",
450			(unsigned long long)getcr3());
451
452	/*
453	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
454	 * by code in real_mode_start():
455	 *
456	 * GDT[0]:  NULL selector
457	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
458	 *
459	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
460	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
461	 * a course of action as any other, though it may cause the entire
462	 * platform to reset in some cases...
463	 */
464	real_mode_platter->rm_temp_gdt[0] = 0ULL;
465	real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
466
467	real_mode_platter->rm_temp_gdt_lim = (ushort_t)
468	    (sizeof (real_mode_platter->rm_temp_gdt) - 1);
469	real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
470	    (uint32_t)(&((rm_platter_t *)0)->rm_temp_gdt);
471
472	real_mode_platter->rm_temp_idt_lim = 0;
473	real_mode_platter->rm_temp_idt_base = 0;
474
475	/*
476	 * Since the CPU needs to jump to protected mode using an identity
477	 * mapped address, we need to calculate it here.
478	 */
479	real_mode_platter->rm_longmode64_addr = rm_platter_pa +
480	    ((uint32_t)long_mode_64 - (uint32_t)real_mode_start);
481#endif	/* __amd64 */
482
483#ifdef TRAPTRACE
484	/*
485	 * If this is a TRAPTRACE kernel, allocate TRAPTRACE buffers for this
486	 * CPU.
487	 */
488	ttc->ttc_first = (uintptr_t)kmem_zalloc(trap_trace_bufsize, KM_SLEEP);
489	ttc->ttc_next = ttc->ttc_first;
490	ttc->ttc_limit = ttc->ttc_first + trap_trace_bufsize;
491#endif
492
493	/*
494	 * Record that we have another CPU.
495	 */
496	mutex_enter(&cpu_lock);
497	/*
498	 * Initialize the interrupt threads for this CPU
499	 */
500	cpu_intr_alloc(cp, NINTR_THREADS);
501	/*
502	 * Add CPU to list of available CPUs.  It'll be on the active list
503	 * after mp_startup().
504	 */
505	cpu_add_unit(cp);
506	mutex_exit(&cpu_lock);
507}
508
/*
 * Apply workarounds for known errata, and warn about those that are absent.
 *
 * System vendors occasionally create configurations which contain different
 * revisions of the CPUs that are almost but not exactly the same.  At the
 * time of writing, this meant that their clock rates were the same, their
 * feature sets were the same, but the required workarounds were -not-
 * necessarily the same.  So, this routine is invoked on -every- CPU soon
 * after starting to make sure that the resulting system contains the most
 * pessimal set of workarounds needed to cope with *any* of the CPUs in the
 * system.
 *
 * workaround_errata is invoked early in mlsetup() for CPU 0, and in
 * mp_startup() for all slave CPUs. Slaves process workaround_errata prior
 * to acknowledging their readiness to the master, so this routine will
 * never be executed by multiple CPUs in parallel, thus making updates to
 * global data safe.
 *
 * These workarounds are based on Rev 3.57 of the Revision Guide for
 * AMD Athlon(tm) 64 and AMD Opteron(tm) Processors, August 2005.
 */

/*
 * Per-erratum counters.  Each one exists only when the corresponding
 * workaround is compiled in, and is bumped by workaround_errata().
 */
#if defined(OPTERON_ERRATUM_91)
int opteron_erratum_91;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_93)
int opteron_erratum_93;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_100)
int opteron_erratum_100;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_109)
int opteron_erratum_109;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_121)
int opteron_erratum_121;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_122)
int opteron_erratum_122;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_123)
int opteron_erratum_123;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_131)
int opteron_erratum_131;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_WORKAROUND_6336786)
int opteron_workaround_6336786;	/* non-zero -> WA relevant and applied */
int opteron_workaround_6336786_UP = 0;	/* Not needed for UP */
#endif

#if defined(OPTERON_WORKAROUND_6323525)
int opteron_workaround_6323525;	/* if non-zero -> at least one cpu has it */
#endif

/*
 * Log that erratum number `n' was detected on `cpu' but that this kernel
 * was built without a workaround for it.
 */
#define	WARNING(cpu, n)						\
	cmn_err(CE_WARN, "cpu%d: no workaround for erratum %d",	\
	    (cpu)->cpu_id, (n))
575
/*
 * Check `cpu' against each known Opteron erratum, using
 * cpuid_opteron_erratum() to decide whether the erratum is present.
 *
 * For errata 88 through 122, a detected erratum either has its
 * compiled-in workaround noted or applied, or (when the workaround was
 * not compiled in) is reported with WARNING() and counted as missing.
 * The checks for 123, 131, 6336786 and 6323525 are compiled out entirely
 * when their workaround macro is undefined, so they never count as
 * missing.
 *
 * Must be called on the CPU being examined (see the ASSERT below).
 *
 * Returns the number of detected errata for which this kernel has no
 * workaround; zero means every detected erratum is handled.
 */
uint_t
workaround_errata(struct cpu *cpu)
{
	uint_t missing = 0;

	ASSERT(cpu == CPU);

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 88) > 0) {
		/*
		 * SWAPGS May Fail To Read Correct GS Base
		 */
#if defined(OPTERON_ERRATUM_88)
		/*
		 * The workaround is an mfence in the relevant assembler code
		 */
#else
		WARNING(cpu, 88);
		missing++;
#endif
	}

	if (cpuid_opteron_erratum(cpu, 91) > 0) {
		/*
		 * Software Prefetches May Report A Page Fault
		 */
#if defined(OPTERON_ERRATUM_91)
		/*
		 * fix is in trap.c
		 */
		opteron_erratum_91++;
#else
		WARNING(cpu, 91);
		missing++;
#endif
	}

	if (cpuid_opteron_erratum(cpu, 93) > 0) {
		/*
		 * RSM Auto-Halt Restart Returns to Incorrect RIP
		 */
#if defined(OPTERON_ERRATUM_93)
		/*
		 * fix is in trap.c
		 */
		opteron_erratum_93++;
#else
		WARNING(cpu, 93);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 95) > 0) {
		/*
		 * RET Instruction May Return to Incorrect EIP
		 */
#if defined(OPTERON_ERRATUM_95)
#if defined(_LP64)
		/*
		 * Workaround this by ensuring that 32-bit user code and
		 * 64-bit kernel code never occupy the same address
		 * range mod 4G.
		 */
		if (_userlimit32 > 0xc0000000ul)
			*(uintptr_t *)&_userlimit32 = 0xc0000000ul;

		/*LINTED*/
		ASSERT((uint32_t)COREHEAP_BASE == 0xc0000000u);
#endif	/* _LP64 */
#else
		WARNING(cpu, 95);
		missing++;
#endif	/* OPTERON_ERRATUM_95 */
	}

	if (cpuid_opteron_erratum(cpu, 100) > 0) {
		/*
		 * Compatibility Mode Branches Transfer to Illegal Address
		 */
#if defined(OPTERON_ERRATUM_100)
		/*
		 * fix is in trap.c
		 */
		opteron_erratum_100++;
#else
		WARNING(cpu, 100);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 108) > 0) {
		/*
		 * CPUID Instruction May Return Incorrect Model Number In
		 * Some Processors
		 */
#if defined(OPTERON_ERRATUM_108)
		/*
		 * (Our cpuid-handling code corrects the model number on
		 * those processors)
		 */
#else
		WARNING(cpu, 108);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 109) > 0) {
		/*
		 * Certain Reverse REP MOVS May Produce Unpredictable Behaviour
		 */
#if defined(OPTERON_ERRATUM_109)

		/*
		 * workaround is to print a warning to upgrade BIOS; the
		 * warning itself is issued by workaround_errata_end() when
		 * the patch level MSR reads as zero here
		 */
		if (rdmsr(MSR_AMD_PATCHLEVEL) == 0)
			opteron_erratum_109++;
#else
		WARNING(cpu, 109);
		missing++;
#endif
	}
	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 121) > 0) {
		/*
		 * Sequential Execution Across Non-Canonical Boundary Causes
		 * Processor Hang
		 */
#if defined(OPTERON_ERRATUM_121)
		/* set once the long mode check below has been performed */
		static int	lma;

		/* already applied by an earlier CPU: just count this one */
		if (opteron_erratum_121)
			opteron_erratum_121++;

		/*
		 * Erratum 121 is only present in long (64 bit) mode.
		 * Workaround is to include the page immediately before the
		 * va hole to eliminate the possibility of system hangs due to
		 * sequential execution across the va hole boundary.
		 */
		if (lma == 0) {
			/*
			 * check LMA once: assume all cpus are in long mode
			 * or not.
			 */
			lma = 1;

			if (rdmsr(MSR_AMD_EFER) & AMD_EFER_LMA) {
				if (hole_start) {
					hole_start -= PAGESIZE;
				} else {
					/*
					 * hole_start not yet initialized by
					 * mmu_init. Initialize hole_start
					 * with value to be subtracted.
					 */
					hole_start = PAGESIZE;
				}
				opteron_erratum_121++;
			}
		}
#else
		WARNING(cpu, 121);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 122) > 0) {
		/*
		 * TLB Flush Filter May Cause Coherency Problem in
		 * Multiprocessor Systems
		 */
#if defined(OPTERON_ERRATUM_122)
		/*
		 * Erratum 122 is only present in MP configurations (multi-core
		 * or multi-processor).
		 */

		if (opteron_erratum_122 || lgrp_plat_node_cnt > 1 ||
		    cpuid_get_ncpu_per_chip(cpu) > 1) {
			/* disable TLB Flush Filter */
			wrmsr(MSR_AMD_HWCR, rdmsr(MSR_AMD_HWCR) |
			    (uint64_t)(uintptr_t)AMD_HWCR_FFDIS);
			opteron_erratum_122++;
		}

#else
		WARNING(cpu, 122);
		missing++;
#endif
	}

#if defined(OPTERON_ERRATUM_123)
	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 123) > 0) {
		/*
		 * Bypassed Reads May Cause Data Corruption or System Hang in
		 * Dual Core Processors
		 */
		/*
		 * Erratum 123 applies only to multi-core cpus.
		 */

		if (cpuid_get_ncpu_per_chip(cpu) > 1) {
			/* workaround is to print a warning to upgrade BIOS */
			if (rdmsr(MSR_AMD_PATCHLEVEL) == 0)
				opteron_erratum_123++;
		}
	}
#endif

#if defined(OPTERON_ERRATUM_131)
	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 131) > 0) {
		/*
		 * Multiprocessor Systems with Four or More Cores May Deadlock
		 * Waiting for a Probe Response
		 */
		/*
		 * Erratum 131 applies to any system with four or more cores.
		 */
		if ((opteron_erratum_131 == 0) && ((lgrp_plat_node_cnt *
		    cpuid_get_ncpu_per_chip(cpu)) >= 4)) {
			uint64_t nbcfg;
			uint64_t wabits;

			/*
			 * Print a warning if neither of the workarounds
			 * for Erratum 131 is present.
			 */

			wabits = AMD_NB_CFG_SRQ_HEARTBEAT |
			    AMD_NB_CFG_SRQ_SPR;

			nbcfg = rdmsr(MSR_AMD_NB_CFG);
			if ((nbcfg & wabits) == 0) {
				opteron_erratum_131++;
			} else {
				/* cannot have both workarounds set */
				ASSERT((nbcfg & wabits) != wabits);
			}
		}
	}
#endif

#if defined(OPTERON_WORKAROUND_6336786)
	/*
	 * This isn't really an erratum, but for convenience the
	 * detection/workaround code lives here and in cpuid_opteron_erratum.
	 */
	if (cpuid_opteron_erratum(cpu, 6336786) > 0) {
		int	node;
		uint8_t data;

		/*
		 * Disable C1-Clock ramping on multi-core/multi-processor
		 * K8 platforms to guard against TSC drift.
		 */
		if (opteron_workaround_6336786) {
			/* already applied to every node by an earlier CPU */
			opteron_workaround_6336786++;
		} else if ((lgrp_plat_node_cnt *
		    cpuid_get_ncpu_per_chip(cpu) >= 2) ||
		    opteron_workaround_6336786_UP) {
			for (node = 0; node < lgrp_plat_node_cnt; node++) {
				/*
				 * Clear PMM7[1:0] (function 3, offset 0x87)
				 * Northbridge device is the node id + 24.
				 */
				data = pci_getb_func(0, node + 24, 3, 0x87);
				data &= 0xFC;
				pci_putb_func(0, node + 24, 3, 0x87, data);
			}
			opteron_workaround_6336786++;
		}
	}
#endif

#if defined(OPTERON_WORKAROUND_6323525)
	/*LINTED*/
	/*
	 * Mutex primitives don't work as expected.
	 */
	if (cpuid_opteron_erratum(cpu, 6323525) > 0) {

		/*
		 * problem only occurs with 2 or more cores. If bit in
		 * MSR_BU_CFG set, then not applicable. The workaround
		 * is to patch the semaphore routines with the lfence
		 * instruction to provide necessary load memory barrier with
		 * possible subsequent read-modify-write ops.
		 *
		 * It is too early in boot to call the patch routine so
		 * set erratum variable to be done in startup_end().
		 */
		if (opteron_workaround_6323525) {
			opteron_workaround_6323525++;
		} else if ((x86_feature & X86_SSE2) && ((lgrp_plat_node_cnt *
		    cpuid_get_ncpu_per_chip(cpu)) >= 2)) {
			if ((xrdmsr(MSR_BU_CFG) & 0x02) == 0)
				opteron_workaround_6323525++;
		}
	}
#endif
	return (missing);
}
883
/*
 * Issue the deferred "please upgrade your BIOS" warnings.
 *
 * For errata 109, 123 and 131 the only remedy is a BIOS microcode patch.
 * workaround_errata() bumps the matching counter when it finds the
 * erratum without the patch; this routine turns each non-zero counter
 * into a single warning.
 */
void
workaround_errata_end()
{
#if defined(OPTERON_ERRATUM_109)
	if (opteron_erratum_109 != 0) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm) "
		    "processor\n"
		    "erratum 109 was not detected; updating your system's "
		    "BIOS to a version\n"
		    "containing this microcode patch is HIGHLY recommended "
		    "or erroneous system\n"
		    "operation may occur.\n");
	}
#endif	/* OPTERON_ERRATUM_109 */

#if defined(OPTERON_ERRATUM_123)
	if (opteron_erratum_123 != 0) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm) "
		    "processor\n"
		    "erratum 123 was not detected; updating your system's "
		    "BIOS to a version\n"
		    "containing this microcode patch is HIGHLY recommended "
		    "or erroneous system\n"
		    "operation may occur.\n");
	}
#endif	/* OPTERON_ERRATUM_123 */

#if defined(OPTERON_ERRATUM_131)
	if (opteron_erratum_131 != 0) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm) "
		    "processor\n"
		    "erratum 131 was not detected; updating your system's "
		    "BIOS to a version\n"
		    "containing this microcode patch is HIGHLY recommended "
		    "or erroneous system\n"
		    "operation may occur.\n");
	}
#endif	/* OPTERON_ERRATUM_131 */
}
918
/*
 * Helpers used by start_other_cpus() to map the warm reset vector before
 * starting the other CPUs and to unmap it again afterwards.
 */
static ushort_t *mp_map_warm_reset_vector();
static void mp_unmap_warm_reset_vector(ushort_t *warm_reset_vector);

/*
 * Set of CPUs that have reached mp_startup().  Each started CPU adds
 * itself; start_other_cpus() polls this set while waiting for a CPU to
 * come up.  The initial value of 1 has only the bit for CPU 0 set.
 */
static cpuset_t procset = 1;
923
924/*ARGSUSED*/
925void
926start_other_cpus(int cprboot)
927{
928	unsigned int who;
929	int skipped = 0;
930	int cpuid = 0;
931	int delays = 0;
932	int started_cpu;
933	ushort_t *warm_reset_vector = NULL;
934
935	/*
936	 * Initialize our own cpu_info.
937	 */
938	init_cpu_info(CPU);
939
940	/*
941	 * Initialize our syscall handlers
942	 */
943	init_cpu_syscall(CPU);
944
945	/*
946	 * if only 1 cpu or not using MP, skip the rest of this
947	 */
948	if (CPUSET_ISEQUAL(mp_cpus, cpu_ready_set) || use_mp == 0) {
949		if (use_mp == 0)
950			cmn_err(CE_CONT, "?***** Not in MP mode\n");
951		goto done;
952	}
953
954	/*
955	 * perform such initialization as is needed
956	 * to be able to take CPUs on- and off-line.
957	 */
958	cpu_pause_init();
959
960	xc_init();		/* initialize processor crosscalls */
961
962	/*
963	 * Copy the real mode code at "real_mode_start" to the
964	 * page at rm_platter_va.
965	 */
966	warm_reset_vector = mp_map_warm_reset_vector();
967	if (warm_reset_vector == NULL)
968		goto done;
969
970	bcopy((caddr_t)real_mode_start,
971	    (caddr_t)((rm_platter_t *)rm_platter_va)->rm_code,
972	    (size_t)real_mode_end - (size_t)real_mode_start);
973
974	flushes_require_xcalls = 1;
975
976	ASSERT(CPU_IN_SET(procset, cpuid));
977	ASSERT(CPU_IN_SET(cpu_ready_set, cpuid));
978
979	/*
980	 * We lock our affinity to the master CPU to ensure that all slave CPUs
981	 * do their TSC syncs with the same CPU.
982	 */
983	affinity_set(CPU_CURRENT);
984
985	for (who = 0; who < NCPU; who++) {
986		if (who == cpuid)
987			continue;
988
989		delays = 0;
990
991		if (!CPU_IN_SET(mp_cpus, who))
992			continue;
993
994		if (ncpus >= max_ncpus) {
995			skipped = who;
996			continue;
997		}
998
999		mp_startup_init(who);
1000		started_cpu = 1;
1001		(*cpu_startf)(who, rm_platter_pa);
1002
1003		while (!CPU_IN_SET(procset, who)) {
1004			delay(1);
1005			if (++delays > (20 * hz)) {
1006
1007				cmn_err(CE_WARN,
1008				    "cpu%d failed to start", who);
1009
1010				mutex_enter(&cpu_lock);
1011				cpu[who]->cpu_flags = 0;
1012				cpu_vm_data_destroy(cpu[who]);
1013				cpu_del_unit(who);
1014				mutex_exit(&cpu_lock);
1015
1016				started_cpu = 0;
1017				break;
1018			}
1019		}
1020		if (!started_cpu)
1021			continue;
1022		if (tsc_gethrtime_enable)
1023			tsc_sync_master(who);
1024
1025	}
1026
1027	affinity_clear();
1028
1029	/*
1030	 * Wait for all CPUs that booted (have presence in procset)
1031	 * to come online (have presence in cpu_ready_set).  Note
1032	 * that the start CPU already satisfies both of these, so no
1033	 * special case is needed.
1034	 */
1035	for (who = 0; who < NCPU; who++) {
1036		if (!CPU_IN_SET(procset, who))
1037			continue;
1038
1039		while (!CPU_IN_SET(cpu_ready_set, who))
1040			delay(1);
1041	}
1042
1043	if (skipped) {
1044		cmn_err(CE_NOTE,
1045		    "System detected %d CPU(s), but "
1046		    "only %d CPU(s) were enabled during boot.",
1047		    skipped + 1, ncpus);
1048		cmn_err(CE_NOTE,
1049		    "Use \"boot-ncpus\" parameter to enable more CPU(s). "
1050		    "See eeprom(1M).");
1051	}
1052
1053done:
1054	workaround_errata_end();
1055
1056	if (warm_reset_vector != NULL)
1057		mp_unmap_warm_reset_vector(warm_reset_vector);
1058	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
1059	    HAT_UNLOAD);
1060
1061	cmi_post_mpstartup();
1062}
1063
/*
 * Dummy functions - no i86pc platforms support dynamic cpu allocation.
 *
 * mp_cpu_configure() ignores its argument and always fails with ENOTSUP.
 */
/*ARGSUSED*/
int
mp_cpu_configure(int cpuid)
{
	const int rv = ENOTSUP;		/* not supported */

	return (rv);
}
1073
/*
 * mp_cpu_unconfigure() ignores its argument and always fails with
 * ENOTSUP.
 */
/*ARGSUSED*/
int
mp_cpu_unconfigure(int cpuid)
{
	const int rv = ENOTSUP;		/* not supported */

	return (rv);
}
1080
1081/*
1082 * Startup function for 'other' CPUs (besides boot cpu).
1083 * Called from real_mode_start.
1084 *
1085 * WARNING: until CPU_READY is set, mp_startup and routines called by
1086 * mp_startup should not call routines (e.g. kmem_free) that could call
1087 * hat_unload which requires CPU_READY to be set.
1088 */
1089void
1090mp_startup(void)
1091{
1092	struct cpu *cp = CPU;
1093	uint_t new_x86_feature;
1094
1095	/*
1096	 * We need to get TSC on this proc synced (i.e., any delta
1097	 * from cpu0 accounted for) as soon as we can, because many
1098	 * many things use gethrtime/pc_gethrestime, including
1099	 * interrupts, cmn_err, etc.
1100	 */
1101
1102	/* Let cpu0 continue into tsc_sync_master() */
1103	CPUSET_ATOMIC_ADD(procset, cp->cpu_id);
1104
1105	if (tsc_gethrtime_enable)
1106		tsc_sync_slave();
1107
1108	/*
1109	 * Once this was done from assembly, but it's safer here; if
1110	 * it blocks, we need to be able to swtch() to and from, and
1111	 * since we get here by calling t_pc, we need to do that call
1112	 * before swtch() overwrites it.
1113	 */
1114
1115	(void) (*ap_mlsetup)();
1116
1117	new_x86_feature = cpuid_pass1(cp);
1118
1119	/*
1120	 * We need to Sync MTRR with cpu0's MTRR. We have to do
1121	 * this with interrupts disabled.
1122	 */
1123	if (x86_feature & X86_MTRR)
1124		mtrr_sync();
1125
1126	/*
1127	 * Initialize this CPU's syscall handlers
1128	 */
1129	init_cpu_syscall(cp);
1130
1131	/*
1132	 * Enable interrupts with spl set to LOCK_LEVEL. LOCK_LEVEL is the
1133	 * highest level at which a routine is permitted to block on
1134	 * an adaptive mutex (allows for cpu poke interrupt in case
1135	 * the cpu is blocked on a mutex and halts). Setting LOCK_LEVEL blocks
1136	 * device interrupts that may end up in the hat layer issuing cross
1137	 * calls before CPU_READY is set.
1138	 */
1139	(void) splx(ipltospl(LOCK_LEVEL));
1140
1141	/*
1142	 * Do a sanity check to make sure this new CPU is a sane thing
1143	 * to add to the collection of processors running this system.
1144	 *
1145	 * XXX	Clearly this needs to get more sophisticated, if x86
1146	 * systems start to get built out of heterogenous CPUs; as is
1147	 * likely to happen once the number of processors in a configuration
1148	 * gets large enough.
1149	 */
1150	if ((x86_feature & new_x86_feature) != x86_feature) {
1151		cmn_err(CE_CONT, "?cpu%d: %b\n",
1152		    cp->cpu_id, new_x86_feature, FMT_X86_FEATURE);
1153		cmn_err(CE_WARN, "cpu%d feature mismatch", cp->cpu_id);
1154	}
1155
1156	/*
1157	 * We could be more sophisticated here, and just mark the CPU
1158	 * as "faulted" but at this point we'll opt for the easier
1159	 * answer of dieing horribly.  Provided the boot cpu is ok,
1160	 * the system can be recovered by booting with use_mp set to zero.
1161	 */
1162	if (workaround_errata(cp) != 0)
1163		panic("critical workaround(s) missing for cpu%d", cp->cpu_id);
1164
1165	cpuid_pass2(cp);
1166	cpuid_pass3(cp);
1167	(void) cpuid_pass4(cp);
1168
1169	init_cpu_info(cp);
1170
1171	mutex_enter(&cpu_lock);
1172	/*
1173	 * Processor group initialization for this CPU is dependent on the
1174	 * cpuid probing, which must be done in the context of the current
1175	 * CPU.
1176	 */
1177	pghw_physid_create(cp);
1178	pg_cpu_init(cp);
1179	pg_cmt_cpu_startup(cp);
1180
1181	cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_ENABLE | CPU_EXISTS;
1182	cpu_add_active(cp);
1183
1184	if (dtrace_cpu_init != NULL) {
1185		(*dtrace_cpu_init)(cp->cpu_id);
1186	}
1187
1188	mutex_exit(&cpu_lock);
1189
1190	/*
1191	 * Enable preemption here so that contention for any locks acquired
1192	 * later in mp_startup may be preempted if the thread owning those
1193	 * locks is continously executing on other CPUs (for example, this
1194	 * CPU must be preemptible to allow other CPUs to pause it during their
1195	 * startup phases).  It's safe to enable preemption here because the
1196	 * CPU state is pretty-much fully constructed.
1197	 */
1198	curthread->t_preempt = 0;
1199
1200	add_cpunode2devtree(cp->cpu_id, cp->cpu_m.mcpu_cpi);
1201
1202	/* The base spl should still be at LOCK LEVEL here */
1203	ASSERT(cp->cpu_base_spl == ipltospl(LOCK_LEVEL));
1204	set_base_spl();		/* Restore the spl to its proper value */
1205
1206	(void) spl0();				/* enable interrupts */
1207
1208	/*
1209	 * Set up the CPU module for this CPU.  This can't be done before
1210	 * this CPU is made CPU_READY, because we may (in heterogeneous systems)
1211	 * need to go load another CPU module.  The act of attempting to load
1212	 * a module may trigger a cross-call, which will ASSERT unless this
1213	 * cpu is CPU_READY.
1214	 */
1215	cmi_init();
1216
1217	if (x86_feature & X86_MCA)
1218		cmi_mca_init();
1219
1220	if (boothowto & RB_DEBUG)
1221		kdi_dvec_cpu_init(cp);
1222
1223	/*
1224	 * Setting the bit in cpu_ready_set must be the last operation in
1225	 * processor initialization; the boot CPU will continue to boot once
1226	 * it sees this bit set for all active CPUs.
1227	 */
1228	CPUSET_ATOMIC_ADD(cpu_ready_set, cp->cpu_id);
1229
1230	/*
1231	 * Because mp_startup() gets fired off after init() starts, we
1232	 * can't use the '?' trick to do 'boot -v' printing - so we
1233	 * always direct the 'cpu .. online' messages to the log.
1234	 */
1235	cmn_err(CE_CONT, "!cpu%d initialization complete - online\n",
1236	    cp->cpu_id);
1237
1238	/*
1239	 * Now we are done with the startup thread, so free it up.
1240	 */
1241	thread_exit();
1242	panic("mp_startup: cannot return");
1243	/*NOTREACHED*/
1244}
1245
1246
1247/*
1248 * Start CPU on user request.
1249 */
1250/* ARGSUSED */
1251int
1252mp_cpu_start(struct cpu *cp)
1253{
1254	ASSERT(MUTEX_HELD(&cpu_lock));
1255	return (0);
1256}
1257
1258/*
1259 * Stop CPU on user request.
1260 */
1261/* ARGSUSED */
1262int
1263mp_cpu_stop(struct cpu *cp)
1264{
1265	extern int cbe_psm_timer_mode;
1266	ASSERT(MUTEX_HELD(&cpu_lock));
1267
1268	/*
1269	 * If TIMER_PERIODIC mode is used, CPU0 is the one running it;
1270	 * can't stop it.  (This is true only for machines with no TSC.)
1271	 */
1272
1273	if ((cbe_psm_timer_mode == TIMER_PERIODIC) && (cp->cpu_id == 0))
1274		return (1);
1275
1276	return (0);
1277}
1278
1279/*
1280 * Power on CPU.
1281 */
1282/* ARGSUSED */
1283int
1284mp_cpu_poweron(struct cpu *cp)
1285{
1286	ASSERT(MUTEX_HELD(&cpu_lock));
1287	return (ENOTSUP);		/* not supported */
1288}
1289
1290/*
1291 * Power off CPU.
1292 */
1293/* ARGSUSED */
1294int
1295mp_cpu_poweroff(struct cpu *cp)
1296{
1297	ASSERT(MUTEX_HELD(&cpu_lock));
1298	return (ENOTSUP);		/* not supported */
1299}
1300
1301
1302/*
1303 * Take the specified CPU out of participation in interrupts.
1304 */
1305int
1306cpu_disable_intr(struct cpu *cp)
1307{
1308	if (psm_disable_intr(cp->cpu_id) != DDI_SUCCESS)
1309		return (EBUSY);
1310
1311	cp->cpu_flags &= ~CPU_ENABLE;
1312	return (0);
1313}
1314
1315/*
1316 * Allow the specified CPU to participate in interrupts.
1317 */
1318void
1319cpu_enable_intr(struct cpu *cp)
1320{
1321	ASSERT(MUTEX_HELD(&cpu_lock));
1322	cp->cpu_flags |= CPU_ENABLE;
1323	psm_enable_intr(cp->cpu_id);
1324}
1325
1326
1327
1328static ushort_t *
1329mp_map_warm_reset_vector()
1330{
1331	ushort_t *warm_reset_vector;
1332
1333	if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
1334	    sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
1335		return (NULL);
1336
1337	/*
1338	 * setup secondary cpu bios boot up vector
1339	 */
1340	*warm_reset_vector = (ushort_t)((caddr_t)
1341		((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
1342		+ ((ulong_t)rm_platter_va & 0xf));
1343	warm_reset_vector++;
1344	*warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);
1345
1346	--warm_reset_vector;
1347	return (warm_reset_vector);
1348}
1349
1350static void
1351mp_unmap_warm_reset_vector(ushort_t *warm_reset_vector)
1352{
1353	psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
1354}
1355
/*
 * Platform hook invoked for a CPU entering the faulted state; the work
 * is delegated entirely to cmi_faulted_enter().
 */
void
mp_cpu_faulted_enter(struct cpu *cp)
{
	cmi_faulted_enter(cp);
}
1361
/*
 * Platform hook invoked for a CPU leaving the faulted state; the work
 * is delegated entirely to cmi_faulted_exit().
 */
void
mp_cpu_faulted_exit(struct cpu *cp)
{
	cmi_faulted_exit(cp);
}
1367
1368/*
1369 * The following two routines are used as context operators on threads belonging
1370 * to processes with a private LDT (see sysi86).  Due to the rarity of such
1371 * processes, these routines are currently written for best code readability and
1372 * organization rather than speed.  We could avoid checking x86_feature at every
1373 * context switch by installing different context ops, depending on the
1374 * x86_feature flags, at LDT creation time -- one for each combination of fast
1375 * syscall feature flags.
1376 */
1377
1378/*ARGSUSED*/
1379void
1380cpu_fast_syscall_disable(void *arg)
1381{
1382	if (x86_feature & X86_SEP)
1383		cpu_sep_disable();
1384	if (x86_feature & X86_ASYSC)
1385		cpu_asysc_disable();
1386}
1387
1388/*ARGSUSED*/
1389void
1390cpu_fast_syscall_enable(void *arg)
1391{
1392	if (x86_feature & X86_SEP)
1393		cpu_sep_enable();
1394	if (x86_feature & X86_ASYSC)
1395		cpu_asysc_enable();
1396}
1397
1398static void
1399cpu_sep_enable(void)
1400{
1401	ASSERT(x86_feature & X86_SEP);
1402	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
1403
1404	wrmsr(MSR_INTC_SEP_CS, (uint64_t)(uintptr_t)KCS_SEL);
1405}
1406
1407static void
1408cpu_sep_disable(void)
1409{
1410	ASSERT(x86_feature & X86_SEP);
1411	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
1412
1413	/*
1414	 * Setting the SYSENTER_CS_MSR register to 0 causes software executing
1415	 * the sysenter or sysexit instruction to trigger a #gp fault.
1416	 */
1417	wrmsr(MSR_INTC_SEP_CS, 0ULL);
1418}
1419
1420static void
1421cpu_asysc_enable(void)
1422{
1423	ASSERT(x86_feature & X86_ASYSC);
1424	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
1425
1426	wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) |
1427	    (uint64_t)(uintptr_t)AMD_EFER_SCE);
1428}
1429
1430static void
1431cpu_asysc_disable(void)
1432{
1433	ASSERT(x86_feature & X86_ASYSC);
1434	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
1435
1436	/*
1437	 * Turn off the SCE (syscall enable) bit in the EFER register. Software
1438	 * executing syscall or sysret with this bit off will incur a #ud trap.
1439	 */
1440	wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) &
1441	    ~((uint64_t)(uintptr_t)AMD_EFER_SCE));
1442}
1443