1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
27/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
28/*	  All Rights Reserved					*/
29
30/*	Copyright (c) 1987, 1988 Microsoft Corporation		*/
31/*	  All Rights Reserved					*/
32
33
34#include <sys/asm_linkage.h>
35#include <sys/asm_misc.h>
36#include <sys/regset.h>
37#include <sys/privregs.h>
38#include <sys/psw.h>
39#include <sys/reboot.h>
40#include <sys/x86_archext.h>
41#include <sys/machparam.h>
42
43#if defined(__lint)
44
45#include <sys/types.h>
46#include <sys/thread.h>
47#include <sys/systm.h>
48#include <sys/lgrp.h>
49#include <sys/regset.h>
50#include <sys/link.h>
51#include <sys/bootconf.h>
52#include <sys/bootsvcs.h>
53
54#else	/* __lint */
55
56#include <sys/segments.h>
57#include <sys/pcb.h>
58#include <sys/trap.h>
59#include <sys/ftrace.h>
60#include <sys/traptrace.h>
61#include <sys/clock.h>
62#include <sys/cmn_err.h>
63#include <sys/pit.h>
64#include <sys/panic.h>
65
66#if defined(__xpv)
67#include <sys/hypervisor.h>
68#endif
69
70#include "assym.h"
71
72/*
73 * Our assumptions:
74 *	- We are running in protected-paged mode.
75 *	- Interrupts are disabled.
 *	- The GDT and IDT are the caller's; we need our own copies.
77 *	- The kernel's text, initialized data and bss are mapped.
78 *
79 * Our actions:
80 *	- Save arguments
81 *	- Initialize our stack pointer to the thread 0 stack (t0stack)
82 *	  and leave room for a phony "struct regs".
83 *	- Our GDT and IDT need to get munged.
84 *	- Since we are using the boot's GDT descriptors, we need
85 *	  to copy them into our GDT before we switch to ours.
86 *	- We start using our GDT by loading correct values in the
87 *	  selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
88 *	  gs=KGS_SEL).
89 *	- The default LDT entry for syscall is set.
90 *	- We load the default LDT into the hardware LDT register.
91 *	- We load the default TSS into the hardware task register.
92 *	- Check for cpu type, i.e. 486 vs. P5 vs. P6 etc.
93 *	- mlsetup(%esp) gets called.
94 *	- We change our appearance to look like the real thread 0.
 *	  (NOTE: making ourselves a real thread may be a noop)
96 *	- main() gets called.  (NOTE: main() never returns).
97 *
98 * NOW, the real code!
99 */
100	/*
101	 * The very first thing in the kernel's text segment must be a jump
102	 * to the os/fakebop.c startup code.
103	 */
104	.text
105	jmp     _start
106
107	/*
108	 * Globals:
109	 */
110	.globl	_locore_start
111	.globl	mlsetup
112	.globl	main
113	.globl	panic
114	.globl	t0stack
115	.globl	t0
116	.globl	sysp
117	.globl	edata
118
119	/*
120	 * call back into boot - sysp (bootsvcs.h) and bootops (bootconf.h)
121	 */
122	.globl	bootops
123	.globl	bootopsp
124
125	/*
126	 * NOTE: t0stack should be the first thing in the data section so that
127	 * if it ever overflows, it will fault on the last kernel text page.
128	 */
129	.data
130	.comm	t0stack, DEFAULTSTKSZ, 32
131	.comm	t0, 4094, 32
132
133#endif	/* __lint */
134
135
136#if defined(__amd64)
137
138#if defined(__lint)
139
140/* ARGSUSED */
141void
142_locore_start(struct boot_syscalls *sysp, ulong_t rsi, struct bootops *bop)
143{}
144
145#else	/* __lint */
146
147	/*
148	 * kobj_init() vectors us back to here with (note) a slightly different
149	 * set of arguments than _start is given (see lint prototypes above).
150	 *
151	 * XXX	Make this less vile, please.
152	 */
153	ENTRY_NP(_locore_start)
154
155	/*
156	 * %rdi = boot services (should die someday)
157	 * %rdx = bootops
158	 * end
159	 */
160
161	leaq	edata(%rip), %rbp	/* reference edata for ksyms */
162	movq	$0, (%rbp)		/* limit stack back trace */
163
164	/*
165	 * Initialize our stack pointer to the thread 0 stack (t0stack)
166	 * and leave room for a "struct regs" for lwp0.  Note that the
167	 * stack doesn't actually align to a 16-byte boundary until just
168	 * before we call mlsetup because we want to use %rsp to point at
169	 * our regs structure.
170	 */
171	leaq	t0stack(%rip), %rsp
172	addq	$_CONST(DEFAULTSTKSZ - REGSIZE), %rsp
173#if (REGSIZE & 15) == 0
174	subq	$8, %rsp
175#endif
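	/*
	 * Illustrative reasoning (assuming DEFAULTSTKSZ is a whole number of
	 * pages and REGSIZE is a multiple of 8): t0stack + DEFAULTSTKSZ is
	 * 16-byte aligned, so after subtracting REGSIZE, %rsp is either 16-
	 * or 8-byte aligned.  The conditional subq above, combined with the
	 * pushq %rbp done just before the call to mlsetup(), leaves %rsp
	 * 16-byte aligned at that call in either case.
	 */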
176	/*
177	 * Save call back for special x86 boot services vector
178	 */
179	movq	%rdi, sysp(%rip)
180
181	movq	%rdx, bootops(%rip)		/* save bootops */
182	movq	$bootops, bootopsp(%rip)
183
184	/*
185	 * Save arguments and flags, if only for debugging ..
186	 */
187	movq	%rdi, REGOFF_RDI(%rsp)
188	movq	%rsi, REGOFF_RSI(%rsp)
189	movq	%rdx, REGOFF_RDX(%rsp)
190	movq	%rcx, REGOFF_RCX(%rsp)
191	movq	%r8, REGOFF_R8(%rsp)
192	movq	%r9, REGOFF_R9(%rsp)
193	pushf
194	popq	%r11
195	movq	%r11, REGOFF_RFL(%rsp)
196
197#if !defined(__xpv)
198	/*
199	 * Enable write protect and alignment check faults.
200	 */
201	movq	%cr0, %rax
202	orq	$_CONST(CR0_WP|CR0_AM), %rax
203	andq	$_BITNOT(CR0_WT|CR0_CE), %rax
204	movq	%rax, %cr0
205#endif	/* __xpv */
206
207	/*
208	 * (We just assert this works by virtue of being here)
209	 */
210	bts	$X86FSET_CPUID, x86_featureset(%rip)
211
212	/*
213	 * mlsetup() gets called with a struct regs as argument, while
214	 * main takes no args and should never return.
215	 */
216	xorl	%ebp, %ebp
217	movq	%rsp, %rdi
218	pushq	%rbp
219	/* (stack pointer now aligned on 16-byte boundary right here) */
220	movq	%rsp, %rbp
221	call	mlsetup
222	call	main
223	/* NOTREACHED */
224	leaq	__return_from_main(%rip), %rdi
225	xorl	%eax, %eax
226	call	panic
227	SET_SIZE(_locore_start)
228
#endif	/* __lint */
#endif	/* __amd64 */
231
232#if !defined(__lint)
233
234__return_from_main:
235	.string	"main() returned"
236__unsupported_cpu:
237	.string	"486 style cpu detected - no longer supported!"
238
239#endif	/* !__lint */
240
241#if !defined(__amd64)
242
243#if defined(__lint)
244
245/* ARGSUSED */
246void
247_locore_start(struct boot_syscalls *sysp, struct bootops *bop)
248{}
249
250#else	/* __lint */
251
252	/*
253	 * kobj_init() vectors us back to here with (note) a slightly different
254	 * set of arguments than _start is given (see lint prototypes above).
255	 *
256	 * XXX	Make this less vile, please.
257	 */
258	ENTRY_NP(_locore_start)
259
260	/*
261	 *	%ecx = boot services (should die someday)
262	 *	%ebx = bootops
263	 */
264	mov	$edata, %ebp		/ edata needs to be defined for ksyms
265	movl	$0, (%ebp)		/ limit stack back trace
266
267	/*
268	 * Initialize our stack pointer to the thread 0 stack (t0stack)
269	 * and leave room for a phony "struct regs".
270	 */
271	movl	$t0stack + DEFAULTSTKSZ - REGSIZE, %esp
272
273	/*
274	 * Save call back for special x86 boot services vector
275	 */
276	mov	%ecx, sysp		/ save call back for boot services
277
278 	mov	%ebx, bootops		/ save bootops
279	movl	$bootops, bootopsp
280
281
282	/*
283	 * Save all registers and flags
284	 */
285	pushal
286	pushfl
287
288#if !defined(__xpv)
289	/*
290	 * Override bios settings and enable write protect and
291	 * alignment check faults.
292	 */
293	movl	%cr0, %eax
294
295	/*
296	 * enable WP for detecting faults, and enable alignment checking.
297	 */
298	orl	$_CONST(CR0_WP|CR0_AM), %eax
299	andl	$_BITNOT(CR0_WT|CR0_CE), %eax
300	movl	%eax, %cr0		/ set the cr0 register correctly and
301					/ override the BIOS setup
302
303	/*
304	 * If bit 21 of eflags can be flipped, then cpuid is present
305	 * and enabled.
306	 */
307	pushfl
308	popl	%ecx
309	movl	%ecx, %eax
310	xorl	$PS_ID, %eax		/ try complemented bit
311	pushl	%eax
312	popfl
313	pushfl
314	popl    %eax
315	cmpl	%eax, %ecx
316	jne	have_cpuid
317
	/*
	 * cpuid may be disabled on Cyrix; try to detect Cyrix with the 5/2
	 * test.  div does not modify the cc flags on Cyrix.  Although this
	 * may also be true for other vendors, it is generally true only for
	 * newer models from those vendors that support and do not disable
	 * cpuid (usually because cpuid cannot be disabled).
	 */
325
326	/*
327	 * clear cc flags
328	 */
329	xorb	%ah, %ah
330	sahf
331
332	/*
333	 * perform 5/2 test
334	 */
335	movw	$5, %ax
336	movb	$2, %bl
337	divb	%bl
338
339	lahf
340	cmpb	$2, %ah
341	jne	cpu_486
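	/*
	 * (clarifying note) the sahf above loaded the flags from %ah == 0, so
	 * the low flags byte became 0x02 (only the always-set bit 1).  If
	 * lahf reads back something other than 2, div modified the flags, so
	 * this is not a Cyrix part and, lacking cpuid, it is treated as a
	 * 486; if it reads back exactly 2, the flags were untouched and the
	 * Cyrix probing below continues.
	 */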
342
343	/*
344	 * div did not modify the cc flags, chances are the vendor is Cyrix
345	 * assume the vendor is Cyrix and use the CCR's to enable cpuid
346	 */
347	.set	CYRIX_CRI, 0x22		/ CR Index Register
348	.set	CYRIX_CRD, 0x23		/ CR Data Register
349
350	.set	CYRIX_CCR3, 0xc3	/ Config Control Reg 3
351	.set	CYRIX_CCR4, 0xe8	/ Config Control Reg 4
352	.set	CYRIX_DIR0, 0xfe	/ Device Identification Reg 0
353	.set	CYRIX_DIR1, 0xff	/ Device Identification Reg 1
354
355	/*
356	 * even if the cpu vendor is Cyrix and the motherboard/chipset
357	 * vendor decided to ignore lines A1-A4 for I/O addresses, I/O port
358	 * 0x21 corresponds with 0x23 and since 0x22 is still untouched,
359	 * the reads and writes of 0x21 are guaranteed to be off-chip of
360	 * the cpu
361	 */
362
363	/*
364	 * enable read of ISR at I/O port 0x20
365	 */
366	movb	$0xb, %al
367	outb	$MCMD_PORT
368
369	/*
370	 * read IMR and store in %bl
371	 */
372	inb	$MIMR_PORT
373	movb	%al, %bl
374
375	/*
376	 * mask out all interrupts so that ISR will not change
377	 */
378	movb	$0xff, %al
379	outb	$MIMR_PORT
380
381	/*
382	 * reads of I/O port 0x22 on Cyrix are always directed off-chip
383	 * make use of I/O pull-up to test for an unknown device on 0x22
384	 */
385	inb	$CYRIX_CRI
386	cmpb	$0xff, %al
387	je	port_22_free
388
389	/*
390	 * motherboard/chipset vendor may be ignoring line A1 of I/O address
391	 */
392	movb	%al, %cl
393
394	/*
395	 * if the ISR and the value read from 0x22 do not match then we have
396	 * detected some unknown device, probably a chipset, at 0x22
397	 */
398	inb	$MCMD_PORT
399	cmpb	%al, %cl
400	jne	restore_IMR
401
402port_22_free:
403	/*
404	 * now test to see if some unknown device is using I/O port 0x23
405	 *
406	 * read the external I/O port at 0x23
407	 */
408	inb	$CYRIX_CRD
409
410	/*
411	 * Test for pull-up at 0x23 or if I/O address line A1 is being ignored.
412	 * IMR is 0xff so both tests are performed simultaneously.
413	 */
414	cmpb	$0xff, %al
415	jne	restore_IMR
416
417	/*
418	 * We are a Cyrix part. In case we are some model of Cx486 or a Cx586,
419	 * record the type and fix it later if not.
420	 */
421	movl	$X86_VENDOR_Cyrix, x86_vendor
422	movl	$X86_TYPE_CYRIX_486, x86_type
423
424	/*
425	 * Try to read CCR3. All Cyrix cpu's which support cpuid have CCR3.
426	 *
427	 * load CCR3 index into CCR index register
428	 */
429
430	movb	$CYRIX_CCR3, %al
431	outb	$CYRIX_CRI
432
433	/*
434	 * If we are not a Cyrix cpu, then we have performed an external I/O
435	 * cycle. If the CCR index was not valid for this Cyrix model, we may
436	 * have performed an external I/O cycle as well. In these cases and
437	 * if the motherboard/chipset vendor ignores I/O address line A1,
438	 * then the PIC will have IRQ3 set at the lowest priority as a side
	 * effect of the above outb. We are reasonably confident that there
440	 * is not an unknown device on I/O port 0x22, so there should have been
441	 * no unpredictable side-effect of the above outb.
442	 */
443
444	/*
445	 * read CCR3
446	 */
447	inb	$CYRIX_CRD
448
449	/*
450	 * If we are not a Cyrix cpu the inb above produced an external I/O
	 * cycle. If we are a Cyrix model that does not support CCR3 we
452	 * produced an external I/O cycle. In all known Cyrix models 6x86 and
453	 * above, bit 3 of CCR3 is reserved and cannot be set to 1. In all
454	 * Cyrix models prior to the 6x86 that supported CCR3, bits 4-7 are
455	 * reserved as well. It is highly unlikely that CCR3 contains the value
	 * 0xff. We test to see whether I/O port 0x23 reads back as pull-up or
	 * as the IMR, and if so deduce that we are not a Cyrix with cpuid
	 * support.
458	 */
459	cmpb	$0xff, %al
460	je	restore_PIC
461
462	/*
463	 * There exist 486 ISA Cyrix chips that support CCR3 but do not support
464	 * DIR0 and DIR1. If we try to read DIR0, we may generate external I/O
465	 * cycles, the exact behavior is model specific and undocumented.
466	 * Unfortunately these external I/O cycles may confuse some PIC's beyond
	 * recovery. Fortunately we can use the following undocumented trick:
468	 * if bit 4 of CCR3 can be toggled, then DIR0 and DIR1 are supported.
469	 * Pleasantly MAPEN contains bit 4 of CCR3, so this trick is guaranteed
470	 * to work on all Cyrix cpu's which support cpuid.
471	 */
472	movb	%al, %dl
473	xorb	$0x10, %dl
474	movb	%al, %cl
475
476	/*
	 * write CCR3 back with bit 4 toggled
478	 */
479	movb	$CYRIX_CCR3, %al
480	outb	$CYRIX_CRI
481
482	movb	%dl, %al
483	outb	$CYRIX_CRD
484
485	/*
486	 * read CCR3
487	 */
488	movb	$CYRIX_CCR3, %al
489	outb	$CYRIX_CRI
490	inb	$CYRIX_CRD
491	movb	%al, %dl
492
493	/*
494	 * restore CCR3
495	 */
496	movb	$CYRIX_CCR3, %al
497	outb	$CYRIX_CRI
498
499	movb	%cl, %al
500	outb	$CYRIX_CRD
501
502	/*
503	 * if bit 4 was not toggled DIR0 and DIR1 are not supported in which
504	 * case we do not have cpuid anyway
505	 */
506	andb	$0x10, %al
507	andb	$0x10, %dl
508	cmpb	%al, %dl
509	je	restore_PIC
510
511	/*
512	 * read DIR0
513	 */
514	movb	$CYRIX_DIR0, %al
515	outb	$CYRIX_CRI
516	inb	$CYRIX_CRD
517
518	/*
519	 * test for pull-up
520	 */
521	cmpb	$0xff, %al
522	je	restore_PIC
523
524	/*
525	 * Values of 0x20-0x27 in DIR0 are currently reserved by Cyrix for
526	 * future use. If Cyrix ever produces a cpu that supports cpuid with
527	 * these ids, the following test will have to change. For now we remain
	 * pessimistic since the formats of the CCR's may be different then.
529	 *
530	 * test for at least a 6x86, to see if we support both MAPEN and CPUID
531	 */
532	cmpb	$0x30, %al
533	jb	restore_IMR
534
535	/*
536	 * enable MAPEN
537	 */
538	movb	$CYRIX_CCR3, %al
539	outb	$CYRIX_CRI
540
541	andb	$0xf, %cl
542	movb	%cl, %al
543	orb	$0x10, %al
544	outb	$CYRIX_CRD
545
546	/*
547	 * select CCR4
548	 */
549	movb	$CYRIX_CCR4, %al
550	outb	$CYRIX_CRI
551
552	/*
553	 * read CCR4
554	 */
555	inb	$CYRIX_CRD
556
557	/*
558	 * enable cpuid
559	 */
560	orb	$0x80, %al
561	movb	%al, %dl
562
563	/*
564	 * select CCR4
565	 */
566	movb	$CYRIX_CCR4, %al
567	outb	$CYRIX_CRI
568
569	/*
570	 * write CCR4
571	 */
572	movb	%dl, %al
573	outb	$CYRIX_CRD
574
575	/*
576	 * select CCR3
577	 */
578	movb	$CYRIX_CCR3, %al
579	outb	$CYRIX_CRI
580
581	/*
582	 * disable MAPEN and write CCR3
583	 */
584	movb	%cl, %al
585	outb	$CYRIX_CRD
586
587	/*
588	 * restore IMR
589	 */
590	movb	%bl, %al
591	outb	$MIMR_PORT
592
593	/*
594	 * test to see if cpuid available
595	 */
596	pushfl
597	popl	%ecx
598	movl	%ecx, %eax
599	xorl	$PS_ID, %eax		/ try complemented bit
600	pushl	%eax
601	popfl
602	pushfl
603	popl    %eax
604	cmpl	%eax, %ecx
605	jne	have_cpuid
606	jmp	cpu_486
607
608restore_PIC:
609	/*
610	 * In case the motherboard/chipset vendor is ignoring line A1 of the
611	 * I/O address, we set the PIC priorities to sane values.
612	 */
613	movb	$0xc7, %al	/ irq 7 lowest priority
614	outb	$MCMD_PORT
615
616restore_IMR:
617	movb	%bl, %al
618	outb	$MIMR_PORT
619	jmp	cpu_486
620
621have_cpuid:
622	/*
623	 * cpuid instruction present
624	 */
625	bts	$X86FSET_CPUID, x86_featureset	/ Just to set; Ignore the CF
626	movl	$0, %eax
627	cpuid
628
629	movl	%ebx, cpu_vendor
630	movl	%edx, cpu_vendor+4
631	movl	%ecx, cpu_vendor+8
632
633	/*
634	 * early cyrix cpus are somewhat strange and need to be
635	 * probed in curious ways to determine their identity
636	 */
637
638	leal	cpu_vendor, %esi
639	leal	CyrixInstead, %edi
640	movl	$12, %ecx
641	repz
642	  cmpsb
643	je	vendor_is_cyrix
644
645	/ let mlsetup()/cpuid_pass1() handle everything else in C
646
647	jmp	cpu_done
648
649is486:
650	/*
651	 * test to see if a useful cpuid
652	 */
653	testl	%eax, %eax
654	jz	isa486
655
656	movl	$1, %eax
657	cpuid
658
659	movl	%eax, %ebx
660	andl	$0xF00, %ebx
661	cmpl	$0x400, %ebx
662	je	isa486
663
664	rep;	ret	/* use 2 byte return instruction */
665			/* AMD Software Optimization Guide - Section 6.2 */
666isa486:
667	/*
668	 * lose the return address
669	 */
670	popl	%eax
671	jmp	cpu_486
672
673vendor_is_cyrix:
674	call	is486
675
676	/*
677	 * Processor signature and feature flags for Cyrix are insane.
678	 * BIOS can play with semi-documented registers, so cpuid must be used
	 * cautiously. Since we are a Cyrix that has cpuid, we have DIR0 and
	 * DIR1.  Keep the family in %ebx and the feature flags in %edx until
	 * they are no longer needed.
681	 */
682
683	/*
684	 * read DIR0
685	 */
686	movb	$CYRIX_DIR0, %al
687	outb	$CYRIX_CRI
688	inb	$CYRIX_CRD
689
690	/*
691	 * First we handle the cases where we are a 6x86 or 6x86L.
692	 * The 6x86 is basically a 486, the only reliable bit in the
693	 * feature flags is for FPU. The 6x86L is better, unfortunately
694	 * there is no really good way to distinguish between these two
695	 * cpu's. We are pessimistic and when in doubt assume 6x86.
696	 */
697
698	cmpb	$0x40, %al
699	jae	maybeGX
700
701	/*
702	 * We are an M1, either a 6x86 or 6x86L.
703	 */
704	cmpb	$0x30, %al
705	je	maybe6x86L
706	cmpb	$0x31, %al
707	je	maybe6x86L
708	cmpb	$0x34, %al
709	je	maybe6x86L
710	cmpb	$0x35, %al
711	je	maybe6x86L
712
713	/*
714	 * although it is possible that we are a 6x86L, the cpu and
715	 * documentation are so buggy, we just do not care.
716	 */
717	jmp	likely6x86
718
719maybe6x86L:
720	/*
721	 *  read DIR1
722	 */
723	movb	$CYRIX_DIR1, %al
724	outb	$CYRIX_CRI
725	inb	$CYRIX_CRD
726	cmpb	$0x22, %al
727	jb	likely6x86
728
729	/*
730	 * We are a 6x86L, or at least a 6x86 with honest cpuid feature flags
731	 */
732	movl	$X86_TYPE_CYRIX_6x86L, x86_type
733	jmp	coma_bug
734
735likely6x86:
736	/*
737	 * We are likely a 6x86, or a 6x86L without a way of knowing
738	 *
739	 * The 6x86 has NO Pentium or Pentium Pro compatible features even
740	 * though it claims to be a Pentium Pro compatible!
741	 *
742	 * The 6x86 core used in the 6x86 may have most of the Pentium system
743	 * registers and largely conform to the Pentium System Programming
744	 * Reference. Documentation on these parts is long gone. Treat it as
745	 * a crippled Pentium and hope for the best.
746	 */
747
748	movl	$X86_TYPE_CYRIX_6x86, x86_type
749	jmp	coma_bug
750
751maybeGX:
752	/*
753	 * Now we check whether we are a MediaGX or GXm. We have particular
754	 * reason for concern here. Even though most of the GXm's
755	 * report having TSC in the cpuid feature flags, the TSC may be
	 * horribly broken. What is worse is that MediaGX's are basically
757	 * 486's while the good GXm's are more like Pentium Pro's!
758	 */
759
760	cmpb	$0x50, %al
761	jae	maybeM2
762
763	/*
764	 * We are either a MediaGX (sometimes called a Gx86) or GXm
765	 */
766
	cmpb	$0x41, %al
768	je	maybeMediaGX
769
	cmpb	$0x44, %al
771	jb	maybeGXm
772
	cmpb	$0x47, %al
774	jbe	maybeMediaGX
775
776	/*
777	 * We do not honestly know what we are, so assume a MediaGX
778	 */
779	jmp	media_gx
780
781maybeGXm:
782	/*
783	 * It is still possible we are either a MediaGX or GXm, trust cpuid
784	 * family should be 5 on a GXm
785	 */
786	cmpl	$0x500, %ebx
787	je	GXm
788
789	/*
790	 * BIOS/Cyrix might set family to 6 on a GXm
791	 */
792	cmpl	$0x600, %ebx
793	jne	media_gx
794
795GXm:
796	movl	$X86_TYPE_CYRIX_GXm, x86_type
797	jmp	cpu_done
798
799maybeMediaGX:
800	/*
801	 * read DIR1
802	 */
803	movb	$CYRIX_DIR1, %al
804	outb	$CYRIX_CRI
805	inb	$CYRIX_CRD
806
807	cmpb	$0x30, %al
808	jae	maybeGXm
809
810	/*
811	 * we are a MediaGX for which we do not trust cpuid
812	 */
813media_gx:
814	movl	$X86_TYPE_CYRIX_MediaGX, x86_type
815	jmp	cpu_486
816
817maybeM2:
818	/*
819	 * Now we check whether we are a 6x86MX or MII. These cpu's are
820	 * virtually identical, but we care because for the 6x86MX, we
821	 * must work around the coma bug. Also for 6x86MX prior to revision
822	 * 1.4, the TSC may have serious bugs.
823	 */
824
825	cmpb	$0x60, %al
826	jae	maybeM3
827
828	/*
829	 * family should be 6, but BIOS/Cyrix might set it to 5
830	 */
831	cmpl	$0x600, %ebx
832	ja	cpu_486
833
834	/*
835	 *  read DIR1
836	 */
837	movb	$CYRIX_DIR1, %al
838	outb	$CYRIX_CRI
839	inb	$CYRIX_CRD
840
841	cmpb	$0x8, %al
842	jb	cyrix6x86MX
843	cmpb	$0x80, %al
844	jb	MII
845
846cyrix6x86MX:
847	/*
848	 * It is altogether unclear how the revision stamped on the cpu
849	 * maps to the values in DIR0 and DIR1. Just assume TSC is broken.
850	 */
851	movl	$X86_TYPE_CYRIX_6x86MX, x86_type
852	jmp	coma_bug
853
854MII:
855	movl	$X86_TYPE_CYRIX_MII, x86_type
856likeMII:
857	jmp	cpu_done
858
859maybeM3:
860	/*
861	 * We are some chip that we cannot identify yet, an MIII perhaps.
862	 * We will be optimistic and hope that the chip is much like an MII,
863	 * and that cpuid is sane. Cyrix seemed to have gotten it right in
864	 * time for the MII, we can only hope it stayed that way.
865	 * Maybe the BIOS or Cyrix is trying to hint at something
866	 */
867	cmpl	$0x500, %ebx
868	je	GXm
869
870	cmpb	$0x80, %al
871	jae	likelyM3
872
873	/*
874	 * Just test for the features Cyrix is known for
875	 */
876
877	jmp	MII
878
879likelyM3:
880	/*
881	 * DIR0 with values from 0x80 to 0x8f indicates a VIA Cyrix III, aka
882	 * the Cyrix MIII. There may be parts later that use the same ranges
883	 * for DIR0 with special values in DIR1, maybe the VIA CIII, but for
884	 * now we will call anything with a DIR0 of 0x80 or higher an MIII.
885	 * The MIII is supposed to support large pages, but we will believe
886	 * it when we see it. For now we just enable and test for MII features.
887	 */
888	movl	$X86_TYPE_VIA_CYRIX_III, x86_type
889	jmp	likeMII
890
891coma_bug:
892
893/*
894 * With NO_LOCK set to 0 in CCR1, the usual state that BIOS enforces, some
895 * bus cycles are issued with LOCK# asserted. With NO_LOCK set to 1, all bus
896 * cycles except page table accesses and interrupt ACK cycles do not assert
897 * LOCK#. xchgl is an instruction that asserts LOCK# if NO_LOCK is set to 0.
898 * Due to a bug in the cpu core involving over-optimization of branch
899 * prediction, register renaming, and execution of instructions down both the
900 * X and Y pipes for the xchgl instruction, short loops can be written that
901 * never de-assert LOCK# from one invocation of the loop to the next, ad
902 * infinitum. The undesirable effect of this situation is that interrupts are
903 * not serviced. The ideal workaround to this bug would be to set NO_LOCK to
904 * 1. Unfortunately bus cycles that would otherwise have asserted LOCK# no
905 * longer do, unless they are page table accesses or interrupt ACK cycles.
906 * With LOCK# not asserted, these bus cycles are now cached. This can cause
907 * undesirable behaviour if the ARR's are not configured correctly. Solaris
908 * does not configure the ARR's, nor does it provide any useful mechanism for
909 * doing so, thus the ideal workaround is not viable. Fortunately, the only
910 * known exploits for this bug involve the xchgl instruction specifically.
911 * There is a group of undocumented registers on Cyrix 6x86, 6x86L, and
912 * 6x86MX cpu's which can be used to specify one instruction as a serializing
913 * instruction. With the xchgl instruction serialized, LOCK# is still
914 * asserted, but it is the sole instruction for which LOCK# is asserted.
915 * There is now some added penalty for the xchgl instruction, but the usual
916 * bus locking is preserved. This ingenious workaround was discovered by
917 * disassembling a binary provided by Cyrix as a workaround for this bug on
 * Windows, but it's not documented anywhere by Cyrix, nor is the bug actually
919 * mentioned in any public errata! The only concern for this workaround is
920 * that there may be similar undiscovered bugs with other instructions that
921 * assert LOCK# that may be leveraged to similar ends. The fact that Cyrix
922 * fixed this bug sometime late in 1997 and no other exploits other than
923 * xchgl have been discovered is good indication that this workaround is
924 * reasonable.
925 */
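/*
 * As an illustration (a hypothetical loop, not taken from Cyrix material),
 * a spin-lock acquire of the form
 *
 *	1:	movl	$1, %eax
 *		xchgl	%eax, (%ebx)		/ %ebx points at the lock
 *		testl	%eax, %eax
 *		jnz	1b
 *
 * is the sort of short xchgl loop that can keep LOCK# asserted from one
 * iteration to the next on the affected cores, starving interrupts until
 * some other agent releases the lock.
 */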
926
927	.set	CYRIX_DBR0, 0x30	/ Debug Register 0
928	.set	CYRIX_DBR1, 0x31	/ Debug Register 1
929	.set	CYRIX_DBR2, 0x32	/ Debug Register 2
930	.set	CYRIX_DBR3, 0x33	/ Debug Register 3
931	.set	CYRIX_DOR, 0x3c		/ Debug Opcode Register
932
933	/*
934 	 * What is known about DBR1, DBR2, DBR3, and DOR is that for normal
935	 * cpu execution DBR1, DBR2, and DBR3 are set to 0. To obtain opcode
936	 * serialization, DBR1, DBR2, and DBR3 are loaded with 0xb8, 0x7f,
937	 * and 0xff. Then, DOR is loaded with the one byte opcode.
938	 */
939
940	/*
941	 * select CCR3
942	 */
943	movb	$CYRIX_CCR3, %al
944	outb	$CYRIX_CRI
945
946	/*
947	 * read CCR3 and mask out MAPEN
948	 */
949	inb	$CYRIX_CRD
950	andb	$0xf, %al
951
952	/*
953	 * save masked CCR3 in %ah
954	 */
955	movb	%al, %ah
956
957	/*
958	 * select CCR3
959	 */
960	movb	$CYRIX_CCR3, %al
961	outb	$CYRIX_CRI
962
963	/*
964	 * enable MAPEN
965	 */
966	movb	%ah, %al
967	orb	$0x10, %al
968	outb	$CYRIX_CRD
969
970	/*
971	 * read DBR0
972	 */
973	movb	$CYRIX_DBR0, %al
974	outb	$CYRIX_CRI
975	inb	$CYRIX_CRD
976
977	/*
978	 * disable MATCH and save in %bh
979	 */
980	orb	$0x80, %al
981	movb	%al, %bh
982
983	/*
984	 * write DBR0
985	 */
986	movb	$CYRIX_DBR0, %al
987	outb	$CYRIX_CRI
988	movb	%bh, %al
989	outb	$CYRIX_CRD
990
991	/*
992	 * write DBR1
993	 */
994	movb	$CYRIX_DBR1, %al
995	outb	$CYRIX_CRI
996	movb	$0xf8, %al
997	outb	$CYRIX_CRD
998
999	/*
1000	 * write DBR2
1001	 */
1002	movb	$CYRIX_DBR2, %al
1003	outb	$CYRIX_CRI
1004	movb	$0x7f, %al
1005	outb	$CYRIX_CRD
1006
1007	/*
1008	 * write DBR3
1009	 */
1010	movb	$CYRIX_DBR3, %al
1011	outb	$CYRIX_CRI
1012	xorb	%al, %al
1013	outb	$CYRIX_CRD
1014
1015	/*
1016	 * write DOR
1017	 */
1018	movb	$CYRIX_DOR, %al
1019	outb	$CYRIX_CRI
1020	movb	$0x87, %al
1021	outb	$CYRIX_CRD
1022
1023	/*
1024	 * enable MATCH
1025	 */
1026	movb	$CYRIX_DBR0, %al
1027	outb	$CYRIX_CRI
1028	movb	%bh, %al
1029	andb	$0x7f, %al
1030	outb	$CYRIX_CRD
1031
1032	/*
1033	 * disable MAPEN
1034	 */
1035	movb	$0xc3, %al
1036	outb	$CYRIX_CRI
1037	movb	%ah, %al
1038	outb	$CYRIX_CRD
1039
1040	jmp	cpu_done
1041
1042cpu_done:
1043
1044	popfl					/* Restore original FLAGS */
1045	popal					/* Restore all registers */
1046
1047#endif	/* !__xpv */
1048
1049	/*
1050	 *  mlsetup(%esp) gets called.
1051	 */
1052	pushl	%esp
1053	call	mlsetup
1054	addl	$4, %esp
1055
1056	/*
1057	 * We change our appearance to look like the real thread 0.
	 * (NOTE: making ourselves a real thread may be a noop)
1059	 * main() gets called.  (NOTE: main() never returns).
1060	 */
1061	call	main
1062	/* NOTREACHED */
1063	pushl	$__return_from_main
1064	call	panic
1065
1066	/* NOTREACHED */
1067cpu_486:
1068	pushl	$__unsupported_cpu
1069	call	panic
1070	SET_SIZE(_locore_start)
1071
1072#endif	/* __lint */
1073#endif	/* !__amd64 */
1074
1075
1076/*
1077 *  For stack layout, see privregs.h
1078 *  When cmntrap gets called, the error code and trap number have been pushed.
1079 *  When cmntrap_pushed gets called, the entire struct regs has been pushed.
1080 */
1081
1082#if defined(__lint)
1083
1084/* ARGSUSED */
1085void
1086cmntrap()
1087{}
1088
1089#else	/* __lint */
1090
1091	.globl	trap		/* C handler called below */
1092
1093#if defined(__amd64)
1094
1095	ENTRY_NP2(cmntrap, _cmntrap)
1096
1097	INTR_PUSH
1098
1099	ALTENTRY(cmntrap_pushed)
1100
1101	movq	%rsp, %rbp
1102
1103	/*
1104	 * - if this is a #pf i.e. T_PGFLT, %r15 is live
1105	 *   and contains the faulting address i.e. a copy of %cr2
1106	 *
1107	 * - if this is a #db i.e. T_SGLSTP, %r15 is live
1108	 *   and contains the value of %db6
1109	 */
1110
1111	TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
1112	TRACE_REGS(%rdi, %rsp, %rbx, %rcx)	/* Uses label 9 */
1113	TRACE_STAMP(%rdi)		/* Clobbers %eax, %edx, uses 9 */
1114
1115	/*
1116	 * We must first check if DTrace has set its NOFAULT bit.  This
1117	 * regrettably must happen before the trap stack is recorded, because
1118	 * this requires a call to getpcstack() and may induce recursion if an
1119	 * fbt::getpcstack: enabling is inducing the bad load.
1120	 */
1121	movl	%gs:CPU_ID, %eax
1122	shlq	$CPU_CORE_SHIFT, %rax
1123	leaq	cpu_core(%rip), %r8
1124	addq	%r8, %rax
1125	movw	CPUC_DTRACE_FLAGS(%rax), %cx
1126	testw	$CPU_DTRACE_NOFAULT, %cx
1127	jnz	.dtrace_induced
1128
1129	TRACE_STACK(%rdi)
1130
1131	movq	%rbp, %rdi
1132	movq	%r15, %rsi
1133	movl	%gs:CPU_ID, %edx
1134
1135	/*
1136	 * We know that this isn't a DTrace non-faulting load; we can now safely
1137	 * reenable interrupts.  (In the case of pagefaults, we enter through an
1138	 * interrupt gate.)
1139	 */
1140	ENABLE_INTR_FLAGS
1141
1142	call	trap		/* trap(rp, addr, cpuid) handles all traps */
1143	jmp	_sys_rtt
1144
1145.dtrace_induced:
1146	cmpw	$KCS_SEL, REGOFF_CS(%rbp)	/* test CS for user-mode trap */
1147	jne	2f				/* if from user, panic */
1148
1149	cmpl	$T_PGFLT, REGOFF_TRAPNO(%rbp)
1150	je	0f
1151
1152	cmpl	$T_GPFLT, REGOFF_TRAPNO(%rbp)
1153	jne	3f				/* if not PF or GP, panic */
1154
1155	/*
1156	 * If we've taken a GPF, we don't (unfortunately) have the address that
1157	 * induced the fault.  So instead of setting the fault to BADADDR,
1158	 * we'll set the fault to ILLOP.
1159	 */
1160	orw	$CPU_DTRACE_ILLOP, %cx
1161	movw	%cx, CPUC_DTRACE_FLAGS(%rax)
1162	jmp	1f
11630:
1164	orw	$CPU_DTRACE_BADADDR, %cx
1165	movw	%cx, CPUC_DTRACE_FLAGS(%rax)	/* set fault to bad addr */
1166	movq	%r15, CPUC_DTRACE_ILLVAL(%rax)
1167					    /* fault addr is illegal value */
11681:
1169	movq	REGOFF_RIP(%rbp), %rdi
1170	movq	%rdi, %r12
1171	call	dtrace_instr_size
1172	addq	%rax, %r12
1173	movq	%r12, REGOFF_RIP(%rbp)
1174	INTR_POP
1175	IRET
1176	/*NOTREACHED*/
11772:
1178	leaq	dtrace_badflags(%rip), %rdi
1179	xorl	%eax, %eax
1180	call	panic
11813:
1182	leaq	dtrace_badtrap(%rip), %rdi
1183	xorl	%eax, %eax
1184	call	panic
1185	SET_SIZE(cmntrap)
1186	SET_SIZE(_cmntrap)
1187
1188#elif defined(__i386)
1189
1190
1191	ENTRY_NP2(cmntrap, _cmntrap)
1192
1193	INTR_PUSH
1194
1195	ALTENTRY(cmntrap_pushed)
1196
1197	movl	%esp, %ebp
1198
1199	/*
1200	 * - if this is a #pf i.e. T_PGFLT, %esi is live
1201	 *   and contains the faulting address i.e. a copy of %cr2
1202	 *
1203	 * - if this is a #db i.e. T_SGLSTP, %esi is live
1204	 *   and contains the value of %db6
1205	 */
1206
1207	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
1208	TRACE_REGS(%edi, %esp, %ebx, %ecx)	/* Uses label 9 */
1209	TRACE_STAMP(%edi)		/* Clobbers %eax, %edx, uses 9 */
1210
1211	/*
1212	 * We must first check if DTrace has set its NOFAULT bit.  This
1213	 * regrettably must happen before the trap stack is recorded, because
1214	 * this requires a call to getpcstack() and may induce recursion if an
1215	 * fbt::getpcstack: enabling is inducing the bad load.
1216	 */
1217	movl	%gs:CPU_ID, %eax
1218	shll	$CPU_CORE_SHIFT, %eax
1219	addl	$cpu_core, %eax
1220	movw	CPUC_DTRACE_FLAGS(%eax), %cx
1221	testw	$CPU_DTRACE_NOFAULT, %cx
1222	jnz	.dtrace_induced
1223
1224	TRACE_STACK(%edi)
1225
1226	pushl	%gs:CPU_ID
1227	pushl	%esi		/* fault address for PGFLTs */
1228	pushl	%ebp		/* &regs */
1229
1230	/*
1231	 * We know that this isn't a DTrace non-faulting load; we can now safely
1232	 * reenable interrupts.  (In the case of pagefaults, we enter through an
1233	 * interrupt gate.)
1234	 */
1235	ENABLE_INTR_FLAGS
1236
1237	call	trap		/* trap(rp, addr, cpuid) handles all traps */
1238	addl	$12, %esp	/* get argument off stack */
1239	jmp	_sys_rtt
1240
1241.dtrace_induced:
1242	cmpw	$KCS_SEL, REGOFF_CS(%ebp)	/* test CS for user-mode trap */
1243	jne	2f				/* if from user, panic */
1244
1245	cmpl	$T_PGFLT, REGOFF_TRAPNO(%ebp)
1246	je	0f
1247
1248	cmpl	$T_GPFLT, REGOFF_TRAPNO(%ebp)
1249	jne	3f				/* if not PF or GP, panic */
1250
1251	/*
1252	 * If we've taken a GPF, we don't (unfortunately) have the address that
1253	 * induced the fault.  So instead of setting the fault to BADADDR,
1254	 * we'll set the fault to ILLOP.
1255	 */
1256	orw	$CPU_DTRACE_ILLOP, %cx
1257	movw	%cx, CPUC_DTRACE_FLAGS(%eax)
1258	jmp	1f
12590:
1260	orw	$CPU_DTRACE_BADADDR, %cx
1261	movw	%cx, CPUC_DTRACE_FLAGS(%eax)	/* set fault to bad addr */
1262	movl	%esi, CPUC_DTRACE_ILLVAL(%eax)
1263					    /* fault addr is illegal value */
12641:
1265	pushl	REGOFF_EIP(%ebp)
1266	call	dtrace_instr_size
1267	addl	$4, %esp
1268	movl	REGOFF_EIP(%ebp), %ecx
1269	addl	%eax, %ecx
1270	movl	%ecx, REGOFF_EIP(%ebp)
1271	INTR_POP_KERNEL
1272	IRET
1273	/*NOTREACHED*/
12742:
1275	pushl	$dtrace_badflags
1276	call	panic
12773:
1278	pushl	$dtrace_badtrap
1279	call	panic
1280	SET_SIZE(cmntrap)
1281	SET_SIZE(_cmntrap)
1282
1283#endif	/* __i386 */
1284
1285/*
1286 * Declare a uintptr_t which has the size of _cmntrap to enable stack
1287 * traceback code to know when a regs structure is on the stack.
1288 */
1289	.globl	_cmntrap_size
1290	.align	CLONGSIZE
1291_cmntrap_size:
1292	.NWORD	. - _cmntrap
1293	.type	_cmntrap_size, @object
1294
1295dtrace_badflags:
1296	.string "bad DTrace flags"
1297
1298dtrace_badtrap:
1299	.string "bad DTrace trap"
1300
1301#endif	/* __lint */
1302
1303#if defined(__lint)
1304
1305/* ARGSUSED */
1306void
1307cmninttrap()
1308{}
1309
1310#if !defined(__xpv)
1311void
1312bop_trap_handler(void)
1313{}
1314#endif
1315
1316#else	/* __lint */
1317
1318	.globl	trap		/* C handler called below */
1319
1320#if defined(__amd64)
1321
1322	ENTRY_NP(cmninttrap)
1323
1324	INTR_PUSH
1325	INTGATE_INIT_KERNEL_FLAGS
1326
1327	TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
1328	TRACE_REGS(%rdi, %rsp, %rbx, %rcx)	/* Uses label 9 */
1329	TRACE_STAMP(%rdi)		/* Clobbers %eax, %edx, uses 9 */
1330
1331	movq	%rsp, %rbp
1332
1333	movl	%gs:CPU_ID, %edx
1334	xorl	%esi, %esi
1335	movq	%rsp, %rdi
1336	call	trap		/* trap(rp, addr, cpuid) handles all traps */
1337	jmp	_sys_rtt
1338	SET_SIZE(cmninttrap)
1339
1340#if !defined(__xpv)
1341	/*
1342	 * Handle traps early in boot. Just revectors into C quickly as
1343	 * these are always fatal errors.
1344	 *
1345	 * Adjust %rsp to get same stack layout as in 32bit mode for bop_trap().
1346	 */
1347	ENTRY(bop_trap_handler)
1348	movq	%rsp, %rdi
1349	sub	$8, %rsp
1350	call	bop_trap
1351	SET_SIZE(bop_trap_handler)
1352#endif
1353
1354#elif defined(__i386)
1355
1356	ENTRY_NP(cmninttrap)
1357
1358	INTR_PUSH
1359	INTGATE_INIT_KERNEL_FLAGS
1360
1361	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
1362	TRACE_REGS(%edi, %esp, %ebx, %ecx)	/* Uses label 9 */
1363	TRACE_STAMP(%edi)		/* Clobbers %eax, %edx, uses 9 */
1364
1365	movl	%esp, %ebp
1366
1367	TRACE_STACK(%edi)
1368
1369	pushl	%gs:CPU_ID
1370	pushl	$0
1371	pushl	%ebp
1372	call	trap		/* trap(rp, addr, cpuid) handles all traps */
1373	addl	$12, %esp
1374	jmp	_sys_rtt
1375	SET_SIZE(cmninttrap)
1376
1377#if !defined(__xpv)
1378	/*
1379	 * Handle traps early in boot. Just revectors into C quickly as
1380	 * these are always fatal errors.
1381	 */
1382	ENTRY(bop_trap_handler)
1383	movl	%esp, %eax
1384	pushl	%eax
1385	call	bop_trap
1386	SET_SIZE(bop_trap_handler)
1387#endif
1388
1389#endif	/* __i386 */
1390
1391#endif	/* __lint */
1392
1393#if defined(__lint)
1394
1395/* ARGSUSED */
1396void
1397dtrace_trap()
1398{}
1399
1400#else	/* __lint */
1401
1402	.globl	dtrace_user_probe
1403
1404#if defined(__amd64)
1405
1406	ENTRY_NP(dtrace_trap)
1407
1408	INTR_PUSH
1409
1410	TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
1411	TRACE_REGS(%rdi, %rsp, %rbx, %rcx)	/* Uses label 9 */
1412	TRACE_STAMP(%rdi)		/* Clobbers %eax, %edx, uses 9 */
1413
1414	movq	%rsp, %rbp
1415
1416	movl	%gs:CPU_ID, %edx
1417#if defined(__xpv)
1418	movq	%gs:CPU_VCPU_INFO, %rsi
1419	movq	VCPU_INFO_ARCH_CR2(%rsi), %rsi
1420#else
1421	movq	%cr2, %rsi
1422#endif
1423	movq	%rsp, %rdi
1424
1425	ENABLE_INTR_FLAGS
1426
1427	call	dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
1428	jmp	_sys_rtt
1429
1430	SET_SIZE(dtrace_trap)
1431
1432#elif defined(__i386)
1433
1434	ENTRY_NP(dtrace_trap)
1435
1436	INTR_PUSH
1437
1438	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
1439	TRACE_REGS(%edi, %esp, %ebx, %ecx)	/* Uses label 9 */
1440	TRACE_STAMP(%edi)		/* Clobbers %eax, %edx, uses 9 */
1441
1442	movl	%esp, %ebp
1443
1444	pushl	%gs:CPU_ID
1445#if defined(__xpv)
1446	movl	%gs:CPU_VCPU_INFO, %eax
1447	movl	VCPU_INFO_ARCH_CR2(%eax), %eax
1448#else
1449	movl	%cr2, %eax
1450#endif
1451	pushl	%eax
1452	pushl	%ebp
1453
1454	ENABLE_INTR_FLAGS
1455
1456	call	dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
1457	addl	$12, %esp		/* get argument off stack */
1458
1459	jmp	_sys_rtt
1460	SET_SIZE(dtrace_trap)
1461
1462#endif	/* __i386 */
1463
1464#endif	/* __lint */
1465
1466/*
1467 * Return from _sys_trap routine.
1468 */
1469
1470#if defined(__lint)
1471
1472void
1473lwp_rtt_initial(void)
1474{}
1475
1476void
1477lwp_rtt(void)
1478{}
1479
1480void
1481_sys_rtt(void)
1482{}
1483
1484#else	/* __lint */
1485
1486#if defined(__amd64)
1487
1488	ENTRY_NP(lwp_rtt_initial)
1489	movq	%gs:CPU_THREAD, %r15
1490	movq	T_STACK(%r15), %rsp	/* switch to the thread stack */
1491	movq	%rsp, %rbp
1492	call	__dtrace_probe___proc_start
1493	jmp	_lwp_rtt
1494
1495	ENTRY_NP(lwp_rtt)
1496
1497	/*
1498	 * r14	lwp
1499	 * rdx	lwp->lwp_procp
1500	 * r15	curthread
1501	 */
1502
1503	movq	%gs:CPU_THREAD, %r15
1504	movq	T_STACK(%r15), %rsp	/* switch to the thread stack */
1505	movq	%rsp, %rbp
1506_lwp_rtt:
1507	call	__dtrace_probe___proc_lwp__start
1508	movq	%gs:CPU_LWP, %r14
1509	movq	LWP_PROCP(%r14), %rdx
1510
1511	/*
1512	 * XX64	Is the stack misaligned correctly at this point?
1513	 *	If not, we need to do a push before calling anything ..
1514	 */
1515
1516#if defined(DEBUG)
1517	/*
1518	 * If we were to run lwp_savectx at this point -without-
1519	 * pcb_rupdate being set to 1, we'd end up sampling the hardware
1520	 * state left by the previous running lwp, rather than setting
1521	 * the values requested by the lwp creator.  Bad.
1522	 */
1523	testb	$0x1, PCB_RUPDATE(%r14)
1524	jne	1f
1525	leaq	_no_pending_updates(%rip), %rdi
1526	movl	$__LINE__, %esi
1527	movq	%r14, %rdx
1528	xorl	%eax, %eax
1529	call	panic
1530_no_pending_updates:
1531	.string	"locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate != 1"
15321:
1533#endif
1534
1535	/*
1536	 * If agent lwp, clear %fs and %gs
1537	 */
1538	cmpq	%r15, P_AGENTTP(%rdx)
1539	jne	1f
1540	xorl	%ecx, %ecx
1541	movq	%rcx, REGOFF_FS(%rsp)
1542	movq	%rcx, REGOFF_GS(%rsp)
1543	movw	%cx, LWP_PCB_FS(%r14)
1544	movw	%cx, LWP_PCB_GS(%r14)
15451:
1546	call	dtrace_systrace_rtt
1547	movq	REGOFF_RDX(%rsp), %rsi
1548	movq	REGOFF_RAX(%rsp), %rdi
1549	call	post_syscall		/* post_syscall(rval1, rval2) */
1550
1551	/*
1552	 * set up to take fault on first use of fp
1553	 */
1554	STTS(%rdi)
1555
1556	/*
1557	 * XXX - may want a fast path that avoids sys_rtt_common in the
1558	 * most common case.
1559	 */
1560	ALTENTRY(_sys_rtt)
1561	CLI(%rax)			/* disable interrupts */
1562	ALTENTRY(_sys_rtt_ints_disabled)
1563	movq	%rsp, %rdi		/* pass rp to sys_rtt_common */
1564	call	sys_rtt_common		/* do common sys_rtt tasks */
1565	testq	%rax, %rax		/* returning to userland? */
1566	jz	sr_sup
1567
1568	/*
1569	 * Return to user
1570	 */
1571	ASSERT_UPCALL_MASK_IS_SET
1572	cmpw	$UCS_SEL, REGOFF_CS(%rsp) /* test for native (64-bit) lwp? */
1573	je	sys_rtt_syscall
1574
1575	/*
1576	 * Return to 32-bit userland
1577	 */
1578	ALTENTRY(sys_rtt_syscall32)
1579	USER32_POP
1580	IRET
1581	/*NOTREACHED*/
1582
1583	ALTENTRY(sys_rtt_syscall)
1584	/*
1585	 * Return to 64-bit userland
1586	 */
1587	USER_POP
1588	ALTENTRY(nopop_sys_rtt_syscall)
1589	IRET
1590	/*NOTREACHED*/
1591	SET_SIZE(nopop_sys_rtt_syscall)
1592
1593	/*
1594	 * Return to supervisor
1595	 * NOTE: to make the check in trap() that tests if we are executing
1596	 * segment register fixup/restore code work properly, sr_sup MUST be
	 * after _sys_rtt.
1598	 */
1599	ALTENTRY(sr_sup)
1600	/*
1601	 * Restore regs before doing iretq to kernel mode
1602	 */
1603	INTR_POP
1604	IRET
1605	.globl	_sys_rtt_end
1606_sys_rtt_end:
1607	/*NOTREACHED*/
1608	SET_SIZE(sr_sup)
1609	SET_SIZE(_sys_rtt_end)
1610	SET_SIZE(lwp_rtt)
1611	SET_SIZE(lwp_rtt_initial)
1612	SET_SIZE(_sys_rtt_ints_disabled)
1613	SET_SIZE(_sys_rtt)
1614	SET_SIZE(sys_rtt_syscall)
1615	SET_SIZE(sys_rtt_syscall32)
1616
1617#elif defined(__i386)
1618
1619	ENTRY_NP(lwp_rtt_initial)
1620	movl	%gs:CPU_THREAD, %eax
1621	movl	T_STACK(%eax), %esp	/* switch to the thread stack */
1622	movl	%esp, %ebp
1623	call	__dtrace_probe___proc_start
1624	jmp	_lwp_rtt
1625
1626	ENTRY_NP(lwp_rtt)
1627	movl	%gs:CPU_THREAD, %eax
1628	movl	T_STACK(%eax), %esp	/* switch to the thread stack */
1629	movl	%esp, %ebp
1630_lwp_rtt:
1631	call	__dtrace_probe___proc_lwp__start
1632
1633        /*
1634         * If agent lwp, clear %fs and %gs.
1635         */
1636        movl    %gs:CPU_LWP, %eax
1637        movl    LWP_PROCP(%eax), %edx
1638
1639        cmpl    %eax, P_AGENTTP(%edx)
1640        jne     1f
1641        movl    $0, REGOFF_FS(%esp)
1642        movl    $0, REGOFF_GS(%esp)
16431:
1644	call	dtrace_systrace_rtt
1645	movl	REGOFF_EDX(%esp), %edx
1646	movl	REGOFF_EAX(%esp), %eax
1647	pushl	%edx
1648	pushl	%eax
1649	call	post_syscall		/* post_syscall(rval1, rval2) */
1650	addl	$8, %esp
1651
1652	/*
1653	 * set up to take fault on first use of fp
1654	 */
1655	STTS(%eax)
1656
1657	/*
1658	 * XXX - may want a fast path that avoids sys_rtt_common in the
1659	 * most common case.
1660	 */
1661	ALTENTRY(_sys_rtt)
1662	CLI(%eax)			/* disable interrupts */
1663	ALTENTRY(_sys_rtt_ints_disabled)
1664	pushl	%esp			/* pass rp to sys_rtt_common */
1665	call	sys_rtt_common
1666	addl	$4, %esp		/* pop arg */
1667	testl	%eax, %eax		/* test for return to user mode */
1668	jz	sr_sup
1669
1670	/*
1671	 * Return to User.
1672	 */
1673	ALTENTRY(sys_rtt_syscall)
1674	INTR_POP_USER
1675
1676	/*
1677	 * There can be no instructions between this label and IRET or
1678	 * we could end up breaking linux brand support. See label usage
1679	 * in lx_brand_int80_callback for an example.
1680	 */
1681	ALTENTRY(nopop_sys_rtt_syscall)
1682	IRET
1683	/*NOTREACHED*/
1684	SET_SIZE(nopop_sys_rtt_syscall)
1685
1686	ALTENTRY(_sys_rtt_end)
1687
1688	/*
1689	 * Return to supervisor
1690	 */
1691	ALTENTRY(sr_sup)
1692
1693	/*
1694	 * Restore regs before doing iret to kernel mode
1695	 */
1696	INTR_POP_KERNEL
1697	IRET
1698	/*NOTREACHED*/
1699
1700	SET_SIZE(sr_sup)
1701	SET_SIZE(_sys_rtt_end)
1702	SET_SIZE(lwp_rtt)
1703	SET_SIZE(lwp_rtt_initial)
1704	SET_SIZE(_sys_rtt_ints_disabled)
1705	SET_SIZE(_sys_rtt)
1706	SET_SIZE(sys_rtt_syscall)
1707
1708#endif	/* __i386 */
1709
1710#endif	/* __lint */
1711
1712#if defined(__lint)
1713
1714/*
1715 * So why do we have to deal with all this crud in the world of ia32?
1716 *
1717 * Basically there are four classes of ia32 implementations, those that do not
1718 * have a TSC, those that have a marginal TSC that is broken to the extent
1719 * that it is useless, those that have a marginal TSC that is not quite so
1720 * horribly broken and can be used with some care, and those that have a
1721 * reliable TSC. This crud has to be here in order to sift through all the
1722 * variants.
1723 */
1724
1725/*ARGSUSED*/
1726uint64_t
1727freq_tsc(uint32_t *pit_counter)
1728{
1729	return (0);
1730}
1731
1732#else	/* __lint */
1733
1734#if defined(__amd64)
1735
1736	/*
1737	 * XX64 quick and dirty port from the i386 version. Since we
1738	 * believe the amd64 tsc is more reliable, could this code be
1739	 * simpler?
1740	 */
1741	ENTRY_NP(freq_tsc)
1742	pushq	%rbp
1743	movq	%rsp, %rbp
1744	movq	%rdi, %r9	/* save pit_counter */
1745	pushq	%rbx
1746
1747/ We have a TSC, but we have no way in general to know how reliable it is.
1748/ Usually a marginal TSC behaves appropriately unless not enough time
1749/ elapses between reads. A reliable TSC can be read as often and as rapidly
1750/ as desired. The simplistic approach of reading the TSC counter and
1751/ correlating to the PIT counter cannot be naively followed. Instead estimates
1752/ have to be taken to successively refine a guess at the speed of the cpu
1753/ and then the TSC and PIT counter are correlated. In practice very rarely
1754/ is more than one quick loop required for an estimate. Measures have to be
1755/ taken to prevent the PIT counter from wrapping beyond its resolution and for
1756/ measuring the clock rate of very fast processors.
1757/
1758/ The following constant can be tuned. It should be such that the loop does
1759/ not take too many nor too few PIT counts to execute. If this value is too
1760/ large, then on slow machines the loop will take a long time, or the PIT
1761/ counter may even wrap. If this value is too small, then on fast machines
1762/ the PIT counter may count so few ticks that the resolution of the PIT
1763/ itself causes a bad guess. Because this code is used in machines with
1764/ marginal TSC's and/or IO, if this value is too small on those, it may
1765/ cause the calculated cpu frequency to vary slightly from boot to boot.
1766/
1767/ In all cases even if this constant is set inappropriately, the algorithm
1768/ will still work and the caller should be able to handle variances in the
1769/ calculation of cpu frequency, but the calculation will be inefficient and
1770/ take a disproportionate amount of time relative to a well selected value.
1771/ As the slowest supported cpu becomes faster, this constant should be
1772/ carefully increased.
1773
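/ (illustrative note) 0x8000 below is only the initial loop count; it is
/ halved when the PIT wraps, doubled when the loop finishes before the PIT
/ even loads its count, and rescaled by freq_tsc_calculate so that the
/ measured interval converges on the 0xf000-tick target (roughly 50 ms at
/ the PIT's 1.193182 MHz input clock).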
1774	movl	$0x8000, %ecx
1775
1776	/ to make sure the instruction cache has been warmed
1777	clc
1778
1779	jmp	freq_tsc_loop
1780
1781/ The following block of code up to and including the latching of the PIT
1782/ counter after freq_tsc_perf_loop is very critical and very carefully
1783/ written, it should only be modified with great care. freq_tsc_loop to
1784/ freq_tsc_perf_loop fits exactly in 16 bytes as do the instructions in
1785/ freq_tsc_perf_loop up to the unlatching of the PIT counter.
1786
1787	.align	32
1788freq_tsc_loop:
1789	/ save the loop count in %ebx
1790	movl	%ecx, %ebx
1791
1792	/ initialize the PIT counter and start a count down
1793	movb	$PIT_LOADMODE, %al
1794	outb	$PITCTL_PORT
1795	movb	$0xff, %al
1796	outb	$PITCTR0_PORT
1797	outb	$PITCTR0_PORT
1798
1799	/ read the TSC and store the TS in %edi:%esi
1800	rdtsc
1801	movl	%eax, %esi
1802
1803freq_tsc_perf_loop:
1804	movl	%edx, %edi
1805	movl	%eax, %esi
1806	movl	%edx, %edi
1807	loop	freq_tsc_perf_loop
1808
1809	/ read the TSC and store the LSW in %ecx
1810	rdtsc
1811	movl	%eax, %ecx
1812
1813	/ latch the PIT counter and status
1814	movb	$_CONST(PIT_READBACK|PIT_READBACKC0), %al
1815	outb	$PITCTL_PORT
1816
1817	/ remember if the icache has been warmed
1818	setc	%ah
1819
1820	/ read the PIT status
1821	inb	$PITCTR0_PORT
1822	shll	$8, %eax
1823
1824	/ read PIT count
1825	inb	$PITCTR0_PORT
1826	shll	$8, %eax
1827	inb	$PITCTR0_PORT
1828	bswap	%eax
1829
1830	/ check to see if the PIT count was loaded into the CE
1831	btw	$_CONST(PITSTAT_NULLCNT+8), %ax
1832	jc	freq_tsc_increase_count
1833
1834	/ check to see if PIT counter wrapped
1835	btw	$_CONST(PITSTAT_OUTPUT+8), %ax
1836	jnc	freq_tsc_pit_did_not_wrap
1837
1838	/ halve count
1839	shrl	$1, %ebx
1840	movl	%ebx, %ecx
1841
1842	/ the instruction cache has been warmed
1843	stc
1844
1845	jmp	freq_tsc_loop
1846
1847freq_tsc_increase_count:
1848	shll	$1, %ebx
1849	jc	freq_tsc_too_fast
1850
1851	movl	%ebx, %ecx
1852
1853	/ the instruction cache has been warmed
1854	stc
1855
1856	jmp	freq_tsc_loop
1857
1858freq_tsc_pit_did_not_wrap:
1859	roll	$16, %eax
1860
1861	cmpw	$0x2000, %ax
1862	notw	%ax
1863	jb	freq_tsc_sufficient_duration
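	/ (clarifying note) after the roll, %ax holds the latched PIT count,
	/ i.e. the ticks remaining of the 0xffff initially loaded.  If fewer
	/ than 0x2000 remain, at least ~0xe000 ticks (~48 ms at 1.193182 MHz)
	/ elapsed, which is long enough for a good estimate.  notw converts
	/ the remaining count into the (approximate) elapsed count without
	/ disturbing the carry flag tested by the jb above.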
1864
1865freq_tsc_calculate:
1866	/ in mode 0, the PIT loads the count into the CE on the first CLK pulse,
1867	/ then on the second CLK pulse the CE is decremented, therefore mode 0
1868	/ is really a (count + 1) counter, ugh
1869	xorl	%esi, %esi
1870	movw	%ax, %si
1871	incl	%esi
1872
1873	movl	$0xf000, %eax
1874	mull	%ebx
1875
1876	/ tuck away (target_pit_count * loop_count)
1877	movl	%edx, %ecx
1878	movl	%eax, %ebx
1879
1880	movl	%esi, %eax
1881	movl	$0xffffffff, %edx
1882	mull	%edx
1883
1884	addl	%esi, %eax
1885	adcl	$0, %edx
1886
1887	cmpl	%ecx, %edx
1888	ja	freq_tsc_div_safe
1889	jb	freq_tsc_too_fast
1890
1891	cmpl	%ebx, %eax
1892	jbe	freq_tsc_too_fast
1893
1894freq_tsc_div_safe:
1895	movl	%ecx, %edx
1896	movl	%ebx, %eax
1897
1898	movl	%esi, %ecx
1899	divl	%ecx
1900
1901	movl	%eax, %ecx
1902
1903	/ the instruction cache has been warmed
1904	stc
1905
1906	jmp	freq_tsc_loop
1907
1908freq_tsc_sufficient_duration:
1909	/ test to see if the icache has been warmed
1910	btl	$16, %eax
1911	jnc	freq_tsc_calculate
1912
1913	/ recall mode 0 is a (count + 1) counter
1914	andl	$0xffff, %eax
1915	incl	%eax
1916
1917	/ save the number of PIT counts
1918	movl	%eax, (%r9)
1919
1920	/ calculate the number of TS's that elapsed
1921	movl	%ecx, %eax
1922	subl	%esi, %eax
1923	sbbl	%edi, %edx
1924
1925	jmp	freq_tsc_end
1926
1927freq_tsc_too_fast:
1928	/ return 0 as a 64 bit quantity
1929	xorl	%eax, %eax
1930	xorl	%edx, %edx
1931
1932freq_tsc_end:
1933	shlq	$32, %rdx
1934	orq	%rdx, %rax
1935
1936	popq	%rbx
1937	leaveq
1938	ret
1939	SET_SIZE(freq_tsc)
1940
1941#elif defined(__i386)
1942
1943	ENTRY_NP(freq_tsc)
1944	pushl	%ebp
1945	movl	%esp, %ebp
1946	pushl	%edi
1947	pushl	%esi
1948	pushl	%ebx
1949
1950/ We have a TSC, but we have no way in general to know how reliable it is.
1951/ Usually a marginal TSC behaves appropriately unless not enough time
1952/ elapses between reads. A reliable TSC can be read as often and as rapidly
1953/ as desired. The simplistic approach of reading the TSC counter and
1954/ correlating to the PIT counter cannot be naively followed. Instead estimates
1955/ have to be taken to successively refine a guess at the speed of the cpu
1956/ and then the TSC and PIT counter are correlated. In practice very rarely
1957/ is more than one quick loop required for an estimate. Measures have to be
1958/ taken to prevent the PIT counter from wrapping beyond its resolution and for
1959/ measuring the clock rate of very fast processors.
1960/
1961/ The following constant can be tuned. It should be such that the loop does
1962/ not take too many nor too few PIT counts to execute. If this value is too
1963/ large, then on slow machines the loop will take a long time, or the PIT
1964/ counter may even wrap. If this value is too small, then on fast machines
1965/ the PIT counter may count so few ticks that the resolution of the PIT
1966/ itself causes a bad guess. Because this code is used in machines with
1967/ marginal TSC's and/or IO, if this value is too small on those, it may
1968/ cause the calculated cpu frequency to vary slightly from boot to boot.
1969/
1970/ In all cases even if this constant is set inappropriately, the algorithm
1971/ will still work and the caller should be able to handle variances in the
1972/ calculation of cpu frequency, but the calculation will be inefficient and
1973/ take a disproportionate amount of time relative to a well selected value.
1974/ As the slowest supported cpu becomes faster, this constant should be
1975/ carefully increased.
1976
1977	movl	$0x8000, %ecx
1978
1979	/ to make sure the instruction cache has been warmed
1980	clc
1981
1982	jmp	freq_tsc_loop
1983
1984/ The following block of code up to and including the latching of the PIT
1985/ counter after freq_tsc_perf_loop is very critical and very carefully
1986/ written, it should only be modified with great care. freq_tsc_loop to
1987/ freq_tsc_perf_loop fits exactly in 16 bytes as do the instructions in
1988/ freq_tsc_perf_loop up to the unlatching of the PIT counter.
1989
1990	.align	32
1991freq_tsc_loop:
1992	/ save the loop count in %ebx
1993	movl	%ecx, %ebx
1994
1995	/ initialize the PIT counter and start a count down
1996	movb	$PIT_LOADMODE, %al
1997	outb	$PITCTL_PORT
1998	movb	$0xff, %al
1999	outb	$PITCTR0_PORT
2000	outb	$PITCTR0_PORT
2001
2002	/ read the TSC and store the TS in %edi:%esi
2003	rdtsc
2004	movl	%eax, %esi
2005
2006freq_tsc_perf_loop:
2007	movl	%edx, %edi
2008	movl	%eax, %esi
2009	movl	%edx, %edi
2010	loop	freq_tsc_perf_loop
2011
2012	/ read the TSC and store the LSW in %ecx
2013	rdtsc
2014	movl	%eax, %ecx
2015
2016	/ latch the PIT counter and status
2017	movb	$_CONST(PIT_READBACK|PIT_READBACKC0), %al
2018	outb	$PITCTL_PORT
2019
2020	/ remember if the icache has been warmed
2021	setc	%ah
2022
2023	/ read the PIT status
2024	inb	$PITCTR0_PORT
2025	shll	$8, %eax
2026
2027	/ read PIT count
2028	inb	$PITCTR0_PORT
2029	shll	$8, %eax
2030	inb	$PITCTR0_PORT
2031	bswap	%eax
2032
2033	/ check to see if the PIT count was loaded into the CE
2034	btw	$_CONST(PITSTAT_NULLCNT+8), %ax
2035	jc	freq_tsc_increase_count
2036
2037	/ check to see if PIT counter wrapped
2038	btw	$_CONST(PITSTAT_OUTPUT+8), %ax
2039	jnc	freq_tsc_pit_did_not_wrap
2040
2041	/ halve count
2042	shrl	$1, %ebx
2043	movl	%ebx, %ecx
2044
2045	/ the instruction cache has been warmed
2046	stc
2047
2048	jmp	freq_tsc_loop
2049
2050freq_tsc_increase_count:
2051	shll	$1, %ebx
2052	jc	freq_tsc_too_fast
2053
2054	movl	%ebx, %ecx
2055
2056	/ the instruction cache has been warmed
2057	stc
2058
2059	jmp	freq_tsc_loop
2060
2061freq_tsc_pit_did_not_wrap:
2062	roll	$16, %eax
2063
2064	cmpw	$0x2000, %ax
2065	notw	%ax
2066	jb	freq_tsc_sufficient_duration
2067
2068freq_tsc_calculate:
2069	/ in mode 0, the PIT loads the count into the CE on the first CLK pulse,
2070	/ then on the second CLK pulse the CE is decremented, therefore mode 0
2071	/ is really a (count + 1) counter, ugh
2072	xorl	%esi, %esi
2073	movw	%ax, %si
2074	incl	%esi
2075
2076	movl	$0xf000, %eax
2077	mull	%ebx
2078
2079	/ tuck away (target_pit_count * loop_count)
2080	movl	%edx, %ecx
2081	movl	%eax, %ebx
2082
2083	movl	%esi, %eax
2084	movl	$0xffffffff, %edx
2085	mull	%edx
2086
2087	addl	%esi, %eax
2088	adcl	$0, %edx
2089
2090	cmpl	%ecx, %edx
2091	ja	freq_tsc_div_safe
2092	jb	freq_tsc_too_fast
2093
2094	cmpl	%ebx, %eax
2095	jbe	freq_tsc_too_fast
2096
2097freq_tsc_div_safe:
2098	movl	%ecx, %edx
2099	movl	%ebx, %eax
2100
2101	movl	%esi, %ecx
2102	divl	%ecx
2103
2104	movl	%eax, %ecx
2105
2106	/ the instruction cache has been warmed
2107	stc
2108
2109	jmp	freq_tsc_loop
2110
2111freq_tsc_sufficient_duration:
2112	/ test to see if the icache has been warmed
2113	btl	$16, %eax
2114	jnc	freq_tsc_calculate
2115
2116	/ recall mode 0 is a (count + 1) counter
2117	andl	$0xffff, %eax
2118	incl	%eax
2119
2120	/ save the number of PIT counts
2121	movl	8(%ebp), %ebx
2122	movl	%eax, (%ebx)
2123
2124	/ calculate the number of TS's that elapsed
2125	movl	%ecx, %eax
2126	subl	%esi, %eax
2127	sbbl	%edi, %edx
2128
2129	jmp	freq_tsc_end
2130
2131freq_tsc_too_fast:
2132	/ return 0 as a 64 bit quantity
2133	xorl	%eax, %eax
2134	xorl	%edx, %edx
2135
2136freq_tsc_end:
2137	popl	%ebx
2138	popl	%esi
2139	popl	%edi
2140	popl	%ebp
2141	ret
2142	SET_SIZE(freq_tsc)
2143
2144#endif	/* __i386 */
2145#endif	/* __lint */
2146
2147#if !defined(__amd64)
2148#if defined(__lint)
2149
2150/*
2151 * We do not have a TSC so we use a block of instructions with well known
2152 * timings.
2153 */
2154
2155/*ARGSUSED*/
2156uint64_t
2157freq_notsc(uint32_t *pit_counter)
2158{
2159	return (0);
2160}
2161
2162#else	/* __lint */
2163	ENTRY_NP(freq_notsc)
2164	pushl	%ebp
2165	movl	%esp, %ebp
2166	pushl	%edi
2167	pushl	%esi
2168	pushl	%ebx
2169
2170	/ initial count for the idivl loop
2171	movl	$0x1000, %ecx
2172
2173	/ load the divisor
2174	movl	$1, %ebx
2175
2176	jmp	freq_notsc_loop
2177
2178.align	16
2179freq_notsc_loop:
2180	/ set high 32 bits of dividend to zero
2181	xorl	%edx, %edx
2182
2183	/ save the loop count in %edi
2184	movl	%ecx, %edi
2185
2186	/ initialize the PIT counter and start a count down
2187	movb	$PIT_LOADMODE, %al
2188	outb	$PITCTL_PORT
2189	movb	$0xff, %al
2190	outb	$PITCTR0_PORT
2191	outb	$PITCTR0_PORT
2192
2193	/ set low 32 bits of dividend to zero
2194	xorl	%eax, %eax
2195
2196/ It is vital that the arguments to idivl be set appropriately because on some
2197/ cpu's this instruction takes more or less clock ticks depending on its
2198/ arguments.
2199freq_notsc_perf_loop:
2200	idivl	%ebx
2201	idivl	%ebx
2202	idivl	%ebx
2203	idivl	%ebx
2204	idivl	%ebx
2205	loop	freq_notsc_perf_loop
2206
2207	/ latch the PIT counter and status
2208	movb	$_CONST(PIT_READBACK|PIT_READBACKC0), %al
2209	outb	$PITCTL_PORT
2210
2211	/ read the PIT status
2212	inb	$PITCTR0_PORT
2213	shll	$8, %eax
2214
2215	/ read PIT count
2216	inb	$PITCTR0_PORT
2217	shll	$8, %eax
2218	inb	$PITCTR0_PORT
2219	bswap	%eax
2220
2221	/ check to see if the PIT count was loaded into the CE
2222	btw	$_CONST(PITSTAT_NULLCNT+8), %ax
2223	jc	freq_notsc_increase_count
2224
2225	/ check to see if PIT counter wrapped
2226	btw	$_CONST(PITSTAT_OUTPUT+8), %ax
2227	jnc	freq_notsc_pit_did_not_wrap
2228
2229	/ halve count
2230	shrl	$1, %edi
2231	movl	%edi, %ecx
2232
2233	jmp	freq_notsc_loop
2234
2235freq_notsc_increase_count:
2236	shll	$1, %edi
2237	jc	freq_notsc_too_fast
2238
2239	movl	%edi, %ecx
2240
2241	jmp	freq_notsc_loop
2242
2243freq_notsc_pit_did_not_wrap:
2244	shrl	$16, %eax
2245
2246	cmpw	$0x2000, %ax
2247	notw	%ax
2248	jb	freq_notsc_sufficient_duration
2249
2250freq_notsc_calculate:
2251	/ in mode 0, the PIT loads the count into the CE on the first CLK pulse,
2252	/ then on the second CLK pulse the CE is decremented, therefore mode 0
2253	/ is really a (count + 1) counter, ugh
2254	xorl	%esi, %esi
2255	movw	%ax, %si
2256	incl	%esi
2257
2258	movl	%edi, %eax
2259	movl	$0xf000, %ecx
2260	mull	%ecx
2261
2262	/ tuck away (target_pit_count * loop_count)
2263	movl	%edx, %edi
2264	movl	%eax, %ecx
2265
2266	movl	%esi, %eax
2267	movl	$0xffffffff, %edx
2268	mull	%edx
2269
2270	addl	%esi, %eax
2271	adcl	$0, %edx
2272
2273	cmpl	%edi, %edx
2274	ja	freq_notsc_div_safe
2275	jb	freq_notsc_too_fast
2276
2277	cmpl	%ecx, %eax
2278	jbe	freq_notsc_too_fast
2279
2280freq_notsc_div_safe:
2281	movl	%edi, %edx
2282	movl	%ecx, %eax
2283
2284	movl	%esi, %ecx
2285	divl	%ecx
2286
2287	movl	%eax, %ecx
2288
2289	jmp	freq_notsc_loop
2290
2291freq_notsc_sufficient_duration:
2292	/ recall mode 0 is a (count + 1) counter
2293	incl	%eax
2294
2295	/ save the number of PIT counts
2296	movl	8(%ebp), %ebx
2297	movl	%eax, (%ebx)
2298
2299	/ calculate the number of cpu clock ticks that elapsed
2300	cmpl	$X86_VENDOR_Cyrix, x86_vendor
2301	jz	freq_notsc_notcyrix
2302
2303	/ freq_notsc_perf_loop takes 86 clock cycles on Cyrix 6x86 cores
2304	movl	$86, %eax
2305	jmp	freq_notsc_calculate_tsc
2306
2307freq_notsc_notcyrix:
2308	/ freq_notsc_perf_loop takes 237 clock cycles on Intel Pentiums
2309	movl	$237, %eax
2310
2311freq_notsc_calculate_tsc:
2312	mull	%edi
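	/ %edx:%eax now holds cycles-per-pass * number-of-passes.  As a rough
	/ illustrative check: 0x1000 passes at 237 cycles each is ~970,000
	/ cycles; if the 1.193182 MHz PIT counted ~11,600 ticks over the same
	/ interval, the implied cpu clock is about 100 MHz.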
2313
2314	jmp	freq_notsc_end
2315
2316freq_notsc_too_fast:
2317	/ return 0 as a 64 bit quantity
2318	xorl	%eax, %eax
2319	xorl	%edx, %edx
2320
2321freq_notsc_end:
2322	popl	%ebx
2323	popl	%esi
2324	popl	%edi
2325	popl	%ebp
2326
2327	ret
2328	SET_SIZE(freq_notsc)
2329
2330#endif	/* __lint */
2331#endif	/* !__amd64 */
2332
2333#if !defined(__lint)
2334	.data
2335#if !defined(__amd64)
2336	.align	4
2337cpu_vendor:
2338	.long	0, 0, 0		/* Vendor ID string returned */
2339
2340	.globl	CyrixInstead
2341
2342	.globl	x86_featureset
2343	.globl	x86_type
2344	.globl	x86_vendor
2345#endif
2346
2347#endif	/* __lint */
2348