1/*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <i386/asm.h>
30#include <i386/rtclock_asm.h>
31#include <i386/proc_reg.h>
32#include <i386/eflags.h>
33
34#include <i386/postcode.h>
35#include <i386/apic.h>
36#include <i386/vmx/vmx_asm.h>
37#include <assym.s>
38
/*
 *	ml_get_timebase()
 *
 *	Sample the time stamp counter and hand the 64-bit reading back
 *	through the caller-supplied pointer.
 *
 *	Entry	- %rdi points to the 64-bit structure to fill in.
 *
 *	Exit	- 64-bit structure filled in with the TSC reading
 *		  (the same value is also left in %rax).
 *		  %rdx and the flags are modified.
 */
ENTRY(ml_get_timebase)
	lfence					/* let earlier instructions finish before sampling */
	rdtsc					/* %edx:%eax = time stamp counter */
	lfence					/* hold later instructions until the sample is taken */
	shlq	$32,%rdx			/* high half up into bits 63..32 */
	orq	%rdx,%rax			/* %rax = complete 64-bit count */
	movq	%rax,(%rdi)			/* write it to the caller's structure */
	ret
57
/*
 *	Convert between various timer units
 *
 *	uint64_t tmrCvt(uint64_t time,		// %rdi
 *			uint64_t conversion)	// %rsi
 *
 *	Rescales a 64-bit tick count from one timer unit into another;
 *	for example, TSC ticks are converted to HPET units this way.
 *
 *	"time" is a plain 64-bit integer number of ticks.  "conversion"
 *	is a 64-bit fixed point number: a 32 bit integer part followed
 *	by a 32 bit fraction.
 *
 *	The ticks are multiplied by the conversion factor as a full
 *	128-bit product, of which only the middle 64 bits are returned.
 *	The top 32 bits (overflow) and the bottom 32 bits (the fraction
 *	part of the result) are both dropped.
 *
 *	Because the factor and the result share the same fixed point
 *	layout, this routine can also multiply two conversion factors
 *	together.  That is how the multiplier for converting directly
 *	between any two units is calculated.
 *
 *	Returns the 64-bit result in %rax; %rdx and the flags are modified.
 */
ENTRY(tmrCvt)
	movq	%rsi,%rax			/* %rax = conversion factor */
	mulq	%rdi				/* %rdx:%rax = conversion * time (128 bits) */
	shrdq	$32,%rdx,%rax			/* %rax = bits 95..32 of the product */
	ret
88
89 /*
90 * void _rtc_nanotime_adjust(
91 *		uint64_t        tsc_base_delta,	// %rdi
92 *		rtc_nanotime_t  *dst);		// %rsi
93 */
ENTRY(_rtc_nanotime_adjust)
	/*
	 * Update protocol: the generation word is zeroed while the TSC base
	 * is being changed, and a fresh non-zero generation is stored only
	 * after the base has been updated.  The order of the three stores
	 * below is therefore significant.
	 */
	movl	RNT_GENERATION(%rsi),%eax	/* %eax = generation before the update */
	movl	$0,RNT_GENERATION(%rsi)		/* generation 0 marks the data as in flux */
	addq	%rdi,RNT_TSC_BASE(%rsi)		/* tsc base += tsc_base_delta */

	incl	%eax				/* advance to the next generation */
	jnz	1f				/* non-zero: usable as is */
	incl	%eax				/* wrapped to 0, which is reserved as the flag */
1:
	movl	%eax,RNT_GENERATION(%rsi)	/* publish the new generation */
	ret
105
106/*
107 * uint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp);
108 *
109 * This is the same as the commpage nanotime routine, except that it uses the
110 * kernel internal "rtc_nanotime_info" data instead of the commpage data.
111 * These two copies of data are kept in sync by rtc_clock_napped().
112 *
113 * Warning!  There are several copies of this code in the trampolines found in
114 * osfmk/x86_64/idt64.s, coming from the various TIMER macros in rtclock_asm.h.
115 * They're all kept in sync by using the RTC_NANOTIME_READ() macro.
116 *
117 * The algorithm we use is:
118 *
119 *	ns = ((((rdtsc - rnt_tsc_base)<<rnt_shift)*rnt_tsc_scale) / 2**32) + rnt_ns_base;
120 *
121 * rnt_shift, a constant computed during initialization, is the smallest value for which:
122 *
123 *	(tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD
124 *
 * Where SLOW_TSC_THRESHOLD is about 10**9 (1 GHz).  Since most processors' tscFreqs are
 * greater than 1GHz, rnt_shift is usually 0.  rnt_tsc_scale is also a 32-bit constant:
 *
 *	rnt_tsc_scale = (10**9 * 2**32) / (tscFreq << rnt_shift);
129 *
130 * On 64-bit processors this algorithm could be simplified by doing a 64x64 bit
131 * multiply of rdtsc by tscFCvtt2n:
132 *
133 *	ns = (((rdtsc - rnt_tsc_base) * tscFCvtt2n) / 2**32) + rnt_ns_base;
134 *
135 * We don't do so in order to use the same algorithm in 32- and 64-bit mode.
136 * When U32 goes away, we should reconsider.
137 *
138 * Since this routine is not synchronized and can be called in any context,
139 * we use a generation count to guard against seeing partially updated data.
140 * In addition, the _rtc_nanotime_store() routine zeroes the generation before
141 * updating the data, and stores the nonzero generation only after all fields
142 * have been stored.  Because IA32 guarantees that stores by one processor
143 * must be seen in order by another, we can avoid using a lock.  We spin while
144 * the generation is zero.
145 *
 * uint64_t _rtc_nanotime_read(
147 *			rtc_nanotime_t *rntp);		// %rdi
148 *
149 */
ENTRY(_rtc_nanotime_read)
	/*
	 * %rdi = rntp on entry.  The whole read sequence is supplied by
	 * the PAL_RTC_NANOTIME_READ_FAST() macro, which is defined outside
	 * this file; nothing else is done here before returning.
	 * NOTE(review): the macro presumably leaves the nanosecond value in
	 * %rax as the uint64_t return value -- confirm against its definition.
	 */
	PAL_RTC_NANOTIME_READ_FAST()
	ret
155
/*
 * extern uint64_t _rtc_tsc_to_nanoseconds(
 *          uint64_t    value,              // %rdi
 *          pal_rtc_nanotime_t	*rntp);     // %rsi
 *
 * Converts a count of TSC units to nanoseconds with an abbreviated form of
 * the algorithm described for _rtc_nanotime_read():
 *
 *	ns = ((value << shift) * scale) / 2**32
 *
 * where shift and scale are the 32-bit fields read at RNT_SHIFT and RNT_SCALE
 * in *rntp.  No base values are subtracted or added here.
 *
 * tmrCvt(value,tscFCvtt2n) could have been used instead, which would avoid the
 * need for this asm.  That is a bit more risky, because it would be a different
 * algorithm with possibly different rounding etc.
 *
 * Returns the result in %rax; %rcx, %rdx and the flags are modified.
 */

ENTRY(_rtc_tsc_to_nanoseconds)
	movl	RNT_SCALE(%rsi),%edx		/* %rdx = scale (32-bit load zero-extends) */
	movl	RNT_SHIFT(%rsi),%ecx		/* %cl = shift count for shlq below */
	movq	%rdi,%rax			/* %rax = value in TSC units */
	shlq	%cl,%rax			/* tscUnits << shift */
	mulq	%rdx				/* %rdx:%rax = (tscUnits << shift) * scale */
	shrdq	$32,%rdx,%rax			/* drop the low 32 fraction bits */
	ret
175
176
177
/*
 *	call_continuation
 *
 *	Run a continuation on this CPU's kernel stack.
 *
 *	In:	%rdi = continuation (address of the function to call)
 *		%rsi = continuation parameter
 *		%rdx = wait result
 *
 *	The stack pointer is reloaded from %gs:CPU_KERNEL_STACK and the
 *	frame pointer is zeroed, so there is no caller frame to go back to:
 *	this routine never returns.  If the continuation itself returns,
 *	thread_terminate is called on the thread at %gs:CPU_ACTIVE_THREAD.
 */
Entry(call_continuation)
	movq	%rdi,%rcx			/* get continuation */
	movq	%rsi,%rdi			/* continuation param -> first argument */
	movq	%rdx,%rsi			/* wait result -> second argument */
	movq	%gs:CPU_KERNEL_STACK,%rsp	/* set the stack */
	xorq	%rbp,%rbp			/* zero frame pointer (ends the frame chain) */
	call	*%rcx				/* call continuation */
	movq	%gs:CPU_ACTIVE_THREAD,%rdi	/* continuation returned: end this thread */
	call	EXT(thread_terminate)
	/*
	 * Not expected to be reached.  Trap here instead of running on into
	 * whatever code follows should thread_terminate ever return; this also
	 * keeps the return address of the call above inside this routine.
	 */
	ud2
187
/*
 *	x86_init_wrapper
 *
 *	Switch to a new stack and call a function on it.
 *
 *	In:	%rdi = address of the function to call
 *		%rsi = value to load into %rsp before the call
 *
 *	All other registers are passed through to the callee unmodified.
 *	The previous stack pointer is discarded and the frame pointer is
 *	zeroed, so there is nothing to return to: the callee must not return.
 *	NOTE(review): presumably the caller supplies a 16-byte aligned
 *	stack address in %rsi -- confirm at the call sites.
 */
Entry(x86_init_wrapper)
	xor	%rbp, %rbp			/* zero frame pointer (ends the frame chain) */
	movq	%rsi, %rsp			/* adopt the new stack */
	callq	*%rdi
	/*
	 * Not expected to be reached.  Trap here instead of running on into
	 * whatever code follows should the callee ever return; this also keeps
	 * the return address of the call above inside this routine.
	 */
	ud2
192
193	/*
194	* Generate a 64-bit quantity with possibly random characteristics, intended for use
195	* before the kernel entropy pool is available. The processor's RNG is used if
196	* available, and a value derived from the Time Stamp Counter is returned if not.
197	* Multiple invocations may result in well-correlated values if sourced from the TSC.
198	*/
Entry(ml_early_random)
	/*
	 * Returns the 64-bit value in %rax.  %rbx is preserved;
	 * %rcx, %rdx, %rsi and the flags are modified.
	 */
	movq	%rbx, %rsi		/* cpuid overwrites %rbx: park it in %rsi */
	movl	$1, %eax		/* CPUID leaf 1: feature information */
	cpuid
	movq	%rsi, %rbx		/* put %rbx back */
	testl	$(1 << 30), %ecx	/* leaf 1 ECX bit 30: RDRAND supported? */
	jz	Lnon_rdrand
	RDRAND_RAX			/* RAX := 64 bits of DRBG entropy */
	jnc	Lnon_rdrand		/* CF clear: no random value was delivered */
	ret
Lnon_rdrand:
	rdtsc				/* EDX:EAX := TSC */
	/* Spread the low order bits of the TSC around */
	movl	%eax, %ecx		/* %rcx = low 32 bits of the TSC */
	xorb	%al, %ah		/* fold TSC byte 0 into byte 1 */
	shlq	$16, %rcx
	xorq	%rcx, %rax		/* mix in the low word shifted up by 16 */
	xorl	%eax, %edx		/* and fold the result into the high word */

	/* Incorporate ASLR entropy, if any */
	leaq	(%rip), %rcx		/* %rcx = address of the next instruction */
	shrq	$21, %rcx
	movzbl	%cl, %ecx		/* keep address bits 28..21 */
	shll	$16, %ecx
	xorl	%ecx, %edx		/* xor them into bits 23..16 of %edx */

	movb	%ah, %cl		/* rotate count = (TSC & 0xFF) ^ (TSC>>8 & 0xFF) */
	rorl	%cl, %edx		/* right rotate EDX; only the low 5 bits of %cl count */
	shlq	$32, %rdx
	xorq	%rdx, %rax		/* merge the mixed high word into bits 63..32 */
	movb	%cl, %al		/* low byte of the result = the rotate count byte */
	ret
231
232#if CONFIG_VMX
233
/*
 *	__vmxon -- Enter VMX Operation
 *	int __vmxon(addr64_t v);
 *
 *	In:	%rdi = v.  vmxon takes its operand from memory, so v is
 *		pushed and the instruction reads it from the top of stack.
 *	Out:	%eax = VMX_SUCCEED		if vmxon left CF and ZF clear
 *		       VMX_FAIL_INVALID		if vmxon set CF
 *		       VMX_FAIL_VALID		if vmxon set ZF
 *
 *	%rcx, %rdx and the flags are modified.  FRAME and EMARF are macros
 *	defined outside this file that bracket the body.
 */
Entry(__vmxon)
	FRAME
	push	%rdi			/* place v in memory for vmxon */

	/* Load every possible status up front; mov leaves the flags alone */
	movl	$(VMX_SUCCEED), %eax
	movl	$(VMX_FAIL_VALID), %edx
	movl	$(VMX_FAIL_INVALID), %ecx
	vmxon	(%rsp)
	cmovcl 	%ecx, %eax	/* CF = 1, ZF = 0 */
	cmovzl	%edx, %eax	/* CF = 0, ZF = 1 */

	pop	%rdi			/* drop the temporary; %rdi is unchanged */
	EMARF
	ret
252
/*
 *	__vmxoff -- Leave VMX Operation
 *	int __vmxoff(void);
 *
 *	Out:	%eax = VMX_SUCCEED		if vmxoff left CF and ZF clear
 *		       VMX_FAIL_INVALID		if vmxoff set CF
 *		       VMX_FAIL_VALID		if vmxoff set ZF
 *
 *	%rcx, %rdx and the flags are modified.  FRAME and EMARF are macros
 *	defined outside this file that bracket the body.
 */
Entry(__vmxoff)
	FRAME

	/* Load every possible status up front; mov leaves the flags alone */
	movl	$(VMX_SUCCEED), %eax
	movl	$(VMX_FAIL_VALID), %edx
	movl	$(VMX_FAIL_INVALID), %ecx
	vmxoff
	cmovcl 	%ecx, %eax	/* CF = 1, ZF = 0 */
	cmovzl	%edx, %eax	/* CF = 0, ZF = 1 */

	EMARF
	ret
269
270#endif /* CONFIG_VMX */
271
/*
 *	mfence -- Memory Barrier
 *
 *	Executes a single mfence and returns.  It takes no arguments and
 *	modifies no registers.
 *
 *	This is deliberately out-of-line assembly rather than an inline
 *	sequence: as a real function call it gets the standard x86-64 ABI
 *	guarantees about what the caller's codegen has in registers vs.
 *	memory.
 */
Entry(do_mfence)
	mfence				/* the barrier itself */
	ret
282