/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <i386/asm.h>
#include <i386/rtclock.h>
#include <i386/proc_reg.h>
#include <i386/eflags.h>

#include <i386/postcode.h>
#include <i386/apic.h>
#include <assym.s>

/*
**      ml_get_timebase()
**
**      Entry   - S_ARG0 (on the stack) holds a pointer to a 64 bit structure.
**
**      Exit    - 64 bit structure filled in.
**
*/
ENTRY(ml_get_timebase)

			movl    S_ARG0, %ecx			/* get pointer to the result structure */

			lfence					/* fence rdtsc against reordering... */
			rdtsc					/* read the TSC into %edx:%eax */
			lfence					/* ...on both sides */

			movl    %edx, 0(%ecx)			/* store the high-order word first */
			movl    %eax, 4(%ecx)			/* then the low-order word */

			ret
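
/*
 * Illustrative only (not part of the build): a rough C equivalent of
 * ml_get_timebase(), assuming the argument points at a pair of 32-bit words
 * stored high word first, matching the stores above.  The struct and
 * function names here are hypothetical.
 *
 *	#include <stdint.h>
 *
 *	typedef struct { uint32_t hi; uint32_t lo; } timebase_words_t;
 *
 *	static void ml_get_timebase_c(timebase_words_t *tbp)
 *	{
 *		uint32_t lo, hi;
 *
 *		__asm__ volatile ("lfence; rdtsc; lfence" : "=a" (lo), "=d" (hi));
 *		tbp->hi = hi;		// %edx goes to offset 0
 *		tbp->lo = lo;		// %eax goes to offset 4
 *	}
 */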

/*
 *  	Convert between various timer units
 *
 *		uint64_t tmrCvt(uint64_t time, uint64_t conversion)
 *
 *		This code converts 64-bit time units to other units.
 *		For example, the TSC is converted to HPET units.
 *
 *		Time is a 64-bit integer that is some number of ticks.
 *		Conversion is a 64-bit fixed-point number composed of a
 *		32-bit integer part and a 32-bit fraction.
 *
 *		The time ticks are multiplied by the conversion factor,
 *		giving a 128-bit product.  The high 32 bits (overflow) and
 *		the low 32 bits (the fraction part of the result) are
 *		dropped, and the middle 64 bits are returned.
 *
 *		Note that we can use this function to multiply two conversion
 *		factors.  We do this in order to calculate the multiplier used
 *		to convert directly between any two units.
 *
 */
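
/*
 * Illustrative only (not part of the build): a C sketch of the fixed-point
 * multiply done below, using unsigned __int128 (available on 64-bit
 * compilers) for the 128-bit intermediate instead of 32-bit partial products.
 *
 *	#include <stdint.h>
 *
 *	static uint64_t tmrCvt_c(uint64_t time, uint64_t conversion)
 *	{
 *		// 64 x 64 -> 128-bit product; dropping the low 32 bits (the
 *		// fraction) and the high 32 bits (overflow) leaves the middle
 *		// 64 bits, which is the converted tick count.
 *		return (uint64_t)(((unsigned __int128)time * conversion) >> 32);
 *	}
 */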

			.globl	EXT(tmrCvt)
			.align FALIGN

LEXT(tmrCvt)

			pushl	%ebp					// Save a non-volatile register
			movl	%esp,%ebp				// Set up the frame pointer; args start at 8(%ebp)
			pushl	%ebx					// Save a non-volatile register
			pushl	%esi					// Save a non-volatile register
			pushl	%edi					// Save a non-volatile register

//			%ebp + 8	- low-order ts
//			%ebp + 12	- high-order ts
//			%ebp + 16	- low-order cvt
//			%ebp + 20	- high-order cvt

			movl	8(%ebp),%eax			// Get low-order ts
			mull	16(%ebp)				// Multiply by low-order conversion
			movl	%edx,%edi				// Need to save only the high order part

			movl	12(%ebp),%eax			// Get the high-order ts
			mull	16(%ebp)				// Multiply by low-order conversion
			addl	%eax,%edi				// Add in the overflow from the low x low calculation
			adcl	$0,%edx					// Propagate any carry into the high word
			movl	%edx,%esi				// Save the high word

//			We now have the upper 64 bits of the 96 bit multiply of ts and the low half of cvt
//			in %esi:%edi

			movl	8(%ebp),%eax			// Get low-order ts
			mull	20(%ebp)				// Multiply by high-order conversion
			movl	%eax,%ebx				// Need to save the low order part
			movl	%edx,%ecx				// Need to save the high order part

			movl	12(%ebp),%eax			// Get the high-order ts
			mull	20(%ebp)				// Multiply by high-order conversion

//			Now %ecx:%ebx holds (low-order ts x high-order cvt) and %edx:%eax holds (high-order ts x high-order cvt)
//			We don't care about the highest word since it is overflow

			addl	%edi,%ebx				// Add the low words
			adcl	%ecx,%esi				// Add in the high plus carry from low
			addl	%eax,%esi				// Add in the rest of the high

			movl	%ebx,%eax				// Pass back low word
			movl	%esi,%edx				// and the high word

			popl	%edi					// Restore a non-volatile register
			popl	%esi					// Restore a non-volatile register
			popl	%ebx					// Restore a non-volatile register
			popl	%ebp					// Restore a non-volatile register

			ret						// Leave...

			.globl	EXT(_rtc_nanotime_store)
			.align	FALIGN

LEXT(_rtc_nanotime_store)
		push		%ebp
		movl		%esp,%ebp
		push		%esi

		mov		32(%ebp),%edx				/* get ptr to rtc_nanotime_info */

		movl		RNT_GENERATION(%edx),%esi		/* get current generation */
		movl		$0,RNT_GENERATION(%edx)			/* flag data as being updated */

		mov		8(%ebp),%eax				/* store the 64-bit tsc_base... */
		mov		%eax,RNT_TSC_BASE(%edx)
		mov		12(%ebp),%eax
		mov		%eax,RNT_TSC_BASE+4(%edx)		/* ...one 32-bit word at a time */

		mov		24(%ebp),%eax
		mov		%eax,RNT_SCALE(%edx)			/* store the scale factor */

		mov		28(%ebp),%eax
		mov		%eax,RNT_SHIFT(%edx)			/* store the shift (the slow path reads tscFreq here) */

		mov		16(%ebp),%eax				/* store the 64-bit ns_base */
		mov		%eax,RNT_NS_BASE(%edx)
		mov		20(%ebp),%eax
		mov		%eax,RNT_NS_BASE+4(%edx)

		incl		%esi					/* next generation */
		jnz		1f
		incl		%esi					/* skip 0, which is a flag */
1:		movl		%esi,RNT_GENERATION(%edx)		/* update generation and make usable */

		pop		%esi
		pop		%ebp
		ret
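
/*
 * Illustrative only (not part of the build): a C sketch of the update
 * protocol implemented by _rtc_nanotime_store() above.  The structure below
 * only approximates the rtc_nanotime_info layout (the real offsets come from
 * assym.s), and a real C version would also need compiler barriers to
 * preserve the store ordering the assembly relies on.
 *
 *	#include <stdint.h>
 *
 *	typedef struct {
 *		uint64_t	tsc_base;
 *		uint64_t	ns_base;
 *		uint32_t	scale;
 *		uint32_t	shift;
 *		uint32_t	generation;	// 0 means "being updated"
 *	} rtc_nanotime_sketch_t;
 *
 *	static void rtc_nanotime_store_c(uint64_t tsc_base, uint64_t ns_base,
 *					 uint32_t scale, uint32_t shift,
 *					 rtc_nanotime_sketch_t *rntp)
 *	{
 *		uint32_t next = rntp->generation + 1;
 *
 *		rntp->generation = 0;		// flag the data as being updated
 *		rntp->tsc_base   = tsc_base;
 *		rntp->ns_base    = ns_base;
 *		rntp->scale      = scale;
 *		rntp->shift      = shift;
 *		if (next == 0)
 *			next = 1;		// skip 0, which is the update flag
 *		rntp->generation = next;	// publish the new values
 *	}
 */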


/* uint64_t _rtc_nanotime_read( rtc_nanotime_t *rntp, int slow );
 *
 * This is the same as the commpage nanotime routine, except that it uses the
 * kernel internal "rtc_nanotime_info" data instead of the commpage data.  The two copies
 * of data (one in the kernel and one in user space) are kept in sync by rtc_clock_napped().
 *
 * Warning!  There is another copy of this code in osfmk/i386/locore.s.  The
 * two versions must be kept in sync with each other!
 *
 * There are actually two versions of the algorithm, one each for "slow" and "fast"
 * processors.  The more common "fast" algorithm is:
 *
 *	nanoseconds = (((rdtsc - rnt_tsc_base) * rnt_tsc_scale) / 2**32) + rnt_ns_base;
 *
 * The divide by 2**32 costs nothing: the low 32 bits of the product are simply
 * dropped.  rnt_tsc_scale is a constant computed during initialization:
 *
 *	rnt_tsc_scale = (10**9 * 2**32) / tscFreq;
 *
 * The "slow" algorithm uses long division:
 *
 *	nanoseconds = (((rdtsc - rnt_tsc_base) * 10**9) / tscFreq) + rnt_ns_base;
 *
 * Since this routine is not synchronized and can be called in any context,
 * we use a generation count to guard against seeing partially updated data.  In addition,
 * the _rtc_nanotime_store() routine -- just above -- zeroes the generation before
 * updating the data, and stores the nonzero generation only after all other data has been
 * stored.  Because IA32 guarantees that stores by one processor must be seen in order
 * by another, we can avoid using a lock.  We spin while the generation is zero.
 *
 * In accordance with the ABI, we return the 64-bit nanotime in %edx:%eax.
 */
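
/*
 * Illustrative only (not part of the build): a C sketch of the "fast"
 * algorithm and generation check described above, reusing the hypothetical
 * rtc_nanotime_sketch_t from the sketch after _rtc_nanotime_store().
 * rdtsc64() stands in for the lfence/rdtsc/lfence sequence, and
 * unsigned __int128 (64-bit compilers) for the 96-bit intermediate.
 *
 *	static uint64_t rtc_nanotime_read_fast_c(const rtc_nanotime_sketch_t *rntp)
 *	{
 *		uint64_t ns;
 *		uint32_t gen;
 *
 *		do {
 *			while ((gen = rntp->generation) == 0)
 *				;		// data being updated, spin
 *			ns = (uint64_t)(((unsigned __int128)(rdtsc64() - rntp->tsc_base)
 *					* rntp->scale) >> 32) + rntp->ns_base;
 *		} while (gen != rntp->generation);	// retry if it changed underneath us
 *
 *		return ns;
 *	}
 */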

		.globl	EXT(_rtc_nanotime_read)
		.align	FALIGN
LEXT(_rtc_nanotime_read)
		pushl		%ebp
		movl		%esp,%ebp
		pushl		%esi
		pushl		%edi
		pushl		%ebx
		movl		8(%ebp),%edi				/* get ptr to rtc_nanotime_info */
		movl		12(%ebp),%eax				/* get "slow" flag */
		testl		%eax,%eax
		jnz		Lslow

		/* Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD */
		RTC_NANOTIME_READ_FAST()

		popl		%ebx
		popl		%edi
		popl		%esi
		popl		%ebp
		ret

		/* Processor whose TSC frequency is slower than or equal to SLOW_TSC_THRESHOLD */
Lslow:
		movl		RNT_GENERATION(%edi),%esi		/* get generation (0 if being changed) */
		testl		%esi,%esi				/* if being changed, loop until stable */
		jz		Lslow
		pushl		%esi					/* save generation */
		pushl		RNT_SHIFT(%edi)				/* save low 32 bits of tscFreq */

		lfence
		rdtsc	  						/* get TSC in %edx:%eax */
		lfence
		subl		RNT_TSC_BASE(%edi),%eax
		sbbl		RNT_TSC_BASE+4(%edi),%edx

		/*
		 * Do the math to convert tsc ticks to nanoseconds: a long
		 * multiply of the tsc delta by one billion, followed by a
		 * long division by the tsc frequency (see the C sketch after
		 * this routine).
		 */
		mov		$1000000000, %ecx			/* number of nanoseconds in a second */
		mov		%edx, %ebx
		mul		%ecx
		mov		%edx, %edi
		mov		%eax, %esi
		mov		%ebx, %eax
		mul		%ecx
		add		%edi, %eax
		adc		$0, %edx				/* result in edx:eax:esi */
		mov		%eax, %edi
		popl		%ecx					/* get low 32 bits of tscFreq */
		xor		%eax, %eax
		xchg		%edx, %eax				/* divide the high word of the product first */
		div		%ecx					/* (quotient discarded, remainder carries down) */
		xor		%eax, %eax
		mov		%edi, %eax				/* then the middle word */
		div		%ecx
		mov		%eax, %ebx				/* quotient is the high word of the result */
		mov		%esi, %eax				/* then the low word */
		div		%ecx
		mov		%ebx, %edx				/* result in edx:eax */

		movl		8(%ebp),%edi				/* recover ptr to rtc_nanotime_info */
		popl		%esi					/* recover generation */

		addl		RNT_NS_BASE(%edi),%eax
		adcl		RNT_NS_BASE+4(%edi),%edx

		cmpl		RNT_GENERATION(%edi),%esi		/* have the parameters changed? */
		jne		Lslow					/* yes, loop until stable */

		pop		%ebx
		pop		%edi
		pop		%esi
		pop		%ebp
		ret							/* result in edx:eax */
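
/*
 * Illustrative only (not part of the build): a C sketch of the slow-path
 * arithmetic above.  The assembly forms the 96-bit product of the tsc delta
 * and 10**9, then divides it by the low 32 bits of tscFreq one word at a
 * time; assuming the quotient fits in 64 bits and unsigned __int128 is
 * available (64-bit compilers), that is equivalent to:
 *
 *	#include <stdint.h>
 *
 *	static uint64_t rtc_nanotime_slow_c(uint64_t tsc_delta, uint32_t tscFreq,
 *					    uint64_t ns_base)
 *	{
 *		return (uint64_t)(((unsigned __int128)tsc_delta * 1000000000ULL)
 *				/ tscFreq) + ns_base;
 *	}
 */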