1/* 2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29#include <i386/asm.h> 30#include <i386/apic.h> 31#include <i386/eflags.h> 32#include <i386/rtclock_asm.h> 33#include <i386/postcode.h> 34#include <i386/proc_reg.h> 35#include <assym.s> 36 37/* 38** ml_get_timebase() 39** 40** Entry - %esp contains pointer to 64 bit structure. 41** 42** Exit - 64 bit structure filled in. 
**
*/
ENTRY(ml_get_timebase)

	movl	S_ARG0, %ecx		/* %ecx = caller's pointer to the 64-bit result */

	lfence				/* fence both sides of rdtsc so the TSC read is */
	rdtsc				/*   not reordered; TSC -> %edx:%eax (high:low) */
	lfence

	movl	%edx, 0(%ecx)		/* store high word at offset 0... */
	movl	%eax, 4(%ecx)		/* ...low word at offset 4 */
					/* NOTE(review): this is high:low word order, not a
					 * native little-endian uint64_t layout -- confirm
					 * consumers expect this field ordering. */
	ret

/*
 * Convert between various timer units
 *
 *	uint64_t tmrCvt(uint64_t time, uint64_t conversion)
 *
 *	(NOTE(review): the conversion factor is read by value from the stack
 *	at 16/20(%ebp), not through a pointer.)
 *
 * This code converts 64-bit time units to other units.
 * For example, the TSC is converted to HPET units.
 *
 * Time is a 64-bit integer that is some number of ticks.
 * Conversion is 64-bit fixed point number which is composed
 * of a 32 bit integer and a 32 bit fraction.
 *
 * The time ticks are multiplied by the conversion factor.  The
 * calculations are done as a 128-bit value but both the high
 * and low words are dropped.  The high word is overflow and the
 * low word is the fraction part of the result.
 *
 * We return a 64-bit value.
 *
 * Note that we can use this function to multiply 2 conversion factors.
 * We do this in order to calculate the multiplier used to convert
 * directly between any two units.
 *
 */

	.globl	EXT(tmrCvt)
	.align	FALIGN

LEXT(tmrCvt)

	pushl	%ebp		// save caller's frame pointer (callee-saved)
	movl	%esp,%ebp	// set up frame; arguments start at 8(%ebp)
	pushl	%ebx		// save the callee-saved registers we scratch
	pushl	%esi
	pushl	%edi

// Incoming arguments (cdecl; each 64-bit value split into 32-bit words):
//	%ebp +  8 - low-order ts
//	%ebp + 12 - high-order ts
//	%ebp + 16 - low-order cvt
//	%ebp + 20 - high-order cvt
//
// The 128-bit product ts*cvt is built from four 32x32->64 partial
// products.  The value returned is bits 32..95 of that product: the
// bottom word (fraction) and the top word (overflow) are dropped.

	movl	8(%ebp),%eax	// Get low-order ts
	mull	16(%ebp)	// ts_lo * cvt_lo -> %edx:%eax
	movl	%edx,%edi	// keep only the high word (the bit-32 column)

	movl	12(%ebp),%eax	// Get the high-order ts
	mull	16(%ebp)	// ts_hi * cvt_lo -> %edx:%eax
	addl	%eax,%edi	// accumulate into the bit-32 column
	adcl	$0,%edx		// propagate carry into the bit-64 column
	movl	%edx,%esi	// save the bit-64 column

// We now have the upper 64 bits of the 96-bit multiply of ts and the
// low half of cvt in %esi:%edi.

	movl	8(%ebp),%eax	// Get low-order ts
	mull	20(%ebp)	// ts_lo * cvt_hi -> %edx:%eax
	movl	%eax,%ebx	// low part belongs in the bit-32 column
	movl	%edx,%ecx	// high part belongs in the bit-64 column

	movl	12(%ebp),%eax	// Get the high-order ts
	mull	20(%ebp)	// ts_hi * cvt_hi -> %edx:%eax

// %ecx:%ebx holds ts_lo*cvt_hi and %edx:%eax holds ts_hi*cvt_hi.
// The highest word (%edx) is pure overflow and is discarded.

	addl	%edi,%ebx	// sum the bit-32 columns
	adcl	%ecx,%esi	// sum the bit-64 columns, plus carry from low
	addl	%eax,%esi	// add in the rest of ts_hi*cvt_hi

	movl	%ebx,%eax	// return low word in %eax
	movl	%esi,%edx	// ...and high word in %edx (cdecl 64-bit return)

	popl	%edi		// restore callee-saved registers
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret			// Leave...


/* void _rtc_nanotime_adjust(
 *		uint64_t	tsc_base_delta,
 *		rtc_nanotime_t	*dst);
 *
 * Add a delta to the tsc_base field of *dst, using the generation count
 * as the consistency protocol: generation is forced to 0 while the record
 * is being modified and set to a fresh non-zero value afterwards, so that
 * lock-free readers can detect and retry around the update.
 *
 * NOTE(review): only the low 32 bits of tsc_base_delta (at 4(%esp)) are
 * added; the argument's high word at 8(%esp) is never read, though the
 * carry is still propagated into the high word of tsc_base.
 */
	.globl	EXT(_rtc_nanotime_adjust)
	.align	FALIGN

LEXT(_rtc_nanotime_adjust)
	mov	12(%esp),%edx			/* %edx = dst, ptr to rtc_nanotime_info */

	movl	RNT_GENERATION(%edx),%ecx	/* get current generation */
	movl	$0,RNT_GENERATION(%edx)		/* flag data as being updated (readers spin) */

	movl	4(%esp),%eax			/* get lower 32-bits of delta */
	addl	%eax,RNT_TSC_BASE(%edx)
	adcl	$0,RNT_TSC_BASE+4(%edx)		/* propagate carry into high word */

	incl	%ecx				/* next generation */
	jnz	1f
	incl	%ecx				/* skip 0, which is the "updating" flag */
1:	movl	%ecx,RNT_GENERATION(%edx)	/* publish: data is consistent again */

	ret


/* uint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp, int slow);
 *
 * This is the same as the commpage nanotime routine, except that it uses the
 * kernel internal "rtc_nanotime_info" data instead of the commpage data.
 * The two copies of data (one in the kernel and one in user space) are kept
 * in sync by rtc_clock_napped().
 *
 * Warning! There is another copy of this code in osfmk/i386/locore.s.
 * The two versions must be kept in sync with each other!
 *
 * There are actually two versions of the algorithm, one each for "slow" and
 * "fast" processors.  The more common "fast" algorithm is:
 *
 *	nanoseconds = (((rdtsc - rnt_tsc_base) * rnt_tsc_scale) / 2**32) + rnt_ns_base;
 *
 * (the code below adds rnt_ns_base; an earlier comment showed a subtraction.)
 * Of course, the divide by 2**32 is a nop.  rnt_tsc_scale is a constant
 * computed during initialization:
 *
 *	rnt_tsc_scale = (10^9 * 2**32) / tscFreq;
 *
 * The "slow" algorithm uses long division:
 *
 *	nanoseconds = (((rdtsc - rnt_tsc_base) * 10^9) / tscFreq) + rnt_ns_base;
 *
 * Since this routine is not synchronized and can be called in any context,
 * we use a generation count to guard against seeing partially updated data.
 * In addition, the _rtc_nanotime_store() routine (defined elsewhere; not
 * visible in this file) zeroes the generation before updating the data, and
 * stores the nonzero generation only after all other data has been stored.
 * Because IA32 guarantees that stores by one processor must be seen in order
 * by another, we can avoid using a lock.  We spin while the generation is zero.
 *
 * In accordance with the ABI, we return the 64-bit nanotime in %edx:%eax.
 */

	.globl	EXT(_rtc_nanotime_read)
	.align	FALIGN
LEXT(_rtc_nanotime_read)
	pushl	%ebp				/* standard frame; args at 8/12(%ebp) */
	movl	%esp,%ebp
	pushl	%esi				/* save callee-saved registers */
	pushl	%edi
	pushl	%ebx
	movl	8(%ebp),%edi			/* get ptr to rtc_nanotime_info */
	movl	12(%ebp),%eax			/* get "slow" flag */
	testl	%eax,%eax
	jnz	Lslow				/* slow TSC: take the long-division path */

	/* Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD */
	PAL_RTC_NANOTIME_READ_FAST()

	popl	%ebx
	popl	%edi
	popl	%esi
	popl	%ebp
	ret

	/* Processor whose TSC frequency is slower than or equal to SLOW_TSC_THRESHOLD */
Lslow:
	movl	RNT_GENERATION(%edi),%esi	/* get generation (0 if being changed) */
	testl	%esi,%esi			/* if being changed, loop until stable */
	jz	Lslow
	pushl	%esi				/* save generation */
	pushl	RNT_SHIFT(%edi)			/* save low 32 bits of tscFreq */
						/* (the shift field apparently holds tscFreq
						 * on slow processors -- TODO confirm against
						 * rtc_nanotime_t initialization) */

	lfence
	rdtsc					/* get TSC in %edx:%eax */
	lfence
	subl	RNT_TSC_BASE(%edi),%eax		/* 64-bit subtract of tsc_base */
	sbbl	RNT_TSC_BASE+4(%edi),%edx

	/*
	 * Convert TSC ticks to nanoseconds: a 96-bit long multiply of the
	 * 64-bit tick delta by 10^9, followed by a 96-by-32 long division
	 * by the TSC frequency.
	 */
	mov	$1000000000, %ecx	/* number of nanoseconds in a second */
	mov	%edx, %ebx		/* stash high word of the tick delta */
	mul	%ecx			/* ticks_lo * 10^9 -> %edx:%eax */
	mov	%edx, %edi
	mov	%eax, %esi		/* %esi = lowest word of the product */
	mov	%ebx, %eax
	mul	%ecx			/* ticks_hi * 10^9 -> %edx:%eax */
	add	%edi, %eax		/* fold in overflow word of the low product */
	adc	$0, %edx		/* 96-bit product now in %edx:%eax:%esi */
	mov	%eax, %edi
	popl	%ecx			/* get low 32 tscFreq (the divisor) */
	xor	%eax, %eax
	xchg	%edx, %eax		/* divide most-significant word first: 0:hi */
	div	%ecx			/* remainder carries down in %edx */
	xor	%eax, %eax		/* (immediately overwritten below) */
	mov	%edi, %eax		/* next word of the dividend */
	div	%ecx
	mov	%eax, %ebx		/* %ebx = high word of the quotient */
	mov	%esi, %eax		/* last word of the dividend */
	div	%ecx
	mov	%ebx, %edx		/* 64-bit quotient in %edx:%eax */

	movl	8(%ebp),%edi		/* recover ptr to rtc_nanotime_info */
	popl	%esi			/* recover generation */

	addl	RNT_NS_BASE(%edi),%eax	/* 64-bit add of ns_base */
	adcl	RNT_NS_BASE+4(%edi),%edx

	cmpl	RNT_GENERATION(%edi),%esi	/* have the parameters changed? */
	jne	Lslow				/* yes, loop until stable */

	pop	%ebx			/* restore callee-saved registers */
	pop	%edi
	pop	%esi
	pop	%ebp
	ret				/* result in %edx:%eax */



/*
 * Timing routines.
 */

/*
 * timer_update(timer, high, low)
 *
 * Store a 64-bit timer value for lock-free readers: the high word is
 * written to the check field first, then low, then high.  A reader that
 * observes HIGH == HIGHCHK knows LOW belongs to the same update.
 */
Entry(timer_update)
	movl	4(%esp),%ecx			/* timer */
	movl	8(%esp),%eax			/* new high word */
	movl	12(%esp),%edx			/* new low word */
	movl	%eax,TIMER_HIGHCHK(%ecx)	/* check word first... */
	movl	%edx,TIMER_LOW(%ecx)
	movl	%eax,TIMER_HIGH(%ecx)		/* ...high word last */
	ret

/*
 * timer_grab(timer)
 *
 * Read a 64-bit timer value without locking; retry until HIGH matches
 * HIGHCHK, which guarantees LOW was not torn by a concurrent
 * timer_update.  Result returned in %edx:%eax.
 */
Entry(timer_grab)
	movl	4(%esp),%ecx
0:	movl	TIMER_HIGH(%ecx),%edx
	movl	TIMER_LOW(%ecx),%eax
	cmpl	TIMER_HIGHCHK(%ecx),%edx
	jne	0b				/* torn read: try again */
	ret


/*
 * call_continuation(continuation, parameter, wait_result)
 *
 * Reset %esp to the top of the current thread's kernel stack, zero the
 * frame pointer (terminating backtraces), and invoke the continuation
 * with (parameter, wait_result).  A continuation is not expected to
 * return; if it does, the current thread is handed to thread_terminate.
 * This routine never returns to its caller.
 */
Entry(call_continuation)
	movl	S_ARG0,%eax		/* get continuation */
	movl	S_ARG1,%edx		/* continuation param */
	movl	S_ARG2,%ecx		/* wait result */
	movl	%gs:CPU_KERNEL_STACK,%esp /* pop the stack */
	xorl	%ebp,%ebp		/* zero frame pointer */
	subl	$8,%esp			/* align the stack (8 + two pushes = 16) */
	pushl	%ecx
	pushl	%edx
	call	*%eax			/* call continuation */
	addl	$16,%esp
	movl	%gs:CPU_ACTIVE_THREAD,%eax
	pushl	%eax
	call	EXT(thread_terminate)	/* continuation returned: kill the thread */


/*
 * ml_early_random()
 *
 * Early-boot entropy stub: always returns 0 in %eax.
 * NOTE(review): only %eax is zeroed; if callers treat the result as a
 * 64-bit value, %edx is left unchanged -- confirm callers use 32 bits.
 */
Entry(ml_early_random)
	xor	%eax, %eax
	ret