1/* 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29#include <i386/asm.h> 30#include <i386/rtclock.h> 31#include <i386/proc_reg.h> 32#include <i386/eflags.h> 33 34#include <i386/postcode.h> 35#include <i386/apic.h> 36#include <assym.s> 37 38/* 39** ml_get_timebase() 40** 41** Entry - %esp contains pointer to 64 bit structure. 42** 43** Exit - 64 bit structure filled in. 
**
*/
/*
 * Register use: %ecx holds the destination pointer taken from S_ARG0;
 * %eax/%edx receive the TSC from rdtsc.  No registers are saved because
 * only %eax, %ecx and %edx are written.
 *
 * NOTE(review): the high TSC word is written at offset 0 and the low word
 * at offset 4, which is not the in-memory layout of a native little-endian
 * 64-bit integer -- confirm that callers expect this {high, low} ordering.
 */
ENTRY(ml_get_timebase)

	movl	S_ARG0, %ecx		/* %ecx = address of the caller's 64 bit structure */

	lfence				/* rdtsc is bracketed by fences; presumably to keep the */
	rdtsc				/* TSC read from being reordered -- %edx:%eax = TSC     */
	lfence

	movl	%edx, 0(%ecx)		/* high 32 bits of the TSC -> first word  */
	movl	%eax, 4(%ecx)		/* low 32 bits of the TSC  -> second word */

	ret

/*
 * Convert between various timer units
 *
 * uint64_t tmrCvt(uint64_t time, uint64_t conversion)
 *
 * This code converts 64-bit time units to other units.
 * For example, the TSC is converted to HPET units.
 *
 * Time is a 64-bit integer that is some number of ticks.
 * Conversion is a 64-bit fixed-point number which is composed
 * of a 32 bit integer and a 32 bit fraction.  It is passed by
 * value: the code multiplies directly by the two stack words
 * that follow the time argument.
 *
 * The time ticks are multiplied by the conversion factor.  The
 * calculations are done as a 128-bit value but both the high
 * and low words are dropped.  The high word is overflow and the
 * low word is the fraction part of the result.
 *
 * We return a 64-bit value.
 *
 * Note that we can use this function to multiply 2 conversion factors.
 * We do this in order to calculate the multiplier used to convert
 * directly between any two units.
*/

	.globl	EXT(tmrCvt)
	.align	FALIGN

LEXT(tmrCvt)

/*
 * Stack frame after the prologue:
 *	 8(%ebp)  ts.lo		low-order word of the time value
 *	12(%ebp)  ts.hi		high-order word of the time value
 *	16(%ebp)  cvt.lo	low-order (fraction) word of the conversion
 *	20(%ebp)  cvt.hi	high-order (integer) word of the conversion
 *
 * The 128-bit product is built from four 32x32 partial products and
 * bits 32..95 of it are returned in %edx:%eax.
 * %ebx, %esi and %edi are saved and restored; %ecx is used as scratch.
 */
	pushl	%ebp			// Set up a frame so the
	movl	%esp,%ebp		//   arguments start at 8(%ebp)
	pushl	%ebx			// Preserved for the caller
	pushl	%esi			// Preserved for the caller
	pushl	%edi			// Preserved for the caller

// Partial product ts.lo * cvt.lo: only its upper word reaches the result
	movl	8(%ebp),%eax		// ts.lo
	mull	16(%ebp)		//   * cvt.lo
	movl	%edx,%edi		// %edi = bits 32..63 so far

// Partial product ts.hi * cvt.lo
	movl	12(%ebp),%eax		// ts.hi
	mull	16(%ebp)		//   * cvt.lo
	addl	%eax,%edi		// Fold its low word into bits 32..63
	adcl	$0,%edx			// Carry ripples into bits 64..95
	movl	%edx,%esi		// %esi = bits 64..95 so far

// %esi:%edi now holds the upper 64 bits of the 96-bit product ts * cvt.lo

// Partial product ts.lo * cvt.hi
	movl	8(%ebp),%eax		// ts.lo
	mull	20(%ebp)		//   * cvt.hi
	movl	%edx,%ecx		// %ecx = contribution to bits 64..95
	movl	%eax,%ebx		// %ebx = contribution to bits 32..63

// Partial product ts.hi * cvt.hi
	movl	12(%ebp),%eax		// ts.hi
	mull	20(%ebp)		//   * cvt.hi
					// %eax lands in bits 64..95; %edx is
					// overflow above bit 95 and is dropped

// Sum the columns
	addl	%edi,%ebx		// Bits 32..63 of the product
	adcl	%ecx,%esi		// Bits 64..95 plus the carry from below
	addl	%eax,%esi		// Plus the low word of ts.hi * cvt.hi

	movl	%esi,%edx		// Return the high word
	movl	%ebx,%eax		//   and the low word in %edx:%eax

	popl	%edi			// Restore in reverse push order
	popl	%esi
	popl	%ebx
	popl	%ebp

	ret
/*
 * _rtc_nanotime_store
 *
 * Publish a new set of nanotime conversion parameters into the
 * rtc_nanotime_info structure whose address is the last stack argument.
 *
 * Stack arguments as read below:
 *	 8(%ebp), 12(%ebp)	low/high words stored to RNT_TSC_BASE
 *	16(%ebp), 20(%ebp)	low/high words stored to RNT_NS_BASE
 *	24(%ebp)		stored to RNT_SCALE
 *	28(%ebp)		stored to RNT_SHIFT
 *	32(%ebp)		pointer to the rtc_nanotime_info to update
 *
 * Update protocol: the generation word is zeroed first, the data words
 * are written, and only then is the next nonzero generation stored.  A
 * generation of 0 therefore marks the data as being updated.
 *
 * Writes %eax and %edx; %esi is saved and restored.
 */
	.globl	EXT(_rtc_nanotime_store)
	.align	FALIGN

LEXT(_rtc_nanotime_store)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi

	movl	32(%ebp),%edx			/* %edx = ptr to rtc_nanotime_info */

	movl	RNT_GENERATION(%edx),%esi	/* remember the current generation */
	movl	$0,RNT_GENERATION(%edx)		/* 0 = data is being updated */

	movl	8(%ebp),%eax			/* TSC base, low word */
	movl	%eax,RNT_TSC_BASE(%edx)
	movl	12(%ebp),%eax			/* TSC base, high word */
	movl	%eax,RNT_TSC_BASE+4(%edx)

	movl	24(%ebp),%eax			/* scale */
	movl	%eax,RNT_SCALE(%edx)

	movl	28(%ebp),%eax			/* shift */
	movl	%eax,RNT_SHIFT(%edx)

	movl	16(%ebp),%eax			/* nanosecond base, low word */
	movl	%eax,RNT_NS_BASE(%edx)
	movl	20(%ebp),%eax			/* nanosecond base, high word */
	movl	%eax,RNT_NS_BASE+4(%edx)

	incl	%esi				/* advance to the next generation */
	jnz	2f
	incl	%esi				/* wrapped to 0, which is reserved as the flag */
2:	movl	%esi,RNT_GENERATION(%edx)	/* publish: data is usable again */

	popl	%esi
	popl	%ebp
	ret


/* uint64_t _rtc_nanotime_read( rtc_nanotime_t *rntp, int slow );
 *
 * This is the same as the commpage nanotime routine, except that it uses the
 * kernel internal "rtc_nanotime_info" data instead of the commpage data.  The two copies
 * of data (one in the kernel and one in user space) are kept in sync by rtc_clock_napped().
 *
 * Warning!  There is another copy of this code in osfmk/i386/locore.s.  The
 * two versions must be kept in sync with each other!
 *
 * There are actually two versions of the algorithm, one each for "slow" and "fast"
 * processors.  The more common "fast" algorithm is:
 *
 *	nanoseconds = (((rdtsc - rnt_tsc_base) * rnt_tsc_scale) / 2**32) + rnt_ns_base;
 *
 * (NOTE(review): written with '+' because the slow path of this routine adds
 * RNT_NS_BASE; the fast path lives in the RTC_NANOTIME_READ_FAST macro and is
 * presumed to do the same -- confirm against the macro.)
 *
 * Of course, the divide by 2**32 is a nop.
* rnt_tsc_scale is a constant computed during initialization:
 *
 *	rnt_tsc_scale = (10**9 * 2**32) / tscFreq;
 *
 * The "slow" algorithm uses long division:
 *
 *	nanoseconds = (((rdtsc - rnt_tsc_base) * 10**9) / tscFreq) + rnt_ns_base;
 *
 * Since this routine is not synchronized and can be called in any context,
 * we use a generation count to guard against seeing partially updated data.  In addition,
 * the _rtc_nanotime_store() routine -- just above -- zeroes the generation before
 * updating the data, and stores the nonzero generation only after all other data has been
 * stored.  Because IA32 guarantees that stores by one processor must be seen in order
 * by another, we can avoid using a lock.  We spin while the generation is zero.
 *
 * In accordance with the ABI, we return the 64-bit nanotime in %edx:%eax.
 *
 * Stack arguments:
 *	 8(%ebp)	pointer to the rtc_nanotime_info to read
 *	12(%ebp)	"slow" flag; nonzero selects the long-division path
 *
 * %ebx, %esi and %edi are saved and restored; %ecx is used as scratch on
 * the slow path.
 */

	.globl	EXT(_rtc_nanotime_read)
	.align	FALIGN
LEXT(_rtc_nanotime_read)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	8(%ebp),%edi			/* %edi = ptr to rtc_nanotime_info */
	movl	12(%ebp),%eax			/* %eax = "slow" flag */
	testl	%eax,%eax
	jnz	Lslow

	/* Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD */
	RTC_NANOTIME_READ_FAST()		/* macro defined elsewhere; result is returned as left by it */

	popl	%ebx
	popl	%edi
	popl	%esi
	popl	%ebp
	ret

	/* Processor whose TSC frequency is slower than or equal to SLOW_TSC_THRESHOLD */
Lslow:
	movl	RNT_GENERATION(%edi),%esi	/* get generation (0 if being changed) */
	testl	%esi,%esi			/* if being changed, loop until stable */
	jz	Lslow
	pushl	%esi				/* save generation; %esi is reused below */
	pushl	RNT_SHIFT(%edi)			/* save low 32 bits of tscFreq */

	lfence
	rdtsc					/* get TSC in %edx:%eax */
	lfence
	subl	RNT_TSC_BASE(%edi),%eax		/* %edx:%eax = ticks since the TSC base */
	sbbl	RNT_TSC_BASE+4(%edi),%edx

	/*
	 * Do the math to convert tsc ticks to nanoseconds.  We first
	 * do long multiply of 1 billion times the tsc.  Then we do
	 * long division by the tsc frequency.
	 */
	movl	$1000000000, %ecx		/* number of nanoseconds in a second */
	movl	%edx, %ebx			/* keep the high word of the delta */
	mull	%ecx				/* low word * 10**9 */
	movl	%edx, %edi			/* carry-in for the middle word */
	movl	%eax, %esi			/* %esi = low word of the product */
	movl	%ebx, %eax
	mull	%ecx				/* high word * 10**9 */
	addl	%edi, %eax
	adcl	$0, %edx			/* 96-bit product in %edx:%eax:%esi */
	movl	%eax, %edi			/* %edi = middle word of the product */
	popl	%ecx				/* %ecx = low 32 bits of tscFreq */

	/*
	 * Schoolbook division of the 96-bit product by %ecx, one 32-bit
	 * word at a time from the top.  Each step leaves its remainder
	 * in %edx as the high half of the next dividend; since that
	 * remainder is smaller than the divisor, the next quotient
	 * always fits in 32 bits.
	 */
	movl	%edx, %eax			/* dividend = 0:high word */
	xorl	%edx, %edx
	divl	%ecx				/* quotient is overflow and is dropped; keep remainder */
	movl	%edi, %eax			/* dividend = remainder:middle word */
	divl	%ecx
	movl	%eax, %ebx			/* %ebx = high word of the result */
	movl	%esi, %eax			/* dividend = remainder:low word */
	divl	%ecx				/* %eax = low word of the result */
	movl	%ebx, %edx			/* result in %edx:%eax */

	movl	8(%ebp),%edi			/* recover ptr to rtc_nanotime_info */
	popl	%esi				/* recover generation */

	addl	RNT_NS_BASE(%edi),%eax		/* add the nanosecond base */
	adcl	RNT_NS_BASE+4(%edi),%edx

	cmpl	RNT_GENERATION(%edi),%esi	/* have the parameters changed? */
	jne	Lslow				/* yes, loop until stable */

	popl	%ebx
	popl	%edi
	popl	%esi
	popl	%ebp
	ret					/* result in %edx:%eax */