/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <i386/asm.h>
#include <i386/rtclock_asm.h>
#include <i386/proc_reg.h>
#include <i386/eflags.h>

#include <i386/postcode.h>
#include <i386/apic.h>
#include <i386/vmx/vmx_asm.h>
#include <assym.s>

/*
**	ml_get_timebase()
**
**	Entry	- %rdi contains pointer to a 64-bit structure.
**
**	Exit	- 64-bit structure filled in.
**
*/
ENTRY(ml_get_timebase)

	lfence				/* keep rdtsc from executing early */
	rdtsc				/* %edx:%eax := TSC */
	lfence				/* keep later ops from passing rdtsc */
	shlq	$32,%rdx
	orq	%rdx,%rax		/* %rax := (TSC_hi << 32) | TSC_lo */
	movq	%rax, (%rdi)

	ret

/*
 * Convert between various timer units.
 *
 * This code converts 64-bit time units to other units.
 * For example, the TSC is converted to HPET units.
 *
 * Time is a 64-bit integer that is some number of ticks.
 * The conversion factor is a 64-bit fixed-point number composed
 * of a 32-bit integer part and a 32-bit fraction.
 *
 * The time ticks are multiplied by the conversion factor.  The
 * calculation is done as a 128-bit value, but the high 32 bits
 * (overflow) and the low 32 bits (the fraction part of the result)
 * are dropped, leaving the 64-bit result we return.
 *
 * Note that we can also use this function to multiply two conversion
 * factors; we do this to calculate the multiplier used to convert
 * directly between any two units.
 *
 * uint64_t tmrCvt(uint64_t time,		// %rdi
 *		   uint64_t conversion)		// %rsi
 *
 */
ENTRY(tmrCvt)
	movq	%rdi,%rax
	mulq	%rsi			/* result is %rdx:%rax */
	shrdq	$32,%rdx,%rax		/* %rdx:%rax >>= 32 */
	ret
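
/*
 * For reference, a minimal C sketch of the conversion above, assuming a
 * compiler with the (non-standard) unsigned __int128 extension.  This is
 * only an illustration of the 64x64->128 multiply-and-shift, not kernel
 * code:
 *
 *	uint64_t tmrCvt(uint64_t time, uint64_t conversion)
 *	{
 *		// 128-bit product; keep bits 32..95, i.e. drop overflow
 *		// (top 32) and the fractional part (bottom 32).
 *		return (uint64_t)(((unsigned __int128)time * conversion) >> 32);
 *	}
 */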

/*
 * void _rtc_nanotime_adjust(
 *		uint64_t	tsc_base_delta,	// %rdi
 *		rtc_nanotime_t	*dst);		// %rsi
 */
ENTRY(_rtc_nanotime_adjust)
	movl	RNT_GENERATION(%rsi),%eax	/* get current generation */
	movl	$0,RNT_GENERATION(%rsi)		/* flag data as being updated */
	addq	%rdi,RNT_TSC_BASE(%rsi)

	incl	%eax				/* next generation */
	jnz	1f
	incl	%eax				/* skip 0, which is a flag */
1:	movl	%eax,RNT_GENERATION(%rsi)	/* update generation */

	ret

/*
 * uint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp);
 *
 * This is the same as the commpage nanotime routine, except that it uses the
 * kernel internal "rtc_nanotime_info" data instead of the commpage data.
 * These two copies of data are kept in sync by rtc_clock_napped().
 *
 * Warning!  There are several copies of this code in the trampolines found in
 * osfmk/x86_64/idt64.s, coming from the various TIMER macros in rtclock_asm.h.
 * They're all kept in sync by using the RTC_NANOTIME_READ() macro.
 *
 * The algorithm we use is:
 *
 *	ns = ((((rdtsc - rnt_tsc_base)<<rnt_shift)*rnt_tsc_scale) / 2**32) + rnt_ns_base;
 *
 * rnt_shift, a constant computed during initialization, is the smallest value for which:
 *
 *	(tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD
 *
 * where SLOW_TSC_THRESHOLD is about 10^9 (roughly 1GHz).  Since most
 * processors' tscFreqs are greater than 1GHz, rnt_shift is usually 0.
 * rnt_tsc_scale is also a 32-bit constant:
 *
 *	rnt_tsc_scale = (10^9 * 2**32) / (tscFreq << rnt_shift);
 *
 * On 64-bit processors this algorithm could be simplified by doing a 64x64-bit
 * multiply of rdtsc by tscFCvtt2n:
 *
 *	ns = (((rdtsc - rnt_tsc_base) * tscFCvtt2n) / 2**32) + rnt_ns_base;
 *
 * We don't do so in order to use the same algorithm in 32- and 64-bit mode.
 * When U32 goes away, we should reconsider.
 *
 * Since this routine is not synchronized and can be called in any context,
 * we use a generation count to guard against seeing partially updated data.
 * In addition, the _rtc_nanotime_store() routine zeroes the generation before
 * updating the data, and stores the nonzero generation only after all fields
 * have been stored.  Because IA32 guarantees that stores by one processor
 * must be seen in order by another, we can avoid using a lock.  We spin while
 * the generation is zero.
 *
 * uint64_t _rtc_nanotime_read(
 *		rtc_nanotime_t *rntp);		// %rdi
 *
 */
ENTRY(_rtc_nanotime_read)

	PAL_RTC_NANOTIME_READ_FAST()

	ret

/*
 * extern uint64_t _rtc_tsc_to_nanoseconds(
 *		uint64_t value,			// %rdi
 *		pal_rtc_nanotime_t *rntp);	// %rsi
 *
 * Converts TSC units to nanoseconds, using an abbreviated form of the above
 * algorithm.  Note that while we could have simply used tmrCvt(value,tscFCvtt2n),
 * which would avoid the need for this asm, doing so is a bit more risky since
 * we'd be using a different algorithm with possibly different rounding etc.
 */
ENTRY(_rtc_tsc_to_nanoseconds)
	movq	%rdi,%rax			/* copy value (in TSC units) to convert */
	movl	RNT_SHIFT(%rsi),%ecx
	movl	RNT_SCALE(%rsi),%edx
	shlq	%cl,%rax			/* tscUnits << shift */
	mulq	%rdx				/* (tscUnits << shift) * scale */
	shrdq	$32,%rdx,%rax			/* %rdx:%rax >>= 32 */
	ret
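
/*
 * For reference, a C sketch of the generation-guarded read described above.
 * The field names are illustrative stand-ins for the RNT_* assembly offsets
 * (the real layout comes from pal_rtc_nanotime_t), and the 128-bit arithmetic
 * again assumes the unsigned __int128 extension:
 *
 *	uint64_t
 *	rtc_nanotime_read_sketch(volatile pal_rtc_nanotime_t *rntp)
 *	{
 *		uint64_t ns;
 *		uint32_t gen;
 *
 *		do {
 *			while ((gen = rntp->generation) == 0)
 *				;	// zero flags an update in flight: spin
 *			ns = (uint64_t)((((unsigned __int128)
 *			        (rdtsc64() - rntp->tsc_base) << rntp->shift)
 *			        * rntp->scale) >> 32) + rntp->ns_base;
 *		} while (gen != rntp->generation);  // retry if updated meanwhile
 *		return ns;
 *	}
 */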

/*
 * call_continuation(continuation,	// %rdi
 *		     parameter,		// %rsi
 *		     wait_result)	// %rdx
 *
 * Resets %rsp to the thread's kernel stack, invokes the continuation, and
 * terminates the current thread if the continuation returns.
 */
Entry(call_continuation)
	movq	%rdi,%rcx			/* get continuation */
	movq	%rsi,%rdi			/* continuation param */
	movq	%rdx,%rsi			/* wait result */
	movq	%gs:CPU_KERNEL_STACK,%rsp	/* set the stack */
	xorq	%rbp,%rbp			/* zero frame pointer */
	call	*%rcx				/* call continuation */
	movq	%gs:CPU_ACTIVE_THREAD,%rdi
	call	EXT(thread_terminate)

/*
 * Switch to the stack supplied in %rsi, zero the frame pointer, and tail
 * into the init function supplied in %rdi; the target is not expected to
 * return.
 */
Entry(x86_init_wrapper)
	xor	%rbp, %rbp
	movq	%rsi, %rsp
	callq	*%rdi

/*
 * Generate a 64-bit quantity with possibly random characteristics, intended for use
 * before the kernel entropy pool is available.  The processor's RNG is used if
 * available, and a value derived from the Time Stamp Counter is returned if not.
 * Multiple invocations may result in well-correlated values if sourced from the TSC.
 */
Entry(ml_early_random)
	mov	%rbx, %rsi			/* preserve callee-saved %rbx across cpuid */
	mov	$1, %eax
	cpuid					/* CPUID leaf 1: feature flags */
	mov	%rsi, %rbx			/* restore %rbx */
	test	$(1 << 30), %ecx		/* RDRAND feature bit in %ecx */
	jz	Lnon_rdrand
	RDRAND_RAX				/* RAX := 64 bits of DRBG entropy */
	jnc	Lnon_rdrand			/* CF = 0: no entropy available, fall back */
	ret
Lnon_rdrand:
	rdtsc					/* EDX:EAX := TSC */
	/* Distribute low order bits */
	mov	%eax, %ecx
	xor	%al, %ah
	shl	$16, %rcx
	xor	%rcx, %rax
	xor	%eax, %edx

	/* Incorporate ASLR entropy, if any */
	lea	(%rip), %rcx
	shr	$21, %rcx
	movzbl	%cl, %ecx
	shl	$16, %ecx
	xor	%ecx, %edx

	mov	%ah, %cl
	ror	%cl, %edx			/* Right rotate EDX (TSC&0xFF ^ (TSC>>8 & 0xFF))&1F */
	shl	$32, %rdx
	xor	%rdx, %rax
	mov	%cl, %al
	ret

#if CONFIG_VMX

/*
 * __vmxon -- Enter VMX Operation
 * int __vmxon(addr64_t v);
 */
Entry(__vmxon)
	FRAME
	push	%rdi

	mov	$(VMX_FAIL_INVALID), %ecx
	mov	$(VMX_FAIL_VALID), %edx
	mov	$(VMX_SUCCEED), %eax
	vmxon	(%rsp)
	cmovcl	%ecx, %eax	/* CF = 1, ZF = 0 */
	cmovzl	%edx, %eax	/* CF = 0, ZF = 1 */

	pop	%rdi
	EMARF
	ret

/*
 * __vmxoff -- Leave VMX Operation
 * int __vmxoff(void);
 */
Entry(__vmxoff)
	FRAME

	mov	$(VMX_FAIL_INVALID), %ecx
	mov	$(VMX_FAIL_VALID), %edx
	mov	$(VMX_SUCCEED), %eax
	vmxoff
	cmovcl	%ecx, %eax	/* CF = 1, ZF = 0 */
	cmovzl	%edx, %eax	/* CF = 0, ZF = 1 */

	EMARF
	ret

#endif /* CONFIG_VMX */

/*
 * mfence -- Memory Barrier
 * Use out-of-line assembly to get
 * standard x86-64 ABI guarantees
 * about what the caller's codegen
 * has in registers vs. memory
 */
Entry(do_mfence)
	mfence
	ret
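
/*
 * Caller-side sketch of why the barrier above is kept out of line (the
 * declaration shown is assumed for illustration; kernel callers get the
 * real prototype from the machine-dependent headers):
 *
 *	extern void do_mfence(void);
 *
 *	shared->request = value;
 *	do_mfence();	// opaque call: per the x86-64 ABI the compiler must
 *			// assume memory may be read/written, so it cannot
 *			// cache shared data in registers across the barrier
 *	status = shared->response;
 */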