/*
 * Copyright (c) 2010-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#include <System/machine/cpu_capabilities.h>

// bool save_xxm = (*((uint32_t*)_COMM_PAGE_CPU_CAPABILITIES) & kHasAVX1_0) != 0;

#if __x86_64__

// _tlv_get_addr (x86_64)
// In:       %rdi = pointer to TLV descriptor { thunk, key, offset }
// Out:      %rax = address of the thread-local variable
// Preserves every register except %rax (unlike a normal C call), so the
// slow path below must manually save/restore all caller-visible state,
// including the full SIMD state, around the call into C.
//
// Slots below the frame pointer used to preserve the integer registers
// that the C allocation function is allowed to clobber.
#define RDI_SAVE_RBP -8
#define RSI_SAVE_RBP -16
#define RDX_SAVE_RBP -24
#define RCX_SAVE_RBP -32
#define RBX_SAVE_RBP -40
#define R8_SAVE_RBP -48
#define R9_SAVE_RBP -56
#define R10_SAVE_RBP -64
#define R11_SAVE_RBP -72
#define STATIC_STACK_SIZE 256   // extra padding to allow it to be 64-byte aligned

// %rsp-relative slots for the eight argument XMM registers (SSE fallback path).
#define XMM0_SAVE_RSP 0x00
#define XMM1_SAVE_RSP 0x10
#define XMM2_SAVE_RSP 0x20
#define XMM3_SAVE_RSP 0x30
#define XMM4_SAVE_RSP 0x40
#define XMM5_SAVE_RSP 0x50
#define XMM6_SAVE_RSP 0x60
#define XMM7_SAVE_RSP 0x70



	// returns address of TLV in %rax, all other registers preserved
	.globl _tlv_get_addr
	.private_extern _tlv_get_addr
_tlv_get_addr:
	// Fast path: thread already has a buffer for this key.
	movq	8(%rdi),%rax			// get key from descriptor
	movq	%gs:0x0(,%rax,8),%rax		// get thread value
	testq	%rax,%rax			// if NULL, lazily allocate
	je	LlazyAllocate
	addq	16(%rdi),%rax			// add offset from descriptor
	ret
LlazyAllocate:
	// Slow path: must call C, so spill every register the C ABI may clobber.
	pushq	%rbp
	movq	%rsp,%rbp
	subq	$STATIC_STACK_SIZE,%rsp		// frame stays 16-byte aligned (256 is a multiple of 16)
	movq	%rdi,RDI_SAVE_RBP(%rbp)		# save registers that might be used as parameters
	movq	%rsi,RSI_SAVE_RBP(%rbp)
	movq	%rdx,RDX_SAVE_RBP(%rbp)
	movq	%rcx,RCX_SAVE_RBP(%rbp)
	movq	%rbx,RBX_SAVE_RBP(%rbp)		// rbx also saved: cpuid below clobbers it
	movq	%r8, R8_SAVE_RBP(%rbp)
	movq	%r9, R9_SAVE_RBP(%rbp)
	movq	%r10,R10_SAVE_RBP(%rbp)
	movq	%r11,R11_SAVE_RBP(%rbp)

	// One-time probe of cpu features; results cached in .data below.
	// NOTE(review): _inited is not updated atomically — assumes concurrent
	// first-callers computing the same values is harmless; confirm.
	cmpl	$0, _inited(%rip)
	jne	Linited
	movl	$0x01,%eax
	cpuid					# get cpu features to check on xsave instruction support
	andl	$0x08000000,%ecx		# check OSXSAVE bit (CPUID.1:ECX bit 27)
	movl	%ecx,_hasXSave(%rip)
	cmpl	$0, %ecx
	jne	LxsaveInfo
	movl	$1, _inited(%rip)
	jmp	Lsse

LxsaveInfo:
	// CPUID leaf 0xD, sub-leaf 0: which state components exist and how
	// large a buffer xsave needs for all of them.
	movl	$0x0D,%eax
	movl	$0x00,%ecx
	cpuid					# get xsave parameter info
	movl	%eax,_features_lo32(%rip)	// EDX:EAX = supported state-component bitmap
	movl	%edx,_features_hi32(%rip)
	movl	%ecx,_bufferSize32(%rip)	// ECX = required xsave area size in bytes
	movl	$1, _inited(%rip)

Linited:
	cmpl	$0, _hasXSave(%rip)
	jne	Lxsave

Lsse:
	// No xsave: only the eight parameter XMM registers need preserving.
	subq	$128, %rsp			// 8 regs x 16 bytes; keeps 16-byte alignment
	movdqa	%xmm0, XMM0_SAVE_RSP(%rsp)
	movdqa	%xmm1, XMM1_SAVE_RSP(%rsp)
	movdqa	%xmm2, XMM2_SAVE_RSP(%rsp)
	movdqa	%xmm3, XMM3_SAVE_RSP(%rsp)
	movdqa	%xmm4, XMM4_SAVE_RSP(%rsp)
	movdqa	%xmm5, XMM5_SAVE_RSP(%rsp)
	movdqa	%xmm6, XMM6_SAVE_RSP(%rsp)
	movdqa	%xmm7, XMM7_SAVE_RSP(%rsp)
	jmp	Lalloc

Lxsave:
	// Allocate a 64-byte-aligned xsave buffer on the stack.
	movl	_bufferSize32(%rip),%eax
	movq	%rsp, %rdi
	subq	%rax, %rdi			# stack alloc buffer
	andq	$-64, %rdi			# 64-byte align stack (xsave requirement)
	movq	%rdi, %rsp
	# xsave requires buffer to be zero'ed out
	movq	$0, %rcx
	movq	%rdi, %r8			// r8 = cursor, r9 = one past end
	movq	%rdi, %r9
	addq	%rax, %r9
Lz:	movq	%rcx, (%r8)			// zero-fill loop, 8 bytes per iteration
	addq	$8, %r8
	cmpq	%r8,%r9
	ja	Lz

	movl	_features_lo32(%rip),%eax
	movl	_features_hi32(%rip),%edx
	# call xsave with buffer on stack and eax:edx flag bits
	# note: do not use xsaveopt, it assumes you are using the same
	# buffer as previous xsaves, and this thread is on the same cpu.
	xsave	(%rsp)

Lalloc:
	movq	RDI_SAVE_RBP(%rbp),%rdi
	movq	8(%rdi),%rdi			// get key from descriptor
	call	_tlv_allocate_and_initialize_for_key

	// %rax now holds the buffer address; restore SIMD state around it.
	cmpl	$0, _hasXSave(%rip)
	jne	Lxrstror

	movdqa	XMM0_SAVE_RSP(%rsp),%xmm0
	movdqa	XMM1_SAVE_RSP(%rsp),%xmm1
	movdqa	XMM2_SAVE_RSP(%rsp),%xmm2
	movdqa	XMM3_SAVE_RSP(%rsp),%xmm3
	movdqa	XMM4_SAVE_RSP(%rsp),%xmm4
	movdqa	XMM5_SAVE_RSP(%rsp),%xmm5
	movdqa	XMM6_SAVE_RSP(%rsp),%xmm6
	movdqa	XMM7_SAVE_RSP(%rsp),%xmm7
	jmp	Ldone

Lxrstror:
	movq	%rax,%r11			// xrstor clobbers eax/edx; park result in r11
	movl	_features_lo32(%rip),%eax
	movl	_features_hi32(%rip),%edx
	# restore extended state with xrstor from buffer on stack, eax:edx flag bits
	xrstor	(%rsp)
	movq	%r11,%rax			// recover the allocation address

Ldone:
	// Restore all spilled integer registers and tear down the frame.
	movq	RDI_SAVE_RBP(%rbp),%rdi
	movq	RSI_SAVE_RBP(%rbp),%rsi
	movq	RDX_SAVE_RBP(%rbp),%rdx
	movq	RCX_SAVE_RBP(%rbp),%rcx
	movq	RBX_SAVE_RBP(%rbp),%rbx
	movq	R8_SAVE_RBP(%rbp),%r8
	movq	R9_SAVE_RBP(%rbp),%r9
	movq	R10_SAVE_RBP(%rbp),%r10
	movq	R11_SAVE_RBP(%rbp),%r11
	movq	%rbp,%rsp
	popq	%rbp
	addq	16(%rdi),%rax			// result = buffer + offset
	ret

	.data
# Cached info from cpuid.
_inited:		.long 0		# non-zero once the cpuid probe has run
_features_lo32:		.long 0		# low 32 bits of supported xsave state components
_features_hi32:		.long 0		# high 32 bits of supported xsave state components
_bufferSize32:		.long 0		# byte size required for the xsave area
_hasXSave:		.long 0		# non-zero if OSXSAVE is enabled

#endif



#if __i386__
	// _tlv_get_addr (i386)
	// In:  %eax = pointer to TLV descriptor
	// Out: %eax = address of thread-local variable
	// returns address of TLV in %eax, all other registers (except %ecx) preserved
	.globl _tlv_get_addr
	.private_extern _tlv_get_addr
_tlv_get_addr:
	movl	4(%eax),%ecx			// get key from descriptor
	movl	%gs:0x0(,%ecx,4),%ecx		// get thread value
	testl	%ecx,%ecx			// if NULL, lazily allocate
	je	LlazyAllocate
	movl	8(%eax),%eax			// add offset from descriptor
	addl	%ecx,%eax
	ret
LlazyAllocate:
	// Slow path: fxsave/fxrstor preserves x87+SSE state around the C call.
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%edx				// save edx
	subl	$548,%esp			// room for 512-byte fxsave area + locals + alignment
	movl	%eax,-8(%ebp)			// save descriptor
	lea	-528(%ebp),%ecx			// get 512 byte buffer in frame
	and	$-16, %ecx			// 16-byte align buffer for fxsave
	fxsave	(%ecx)
	movl	4(%eax),%ecx			// get key from descriptor
	movl	%ecx,(%esp)			// push key parameter, also leaves stack aligned properly
	call	_tlv_allocate_and_initialize_for_key
	movl	-8(%ebp),%ecx			// get descriptor
	movl	8(%ecx),%ecx			// get offset from descriptor
	addl	%ecx,%eax			// add offset to buffer
	lea	-528(%ebp),%ecx			// recompute same aligned buffer address
	and	$-16, %ecx			// 16-byte align buffer for fxrstor
	fxrstor	(%ecx)
	addl	$548,%esp
	popl	%edx				// restore edx
	popl	%ebp
	ret
#endif

#if __arm64__
	// Parameters: X0 = descriptor
	// Result: X0 = address of TLV
	// Note: all registers except X0, x16, and x17 are preserved
	.align 2
	.globl _tlv_get_addr
	.private_extern _tlv_get_addr
_tlv_get_addr:
	ldr	x16, [x0, #8]			// get key from descriptor
	mrs	x17, TPIDRRO_EL0		// read-only thread pointer
	and	x17, x17, #-8			// mask off low 3 bits — presumably non-address
						// bits of the thread register; confirm against kernel
	ldr	x17, [x17, x16, lsl #3]		// get thread allocation address for this key
	cbz	x17, LlazyAllocate		// if NULL, lazily allocate
	ldr	x16, [x0, #16]			// get offset from descriptor
	add	x0, x17, x16			// return allocation+offset
	ret	lr

LlazyAllocate:
	// Slow path: save every register the AAPCS64 lets the callee clobber
	// (x1-x16 and the full q0-q7), since our contract preserves them.
	stp	fp, lr, [sp, #-16]!
	mov	fp, sp
	sub	sp, sp, #288			// NOTE(review): pre-indexed pushes below do the
						// real allocation; this extra space appears unused
	stp	x1, x2, [sp, #-16]!		// save all registers that C function might trash
	stp	x3, x4, [sp, #-16]!
	stp	x5, x6, [sp, #-16]!
	stp	x7, x8, [sp, #-16]!
	stp	x9, x10, [sp, #-16]!
	stp	x11, x12, [sp, #-16]!
	stp	x13, x14, [sp, #-16]!
	stp	x15, x16, [sp, #-16]!
	stp	q0, q1, [sp, #-32]!
	stp	q2, q3, [sp, #-32]!
	stp	q4, q5, [sp, #-32]!
	stp	q6, q7, [sp, #-32]!
	stp	x0, x17, [sp, #-16]!		// save descriptor

	mov	x0, x16				// use key from descriptor as parameter
	bl	_tlv_allocate_and_initialize_for_key
	ldp	x16, x17, [sp], #16		// pop descriptor
	ldr	x16, [x16, #16]			// get offset from descriptor
	add	x0, x0, x16			// return allocation+offset

	// Pop everything back in reverse order.
	ldp	q6, q7, [sp], #32
	ldp	q4, q5, [sp], #32
	ldp	q2, q3, [sp], #32
	ldp	q0, q1, [sp], #32
	ldp	x15, x16, [sp], #16
	ldp	x13, x14, [sp], #16
	ldp	x11, x12, [sp], #16
	ldp	x9, x10, [sp], #16
	ldp	x7, x8, [sp], #16
	ldp	x5, x6, [sp], #16
	ldp	x3, x4, [sp], #16
	ldp	x1, x2, [sp], #16

	mov	sp, fp
	ldp	fp, lr, [sp], #16
	ret	lr

#endif

#if 0
#if __arm__
	// _tlv_get_addr (arm32) — currently compiled out by the #if 0 above.
	// returns address of TLV in r0, all other registers preserved
	.globl _tlv_get_addr
	.private_extern _tlv_get_addr
_tlv_get_addr:
	push	{r1,r2,r3,r7,lr}
	mov	r7,r0				// save descriptor in r7
	ldr	r0, [r7, #4]			// get key from descriptor
	bl	_pthread_getspecific		// get thread value
	cmp	r0, #0
	bne	L2				// if NULL, lazily allocate
	ldr	r0, [r7, #4]			// get key from descriptor
	bl	_tlv_allocate_and_initialize_for_key
L2:	ldr	r1, [r7, #8]			// get offset from descriptor
	add	r0, r1, r0			// add offset into allocation block
	pop	{r1,r2,r3,r7,pc}
#endif
#endif

	.subsections_via_symbols