/*
 * Copyright (c) 2010-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

//
// Thread-local variable (TLV) runtime helper.
//
// _tlv_get_addr is reached via a TLV descriptor.  Descriptor layout as used
// by this code: x86_64 -> { +8: pthread key, +16: offset }; i386 -> { +4: key,
// +8: offset } (field at +0 is not read here).  The fast path looks the key up
// in the thread's %gs-based slot array and returns slot + offset.  If the slot
// is NULL, the slow path calls the C function
// _tlv_allocate_and_initialize_for_key(key) and adds the offset to its result.
//
// Special ABI: unlike a normal C function, this helper must preserve ALL
// registers except the result register (and %ecx on i386 — see below),
// including x87 and SSE/AVX state, because the compiler emits calls to it
// without treating any register as clobbered.  Hence the slow path manually
// spills the caller-saved GPRs, the x87 state (fnsave/frstor) and either the
// xmm or the ymm registers around the C call.

#include <System/machine/cpu_capabilities.h>

// The comm page capability word decides how much vector state to save:
// bool save_xxm = (*((uint32_t*)_COMM_PAGE_CPU_CAPABILITIES) & kHasAVX1_0) != 0;

#if __x86_64__
	// returns address of TLV in %rax, all other registers preserved
	//
	// Frame layout (relative to %rbp, which is 16-byte aligned here since
	// entry %rsp%16==8 and the push of %rbp realigns it — this makes the
	// aligned movdqa stores below legal):
	//   -8 .. -64     : spilled caller-saved GPRs (rdi,rsi,rdx,rcx,r8-r11)
	//   FP_SAVE       : x87 state (fnsave image; 108 bytes fits in -192..-84)
	//   VECTOR_SAVE   : 16 x xmm (256 bytes) or 16 x ymm (512 bytes)
	#define FP_SAVE -192
	#define VECTOR_SAVE -704
	#define STACK_SIZE 704

	.globl _tlv_get_addr
	.private_extern _tlv_get_addr
_tlv_get_addr:
	movq	8(%rdi),%rax			// get key from descriptor
	movq	%gs:0x0(,%rax,8),%rax	// get thread value
	testq	%rax,%rax				// if NULL, lazily allocate
	je		LlazyAllocate
	addq	16(%rdi),%rax			// add offset from descriptor
	ret
LlazyAllocate:
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$STACK_SIZE,%rsp		// fxsave uses 512 bytes of store, xsave uses
									// more; here x87 + vector state are saved
									// piecemeal instead (fnsave + movdqa/vmovdqu)
	movq	%rdi,-8(%rbp)			// spill all caller-saved GPRs the C call may clobber
	movq	%rsi,-16(%rbp)
	movq	%rdx,-24(%rbp)
	movq	%rcx,-32(%rbp)
	movq	%r8,-40(%rbp)
	movq	%r9,-48(%rbp)
	movq	%r10,-56(%rbp)
	movq	%r11,-64(%rbp)
	fnsave	FP_SAVE(%rbp)			// save (and reinitialize) x87 state
	movq	$(_COMM_PAGE_CPU_CAPABILITIES), %rcx
	movl	(%rcx), %ecx
	testl	$kHasAVX1_0, %ecx		// AVX available? then full ymm regs are live
	jne		L2
	// no AVX: saving the 128-bit xmm halves is sufficient
	movdqa	%xmm0, VECTOR_SAVE+0x00(%rbp)
	movdqa	%xmm1, VECTOR_SAVE+0x10(%rbp)
	movdqa	%xmm2, VECTOR_SAVE+0x20(%rbp)
	movdqa	%xmm3, VECTOR_SAVE+0x30(%rbp)
	movdqa	%xmm4, VECTOR_SAVE+0x40(%rbp)
	movdqa	%xmm5, VECTOR_SAVE+0x50(%rbp)
	movdqa	%xmm6, VECTOR_SAVE+0x60(%rbp)
	movdqa	%xmm7, VECTOR_SAVE+0x70(%rbp)
	movdqa	%xmm8, VECTOR_SAVE+0x80(%rbp)
	movdqa	%xmm9, VECTOR_SAVE+0x90(%rbp)
	movdqa	%xmm10,VECTOR_SAVE+0xA0(%rbp)
	movdqa	%xmm11,VECTOR_SAVE+0xB0(%rbp)
	movdqa	%xmm12,VECTOR_SAVE+0xC0(%rbp)
	movdqa	%xmm13,VECTOR_SAVE+0xD0(%rbp)
	movdqa	%xmm14,VECTOR_SAVE+0xE0(%rbp)
	movdqa	%xmm15,VECTOR_SAVE+0xF0(%rbp)
	jmp		L3
L2:	// AVX1.0 present: save full 256-bit ymm registers (unaligned stores)
	vmovdqu	%ymm0, VECTOR_SAVE+0x00(%rbp)
	vmovdqu	%ymm1, VECTOR_SAVE+0x20(%rbp)
	vmovdqu	%ymm2, VECTOR_SAVE+0x40(%rbp)
	vmovdqu	%ymm3, VECTOR_SAVE+0x60(%rbp)
	vmovdqu	%ymm4, VECTOR_SAVE+0x80(%rbp)
	vmovdqu	%ymm5, VECTOR_SAVE+0xA0(%rbp)
	vmovdqu	%ymm6, VECTOR_SAVE+0xC0(%rbp)
	vmovdqu	%ymm7, VECTOR_SAVE+0xE0(%rbp)
	vmovdqu	%ymm8, VECTOR_SAVE+0x100(%rbp)
	vmovdqu	%ymm9, VECTOR_SAVE+0x120(%rbp)
	vmovdqu	%ymm10,VECTOR_SAVE+0x140(%rbp)
	vmovdqu	%ymm11,VECTOR_SAVE+0x160(%rbp)
	vmovdqu	%ymm12,VECTOR_SAVE+0x180(%rbp)
	vmovdqu	%ymm13,VECTOR_SAVE+0x1A0(%rbp)
	vmovdqu	%ymm14,VECTOR_SAVE+0x1C0(%rbp)
	vmovdqu	%ymm15,VECTOR_SAVE+0x1E0(%rbp)
L3:	movq	-32(%rbp),%rcx
	movq	8(%rdi),%rdi			// get key from descriptor
	call	_tlv_allocate_and_initialize_for_key	// returns buffer in %rax

	frstor	FP_SAVE(%rbp)			// restore x87 state
	movq	$(_COMM_PAGE_CPU_CAPABILITIES), %rcx	// re-read capabilities (rcx was clobbered by the call)
	movl	(%rcx), %ecx
	testl	$kHasAVX1_0, %ecx
	jne		L4
	movdqa	VECTOR_SAVE+0x00(%rbp), %xmm0
	movdqa	VECTOR_SAVE+0x10(%rbp), %xmm1
	movdqa	VECTOR_SAVE+0x20(%rbp), %xmm2
	movdqa	VECTOR_SAVE+0x30(%rbp), %xmm3
	movdqa	VECTOR_SAVE+0x40(%rbp), %xmm4
	movdqa	VECTOR_SAVE+0x50(%rbp), %xmm5
	movdqa	VECTOR_SAVE+0x60(%rbp), %xmm6
	movdqa	VECTOR_SAVE+0x70(%rbp), %xmm7
	movdqa	VECTOR_SAVE+0x80(%rbp), %xmm8
	movdqa	VECTOR_SAVE+0x90(%rbp), %xmm9
	movdqa	VECTOR_SAVE+0xA0(%rbp), %xmm10
	movdqa	VECTOR_SAVE+0xB0(%rbp), %xmm11
	movdqa	VECTOR_SAVE+0xC0(%rbp), %xmm12
	movdqa	VECTOR_SAVE+0xD0(%rbp), %xmm13
	movdqa	VECTOR_SAVE+0xE0(%rbp), %xmm14
	movdqa	VECTOR_SAVE+0xF0(%rbp), %xmm15
	jmp		L5
L4:	vmovdqu	VECTOR_SAVE+0x00(%rbp), %ymm0
	vmovdqu	VECTOR_SAVE+0x20(%rbp), %ymm1
	vmovdqu	VECTOR_SAVE+0x40(%rbp), %ymm2
	vmovdqu	VECTOR_SAVE+0x60(%rbp), %ymm3
	vmovdqu	VECTOR_SAVE+0x80(%rbp), %ymm4
	vmovdqu	VECTOR_SAVE+0xA0(%rbp), %ymm5
	vmovdqu	VECTOR_SAVE+0xC0(%rbp), %ymm6
	vmovdqu	VECTOR_SAVE+0xE0(%rbp), %ymm7
	vmovdqu	VECTOR_SAVE+0x100(%rbp), %ymm8
	vmovdqu	VECTOR_SAVE+0x120(%rbp), %ymm9
	vmovdqu	VECTOR_SAVE+0x140(%rbp), %ymm10
	vmovdqu	VECTOR_SAVE+0x160(%rbp), %ymm11
	vmovdqu	VECTOR_SAVE+0x180(%rbp), %ymm12
	vmovdqu	VECTOR_SAVE+0x1A0(%rbp), %ymm13
	vmovdqu	VECTOR_SAVE+0x1C0(%rbp), %ymm14
	vmovdqu	VECTOR_SAVE+0x1E0(%rbp), %ymm15
L5:	// restore the spilled GPRs; %rdi last so the descriptor is usable below
	movq	-64(%rbp),%r11
	movq	-56(%rbp),%r10
	movq	-48(%rbp),%r9
	movq	-40(%rbp),%r8
	movq	-32(%rbp),%rcx
	movq	-24(%rbp),%rdx
	movq	-16(%rbp),%rsi
	movq	-8(%rbp),%rdi
	addq	16(%rdi),%rax			// result = buffer + offset
	addq	$STACK_SIZE,%rsp
	popq	%rbp
	ret
#endif



#if __i386__
	// returns address of TLV in %eax, all other registers (except %ecx) preserved
_tlv_get_addr:
	.globl _tlv_get_addr
	.private_extern _tlv_get_addr
	// NOTE(review): the .globl/.private_extern directives above apply to the
	// label regardless of ordering; layout kept as in the x86_64 section.
	movl	4(%eax),%ecx			// get key from descriptor
	movl	%gs:0x0(,%ecx,4),%ecx	// get thread value
	testl	%ecx,%ecx				// if NULL, lazily allocate
	je		LlazyAllocate
	movl	8(%eax),%eax			// add offset from descriptor
	addl	%ecx,%eax
	ret
LlazyAllocate:
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%edx					// save edx (caller-saved but must be preserved by this special ABI)
	subl	$548,%esp				// room for 512-byte fxsave image + alignment slack + arg slot
	movl	%eax,-8(%ebp)			// save descriptor
	lea		-528(%ebp),%ecx			// get 512 byte buffer in frame
	and		$-16, %ecx				// 16-byte align buffer for fxsave
	fxsave	(%ecx)					// save x87 + SSE state in one shot
	movl	4(%eax),%ecx			// get key from descriptor
	movl	%ecx,(%esp)				// push key parameter, also leaves stack aligned properly
	call	_tlv_allocate_and_initialize_for_key	// returns buffer in %eax
	movl	-8(%ebp),%ecx			// get descriptor
	movl	8(%ecx),%ecx			// get offset from descriptor
	addl	%ecx,%eax				// add offset to buffer
	lea		-528(%ebp),%ecx
	and		$-16, %ecx				// 16-byte align buffer for fxrstor
	fxrstor	(%ecx)					// restore x87 + SSE state
	addl	$548,%esp
	popl	%edx					// restore edx
	popl	%ebp
	ret
#endif


#if 0	// ARM variant currently disabled
#if __arm__
	// returns address of TLV in r0, all other registers preserved
	.globl _tlv_get_addr
	.private_extern _tlv_get_addr
_tlv_get_addr:
	push	{r1,r2,r3,r7,lr}
	mov		r7,r0					// save descriptor in r7
	ldr		r0, [r7, #4]			// get key from descriptor
	bl		_pthread_getspecific	// get thread value
	cmp		r0, #0
	bne		L2						// non-NULL: skip lazy allocation
	ldr		r0, [r7, #4]			// get key from descriptor
	bl		_tlv_allocate_and_initialize_for_key
L2:	ldr		r1, [r7, #8]			// get offset from descriptor
	add		r0, r1, r0				// add offset into allocation block
	pop		{r1,r2,r3,r7,pc}
#endif
#endif

	.subsections_via_symbols