/*
 * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/appleapiopts.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>

/* OSAtomic.h library native implementations.
*/ 34 35 .text 36 .align 2, 0x90 37 38// This is a regparm(3) subroutine used by: 39 40// bool OSAtomicCompareAndSwap32( int32_t old, int32_t new, int32_t *value); 41// int32_t OSAtomicAnd32( int32_t mask, int32_t *value); 42// int32_t OSAtomicOr32( int32_t mask, int32_t *value); 43// int32_t OSAtomicXor32( int32_t mask, int32_t *value); 44 45// It assumes old -> %eax, new -> %edx, value -> %ecx 46// on success: returns with ZF set 47// on failure: returns with *value in %eax, ZF clear 48 49// The first word of the routine contains the address of the first instruction, 50// so callers can pass parameters in registers by using the absolute: 51 52// call *_COMPARE_AND_SWAP32 53 54// TODO: move the .long onto a separate page to reduce icache pollution (?) 55 56Lcompare_and_swap32_mp: 57.long _COMM_PAGE_COMPARE_AND_SWAP32+4 58 lock 59 cmpxchgl %edx, (%ecx) 60 ret 61 62 COMMPAGE_DESCRIPTOR(compare_and_swap32_mp,_COMM_PAGE_COMPARE_AND_SWAP32,0,kUP) 63 64Lcompare_and_swap32_up: 65.long _COMM_PAGE_COMPARE_AND_SWAP32+4 66 cmpxchgl %edx, (%ecx) 67 ret 68 69 COMMPAGE_DESCRIPTOR(compare_and_swap32_up,_COMM_PAGE_COMPARE_AND_SWAP32,kUP,0) 70 71// This is a subroutine used by: 72// bool OSAtomicCompareAndSwap64( int64_t old, int64_t new, int64_t *value); 73 74// It assumes old -> %eax/%edx, new -> %ebx/%ecx, value -> %esi 75// on success: returns with ZF set 76// on failure: returns with *value in %eax/%edx, ZF clear 77 78Lcompare_and_swap64_mp: 79.long _COMM_PAGE_COMPARE_AND_SWAP64+4 80 lock 81 cmpxchg8b (%esi) 82 ret 83 84 COMMPAGE_DESCRIPTOR(compare_and_swap64_mp,_COMM_PAGE_COMPARE_AND_SWAP64,0,kUP) 85 86Lcompare_and_swap64_up: 87.long _COMM_PAGE_COMPARE_AND_SWAP64+4 88 cmpxchg8b (%esi) 89 ret 90 91 COMMPAGE_DESCRIPTOR(compare_and_swap64_up,_COMM_PAGE_COMPARE_AND_SWAP64,kUP,0) 92 93// This is a subroutine used by: 94// bool OSAtomicTestAndSet( uint32_t n, void *value ); 95// It assumes n -> %eax, value -> %edx 96 97// Returns: old value of bit in CF 98 99Lbit_test_and_set_mp: 
100.long _COMM_PAGE_BTS+4 101 lock 102 btsl %eax, (%edx) 103 ret 104 105 COMMPAGE_DESCRIPTOR(bit_test_and_set_mp,_COMM_PAGE_BTS,0,kUP) 106 107Lbit_test_and_set_up: 108.long _COMM_PAGE_BTS+4 109 btsl %eax, (%edx) 110 ret 111 112 COMMPAGE_DESCRIPTOR(bit_test_and_set_up,_COMM_PAGE_BTS,kUP,0) 113 114// This is a subroutine used by: 115// bool OSAtomicTestAndClear( uint32_t n, void *value ); 116// It assumes n -> %eax, value -> %edx 117 118// Returns: old value of bit in CF 119 120Lbit_test_and_clear_mp: 121.long _COMM_PAGE_BTC+4 122 lock 123 btrl %eax, (%edx) 124 ret 125 126 COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp,_COMM_PAGE_BTC,0,kUP) 127 128Lbit_test_and_clear_up: 129.long _COMM_PAGE_BTC+4 130 btrl %eax, (%edx) 131 ret 132 133 COMMPAGE_DESCRIPTOR(bit_test_and_clear_up,_COMM_PAGE_BTC,kUP,0) 134 135// This is a subroutine used by: 136// int32_t OSAtomicAdd32( int32_t amt, int32_t *value ); 137// It assumes amt -> %eax, value -> %edx 138 139// Returns: old value in %eax 140// NB: OSAtomicAdd32 returns the new value, so clients will add amt to %eax 141 142Latomic_add32_mp: 143.long _COMM_PAGE_ATOMIC_ADD32+4 144 lock 145 xaddl %eax, (%edx) 146 ret 147 148 COMMPAGE_DESCRIPTOR(atomic_add32_mp,_COMM_PAGE_ATOMIC_ADD32,0,kUP) 149 150Latomic_add32_up: 151.long _COMM_PAGE_ATOMIC_ADD32+4 152 xaddl %eax, (%edx) 153 ret 154 155 COMMPAGE_DESCRIPTOR(atomic_add32_up,_COMM_PAGE_ATOMIC_ADD32,kUP,0) 156 157 158// OSMemoryBarrier() 159// These are used both in 32 and 64-bit mode. We use a fence even on UP 160// machines, so this function can be used with nontemporal stores. 
161 162Lmemory_barrier: 163 lock 164 addl $0,(%esp) 165 ret 166 167 COMMPAGE_DESCRIPTOR(memory_barrier,_COMM_PAGE_MEMORY_BARRIER,0,kHasSSE2); 168 169Lmemory_barrier_sse2: 170 mfence 171 ret 172 173 COMMPAGE_DESCRIPTOR(memory_barrier_sse2,_COMM_PAGE_MEMORY_BARRIER,kHasSSE2,0); 174 175 176/* 177 * typedef volatile struct { 178 * void *opaque1; <-- ptr to 1st queue element or null 179 * long opaque2; <-- generation count 180 * } OSQueueHead; 181 * 182 * void OSAtomicEnqueue( OSQueueHead *list, void *new, size_t offset); 183 */ 184 185LAtomicEnqueue: 186 pushl %edi 187 pushl %esi 188 pushl %ebx 189 movl 16(%esp),%edi // %edi == ptr to list head 190 movl 20(%esp),%ebx // %ebx == new 191 movl 24(%esp),%esi // %esi == offset 192 movl (%edi),%eax // %eax == ptr to 1st element in Q 193 movl 4(%edi),%edx // %edx == current generation count 1941: 195 movl %eax,(%ebx,%esi)// link to old list head from new element 196 movl %edx,%ecx 197 incl %ecx // increment generation count 198 lock // always lock for now... 199 cmpxchg8b (%edi) // ...push on new element 200 jnz 1b 201 popl %ebx 202 popl %esi 203 popl %edi 204 ret 205 206 COMMPAGE_DESCRIPTOR(AtomicEnqueue,_COMM_PAGE_ENQUEUE,0,0) 207 208 209/* void* OSAtomicDequeue( OSQueueHead *list, size_t offset); */ 210 211LAtomicDequeue: 212 pushl %edi 213 pushl %esi 214 pushl %ebx 215 movl 16(%esp),%edi // %edi == ptr to list head 216 movl 20(%esp),%esi // %esi == offset 217 movl (%edi),%eax // %eax == ptr to 1st element in Q 218 movl 4(%edi),%edx // %edx == current generation count 2191: 220 testl %eax,%eax // list empty? 221 jz 2f // yes 222 movl (%eax,%esi),%ebx // point to 2nd in Q 223 movl %edx,%ecx 224 incl %ecx // increment generation count 225 lock // always lock for now... 
226 cmpxchg8b (%edi) // ...pop off 1st element 227 jnz 1b 2282: 229 popl %ebx 230 popl %esi 231 popl %edi 232 ret // ptr to 1st element in Q still in %eax 233 234 COMMPAGE_DESCRIPTOR(AtomicDequeue,_COMM_PAGE_DEQUEUE,0,0) 235 236 237 238/************************* x86_64 versions follow **************************/ 239 240 241// This is a subroutine used by: 242 243// bool OSAtomicCompareAndSwap32( int32_t old, int32_t new, int32_t *value); 244// int32_t OSAtomicAnd32( int32_t mask, int32_t *value); 245// int32_t OSAtomicOr32( int32_t mask, int32_t *value); 246// int32_t OSAtomicXor32( int32_t mask, int32_t *value); 247 248// It assumes: old -> %rdi (ie, it follows the ABI parameter conventions) 249// new -> %rsi 250// value -> %rdx 251// on success: returns with ZF set 252// on failure: returns with *value in %eax, ZF clear 253 254 .code64 255Lcompare_and_swap32_mp_64: 256 movl %edi,%eax // put old value where "cmpxchg" wants it 257 lock 258 cmpxchgl %esi, (%rdx) 259 ret 260 261 COMMPAGE_DESCRIPTOR(compare_and_swap32_mp_64,_COMM_PAGE_COMPARE_AND_SWAP32,0,kUP) 262 263 .code64 264Lcompare_and_swap32_up_64: 265 movl %edi,%eax // put old value where "cmpxchg" wants it 266 cmpxchgl %esi, (%rdx) 267 ret 268 269 COMMPAGE_DESCRIPTOR(compare_and_swap32_up_64,_COMM_PAGE_COMPARE_AND_SWAP32,kUP,0) 270 271// This is a subroutine used by: 272// bool OSAtomicCompareAndSwap64( int64_t old, int64_t new, int64_t *value); 273 274// It assumes: old -> %rdi (ie, it follows the ABI parameter conventions) 275// new -> %rsi 276// value -> %rdx 277// on success: returns with ZF set 278// on failure: returns with *value in %rax, ZF clear 279 280 .code64 281Lcompare_and_swap64_mp_64: 282 movq %rdi,%rax // put old value where "cmpxchg" wants it 283 lock 284 cmpxchgq %rsi, (%rdx) 285 ret 286 287 COMMPAGE_DESCRIPTOR(compare_and_swap64_mp_64,_COMM_PAGE_COMPARE_AND_SWAP64,0,kUP) 288 289 .code64 290Lcompare_and_swap64_up_64: 291 movq %rdi,%rax // put old value where "cmpxchg" wants it 292 cmpxchgq 
%rsi, (%rdx) 293 ret 294 295 COMMPAGE_DESCRIPTOR(compare_and_swap64_up_64,_COMM_PAGE_COMPARE_AND_SWAP64,kUP,0) 296 297// This is a subroutine used by: 298// bool OSAtomicTestAndSet( uint32_t n, void *value ); 299// It is called with standard register conventions: 300// n = %rdi 301// value = %rsi 302// Returns: old value of bit in CF 303 304 .code64 305Lbit_test_and_set_mp_64: 306 lock 307 btsl %edi, (%rsi) 308 ret 309 310 COMMPAGE_DESCRIPTOR(bit_test_and_set_mp_64,_COMM_PAGE_BTS,0,kUP) 311 312 .code64 313Lbit_test_and_set_up_64: 314 btsl %edi, (%rsi) 315 ret 316 317 COMMPAGE_DESCRIPTOR(bit_test_and_set_up_64,_COMM_PAGE_BTS,kUP,0) 318 319// This is a subroutine used by: 320// bool OSAtomicTestAndClear( uint32_t n, void *value ); 321// It is called with standard register conventions: 322// n = %rdi 323// value = %rsi 324// Returns: old value of bit in CF 325 326 .code64 327Lbit_test_and_clear_mp_64: 328 lock 329 btrl %edi, (%rsi) 330 ret 331 332 COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp_64,_COMM_PAGE_BTC,0,kUP) 333 334 .code64 335Lbit_test_and_clear_up_64: 336 btrl %edi, (%rsi) 337 ret 338 339 COMMPAGE_DESCRIPTOR(bit_test_and_clear_up_64,_COMM_PAGE_BTC,kUP,0) 340 341// This is a subroutine used by: 342// int32_t OSAtomicAdd32( int32_t amt, int32_t *value ); 343// It is called with standard register conventions: 344// amt = %rdi 345// value = %rsi 346// Returns: old value in %edi 347// NB: OSAtomicAdd32 returns the new value, so clients will add amt to %edi 348 349 .code64 350Latomic_add32_mp_64: 351 lock 352 xaddl %edi, (%rsi) 353 ret 354 355 COMMPAGE_DESCRIPTOR(atomic_add32_mp_64,_COMM_PAGE_ATOMIC_ADD32,0,kUP) 356 357 .code64 358Latomic_add32_up_64: 359 xaddl %edi, (%rsi) 360 ret 361 362 COMMPAGE_DESCRIPTOR(atomic_add32_up_64,_COMM_PAGE_ATOMIC_ADD32,kUP,0) 363 364// This is a subroutine used by: 365// int64_t OSAtomicAdd64( int64_t amt, int64_t *value ); 366// It is called with standard register conventions: 367// amt = %rdi 368// value = %rsi 369// Returns: old 
value in %rdi 370// NB: OSAtomicAdd64 returns the new value, so clients will add amt to %rdi 371 372 .code64 373Latomic_add64_mp_64: 374 lock 375 xaddq %rdi, (%rsi) 376 ret 377 378 COMMPAGE_DESCRIPTOR(atomic_add64_mp_64,_COMM_PAGE_ATOMIC_ADD64,0,kUP) 379 380 .code64 381Latomic_add64_up_64: 382 xaddq %rdi, (%rsi) 383 ret 384 385 COMMPAGE_DESCRIPTOR(atomic_add64_up_64,_COMM_PAGE_ATOMIC_ADD64,kUP,0) 386 387 388/* 389 * typedef volatile struct { 390 * void *opaque1; <-- ptr to 1st queue element or null 391 * long opaque2; <-- generation count 392 * } OSQueueHead; 393 * 394 * void OSAtomicEnqueue( OSQueueHead *list, void *new, size_t offset); 395 */ 396 397 .code64 398LAtomicEnqueue_64: // %rdi == list head, %rsi == new, %rdx == offset 399 pushq %rbx 400 movq %rsi,%rbx // %rbx == new 401 movq %rdx,%rsi // %rsi == offset 402 movq (%rdi),%rax // %rax == ptr to 1st element in Q 403 movq 8(%rdi),%rdx // %rdx == current generation count 4041: 405 movq %rax,(%rbx,%rsi)// link to old list head from new element 406 movq %rdx,%rcx 407 incq %rcx // increment generation count 408 lock // always lock for now... 409 cmpxchg16b (%rdi) // ...push on new element 410 jnz 1b 411 popq %rbx 412 ret 413 414 COMMPAGE_DESCRIPTOR(AtomicEnqueue_64,_COMM_PAGE_ENQUEUE,0,0) 415 416 417/* void* OSAtomicDequeue( OSQueueHead *list, size_t offset); */ 418 419 .code64 420LAtomicDequeue_64: // %rdi == list head, %rsi == offset 421 pushq %rbx 422 movq (%rdi),%rax // %rax == ptr to 1st element in Q 423 movq 8(%rdi),%rdx // %rdx == current generation count 4241: 425 testq %rax,%rax // list empty? 426 jz 2f // yes 427 movq (%rax,%rsi),%rbx // point to 2nd in Q 428 movq %rdx,%rcx 429 incq %rcx // increment generation count 430 lock // always lock for now... 431 cmpxchg16b (%rdi) // ...pop off 1st element 432 jnz 1b 4332: 434 popq %rbx 435 ret // ptr to 1st element in Q still in %rax 436 437 COMMPAGE_DESCRIPTOR(AtomicDequeue_64,_COMM_PAGE_DEQUEUE,0,0) 438