/* stdatomic.c revision 255092 */
/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/arm/arm/stdatomic.c 255092 2013-08-31 08:50:45Z theraven $");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/cpufunc.h>
#include <machine/sysarch.h>

#ifdef _KERNEL
#include "opt_global.h"
#endif

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
/*
 * WITHOUT_INTERRUPTS(s): execute statement block 's' with interrupts
 * disabled, restoring the previous interrupt state afterwards.  On a
 * uniprocessor kernel this is sufficient to make 's' atomic with
 * respect to other code running on the CPU.
 */
#define WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	/* Uniprocessor kernel: a compiler barrier is all that is needed. */
	__asm volatile ("" : : : "memory");
}
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
static inline void
do_sync(void)
{

	/* ARMv7: Data Memory Barrier instruction. */
	__asm volatile ("dmb" : : : "memory");
}
#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__)
static inline void
do_sync(void)
{

	/* ARMv6: data memory barrier via CP15 (c7, c10, 5). */
	__asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
}
#endif

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* These systems should be supported by the compiler. */

#else /* __ARM_ARCH_5__ */

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

/* ARMv5 has no barrier instruction; an empty function suffices here. */
void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

/* Atomic load of an N-byte unsigned integer. */
#define EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
	});								\
	return (ret);							\
}

/* Atomic store of an N-byte unsigned integer. */
#define EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	WITHOUT_INTERRUPTS({						\
		*mem = val;						\
	});								\
}

/*
 * Compare-and-exchange with C11 semantics: on failure the value
 * actually observed is written back to *expected.
 */
#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t)				\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	_Bool ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		if (*mem == *expected) {				\
			*mem = desired;					\
			ret = 1;					\
		} else {						\
			*expected = *mem;				\
			ret = 0;					\
		}							\
	});								\
	return (ret);							\
}

/* Read-modify-write with operator 'op'; returns the previous value. */
#define EMIT_FETCH_OP_N(N, uintN_t, name, op)				\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

/* Emit the full set of C11 atomic operations for one operand size. */
#define EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)					\
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)
EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

/* Plain load: a single aligned load is atomic on a uniprocessor. */
#define EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
									\
	return (*mem);							\
}

/* Plain store: a single aligned store is atomic on a uniprocessor. */
#define EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	*mem = val;							\
}

/*
 * Exchange as a Restartable Atomic Sequence: the sequence start and end
 * addresses are published at ARM_RAS_START / ARM_RAS_START+4, so the
 * kernel restarts the sequence at label 1 if it interrupts it before
 * label 2.  The teardown stores (0, 0xffffffff) disarm the RAS window.
 */
#define EMIT_EXCHANGE_N(N, uintN_t, ldr, str)				\
uintN_t									\
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

/*
 * Compare-and-exchange via RAS.  The conditional store ('streq') only
 * executes when the comparison succeeded; C11 failure semantics
 * (write-back of the observed value) are handled in C afterwards.
 */
#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)			\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	uint32_t expected, old, temp, ras_start;			\
									\
	expected = *pexpected;						\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%6]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%6, #4]\n"					\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%6]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%6, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	if (old == expected) {						\
		return (1);						\
	} else {							\
		*pexpected = old;					\
		return (0);						\
	}								\
}

/* Read-modify-write via RAS; 'op' is an ARM ALU mnemonic string. */
#define EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op)			\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op" %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

/* Emit the full set of C11 atomic operations for one operand size. */
#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)					\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)				\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef EMIT_ALL_OPS_N

#endif /* _KERNEL */

#endif

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

/*
 * Clang resolves __sync_* builtins internally; redirect our "_c"
 * implementations onto the canonical names at symbol level.
 */
#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* Implementations for old GCC versions, lacking support for atomics. */

/* A 32-bit register image overlaid with its four constituent bytes. */
typedef union {
	uint8_t v8[4];
	uint32_t v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

/* Store an 8-bit value into the register image at the byte position of
 * the atomic variable within its containing word. */
static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

/* Extract the 8-bit value at the variable's byte position. */
static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

/* Store a 16-bit value at the variable's position, byte by byte
 * (the union round-trip keeps native endianness). */
static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

/* Extract the 16-bit value at the variable's position. */
static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

/* ldrex/strex loop on the containing word; only the variable's bytes
 * are replaced (negmask preserves the neighbours). */
#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

/*
 * CAS on the containing word: compare only the variable's bytes
 * (isolated by posmask); bail out to label 2 when they differ.
 * NOTE(review): 'old' is a reg_t bound to an "=&r" constraint; this
 * relies on the union being exactly register-sized (4 bytes).
 */
#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp %2, %4\n"	/* Compare to expected value. */\
		"\tbne 2f\n"		/* Values are unequal. */	\
		"\tand %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

/*
 * add/sub on a sub-word value: compute on the whole word, then mask the
 * result so carries/borrows cannot leak into the neighbouring bytes.
 */
#define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand %2, %5\n"	/* Isolate the new value. */	\
		"\tand %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

/*
 * Bitwise ops act on the whole word: the bytes outside the variable are
 * padded with the operation's identity element ('idempotence' selects
 * all-ones for AND, all-zeroes for OR/XOR), so they pass through
 * unchanged.
 */
#define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp %0, %3\n"	/* Compare to expected value. */
		"\tbne 2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

/* 32-bit read-modify-write via ldrex/strex; returns previous value. */
#define EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#else /* __ARM_ARCH_5__ */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

/* CAS with __sync semantics: always returns the previous value. */
#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,		\
    uintN_t desired)							\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		if (*mem == expected)					\
			*mem = desired;					\
	});								\
	return (ret);							\
}

/* Read-modify-write with operator 'op'; returns the previous value. */
#define EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)			\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

/* Emit the full set of __sync operations for one operand size. */
#define EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)					\
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

/* Exchange via RAS; see the C11 section for the RAS protocol. */
#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)			\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

/* CAS via RAS; conditional store runs only when values matched. */
#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)		\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%6]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%6, #4]\n"					\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%6]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%6, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	return (old);							\
}

/* Read-modify-write via RAS; 'op' is an ARM ALU mnemonic string. */
#define EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op" %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

/* Emit the full set of __sync operations for one operand size. */
#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)				\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

/*
 * NOTE(review): pre-UAL "streqb"/"streqh" spellings are used here,
 * whereas the C11 section above uses UAL "strbeq"/"strheq" — confirm
 * the assembler in use accepts both forms.
 */
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

/* GCC: export the "_c" implementations under the canonical names. */
#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#endif /* _KERNEL */

#endif

#endif /* __SYNC_ATOMICS */