/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Run-time support for the C11 __atomic_* builtins and the legacy GCC
 * __sync_* builtins on ARM, derived from FreeBSD's
 * sys/arm/arm/stdatomic.c.  Depending on the architecture level
 * (ARMv5 vs. ARMv6/v7), SMP-ness, and kernel vs. userspace, the
 * operations are implemented with ldrex/strex loops, with interrupts
 * disabled, or with Restartable Atomic Sequences (RAS).
 */

#include <stdint.h>
#include <sys/cdefs.h>
// __FBSDID("$FreeBSD: head/sys/arm/arm/stdatomic.c 255738 2013-09-20 20:44:32Z zbb $");

/* Always build the __sync_* implementations in this copy of the file. */
#define __SYNC_ATOMICS
/*
 * Local fallback for FreeBSD's alias-generating helper: makes
 * `aliassym' a strong alias for `sym' (GCC/Clang attribute alias).
 */
#define __strong_reference(sym,aliassym) \
	extern __typeof (sym) aliassym __attribute__ ((__alias__ (#sym)))

#include <sys/param.h>
#include <sys/types.h>

#ifdef _KERNEL
#include "opt_global.h"
#endif

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
/*
 * Run statement block `s' with interrupts disabled.  On a
 * uniprocessor kernel this is sufficient to make the block atomic
 * with respect to everything except other masters (e.g. DMA).
 */
#define WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
/* Uniprocessor kernel: a compiler barrier is all that is required. */
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
/* ARMv7: use the architectural Data Memory Barrier instruction. */
static inline void
do_sync(void)
{

	__asm volatile ("dmb" : : : "memory");
}
#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__)
/* ARMv6: DMB is expressed as a CP15 operation (c7, c10, 5). */
static inline void
do_sync(void)
{

	__asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
}
#endif

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* These systems should be supported by the compiler. */

#else /* __ARM_ARCH_5__ */

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

/*
 * This path is only taken on ARMv5, which this file treats as
 * uniprocessor-only, so the barrier can be a no-op.
 */
void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

/* Emit __atomic_load_N(): plain load under disabled interrupts. */
#define EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
	});								\
	return (ret);							\
}

/* Emit __atomic_store_N(): plain store under disabled interrupts. */
#define EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	WITHOUT_INTERRUPTS({						\
		*mem = val;						\
	});								\
}

/*
 * Emit __atomic_compare_exchange_N(): on failure the observed value
 * is written back through *expected, per the C11 contract.
 */
#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t)				\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	_Bool ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		if (*mem == *expected) {				\
			*mem = desired;					\
			ret = 1;					\
		} else {						\
			*expected = *mem;				\
			ret = 0;					\
		}							\
	});								\
	return (ret);							\
}

/*
 * Emit a read-modify-write operation that returns the previous value
 * (`op' is a C compound-assignment operator, or `=' for exchange).
 */
#define EMIT_FETCH_OP_N(N, uintN_t, name, op)				\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

/* Emit the full set of C11 atomic operations for one operand size. */
#define EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)					\
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)
EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 *
 * NOTE(review): ARM_RAS_START is presumably the fixed per-process
 * address pair where the current RAS region ([start, end)) is
 * published for the kernel to inspect on interrupt -- confirm against
 * the kernel's RAS support.
 */

/* Plain load; a single aligned access is naturally atomic on UP. */
#define EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
									\
	return (*mem);							\
}

/* Plain store; a single aligned access is naturally atomic on UP. */
#define EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	*mem = val;							\
}

/*
 * Exchange: the load of the old value and the store of the new one
 * are bracketed by RAS setup/teardown, so an interrupt in between
 * restarts the whole sequence at label 1.
 */
#define EMIT_EXCHANGE_N(N, uintN_t, ldr, str)				\
uintN_t									\
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

/*
 * Compare-and-exchange inside a RAS region: the store is conditional
 * on the preceding cmp (`streq' variant), so an unequal value leaves
 * memory untouched.
 */
#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)			\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	uint32_t expected, old, temp, ras_start;			\
									\
	expected = *pexpected;						\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%6]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%6, #4]\n"					\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%6]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%6, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	if (old == expected) {						\
		return (1);						\
	} else {							\
		*pexpected = old;					\
		return (0);						\
	}								\
}

/*
 * Read-modify-write inside a RAS region; `op' is an ARM arithmetic or
 * logical mnemonic ("add", "and", ...).  Returns the previous value.
 */
#define EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op)			\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op" %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

/* Emit the full set of C11 atomic operations for one operand size. */
#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)					\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)				\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor")

/*
 * NOTE(review): this section spells the conditional stores in UAL
 * syntax ("strbeq"/"strheq"), while the __sync_* RAS section at the
 * end of the file uses pre-UAL "streqb"/"streqh".  Only one spelling
 * is accepted depending on the assembler's syntax mode -- verify
 * against the toolchain used for this port.
 */
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef EMIT_ALL_OPS_N

#endif /* _KERNEL */

#endif

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

/*
 * Under Clang the implementations below are compiled with a "_c"
 * suffix and mapped onto the real builtin names here; under GCC the
 * same mapping is done with __strong_reference() at the bottom.
 *
 * NOTE(review): __sync_fetch_and_add_4 is absent from this list, from
 * the EMIT_FETCH_AND_OP_4 invocations, and from both
 * __strong_reference lists below, unlike every sibling operation --
 * confirm that it is provided elsewhere in this port.
 */
#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* Implementations for old GCC versions, lacking support for atomics. */

/* A 32-bit word, also addressable as its four individual bytes. */
typedef union {
	uint8_t v8[4];
	uint32_t v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

/* Store an 8-bit value at the byte offset implied by offset_ptr. */
static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

/* Fetch the 8-bit value at the byte offset implied by offset_ptr. */
static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

/*
 * Store a 16-bit value byte by byte (punned through a union), at the
 * offset implied by offset_ptr.
 */
static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

/*
 * Fetch a 16-bit value byte by byte (punned through a union), at the
 * offset implied by offset_ptr.
 */
static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

/*
 * Exchange: an ldrex/strex loop on the containing 32-bit word, with
 * the bytes outside the operand preserved via negmask.
 */
#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

/*
 * Compare-and-swap on the containing 32-bit word: posmask isolates
 * the operand's bytes for the comparison, negmask preserves the
 * surrounding bytes on the store.
 */
#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp %2, %4\n"	/* Compare to expected value. */\
		"\tbne 2f\n"		/* Values are unequal. */	\
		"\tand %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

/*
 * Sub-word add/sub: compute on the whole word, then mask the result
 * so carries/borrows cannot leak into the neighbouring bytes.
 */
#define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand %2, %5\n"	/* Isolate the new value. */	\
		"\tand %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

/*
 * Sub-word bitwise ops need no masking: the operand value is padded
 * with the operation's idempotent element (all-ones for AND, zeroes
 * for OR/XOR) so the neighbouring bytes are left unchanged.
 */
#define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp %0, %3\n"	/* Compare to expected value. */
		"\tbne 2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

/* 32-bit read-modify-write via a plain ldrex/strex loop. */
#define EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

/* NOTE(review): no EMIT_FETCH_AND_OP_4(fetch_and_add, "add") here --
 * see the note at the redefine_extname block above. */
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

/* GCC: expose the "_c" implementations under the real builtin names. */
#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#else /* __ARM_ARCH_5__ */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

/* Compare-and-swap under disabled interrupts; returns the old value. */
#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,		\
    uintN_t desired)							\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		if (*mem == expected)					\
			*mem = desired;					\
	});								\
	return (ret);							\
}

/*
 * Read-modify-write under disabled interrupts; `op' is a C
 * compound-assignment operator (or `=' for test-and-set).
 */
#define EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)			\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

/* Emit the full set of __sync_* operations for one operand size. */
#define EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)					\
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

/* Exchange inside a RAS region (see the C11 RAS section above). */
#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)			\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

/*
 * Compare-and-swap inside a RAS region; the store is conditional on
 * the cmp, so an unequal value leaves memory untouched.
 */
#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)		\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%6]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%6, #4]\n"					\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%6]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%6, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	return (old);							\
}

/* Read-modify-write inside a RAS region; `op' is an ARM mnemonic. */
#define EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op" %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

/* Emit the full set of __sync_* operations for one operand size. */
#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)				\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

/*
 * NOTE(review): pre-UAL conditional-store spellings here
 * ("streqb"/"streqh") vs. UAL ("strbeq"/"strheq") in the C11 RAS
 * section above -- see the note there.
 */
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

/* GCC: expose the "_c" implementations under the real builtin names. */
#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#endif /* _KERNEL */

#endif

#endif /* __SYNC_ATOMICS */