1/*- 2 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD$"); 29 30#include <sys/param.h> 31#include <sys/stdatomic.h> 32#include <sys/types.h> 33 34#include <machine/cpufunc.h> 35#include <machine/sysarch.h> 36 37#ifdef _KERNEL 38#include "opt_global.h" 39#endif 40 41/* 42 * Executing statements with interrupts disabled. 43 */ 44 45#if defined(_KERNEL) && !defined(SMP) 46#define WITHOUT_INTERRUPTS(s) do { \ 47 register_t regs; \ 48 \ 49 regs = intr_disable(); \ 50 do s while (0); \ 51 intr_restore(regs); \ 52} while (0) 53#endif /* _KERNEL && !SMP */ 54 55/* 56 * Memory barriers. 57 * 58 * It turns out __sync_synchronize() does not emit any code when used 59 * with GCC 4.2. Implement our own version that does work reliably. 60 * 61 * Although __sync_lock_test_and_set() should only perform an acquire 62 * barrier, make it do a full barrier like the other functions. This 63 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably. 64 */ 65 66#if defined(_KERNEL) && !defined(SMP) 67static inline void 68do_sync(void) 69{ 70 71 __asm volatile ("" : : : "memory"); 72} 73#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) 74static inline void 75do_sync(void) 76{ 77 78 __asm volatile ("dmb" : : : "memory"); 79} 80#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ 81 defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ 82 defined(__ARM_ARCH_6ZK__) 83static inline void 84do_sync(void) 85{ 86 87 __asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory"); 88} 89#endif 90 91#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS) 92 93/* 94 * New C11 __atomic_* API. 95 */ 96 97#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ 98 defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ 99 defined(__ARM_ARCH_6ZK__) || \ 100 defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) 101 102/* These systems should be supported by the compiler. */ 103 104#else /* __ARM_ARCH_5__ */ 105 106/* Clang doesn't allow us to reimplement builtins without this. */ 107#ifdef __clang__ 108#pragma redefine_extname __sync_synchronize_ext __sync_synchronize 109#define __sync_synchronize __sync_synchronize_ext 110#endif 111 112void 113__sync_synchronize(void) 114{ 115} 116 117#ifdef _KERNEL 118 119#ifdef SMP 120#error "On SMP systems we should have proper atomic operations." 121#endif 122 123/* 124 * On uniprocessor systems, we can perform the atomic operations by 125 * disabling interrupts. 126 */ 127 128#define EMIT_LOAD_N(N, uintN_t) \ 129uintN_t \ 130__atomic_load_##N(uintN_t *mem, int model __unused) \ 131{ \ 132 uintN_t ret; \ 133 \ 134 WITHOUT_INTERRUPTS({ \ 135 ret = *mem; \ 136 }); \ 137 return (ret); \ 138} 139 140#define EMIT_STORE_N(N, uintN_t) \ 141void \ 142__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused) \ 143{ \ 144 \ 145 WITHOUT_INTERRUPTS({ \ 146 *mem = val; \ 147 }); \ 148} 149 150#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t) \ 151_Bool \ 152__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected, \ 153 uintN_t desired, int success __unused, int failure __unused) \ 154{ \ 155 _Bool ret; \ 156 \ 157 WITHOUT_INTERRUPTS({ \ 158 if (*mem == *expected) { \ 159 *mem = desired; \ 160 ret = 1; \ 161 } else { \ 162 *expected = *mem; \ 163 ret = 0; \ 164 } \ 165 }); \ 166 return (ret); \ 167} 168 169#define EMIT_FETCH_OP_N(N, uintN_t, name, op) \ 170uintN_t \ 171__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused) \ 172{ \ 173 uintN_t ret; \ 174 \ 175 WITHOUT_INTERRUPTS({ \ 176 ret = *mem; \ 177 *mem op val; \ 178 }); \ 179 return (ret); \ 180} 181 182#define EMIT_ALL_OPS_N(N, uintN_t) \ 183EMIT_LOAD_N(N, uintN_t) \ 184EMIT_STORE_N(N, uintN_t) \ 185EMIT_COMPARE_EXCHANGE_N(N, uintN_t) \ 186EMIT_FETCH_OP_N(N, uintN_t, exchange, =) \ 187EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=) \ 188EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=) \ 189EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=) \ 190EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=) \ 191EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=) 192 193EMIT_ALL_OPS_N(1, uint8_t) 194EMIT_ALL_OPS_N(2, uint16_t) 195EMIT_ALL_OPS_N(4, uint32_t) 196EMIT_ALL_OPS_N(8, uint64_t) 197#undef EMIT_ALL_OPS_N 198 199#else /* !_KERNEL */ 200 201/* 202 * For userspace on uniprocessor systems, we can implement the atomic 203 * operations by using a Restartable Atomic Sequence. This makes the 204 * kernel restart the code from the beginning when interrupted. 205 */ 206 207#define EMIT_LOAD_N(N, uintN_t) \ 208uintN_t \ 209__atomic_load_##N(uintN_t *mem, int model __unused) \ 210{ \ 211 \ 212 return (*mem); \ 213} 214 215#define EMIT_STORE_N(N, uintN_t) \ 216void \ 217__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused) \ 218{ \ 219 \ 220 *mem = val; \ 221} 222 223#define EMIT_EXCHANGE_N(N, uintN_t, ldr, str) \ 224uintN_t \ 225__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused) \ 226{ \ 227 uint32_t old, temp, ras_start; \ 228 \ 229 ras_start = ARM_RAS_START; \ 230 __asm volatile ( \ 231 /* Set up Restartable Atomic Sequence. */ \ 232 "1:" \ 233 "\tadr %2, 1b\n" \ 234 "\tstr %2, [%5]\n" \ 235 "\tadr %2, 2f\n" \ 236 "\tstr %2, [%5, #4]\n" \ 237 \ 238 "\t"ldr" %0, %4\n" /* Load old value. */ \ 239 "\t"str" %3, %1\n" /* Store new value. */ \ 240 \ 241 /* Tear down Restartable Atomic Sequence. */ \ 242 "2:" \ 243 "\tmov %2, #0x00000000\n" \ 244 "\tstr %2, [%5]\n" \ 245 "\tmov %2, #0xffffffff\n" \ 246 "\tstr %2, [%5, #4]\n" \ 247 : "=&r" (old), "=m" (*mem), "=&r" (temp) \ 248 : "r" (val), "m" (*mem), "r" (ras_start)); \ 249 return (old); \ 250} 251 252#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq) \ 253_Bool \ 254__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected, \ 255 uintN_t desired, int success __unused, int failure __unused) \ 256{ \ 257 uint32_t expected, old, temp, ras_start; \ 258 \ 259 expected = *pexpected; \ 260 ras_start = ARM_RAS_START; \ 261 __asm volatile ( \ 262 /* Set up Restartable Atomic Sequence. */ \ 263 "1:" \ 264 "\tadr %2, 1b\n" \ 265 "\tstr %2, [%6]\n" \ 266 "\tadr %2, 2f\n" \ 267 "\tstr %2, [%6, #4]\n" \ 268 \ 269 "\t"ldr" %0, %5\n" /* Load old value. */ \ 270 "\tcmp %0, %3\n" /* Compare to expected value. */\ 271 "\t"streq" %4, %1\n" /* Store new value. */ \ 272 \ 273 /* Tear down Restartable Atomic Sequence. */ \ 274 "2:" \ 275 "\tmov %2, #0x00000000\n" \ 276 "\tstr %2, [%6]\n" \ 277 "\tmov %2, #0xffffffff\n" \ 278 "\tstr %2, [%6, #4]\n" \ 279 : "=&r" (old), "=m" (*mem), "=&r" (temp) \ 280 : "r" (expected), "r" (desired), "m" (*mem), \ 281 "r" (ras_start)); \ 282 if (old == expected) { \ 283 return (1); \ 284 } else { \ 285 *pexpected = old; \ 286 return (0); \ 287 } \ 288} 289 290#define EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op) \ 291uintN_t \ 292__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused) \ 293{ \ 294 uint32_t old, temp, ras_start; \ 295 \ 296 ras_start = ARM_RAS_START; \ 297 __asm volatile ( \ 298 /* Set up Restartable Atomic Sequence. */ \ 299 "1:" \ 300 "\tadr %2, 1b\n" \ 301 "\tstr %2, [%5]\n" \ 302 "\tadr %2, 2f\n" \ 303 "\tstr %2, [%5, #4]\n" \ 304 \ 305 "\t"ldr" %0, %4\n" /* Load old value. */ \ 306 "\t"op" %2, %0, %3\n" /* Calculate new value. */ \ 307 "\t"str" %2, %1\n" /* Store new value. */ \ 308 \ 309 /* Tear down Restartable Atomic Sequence. */ \ 310 "2:" \ 311 "\tmov %2, #0x00000000\n" \ 312 "\tstr %2, [%5]\n" \ 313 "\tmov %2, #0xffffffff\n" \ 314 "\tstr %2, [%5, #4]\n" \ 315 : "=&r" (old), "=m" (*mem), "=&r" (temp) \ 316 : "r" (val), "m" (*mem), "r" (ras_start)); \ 317 return (old); \ 318} 319 320#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq) \ 321EMIT_LOAD_N(N, uintN_t) \ 322EMIT_STORE_N(N, uintN_t) \ 323EMIT_EXCHANGE_N(N, uintN_t, ldr, str) \ 324EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq) \ 325EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add") \ 326EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and") \ 327EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr") \ 328EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub") \ 329EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor") 330 331EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq") 332EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq") 333EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq") 334#undef EMIT_ALL_OPS_N 335 336#endif /* _KERNEL */ 337 338#endif 339 340#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */ 341 342#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS) 343 344#ifdef __clang__ 345#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1 346#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2 347#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4 348#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1 349#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2 350#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4 351#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1 352#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2 353#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4 354#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1 355#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2 356#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4 357#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1 358#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2 359#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4 360#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1 361#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2 362#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4 363#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1 364#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2 365#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4 366#endif 367 368/* 369 * Old __sync_* API. 370 */ 371 372#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ 373 defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ 374 defined(__ARM_ARCH_6ZK__) || \ 375 defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) 376 377/* Implementations for old GCC versions, lacking support for atomics. */ 378 379typedef union { 380 uint8_t v8[4]; 381 uint32_t v32; 382} reg_t; 383 384/* 385 * Given a memory address pointing to an 8-bit or 16-bit integer, return 386 * the address of the 32-bit word containing it. 387 */ 388 389static inline uint32_t * 390round_to_word(void *ptr) 391{ 392 393 return ((uint32_t *)((intptr_t)ptr & ~3)); 394} 395 396/* 397 * Utility functions for loading and storing 8-bit and 16-bit integers 398 * in 32-bit words at an offset corresponding with the location of the 399 * atomic variable. 400 */ 401 402static inline void 403put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val) 404{ 405 size_t offset; 406 407 offset = (intptr_t)offset_ptr & 3; 408 r->v8[offset] = val; 409} 410 411static inline uint8_t 412get_1(const reg_t *r, const uint8_t *offset_ptr) 413{ 414 size_t offset; 415 416 offset = (intptr_t)offset_ptr & 3; 417 return (r->v8[offset]); 418} 419 420static inline void 421put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val) 422{ 423 size_t offset; 424 union { 425 uint16_t in; 426 uint8_t out[2]; 427 } bytes; 428 429 offset = (intptr_t)offset_ptr & 3; 430 bytes.in = val; 431 r->v8[offset] = bytes.out[0]; 432 r->v8[offset + 1] = bytes.out[1]; 433} 434 435static inline uint16_t 436get_2(const reg_t *r, const uint16_t *offset_ptr) 437{ 438 size_t offset; 439 union { 440 uint8_t in[2]; 441 uint16_t out; 442 } bytes; 443 444 offset = (intptr_t)offset_ptr & 3; 445 bytes.in[0] = r->v8[offset]; 446 bytes.in[1] = r->v8[offset + 1]; 447 return (bytes.out); 448} 449 450/* 451 * 8-bit and 16-bit routines. 452 * 453 * These operations are not natively supported by the CPU, so we use 454 * some shifting and bitmasking on top of the 32-bit instructions. 455 */ 456 457#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t) \ 458uintN_t \ 459__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val) \ 460{ \ 461 uint32_t *mem32; \ 462 reg_t val32, negmask, old; \ 463 uint32_t temp1, temp2; \ 464 \ 465 mem32 = round_to_word(mem); \ 466 val32.v32 = 0x00000000; \ 467 put_##N(&val32, mem, val); \ 468 negmask.v32 = 0xffffffff; \ 469 put_##N(&negmask, mem, 0); \ 470 \ 471 do_sync(); \ 472 __asm volatile ( \ 473 "1:" \ 474 "\tldrex %0, %6\n" /* Load old value. */ \ 475 "\tand %2, %5, %0\n" /* Remove the old value. */ \ 476 "\torr %2, %2, %4\n" /* Put in the new value. */ \ 477 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 478 "\tcmp %3, #0\n" /* Did it succeed? */ \ 479 "\tbne 1b\n" /* Spin if failed. */ \ 480 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ 481 "=&r" (temp2) \ 482 : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32)); \ 483 return (get_##N(&old, mem)); \ 484} 485 486EMIT_LOCK_TEST_AND_SET_N(1, uint8_t) 487EMIT_LOCK_TEST_AND_SET_N(2, uint16_t) 488 489#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \ 490uintN_t \ 491__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected, \ 492 uintN_t desired) \ 493{ \ 494 uint32_t *mem32; \ 495 reg_t expected32, desired32, posmask, old; \ 496 uint32_t negmask, temp1, temp2; \ 497 \ 498 mem32 = round_to_word(mem); \ 499 expected32.v32 = 0x00000000; \ 500 put_##N(&expected32, mem, expected); \ 501 desired32.v32 = 0x00000000; \ 502 put_##N(&desired32, mem, desired); \ 503 posmask.v32 = 0x00000000; \ 504 put_##N(&posmask, mem, ~0); \ 505 negmask = ~posmask.v32; \ 506 \ 507 do_sync(); \ 508 __asm volatile ( \ 509 "1:" \ 510 "\tldrex %0, %8\n" /* Load old value. */ \ 511 "\tand %2, %6, %0\n" /* Isolate the old value. */ \ 512 "\tcmp %2, %4\n" /* Compare to expected value. */\ 513 "\tbne 2f\n" /* Values are unequal. */ \ 514 "\tand %2, %7, %0\n" /* Remove the old value. */ \ 515 "\torr %2, %5\n" /* Put in the new value. */ \ 516 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 517 "\tcmp %3, #0\n" /* Did it succeed? */ \ 518 "\tbne 1b\n" /* Spin if failed. */ \ 519 "2:" \ 520 : "=&r" (old), "=m" (*mem32), "=&r" (temp1), \ 521 "=&r" (temp2) \ 522 : "r" (expected32.v32), "r" (desired32.v32), \ 523 "r" (posmask.v32), "r" (negmask), "m" (*mem32)); \ 524 return (get_##N(&old, mem)); \ 525} 526 527EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t) 528EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t) 529 530#define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op) \ 531uintN_t \ 532__sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \ 533{ \ 534 uint32_t *mem32; \ 535 reg_t val32, posmask, old; \ 536 uint32_t negmask, temp1, temp2; \ 537 \ 538 mem32 = round_to_word(mem); \ 539 val32.v32 = 0x00000000; \ 540 put_##N(&val32, mem, val); \ 541 posmask.v32 = 0x00000000; \ 542 put_##N(&posmask, mem, ~0); \ 543 negmask = ~posmask.v32; \ 544 \ 545 do_sync(); \ 546 __asm volatile ( \ 547 "1:" \ 548 "\tldrex %0, %7\n" /* Load old value. */ \ 549 "\t"op" %2, %0, %4\n" /* Calculate new value. */ \ 550 "\tand %2, %5\n" /* Isolate the new value. */ \ 551 "\tand %3, %6, %0\n" /* Remove the old value. */ \ 552 "\torr %2, %2, %3\n" /* Put in the new value. */ \ 553 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 554 "\tcmp %3, #0\n" /* Did it succeed? */ \ 555 "\tbne 1b\n" /* Spin if failed. */ \ 556 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ 557 "=&r" (temp2) \ 558 : "r" (val32.v32), "r" (posmask.v32), "r" (negmask), \ 559 "m" (*mem32)); \ 560 return (get_##N(&old, mem)); \ 561} 562 563EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add") 564EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub") 565EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add") 566EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub") 567 568#define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence) \ 569uintN_t \ 570__sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \ 571{ \ 572 uint32_t *mem32; \ 573 reg_t val32, old; \ 574 uint32_t temp1, temp2; \ 575 \ 576 mem32 = round_to_word(mem); \ 577 val32.v32 = idempotence ? 0xffffffff : 0x00000000; \ 578 put_##N(&val32, mem, val); \ 579 \ 580 do_sync(); \ 581 __asm volatile ( \ 582 "1:" \ 583 "\tldrex %0, %5\n" /* Load old value. */ \ 584 "\t"op" %2, %4, %0\n" /* Calculate new value. */ \ 585 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 586 "\tcmp %3, #0\n" /* Did it succeed? */ \ 587 "\tbne 1b\n" /* Spin if failed. */ \ 588 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ 589 "=&r" (temp2) \ 590 : "r" (val32.v32), "m" (*mem32)); \ 591 return (get_##N(&old, mem)); \ 592} 593 594EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1) 595EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0) 596EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0) 597EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1) 598EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0) 599EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0) 600 601/* 602 * 32-bit routines. 603 */ 604 605uint32_t 606__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val) 607{ 608 uint32_t old, temp; 609 610 do_sync(); 611 __asm volatile ( 612 "1:" 613 "\tldrex %0, %4\n" /* Load old value. */ 614 "\tstrex %2, %3, %1\n" /* Attempt to store. */ 615 "\tcmp %2, #0\n" /* Did it succeed? */ 616 "\tbne 1b\n" /* Spin if failed. */ 617 : "=&r" (old), "=m" (*mem), "=&r" (temp) 618 : "r" (val), "m" (*mem)); 619 return (old); 620} 621 622uint32_t 623__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected, 624 uint32_t desired) 625{ 626 uint32_t old, temp; 627 628 do_sync(); 629 __asm volatile ( 630 "1:" 631 "\tldrex %0, %5\n" /* Load old value. */ 632 "\tcmp %0, %3\n" /* Compare to expected value. */ 633 "\tbne 2f\n" /* Values are unequal. */ 634 "\tstrex %2, %4, %1\n" /* Attempt to store. */ 635 "\tcmp %2, #0\n" /* Did it succeed? */ 636 "\tbne 1b\n" /* Spin if failed. */ 637 "2:" 638 : "=&r" (old), "=m" (*mem), "=&r" (temp) 639 : "r" (expected), "r" (desired), "m" (*mem)); 640 return (old); 641} 642 643#define EMIT_FETCH_AND_OP_4(name, op) \ 644uint32_t \ 645__sync_##name##_4##_c(uint32_t *mem, uint32_t val) \ 646{ \ 647 uint32_t old, temp1, temp2; \ 648 \ 649 do_sync(); \ 650 __asm volatile ( \ 651 "1:" \ 652 "\tldrex %0, %5\n" /* Load old value. */ \ 653 "\t"op" %2, %0, %4\n" /* Calculate new value. */ \ 654 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 655 "\tcmp %3, #0\n" /* Did it succeed? */ \ 656 "\tbne 1b\n" /* Spin if failed. */ \ 657 : "=&r" (old), "=m" (*mem), "=&r" (temp1), \ 658 "=&r" (temp2) \ 659 : "r" (val), "m" (*mem)); \ 660 return (old); \ 661} 662 663EMIT_FETCH_AND_OP_4(fetch_and_add, "add") 664EMIT_FETCH_AND_OP_4(fetch_and_and, "and") 665EMIT_FETCH_AND_OP_4(fetch_and_or, "orr") 666EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub") 667EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor") 668 669#ifndef __clang__ 670__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1); 671__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2); 672__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4); 673__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1); 674__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2); 675__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4); 676__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1); 677__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2); 678__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4); 679__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1); 680__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2); 681__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4); 682__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1); 683__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2); 684__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4); 685__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1); 686__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2); 687__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4); 688__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1); 689__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2); 690__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4); 691#endif 692 693#else /* __ARM_ARCH_5__ */ 694 695#ifdef _KERNEL 696 697#ifdef SMP 698#error "On SMP systems we should have proper atomic operations." 699#endif 700 701/* 702 * On uniprocessor systems, we can perform the atomic operations by 703 * disabling interrupts. 704 */ 705 706#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \ 707uintN_t \ 708__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected, \ 709 uintN_t desired) \ 710{ \ 711 uintN_t ret; \ 712 \ 713 WITHOUT_INTERRUPTS({ \ 714 ret = *mem; \ 715 if (*mem == expected) \ 716 *mem = desired; \ 717 }); \ 718 return (ret); \ 719} 720 721#define EMIT_FETCH_AND_OP_N(N, uintN_t, name, op) \ 722uintN_t \ 723__sync_##name##_##N(uintN_t *mem, uintN_t val) \ 724{ \ 725 uintN_t ret; \ 726 \ 727 WITHOUT_INTERRUPTS({ \ 728 ret = *mem; \ 729 *mem op val; \ 730 }); \ 731 return (ret); \ 732} 733 734#define EMIT_ALL_OPS_N(N, uintN_t) \ 735EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \ 736EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =) \ 737EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=) \ 738EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=) \ 739EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=) \ 740EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=) \ 741EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=) 742 743EMIT_ALL_OPS_N(1, uint8_t) 744EMIT_ALL_OPS_N(2, uint16_t) 745EMIT_ALL_OPS_N(4, uint32_t) 746EMIT_ALL_OPS_N(8, uint64_t) 747#undef EMIT_ALL_OPS_N 748 749#else /* !_KERNEL */ 750 751/* 752 * For userspace on uniprocessor systems, we can implement the atomic 753 * operations by using a Restartable Atomic Sequence. This makes the 754 * kernel restart the code from the beginning when interrupted. 755 */ 756 757#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str) \ 758uintN_t \ 759__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val) \ 760{ \ 761 uint32_t old, temp, ras_start; \ 762 \ 763 ras_start = ARM_RAS_START; \ 764 __asm volatile ( \ 765 /* Set up Restartable Atomic Sequence. */ \ 766 "1:" \ 767 "\tadr %2, 1b\n" \ 768 "\tstr %2, [%5]\n" \ 769 "\tadr %2, 2f\n" \ 770 "\tstr %2, [%5, #4]\n" \ 771 \ 772 "\t"ldr" %0, %4\n" /* Load old value. */ \ 773 "\t"str" %3, %1\n" /* Store new value. */ \ 774 \ 775 /* Tear down Restartable Atomic Sequence. */ \ 776 "2:" \ 777 "\tmov %2, #0x00000000\n" \ 778 "\tstr %2, [%5]\n" \ 779 "\tmov %2, #0xffffffff\n" \ 780 "\tstr %2, [%5, #4]\n" \ 781 : "=&r" (old), "=m" (*mem), "=&r" (temp) \ 782 : "r" (val), "m" (*mem), "r" (ras_start)); \ 783 return (old); \ 784} 785 786#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq) \ 787uintN_t \ 788__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected, \ 789 uintN_t desired) \ 790{ \ 791 uint32_t old, temp, ras_start; \ 792 \ 793 ras_start = ARM_RAS_START; \ 794 __asm volatile ( \ 795 /* Set up Restartable Atomic Sequence. */ \ 796 "1:" \ 797 "\tadr %2, 1b\n" \ 798 "\tstr %2, [%6]\n" \ 799 "\tadr %2, 2f\n" \ 800 "\tstr %2, [%6, #4]\n" \ 801 \ 802 "\t"ldr" %0, %5\n" /* Load old value. */ \ 803 "\tcmp %0, %3\n" /* Compare to expected value. */\ 804 "\t"streq" %4, %1\n" /* Store new value. */ \ 805 \ 806 /* Tear down Restartable Atomic Sequence. */ \ 807 "2:" \ 808 "\tmov %2, #0x00000000\n" \ 809 "\tstr %2, [%6]\n" \ 810 "\tmov %2, #0xffffffff\n" \ 811 "\tstr %2, [%6, #4]\n" \ 812 : "=&r" (old), "=m" (*mem), "=&r" (temp) \ 813 : "r" (expected), "r" (desired), "m" (*mem), \ 814 "r" (ras_start)); \ 815 return (old); \ 816} 817 818#define EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op) \ 819uintN_t \ 820__sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \ 821{ \ 822 uint32_t old, temp, ras_start; \ 823 \ 824 ras_start = ARM_RAS_START; \ 825 __asm volatile ( \ 826 /* Set up Restartable Atomic Sequence. */ \ 827 "1:" \ 828 "\tadr %2, 1b\n" \ 829 "\tstr %2, [%5]\n" \ 830 "\tadr %2, 2f\n" \ 831 "\tstr %2, [%5, #4]\n" \ 832 \ 833 "\t"ldr" %0, %4\n" /* Load old value. */ \ 834 "\t"op" %2, %0, %3\n" /* Calculate new value. */ \ 835 "\t"str" %2, %1\n" /* Store new value. */ \ 836 \ 837 /* Tear down Restartable Atomic Sequence. */ \ 838 "2:" \ 839 "\tmov %2, #0x00000000\n" \ 840 "\tstr %2, [%5]\n" \ 841 "\tmov %2, #0xffffffff\n" \ 842 "\tstr %2, [%5, #4]\n" \ 843 : "=&r" (old), "=m" (*mem), "=&r" (temp) \ 844 : "r" (val), "m" (*mem), "r" (ras_start)); \ 845 return (old); \ 846} 847 848#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq) \ 849EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str) \ 850EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq) \ 851EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add") \ 852EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and") \ 853EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr") \ 854EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub") \ 855EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor") 856 857EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb") 858EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh") 859EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq") 860 861#ifndef __clang__ 862__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1); 863__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2); 864__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4); 865__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1); 866__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2); 867__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4); 868__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1); 869__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2); 870__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4); 871__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1); 872__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2); 873__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4); 874__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1); 875__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2); 876__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4); 877__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1); 878__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2); 879__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4); 880__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1); 881__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2); 882__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4); 883#endif 884 885#endif /* _KERNEL */ 886 887#endif 888 889#endif /* __SYNC_ATOMICS */ 890