/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define WITHOUT_INTERRUPTS(s) do { \
        register_t regs; \
 \
        regs = intr_disable(); \
        do s while (0); \
        intr_restore(regs); \
} while (0)
#endif /* _KERNEL && !SMP */

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2.  Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions.  This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

        __asm volatile ("" : : : "memory");
}
#elif __ARM_ARCH >= 6
static inline void
do_sync(void)
{

        dmb();
}
#endif

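/*
 * Illustrative sketch (compiled out): how callers typically reach the
 * functions defined in this file.  <stdatomic.h> operations that the
 * compiler does not expand inline are lowered to out-of-line calls
 * such as __atomic_fetch_add_4().  The variable and function names in
 * the sketch are hypothetical.
 */
#if 0
#include <stdatomic.h>

static atomic_uint counter;

static unsigned int
example_increment(void)
{

        /*
         * On ARMv4/v5 this may compile to a call to
         * __atomic_fetch_add_4(&counter, 1, __ATOMIC_SEQ_CST).
         */
        return (atomic_fetch_add_explicit(&counter, 1,
            memory_order_seq_cst));
}
#endif
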
#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

/* ARMv6+ systems should be supported by the compiler. */
#if __ARM_ARCH <= 5

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define EMIT_LOAD_N(N, uintN_t) \
uintN_t \
__atomic_load_##N(uintN_t *mem, int model __unused) \
{ \
        uintN_t ret; \
 \
        WITHOUT_INTERRUPTS({ \
                ret = *mem; \
        }); \
        return (ret); \
}

#define EMIT_STORE_N(N, uintN_t) \
void \
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused) \
{ \
 \
        WITHOUT_INTERRUPTS({ \
                *mem = val; \
        }); \
}

#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t) \
_Bool \
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected, \
    uintN_t desired, int success __unused, int failure __unused) \
{ \
        _Bool ret; \
 \
        WITHOUT_INTERRUPTS({ \
                if (*mem == *expected) { \
                        *mem = desired; \
                        ret = 1; \
                } else { \
                        *expected = *mem; \
                        ret = 0; \
                } \
        }); \
        return (ret); \
}

#define EMIT_FETCH_OP_N(N, uintN_t, name, op) \
uintN_t \
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused) \
{ \
        uintN_t ret; \
 \
        WITHOUT_INTERRUPTS({ \
                ret = *mem; \
                *mem op val; \
        }); \
        return (ret); \
}

#define EMIT_ALL_OPS_N(N, uintN_t) \
EMIT_LOAD_N(N, uintN_t) \
EMIT_STORE_N(N, uintN_t) \
EMIT_COMPARE_EXCHANGE_N(N, uintN_t) \
EMIT_FETCH_OP_N(N, uintN_t, exchange, =) \
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=) \
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=) \
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=) \
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=) \
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence.  This makes the
 * kernel restart the code from the beginning when interrupted.
 */

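/*
 * How the sequence works, as implemented by the assembly below: the
 * code registers itself by storing its start address at ARM_RAS_START
 * and its end address at ARM_RAS_START + 4.  When the thread is
 * interrupted while its program counter lies inside that window, the
 * kernel rewinds the program counter to the start, so the
 * load/modify/store below executes as a unit from the thread's point
 * of view.  The teardown stores (0x00000000 and 0xffffffff) mark the
 * window as empty again.  Note that this only works on uniprocessor
 * systems; another CPU could observe the sequence mid-flight.
 */
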
#define EMIT_LOAD_N(N, uintN_t) \
uintN_t \
__atomic_load_##N(uintN_t *mem, int model __unused) \
{ \
 \
        return (*mem); \
}

#define EMIT_STORE_N(N, uintN_t) \
void \
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused) \
{ \
 \
        *mem = val; \
}

#define EMIT_EXCHANGE_N(N, uintN_t, ldr, str) \
uintN_t \
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused) \
{ \
        uint32_t old, temp, ras_start; \
 \
        ras_start = ARM_RAS_START; \
        __asm volatile ( \
            /* Set up Restartable Atomic Sequence. */ \
            "1:" \
            "\tadr %2, 1b\n" \
            "\tstr %2, [%5]\n" \
            "\tadr %2, 2f\n" \
            "\tstr %2, [%5, #4]\n" \
 \
            "\t"ldr" %0, %4\n" /* Load old value. */ \
            "\t"str" %3, %1\n" /* Store new value. */ \
 \
            /* Tear down Restartable Atomic Sequence. */ \
            "2:" \
            "\tmov %2, #0x00000000\n" \
            "\tstr %2, [%5]\n" \
            "\tmov %2, #0xffffffff\n" \
            "\tstr %2, [%5, #4]\n" \
            : "=&r" (old), "=m" (*mem), "=&r" (temp) \
            : "r" (val), "m" (*mem), "r" (ras_start)); \
        return (old); \
}

#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq) \
_Bool \
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected, \
    uintN_t desired, int success __unused, int failure __unused) \
{ \
        uint32_t expected, old, temp, ras_start; \
 \
        expected = *pexpected; \
        ras_start = ARM_RAS_START; \
        __asm volatile ( \
            /* Set up Restartable Atomic Sequence. */ \
            "1:" \
            "\tadr %2, 1b\n" \
            "\tstr %2, [%6]\n" \
            "\tadr %2, 2f\n" \
            "\tstr %2, [%6, #4]\n" \
 \
            "\t"ldr" %0, %5\n" /* Load old value. */ \
            "\tcmp %0, %3\n" /* Compare to expected value. */ \
            "\t"streq" %4, %1\n" /* Store new value. */ \
 \
            /* Tear down Restartable Atomic Sequence. */ \
            "2:" \
            "\tmov %2, #0x00000000\n" \
            "\tstr %2, [%6]\n" \
            "\tmov %2, #0xffffffff\n" \
            "\tstr %2, [%6, #4]\n" \
            : "=&r" (old), "=m" (*mem), "=&r" (temp) \
            : "r" (expected), "r" (desired), "m" (*mem), \
              "r" (ras_start)); \
        if (old == expected) { \
                return (1); \
        } else { \
                *pexpected = old; \
                return (0); \
        } \
}

#define EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op, ret) \
uintN_t \
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused) \
{ \
        uint32_t old, new, ras_start; \
 \
        ras_start = ARM_RAS_START; \
        __asm volatile ( \
            /* Set up Restartable Atomic Sequence. */ \
            "1:" \
            "\tadr %2, 1b\n" \
            "\tstr %2, [%5]\n" \
            "\tadr %2, 2f\n" \
            "\tstr %2, [%5, #4]\n" \
 \
            "\t"ldr" %0, %4\n" /* Load old value. */ \
            "\t"op" %2, %0, %3\n" /* Calculate new value. */ \
            "\t"str" %2, %1\n" /* Store new value. */ \
 \
            /* Tear down Restartable Atomic Sequence. */ \
            "2:" \
            "\tmov %2, #0x00000000\n" \
            "\tstr %2, [%5]\n" \
            "\tmov %2, #0xffffffff\n" \
            "\tstr %2, [%5, #4]\n" \
            : "=&r" (old), "=m" (*mem), "=&r" (new) \
            : "r" (val), "m" (*mem), "r" (ras_start)); \
        return (ret); \
}

#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq) \
EMIT_LOAD_N(N, uintN_t) \
EMIT_STORE_N(N, uintN_t) \
EMIT_EXCHANGE_N(N, uintN_t, ldr, str) \
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq) \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add", old) \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and", old) \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr", old) \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub", old) \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor", old) \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, add_fetch, "add", new) \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, and_fetch, "and", new) \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, or_fetch, "orr", new) \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, sub_fetch, "sub", new) \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, xor_fetch, "eor", new)

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef EMIT_ALL_OPS_N

#endif /* _KERNEL */

#endif /* __ARM_ARCH <= 5 */

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

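/*
 * The block below provides the pre-C11 __sync_* primitives.  Clang
 * does not allow reimplementing its builtins under their own names,
 * so the implementations carry a "_c" suffix and are renamed to the
 * builtin symbols at the object-file level with redefine_extname;
 * GCC builds instead export the builtin names as aliases through
 * __strong_reference() further down.
 */
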
#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

#if __ARM_ARCH >= 6

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
        uint8_t v8[4];
        uint32_t v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

        return ((uint32_t *)((intptr_t)ptr & ~3));
}

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
        size_t offset;

        offset = (intptr_t)offset_ptr & 3;
        r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
        size_t offset;

        offset = (intptr_t)offset_ptr & 3;
        return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
        size_t offset;
        union {
                uint16_t in;
                uint8_t out[2];
        } bytes;

        offset = (intptr_t)offset_ptr & 3;
        bytes.in = val;
        r->v8[offset] = bytes.out[0];
        r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
        size_t offset;
        union {
                uint8_t in[2];
                uint16_t out;
        } bytes;

        offset = (intptr_t)offset_ptr & 3;
        bytes.in[0] = r->v8[offset];
        bytes.in[1] = r->v8[offset + 1];
        return (bytes.out);
}

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

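/*
 * Illustrative sketch (compiled out) of the byte-lane arithmetic
 * above.  For a naturally aligned uint16_t at the hypothetical
 * address 0x1006, round_to_word() yields 0x1004 and the offset within
 * the word is 2, so put_2() and get_2() operate on bytes 2 and 3 of
 * the 32-bit container (the upper halfword on a little-endian
 * configuration).
 */
#if 0
static void
example_byte_lanes(void)
{
        uint16_t *p = (uint16_t *)0x1006;       /* Hypothetical address. */
        reg_t r;

        r.v32 = 0;
        put_2(&r, p, 0xabcd);           /* Writes r.v8[2] and r.v8[3]. */
        (void)get_2(&r, p);             /* Reads 0xabcd back. */
        (void)round_to_word(p);         /* Returns (uint32_t *)0x1004. */
}
#endif
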
#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t) \
uintN_t \
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val) \
{ \
        uint32_t *mem32; \
        reg_t val32, negmask, old; \
        uint32_t temp1, temp2; \
 \
        mem32 = round_to_word(mem); \
        val32.v32 = 0x00000000; \
        put_##N(&val32, mem, val); \
        negmask.v32 = 0xffffffff; \
        put_##N(&negmask, mem, 0); \
 \
        do_sync(); \
        __asm volatile ( \
            "1:" \
            "\tldrex %0, %6\n" /* Load old value. */ \
            "\tand %2, %5, %0\n" /* Remove the old value. */ \
            "\torr %2, %2, %4\n" /* Put in the new value. */ \
            "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
            "\tcmp %3, #0\n" /* Did it succeed? */ \
            "\tbne 1b\n" /* Spin if failed. */ \
            : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
              "=&r" (temp2) \
            : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32)); \
        return (get_##N(&old, mem)); \
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \
uintN_t \
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected, \
    uintN_t desired) \
{ \
        uint32_t *mem32; \
        reg_t expected32, desired32, posmask, old; \
        uint32_t negmask, temp1, temp2; \
 \
        mem32 = round_to_word(mem); \
        expected32.v32 = 0x00000000; \
        put_##N(&expected32, mem, expected); \
        desired32.v32 = 0x00000000; \
        put_##N(&desired32, mem, desired); \
        posmask.v32 = 0x00000000; \
        put_##N(&posmask, mem, ~0); \
        negmask = ~posmask.v32; \
 \
        do_sync(); \
        __asm volatile ( \
            "1:" \
            "\tldrex %0, %8\n" /* Load old value. */ \
            "\tand %2, %6, %0\n" /* Isolate the old value. */ \
            "\tcmp %2, %4\n" /* Compare to expected value. */ \
            "\tbne 2f\n" /* Values are unequal. */ \
            "\tand %2, %7, %0\n" /* Remove the old value. */ \
            "\torr %2, %5\n" /* Put in the new value. */ \
            "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
            "\tcmp %3, #0\n" /* Did it succeed? */ \
            "\tbne 1b\n" /* Spin if failed. */ \
            "2:" \
            : "=&r" (old), "=m" (*mem32), "=&r" (temp1), \
              "=&r" (temp2) \
            : "r" (expected32.v32), "r" (desired32.v32), \
              "r" (posmask.v32), "r" (negmask), "m" (*mem32)); \
        return (get_##N(&old, mem)); \
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op) \
uintN_t \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \
{ \
        uint32_t *mem32; \
        reg_t val32, posmask, old; \
        uint32_t negmask, temp1, temp2; \
 \
        mem32 = round_to_word(mem); \
        val32.v32 = 0x00000000; \
        put_##N(&val32, mem, val); \
        posmask.v32 = 0x00000000; \
        put_##N(&posmask, mem, ~0); \
        negmask = ~posmask.v32; \
 \
        do_sync(); \
        __asm volatile ( \
            "1:" \
            "\tldrex %0, %7\n" /* Load old value. */ \
            "\t"op" %2, %0, %4\n" /* Calculate new value. */ \
            "\tand %2, %5\n" /* Isolate the new value. */ \
            "\tand %3, %6, %0\n" /* Remove the old value. */ \
            "\torr %2, %2, %3\n" /* Put in the new value. */ \
            "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
            "\tcmp %3, #0\n" /* Did it succeed? */ \
            "\tbne 1b\n" /* Spin if failed. */ \
            : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
              "=&r" (temp2) \
            : "r" (val32.v32), "r" (posmask.v32), "r" (negmask), \
              "m" (*mem32)); \
        return (get_##N(&old, mem)); \
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

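/*
 * The "idempotence" parameter of the macro below deserves a note: the
 * bitwise operations never carry between byte lanes, so the bytes
 * outside the target location can be preserved without a masking
 * step, simply by seeding them with the identity element of the
 * operation: 0xff for AND (x & 0xff == x) and 0x00 for OR and XOR
 * (x | 0 == x, x ^ 0 == x).  That is what the initial value of
 * val32.v32 selects.
 */
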
#define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence) \
uintN_t \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \
{ \
        uint32_t *mem32; \
        reg_t val32, old; \
        uint32_t temp1, temp2; \
 \
        mem32 = round_to_word(mem); \
        val32.v32 = idempotence ? 0xffffffff : 0x00000000; \
        put_##N(&val32, mem, val); \
 \
        do_sync(); \
        __asm volatile ( \
            "1:" \
            "\tldrex %0, %5\n" /* Load old value. */ \
            "\t"op" %2, %4, %0\n" /* Calculate new value. */ \
            "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
            "\tcmp %3, #0\n" /* Did it succeed? */ \
            "\tbne 1b\n" /* Spin if failed. */ \
            : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
              "=&r" (temp2) \
            : "r" (val32.v32), "m" (*mem32)); \
        return (get_##N(&old, mem)); \
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
        uint32_t old, temp;

        do_sync();
        __asm volatile (
            "1:"
            "\tldrex %0, %4\n" /* Load old value. */
            "\tstrex %2, %3, %1\n" /* Attempt to store. */
            "\tcmp %2, #0\n" /* Did it succeed? */
            "\tbne 1b\n" /* Spin if failed. */
            : "=&r" (old), "=m" (*mem), "=&r" (temp)
            : "r" (val), "m" (*mem));
        return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
        uint32_t old, temp;

        do_sync();
        __asm volatile (
            "1:"
            "\tldrex %0, %5\n" /* Load old value. */
            "\tcmp %0, %3\n" /* Compare to expected value. */
            "\tbne 2f\n" /* Values are unequal. */
            "\tstrex %2, %4, %1\n" /* Attempt to store. */
            "\tcmp %2, #0\n" /* Did it succeed? */
            "\tbne 1b\n" /* Spin if failed. */
            "2:"
            : "=&r" (old), "=m" (*mem), "=&r" (temp)
            : "r" (expected), "r" (desired), "m" (*mem));
        return (old);
}

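/*
 * Illustrative sketch (compiled out): a minimal spinlock on top of
 * the exchange primitive above, relying on the full-barrier semantics
 * this file gives __sync_lock_test_and_set() (see the comment near
 * do_sync()).  A conventional caller would release the lock with
 * __sync_lock_release(), which this file does not provide, so the
 * sketch releases with another full-barrier exchange.  The names are
 * hypothetical.
 */
#if 0
static uint32_t example_lock;

static void
example_acquire(void)
{

        /* Loop until the previous value was 0 (unlocked). */
        while (__sync_lock_test_and_set_4_c(&example_lock, 1) != 0)
                ;
}

static void
example_release(void)
{

        (void)__sync_lock_test_and_set_4_c(&example_lock, 0);
}
#endif
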
#define EMIT_FETCH_AND_OP_4(name, op) \
uint32_t \
__sync_##name##_4##_c(uint32_t *mem, uint32_t val) \
{ \
        uint32_t old, temp1, temp2; \
 \
        do_sync(); \
        __asm volatile ( \
            "1:" \
            "\tldrex %0, %5\n" /* Load old value. */ \
            "\t"op" %2, %0, %4\n" /* Calculate new value. */ \
            "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
            "\tcmp %3, #0\n" /* Did it succeed? */ \
            "\tbne 1b\n" /* Spin if failed. */ \
            : "=&r" (old), "=m" (*mem), "=&r" (temp1), \
              "=&r" (temp2) \
            : "r" (val), "m" (*mem)); \
        return (old); \
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif /* !__clang__ */

#else /* __ARM_ARCH < 6 */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \
uintN_t \
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected, \
    uintN_t desired) \
{ \
        uintN_t ret; \
 \
        WITHOUT_INTERRUPTS({ \
                ret = *mem; \
                if (*mem == expected) \
                        *mem = desired; \
        }); \
        return (ret); \
}

#define EMIT_FETCH_AND_OP_N(N, uintN_t, name, op) \
uintN_t \
__sync_##name##_##N(uintN_t *mem, uintN_t val) \
{ \
        uintN_t ret; \
 \
        WITHOUT_INTERRUPTS({ \
                ret = *mem; \
                *mem op val; \
        }); \
        return (ret); \
}

#define EMIT_ALL_OPS_N(N, uintN_t) \
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =) \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=) \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=) \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=) \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=) \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence.  This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str) \
uintN_t \
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val) \
{ \
        uint32_t old, temp, ras_start; \
 \
        ras_start = ARM_RAS_START; \
        __asm volatile ( \
            /* Set up Restartable Atomic Sequence. */ \
            "1:" \
            "\tadr %2, 1b\n" \
            "\tstr %2, [%5]\n" \
            "\tadr %2, 2f\n" \
            "\tstr %2, [%5, #4]\n" \
 \
            "\t"ldr" %0, %4\n" /* Load old value. */ \
            "\t"str" %3, %1\n" /* Store new value. */ \
 \
            /* Tear down Restartable Atomic Sequence. */ \
            "2:" \
            "\tmov %2, #0x00000000\n" \
            "\tstr %2, [%5]\n" \
            "\tmov %2, #0xffffffff\n" \
            "\tstr %2, [%5, #4]\n" \
            : "=&r" (old), "=m" (*mem), "=&r" (temp) \
            : "r" (val), "m" (*mem), "r" (ras_start)); \
        return (old); \
}

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq) \
uintN_t \
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected, \
    uintN_t desired) \
{ \
        uint32_t old, temp, ras_start; \
 \
        ras_start = ARM_RAS_START; \
        __asm volatile ( \
            /* Set up Restartable Atomic Sequence. */ \
            "1:" \
            "\tadr %2, 1b\n" \
            "\tstr %2, [%6]\n" \
            "\tadr %2, 2f\n" \
            "\tstr %2, [%6, #4]\n" \
 \
            "\t"ldr" %0, %5\n" /* Load old value. */ \
            "\tcmp %0, %3\n" /* Compare to expected value. */ \
            "\t"streq" %4, %1\n" /* Store new value. */ \
 \
            /* Tear down Restartable Atomic Sequence. */ \
            "2:" \
            "\tmov %2, #0x00000000\n" \
            "\tstr %2, [%6]\n" \
            "\tmov %2, #0xffffffff\n" \
            "\tstr %2, [%6, #4]\n" \
            : "=&r" (old), "=m" (*mem), "=&r" (temp) \
            : "r" (expected), "r" (desired), "m" (*mem), \
              "r" (ras_start)); \
        return (old); \
}

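/*
 * Illustrative sketch (compiled out): the usual retry loop on top of
 * a value-returning compare-and-swap such as the one emitted above.
 * The names are hypothetical; the "_c"-suffixed functions are
 * exported under the builtin names through the aliases at the end of
 * this file.
 */
#if 0
static uint32_t example_counter;

static uint32_t
example_bounded_increment(uint32_t limit)
{
        uint32_t old, new;

        do {
                old = example_counter;
                new = old < limit ? old + 1 : old;
        } while (__sync_val_compare_and_swap_4_c(&example_counter,
            old, new) != old);
        return (new);
}
#endif
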
#define EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op) \
uintN_t \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \
{ \
        uint32_t old, temp, ras_start; \
 \
        ras_start = ARM_RAS_START; \
        __asm volatile ( \
            /* Set up Restartable Atomic Sequence. */ \
            "1:" \
            "\tadr %2, 1b\n" \
            "\tstr %2, [%5]\n" \
            "\tadr %2, 2f\n" \
            "\tstr %2, [%5, #4]\n" \
 \
            "\t"ldr" %0, %4\n" /* Load old value. */ \
            "\t"op" %2, %0, %3\n" /* Calculate new value. */ \
            "\t"str" %2, %1\n" /* Store new value. */ \
 \
            /* Tear down Restartable Atomic Sequence. */ \
            "2:" \
            "\tmov %2, #0x00000000\n" \
            "\tstr %2, [%5]\n" \
            "\tmov %2, #0xffffffff\n" \
            "\tstr %2, [%5, #4]\n" \
            : "=&r" (old), "=m" (*mem), "=&r" (temp) \
            : "r" (val), "m" (*mem), "r" (ras_start)); \
        return (old); \
}

#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq) \
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str) \
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq) \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add") \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and") \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr") \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub") \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

/*
 * Clang's integrated assembler expects unified (UAL) mnemonics, in
 * which the condition code follows the size suffix ("strbeq"); the
 * GNU assembler in divided-syntax mode wants the condition first
 * ("streqb").
 */
#ifdef __clang__
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
#else
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
#endif
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif /* !__clang__ */

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __SYNC_ATOMICS */