/* stdatomic.c revision 251695 */
1/*- 2 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/arm/arm/stdatomic.c 251695 2013-06-13 18:46:49Z ed $"); 29 30#include <sys/param.h> 31#include <sys/stdatomic.h> 32#include <sys/types.h> 33 34#include <machine/cpufunc.h> 35 36#ifdef _KERNEL 37#include "opt_global.h" 38#endif 39 40/* 41 * Executing statements with interrupts disabled. 42 */ 43 44#ifndef SMP 45#define WITHOUT_INTERRUPTS(s) do { \ 46 register_t regs; \ 47 \ 48 regs = intr_disable(); \ 49 do s while (0); \ 50 intr_restore(regs); \ 51} while (0) 52#endif /* !SMP */ 53 54/* 55 * Memory barriers. 
56 * 57 * It turns out __sync_synchronize() does not emit any code when used 58 * with GCC 4.2. Implement our own version that does work reliably. 59 * 60 * Although __sync_lock_test_and_set() should only perform an acquire 61 * barrier, make it do a full barrier like the other functions. This 62 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably. 63 */ 64 65static inline void 66do_sync(void) 67{ 68 69#if defined(_KERNEL) && !defined(SMP) 70 __asm volatile ("" : : : "memory"); 71#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) 72 __asm volatile ("dmb" : : : "memory"); 73#else /* __ARM_ARCH_6__ */ 74 __asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory"); 75#endif 76} 77 78#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS) 79 80/* 81 * New C11 __atomic_* API. 82 */ 83 84#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ 85 defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ 86 defined(__ARM_ARCH_6ZK__) || \ 87 defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) 88 89/* These systems should be supported by the compiler. */ 90 91#elif defined(_KERNEL) 92 93#ifdef SMP 94#error "On SMP systems we should have proper atomic operations." 95#endif 96 97/* Clang doesn't allow us to reimplement builtins without this. */ 98#ifdef __clang__ 99#pragma redefine_extname __sync_synchronize_ext __sync_synchronize 100#define __sync_synchronize __sync_synchronize_ext 101#endif 102 103void 104__sync_synchronize(void) 105{ 106 107 do_sync(); 108} 109 110/* 111 * On uniprocessor systems, we can perform the atomic operations by 112 * disabling interrupts. 
113 */ 114 115#define EMIT_LOAD_N(N, uintN_t) \ 116uintN_t \ 117__atomic_load_##N(uintN_t *mem, int model __unused) \ 118{ \ 119 uintN_t ret; \ 120 \ 121 WITHOUT_INTERRUPTS({ \ 122 ret = *mem; \ 123 }); \ 124 return (ret); \ 125} 126 127#define EMIT_STORE_N(N, uintN_t) \ 128void \ 129__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused) \ 130{ \ 131 \ 132 WITHOUT_INTERRUPTS({ \ 133 *mem = val; \ 134 }); \ 135} 136 137#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t) \ 138_Bool \ 139__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected, \ 140 uintN_t desired, int success __unused, int failure __unused) \ 141{ \ 142 _Bool ret; \ 143 \ 144 WITHOUT_INTERRUPTS({ \ 145 if (*mem == *expected) { \ 146 *mem = desired; \ 147 ret = 1; \ 148 } else { \ 149 *expected = *mem; \ 150 ret = 0; \ 151 } \ 152 }); \ 153 return (ret); \ 154} 155 156#define EMIT_FETCH_OP_N(N, uintN_t, name, op) \ 157uintN_t \ 158__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused) \ 159{ \ 160 uintN_t ret; \ 161 \ 162 WITHOUT_INTERRUPTS({ \ 163 ret = *mem; \ 164 *mem op val; \ 165 }); \ 166 return (ret); \ 167} 168 169#define EMIT_ALL_OPS_N(N, uintN_t) \ 170EMIT_LOAD_N(N, uintN_t) \ 171EMIT_STORE_N(N, uintN_t) \ 172EMIT_COMPARE_EXCHANGE_N(N, uintN_t) \ 173EMIT_FETCH_OP_N(N, uintN_t, exchange, =) \ 174EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=) \ 175EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=) \ 176EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=) \ 177EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=) \ 178EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=) 179 180EMIT_ALL_OPS_N(1, uint8_t) 181EMIT_ALL_OPS_N(2, uint16_t) 182EMIT_ALL_OPS_N(4, uint32_t) 183EMIT_ALL_OPS_N(8, uint64_t) 184 185#else /* !__ARM_ARCH_6__ && !__ARM_ARCH_7__ && !_KERNEL */ 186 187/* XXX: Implement intrinsics for ARMv5 userspace. */ 188 189#endif 190 191#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */ 192 193/* 194 * Old __sync_* API. 
195 */ 196 197#if defined(__SYNC_ATOMICS) 198 199#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ 200 defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ 201 defined(__ARM_ARCH_6ZK__) || \ 202 defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) 203 204/* Implementations for old GCC versions, lacking support for atomics. */ 205 206typedef union { 207 uint8_t v8[4]; 208 uint32_t v32; 209} reg_t; 210 211/* 212 * Given a memory address pointing to an 8-bit or 16-bit integer, return 213 * the address of the 32-bit word containing it. 214 */ 215 216static inline uint32_t * 217round_to_word(void *ptr) 218{ 219 220 return ((uint32_t *)((intptr_t)ptr & ~3)); 221} 222 223/* 224 * Utility functions for loading and storing 8-bit and 16-bit integers 225 * in 32-bit words at an offset corresponding with the location of the 226 * atomic variable. 227 */ 228 229static inline void 230put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val) 231{ 232 size_t offset; 233 234 offset = (intptr_t)offset_ptr & 3; 235 r->v8[offset] = val; 236} 237 238static inline uint8_t 239get_1(const reg_t *r, const uint8_t *offset_ptr) 240{ 241 size_t offset; 242 243 offset = (intptr_t)offset_ptr & 3; 244 return (r->v8[offset]); 245} 246 247static inline void 248put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val) 249{ 250 size_t offset; 251 union { 252 uint16_t in; 253 uint8_t out[2]; 254 } bytes; 255 256 offset = (intptr_t)offset_ptr & 3; 257 bytes.in = val; 258 r->v8[offset] = bytes.out[0]; 259 r->v8[offset + 1] = bytes.out[1]; 260} 261 262static inline uint16_t 263get_2(const reg_t *r, const uint16_t *offset_ptr) 264{ 265 size_t offset; 266 union { 267 uint8_t in[2]; 268 uint16_t out; 269 } bytes; 270 271 offset = (intptr_t)offset_ptr & 3; 272 bytes.in[0] = r->v8[offset]; 273 bytes.in[1] = r->v8[offset + 1]; 274 return (bytes.out); 275} 276 277/* 278 * 8-bit and 16-bit routines. 
279 * 280 * These operations are not natively supported by the CPU, so we use 281 * some shifting and bitmasking on top of the 32-bit instructions. 282 */ 283 284#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t) \ 285uintN_t \ 286__sync_lock_test_and_set_##N(uintN_t *mem, uintN_t val) \ 287{ \ 288 uint32_t *mem32; \ 289 reg_t val32, negmask, old; \ 290 uint32_t temp1, temp2; \ 291 \ 292 mem32 = round_to_word(mem); \ 293 val32.v32 = 0x00000000; \ 294 put_##N(&val32, mem, val); \ 295 negmask.v32 = 0xffffffff; \ 296 put_##N(&negmask, mem, 0); \ 297 \ 298 do_sync(); \ 299 __asm volatile ( \ 300 "1:" \ 301 "\tldrex %0, %6\n" /* Load old value. */ \ 302 "\tand %2, %5, %0\n" /* Remove the old value. */ \ 303 "\torr %2, %2, %4\n" /* Put in the new value. */ \ 304 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 305 "\tcmp %3, #0\n" /* Did it succeed? */ \ 306 "\tbne 1b\n" /* Spin if failed. */ \ 307 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ 308 "=&r" (temp2) \ 309 : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32)); \ 310 return (get_##N(&old, mem)); \ 311} 312 313EMIT_LOCK_TEST_AND_SET_N(1, uint8_t) 314EMIT_LOCK_TEST_AND_SET_N(2, uint16_t) 315 316#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \ 317uintN_t \ 318__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected, \ 319 uintN_t desired) \ 320{ \ 321 uint32_t *mem32; \ 322 reg_t expected32, desired32, posmask, negmask, old; \ 323 uint32_t temp1, temp2; \ 324 \ 325 mem32 = round_to_word(mem); \ 326 expected32.v32 = 0x00000000; \ 327 put_##N(&expected32, mem, expected); \ 328 desired32.v32 = 0x00000000; \ 329 put_##N(&desired32, mem, desired); \ 330 posmask.v32 = 0x00000000; \ 331 put_##N(&posmask, mem, ~0); \ 332 negmask.v32 = ~posmask.v32; \ 333 \ 334 do_sync(); \ 335 __asm volatile ( \ 336 "1:" \ 337 "\tldrex %0, %8\n" /* Load old value. */ \ 338 "\tand %2, %6, %0\n" /* Isolate the old value. */ \ 339 "\tcmp %2, %4\n" /* Compare to expected value. */\ 340 "\tbne 2f\n" /* Values are unequal. 
*/ \ 341 "\tand %2, %7, %0\n" /* Remove the old value. */ \ 342 "\torr %2, %5\n" /* Put in the new value. */ \ 343 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 344 "\tcmp %3, #0\n" /* Did it succeed? */ \ 345 "\tbne 1b\n" /* Spin if failed. */ \ 346 "2:" \ 347 : "=&r" (old), "=m" (*mem32), "=&r" (temp1), \ 348 "=&r" (temp2) \ 349 : "r" (expected32.v32), "r" (desired32.v32), \ 350 "r" (posmask.v32), "r" (negmask.v32), "m" (*mem32)); \ 351 return (get_##N(&old, mem)); \ 352} 353 354EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t) 355EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t) 356 357#define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op) \ 358uintN_t \ 359__sync_##name##_##N(uintN_t *mem, uintN_t val) \ 360{ \ 361 uint32_t *mem32; \ 362 reg_t val32, posmask, negmask, old; \ 363 uint32_t temp1, temp2; \ 364 \ 365 mem32 = round_to_word(mem); \ 366 val32.v32 = 0x00000000; \ 367 put_##N(&val32, mem, val); \ 368 posmask.v32 = 0x00000000; \ 369 put_##N(&posmask, mem, ~0); \ 370 negmask.v32 = ~posmask.v32; \ 371 \ 372 do_sync(); \ 373 __asm volatile ( \ 374 "1:" \ 375 "\tldrex %0, %7\n" /* Load old value. */ \ 376 "\t"op" %2, %0, %4\n" /* Calculate new value. */ \ 377 "\tand %2, %5\n" /* Isolate the new value. */ \ 378 "\tand %3, %6, %0\n" /* Remove the old value. */ \ 379 "\torr %2, %2, %3\n" /* Put in the new value. */ \ 380 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 381 "\tcmp %3, #0\n" /* Did it succeed? */ \ 382 "\tbne 1b\n" /* Spin if failed. 
*/ \ 383 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ 384 "=&r" (temp2) \ 385 : "r" (val32.v32), "r" (posmask.v32), \ 386 "r" (negmask.v32), "m" (*mem32)); \ 387 return (get_##N(&old, mem)); \ 388} 389 390EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add") 391EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub") 392EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add") 393EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub") 394 395#define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence) \ 396uintN_t \ 397__sync_##name##_##N(uintN_t *mem, uintN_t val) \ 398{ \ 399 uint32_t *mem32; \ 400 reg_t val32, old; \ 401 uint32_t temp1, temp2; \ 402 \ 403 mem32 = round_to_word(mem); \ 404 val32.v32 = idempotence ? 0xffffffff : 0x00000000; \ 405 put_##N(&val32, mem, val); \ 406 \ 407 do_sync(); \ 408 __asm volatile ( \ 409 "1:" \ 410 "\tldrex %0, %5\n" /* Load old value. */ \ 411 "\t"op" %2, %4, %0\n" /* Calculate new value. */ \ 412 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 413 "\tcmp %3, #0\n" /* Did it succeed? */ \ 414 "\tbne 1b\n" /* Spin if failed. */ \ 415 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ 416 "=&r" (temp2) \ 417 : "r" (val32.v32), "m" (*mem32)); \ 418 return (get_##N(&old, mem)); \ 419} 420 421EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1) 422EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0) 423EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0) 424EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1) 425EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0) 426EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0) 427 428/* 429 * 32-bit routines. 430 */ 431 432uint32_t 433__sync_val_compare_and_swap_4(uint32_t *mem, uint32_t expected, 434 uint32_t desired) 435{ 436 uint32_t old, temp1, temp2; 437 438 do_sync(); 439 __asm volatile ( 440 "1:" 441 "\tldrex %0, %6\n" /* Load old value. 
*/ 442 "\tcmp %0, %4\n" /* Compare to expected value. */ 443 "\tbne 2f\n" /* Values are unequal. */ 444 "\tmov %2, %5\n" /* Value to store. */ 445 "\tstrex %3, %2, %1\n" /* Attempt to store. */ 446 "\tcmp %3, #0\n" /* Did it succeed? */ 447 "\tbne 1b\n" /* Spin if failed. */ 448 "2:" 449 : "=&r" (old), "=m" (*mem), "=&r" (temp1), "=&r" (temp2) 450 : "r" (expected), "r" (desired), "m" (*mem)); 451 return (old); 452} 453 454#define EMIT_FETCH_AND_OP_4(name, op) \ 455uint32_t \ 456__sync_##name##_4(uint32_t *mem, uint32_t val) \ 457{ \ 458 uint32_t old, temp1, temp2; \ 459 \ 460 do_sync(); \ 461 __asm volatile ( \ 462 "1:" \ 463 "\tldrex %0, %5\n" /* Load old value. */ \ 464 "\t"op"\n" /* Calculate new value. */ \ 465 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 466 "\tcmp %3, #0\n" /* Did it succeed? */ \ 467 "\tbne 1b\n" /* Spin if failed. */ \ 468 : "=&r" (old), "=m" (*mem), "=&r" (temp1), \ 469 "=&r" (temp2) \ 470 : "r" (val), "m" (*mem)); \ 471 return (old); \ 472} 473 474EMIT_FETCH_AND_OP_4(lock_test_and_set, "mov %2, %4") 475EMIT_FETCH_AND_OP_4(fetch_and_add, "add %2, %0, %4") 476EMIT_FETCH_AND_OP_4(fetch_and_and, "and %2, %0, %4") 477EMIT_FETCH_AND_OP_4(fetch_and_or, "orr %2, %0, %4") 478EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub %2, %0, %4") 479EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor %2, %0, %4") 480 481#elif defined(_KERNEL) 482 483#ifdef SMP 484#error "On SMP systems we should have proper atomic operations." 485#endif 486 487/* 488 * On uniprocessor systems, we can perform the atomic operations by 489 * disabling interrupts. 
490 */ 491 492#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \ 493uintN_t \ 494__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected, \ 495 uintN_t desired) \ 496{ \ 497 uintN_t ret; \ 498 \ 499 WITHOUT_INTERRUPTS({ \ 500 ret = *mem; \ 501 if (*mem == expected) \ 502 *mem = desired; \ 503 }); \ 504 return (ret); \ 505} 506 507#define EMIT_FETCH_AND_OP_N(N, uintN_t, name, op) \ 508uintN_t \ 509__sync_##name##_##N(uintN_t *mem, uintN_t val) \ 510{ \ 511 uintN_t ret; \ 512 \ 513 WITHOUT_INTERRUPTS({ \ 514 ret = *mem; \ 515 *mem op val; \ 516 }); \ 517 return (ret); \ 518} 519 520#define EMIT_ALL_OPS_N(N, uintN_t) \ 521EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \ 522EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =) \ 523EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=) \ 524EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=) \ 525EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=) \ 526EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=) \ 527EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=) 528 529EMIT_ALL_OPS_N(1, uint8_t) 530EMIT_ALL_OPS_N(2, uint16_t) 531EMIT_ALL_OPS_N(4, uint32_t) 532EMIT_ALL_OPS_N(8, uint64_t) 533 534#else /* !__ARM_ARCH_6__ && !__ARM_ARCH_7__ && !_KERNEL */ 535 536/* XXX: Implement intrinsics for ARMv5 userspace. */ 537 538#endif 539 540#endif /* __SYNC_ATOMICS */ 541